diff --git a/app/config/config.js b/app/config/config.js index d718617a..74383761 100644 --- a/app/config/config.js +++ b/app/config/config.js @@ -205,7 +205,7 @@ function loadConfig() { withAttackDataModel: { doc: 'Enable validation of POST and PUT request bodies using the ATT&CK Data Model', format: Boolean, - default: false, + default: true, env: 'VALIDATE_WITH_ADM_SCHEMAS', }, withOpenApi: { diff --git a/app/lib/import-safety.js b/app/lib/import-safety.js new file mode 100644 index 00000000..ab29e2b0 --- /dev/null +++ b/app/lib/import-safety.js @@ -0,0 +1,86 @@ +'use strict'; + +/** + * Import-safety primitives. + * + * STIX-bundle import has a strict invariant: when a bundle is imported, the + * persisted objects must be byte-faithful to the bundle's `stix` content. The + * import path is allowed to populate Workbench-private metadata (everything + * under `workspace`), but it must NEVER alter the imported `stix` fields, + * because the bundle is the source of truth and round-trip fidelity matters + * for re-imports and downstream consumers. + * + * However, the lifecycle hooks and event listeners that fire during a normal + * create/update — `beforeCreate`, `afterCreate`, and the cross-service + * listeners on domain events — were not originally written with import + * fidelity in mind. Several of them mutate `stix.*` as part of their normal + * work (e.g. AnalyticsService.beforeCreate stamps `stix.name` from the + * ATT&CK ID; AnalyticsService.handleAnalyticsReferenced rewrites + * `stix.external_references` to embed a URL to the parent detection + * strategy). Those mutations are correct for user-driven POST/PUT flows but + * are incorrect for an import. + * + * Rather than rely on convention ("remember to gate every stix write behind + * `if (!options.import) { ... }`"), we enforce the contract structurally: + * before invoking any hook or listener in import mode, the framework calls + * `deepFreezeStix(doc)`. In Node strict mode (`'use strict'` at the top of + * every service file), an attempted assignment to a frozen property throws + * a `TypeError`. That makes a missing import gate fail loudly at the + * violating line on the first import test, instead of silently corrupting + * bundle content. + * + * The local rule for hook/listener authors becomes: + * + * 1. Workspace mutations are always allowed. + * 2. If you need to mutate `stix.*`, wrap the block in + * `if (!options.import) { ... }` (or `if (!payload.options?.import)` + * inside a listener). The framework guarantees that `options.import` + * is the only state where stix is frozen, so a missing gate produces + * an immediate TypeError pointing at the offending line. + * + * Read freely from frozen stix — only writes are blocked. + */ + +/** + * Deep-freezes the `stix` field of a document so any attempt to write to + * `doc.stix.*`, including writes through nested arrays/objects (e.g. + * `doc.stix.external_references.unshift(...)`), throws a `TypeError`. + * + * `Object.freeze` is shallow on its own, so we walk the immediate children + * (one level into nested objects/arrays) and freeze them as well. Two + * levels is sufficient for STIX in practice: the deepest commonly mutated + * paths are array elements (e.g. `stix.external_references[i]` or + * `stix.kill_chain_phases[i]`), which the loop covers. + * + * Safe to call multiple times — `Object.isFrozen` short-circuits. + * Safe to call on Mongoose documents: only the underlying `_doc.stix` + * subtree is frozen; Mongoose's wrapper accessors remain functional, and + * Mongoose does not mutate the source object when constructing new + * documents during save/insertMany. + * + * @param {Object} doc - A document of the shape `{ stix, workspace }`, or + * a Mongoose document with the same shape. No-op if `doc` or `doc.stix` + * is missing. + */ +function deepFreezeStix(doc) { + const stix = doc?.stix; + if (!stix || typeof stix !== 'object' || Object.isFrozen(stix)) return; + + Object.freeze(stix); + + for (const value of Object.values(stix)) { + if (!value || typeof value !== 'object' || Object.isFrozen(value)) continue; + Object.freeze(value); + if (Array.isArray(value)) { + for (const item of value) { + if (item && typeof item === 'object' && !Object.isFrozen(item)) { + Object.freeze(item); + } + } + } + } +} + +module.exports = { + deepFreezeStix, +}; diff --git a/app/lib/validation-schemas.js b/app/lib/validation-schemas.js index ede9f767..c68f6a4b 100644 --- a/app/lib/validation-schemas.js +++ b/app/lib/validation-schemas.js @@ -91,6 +91,12 @@ const STIX_SCHEMAS = { 'x-mitre-collection': collectionSchema, }; +// Cache for locally-derived partial schemas. ADM does not export prebuilt +// partials for every STIX type; for those types we call `.partial()` ourselves. +// That call is expensive enough to show up in bulk-import profiles, so we +// memoize the result per STIX type. +const derivedPartialCache = new Map(); + /** * Get the schema to use for validating a STIX object. * @@ -102,7 +108,7 @@ const STIX_SCHEMAS = { * - `work-in-progress` uses partial validation so drafts can omit required fields * - every other workflow state uses full validation * - if ADM exports a dedicated partial schema, use it directly - * - otherwise, derive a partial schema locally with `.partial()` + * - otherwise, derive a partial schema locally with `.partial()` (memoized) * * @param {string} stixType - The STIX `type` being validated (e.g. "attack-pattern") * @param {string} status - The workflow state (e.g. "work-in-progress", "awaiting-review", "reviewed") @@ -120,7 +126,14 @@ function getSchema(stixType, status) { return isWip ? admSchemaRef.partial : admSchemaRef.full; } - return isWip ? admSchemaRef.partial() : admSchemaRef; + if (!isWip) return admSchemaRef; + + let derived = derivedPartialCache.get(stixType); + if (!derived) { + derived = admSchemaRef.partial(); + derivedPartialCache.set(stixType, derived); + } + return derived; } module.exports = { diff --git a/app/repository/_base.repository.js b/app/repository/_base.repository.js index 9a3443eb..fbd7abc7 100644 --- a/app/repository/_base.repository.js +++ b/app/repository/_base.repository.js @@ -399,6 +399,124 @@ class BaseRepository extends AbstractRepository { } } + /** + * Bulk insert. Used by the STIX bundle import path to avoid one round-trip + * per object. + * + * `ordered: false` keeps MongoDB inserting the remaining docs after an + * individual failure. `throwOnValidationError: true` is critical: without + * it, Mongoose's `insertMany` silently drops documents that fail schema + * validation (e.g. a required field is missing) and reports success for + * the remaining valid docs — leaving the caller unable to record per-object + * import errors. With the flag, Mongoose throws a `MongooseBulkWriteError` + * after attempting the valid docs, carrying both the validation errors and + * the `results` array we use to map each failure back to its source index. + * + * Discriminator-aware: each child model's `insertMany` sets the correct + * `__t` discriminator key automatically, so callers should invoke this on + * the type-specific repository (not the AttackObject parent). + * + * @param {Array} dataArr - Array of plain objects to insert + * @param {Object} [options] + * @param {boolean} [options.ordered=false] - Stop on first error if true + * @returns {Promise<{ inserted: Array, errors: Array<{ index, message, code }> }>} + * `errors[].index` is the index into the input `dataArr`; the caller can + * use it to recover the original document for error reporting. + */ + async saveMany(dataArr, { ordered = false } = {}) { + if (!Array.isArray(dataArr) || dataArr.length === 0) { + return { inserted: [], errors: [] }; + } + try { + const inserted = await this.model.insertMany(dataArr, { + ordered, + throwOnValidationError: true, + }); + return { inserted, errors: [] }; + } catch (err) { + // MongooseBulkWriteError: one or more docs failed Mongoose schema + // validation. `err.results` mirrors the input order — successfully + // inserted entries are Mongoose documents (identifiable by `_id`), + // while failures are the original input objects (no `_id`). Walking + // the results in order, the k-th failure corresponds to + // `err.validationErrors[k]` (Mongoose pre-sorts validationErrors by + // source index). + if (err?.name === 'MongooseBulkWriteError') { + const errors = []; + const inserted = []; + const validationErrors = err.validationErrors || []; + const results = err.results || []; + let veIdx = 0; + for (let i = 0; i < results.length; i++) { + const r = results[i]; + if (r && r._id) { + inserted.push(r); + } else { + const ve = validationErrors[veIdx++]; + errors.push({ + index: i, + message: ve?.message ?? 'Mongoose validation error', + code: ve?.name || 'ValidationError', + }); + } + } + return { inserted, errors }; + } + // MongoDB driver-side failure (e.g., duplicate-key race). Per-doc + // errors are on `err.writeErrors`; successful inserts on + // `err.insertedDocs`. + if (err?.name === 'MongoBulkWriteError' || err?.writeErrors) { + const errors = (err.writeErrors || []).map((we) => ({ + index: we.index ?? we.err?.index, + message: we.errmsg || we.err?.errmsg || we.message, + code: we.code || we.err?.code, + })); + return { inserted: err.insertedDocs || [], errors }; + } + throw new DatabaseError(err); + } + } + + /** + * Retrieve every version of every document whose `stix.id` is in `stixIds`. + * Returns a Map keyed by stixId, value is an array of versions sorted + * newest-first (matching `retrieveAllById`'s ordering). + * + * Used by the bundle-import path to pre-fetch all existing versions in one + * query instead of N queries (one per imported object). + * + * @param {Array} stixIds - List of STIX IDs to look up + * @returns {Promise>>} + */ + async retrieveAllByStixIds(stixIds) { + if (!Array.isArray(stixIds) || stixIds.length === 0) { + return new Map(); + } + + try { + const documents = await this.model + .find({ 'stix.id': { $in: stixIds } }) + .sort('-stix.modified') + .select('-_id -__v -__t') + .lean() + .exec(); + + const byStixId = new Map(); + for (const doc of documents) { + const id = doc.stix.id; + let arr = byStixId.get(id); + if (!arr) { + arr = []; + byStixId.set(id, arr); + } + arr.push(doc); + } + return byStixId; + } catch (err) { + throw new DatabaseError(err); + } + } + async updateAndSave(document, data) { try { // TODO validate that document is valid mongoose object first diff --git a/app/services/meta-classes/base.service.js b/app/services/meta-classes/base.service.js index 93e86bf3..ee174b25 100644 --- a/app/services/meta-classes/base.service.js +++ b/app/services/meta-classes/base.service.js @@ -24,6 +24,7 @@ const { SelfRevocationError, } = require('../../exceptions'); const { getSchema } = require('../../lib/validation-schemas'); +const { deepFreezeStix } = require('../../lib/import-safety'); const ServiceWithHooks = require('./hooks.service'); const WorkflowResult = require('../../lib/workflow-result'); @@ -688,6 +689,73 @@ class BaseService extends ServiceWithHooks { * @private */ async _createFromImport(data, options) { + const { + data: composed, + warnings, + throwIfValidating, + } = await this.composeForImport(data, options); + + if (throwIfValidating) throw throwIfValidating; + + if (options.dryRun) { + return { ...composed, warnings }; + } + + // Import-fidelity contract: bundle stix must be persisted byte-faithful. + // Several `beforeCreate` hooks normally rewrite stix fields (e.g. + // AnalyticsService stamps stix.name from the ATT&CK ID; SoftwareService + // defaults stix.is_family). Those rewrites are intentional on user-driven + // POST/PUT flows but must NOT run during import. We freeze stix before + // invoking the hook so any forgotten `if (!options.import)` gate throws + // a TypeError at the violating line instead of silently corrupting the + // imported content. See app/lib/import-safety.js for the full rationale. + deepFreezeStix(composed); + await this.beforeCreate(composed, options); + + const createdDocument = await this.repository.save(composed); + + // Same contract applies to `afterCreate` and the listeners it triggers + // via emitted domain events — anything that reaches stix on this freshly + // saved document during import must crash, not silently mutate. + deepFreezeStix(createdDocument); + await this.afterCreate(createdDocument, options); + await this.emitCreatedEvent(createdDocument, options); + + const result = createdDocument.toObject ? createdDocument.toObject() : createdDocument; + result.warnings = warnings; + return result; + } + + /** + * Compose and validate an object for import — no I/O, no events. + * + * Stamps `workspace.attack_id` from the bundle's ATT&CK external reference, + * runs ADM validation (unless the object is revoked/deprecated), and + * applies fail-open semantics by attaching `workspace.validation` when + * errors are found and `options.validateContents` is not set. + * + * The result is a plain object ready to hand to `repository.save()` or + * `repository.saveMany()`. The bundle-import path uses this directly so it + * can batch persistence; the single-object import path wraps it in + * `_createFromImport` to keep lifecycle hooks and event emission. + * + * @param {Object} data - The request data ({ stix, workspace }) + * @param {Object} options - Options passed from create() + * @returns {Promise<{ + * data: Object, + * warnings: Array, + * throwIfValidating: ValidationError|null, + * validationErrors: Array<{message,path,code,input}> + * }>} + * - `validationErrors` is the full list of ADM errors that fired against + * this object (after bypass filtering). The bundle-import path uses it + * to surface per-object validation failures in + * `workspace.import_categories.errors` so an import response + * accurately reflects what was wrong — without that, fail-open mode + * silently buries the errors on each document and the import looks + * successful even when many objects are malformed. + */ + async composeForImport(data, options) { // Strip workspace.validation — server-controlled; the fail-open block // below is the only legitimate writer of this field on the import path. if (data.workspace) { @@ -714,40 +782,33 @@ class BaseService extends ServiceWithHooks { ({ errors, warnings } = await this.validateComposedObject(data)); } + let throwIfValidating = null; if (errors.length > 0) { if (options.validateContents) { - throw new ValidationError('ADM validation failed', { details: errors, warnings }); + throwIfValidating = new ValidationError('ADM validation failed', { + details: errors, + warnings, + }); + } else { + // Fail-open: store validation errors on the document + const { ATTACK_SPEC_VERSION } = require('@mitre-attack/attack-data-model'); + const admPkg = require('@mitre-attack/attack-data-model/package.json'); + + data.workspace = data.workspace || {}; + data.workspace.validation = { + errors: errors.map((e) => ({ message: e.message, path: e.path, code: e.code })), + attack_spec_version: ATTACK_SPEC_VERSION, + adm_version: admPkg.version, + validated_at: new Date(), + }; + + logger.warn( + `Import: ${data.stix.id} has ${errors.length} validation error(s), storing on document`, + ); } - - // Fail-open: store validation errors on the document - const { ATTACK_SPEC_VERSION } = require('@mitre-attack/attack-data-model'); - const admPkg = require('@mitre-attack/attack-data-model/package.json'); - - data.workspace = data.workspace || {}; - data.workspace.validation = { - errors: errors.map((e) => ({ message: e.message, path: e.path, code: e.code })), - attack_spec_version: ATTACK_SPEC_VERSION, - adm_version: admPkg.version, - validated_at: new Date(), - }; - - logger.warn( - `Import: ${data.stix.id} has ${errors.length} validation error(s), storing on document`, - ); } - if (options.dryRun) { - return { ...data, warnings }; - } - - await this.beforeCreate(data, options); - const createdDocument = await this.repository.save(data); - await this.afterCreate(createdDocument, options); - await this.emitCreatedEvent(createdDocument, options); - - const result = createdDocument.toObject ? createdDocument.toObject() : createdDocument; - result.warnings = warnings; - return result; + return { data, warnings, throwIfValidating, validationErrors: errors }; } /** diff --git a/app/services/stix/analytics-service.js b/app/services/stix/analytics-service.js index e87b9366..1f10fd6c 100644 --- a/app/services/stix/analytics-service.js +++ b/app/services/stix/analytics-service.js @@ -11,6 +11,7 @@ const { const EventBus = require('../../lib/event-bus'); const logger = require('../../lib/logger'); const Exceptions = require('../../exceptions'); +const { deepFreezeStix } = require('../../lib/import-safety'); /** * Service for managing analytics @@ -50,15 +51,21 @@ class AnalyticsService extends BaseService { /** * Handle analytics being referenced by a detection strategy - * Add inbound embedded_relationship and update external_references + * Add inbound embedded_relationship and (when not importing) refresh the + * ATT&CK external_references URL on each referenced analytic. * * @param {Object} payload - Event payload * @param {Object} payload.detectionStrategy - The detection strategy document that references the analytics * @param {string[]} payload.analyticIds - Array of analytic STIX IDs being referenced + * @param {Object} [payload.options] - The originating create-options forwarded from + * DetectionStrategiesService.afterCreate. Used here to honor the + * import-fidelity contract — see app/lib/import-safety.js. When + * `options.import` is true, the workspace metadata update still runs but + * the stix.external_references rewrite is skipped. * @returns {Promise} */ static async handleAnalyticsReferenced(payload) { - const { detectionStrategy, analyticIds } = payload; + const { detectionStrategy, analyticIds, options } = payload; logger.info( `Analytics Service heard event: 'x-mitre-detection-strategy::analytics-referenced' for ${detectionStrategy.stix.id}`, @@ -75,6 +82,13 @@ class AnalyticsService extends BaseService { continue; } + // Import-fidelity guard: when the triggering create came from a + // bundle import, freeze this analytic's stix so any forgotten + // import gate below crashes loudly with a TypeError instead of + // silently rewriting the persisted analytic's stix fields. + // Workspace mutations are unaffected. See app/lib/import-safety.js. + if (options?.import) deepFreezeStix(analytic); + // Initialize embedded_relationships if needed if (!analytic.workspace) { analytic.workspace = {}; @@ -102,23 +116,30 @@ class AnalyticsService extends BaseService { ); } - // Update external_references with URL to parent detection strategy - if (!analytic.stix.external_references) { - analytic.stix.external_references = []; - } - - // Remove existing ATT&CK external references - analytic.stix.external_references = removeAttackExternalReferences( - analytic.stix.external_references, - ); + // Refresh the analytic's ATT&CK external_references URL so it + // points at the parent detection strategy. This rewrites + // `stix.external_references` and must therefore be skipped on the + // import path; the bundle is the source of truth for stix content. + // The framework freeze above would throw here if this gate were + // missing — that's intentional. + if (!options?.import) { + if (!analytic.stix.external_references) { + analytic.stix.external_references = []; + } - // Create new ATT&CK external reference with URL - const attackRef = createAttackExternalReference(analytic.toObject()); - if (attackRef) { - analytic.stix.external_references.unshift(attackRef); - logger.info( - `AnalyticsService: Updated external_references URL for analytic ${analyticId}`, + // Remove existing ATT&CK external references + analytic.stix.external_references = removeAttackExternalReferences( + analytic.stix.external_references, ); + + // Create new ATT&CK external reference with URL + const attackRef = createAttackExternalReference(analytic.toObject()); + if (attackRef) { + analytic.stix.external_references.unshift(attackRef); + logger.info( + `AnalyticsService: Updated external_references URL for analytic ${analyticId}`, + ); + } } await analyticsRepository.saveDocument(analytic); @@ -215,12 +236,21 @@ class AnalyticsService extends BaseService { * @throws {Exceptions.NotFoundError} If a referenced data component does not exist * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars async beforeCreate(data, options) { - // Analytic name matches its ATT&CK ID - const id = data.workspace.attack_id; - data.stix.name = id.replace(/^AN(\d+)$/, 'Analytic $1'); - logger.debug(`Setting name to match ATT&CK ID: ${data.stix.name}`); + // Import-fidelity contract: when a STIX bundle is being imported, the + // bundle's `stix` content must be persisted byte-faithful. Stamping + // `stix.name` from the ATT&CK ID is correct for user-driven POST flows, + // where the server is the authority on the analytic's display name, but + // incorrect for an import — the bundle already carries the name. The + // framework freezes `data.stix` during import-mode hooks (see + // app/lib/import-safety.js), so the assignment below would throw a + // TypeError without this gate. Workspace mutations further down still + // run unconditionally. + if (!options?.import) { + const id = data.workspace.attack_id; + data.stix.name = id.replace(/^AN(\d+)$/, 'Analytic $1'); + logger.debug(`Setting name to match ATT&CK ID: ${data.stix.name}`); + } // Initialize embedded_relationships if not present if (!data.workspace) { @@ -286,11 +316,13 @@ class AnalyticsService extends BaseService { * Emits domain event to notify DataComponentsService that data components were referenced * * @param {Object} createdDocument - The created analytic document - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js. * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars - async afterCreate(createdDocument, _options) { + async afterCreate(createdDocument, options) { // Extract data component IDs from x_mitre_log_source_references const dataComponentRefs = createdDocument.stix?.x_mitre_log_source_references?.map( @@ -307,6 +339,7 @@ class AnalyticsService extends BaseService { analyticId: createdDocument.stix.id, analytic: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataComponentIds: dataComponentRefs, + options, }); } } diff --git a/app/services/stix/campaigns-service.js b/app/services/stix/campaigns-service.js index 3817b12f..97d0915d 100644 --- a/app/services/stix/campaigns-service.js +++ b/app/services/stix/campaigns-service.js @@ -34,10 +34,17 @@ class CampaignService extends BaseService { * Ensures aliases[0] matches the object name * * @param {Object} data - The campaign object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: skip the alias normalization on the import + // path. The normalization rewrites `stix.aliases` (rearranging entries + // and prepending `stix.name`), which is correct for user-driven flows + // but deviates the persisted analytic from the bundle source-of-truth. + // `data.stix` is frozen during import-mode hooks (app/lib/import-safety.js), + // so a missing gate here would throw a TypeError at the assignment in + // `_normalizeAliases`. + if (options?.import) return; this._normalizeAliases(data); } diff --git a/app/services/stix/collection-bundles-service/import-bundle.js b/app/services/stix/collection-bundles-service/import-bundle.js index 02b55f5e..60924e63 100644 --- a/app/services/stix/collection-bundles-service/import-bundle.js +++ b/app/services/stix/collection-bundles-service/import-bundle.js @@ -11,11 +11,36 @@ const { defaultAttackSpecVersion, toEpoch, } = require('./bundle-helpers'); -const { DuplicateIdError } = require('../../../exceptions'); const logger = require('../../../lib/logger'); const config = require('../../../config/config'); const types = require('../../../lib/types'); +const { deepFreezeStix } = require('../../../lib/import-safety'); + +// Bounded concurrency for the compose-and-validate phase. Each task runs Zod +// validation and a small amount of synchronous work, so we cap concurrency +// to avoid pinning the event loop on extremely large bundles. +const COMPOSE_CONCURRENCY = 25; + +/** + * Run `task` against every item in `items` with at most `limit` in flight. + * Inline replacement for p-limit so we don't pull a new dependency (and + * avoid the ESM-only issue in recent p-limit versions). + */ +async function runWithConcurrency(items, limit, task) { + let next = 0; + async function worker() { + while (true) { + const i = next++; + if (i >= items.length) return; + await task(items[i], i); + } + } + const workerCount = Math.min(limit, items.length); + const workers = []; + for (let i = 0; i < workerCount; i++) workers.push(worker()); + await Promise.all(workers); +} const collectionsService = require('../collections-service'); const referencesService = require('../../system/references-service'); @@ -151,109 +176,324 @@ function checkIfAlias(importObject, sourceName) { } /** - * Process a single STIX object during bundle import - * @param {Object} importObject - The STIX object to process - * @param {Object} options - Import options + * Records an unknown-object-type error against the imported collection. + * @param {Object} importObject - The unknown STIX object * @param {Object} importedCollection - Collection being imported - * @param {Object} collectionReference - Reference to the collection - * @param {Map} importReferences - Map of references being imported - * @param {Object} referenceImportResults - Tracking of reference import stats - * @returns {Promise} Resolves when object is processed */ -async function processStixObject( - importObject, - options, - importedCollection, - collectionReference, - importReferences, - referenceImportResults, -) { - const service = getServiceForType(importObject.type); +function recordUnknownTypeError(importObject, importedCollection) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.unknownObjectType, + error_message: `Unknown object type: ${importObject.type}`, + }; + logger.verbose( + `Import Bundle Error: Unknown object type. id=${importObject.id}, modified=${importObject.modified}, type=${importObject.type}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); +} - if (!service) { - if (importObject.type === types.Collection) { - return; // Skip x-mitre-collection objects +/** + * Process one tier of same-type STIX objects: contents-map check, spec-version + * gate, bulk pre-fetch of existing versions, parallel compose-and-validate, + * then a single bulk insert. + * + * Tier-based grouping is sound because `sortObjectsByDependencies` returns a + * stable sort that keeps every type together — and types appear in dependency + * order (data-source before data-component, etc.). Each tier persists fully + * before the next tier begins. + * + * @param {string} type - STIX type for this tier + * @param {Array} objects - STIX objects of this type + * @param {Object} ctx - Shared import context + */ +async function processTier(type, objects, ctx) { + const { + options, + importedCollection, + contentsMap, + collectionReference, + importReferences, + referenceImportResults, + } = ctx; + + // Filter the tier: drain contents-map, gate on ATT&CK spec version, and + // record per-object errors. The result is the set of objects eligible for + // compose-and-insert. + const eligible = []; + for (const importObject of objects) { + // The contents-map check verifies that every imported object is also + // listed in the collection's `x_mitre_contents`. Two types are exempt + // because the export side (stix-bundles-service) deliberately omits them + // from `x_mitre_contents`: + // - `x-mitre-collection`: the collection is the container; it doesn't + // list itself. + // - `marking-definition`: marking-defs are referenced by the + // collection via `object_marking_refs` instead. Treating their + // absence from x_mitre_contents as a warning produces a false + // positive on every well-formed bundle (one per marking-def). + if ( + !contentsMap.delete(makeKeyFromObject(importObject)) && + importObject.type !== types.Collection && + importObject.type !== types.MarkingDefinition + ) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.notInContents, + error_message: + 'Warning: Object in bundle but not in x_mitre_contents. Object will be saved in database.', + }; + logger.verbose( + `Import Bundle Warning: Object not in x_mitre_contents. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); } - // Record error for unknown type but continue import - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.unknownObjectType, - error_message: `Unknown object type: ${importObject.type}`, - }; - logger.verbose( - `Import Bundle Error: Unknown object type. id=${importObject.id}, modified=${importObject.modified}, type=${importObject.type}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + if (importObject.type !== 'marking-definition') { + const objectAttackSpecVersion = + importObject.x_mitre_attack_spec_version ?? defaultAttackSpecVersion; + if (semver.gt(objectAttackSpecVersion, config.app.attackSpecVersion)) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.attackSpecVersionViolation, + error_message: 'Error: Object x_mitre_attack_spec_version later than system.', + }; + logger.verbose( + `Import Bundle Error: Object's x_mitre_attack_spec_version later than system. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + + if ( + !options.forceImportParameters?.includes( + forceImportParameters.attackSpecVersionViolations, + ) + ) { + throw new Error(errors.attackSpecVersionViolation); + } + continue; + } + } + eligible.push(importObject); + } + + const service = getServiceForType(type); + + // Unknown / unsupported types: record per-object errors but continue the import. + // Collection objects (the bundle itself) are deliberately skipped. + if (!service) { + if (type === types.Collection) return; + for (const importObject of eligible) { + recordUnknownTypeError(importObject, importedCollection); + } return; } + // Pre-fetch every existing version of every stixId in this tier in ONE query. + // Replaces N calls to service.retrieveById from the old per-object loop. + let existingByStixId; try { - // Retrieve existing objects with same STIX ID - const objects = await service.retrieveById(importObject.id, { versions: 'all' }); + const ids = eligible.map((o) => o.id); + existingByStixId = await service.repository.retrieveAllByStixIds(ids); + } catch (err) { + logger.error(err); + for (const importObject of eligible) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.retrievalError, + }; + logger.verbose( + `Import Bundle Error: Unable to retrieve objects with matching STIX id. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + } + return; + } + + // Compose-and-validate in parallel with bounded concurrency. Each task runs + // the duplicate check, categorization, external-references collection, + // Zod validation via `composeForImport`, and the service's `beforeCreate` + // hook (which populates outbound `workspace.embedded_relationships` on the + // doc being saved). Composed docs are accumulated into a single array for + // bulk insert. + const composedToInsert = []; + const composeOptions = { + import: true, + validateContents: options.validateContents, + }; - // Check for duplicate object - const isDuplicate = checkForDuplicate(importObject, objects); - if (isDuplicate) { + await runWithConcurrency(eligible, COMPOSE_CONCURRENCY, async (importObject) => { + const existing = existingByStixId.get(importObject.id) || []; + + if (checkForDuplicate(importObject, existing)) { importedCollection.workspace.import_categories.duplicates.push(importObject.id); return; } - // Categorize the object (addition, change, etc) - categorizeObject(importObject, objects, importedCollection); - - // Process external references + categorizeObject(importObject, existing, importedCollection); processExternalReferences(importObject, importReferences, referenceImportResults); - // Save the object if not preview mode - if (!options.previewOnly) { - const newObject = { - workspace: { - collections: [collectionReference], - }, - stix: importObject, - }; + if (options.previewOnly) return; - try { - // TODO should we bypass validation for imports? - // or possibly fail open on validation errors where we record the validation error on the object but still allow the import to proceed? - // for validation errors, the object may need to be placed into a quarantined state where it is visible but read-only except through a PUT operation that allows updates to be made to fix the validation errors - await service.create(newObject, { - import: true, - validateContents: options.validateContents, + const stagingDoc = { + workspace: { + collections: [collectionReference], + }, + stix: importObject, + }; + + try { + const { + data: composed, + throwIfValidating, + validationErrors, + } = await service.composeForImport(stagingDoc, composeOptions); + + // Strict-mode validation failure (`validateContents=true`). Surface the + // full ADM error list, not just the wrapper message — without `details` + // the caller has no way to act on the failure other than re-running the + // import with logs at debug level. Drop the doc from the bulk insert. + if (throwIfValidating) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.validationError, + error_message: `${validationErrors.length} ADM validation error(s)`, + details: validationErrors.map((e) => ({ + message: e.message, + path: e.path, + code: e.code, + })), + }; + logger.verbose( + `Import Bundle Error: Validation failed. id=${importObject.id}, ${throwIfValidating.message}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + return; + } + + // Fail-open validation failures (`validateContents=false`, the default). + // The object IS persisted with the error list attached to its own + // `workspace.validation`, but a clean import response would otherwise + // give the caller no signal that anything was wrong. We mirror the + // per-object errors into `import_categories.errors` so the response + // surfaces them up front. One taxonomy entry per object regardless of + // how many issues that object had — the full per-issue list lives in + // `details` so the caller can drill down without querying each doc. + if (validationErrors.length > 0) { + const firstFew = validationErrors + .slice(0, 3) + .map((e) => e.message) + .join('; '); + const summary = + validationErrors.length > 3 + ? `${firstFew}; ...and ${validationErrors.length - 3} more` + : firstFew; + importedCollection.workspace.import_categories.errors.push({ + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.validationError, + error_message: `${validationErrors.length} ADM validation error(s): ${summary}`, + details: validationErrors.map((e) => ({ + message: e.message, + path: e.path, + code: e.code, + })), }); - } catch (err) { - if (err.message === service.errors?.duplicateId || err instanceof DuplicateIdError) { - throw err; - } - // Record save error but continue import + } + + // Run the service's beforeCreate hook so outbound embedded_relationships + // and any other pre-persist data shaping are present on the doc when + // saveMany writes it. Failures here are recorded as save errors and the + // doc is dropped from the bulk insert. + // + // Import-fidelity guard: freeze stix before invoking the hook so any + // forgotten `if (!options.import)` gate inside the service crashes + // loudly with a TypeError instead of silently mutating bundle content. + // See app/lib/import-safety.js for the full contract. + deepFreezeStix(composed); + try { + await service.beforeCreate(composed, composeOptions); + } catch (hookErr) { const importError = { object_ref: importObject.id, object_modified: importObject.modified, error_type: importErrors.saveError, - error_message: err.message, + error_message: hookErr.message, }; logger.verbose( - `Import Bundle Error: Unable to save object. id=${importObject.id}, modified=${importObject.modified}, ${err.message}`, + `Import Bundle Error: beforeCreate hook failed. id=${importObject.id}, ${hookErr.message}`, ); importedCollection.workspace.import_categories.errors.push(importError); + return; } + + composedToInsert.push(composed); + } catch (err) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.saveError, + error_message: err.message, + }; + logger.verbose( + `Import Bundle Error: Unable to compose object. id=${importObject.id}, modified=${importObject.modified}, ${err.message}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); } - } catch (err) { - logger.error(err); + }); + + if (composedToInsert.length === 0) return; - // Record retrieval error but continue import + // Bulk insert. `saveMany` uses MongoDB `insertMany` with `ordered:false`, + // so individual document failures (e.g., duplicate-id races) are returned + // per-doc and folded into the import errors below — they don't abort the + // remaining inserts. + const { inserted, errors: insertErrors } = await service.repository.saveMany(composedToInsert); + for (const wErr of insertErrors) { + const failedDoc = typeof wErr.index === 'number' ? composedToInsert[wErr.index] : undefined; const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.retrievalError, + object_ref: failedDoc?.stix?.id, + object_modified: failedDoc?.stix?.modified, + error_type: importErrors.saveError, + error_message: wErr.message, }; logger.verbose( - `Import Bundle Error: Unable to retrieve objects with matching STIX id. id=${importObject.id}, modified=${importObject.modified}`, + `Import Bundle Error: Unable to save object. id=${importError.object_ref}, modified=${importError.object_modified}, ${wErr.message}`, ); importedCollection.workspace.import_categories.errors.push(importError); } + + // Post-insert lifecycle: run `afterCreate` and emit the `{type}::created` + // event for each successfully inserted doc. These fire cross-service domain + // events that maintain INBOUND `workspace.embedded_relationships` on + // referenced documents (e.g., DetectionStrategy → Analytic, Analytic → + // DataComponent, DataComponent → DataSource). Skipping them would leave + // the frontend unable to navigate inbound relationships. + // + // Run in parallel with bounded concurrency; per-doc hook failures are + // logged but never abort the import. + await runWithConcurrency(inserted, COMPOSE_CONCURRENCY, async (doc) => { + // Import-fidelity guard for the post-insert lifecycle. afterCreate and + // the listeners that subscribe to the emitted `{type}::created` event + // are allowed to populate workspace metadata on referenced documents + // but must not deviate this freshly saved document's stix from the + // bundle. Freezing forces violations to crash here rather than + // silently corrupting the imported content. See app/lib/import-safety.js. + deepFreezeStix(doc); + try { + await service.afterCreate(doc, composeOptions); + } catch (err) { + logger.warn(`Import Bundle: afterCreate failed for ${doc?.stix?.id}: ${err.message}`); + } + try { + await service.emitCreatedEvent(doc, composeOptions); + } catch (err) { + logger.warn(`Import Bundle: emitCreatedEvent failed for ${doc?.stix?.id}: ${err.message}`); + } + }); } /** @@ -296,7 +536,14 @@ function sortObjectsByDependencies(objects) { } /** - * Process all objects in the bundle + * Process all objects in the bundle, batched by STIX type in dependency order. + * + * Each type tier runs sequentially (so e.g. data-sources finish before + * data-components start), but objects within a tier are composed in parallel + * and persisted with a single bulk `insertMany`. This replaces the previous + * per-object sequential loop that did a DB read + DB write + lifecycle hooks + * + event emission per imported object — the dominant cost in large bundles. + * * @param {Array} objects - Array of STIX objects to process * @param {Object} options - Import options * @param {Object} importedCollection - Collection being imported @@ -314,62 +561,32 @@ async function processObjects( importReferences, referenceImportResults, ) { - // Sort objects by dependencies before processing const sortedObjects = sortObjectsByDependencies(objects); - for (const importObject of sortedObjects) { - // Check if object is in x_mitre_contents - if ( - !contentsMap.delete(makeKeyFromObject(importObject)) && - importObject.type !== types.Collection - ) { - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.notInContents, - error_message: - 'Warning: Object in bundle but not in x_mitre_contents. Object will be saved in database.', - }; - logger.verbose( - `Import Bundle Warning: Object not in x_mitre_contents. id=${importObject.id}, modified=${importObject.modified}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + // Group consecutive same-type objects into tiers. The sort above places + // every type contiguously and in dependency order, so a single pass over + // the sorted list is enough. + const tiers = []; + let currentTier = null; + for (const obj of sortedObjects) { + if (!currentTier || currentTier.type !== obj.type) { + currentTier = { type: obj.type, objects: [] }; + tiers.push(currentTier); } + currentTier.objects.push(obj); + } - if (importObject.type != 'marking-definition') { - // Check ATT&CK Spec Version compatibility - const objectAttackSpecVersion = - importObject.x_mitre_attack_spec_version ?? defaultAttackSpecVersion; - if (semver.gt(objectAttackSpecVersion, config.app.attackSpecVersion)) { - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.attackSpecVersionViolation, - error_message: 'Error: Object x_mitre_attack_spec_version later than system.', - }; - logger.verbose( - `Import Bundle Error: Object's x_mitre_attack_spec_version later than system. id=${importObject.id}, modified=${importObject.modified}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + const ctx = { + options, + importedCollection, + contentsMap, + collectionReference, + importReferences, + referenceImportResults, + }; - if ( - !options.forceImportParameters?.includes( - forceImportParameters.attackSpecVersionViolations, - ) - ) { - throw new Error(errors.attackSpecVersionViolation); - } - continue; - } - } - await processStixObject( - importObject, - options, - importedCollection, - collectionReference, - importReferences, - referenceImportResults, - ); + for (const tier of tiers) { + await processTier(tier.type, tier.objects, ctx); } // Check for objects in x_mitre_contents but not in bundle diff --git a/app/services/stix/data-components-service.js b/app/services/stix/data-components-service.js index d909399f..7d2a454a 100644 --- a/app/services/stix/data-components-service.js +++ b/app/services/stix/data-components-service.js @@ -239,11 +239,13 @@ class DataComponentsService extends BaseService { * This handles both first-time creation and new version creation (versioning) * * @param {Object} createdDocument - The created data component document - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js. * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars - async afterCreate(createdDocument, _options) { + async afterCreate(createdDocument, options) { const addedRef = this._addedDataSourceRef; const removedRef = this._removedDataSourceRef; @@ -258,6 +260,7 @@ class DataComponentsService extends BaseService { dataComponentId: createdDocument.stix.id, dataComponent: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataSourceId: addedRef, + options, }); } @@ -271,6 +274,7 @@ class DataComponentsService extends BaseService { await EventBus.emit('x-mitre-data-component::data-source-removed', { dataComponentId: createdDocument.stix.id, dataSourceId: removedRef, + options, }); } @@ -287,6 +291,7 @@ class DataComponentsService extends BaseService { dataComponentId: createdDocument.stix.id, dataComponent: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataSourceId: currentDataSourceRef, + options, }); } diff --git a/app/services/stix/detection-strategies-service.js b/app/services/stix/detection-strategies-service.js index b3715eeb..d8ddb92f 100644 --- a/app/services/stix/detection-strategies-service.js +++ b/app/services/stix/detection-strategies-service.js @@ -114,8 +114,14 @@ class DetectionStrategiesService extends BaseService { * Handle post-creation logic * Emit domain events to notify AnalyticsService about referenced/removed analytics * This handles both first-time creation and new version creation (versioning) + * + * @param {Object} document - The persisted detection strategy + * @param {Object} [options] - Create options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js for the contract. */ - async afterCreate(document) { + async afterCreate(document, options) { const addedRefs = this._addedAnalyticRefs || []; const removedRefs = this._removedAnalyticRefs || []; @@ -130,6 +136,7 @@ class DetectionStrategiesService extends BaseService { detectionStrategyId: document.stix.id, detectionStrategy: document.toObject ? document.toObject() : document, analyticIds: addedRefs, + options, }); } @@ -143,6 +150,7 @@ class DetectionStrategiesService extends BaseService { await EventBus.emit('x-mitre-detection-strategy::analytics-removed', { detectionStrategyId: document.stix.id, analyticIds: removedRefs, + options, }); } @@ -159,6 +167,7 @@ class DetectionStrategiesService extends BaseService { detectionStrategyId: document.stix.id, detectionStrategy: document.toObject ? document.toObject() : document, analyticIds: currentAnalyticRefs, + options, }); } diff --git a/app/services/stix/groups-service.js b/app/services/stix/groups-service.js index b182470d..552444d8 100644 --- a/app/services/stix/groups-service.js +++ b/app/services/stix/groups-service.js @@ -34,10 +34,16 @@ class GroupsService extends BaseService { * Ensures aliases[0] matches the object name * * @param {Object} data - The group object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: skip the alias normalization on the import + // path. The normalization rewrites `stix.aliases`, which is correct for + // user-driven flows but deviates the persisted group from the bundle + // source-of-truth. `data.stix` is frozen during import-mode hooks + // (app/lib/import-safety.js), so a missing gate here would throw a + // TypeError at the assignment in `_normalizeAliases`. + if (options?.import) return; this._normalizeAliases(data); } diff --git a/app/services/stix/software-service.js b/app/services/stix/software-service.js index 896d0d9b..fb7651f8 100644 --- a/app/services/stix/software-service.js +++ b/app/services/stix/software-service.js @@ -40,10 +40,19 @@ class SoftwareService extends BaseService { * - Ensures x_mitre_aliases[0] matches the object name * * @param {Object} data - The software object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: defaulting `stix.is_family` and rewriting + // `stix.x_mitre_aliases` is correct for user-driven flows where the + // server is the authority on these fields, but incorrect on the import + // path — the bundle carries authoritative values (including a deliberate + // omission of `is_family` for malware that doesn't represent a family, + // which must NOT be defaulted to `true`). `data.stix` is frozen during + // import-mode hooks (app/lib/import-safety.js), so a missing gate would + // throw a TypeError at the first attempted stix write below. + if (options?.import) return; + // Set is_family default for malware if (data.stix && data.stix.type === MalwareType && typeof data.stix.is_family !== 'boolean') { data.stix.is_family = true; diff --git a/docs/developer/import-fidelity-contract.md b/docs/developer/import-fidelity-contract.md new file mode 100644 index 00000000..d087ba65 --- /dev/null +++ b/docs/developer/import-fidelity-contract.md @@ -0,0 +1,203 @@ +# Import-Fidelity Contract + +When a STIX bundle is imported, the persisted objects must be +**byte-faithful** to the bundle's `stix` content. Workbench may +populate its private `workspace` metadata on each document, but +must not deviate any field under `stix`. + +This document defines the contract, explains why it exists, and +tells you how to author hooks, event listeners, and any new code +that runs during a bundle import. + +For the broader bundle-import pipeline, see +[`stix-bundle-import-pipeline.md`](./stix-bundle-import-pipeline.md). + +## Why the contract exists + +Workbench has a richer object model than raw STIX. Lifecycle hooks +and event listeners legitimately normalize and enrich STIX fields +on user-driven flows. A few examples that ship today: + +| Service | Hook / listener | Stix mutation | +|---|---|---| +| AnalyticsService | `beforeCreate` | Stamps `stix.name = "Analytic "` | +| AnalyticsService | `handleAnalyticsReferenced` listener | Rewrites `stix.external_references` to embed a URL to the parent detection strategy | +| CampaignsService | `beforeCreate` | Forces `stix.aliases[0]` to equal `stix.name` | +| GroupsService | `beforeCreate` | Same alias normalization as campaigns | +| SoftwareService | `beforeCreate` | Defaults `stix.is_family = true` for malware; normalizes `stix.x_mitre_aliases` | + +All five are correct on POST/PUT, where Workbench is the authority +on the object's display values. None of them are correct during an +import: the bundle is the source of truth for `stix.*`, and a +silent rewrite breaks round-trip fidelity and obscures the +provenance of the imported content. + +The framework therefore enforces a hard rule: + +> **During an import (`options.import === true`), `stix.*` is +> read-only. Workspace fields are still mutable.** + +## How the contract is enforced + +[`app/lib/import-safety.js`](../../app/lib/import-safety.js) exports +`deepFreezeStix(doc)`. It calls `Object.freeze` on `doc.stix` and on +the immediate children (nested objects, nested arrays, and the +array elements). In Node strict mode (`'use strict'` at the top of +every service file), an attempted write to any frozen property +throws `TypeError` immediately, pointing at the violating line. + +The framework calls `deepFreezeStix` at every point where untrusted +code (a hook, a listener) is about to run during an import: + +| Location | When | +|---|---| +| `BaseService._createFromImport` | Before `beforeCreate(composed, options)` and before `afterCreate(doc, options)` / `emitCreatedEvent(doc, options)`. | +| `collection-bundles-service/import-bundle.js` (compose worker) | Before each call to `service.beforeCreate(composed, composeOptions)`. | +| `collection-bundles-service/import-bundle.js` (post-insert worker) | Before each call to `service.afterCreate(doc, composeOptions)` and `service.emitCreatedEvent(doc, composeOptions)`. | + +Listeners that fetch a related document and mutate it then call +`deepFreezeStix(fetchedDoc)` themselves on entry when their +incoming payload indicates an import is in progress. The pattern +is one line: + +```js +if (payload.options?.import) deepFreezeStix(fetchedDoc); +``` + +The freeze is invisible to non-import paths: `deepFreezeStix` is +only called when the framework or a listener has confirmed +`options.import === true`. + +### Why deep, not shallow + +`Object.freeze` is shallow. Common stix mutations target nested +structures: + +- `analytic.stix.external_references.unshift(...)` (rewrites an array) +- `analytic.stix.external_references[0].external_id = '...'` (mutates an array element) + +A shallow freeze would let both succeed silently. `deepFreezeStix` +walks one level into objects and arrays (including array elements) +to cover the cases STIX content actually exhibits. Reads remain +unaffected at any depth. + +### Why freeze instead of clone-and-restore + +A snapshot-and-restore approach (clone `stix` before each hook, +restore after) would also enforce fidelity, but it requires the +framework to know which side effects to undo and risks leaving +half-written state when a hook mutates nested objects. A freeze +fails closed at the violating line, points the developer at +exactly the code that needs a gate, and adds zero runtime +overhead after the freeze is applied. + +## Author rules — writing hooks and listeners + +The contract translates into one rule for hook authors: + +> Workspace mutations are always allowed. Wrap any `stix.*` +> mutation in `if (!options?.import) { ... }`. + +A correctly-shaped `beforeCreate` looks like this: + +```js +async beforeCreate(data, options) { + // Workspace mutations — always allowed. + data.workspace = data.workspace || {}; + data.workspace.embedded_relationships = buildOutboundRels(data); + + // STIX mutations — gated. The framework freezes data.stix + // during import, so a missing gate throws a TypeError pointing + // at the line below on the first import test. + if (!options?.import) { + data.stix.name = deriveNameFromAttackId(data.workspace.attack_id); + } +} +``` + +And a correctly-shaped listener: + +```js +static async handleAnalyticsReferenced(payload) { + const { detectionStrategy, analyticIds, options } = payload; + + for (const analyticId of analyticIds) { + const analytic = await analyticsRepository.retrieveLatestByStixId(analyticId); + if (!analytic) continue; + + // Import-fidelity guard. The framework freezes the doc the + // emitter saw, but listeners fetch their own related docs — + // so each listener takes responsibility for freezing what + // it fetched. + if (options?.import) deepFreezeStix(analytic); + + // Workspace mutations — always allowed. + addInboundEmbeddedRelationship(analytic, detectionStrategy); + + // STIX mutations — gated, just like in beforeCreate. + if (!options?.import) { + refreshExternalReferencesUrl(analytic, detectionStrategy); + } + + await analyticsRepository.saveDocument(analytic); + } +} +``` + +## Forwarding `options` to listeners + +Listeners can only honor the contract if the originating service +forwards its create-options into the emitted event payload. The +three afterCreate emit sites that drive metadata cascades all do +this: + +- `DetectionStrategiesService.afterCreate(document, options)` + passes `options` into every `'x-mitre-detection-strategy::*'` + emit. +- `AnalyticsService.afterCreate(createdDocument, options)` + passes `options` into `'x-mitre-analytic::data-components-referenced'`. +- `DataComponentsService.afterCreate(createdDocument, options)` + passes `options` into `'x-mitre-data-component::data-source-*'`. + +If you add a new domain event that may fire during import, do the +same — include `options` in the payload. + +## Adding a new hook or listener + +Checklist for an author adding code that runs during a bundle +import: + +1. **Default to workspace.** If your work can be expressed as + workspace metadata (an embedded relationship, a derived index, + a denormalized cache), keep it under `workspace.*` — no gate + needed. + +2. **Gate stix writes.** If you genuinely need to mutate + `stix.*`, wrap the block in `if (!options?.import) { ... }`. + Forgetting the gate will not silently break things: the next + import test will crash with a TypeError pointing at your line. + +3. **Listeners freeze what they fetch.** If your listener fetches + a related document via a repository and may write to its + `stix.*`, add `if (options?.import) deepFreezeStix(fetched);` + at the top of the per-document block. + +4. **Emit `options` in event payloads.** If your service emits a + domain event from `afterCreate` / `afterUpdate`, include + `options` in the payload so downstream listeners can see when + an import is in progress. + +5. **Test it.** Round-trip a bundle through import — export, hash + the persisted `stix` content of a sample of objects, compare + to the bundle. If you forgot a gate, you'll have crashed on + the import attempt before you ever reach the comparison. + +## Files + +| Path | Role | +|---|---| +| [`app/lib/import-safety.js`](../../app/lib/import-safety.js) | `deepFreezeStix` helper and contract documentation. | +| [`app/services/meta-classes/base.service.js`](../../app/services/meta-classes/base.service.js) | Framework-level freeze in `_createFromImport`. | +| [`app/services/stix/collection-bundles-service/import-bundle.js`](../../app/services/stix/collection-bundles-service/import-bundle.js) | Framework-level freeze in the bulk pipeline. | +| [`app/services/stix/analytics-service.js`](../../app/services/stix/analytics-service.js) | Example of both forms of gate (`beforeCreate` and listener). | +| [`app/services/stix/detection-strategies-service.js`](../../app/services/stix/detection-strategies-service.js) | Example of forwarding `options` into event payloads. | diff --git a/docs/developer/stix-bundle-import-pipeline.md b/docs/developer/stix-bundle-import-pipeline.md new file mode 100644 index 00000000..5dbf77fe --- /dev/null +++ b/docs/developer/stix-bundle-import-pipeline.md @@ -0,0 +1,234 @@ +# STIX Bundle Import Pipeline + +This document describes the internal pipeline that runs when a +client POSTs to `/api/collection-bundles`. For user-facing +documentation of the endpoint's behavior and response shape, see +[`docs/user/stix-bundle-import.md`](../user/stix-bundle-import.md). + +For the rules that govern hook and listener behavior during import +(why bundle `stix` content stays byte-faithful through the +pipeline), see [`import-fidelity-contract.md`](./import-fidelity-contract.md). + +## Entry points + +``` +HTTP request + → app/controllers/collection-bundles-controller.js:importBundle + → app/services/stix/collection-bundles-service/index.js + → app/services/stix/collection-bundles-service/import-bundle.js (this pipeline) +``` + +The controller reads query parameters (`previewOnly`, +`validateContents`, `forceImport`) into an `options` object and +hands the entire bundle and options to `importBundle`. + +## Pipeline stages + +``` +┌────────────────────────────────────────────────────────────────┐ +│ 1. Initialize per-import state │ +│ - importedCollection skeleton (workspace.import_categories) │ +│ - contentsMap from collection.x_mitre_contents │ +│ - referenceMap │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 2. Check for duplicate collection │ +│ - Same stixId + same modified → existing collection │ +│ - forceImport=duplicate-collection: warn, attach a reimport │ +│ - Otherwise: throw, abort │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 3. processObjects │ +│ - Sort objects by dependency order │ +│ - Group consecutive same-type objects into TIERS │ +│ - For each tier sequentially: processTier(type, objects) │ +│ - Then: report contents-map orphans as "Missing object" │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 4. importReferences (sequential) │ +│ - Insert or update each unique external_reference │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 5. saveCollection │ +│ - Persist x-mitre-collection itself (or append reimport) │ +└────────────────────────────────────────────────────────────────┘ +``` + +## Dependency-ordered tiers + +`sortObjectsByDependencies` returns the bundle's objects in this +order (lower numbers persist first): + +| Tier | STIX type | Rationale | +|---|---|---| +| 0 | `marking-definition` | No outbound refs to other types | +| 1 | `identity` | No outbound refs to other types | +| 2 | `x-mitre-data-source` | Data components reference these | +| 3 | `x-mitre-data-component` | Analytics reference these | +| 4 | `x-mitre-analytic` | Detection strategies reference these | +| 5 | `x-mitre-detection-strategy` | (depends on analytics) | +| 6 | `attack-pattern` (techniques) | SDOs in general | +| 7 | `x-mitre-tactic` | | +| 8 | `course-of-action` (mitigations) | | +| 9 | `intrusion-set` (groups) | | +| 10 | `campaign` | | +| 11 | `malware` | | +| 12 | `tool` | | +| 13 | `x-mitre-asset` | | +| 14 | `x-mitre-matrix` | | +| 15 | `relationship` | SROs last so their endpoints exist | +| 16 | `note` | | +| 17 | `x-mitre-collection` | The bundle's own collection (skipped here; persisted separately) | + +Sort is stable, so within a tier order matches the bundle's order. + +## `processTier` — what runs inside one tier + +For each tier (objects of a single STIX type, in dependency order): + +``` +┌────────────────────────────────────────────────────────────────┐ +│ A. Synchronous eligibility filter (single pass over tier) │ +│ - contents-map drain (warn on bundle-object-not-in-contents)│ +│ - ATT&CK spec-version gate (throws or skips per forceImport)│ +│ → eligible[] │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ B. Bulk pre-fetch existing versions │ +│ repository.retrieveAllByStixIds(eligible.map(o => o.id)) │ +│ → Map> │ +│ One DB query for the entire tier, replacing N retrieveById │ +│ calls from the legacy per-object loop. │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ C. Parallel compose & validate (bounded concurrency, cap 25) │ +│ For each eligible object: │ +│ - checkForDuplicate vs pre-fetched versions │ +│ - categorizeObject (additions/changes/etc.) │ +│ - processExternalReferences │ +│ - service.composeForImport (Zod + workspace.attack_id + │ +│ fail-open workspace.validation) │ +│ - deepFreezeStix(composed) — import-fidelity guard │ +│ - service.beforeCreate(composed, options) │ +│ - record any validation errors into │ +│ importedCollection.workspace.import_categories.errors │ +│ - push composed doc into composedToInsert[] │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ D. Bulk insert (one MongoDB insertMany per tier) │ +│ repository.saveMany(composedToInsert) │ +│ → { inserted: [...], errors: [...writeErrors] } │ +│ writeErrors (ordered:false) → import_categories.errors │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ E. Parallel post-insert lifecycle (bounded concurrency, cap 25)│ +│ For each inserted doc: │ +│ - deepFreezeStix(doc) — import-fidelity guard │ +│ - service.afterCreate(doc, options) │ +│ - service.emitCreatedEvent(doc, options) │ +│ afterCreate emits domain events (e.g. │ +│ 'x-mitre-detection-strategy::analytics-referenced') │ +│ that drive INBOUND workspace.embedded_relationships │ +│ population on referenced docs in earlier-finished tiers. │ +└────────────────────────────────────────────────────────────────┘ +``` + +The order of tiers matters because the post-insert listener +cascade in stage E may modify documents from earlier tiers +(e.g. analytics that were persisted in tier 4 receive inbound +embedded_relationships when detection strategies in tier 5 are +processed). + +## Concurrency primitives + +`runWithConcurrency(items, limit, task)` in `import-bundle.js` is a +small worker-pool helper used in stages C and E. It pulls from a +shared index so each worker fetches the next available item rather +than partitioning ahead of time, which keeps utilization high even +when per-item cost varies (a worker that finishes a cheap doc +immediately picks up the next one). + +We do not pull in `p-limit`: it is not a direct dependency of this +project, and recent versions are ESM-only, which doesn't fit a +CommonJS codebase. The helper is small enough to keep inline. + +## Bulk persistence primitives + +Both new repository methods live on `_base.repository.js` and are +inherited by every concrete repository: + +- **`retrieveAllByStixIds(stixIds)`** — single `find({ 'stix.id': + { $in: ids } })` followed by an in-memory bucket by stixId. + Returns `Map>` with versions sorted + newest-first (matching `retrieveAllById`'s order). + +- **`saveMany(dataArr, { ordered })`** — wraps + `Model.insertMany(dataArr, { ordered: false })`. Returns + `{ inserted, errors }` where `errors` is a normalized + `{ index, message, code }` per failed document. `ordered: false` + ensures one bad document does not abort the rest of the tier. + +Both are only invoked from the bundle-import path. The +single-object create/update paths continue to use +`repository.save(data)` and `repository.retrieveLatestByStixId`. + +## Error model + +The pipeline never throws for per-object failures (except the +ATT&CK spec-version violation without forceImport, which is +considered fatal). Every recoverable error is appended to +`importedCollection.workspace.import_categories.errors` with a +typed `error_type` and as much context as is available. See the +[user doc](../user/stix-bundle-import.md#error-types-in-import_categorieserrors) +for the full error-type taxonomy. + +ADM validation errors are recorded in **both** branches: + +- `validateContents=true` (strict): the doc is dropped from the + bulk insert; one entry with full `details` is written. +- `validateContents=false` (fail-open, default): the doc is still + persisted with `workspace.validation` attached; an entry with + full `details` is **also** written so the import response + surfaces the failure up front. + +Both branches use `error_type: 'Validation error'` and include +the complete Zod issue list in the entry's `details` field. + +## Performance characteristics + +The pipeline scales primarily with two factors: + +1. **Number of objects in the bundle.** The MongoDB round-trips + are dominated by per-tier reads and writes, both of which are + O(1) queries per tier regardless of the number of objects in + it. The total round-trip count is ~`2 * number_of_tiers`. + +2. **Cost of the listener cascade.** Each `afterCreate` that emits + a domain event triggers a listener that fetches and updates the + referenced documents. This is currently O(refs) per source + object — if 10 analytics reference one data component, that + data component is fetched and saved 10 times. Consolidating + listener writes per target is a future optimization. + +On developer hardware, an Enterprise bundle import that previously +took 5+ minutes completes in 20-60 seconds depending on local +Mongo and CPU. + +## Files + +| Path | Role | +|---|---| +| [`app/services/stix/collection-bundles-service/import-bundle.js`](../../app/services/stix/collection-bundles-service/import-bundle.js) | The pipeline itself. `processObjects`, `processTier`, `runWithConcurrency`. | +| [`app/services/stix/collection-bundles-service/bundle-helpers.js`](../../app/services/stix/collection-bundles-service/bundle-helpers.js) | Constants for `importErrors`, `forceImportParameters`, `errors`. | +| [`app/services/meta-classes/base.service.js`](../../app/services/meta-classes/base.service.js) | `composeForImport` (validation + workspace fields) and `_createFromImport` (single-object path). | +| [`app/repository/_base.repository.js`](../../app/repository/_base.repository.js) | `retrieveAllByStixIds` and `saveMany`. | +| [`app/lib/validation-schemas.js`](../../app/lib/validation-schemas.js) | ADM schema selection with cached `.partial()` for WIP objects. | +| [`app/lib/import-safety.js`](../../app/lib/import-safety.js) | `deepFreezeStix` enforcement helper. | diff --git a/docs/user/stix-bundle-import.md b/docs/user/stix-bundle-import.md new file mode 100644 index 00000000..1d6e6f1c --- /dev/null +++ b/docs/user/stix-bundle-import.md @@ -0,0 +1,171 @@ +# Importing a STIX Bundle + +The ATT&CK Workbench REST API can ingest a STIX 2.1 bundle that wraps an +ATT&CK collection (`x-mitre-collection`). The endpoint persists every +object in the bundle, populates ATT&CK Workbench metadata on each one, +and returns a single response document summarizing what happened. + +Bundles are imported **as-is**: the `stix` content of every persisted +object matches what was in the bundle, byte-for-byte. Workbench adds +metadata in a separate `workspace` namespace but does not alter the +bundle's STIX fields. This guarantee holds even when Workbench's +hooks would otherwise rewrite fields like `stix.name`, +`stix.aliases`, or `stix.external_references` on a user-driven POST. + +## Usage + +``` +POST /api/collection-bundles +Content-Type: application/json +``` + +**Request body**: a STIX 2.1 bundle whose `objects` array contains +exactly one `x-mitre-collection` object and any number of +collection-member objects. + +### Query parameters + +| Parameter | Type | Default | Effect | +|---|---|---|---| +| `previewOnly` | boolean | `false` | Process the bundle and return the would-be import response without persisting anything. | +| `checkOnly` | boolean | `false` | Synonymous with `previewOnly` for backwards compatibility. | +| `validateContents` | boolean | `false` | When `true`, ADM validation is strict — see [Validation modes](#validation-modes). When `false` (default), validation runs but failures are recorded rather than rejected. | +| `forceImport` | repeated string | (none) | Allow import to proceed past specific blocking conditions. Supported values: `attack-spec-version-violations`, `duplicate-collection`. | + +## Validation modes + +Every object in the bundle is validated against the ATT&CK Data Model +(ADM) schemas during import — provided that ADM validation is enabled +in the deployment (`VALIDATE_WITH_ADM_SCHEMAS`, default `true`). +Objects marked `revoked: true` or `x_mitre_deprecated: true` skip +validation; everything else is checked. + +The behavior on validation failure depends on `validateContents`: + +### Default mode — `validateContents=false` (or unset) + +**Fail-open.** A failing object is still persisted, but two pieces of +state are written so the failure is visible: + +1. **On the document itself**, in `workspace.validation`: + + ```jsonc + "workspace": { + "validation": { + "errors": [ + { "message": "type is Invalid literal value", "path": ["type"], "code": "invalid_literal" } + ], + "attack_spec_version": "3.3.0", + "adm_version": "4.11.7", + "validated_at": "2026-05-14T12:00:00.000Z" + } + } + ``` + +2. **On the collection's import response**, in + `workspace.import_categories.errors`, one entry per failing + object: + + ```jsonc + { + "object_ref": "attack-pattern--1234...", + "object_modified": "2024-10-15T14:00:21.000Z", + "error_type": "Validation error", + "error_message": "3 ADM validation error(s): path.x is invalid_type; ...", + "details": [ + { "message": "x_mitre_platforms is Required", "path": ["x_mitre_platforms"], "code": "invalid_type" }, + { "message": "...", "path": ["..."], "code": "..." } + ] + } + ``` + + The `details` array preserves the full Zod issue list so callers + can act on the failure without fetching every object individually. + +Fail-open mode is the default because bundle import is the primary +way that legacy and version-skewed content enters the system; aborting +on every ADM mismatch would make migrations between ATT&CK versions +impossible. + +### Strict mode — `validateContents=true` + +A failing object is **not** persisted. The entry in +`import_categories.errors` is written exactly as above (with full +`details`), but the document is dropped from the bulk insert. Other +objects in the same bundle continue to be processed. The import as a +whole succeeds; only the failing objects are missing from the database. + +Use strict mode when you want the import to be a clean filter: only +objects that pass current ADM validation will be persisted, and the +import response tells you exactly which ones were rejected and why. + +## Reading the import response + +The response is the persisted `x-mitre-collection` document. Look at +`workspace.import_categories`: + +```jsonc +"workspace": { + "imported": "2026-05-14T12:00:00.000Z", + "import_categories": { + "additions": [ /* stixIds of new objects */ ], + "changes": [ /* stixIds where x_mitre_version increased */ ], + "minor_changes": [ /* stixIds where only modified changed */ ], + "revocations": [ /* stixIds newly revoked in this version */ ], + "deprecations": [ /* stixIds newly deprecated in this version */ ], + "supersedes_user_edits": [ ], + "supersedes_collection_changes": [ ], + "duplicates": [ /* stixIds whose modified matches an existing version */ ], + "out_of_date": [ /* stixIds where existing modified is newer */ ], + "errors": [ /* see below */ ] + }, + "import_references": { + "additions": [ /* source_names of newly inserted references */ ], + "changes": [ /* source_names of updated references */ ], + "duplicates": [ ] + } +} +``` + +### Error types in `import_categories.errors` + +| `error_type` | Meaning | +|---|---| +| `Validation error` | The object failed ADM schema validation. The `details` array contains every `{message, path, code}`. In fail-open mode the object is still persisted; in strict mode it is dropped. | +| `Save error` | A persistence failure (e.g. MongoDB duplicate-key race). | +| `Retrieval error` | The bulk pre-fetch for the tier failed; no object in that tier was processed. | +| `Not in contents` | The object exists in the bundle's `objects` array but is missing from the collection's `x_mitre_contents`. It is still persisted; this is a warning. | +| `Missing object` | The object is listed in `x_mitre_contents` but is missing from the bundle. | +| `Unknown object type` | The object's `type` is not one the server knows how to persist. | +| `ATT&CK Spec version violation` | The object's `x_mitre_attack_spec_version` is later than the server supports. Without `forceImport=attack-spec-version-violations`, the entire import aborts when this occurs. | +| `Duplicate collection object` | A second `x-mitre-collection` matching an already-persisted collection was found. Without `forceImport=duplicate-collection`, the import aborts. | + +## Re-importing the same bundle + +Re-importing a bundle whose collection (`x-mitre-collection`) already +exists at the same `modified` timestamp augments the existing +collection rather than creating a duplicate: the new import's +`import_categories` is appended to `workspace.reimports` and member +objects are upserted version-by-version. Members that match an +existing version exactly (same `modified`) appear in `duplicates`; +members whose `modified` is newer than what's stored appear in +`additions` / `changes` / `minor_changes` as appropriate. + +## Performance + +For very large bundles (the Enterprise ATT&CK bundle ships ~5,000 +objects), the import runs in tier-batched parallel passes — see +[`docs/developer/stix-bundle-import-pipeline.md`](../developer/stix-bundle-import-pipeline.md) +for the implementation detail. Typical wall-clock times on developer +hardware: + +| Bundle | Approximate import time | +|---|---| +| Mobile ATT&CK | < 5 seconds | +| ICS ATT&CK | < 5 seconds | +| Enterprise ATT&CK | 20-60 seconds (depending on hardware and Mongo configuration) | + +If the request seems hung past a minute, check the server logs for +`Import Bundle Error` entries — most often the cause is a deeper +issue (e.g. a Mongo connection problem) rather than continued +processing.