From 1ba2148d5169ed9488452968e805e1e7becf0b6b Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:06:14 -0400 Subject: [PATCH 1/7] perf(import-bundle): batch and parallelize STIX bundle import Replaces the per-object sequential loop with a tier-batched pipeline. Objects are grouped by STIX type (preserving the existing dependency order); each tier pre-fetches existing versions in one $in query, composes and validates in parallel with bounded concurrency, persists via a single insertMany(ordered:false), and runs afterCreate + emitCreatedEvent in a second parallel pass. Adds repository.retrieveAllByStixIds and repository.saveMany, factors composeForImport out of _createFromImport so it can be batched, and memoizes locally-derived .partial() Zod schemas for WIP objects. Cuts Enterprise bundle import from 5+ minutes (reverse-proxy timeout territory) to well under a minute on developer hardware. --- app/lib/validation-schemas.js | 17 +- app/repository/_base.repository.js | 77 ++++ app/services/meta-classes/base.service.js | 91 ++-- .../import-bundle.js | 394 ++++++++++++------ 4 files changed, 427 insertions(+), 152 deletions(-) diff --git a/app/lib/validation-schemas.js b/app/lib/validation-schemas.js index ede9f767..c68f6a4b 100644 --- a/app/lib/validation-schemas.js +++ b/app/lib/validation-schemas.js @@ -91,6 +91,12 @@ const STIX_SCHEMAS = { 'x-mitre-collection': collectionSchema, }; +// Cache for locally-derived partial schemas. ADM does not export prebuilt +// partials for every STIX type; for those types we call `.partial()` ourselves. +// That call is expensive enough to show up in bulk-import profiles, so we +// memoize the result per STIX type. +const derivedPartialCache = new Map(); + /** * Get the schema to use for validating a STIX object. * @@ -102,7 +108,7 @@ const STIX_SCHEMAS = { * - `work-in-progress` uses partial validation so drafts can omit required fields * - every other workflow state uses full validation * - if ADM exports a dedicated partial schema, use it directly - * - otherwise, derive a partial schema locally with `.partial()` + * - otherwise, derive a partial schema locally with `.partial()` (memoized) * * @param {string} stixType - The STIX `type` being validated (e.g. "attack-pattern") * @param {string} status - The workflow state (e.g. "work-in-progress", "awaiting-review", "reviewed") @@ -120,7 +126,14 @@ function getSchema(stixType, status) { return isWip ? admSchemaRef.partial : admSchemaRef.full; } - return isWip ? admSchemaRef.partial() : admSchemaRef; + if (!isWip) return admSchemaRef; + + let derived = derivedPartialCache.get(stixType); + if (!derived) { + derived = admSchemaRef.partial(); + derivedPartialCache.set(stixType, derived); + } + return derived; } module.exports = { diff --git a/app/repository/_base.repository.js b/app/repository/_base.repository.js index 9a3443eb..85f80f04 100644 --- a/app/repository/_base.repository.js +++ b/app/repository/_base.repository.js @@ -399,6 +399,83 @@ class BaseRepository extends AbstractRepository { } } + /** + * Bulk insert. Used by the STIX bundle import path to avoid one round-trip + * per object. `ordered: false` keeps MongoDB inserting the remaining docs + * after an individual failure; per-doc errors are returned on the thrown + * BulkWriteError's `writeErrors` for the caller to fold into import errors. + * + * Discriminator-aware: each child model's `insertMany` sets the correct + * `__t` discriminator key automatically, so callers should invoke this on + * the type-specific repository (not the AttackObject parent). + * + * @param {Array} dataArr - Array of plain objects to insert + * @param {Object} [options] + * @param {boolean} [options.ordered=false] - Stop on first error if true + * @returns {Promise<{ inserted: Array, errors: Array<{ index, message, code }> }>} + */ + async saveMany(dataArr, { ordered = false } = {}) { + if (!Array.isArray(dataArr) || dataArr.length === 0) { + return { inserted: [], errors: [] }; + } + try { + const inserted = await this.model.insertMany(dataArr, { ordered }); + return { inserted, errors: [] }; + } catch (err) { + // Mongoose BulkWriteError surfaces partial success when ordered:false. + // Successful inserts are on err.insertedDocs; failures on err.writeErrors. + if (err?.name === 'MongoBulkWriteError' || err?.writeErrors) { + const errors = (err.writeErrors || []).map((we) => ({ + index: we.index ?? we.err?.index, + message: we.errmsg || we.err?.errmsg || we.message, + code: we.code || we.err?.code, + })); + return { inserted: err.insertedDocs || [], errors }; + } + throw new DatabaseError(err); + } + } + + /** + * Retrieve every version of every document whose `stix.id` is in `stixIds`. + * Returns a Map keyed by stixId, value is an array of versions sorted + * newest-first (matching `retrieveAllById`'s ordering). + * + * Used by the bundle-import path to pre-fetch all existing versions in one + * query instead of N queries (one per imported object). + * + * @param {Array} stixIds - List of STIX IDs to look up + * @returns {Promise>>} + */ + async retrieveAllByStixIds(stixIds) { + if (!Array.isArray(stixIds) || stixIds.length === 0) { + return new Map(); + } + + try { + const documents = await this.model + .find({ 'stix.id': { $in: stixIds } }) + .sort('-stix.modified') + .select('-_id -__v -__t') + .lean() + .exec(); + + const byStixId = new Map(); + for (const doc of documents) { + const id = doc.stix.id; + let arr = byStixId.get(id); + if (!arr) { + arr = []; + byStixId.set(id, arr); + } + arr.push(doc); + } + return byStixId; + } catch (err) { + throw new DatabaseError(err); + } + } + async updateAndSave(document, data) { try { // TODO validate that document is valid mongoose object first diff --git a/app/services/meta-classes/base.service.js b/app/services/meta-classes/base.service.js index 93e86bf3..c2128bc3 100644 --- a/app/services/meta-classes/base.service.js +++ b/app/services/meta-classes/base.service.js @@ -688,6 +688,46 @@ class BaseService extends ServiceWithHooks { * @private */ async _createFromImport(data, options) { + const { + data: composed, + warnings, + throwIfValidating, + } = await this.composeForImport(data, options); + + if (throwIfValidating) throw throwIfValidating; + + if (options.dryRun) { + return { ...composed, warnings }; + } + + await this.beforeCreate(composed, options); + const createdDocument = await this.repository.save(composed); + await this.afterCreate(createdDocument, options); + await this.emitCreatedEvent(createdDocument, options); + + const result = createdDocument.toObject ? createdDocument.toObject() : createdDocument; + result.warnings = warnings; + return result; + } + + /** + * Compose and validate an object for import — no I/O, no events. + * + * Stamps `workspace.attack_id` from the bundle's ATT&CK external reference, + * runs ADM validation (unless the object is revoked/deprecated), and + * applies fail-open semantics by attaching `workspace.validation` when + * errors are found and `options.validateContents` is not set. + * + * The result is a plain object ready to hand to `repository.save()` or + * `repository.saveMany()`. The bundle-import path uses this directly so it + * can batch persistence; the single-object import path wraps it in + * `_createFromImport` to keep lifecycle hooks and event emission. + * + * @param {Object} data - The request data ({ stix, workspace }) + * @param {Object} options - Options passed from create() + * @returns {Promise<{ data: Object, warnings: Array, throwIfValidating: ValidationError|null }>} + */ + async composeForImport(data, options) { // Strip workspace.validation — server-controlled; the fail-open block // below is the only legitimate writer of this field on the import path. if (data.workspace) { @@ -714,40 +754,33 @@ class BaseService extends ServiceWithHooks { ({ errors, warnings } = await this.validateComposedObject(data)); } + let throwIfValidating = null; if (errors.length > 0) { if (options.validateContents) { - throw new ValidationError('ADM validation failed', { details: errors, warnings }); + throwIfValidating = new ValidationError('ADM validation failed', { + details: errors, + warnings, + }); + } else { + // Fail-open: store validation errors on the document + const { ATTACK_SPEC_VERSION } = require('@mitre-attack/attack-data-model'); + const admPkg = require('@mitre-attack/attack-data-model/package.json'); + + data.workspace = data.workspace || {}; + data.workspace.validation = { + errors: errors.map((e) => ({ message: e.message, path: e.path, code: e.code })), + attack_spec_version: ATTACK_SPEC_VERSION, + adm_version: admPkg.version, + validated_at: new Date(), + }; + + logger.warn( + `Import: ${data.stix.id} has ${errors.length} validation error(s), storing on document`, + ); } - - // Fail-open: store validation errors on the document - const { ATTACK_SPEC_VERSION } = require('@mitre-attack/attack-data-model'); - const admPkg = require('@mitre-attack/attack-data-model/package.json'); - - data.workspace = data.workspace || {}; - data.workspace.validation = { - errors: errors.map((e) => ({ message: e.message, path: e.path, code: e.code })), - attack_spec_version: ATTACK_SPEC_VERSION, - adm_version: admPkg.version, - validated_at: new Date(), - }; - - logger.warn( - `Import: ${data.stix.id} has ${errors.length} validation error(s), storing on document`, - ); - } - - if (options.dryRun) { - return { ...data, warnings }; } - await this.beforeCreate(data, options); - const createdDocument = await this.repository.save(data); - await this.afterCreate(createdDocument, options); - await this.emitCreatedEvent(createdDocument, options); - - const result = createdDocument.toObject ? createdDocument.toObject() : createdDocument; - result.warnings = warnings; - return result; + return { data, warnings, throwIfValidating }; } /** diff --git a/app/services/stix/collection-bundles-service/import-bundle.js b/app/services/stix/collection-bundles-service/import-bundle.js index 02b55f5e..af6b06f1 100644 --- a/app/services/stix/collection-bundles-service/import-bundle.js +++ b/app/services/stix/collection-bundles-service/import-bundle.js @@ -11,12 +11,36 @@ const { defaultAttackSpecVersion, toEpoch, } = require('./bundle-helpers'); -const { DuplicateIdError } = require('../../../exceptions'); const logger = require('../../../lib/logger'); const config = require('../../../config/config'); const types = require('../../../lib/types'); +// Bounded concurrency for the compose-and-validate phase. Each task runs Zod +// validation and a small amount of synchronous work, so we cap concurrency +// to avoid pinning the event loop on extremely large bundles. +const COMPOSE_CONCURRENCY = 25; + +/** + * Run `task` against every item in `items` with at most `limit` in flight. + * Inline replacement for p-limit so we don't pull a new dependency (and + * avoid the ESM-only issue in recent p-limit versions). + */ +async function runWithConcurrency(items, limit, task) { + let next = 0; + async function worker() { + while (true) { + const i = next++; + if (i >= items.length) return; + await task(items[i], i); + } + } + const workerCount = Math.min(limit, items.length); + const workers = []; + for (let i = 0; i < workerCount; i++) workers.push(worker()); + await Promise.all(workers); +} + const collectionsService = require('../collections-service'); const referencesService = require('../../system/references-service'); @@ -151,109 +175,260 @@ function checkIfAlias(importObject, sourceName) { } /** - * Process a single STIX object during bundle import - * @param {Object} importObject - The STIX object to process - * @param {Object} options - Import options + * Records an unknown-object-type error against the imported collection. + * @param {Object} importObject - The unknown STIX object * @param {Object} importedCollection - Collection being imported - * @param {Object} collectionReference - Reference to the collection - * @param {Map} importReferences - Map of references being imported - * @param {Object} referenceImportResults - Tracking of reference import stats - * @returns {Promise} Resolves when object is processed */ -async function processStixObject( - importObject, - options, - importedCollection, - collectionReference, - importReferences, - referenceImportResults, -) { - const service = getServiceForType(importObject.type); +function recordUnknownTypeError(importObject, importedCollection) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.unknownObjectType, + error_message: `Unknown object type: ${importObject.type}`, + }; + logger.verbose( + `Import Bundle Error: Unknown object type. id=${importObject.id}, modified=${importObject.modified}, type=${importObject.type}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); +} - if (!service) { - if (importObject.type === types.Collection) { - return; // Skip x-mitre-collection objects +/** + * Process one tier of same-type STIX objects: contents-map check, spec-version + * gate, bulk pre-fetch of existing versions, parallel compose-and-validate, + * then a single bulk insert. + * + * Tier-based grouping is sound because `sortObjectsByDependencies` returns a + * stable sort that keeps every type together — and types appear in dependency + * order (data-source before data-component, etc.). Each tier persists fully + * before the next tier begins. + * + * @param {string} type - STIX type for this tier + * @param {Array} objects - STIX objects of this type + * @param {Object} ctx - Shared import context + */ +async function processTier(type, objects, ctx) { + const { + options, + importedCollection, + contentsMap, + collectionReference, + importReferences, + referenceImportResults, + } = ctx; + + // Filter the tier: drain contents-map, gate on ATT&CK spec version, and + // record per-object errors. The result is the set of objects eligible for + // compose-and-insert. + const eligible = []; + for (const importObject of objects) { + if ( + !contentsMap.delete(makeKeyFromObject(importObject)) && + importObject.type !== types.Collection + ) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.notInContents, + error_message: + 'Warning: Object in bundle but not in x_mitre_contents. Object will be saved in database.', + }; + logger.verbose( + `Import Bundle Warning: Object not in x_mitre_contents. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); } - // Record error for unknown type but continue import - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.unknownObjectType, - error_message: `Unknown object type: ${importObject.type}`, - }; - logger.verbose( - `Import Bundle Error: Unknown object type. id=${importObject.id}, modified=${importObject.modified}, type=${importObject.type}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + if (importObject.type !== 'marking-definition') { + const objectAttackSpecVersion = + importObject.x_mitre_attack_spec_version ?? defaultAttackSpecVersion; + if (semver.gt(objectAttackSpecVersion, config.app.attackSpecVersion)) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.attackSpecVersionViolation, + error_message: 'Error: Object x_mitre_attack_spec_version later than system.', + }; + logger.verbose( + `Import Bundle Error: Object's x_mitre_attack_spec_version later than system. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + + if ( + !options.forceImportParameters?.includes( + forceImportParameters.attackSpecVersionViolations, + ) + ) { + throw new Error(errors.attackSpecVersionViolation); + } + continue; + } + } + eligible.push(importObject); + } + + const service = getServiceForType(type); + + // Unknown / unsupported types: record per-object errors but continue the import. + // Collection objects (the bundle itself) are deliberately skipped. + if (!service) { + if (type === types.Collection) return; + for (const importObject of eligible) { + recordUnknownTypeError(importObject, importedCollection); + } return; } + // Pre-fetch every existing version of every stixId in this tier in ONE query. + // Replaces N calls to service.retrieveById from the old per-object loop. + let existingByStixId; try { - // Retrieve existing objects with same STIX ID - const objects = await service.retrieveById(importObject.id, { versions: 'all' }); + const ids = eligible.map((o) => o.id); + existingByStixId = await service.repository.retrieveAllByStixIds(ids); + } catch (err) { + logger.error(err); + for (const importObject of eligible) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.retrievalError, + }; + logger.verbose( + `Import Bundle Error: Unable to retrieve objects with matching STIX id. id=${importObject.id}, modified=${importObject.modified}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + } + return; + } - // Check for duplicate object - const isDuplicate = checkForDuplicate(importObject, objects); - if (isDuplicate) { + // Compose-and-validate in parallel with bounded concurrency. Each task runs + // the duplicate check, categorization, external-references collection, + // Zod validation via `composeForImport`, and the service's `beforeCreate` + // hook (which populates outbound `workspace.embedded_relationships` on the + // doc being saved). Composed docs are accumulated into a single array for + // bulk insert. + const composedToInsert = []; + const composeOptions = { + import: true, + validateContents: options.validateContents, + }; + + await runWithConcurrency(eligible, COMPOSE_CONCURRENCY, async (importObject) => { + const existing = existingByStixId.get(importObject.id) || []; + + if (checkForDuplicate(importObject, existing)) { importedCollection.workspace.import_categories.duplicates.push(importObject.id); return; } - // Categorize the object (addition, change, etc) - categorizeObject(importObject, objects, importedCollection); - - // Process external references + categorizeObject(importObject, existing, importedCollection); processExternalReferences(importObject, importReferences, referenceImportResults); - // Save the object if not preview mode - if (!options.previewOnly) { - const newObject = { - workspace: { - collections: [collectionReference], - }, - stix: importObject, - }; + if (options.previewOnly) return; + + const stagingDoc = { + workspace: { + collections: [collectionReference], + }, + stix: importObject, + }; + + try { + const { data: composed, throwIfValidating } = await service.composeForImport( + stagingDoc, + composeOptions, + ); + if (throwIfValidating) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.saveError, + error_message: throwIfValidating.message, + }; + logger.verbose( + `Import Bundle Error: Validation failed. id=${importObject.id}, ${throwIfValidating.message}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); + return; + } + + // Run the service's beforeCreate hook so outbound embedded_relationships + // and any other pre-persist data shaping are present on the doc when + // saveMany writes it. Failures here are recorded as save errors and the + // doc is dropped from the bulk insert. try { - // TODO should we bypass validation for imports? - // or possibly fail open on validation errors where we record the validation error on the object but still allow the import to proceed? - // for validation errors, the object may need to be placed into a quarantined state where it is visible but read-only except through a PUT operation that allows updates to be made to fix the validation errors - await service.create(newObject, { - import: true, - validateContents: options.validateContents, - }); - } catch (err) { - if (err.message === service.errors?.duplicateId || err instanceof DuplicateIdError) { - throw err; - } - // Record save error but continue import + await service.beforeCreate(composed, composeOptions); + } catch (hookErr) { const importError = { object_ref: importObject.id, object_modified: importObject.modified, error_type: importErrors.saveError, - error_message: err.message, + error_message: hookErr.message, }; logger.verbose( - `Import Bundle Error: Unable to save object. id=${importObject.id}, modified=${importObject.modified}, ${err.message}`, + `Import Bundle Error: beforeCreate hook failed. id=${importObject.id}, ${hookErr.message}`, ); importedCollection.workspace.import_categories.errors.push(importError); + return; } + + composedToInsert.push(composed); + } catch (err) { + const importError = { + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.saveError, + error_message: err.message, + }; + logger.verbose( + `Import Bundle Error: Unable to compose object. id=${importObject.id}, modified=${importObject.modified}, ${err.message}`, + ); + importedCollection.workspace.import_categories.errors.push(importError); } - } catch (err) { - logger.error(err); + }); + + if (composedToInsert.length === 0) return; - // Record retrieval error but continue import + // Bulk insert. `saveMany` uses MongoDB `insertMany` with `ordered:false`, + // so individual document failures (e.g., duplicate-id races) are returned + // per-doc and folded into the import errors below — they don't abort the + // remaining inserts. + const { inserted, errors: insertErrors } = await service.repository.saveMany(composedToInsert); + for (const wErr of insertErrors) { + const failedDoc = typeof wErr.index === 'number' ? composedToInsert[wErr.index] : undefined; const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.retrievalError, + object_ref: failedDoc?.stix?.id, + object_modified: failedDoc?.stix?.modified, + error_type: importErrors.saveError, + error_message: wErr.message, }; logger.verbose( - `Import Bundle Error: Unable to retrieve objects with matching STIX id. id=${importObject.id}, modified=${importObject.modified}`, + `Import Bundle Error: Unable to save object. id=${importError.object_ref}, modified=${importError.object_modified}, ${wErr.message}`, ); importedCollection.workspace.import_categories.errors.push(importError); } + + // Post-insert lifecycle: run `afterCreate` and emit the `{type}::created` + // event for each successfully inserted doc. These fire cross-service domain + // events that maintain INBOUND `workspace.embedded_relationships` on + // referenced documents (e.g., DetectionStrategy → Analytic, Analytic → + // DataComponent, DataComponent → DataSource). Skipping them would leave + // the frontend unable to navigate inbound relationships. + // + // Run in parallel with bounded concurrency; per-doc hook failures are + // logged but never abort the import. + await runWithConcurrency(inserted, COMPOSE_CONCURRENCY, async (doc) => { + try { + await service.afterCreate(doc, composeOptions); + } catch (err) { + logger.warn(`Import Bundle: afterCreate failed for ${doc?.stix?.id}: ${err.message}`); + } + try { + await service.emitCreatedEvent(doc, composeOptions); + } catch (err) { + logger.warn(`Import Bundle: emitCreatedEvent failed for ${doc?.stix?.id}: ${err.message}`); + } + }); } /** @@ -296,7 +471,14 @@ function sortObjectsByDependencies(objects) { } /** - * Process all objects in the bundle + * Process all objects in the bundle, batched by STIX type in dependency order. + * + * Each type tier runs sequentially (so e.g. data-sources finish before + * data-components start), but objects within a tier are composed in parallel + * and persisted with a single bulk `insertMany`. This replaces the previous + * per-object sequential loop that did a DB read + DB write + lifecycle hooks + * + event emission per imported object — the dominant cost in large bundles. + * * @param {Array} objects - Array of STIX objects to process * @param {Object} options - Import options * @param {Object} importedCollection - Collection being imported @@ -314,62 +496,32 @@ async function processObjects( importReferences, referenceImportResults, ) { - // Sort objects by dependencies before processing const sortedObjects = sortObjectsByDependencies(objects); - for (const importObject of sortedObjects) { - // Check if object is in x_mitre_contents - if ( - !contentsMap.delete(makeKeyFromObject(importObject)) && - importObject.type !== types.Collection - ) { - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.notInContents, - error_message: - 'Warning: Object in bundle but not in x_mitre_contents. Object will be saved in database.', - }; - logger.verbose( - `Import Bundle Warning: Object not in x_mitre_contents. id=${importObject.id}, modified=${importObject.modified}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + // Group consecutive same-type objects into tiers. The sort above places + // every type contiguously and in dependency order, so a single pass over + // the sorted list is enough. + const tiers = []; + let currentTier = null; + for (const obj of sortedObjects) { + if (!currentTier || currentTier.type !== obj.type) { + currentTier = { type: obj.type, objects: [] }; + tiers.push(currentTier); } + currentTier.objects.push(obj); + } - if (importObject.type != 'marking-definition') { - // Check ATT&CK Spec Version compatibility - const objectAttackSpecVersion = - importObject.x_mitre_attack_spec_version ?? defaultAttackSpecVersion; - if (semver.gt(objectAttackSpecVersion, config.app.attackSpecVersion)) { - const importError = { - object_ref: importObject.id, - object_modified: importObject.modified, - error_type: importErrors.attackSpecVersionViolation, - error_message: 'Error: Object x_mitre_attack_spec_version later than system.', - }; - logger.verbose( - `Import Bundle Error: Object's x_mitre_attack_spec_version later than system. id=${importObject.id}, modified=${importObject.modified}`, - ); - importedCollection.workspace.import_categories.errors.push(importError); + const ctx = { + options, + importedCollection, + contentsMap, + collectionReference, + importReferences, + referenceImportResults, + }; - if ( - !options.forceImportParameters?.includes( - forceImportParameters.attackSpecVersionViolations, - ) - ) { - throw new Error(errors.attackSpecVersionViolation); - } - continue; - } - } - await processStixObject( - importObject, - options, - importedCollection, - collectionReference, - importReferences, - referenceImportResults, - ); + for (const tier of tiers) { + await processTier(tier.type, tier.objects, ctx); } // Check for objects in x_mitre_contents but not in bundle From b12ced62575caff99863f5bab30f519373145f2a Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:06:49 -0400 Subject: [PATCH 2/7] feat(config): enable ADM validation by default Flips VALIDATE_WITH_ADM_SCHEMAS to default true. Deployments that did not explicitly enable it previously ran no ADM validation at all on POST, PUT, or bundle import, silently masking malformed content. Deployments that want to keep validation off can still set VALIDATE_WITH_ADM_SCHEMAS=false explicitly. --- app/config/config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/config/config.js b/app/config/config.js index d718617a..74383761 100644 --- a/app/config/config.js +++ b/app/config/config.js @@ -205,7 +205,7 @@ function loadConfig() { withAttackDataModel: { doc: 'Enable validation of POST and PUT request bodies using the ATT&CK Data Model', format: Boolean, - default: false, + default: true, env: 'VALIDATE_WITH_ADM_SCHEMAS', }, withOpenApi: { From eea4f1e2c89915b3fe7f58b1f6cdbe60654d3907 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:08:36 -0400 Subject: [PATCH 3/7] feat(import-bundle): enforce stix-fidelity contract on lifecycle hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an import-fidelity contract that prevents lifecycle hooks and event listeners from deviating bundle `stix.*` content during a STIX bundle import. Workspace mutations remain permitted; only stix is frozen. New app/lib/import-safety.js exports deepFreezeStix(doc), which recursively freezes stix (including nested arrays and their elements). The framework calls it before invoking any hook or listener in import mode — in BaseService._createFromImport and in the bulk import pipeline. With stix frozen, a missing or misplaced `if (!options.import)` gate fails closed with a TypeError pointing at the violating line on the first import test, rather than silently rewriting bundle content. Five hooks/listeners that legitimately mutate stix.* on user-driven flows are gated for import: analytics-service.beforeCreate (the stix.name stamp), campaigns/groups/software.beforeCreate (alias normalization and the malware is_family default), and the analytics-service.handleAnalyticsReferenced listener (the external_references URL rewrite). The three afterCreate emit sites that drive metadata cascades now forward `options` in their event payloads so listeners can see when an import is in progress. --- app/lib/import-safety.js | 86 +++++++++++++++++++ app/services/meta-classes/base.service.js | 16 ++++ app/services/stix/analytics-service.js | 83 ++++++++++++------ app/services/stix/campaigns-service.js | 13 ++- .../import-bundle.js | 14 +++ app/services/stix/data-components-service.js | 11 ++- .../stix/detection-strategies-service.js | 11 ++- app/services/stix/groups-service.js | 12 ++- app/services/stix/software-service.js | 15 +++- 9 files changed, 223 insertions(+), 38 deletions(-) create mode 100644 app/lib/import-safety.js diff --git a/app/lib/import-safety.js b/app/lib/import-safety.js new file mode 100644 index 00000000..ab29e2b0 --- /dev/null +++ b/app/lib/import-safety.js @@ -0,0 +1,86 @@ +'use strict'; + +/** + * Import-safety primitives. + * + * STIX-bundle import has a strict invariant: when a bundle is imported, the + * persisted objects must be byte-faithful to the bundle's `stix` content. The + * import path is allowed to populate Workbench-private metadata (everything + * under `workspace`), but it must NEVER alter the imported `stix` fields, + * because the bundle is the source of truth and round-trip fidelity matters + * for re-imports and downstream consumers. + * + * However, the lifecycle hooks and event listeners that fire during a normal + * create/update — `beforeCreate`, `afterCreate`, and the cross-service + * listeners on domain events — were not originally written with import + * fidelity in mind. Several of them mutate `stix.*` as part of their normal + * work (e.g. AnalyticsService.beforeCreate stamps `stix.name` from the + * ATT&CK ID; AnalyticsService.handleAnalyticsReferenced rewrites + * `stix.external_references` to embed a URL to the parent detection + * strategy). Those mutations are correct for user-driven POST/PUT flows but + * are incorrect for an import. + * + * Rather than rely on convention ("remember to gate every stix write behind + * `if (!options.import) { ... }`"), we enforce the contract structurally: + * before invoking any hook or listener in import mode, the framework calls + * `deepFreezeStix(doc)`. In Node strict mode (`'use strict'` at the top of + * every service file), an attempted assignment to a frozen property throws + * a `TypeError`. That makes a missing import gate fail loudly at the + * violating line on the first import test, instead of silently corrupting + * bundle content. + * + * The local rule for hook/listener authors becomes: + * + * 1. Workspace mutations are always allowed. + * 2. If you need to mutate `stix.*`, wrap the block in + * `if (!options.import) { ... }` (or `if (!payload.options?.import)` + * inside a listener). The framework guarantees that `options.import` + * is the only state where stix is frozen, so a missing gate produces + * an immediate TypeError pointing at the offending line. + * + * Read freely from frozen stix — only writes are blocked. + */ + +/** + * Deep-freezes the `stix` field of a document so any attempt to write to + * `doc.stix.*`, including writes through nested arrays/objects (e.g. + * `doc.stix.external_references.unshift(...)`), throws a `TypeError`. + * + * `Object.freeze` is shallow on its own, so we walk the immediate children + * (one level into nested objects/arrays) and freeze them as well. Two + * levels is sufficient for STIX in practice: the deepest commonly mutated + * paths are array elements (e.g. `stix.external_references[i]` or + * `stix.kill_chain_phases[i]`), which the loop covers. + * + * Safe to call multiple times — `Object.isFrozen` short-circuits. + * Safe to call on Mongoose documents: only the underlying `_doc.stix` + * subtree is frozen; Mongoose's wrapper accessors remain functional, and + * Mongoose does not mutate the source object when constructing new + * documents during save/insertMany. + * + * @param {Object} doc - A document of the shape `{ stix, workspace }`, or + * a Mongoose document with the same shape. No-op if `doc` or `doc.stix` + * is missing. + */ +function deepFreezeStix(doc) { + const stix = doc?.stix; + if (!stix || typeof stix !== 'object' || Object.isFrozen(stix)) return; + + Object.freeze(stix); + + for (const value of Object.values(stix)) { + if (!value || typeof value !== 'object' || Object.isFrozen(value)) continue; + Object.freeze(value); + if (Array.isArray(value)) { + for (const item of value) { + if (item && typeof item === 'object' && !Object.isFrozen(item)) { + Object.freeze(item); + } + } + } + } +} + +module.exports = { + deepFreezeStix, +}; diff --git a/app/services/meta-classes/base.service.js b/app/services/meta-classes/base.service.js index c2128bc3..3221dfc0 100644 --- a/app/services/meta-classes/base.service.js +++ b/app/services/meta-classes/base.service.js @@ -24,6 +24,7 @@ const { SelfRevocationError, } = require('../../exceptions'); const { getSchema } = require('../../lib/validation-schemas'); +const { deepFreezeStix } = require('../../lib/import-safety'); const ServiceWithHooks = require('./hooks.service'); const WorkflowResult = require('../../lib/workflow-result'); @@ -700,8 +701,23 @@ class BaseService extends ServiceWithHooks { return { ...composed, warnings }; } + // Import-fidelity contract: bundle stix must be persisted byte-faithful. + // Several `beforeCreate` hooks normally rewrite stix fields (e.g. + // AnalyticsService stamps stix.name from the ATT&CK ID; SoftwareService + // defaults stix.is_family). Those rewrites are intentional on user-driven + // POST/PUT flows but must NOT run during import. We freeze stix before + // invoking the hook so any forgotten `if (!options.import)` gate throws + // a TypeError at the violating line instead of silently corrupting the + // imported content. See app/lib/import-safety.js for the full rationale. + deepFreezeStix(composed); await this.beforeCreate(composed, options); + const createdDocument = await this.repository.save(composed); + + // Same contract applies to `afterCreate` and the listeners it triggers + // via emitted domain events — anything that reaches stix on this freshly + // saved document during import must crash, not silently mutate. + deepFreezeStix(createdDocument); await this.afterCreate(createdDocument, options); await this.emitCreatedEvent(createdDocument, options); diff --git a/app/services/stix/analytics-service.js b/app/services/stix/analytics-service.js index e87b9366..1f10fd6c 100644 --- a/app/services/stix/analytics-service.js +++ b/app/services/stix/analytics-service.js @@ -11,6 +11,7 @@ const { const EventBus = require('../../lib/event-bus'); const logger = require('../../lib/logger'); const Exceptions = require('../../exceptions'); +const { deepFreezeStix } = require('../../lib/import-safety'); /** * Service for managing analytics @@ -50,15 +51,21 @@ class AnalyticsService extends BaseService { /** * Handle analytics being referenced by a detection strategy - * Add inbound embedded_relationship and update external_references + * Add inbound embedded_relationship and (when not importing) refresh the + * ATT&CK external_references URL on each referenced analytic. * * @param {Object} payload - Event payload * @param {Object} payload.detectionStrategy - The detection strategy document that references the analytics * @param {string[]} payload.analyticIds - Array of analytic STIX IDs being referenced + * @param {Object} [payload.options] - The originating create-options forwarded from + * DetectionStrategiesService.afterCreate. Used here to honor the + * import-fidelity contract — see app/lib/import-safety.js. When + * `options.import` is true, the workspace metadata update still runs but + * the stix.external_references rewrite is skipped. * @returns {Promise} */ static async handleAnalyticsReferenced(payload) { - const { detectionStrategy, analyticIds } = payload; + const { detectionStrategy, analyticIds, options } = payload; logger.info( `Analytics Service heard event: 'x-mitre-detection-strategy::analytics-referenced' for ${detectionStrategy.stix.id}`, @@ -75,6 +82,13 @@ class AnalyticsService extends BaseService { continue; } + // Import-fidelity guard: when the triggering create came from a + // bundle import, freeze this analytic's stix so any forgotten + // import gate below crashes loudly with a TypeError instead of + // silently rewriting the persisted analytic's stix fields. + // Workspace mutations are unaffected. See app/lib/import-safety.js. + if (options?.import) deepFreezeStix(analytic); + // Initialize embedded_relationships if needed if (!analytic.workspace) { analytic.workspace = {}; @@ -102,23 +116,30 @@ class AnalyticsService extends BaseService { ); } - // Update external_references with URL to parent detection strategy - if (!analytic.stix.external_references) { - analytic.stix.external_references = []; - } - - // Remove existing ATT&CK external references - analytic.stix.external_references = removeAttackExternalReferences( - analytic.stix.external_references, - ); + // Refresh the analytic's ATT&CK external_references URL so it + // points at the parent detection strategy. This rewrites + // `stix.external_references` and must therefore be skipped on the + // import path; the bundle is the source of truth for stix content. + // The framework freeze above would throw here if this gate were + // missing — that's intentional. + if (!options?.import) { + if (!analytic.stix.external_references) { + analytic.stix.external_references = []; + } - // Create new ATT&CK external reference with URL - const attackRef = createAttackExternalReference(analytic.toObject()); - if (attackRef) { - analytic.stix.external_references.unshift(attackRef); - logger.info( - `AnalyticsService: Updated external_references URL for analytic ${analyticId}`, + // Remove existing ATT&CK external references + analytic.stix.external_references = removeAttackExternalReferences( + analytic.stix.external_references, ); + + // Create new ATT&CK external reference with URL + const attackRef = createAttackExternalReference(analytic.toObject()); + if (attackRef) { + analytic.stix.external_references.unshift(attackRef); + logger.info( + `AnalyticsService: Updated external_references URL for analytic ${analyticId}`, + ); + } } await analyticsRepository.saveDocument(analytic); @@ -215,12 +236,21 @@ class AnalyticsService extends BaseService { * @throws {Exceptions.NotFoundError} If a referenced data component does not exist * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars async beforeCreate(data, options) { - // Analytic name matches its ATT&CK ID - const id = data.workspace.attack_id; - data.stix.name = id.replace(/^AN(\d+)$/, 'Analytic $1'); - logger.debug(`Setting name to match ATT&CK ID: ${data.stix.name}`); + // Import-fidelity contract: when a STIX bundle is being imported, the + // bundle's `stix` content must be persisted byte-faithful. Stamping + // `stix.name` from the ATT&CK ID is correct for user-driven POST flows, + // where the server is the authority on the analytic's display name, but + // incorrect for an import — the bundle already carries the name. The + // framework freezes `data.stix` during import-mode hooks (see + // app/lib/import-safety.js), so the assignment below would throw a + // TypeError without this gate. Workspace mutations further down still + // run unconditionally. + if (!options?.import) { + const id = data.workspace.attack_id; + data.stix.name = id.replace(/^AN(\d+)$/, 'Analytic $1'); + logger.debug(`Setting name to match ATT&CK ID: ${data.stix.name}`); + } // Initialize embedded_relationships if not present if (!data.workspace) { @@ -286,11 +316,13 @@ class AnalyticsService extends BaseService { * Emits domain event to notify DataComponentsService that data components were referenced * * @param {Object} createdDocument - The created analytic document - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js. * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars - async afterCreate(createdDocument, _options) { + async afterCreate(createdDocument, options) { // Extract data component IDs from x_mitre_log_source_references const dataComponentRefs = createdDocument.stix?.x_mitre_log_source_references?.map( @@ -307,6 +339,7 @@ class AnalyticsService extends BaseService { analyticId: createdDocument.stix.id, analytic: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataComponentIds: dataComponentRefs, + options, }); } } diff --git a/app/services/stix/campaigns-service.js b/app/services/stix/campaigns-service.js index 3817b12f..97d0915d 100644 --- a/app/services/stix/campaigns-service.js +++ b/app/services/stix/campaigns-service.js @@ -34,10 +34,17 @@ class CampaignService extends BaseService { * Ensures aliases[0] matches the object name * * @param {Object} data - The campaign object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: skip the alias normalization on the import + // path. The normalization rewrites `stix.aliases` (rearranging entries + // and prepending `stix.name`), which is correct for user-driven flows + // but deviates the persisted analytic from the bundle source-of-truth. + // `data.stix` is frozen during import-mode hooks (app/lib/import-safety.js), + // so a missing gate here would throw a TypeError at the assignment in + // `_normalizeAliases`. + if (options?.import) return; this._normalizeAliases(data); } diff --git a/app/services/stix/collection-bundles-service/import-bundle.js b/app/services/stix/collection-bundles-service/import-bundle.js index af6b06f1..d28e8af3 100644 --- a/app/services/stix/collection-bundles-service/import-bundle.js +++ b/app/services/stix/collection-bundles-service/import-bundle.js @@ -15,6 +15,7 @@ const { const logger = require('../../../lib/logger'); const config = require('../../../config/config'); const types = require('../../../lib/types'); +const { deepFreezeStix } = require('../../../lib/import-safety'); // Bounded concurrency for the compose-and-validate phase. Each task runs Zod // validation and a small amount of synchronous work, so we cap concurrency @@ -356,6 +357,12 @@ async function processTier(type, objects, ctx) { // and any other pre-persist data shaping are present on the doc when // saveMany writes it. Failures here are recorded as save errors and the // doc is dropped from the bulk insert. + // + // Import-fidelity guard: freeze stix before invoking the hook so any + // forgotten `if (!options.import)` gate inside the service crashes + // loudly with a TypeError instead of silently mutating bundle content. + // See app/lib/import-safety.js for the full contract. + deepFreezeStix(composed); try { await service.beforeCreate(composed, composeOptions); } catch (hookErr) { @@ -418,6 +425,13 @@ async function processTier(type, objects, ctx) { // Run in parallel with bounded concurrency; per-doc hook failures are // logged but never abort the import. await runWithConcurrency(inserted, COMPOSE_CONCURRENCY, async (doc) => { + // Import-fidelity guard for the post-insert lifecycle. afterCreate and + // the listeners that subscribe to the emitted `{type}::created` event + // are allowed to populate workspace metadata on referenced documents + // but must not deviate this freshly saved document's stix from the + // bundle. Freezing forces violations to crash here rather than + // silently corrupting the imported content. See app/lib/import-safety.js. + deepFreezeStix(doc); try { await service.afterCreate(doc, composeOptions); } catch (err) { diff --git a/app/services/stix/data-components-service.js b/app/services/stix/data-components-service.js index d909399f..7d2a454a 100644 --- a/app/services/stix/data-components-service.js +++ b/app/services/stix/data-components-service.js @@ -239,11 +239,13 @@ class DataComponentsService extends BaseService { * This handles both first-time creation and new version creation (versioning) * * @param {Object} createdDocument - The created data component document - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js. * @returns {Promise} */ - // eslint-disable-next-line no-unused-vars - async afterCreate(createdDocument, _options) { + async afterCreate(createdDocument, options) { const addedRef = this._addedDataSourceRef; const removedRef = this._removedDataSourceRef; @@ -258,6 +260,7 @@ class DataComponentsService extends BaseService { dataComponentId: createdDocument.stix.id, dataComponent: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataSourceId: addedRef, + options, }); } @@ -271,6 +274,7 @@ class DataComponentsService extends BaseService { await EventBus.emit('x-mitre-data-component::data-source-removed', { dataComponentId: createdDocument.stix.id, dataSourceId: removedRef, + options, }); } @@ -287,6 +291,7 @@ class DataComponentsService extends BaseService { dataComponentId: createdDocument.stix.id, dataComponent: createdDocument.toObject ? createdDocument.toObject() : createdDocument, dataSourceId: currentDataSourceRef, + options, }); } diff --git a/app/services/stix/detection-strategies-service.js b/app/services/stix/detection-strategies-service.js index b3715eeb..d8ddb92f 100644 --- a/app/services/stix/detection-strategies-service.js +++ b/app/services/stix/detection-strategies-service.js @@ -114,8 +114,14 @@ class DetectionStrategiesService extends BaseService { * Handle post-creation logic * Emit domain events to notify AnalyticsService about referenced/removed analytics * This handles both first-time creation and new version creation (versioning) + * + * @param {Object} document - The persisted detection strategy + * @param {Object} [options] - Create options forwarded from BaseService. + * Threaded into the event payload so listeners can honor the + * import-fidelity contract (no stix mutations when `options.import`). + * See app/lib/import-safety.js for the contract. */ - async afterCreate(document) { + async afterCreate(document, options) { const addedRefs = this._addedAnalyticRefs || []; const removedRefs = this._removedAnalyticRefs || []; @@ -130,6 +136,7 @@ class DetectionStrategiesService extends BaseService { detectionStrategyId: document.stix.id, detectionStrategy: document.toObject ? document.toObject() : document, analyticIds: addedRefs, + options, }); } @@ -143,6 +150,7 @@ class DetectionStrategiesService extends BaseService { await EventBus.emit('x-mitre-detection-strategy::analytics-removed', { detectionStrategyId: document.stix.id, analyticIds: removedRefs, + options, }); } @@ -159,6 +167,7 @@ class DetectionStrategiesService extends BaseService { detectionStrategyId: document.stix.id, detectionStrategy: document.toObject ? document.toObject() : document, analyticIds: currentAnalyticRefs, + options, }); } diff --git a/app/services/stix/groups-service.js b/app/services/stix/groups-service.js index b182470d..552444d8 100644 --- a/app/services/stix/groups-service.js +++ b/app/services/stix/groups-service.js @@ -34,10 +34,16 @@ class GroupsService extends BaseService { * Ensures aliases[0] matches the object name * * @param {Object} data - The group object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: skip the alias normalization on the import + // path. The normalization rewrites `stix.aliases`, which is correct for + // user-driven flows but deviates the persisted group from the bundle + // source-of-truth. `data.stix` is frozen during import-mode hooks + // (app/lib/import-safety.js), so a missing gate here would throw a + // TypeError at the assignment in `_normalizeAliases`. + if (options?.import) return; this._normalizeAliases(data); } diff --git a/app/services/stix/software-service.js b/app/services/stix/software-service.js index 896d0d9b..fb7651f8 100644 --- a/app/services/stix/software-service.js +++ b/app/services/stix/software-service.js @@ -40,10 +40,19 @@ class SoftwareService extends BaseService { * - Ensures x_mitre_aliases[0] matches the object name * * @param {Object} data - The software object data - * @param {Object} _options - Creation options (unused) + * @param {Object} [options] - Creation options */ - // eslint-disable-next-line no-unused-vars - async beforeCreate(data, _options) { + async beforeCreate(data, options) { + // Import-fidelity contract: defaulting `stix.is_family` and rewriting + // `stix.x_mitre_aliases` is correct for user-driven flows where the + // server is the authority on these fields, but incorrect on the import + // path — the bundle carries authoritative values (including a deliberate + // omission of `is_family` for malware that doesn't represent a family, + // which must NOT be defaulted to `true`). `data.stix` is frozen during + // import-mode hooks (app/lib/import-safety.js), so a missing gate would + // throw a TypeError at the first attempted stix write below. + if (options?.import) return; + // Set is_family default for malware if (data.stix && data.stix.type === MalwareType && typeof data.stix.is_family !== 'boolean') { data.stix.is_family = true; From d69472ea3af2201dc0e354d696687e1a2943ebb1 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:12:48 -0400 Subject: [PATCH 4/7] fix(import-bundle): surface ADM validation errors in import response The bundle-import pipeline's fail-open branch (the default, when `?validateContents=true` is not set) attached the ADM error list to each failing document's `workspace.validation` but never wrote a matching entry to the import response's `workspace.import_categories.errors`. A bundle with hundreds of validation failures could look like a clean import. composeForImport now also returns the full validationErrors array; processTier writes one entry per failing object with `error_type: validationError`, a summary `error_message`, and a `details` array containing every `{message, path, code}` from the Zod output. This applies in both branches: - validateContents=true (strict): the doc is dropped from the bulk insert and the error is recorded with full details. - validateContents=false (default): the doc is still persisted with workspace.validation attached, but the error is also mirrored into import_categories.errors so the response surfaces the failure up front. Also fixes the strict-branch entry's error_type, which was previously `saveError` despite the failure being a validation failure. --- app/services/meta-classes/base.service.js | 16 +++++- .../import-bundle.js | 54 ++++++++++++++++--- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/app/services/meta-classes/base.service.js b/app/services/meta-classes/base.service.js index 3221dfc0..ee174b25 100644 --- a/app/services/meta-classes/base.service.js +++ b/app/services/meta-classes/base.service.js @@ -741,7 +741,19 @@ class BaseService extends ServiceWithHooks { * * @param {Object} data - The request data ({ stix, workspace }) * @param {Object} options - Options passed from create() - * @returns {Promise<{ data: Object, warnings: Array, throwIfValidating: ValidationError|null }>} + * @returns {Promise<{ + * data: Object, + * warnings: Array, + * throwIfValidating: ValidationError|null, + * validationErrors: Array<{message,path,code,input}> + * }>} + * - `validationErrors` is the full list of ADM errors that fired against + * this object (after bypass filtering). The bundle-import path uses it + * to surface per-object validation failures in + * `workspace.import_categories.errors` so an import response + * accurately reflects what was wrong — without that, fail-open mode + * silently buries the errors on each document and the import looks + * successful even when many objects are malformed. */ async composeForImport(data, options) { // Strip workspace.validation — server-controlled; the fail-open block @@ -796,7 +808,7 @@ class BaseService extends ServiceWithHooks { } } - return { data, warnings, throwIfValidating }; + return { data, warnings, throwIfValidating, validationErrors: errors }; } /** diff --git a/app/services/stix/collection-bundles-service/import-bundle.js b/app/services/stix/collection-bundles-service/import-bundle.js index d28e8af3..74f58a94 100644 --- a/app/services/stix/collection-bundles-service/import-bundle.js +++ b/app/services/stix/collection-bundles-service/import-bundle.js @@ -334,17 +334,27 @@ async function processTier(type, objects, ctx) { }; try { - const { data: composed, throwIfValidating } = await service.composeForImport( - stagingDoc, - composeOptions, - ); - + const { + data: composed, + throwIfValidating, + validationErrors, + } = await service.composeForImport(stagingDoc, composeOptions); + + // Strict-mode validation failure (`validateContents=true`). Surface the + // full ADM error list, not just the wrapper message — without `details` + // the caller has no way to act on the failure other than re-running the + // import with logs at debug level. Drop the doc from the bulk insert. if (throwIfValidating) { const importError = { object_ref: importObject.id, object_modified: importObject.modified, - error_type: importErrors.saveError, - error_message: throwIfValidating.message, + error_type: importErrors.validationError, + error_message: `${validationErrors.length} ADM validation error(s)`, + details: validationErrors.map((e) => ({ + message: e.message, + path: e.path, + code: e.code, + })), }; logger.verbose( `Import Bundle Error: Validation failed. id=${importObject.id}, ${throwIfValidating.message}`, @@ -353,6 +363,36 @@ async function processTier(type, objects, ctx) { return; } + // Fail-open validation failures (`validateContents=false`, the default). + // The object IS persisted with the error list attached to its own + // `workspace.validation`, but a clean import response would otherwise + // give the caller no signal that anything was wrong. We mirror the + // per-object errors into `import_categories.errors` so the response + // surfaces them up front. One taxonomy entry per object regardless of + // how many issues that object had — the full per-issue list lives in + // `details` so the caller can drill down without querying each doc. + if (validationErrors.length > 0) { + const firstFew = validationErrors + .slice(0, 3) + .map((e) => e.message) + .join('; '); + const summary = + validationErrors.length > 3 + ? `${firstFew}; ...and ${validationErrors.length - 3} more` + : firstFew; + importedCollection.workspace.import_categories.errors.push({ + object_ref: importObject.id, + object_modified: importObject.modified, + error_type: importErrors.validationError, + error_message: `${validationErrors.length} ADM validation error(s): ${summary}`, + details: validationErrors.map((e) => ({ + message: e.message, + path: e.path, + code: e.code, + })), + }); + } + // Run the service's beforeCreate hook so outbound embedded_relationships // and any other pre-persist data shaping are present on the doc when // saveMany writes it. Failures here are recorded as save errors and the From 029b0e1ebf63fe6918b5f0c113ce2dcc27928266 Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:13:57 -0400 Subject: [PATCH 5/7] docs(import-bundle): add user guide, pipeline overview, fidelity contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three new documents covering the bundle-import work: - docs/user/stix-bundle-import.md — how to call the endpoint, the two validation modes (fail-open vs strict), the import_categories.errors taxonomy, and re-import semantics. - docs/developer/stix-bundle-import-pipeline.md — implementer walk-through of the tier-batched pipeline with stage diagrams, the dependency-tier table, concurrency primitives, and bulk persistence helpers. - docs/developer/import-fidelity-contract.md — the stix-frozen contract for hooks and listeners, what the framework enforces, and the author rules for adding new lifecycle code. --- docs/developer/import-fidelity-contract.md | 203 +++++++++++++++ docs/developer/stix-bundle-import-pipeline.md | 234 ++++++++++++++++++ docs/user/stix-bundle-import.md | 171 +++++++++++++ 3 files changed, 608 insertions(+) create mode 100644 docs/developer/import-fidelity-contract.md create mode 100644 docs/developer/stix-bundle-import-pipeline.md create mode 100644 docs/user/stix-bundle-import.md diff --git a/docs/developer/import-fidelity-contract.md b/docs/developer/import-fidelity-contract.md new file mode 100644 index 00000000..d087ba65 --- /dev/null +++ b/docs/developer/import-fidelity-contract.md @@ -0,0 +1,203 @@ +# Import-Fidelity Contract + +When a STIX bundle is imported, the persisted objects must be +**byte-faithful** to the bundle's `stix` content. Workbench may +populate its private `workspace` metadata on each document, but +must not deviate any field under `stix`. + +This document defines the contract, explains why it exists, and +tells you how to author hooks, event listeners, and any new code +that runs during a bundle import. + +For the broader bundle-import pipeline, see +[`stix-bundle-import-pipeline.md`](./stix-bundle-import-pipeline.md). + +## Why the contract exists + +Workbench has a richer object model than raw STIX. Lifecycle hooks +and event listeners legitimately normalize and enrich STIX fields +on user-driven flows. A few examples that ship today: + +| Service | Hook / listener | Stix mutation | +|---|---|---| +| AnalyticsService | `beforeCreate` | Stamps `stix.name = "Analytic "` | +| AnalyticsService | `handleAnalyticsReferenced` listener | Rewrites `stix.external_references` to embed a URL to the parent detection strategy | +| CampaignsService | `beforeCreate` | Forces `stix.aliases[0]` to equal `stix.name` | +| GroupsService | `beforeCreate` | Same alias normalization as campaigns | +| SoftwareService | `beforeCreate` | Defaults `stix.is_family = true` for malware; normalizes `stix.x_mitre_aliases` | + +All five are correct on POST/PUT, where Workbench is the authority +on the object's display values. None of them are correct during an +import: the bundle is the source of truth for `stix.*`, and a +silent rewrite breaks round-trip fidelity and obscures the +provenance of the imported content. + +The framework therefore enforces a hard rule: + +> **During an import (`options.import === true`), `stix.*` is +> read-only. Workspace fields are still mutable.** + +## How the contract is enforced + +[`app/lib/import-safety.js`](../../app/lib/import-safety.js) exports +`deepFreezeStix(doc)`. It calls `Object.freeze` on `doc.stix` and on +the immediate children (nested objects, nested arrays, and the +array elements). In Node strict mode (`'use strict'` at the top of +every service file), an attempted write to any frozen property +throws `TypeError` immediately, pointing at the violating line. + +The framework calls `deepFreezeStix` at every point where untrusted +code (a hook, a listener) is about to run during an import: + +| Location | When | +|---|---| +| `BaseService._createFromImport` | Before `beforeCreate(composed, options)` and before `afterCreate(doc, options)` / `emitCreatedEvent(doc, options)`. | +| `collection-bundles-service/import-bundle.js` (compose worker) | Before each call to `service.beforeCreate(composed, composeOptions)`. | +| `collection-bundles-service/import-bundle.js` (post-insert worker) | Before each call to `service.afterCreate(doc, composeOptions)` and `service.emitCreatedEvent(doc, composeOptions)`. | + +Listeners that fetch a related document and mutate it then call +`deepFreezeStix(fetchedDoc)` themselves on entry when their +incoming payload indicates an import is in progress. The pattern +is one line: + +```js +if (payload.options?.import) deepFreezeStix(fetchedDoc); +``` + +The freeze is invisible to non-import paths: `deepFreezeStix` is +only called when the framework or a listener has confirmed +`options.import === true`. + +### Why deep, not shallow + +`Object.freeze` is shallow. Common stix mutations target nested +structures: + +- `analytic.stix.external_references.unshift(...)` (rewrites an array) +- `analytic.stix.external_references[0].external_id = '...'` (mutates an array element) + +A shallow freeze would let both succeed silently. `deepFreezeStix` +walks one level into objects and arrays (including array elements) +to cover the cases STIX content actually exhibits. Reads remain +unaffected at any depth. + +### Why freeze instead of clone-and-restore + +A snapshot-and-restore approach (clone `stix` before each hook, +restore after) would also enforce fidelity, but it requires the +framework to know which side effects to undo and risks leaving +half-written state when a hook mutates nested objects. A freeze +fails closed at the violating line, points the developer at +exactly the code that needs a gate, and adds zero runtime +overhead after the freeze is applied. + +## Author rules — writing hooks and listeners + +The contract translates into one rule for hook authors: + +> Workspace mutations are always allowed. Wrap any `stix.*` +> mutation in `if (!options?.import) { ... }`. + +A correctly-shaped `beforeCreate` looks like this: + +```js +async beforeCreate(data, options) { + // Workspace mutations — always allowed. + data.workspace = data.workspace || {}; + data.workspace.embedded_relationships = buildOutboundRels(data); + + // STIX mutations — gated. The framework freezes data.stix + // during import, so a missing gate throws a TypeError pointing + // at the line below on the first import test. + if (!options?.import) { + data.stix.name = deriveNameFromAttackId(data.workspace.attack_id); + } +} +``` + +And a correctly-shaped listener: + +```js +static async handleAnalyticsReferenced(payload) { + const { detectionStrategy, analyticIds, options } = payload; + + for (const analyticId of analyticIds) { + const analytic = await analyticsRepository.retrieveLatestByStixId(analyticId); + if (!analytic) continue; + + // Import-fidelity guard. The framework freezes the doc the + // emitter saw, but listeners fetch their own related docs — + // so each listener takes responsibility for freezing what + // it fetched. + if (options?.import) deepFreezeStix(analytic); + + // Workspace mutations — always allowed. + addInboundEmbeddedRelationship(analytic, detectionStrategy); + + // STIX mutations — gated, just like in beforeCreate. + if (!options?.import) { + refreshExternalReferencesUrl(analytic, detectionStrategy); + } + + await analyticsRepository.saveDocument(analytic); + } +} +``` + +## Forwarding `options` to listeners + +Listeners can only honor the contract if the originating service +forwards its create-options into the emitted event payload. The +three afterCreate emit sites that drive metadata cascades all do +this: + +- `DetectionStrategiesService.afterCreate(document, options)` + passes `options` into every `'x-mitre-detection-strategy::*'` + emit. +- `AnalyticsService.afterCreate(createdDocument, options)` + passes `options` into `'x-mitre-analytic::data-components-referenced'`. +- `DataComponentsService.afterCreate(createdDocument, options)` + passes `options` into `'x-mitre-data-component::data-source-*'`. + +If you add a new domain event that may fire during import, do the +same — include `options` in the payload. + +## Adding a new hook or listener + +Checklist for an author adding code that runs during a bundle +import: + +1. **Default to workspace.** If your work can be expressed as + workspace metadata (an embedded relationship, a derived index, + a denormalized cache), keep it under `workspace.*` — no gate + needed. + +2. **Gate stix writes.** If you genuinely need to mutate + `stix.*`, wrap the block in `if (!options?.import) { ... }`. + Forgetting the gate will not silently break things: the next + import test will crash with a TypeError pointing at your line. + +3. **Listeners freeze what they fetch.** If your listener fetches + a related document via a repository and may write to its + `stix.*`, add `if (options?.import) deepFreezeStix(fetched);` + at the top of the per-document block. + +4. **Emit `options` in event payloads.** If your service emits a + domain event from `afterCreate` / `afterUpdate`, include + `options` in the payload so downstream listeners can see when + an import is in progress. + +5. **Test it.** Round-trip a bundle through import — export, hash + the persisted `stix` content of a sample of objects, compare + to the bundle. If you forgot a gate, you'll have crashed on + the import attempt before you ever reach the comparison. + +## Files + +| Path | Role | +|---|---| +| [`app/lib/import-safety.js`](../../app/lib/import-safety.js) | `deepFreezeStix` helper and contract documentation. | +| [`app/services/meta-classes/base.service.js`](../../app/services/meta-classes/base.service.js) | Framework-level freeze in `_createFromImport`. | +| [`app/services/stix/collection-bundles-service/import-bundle.js`](../../app/services/stix/collection-bundles-service/import-bundle.js) | Framework-level freeze in the bulk pipeline. | +| [`app/services/stix/analytics-service.js`](../../app/services/stix/analytics-service.js) | Example of both forms of gate (`beforeCreate` and listener). | +| [`app/services/stix/detection-strategies-service.js`](../../app/services/stix/detection-strategies-service.js) | Example of forwarding `options` into event payloads. | diff --git a/docs/developer/stix-bundle-import-pipeline.md b/docs/developer/stix-bundle-import-pipeline.md new file mode 100644 index 00000000..5dbf77fe --- /dev/null +++ b/docs/developer/stix-bundle-import-pipeline.md @@ -0,0 +1,234 @@ +# STIX Bundle Import Pipeline + +This document describes the internal pipeline that runs when a +client POSTs to `/api/collection-bundles`. For user-facing +documentation of the endpoint's behavior and response shape, see +[`docs/user/stix-bundle-import.md`](../user/stix-bundle-import.md). + +For the rules that govern hook and listener behavior during import +(why bundle `stix` content stays byte-faithful through the +pipeline), see [`import-fidelity-contract.md`](./import-fidelity-contract.md). + +## Entry points + +``` +HTTP request + → app/controllers/collection-bundles-controller.js:importBundle + → app/services/stix/collection-bundles-service/index.js + → app/services/stix/collection-bundles-service/import-bundle.js (this pipeline) +``` + +The controller reads query parameters (`previewOnly`, +`validateContents`, `forceImport`) into an `options` object and +hands the entire bundle and options to `importBundle`. + +## Pipeline stages + +``` +┌────────────────────────────────────────────────────────────────┐ +│ 1. Initialize per-import state │ +│ - importedCollection skeleton (workspace.import_categories) │ +│ - contentsMap from collection.x_mitre_contents │ +│ - referenceMap │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 2. Check for duplicate collection │ +│ - Same stixId + same modified → existing collection │ +│ - forceImport=duplicate-collection: warn, attach a reimport │ +│ - Otherwise: throw, abort │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 3. processObjects │ +│ - Sort objects by dependency order │ +│ - Group consecutive same-type objects into TIERS │ +│ - For each tier sequentially: processTier(type, objects) │ +│ - Then: report contents-map orphans as "Missing object" │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 4. importReferences (sequential) │ +│ - Insert or update each unique external_reference │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ 5. saveCollection │ +│ - Persist x-mitre-collection itself (or append reimport) │ +└────────────────────────────────────────────────────────────────┘ +``` + +## Dependency-ordered tiers + +`sortObjectsByDependencies` returns the bundle's objects in this +order (lower numbers persist first): + +| Tier | STIX type | Rationale | +|---|---|---| +| 0 | `marking-definition` | No outbound refs to other types | +| 1 | `identity` | No outbound refs to other types | +| 2 | `x-mitre-data-source` | Data components reference these | +| 3 | `x-mitre-data-component` | Analytics reference these | +| 4 | `x-mitre-analytic` | Detection strategies reference these | +| 5 | `x-mitre-detection-strategy` | (depends on analytics) | +| 6 | `attack-pattern` (techniques) | SDOs in general | +| 7 | `x-mitre-tactic` | | +| 8 | `course-of-action` (mitigations) | | +| 9 | `intrusion-set` (groups) | | +| 10 | `campaign` | | +| 11 | `malware` | | +| 12 | `tool` | | +| 13 | `x-mitre-asset` | | +| 14 | `x-mitre-matrix` | | +| 15 | `relationship` | SROs last so their endpoints exist | +| 16 | `note` | | +| 17 | `x-mitre-collection` | The bundle's own collection (skipped here; persisted separately) | + +Sort is stable, so within a tier order matches the bundle's order. + +## `processTier` — what runs inside one tier + +For each tier (objects of a single STIX type, in dependency order): + +``` +┌────────────────────────────────────────────────────────────────┐ +│ A. Synchronous eligibility filter (single pass over tier) │ +│ - contents-map drain (warn on bundle-object-not-in-contents)│ +│ - ATT&CK spec-version gate (throws or skips per forceImport)│ +│ → eligible[] │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ B. Bulk pre-fetch existing versions │ +│ repository.retrieveAllByStixIds(eligible.map(o => o.id)) │ +│ → Map> │ +│ One DB query for the entire tier, replacing N retrieveById │ +│ calls from the legacy per-object loop. │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ C. Parallel compose & validate (bounded concurrency, cap 25) │ +│ For each eligible object: │ +│ - checkForDuplicate vs pre-fetched versions │ +│ - categorizeObject (additions/changes/etc.) │ +│ - processExternalReferences │ +│ - service.composeForImport (Zod + workspace.attack_id + │ +│ fail-open workspace.validation) │ +│ - deepFreezeStix(composed) — import-fidelity guard │ +│ - service.beforeCreate(composed, options) │ +│ - record any validation errors into │ +│ importedCollection.workspace.import_categories.errors │ +│ - push composed doc into composedToInsert[] │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ D. Bulk insert (one MongoDB insertMany per tier) │ +│ repository.saveMany(composedToInsert) │ +│ → { inserted: [...], errors: [...writeErrors] } │ +│ writeErrors (ordered:false) → import_categories.errors │ +└────────────────────────────────────────────────────────────────┘ + │ +┌────────────────────────────────────────────────────────────────┐ +│ E. Parallel post-insert lifecycle (bounded concurrency, cap 25)│ +│ For each inserted doc: │ +│ - deepFreezeStix(doc) — import-fidelity guard │ +│ - service.afterCreate(doc, options) │ +│ - service.emitCreatedEvent(doc, options) │ +│ afterCreate emits domain events (e.g. │ +│ 'x-mitre-detection-strategy::analytics-referenced') │ +│ that drive INBOUND workspace.embedded_relationships │ +│ population on referenced docs in earlier-finished tiers. │ +└────────────────────────────────────────────────────────────────┘ +``` + +The order of tiers matters because the post-insert listener +cascade in stage E may modify documents from earlier tiers +(e.g. analytics that were persisted in tier 4 receive inbound +embedded_relationships when detection strategies in tier 5 are +processed). + +## Concurrency primitives + +`runWithConcurrency(items, limit, task)` in `import-bundle.js` is a +small worker-pool helper used in stages C and E. It pulls from a +shared index so each worker fetches the next available item rather +than partitioning ahead of time, which keeps utilization high even +when per-item cost varies (a worker that finishes a cheap doc +immediately picks up the next one). + +We do not pull in `p-limit`: it is not a direct dependency of this +project, and recent versions are ESM-only, which doesn't fit a +CommonJS codebase. The helper is small enough to keep inline. + +## Bulk persistence primitives + +Both new repository methods live on `_base.repository.js` and are +inherited by every concrete repository: + +- **`retrieveAllByStixIds(stixIds)`** — single `find({ 'stix.id': + { $in: ids } })` followed by an in-memory bucket by stixId. + Returns `Map>` with versions sorted + newest-first (matching `retrieveAllById`'s order). + +- **`saveMany(dataArr, { ordered })`** — wraps + `Model.insertMany(dataArr, { ordered: false })`. Returns + `{ inserted, errors }` where `errors` is a normalized + `{ index, message, code }` per failed document. `ordered: false` + ensures one bad document does not abort the rest of the tier. + +Both are only invoked from the bundle-import path. The +single-object create/update paths continue to use +`repository.save(data)` and `repository.retrieveLatestByStixId`. + +## Error model + +The pipeline never throws for per-object failures (except the +ATT&CK spec-version violation without forceImport, which is +considered fatal). Every recoverable error is appended to +`importedCollection.workspace.import_categories.errors` with a +typed `error_type` and as much context as is available. See the +[user doc](../user/stix-bundle-import.md#error-types-in-import_categorieserrors) +for the full error-type taxonomy. + +ADM validation errors are recorded in **both** branches: + +- `validateContents=true` (strict): the doc is dropped from the + bulk insert; one entry with full `details` is written. +- `validateContents=false` (fail-open, default): the doc is still + persisted with `workspace.validation` attached; an entry with + full `details` is **also** written so the import response + surfaces the failure up front. + +Both branches use `error_type: 'Validation error'` and include +the complete Zod issue list in the entry's `details` field. + +## Performance characteristics + +The pipeline scales primarily with two factors: + +1. **Number of objects in the bundle.** The MongoDB round-trips + are dominated by per-tier reads and writes, both of which are + O(1) queries per tier regardless of the number of objects in + it. The total round-trip count is ~`2 * number_of_tiers`. + +2. **Cost of the listener cascade.** Each `afterCreate` that emits + a domain event triggers a listener that fetches and updates the + referenced documents. This is currently O(refs) per source + object — if 10 analytics reference one data component, that + data component is fetched and saved 10 times. Consolidating + listener writes per target is a future optimization. + +On developer hardware, an Enterprise bundle import that previously +took 5+ minutes completes in 20-60 seconds depending on local +Mongo and CPU. + +## Files + +| Path | Role | +|---|---| +| [`app/services/stix/collection-bundles-service/import-bundle.js`](../../app/services/stix/collection-bundles-service/import-bundle.js) | The pipeline itself. `processObjects`, `processTier`, `runWithConcurrency`. | +| [`app/services/stix/collection-bundles-service/bundle-helpers.js`](../../app/services/stix/collection-bundles-service/bundle-helpers.js) | Constants for `importErrors`, `forceImportParameters`, `errors`. | +| [`app/services/meta-classes/base.service.js`](../../app/services/meta-classes/base.service.js) | `composeForImport` (validation + workspace fields) and `_createFromImport` (single-object path). | +| [`app/repository/_base.repository.js`](../../app/repository/_base.repository.js) | `retrieveAllByStixIds` and `saveMany`. | +| [`app/lib/validation-schemas.js`](../../app/lib/validation-schemas.js) | ADM schema selection with cached `.partial()` for WIP objects. | +| [`app/lib/import-safety.js`](../../app/lib/import-safety.js) | `deepFreezeStix` enforcement helper. | diff --git a/docs/user/stix-bundle-import.md b/docs/user/stix-bundle-import.md new file mode 100644 index 00000000..1d6e6f1c --- /dev/null +++ b/docs/user/stix-bundle-import.md @@ -0,0 +1,171 @@ +# Importing a STIX Bundle + +The ATT&CK Workbench REST API can ingest a STIX 2.1 bundle that wraps an +ATT&CK collection (`x-mitre-collection`). The endpoint persists every +object in the bundle, populates ATT&CK Workbench metadata on each one, +and returns a single response document summarizing what happened. + +Bundles are imported **as-is**: the `stix` content of every persisted +object matches what was in the bundle, byte-for-byte. Workbench adds +metadata in a separate `workspace` namespace but does not alter the +bundle's STIX fields. This guarantee holds even when Workbench's +hooks would otherwise rewrite fields like `stix.name`, +`stix.aliases`, or `stix.external_references` on a user-driven POST. + +## Usage + +``` +POST /api/collection-bundles +Content-Type: application/json +``` + +**Request body**: a STIX 2.1 bundle whose `objects` array contains +exactly one `x-mitre-collection` object and any number of +collection-member objects. + +### Query parameters + +| Parameter | Type | Default | Effect | +|---|---|---|---| +| `previewOnly` | boolean | `false` | Process the bundle and return the would-be import response without persisting anything. | +| `checkOnly` | boolean | `false` | Synonymous with `previewOnly` for backwards compatibility. | +| `validateContents` | boolean | `false` | When `true`, ADM validation is strict — see [Validation modes](#validation-modes). When `false` (default), validation runs but failures are recorded rather than rejected. | +| `forceImport` | repeated string | (none) | Allow import to proceed past specific blocking conditions. Supported values: `attack-spec-version-violations`, `duplicate-collection`. | + +## Validation modes + +Every object in the bundle is validated against the ATT&CK Data Model +(ADM) schemas during import — provided that ADM validation is enabled +in the deployment (`VALIDATE_WITH_ADM_SCHEMAS`, default `true`). +Objects marked `revoked: true` or `x_mitre_deprecated: true` skip +validation; everything else is checked. + +The behavior on validation failure depends on `validateContents`: + +### Default mode — `validateContents=false` (or unset) + +**Fail-open.** A failing object is still persisted, but two pieces of +state are written so the failure is visible: + +1. **On the document itself**, in `workspace.validation`: + + ```jsonc + "workspace": { + "validation": { + "errors": [ + { "message": "type is Invalid literal value", "path": ["type"], "code": "invalid_literal" } + ], + "attack_spec_version": "3.3.0", + "adm_version": "4.11.7", + "validated_at": "2026-05-14T12:00:00.000Z" + } + } + ``` + +2. **On the collection's import response**, in + `workspace.import_categories.errors`, one entry per failing + object: + + ```jsonc + { + "object_ref": "attack-pattern--1234...", + "object_modified": "2024-10-15T14:00:21.000Z", + "error_type": "Validation error", + "error_message": "3 ADM validation error(s): path.x is invalid_type; ...", + "details": [ + { "message": "x_mitre_platforms is Required", "path": ["x_mitre_platforms"], "code": "invalid_type" }, + { "message": "...", "path": ["..."], "code": "..." } + ] + } + ``` + + The `details` array preserves the full Zod issue list so callers + can act on the failure without fetching every object individually. + +Fail-open mode is the default because bundle import is the primary +way that legacy and version-skewed content enters the system; aborting +on every ADM mismatch would make migrations between ATT&CK versions +impossible. + +### Strict mode — `validateContents=true` + +A failing object is **not** persisted. The entry in +`import_categories.errors` is written exactly as above (with full +`details`), but the document is dropped from the bulk insert. Other +objects in the same bundle continue to be processed. The import as a +whole succeeds; only the failing objects are missing from the database. + +Use strict mode when you want the import to be a clean filter: only +objects that pass current ADM validation will be persisted, and the +import response tells you exactly which ones were rejected and why. + +## Reading the import response + +The response is the persisted `x-mitre-collection` document. Look at +`workspace.import_categories`: + +```jsonc +"workspace": { + "imported": "2026-05-14T12:00:00.000Z", + "import_categories": { + "additions": [ /* stixIds of new objects */ ], + "changes": [ /* stixIds where x_mitre_version increased */ ], + "minor_changes": [ /* stixIds where only modified changed */ ], + "revocations": [ /* stixIds newly revoked in this version */ ], + "deprecations": [ /* stixIds newly deprecated in this version */ ], + "supersedes_user_edits": [ ], + "supersedes_collection_changes": [ ], + "duplicates": [ /* stixIds whose modified matches an existing version */ ], + "out_of_date": [ /* stixIds where existing modified is newer */ ], + "errors": [ /* see below */ ] + }, + "import_references": { + "additions": [ /* source_names of newly inserted references */ ], + "changes": [ /* source_names of updated references */ ], + "duplicates": [ ] + } +} +``` + +### Error types in `import_categories.errors` + +| `error_type` | Meaning | +|---|---| +| `Validation error` | The object failed ADM schema validation. The `details` array contains every `{message, path, code}`. In fail-open mode the object is still persisted; in strict mode it is dropped. | +| `Save error` | A persistence failure (e.g. MongoDB duplicate-key race). | +| `Retrieval error` | The bulk pre-fetch for the tier failed; no object in that tier was processed. | +| `Not in contents` | The object exists in the bundle's `objects` array but is missing from the collection's `x_mitre_contents`. It is still persisted; this is a warning. | +| `Missing object` | The object is listed in `x_mitre_contents` but is missing from the bundle. | +| `Unknown object type` | The object's `type` is not one the server knows how to persist. | +| `ATT&CK Spec version violation` | The object's `x_mitre_attack_spec_version` is later than the server supports. Without `forceImport=attack-spec-version-violations`, the entire import aborts when this occurs. | +| `Duplicate collection object` | A second `x-mitre-collection` matching an already-persisted collection was found. Without `forceImport=duplicate-collection`, the import aborts. | + +## Re-importing the same bundle + +Re-importing a bundle whose collection (`x-mitre-collection`) already +exists at the same `modified` timestamp augments the existing +collection rather than creating a duplicate: the new import's +`import_categories` is appended to `workspace.reimports` and member +objects are upserted version-by-version. Members that match an +existing version exactly (same `modified`) appear in `duplicates`; +members whose `modified` is newer than what's stored appear in +`additions` / `changes` / `minor_changes` as appropriate. + +## Performance + +For very large bundles (the Enterprise ATT&CK bundle ships ~5,000 +objects), the import runs in tier-batched parallel passes — see +[`docs/developer/stix-bundle-import-pipeline.md`](../developer/stix-bundle-import-pipeline.md) +for the implementation detail. Typical wall-clock times on developer +hardware: + +| Bundle | Approximate import time | +|---|---| +| Mobile ATT&CK | < 5 seconds | +| ICS ATT&CK | < 5 seconds | +| Enterprise ATT&CK | 20-60 seconds (depending on hardware and Mongo configuration) | + +If the request seems hung past a minute, check the server logs for +`Import Bundle Error` entries — most often the cause is a deeper +issue (e.g. a Mongo connection problem) rather than continued +processing. From b86d506587259819d61e0e462901479b8c55009e Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 11:56:23 -0400 Subject: [PATCH 6/7] fix(import-bundle): skip x_mitre_contents check for marking-definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The export side (stix-bundles-service) deliberately omits marking-definitions from `x_mitre_contents` — they are referenced on the collection via `object_marking_refs` instead. The import side did not mirror that convention, so every well-formed bundle produced one bogus "Not in contents" warning per marking-definition. Extend the existing `x-mitre-collection` exemption in processTier to also cover `marking-definition`. No change to map construction or duplicate-detection — only the false-positive warning is suppressed. --- .../collection-bundles-service/import-bundle.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/app/services/stix/collection-bundles-service/import-bundle.js b/app/services/stix/collection-bundles-service/import-bundle.js index 74f58a94..60924e63 100644 --- a/app/services/stix/collection-bundles-service/import-bundle.js +++ b/app/services/stix/collection-bundles-service/import-bundle.js @@ -222,9 +222,20 @@ async function processTier(type, objects, ctx) { // compose-and-insert. const eligible = []; for (const importObject of objects) { + // The contents-map check verifies that every imported object is also + // listed in the collection's `x_mitre_contents`. Two types are exempt + // because the export side (stix-bundles-service) deliberately omits them + // from `x_mitre_contents`: + // - `x-mitre-collection`: the collection is the container; it doesn't + // list itself. + // - `marking-definition`: marking-defs are referenced by the + // collection via `object_marking_refs` instead. Treating their + // absence from x_mitre_contents as a warning produces a false + // positive on every well-formed bundle (one per marking-def). if ( !contentsMap.delete(makeKeyFromObject(importObject)) && - importObject.type !== types.Collection + importObject.type !== types.Collection && + importObject.type !== types.MarkingDefinition ) { const importError = { object_ref: importObject.id, From 67851850d6a7818ed52941ebbc17918d56d613da Mon Sep 17 00:00:00 2001 From: Sean Sica <23294618+seansica@users.noreply.github.com> Date: Thu, 14 May 2026 12:31:11 -0400 Subject: [PATCH 7/7] fix(import-bundle): surface Mongoose validation errors on bulk insert repository.saveMany() delegates to Model.insertMany() with ordered:false. Mongoose's default behavior is to silently drop docs that fail schema validation, so malformed documents were vanishing from the bulk path without producing any entry in import_categories.errors. The old per-object save() path surfaced the same failure as a "Save error", which two regression tests relied on. Pass throwOnValidationError:true so Mongoose throws MongooseBulkWriteError after attempting the valid docs, then walk err.results in order to map each failure back to its source index and produce a {index, message, code} entry alongside the existing MongoBulkWriteError handling. The caller in processTier is unchanged. --- app/repository/_base.repository.js | 53 ++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/app/repository/_base.repository.js b/app/repository/_base.repository.js index 85f80f04..fbd7abc7 100644 --- a/app/repository/_base.repository.js +++ b/app/repository/_base.repository.js @@ -401,9 +401,16 @@ class BaseRepository extends AbstractRepository { /** * Bulk insert. Used by the STIX bundle import path to avoid one round-trip - * per object. `ordered: false` keeps MongoDB inserting the remaining docs - * after an individual failure; per-doc errors are returned on the thrown - * BulkWriteError's `writeErrors` for the caller to fold into import errors. + * per object. + * + * `ordered: false` keeps MongoDB inserting the remaining docs after an + * individual failure. `throwOnValidationError: true` is critical: without + * it, Mongoose's `insertMany` silently drops documents that fail schema + * validation (e.g. a required field is missing) and reports success for + * the remaining valid docs — leaving the caller unable to record per-object + * import errors. With the flag, Mongoose throws a `MongooseBulkWriteError` + * after attempting the valid docs, carrying both the validation errors and + * the `results` array we use to map each failure back to its source index. * * Discriminator-aware: each child model's `insertMany` sets the correct * `__t` discriminator key automatically, so callers should invoke this on @@ -413,17 +420,51 @@ class BaseRepository extends AbstractRepository { * @param {Object} [options] * @param {boolean} [options.ordered=false] - Stop on first error if true * @returns {Promise<{ inserted: Array, errors: Array<{ index, message, code }> }>} + * `errors[].index` is the index into the input `dataArr`; the caller can + * use it to recover the original document for error reporting. */ async saveMany(dataArr, { ordered = false } = {}) { if (!Array.isArray(dataArr) || dataArr.length === 0) { return { inserted: [], errors: [] }; } try { - const inserted = await this.model.insertMany(dataArr, { ordered }); + const inserted = await this.model.insertMany(dataArr, { + ordered, + throwOnValidationError: true, + }); return { inserted, errors: [] }; } catch (err) { - // Mongoose BulkWriteError surfaces partial success when ordered:false. - // Successful inserts are on err.insertedDocs; failures on err.writeErrors. + // MongooseBulkWriteError: one or more docs failed Mongoose schema + // validation. `err.results` mirrors the input order — successfully + // inserted entries are Mongoose documents (identifiable by `_id`), + // while failures are the original input objects (no `_id`). Walking + // the results in order, the k-th failure corresponds to + // `err.validationErrors[k]` (Mongoose pre-sorts validationErrors by + // source index). + if (err?.name === 'MongooseBulkWriteError') { + const errors = []; + const inserted = []; + const validationErrors = err.validationErrors || []; + const results = err.results || []; + let veIdx = 0; + for (let i = 0; i < results.length; i++) { + const r = results[i]; + if (r && r._id) { + inserted.push(r); + } else { + const ve = validationErrors[veIdx++]; + errors.push({ + index: i, + message: ve?.message ?? 'Mongoose validation error', + code: ve?.name || 'ValidationError', + }); + } + } + return { inserted, errors }; + } + // MongoDB driver-side failure (e.g., duplicate-key race). Per-doc + // errors are on `err.writeErrors`; successful inserts on + // `err.insertedDocs`. if (err?.name === 'MongoBulkWriteError' || err?.writeErrors) { const errors = (err.writeErrors || []).map((we) => ({ index: we.index ?? we.err?.index,