From 86e0348c58aa936542d83b22c361886a69e388b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 12 Mar 2026 23:26:43 +0100 Subject: [PATCH 01/10] feat(actor): validate all schemas in actor.json (#1036) --- package.json | 1 + src/commands/_register.ts | 4 +- src/commands/validate-schema.ts | 157 ++++++++++-- src/lib/input_schema.ts | 37 +++ test/local/commands/validate-schema.test.ts | 252 ++++++++++++++++++-- test/local/lib/command-framework.test.ts | 4 +- yarn.lock | 1 + 7 files changed, 416 insertions(+), 40 deletions(-) diff --git a/package.json b/package.json index 3f0db2e9f..13e759b30 100644 --- a/package.json +++ b/package.json @@ -68,6 +68,7 @@ "@apify/actor-templates": "^0.1.5", "@apify/consts": "^2.36.0", "@apify/input_schema": "^3.17.0", + "@apify/json_schemas": "^0.13.0", "@apify/utilities": "^2.18.0", "@crawlee/memory-storage": "^3.12.0", "@inquirer/core": "^11.0.0", diff --git a/src/commands/_register.ts b/src/commands/_register.ts index 16657acd8..93c266e05 100644 --- a/src/commands/_register.ts +++ b/src/commands/_register.ts @@ -32,7 +32,7 @@ import { RunsIndexCommand } from './runs/_index.js'; import { SecretsIndexCommand } from './secrets/_index.js'; import { TasksIndexCommand } from './task/_index.js'; import { TelemetryIndexCommand } from './telemetry/_index.js'; -import { ValidateInputSchemaCommand } from './validate-schema.js'; +import { ValidateSchemaCommand } from './validate-schema.js'; export const apifyCommands = [ // namespaces @@ -62,7 +62,7 @@ export const apifyCommands = [ TopLevelPullCommand, ToplevelPushCommand, RunCommand, - ValidateInputSchemaCommand, + ValidateSchemaCommand, HelpCommand, // test commands diff --git a/src/commands/validate-schema.ts b/src/commands/validate-schema.ts index 71cf4ae54..a5d95e540 100644 --- a/src/commands/validate-schema.ts +++ b/src/commands/validate-schema.ts @@ -1,41 +1,160 @@ import process from 'node:process'; +import { validateInputSchema } from '@apify/input_schema'; + import { ApifyCommand } from '../lib/command-framework/apify-command.js'; import { Args } from '../lib/command-framework/args.js'; -import { LOCAL_CONFIG_PATH } from '../lib/consts.js'; -import { readAndValidateInputSchema } from '../lib/input_schema.js'; -import { success } from '../lib/outputs.js'; +import { CommandExitCodes, LOCAL_CONFIG_PATH } from '../lib/consts.js'; +import { + readDatasetSchema, + readInputSchema, + readOutputSchema, + readStorageSchema, + validateDatasetSchema, + validateKvsSchema, + validateOutputSchema, +} from '../lib/input_schema.js'; +import { error, info, success } from '../lib/outputs.js'; +import { Ajv2019 } from '../lib/utils.js'; -export class ValidateInputSchemaCommand extends ApifyCommand { +export class ValidateSchemaCommand extends ApifyCommand { static override name = 'validate-schema' as const; - static override description = `Validates Actor input schema from one of these locations (in priority order): - 1. Object in '${LOCAL_CONFIG_PATH}' under "input" key - 2. JSON file path in '${LOCAL_CONFIG_PATH}' "input" key - 3. .actor/INPUT_SCHEMA.json - 4. INPUT_SCHEMA.json + static override description = `Validates Actor schemas. + +When a path argument is provided, validates only the input schema at that path. -Optionally specify custom schema path to validate.`; +When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': + - Input schema (from "input" key or default locations) + - Dataset schema (from "storages.dataset") + - Output schema (from "output") + - Key-Value Store schema (from "storages.keyValueStore")`; static override args = { path: Args.string({ required: false, - description: 'Optional path to your INPUT_SCHEMA.json file. If not provided ./INPUT_SCHEMA.json is used.', + description: 'Optional path to your INPUT_SCHEMA.json file. If not provided, validates all schemas in actor.json.', }), }; static override hiddenAliases = ['vis']; async run() { - await readAndValidateInputSchema({ - forcePath: this.args.path, - cwd: process.cwd(), - getMessage: (path) => - path - ? `Validating input schema at ${path}` - : `Validating input schema embedded in '${LOCAL_CONFIG_PATH}'`, - }); + if (this.args.path) { + await this.validateInputSchemaAtPath(this.args.path); + return; + } + + await this.validateAllSchemas(); + } + + private async validateInputSchemaAtPath(forcePath: string) { + const { inputSchema } = await readInputSchema({ forcePath, cwd: process.cwd() }); + + if (!inputSchema) { + throw new Error(`Input schema has not been found at ${forcePath}.`); + } + + info({ message: `Validating input schema at ${forcePath}` }); + + const validator = new Ajv2019({ strict: false }); + validateInputSchema(validator, inputSchema); success({ message: 'Input schema is valid.' }); } + + private async validateAllSchemas() { + const cwd = process.cwd(); + let foundAny = false; + let hasErrors = false; + + // Input schema + const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd }); + + if (inputSchema) { + foundAny = true; + + const location = inputSchemaPath + ? `at ${inputSchemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating input schema ${location}` }); + + try { + const validator = new Ajv2019({ strict: false }); + validateInputSchema(validator, inputSchema); + success({ message: 'Input schema is valid.' }); + } catch (err) { + hasErrors = true; + error({ message: (err as Error).message }); + } + } + + // Dataset schema + const datasetResult = readDatasetSchema({ cwd }); + + if (datasetResult) { + foundAny = true; + + const location = datasetResult.datasetSchemaPath + ? `at ${datasetResult.datasetSchemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating Dataset schema ${location}` }); + + try { + validateDatasetSchema(datasetResult.datasetSchema); + success({ message: 'Dataset schema is valid.' }); + } catch (err) { + hasErrors = true; + error({ message: (err as Error).message }); + } + } + + // Output schema + const outputResult = readOutputSchema({ cwd }); + + if (outputResult) { + foundAny = true; + + const location = outputResult.outputSchemaPath + ? `at ${outputResult.outputSchemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating Output schema ${location}` }); + + try { + validateOutputSchema(outputResult.outputSchema); + success({ message: 'Output schema is valid.' }); + } catch (err) { + hasErrors = true; + error({ message: (err as Error).message }); + } + } + + // Key-Value Store schema + const kvsResult = readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store' }); + + if (kvsResult) { + foundAny = true; + + const location = kvsResult.schemaPath + ? `at ${kvsResult.schemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating Key-Value Store schema ${location}` }); + + try { + validateKvsSchema(kvsResult.schema); + success({ message: 'Key-Value Store schema is valid.' }); + } catch (err) { + hasErrors = true; + error({ message: (err as Error).message }); + } + } + + if (!foundAny) { + throw new Error(`No schemas found. Make sure '${LOCAL_CONFIG_PATH}' exists and defines at least one schema.`); + } + + if (hasErrors) { + process.exitCode = CommandExitCodes.BuildFailed; + } + } } diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index 8279fab83..49009df75 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -3,6 +3,11 @@ import { join } from 'node:path'; import { KEY_VALUE_STORE_KEYS } from '@apify/consts'; import { validateInputSchema } from '@apify/input_schema'; +import { + getDatasetSchemaValidator, + getKeyValueStoreSchemaValidator, + getOutputSchemaValidator, +} from '@apify/json_schemas'; import { ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_PATH } from './consts.js'; import { info, warning } from './outputs.js'; @@ -253,6 +258,38 @@ export const getDefaultsFromInputSchema = (inputSchema: any) => { return defaults; }; +function formatSchemaValidationErrors(errors: import('ajv').ErrorObject[], schemaName: string): string { + const details = errors + .map((err) => { + const path = err.instancePath ? ` at ${err.instancePath}` : ''; + return ` - ${err.message}${path}`; + }) + .join('\n'); + + return `${schemaName} schema is not valid:\n${details}`; +} + +export function validateDatasetSchema(schema: Record): void { + const validate = getDatasetSchemaValidator(); + if (!validate(schema)) { + throw new Error(formatSchemaValidationErrors(validate.errors!, 'Dataset')); + } +} + +export function validateOutputSchema(schema: Record): void { + const validate = getOutputSchemaValidator(); + if (!validate(schema)) { + throw new Error(formatSchemaValidationErrors(validate.errors!, 'Output')); + } +} + +export function validateKvsSchema(schema: Record): void { + const validate = getKeyValueStoreSchemaValidator(); + if (!validate(schema)) { + throw new Error(formatSchemaValidationErrors(validate.errors!, 'Key-Value Store')); + } +} + // Lots of code copied from @apify-packages/actor, this really should be moved to the shared input_schema package export const getAjvValidator = (inputSchema: any, ajvInstance: import('ajv').Ajv) => { const copyOfSchema = structuredClone(inputSchema); diff --git a/test/local/commands/validate-schema.test.ts b/test/local/commands/validate-schema.test.ts index d9fa9eeab..e80ea44e7 100644 --- a/test/local/commands/validate-schema.test.ts +++ b/test/local/commands/validate-schema.test.ts @@ -1,38 +1,256 @@ -import { ValidateInputSchemaCommand } from '../../../src/commands/validate-schema.js'; +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { basename, join } from 'node:path'; + +import { ValidateSchemaCommand } from '../../../src/commands/validate-schema.js'; import { testRunCommand } from '../../../src/lib/command-framework/apify-command.js'; +import { validDatasetSchemaPath } from '../../__setup__/dataset-schemas/paths.js'; import { useConsoleSpy } from '../../__setup__/hooks/useConsoleSpy.js'; +import { useTempPath } from '../../__setup__/hooks/useTempPath.js'; import { invalidInputSchemaPath, unparsableInputSchemaPath, validInputSchemaPath, } from '../../__setup__/input-schemas/paths.js'; +import { validKvsSchemaPath } from '../../__setup__/kvs-schemas/paths.js'; +import { validOutputSchemaPath } from '../../__setup__/output-schemas/paths.js'; + +const { lastErrorMessage, logMessages } = useConsoleSpy(); + +async function setupActorConfig( + basePath: string, + { + inputSchema, + datasetSchemaRef, + outputSchemaRef, + kvsSchemaRef, + }: { + inputSchema?: Record; + datasetSchemaRef?: string | Record; + outputSchemaRef?: string | Record; + kvsSchemaRef?: string | Record; + }, +) { + const actorDir = join(basePath, '.actor'); + await mkdir(actorDir, { recursive: true }); + + const minimalInput = inputSchema ?? { + title: 'Test', + type: 'object', + schemaVersion: 1, + properties: { + foo: { title: 'Foo', description: 'A foo field', type: 'string', default: 'bar', editor: 'textfield' }, + }, + }; + + await writeFile(join(actorDir, 'input_schema.json'), JSON.stringify(minimalInput, null, '\t')); + + const actorJson: Record = { + actorSpecification: 1, + name: 'test-actor', + version: '0.1', + input: './input_schema.json', + }; -const { lastErrorMessage } = useConsoleSpy(); + const storages: Record = {}; + + if (datasetSchemaRef !== undefined) { + if (typeof datasetSchemaRef === 'string') { + const content = await readFile(datasetSchemaRef, 'utf-8'); + const fileName = basename(datasetSchemaRef); + await writeFile(join(actorDir, fileName), content); + storages.dataset = `./${fileName}`; + } else { + storages.dataset = datasetSchemaRef; + } + } + + if (kvsSchemaRef !== undefined) { + if (typeof kvsSchemaRef === 'string') { + const content = await readFile(kvsSchemaRef, 'utf-8'); + const fileName = `kvs-${basename(kvsSchemaRef)}`; + await writeFile(join(actorDir, fileName), content); + storages.keyValueStore = `./${fileName}`; + } else { + storages.keyValueStore = kvsSchemaRef; + } + } + + if (Object.keys(storages).length > 0) { + actorJson.storages = storages; + } + + if (outputSchemaRef !== undefined) { + if (typeof outputSchemaRef === 'string') { + const content = await readFile(outputSchemaRef, 'utf-8'); + const fileName = `output-${basename(outputSchemaRef)}`; + await writeFile(join(actorDir, fileName), content); + actorJson.output = `./${fileName}`; + } else { + actorJson.output = outputSchemaRef; + } + } + + await writeFile(join(actorDir, 'actor.json'), JSON.stringify(actorJson, null, '\t')); +} describe('apify validate-schema', () => { - it('should correctly validate schema 1', async () => { - await testRunCommand(ValidateInputSchemaCommand, { - args_path: validInputSchemaPath, + describe('with path argument (backward compat)', () => { + it('should correctly validate schema 1', async () => { + await testRunCommand(ValidateSchemaCommand, { + args_path: validInputSchemaPath, + }); + + expect(lastErrorMessage()).toMatch(/is valid/); }); - expect(lastErrorMessage()).toMatch(/is valid/); - }); + it('should correctly validate schema 2', async () => { + await testRunCommand(ValidateSchemaCommand, { + args_path: invalidInputSchemaPath, + }); - it('should correctly validate schema 2', async () => { - await testRunCommand(ValidateInputSchemaCommand, { - args_path: invalidInputSchemaPath, + expect(lastErrorMessage()).to.contain( + 'Field schema.properties.queries.editor must be equal to one of the allowed values', + ); }); - expect(lastErrorMessage()).to.contain( - 'Field schema.properties.queries.editor must be equal to one of the allowed values', - ); + it('should correctly validate schema 3', async () => { + await testRunCommand(ValidateSchemaCommand, { + args_path: unparsableInputSchemaPath, + }); + + expect(lastErrorMessage()).to.contain.oneOf([ + 'Unexpected token }', + "Expected ',' or ']' after array element", + ]); + }); }); - it('should correctly validate schema 3', async () => { - await testRunCommand(ValidateInputSchemaCommand, { - args_path: unparsableInputSchemaPath, + describe('without path argument (all schemas)', () => { + const { joinPath, beforeAllCalls, afterAllCalls } = useTempPath('validate-schema', { + create: true, + remove: true, + cwd: true, + cwdParent: false, + }); + + beforeEach(async () => { + await beforeAllCalls(); + }); + + afterEach(async () => { + await afterAllCalls(); + }); + + it('should validate all schemas when no path is provided', async () => { + await setupActorConfig(joinPath(), { + datasetSchemaRef: validDatasetSchemaPath, + outputSchemaRef: validOutputSchemaPath, + kvsSchemaRef: validKvsSchemaPath, + }); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).toContain('Dataset schema is valid'); + expect(allMessages).toContain('Output schema is valid'); + expect(allMessages).toContain('Key-Value Store schema is valid'); }); - expect(lastErrorMessage()).to.contain.oneOf(['Unexpected token }', "Expected ',' or ']' after array element"]); + it('should skip schemas not defined in actor.json', async () => { + await setupActorConfig(joinPath(), {}); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).not.toContain('Dataset'); + expect(allMessages).not.toContain('Output'); + expect(allMessages).not.toContain('Key-Value Store'); + }); + + it('should report error for invalid dataset schema', async () => { + await setupActorConfig(joinPath(), { + datasetSchemaRef: { + // missing actorSpecification — invalid + fields: {}, + views: {}, + }, + }); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).toContain('Dataset schema is not valid'); + }); + + it('should report error for invalid output schema', async () => { + await setupActorConfig(joinPath(), { + outputSchemaRef: { + // missing actorOutputSchemaVersion — invalid + properties: {}, + }, + }); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).toContain('Output schema is not valid'); + }); + + it('should report error for invalid KVS schema', async () => { + await setupActorConfig(joinPath(), { + kvsSchemaRef: { + // missing actorKeyValueStoreSchemaVersion — invalid + collections: {}, + }, + }); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).toContain('Key-Value Store schema is not valid'); + }); + + it('should only validate input schema when path arg is provided', async () => { + await setupActorConfig(joinPath(), { + datasetSchemaRef: validDatasetSchemaPath, + outputSchemaRef: validOutputSchemaPath, + kvsSchemaRef: validKvsSchemaPath, + }); + + await testRunCommand(ValidateSchemaCommand, { + args_path: validInputSchemaPath, + }); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).not.toContain('Dataset'); + expect(allMessages).not.toContain('Output'); + expect(allMessages).not.toContain('Key-Value Store'); + }); + + it('should continue validating remaining schemas when one fails', async () => { + await setupActorConfig(joinPath(), { + datasetSchemaRef: { + // invalid dataset schema + fields: {}, + views: {}, + }, + outputSchemaRef: validOutputSchemaPath, + kvsSchemaRef: validKvsSchemaPath, + }); + + await testRunCommand(ValidateSchemaCommand, {}); + + const allMessages = logMessages.error.join('\n'); + expect(allMessages).toContain('Input schema is valid'); + expect(allMessages).toContain('Dataset schema is not valid'); + expect(allMessages).toContain('Output schema is valid'); + expect(allMessages).toContain('Key-Value Store schema is valid'); + }); }); }); diff --git a/test/local/lib/command-framework.test.ts b/test/local/lib/command-framework.test.ts index c254e6de7..b319330b8 100644 --- a/test/local/lib/command-framework.test.ts +++ b/test/local/lib/command-framework.test.ts @@ -1,10 +1,10 @@ -import { ValidateInputSchemaCommand } from '../../../src/commands/validate-schema.js'; +import { ValidateSchemaCommand } from '../../../src/commands/validate-schema.js'; import { testRunCommand } from '../../../src/lib/command-framework/apify-command.js'; import { validInputSchemaPath } from '../../__setup__/input-schemas/paths.js'; describe('Command Framework', () => { test('testRunCommand helper works', async () => { - await testRunCommand(ValidateInputSchemaCommand, { + await testRunCommand(ValidateSchemaCommand, { args_path: validInputSchemaPath, }); }); diff --git a/yarn.lock b/yarn.lock index d67d0bf97..d5ec4e4b3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2250,6 +2250,7 @@ __metadata: "@apify/consts": "npm:^2.36.0" "@apify/eslint-config": "npm:^1.0.0" "@apify/input_schema": "npm:^3.17.0" + "@apify/json_schemas": "npm:^0.13.0" "@apify/tsconfig": "npm:^0.1.1" "@apify/utilities": "npm:^2.18.0" "@biomejs/biome": "npm:^2.0.0" From aee175d6a97c4fe1499b53726a32e66ec1a18a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 00:47:50 +0200 Subject: [PATCH 02/10] fixes --- src/commands/validate-schema.ts | 110 +++++++++++--------------------- src/lib/input_schema.ts | 13 +++- 2 files changed, 49 insertions(+), 74 deletions(-) diff --git a/src/commands/validate-schema.ts b/src/commands/validate-schema.ts index a5d95e540..0b35d44c4 100644 --- a/src/commands/validate-schema.ts +++ b/src/commands/validate-schema.ts @@ -6,9 +6,8 @@ import { ApifyCommand } from '../lib/command-framework/apify-command.js'; import { Args } from '../lib/command-framework/args.js'; import { CommandExitCodes, LOCAL_CONFIG_PATH } from '../lib/consts.js'; import { - readDatasetSchema, + readAndValidateInputSchema, readInputSchema, - readOutputSchema, readStorageSchema, validateDatasetSchema, validateKvsSchema, @@ -49,16 +48,11 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': } private async validateInputSchemaAtPath(forcePath: string) { - const { inputSchema } = await readInputSchema({ forcePath, cwd: process.cwd() }); - - if (!inputSchema) { - throw new Error(`Input schema has not been found at ${forcePath}.`); - } - - info({ message: `Validating input schema at ${forcePath}` }); - - const validator = new Ajv2019({ strict: false }); - validateInputSchema(validator, inputSchema); + await readAndValidateInputSchema({ + forcePath, + cwd: process.cwd(), + getMessage: (path) => `Validating input schema at ${path ?? forcePath}`, + }); success({ message: 'Input schema is valid.' }); } @@ -68,82 +62,54 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': let foundAny = false; let hasErrors = false; - // Input schema - const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd }); + // Input schema — not using readAndValidateInputSchema here because it throws + // when no schema is found; in the all-schemas scan, a missing input schema + // should be silently skipped, not treated as an error. + try { + const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd }); - if (inputSchema) { - foundAny = true; + if (inputSchema) { + foundAny = true; - const location = inputSchemaPath - ? `at ${inputSchemaPath}` - : `embedded in '${LOCAL_CONFIG_PATH}'`; - info({ message: `Validating input schema ${location}` }); + const location = inputSchemaPath + ? `at ${inputSchemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating input schema ${location}` }); - try { const validator = new Ajv2019({ strict: false }); validateInputSchema(validator, inputSchema); success({ message: 'Input schema is valid.' }); - } catch (err) { - hasErrors = true; - error({ message: (err as Error).message }); } - } - - // Dataset schema - const datasetResult = readDatasetSchema({ cwd }); - - if (datasetResult) { + } catch (err) { foundAny = true; - - const location = datasetResult.datasetSchemaPath - ? `at ${datasetResult.datasetSchemaPath}` - : `embedded in '${LOCAL_CONFIG_PATH}'`; - info({ message: `Validating Dataset schema ${location}` }); - - try { - validateDatasetSchema(datasetResult.datasetSchema); - success({ message: 'Dataset schema is valid.' }); - } catch (err) { - hasErrors = true; - error({ message: (err as Error).message }); - } + hasErrors = true; + error({ message: (err as Error).message }); } - // Output schema - const outputResult = readOutputSchema({ cwd }); - - if (outputResult) { - foundAny = true; - - const location = outputResult.outputSchemaPath - ? `at ${outputResult.outputSchemaPath}` - : `embedded in '${LOCAL_CONFIG_PATH}'`; - info({ message: `Validating Output schema ${location}` }); + // Storage schemas (Dataset, Output, Key-Value Store) + const storageSchemas = [ + { label: 'Dataset', read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset' }), validate: validateDatasetSchema }, + { label: 'Output', read: () => readStorageSchema({ cwd, key: 'output', label: 'Output', getRef: (config) => config?.output }), validate: validateOutputSchema }, + { label: 'Key-Value Store', read: () => readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store' }), validate: validateKvsSchema }, + ]; + for (const { label, read, validate } of storageSchemas) { try { - validateOutputSchema(outputResult.outputSchema); - success({ message: 'Output schema is valid.' }); - } catch (err) { - hasErrors = true; - error({ message: (err as Error).message }); - } - } + const result = read(); - // Key-Value Store schema - const kvsResult = readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store' }); + if (result) { + foundAny = true; - if (kvsResult) { - foundAny = true; - - const location = kvsResult.schemaPath - ? `at ${kvsResult.schemaPath}` - : `embedded in '${LOCAL_CONFIG_PATH}'`; - info({ message: `Validating Key-Value Store schema ${location}` }); + const location = result.schemaPath + ? `at ${result.schemaPath}` + : `embedded in '${LOCAL_CONFIG_PATH}'`; + info({ message: `Validating ${label} schema ${location}` }); - try { - validateKvsSchema(kvsResult.schema); - success({ message: 'Key-Value Store schema is valid.' }); + validate(result.schema); + success({ message: `${label} schema is valid.` }); + } } catch (err) { + foundAny = true; hasErrors = true; error({ message: (err as Error).message }); } diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index f25884acb..b365cb600 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -39,7 +39,7 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; const localConfig = getLocalConfig(cwd); - if (typeof localConfig?.input === 'object') { + if (typeof localConfig?.input === 'object' && localConfig.input !== null) { return { inputSchema: localConfig.input as Record, inputSchemaPath: null, @@ -48,8 +48,17 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; if (typeof localConfig?.input === 'string') { const fullPath = join(cwd, ACTOR_SPECIFICATION_FOLDER, localConfig.input); + const schema = getJsonFileContent(fullPath); + + if (!schema) { + warning({ + message: `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + }); + return { inputSchema: null, inputSchemaPath: fullPath }; + } + return { - inputSchema: getJsonFileContent(fullPath), + inputSchema: schema, inputSchemaPath: fullPath, }; } From 9915073248ce4abbd537e1d15d25bb05f908fdf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 01:03:38 +0200 Subject: [PATCH 03/10] fix --- src/lib/input_schema.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index b365cb600..287abc063 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -51,10 +51,9 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; const schema = getJsonFileContent(fullPath); if (!schema) { - warning({ - message: `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, - }); - return { inputSchema: null, inputSchemaPath: fullPath }; + throw new Error( + `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + ); } return { @@ -151,10 +150,9 @@ export const readStorageSchema = ({ const schema = getJsonFileContent(fullPath); if (!schema) { - warning({ - message: `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, - }); - return null; + throw new Error( + `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + ); } return { From 691f51d55fb55a9f830a41075c2f5428f78bd832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 13:55:27 +0200 Subject: [PATCH 04/10] fix --- src/commands/validate-schema.ts | 10 +++++----- src/lib/input_schema.ts | 34 ++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/commands/validate-schema.ts b/src/commands/validate-schema.ts index 0b35d44c4..35181b749 100644 --- a/src/commands/validate-schema.ts +++ b/src/commands/validate-schema.ts @@ -66,7 +66,7 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': // when no schema is found; in the all-schemas scan, a missing input schema // should be silently skipped, not treated as an error. try { - const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd }); + const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd, throwOnMissing: true }); if (inputSchema) { foundAny = true; @@ -88,9 +88,9 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': // Storage schemas (Dataset, Output, Key-Value Store) const storageSchemas = [ - { label: 'Dataset', read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset' }), validate: validateDatasetSchema }, - { label: 'Output', read: () => readStorageSchema({ cwd, key: 'output', label: 'Output', getRef: (config) => config?.output }), validate: validateOutputSchema }, - { label: 'Key-Value Store', read: () => readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store' }), validate: validateKvsSchema }, + { label: 'Dataset', read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset', throwOnMissing: true }), validate: validateDatasetSchema }, + { label: 'Output', read: () => readStorageSchema({ cwd, key: 'output', label: 'Output', getRef: (config) => config?.output, throwOnMissing: true }), validate: validateOutputSchema }, + { label: 'Key-Value Store', read: () => readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store', throwOnMissing: true }), validate: validateKvsSchema }, ]; for (const { label, read, validate } of storageSchemas) { @@ -120,7 +120,7 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': } if (hasErrors) { - process.exitCode = CommandExitCodes.BuildFailed; + process.exitCode = CommandExitCodes.InvalidInput; } } } diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index 287abc063..eed86fa05 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -29,7 +29,7 @@ const DEFAULT_INPUT_SCHEMA_PATHS = [ * In such a case, path would be set to the location * where the input schema would be expected to be found (and e.g. can be created there). */ -export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; cwd: string }) => { +export const readInputSchema = async ({ forcePath, cwd, throwOnMissing = false }: { forcePath?: string; cwd: string; throwOnMissing?: boolean }) => { if (forcePath) { return { inputSchema: getJsonFileContent(forcePath), @@ -51,9 +51,20 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; const schema = getJsonFileContent(fullPath); if (!schema) { - throw new Error( - `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, - ); + if (throwOnMissing) { + throw new Error( + `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + ); + } + + warning({ + message: `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + }); + + return { + inputSchema: null, + inputSchemaPath: fullPath, + }; } return { @@ -128,11 +139,13 @@ export const readStorageSchema = ({ key, label, getRef, + throwOnMissing = false, }: { cwd: string; key: string; label: string; getRef?: (config: ReturnType) => unknown; + throwOnMissing?: boolean; }): { schema: Record; schemaPath: string | null } | null => { const localConfig = getLocalConfig(cwd); @@ -150,9 +163,16 @@ export const readStorageSchema = ({ const schema = getJsonFileContent(fullPath); if (!schema) { - throw new Error( - `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, - ); + if (throwOnMissing) { + throw new Error( + `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + ); + } + + warning({ + message: `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, + }); + return null; } return { From 840ea5406c401de80de5dffa190d025e3f24e7c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 14:19:47 +0200 Subject: [PATCH 05/10] fix --- src/commands/edit-input-schema.ts | 18 +++++++++++++----- src/commands/run.ts | 9 ++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/commands/edit-input-schema.ts b/src/commands/edit-input-schema.ts index 533ed1bf9..a50b21cbc 100644 --- a/src/commands/edit-input-schema.ts +++ b/src/commands/edit-input-schema.ts @@ -41,11 +41,19 @@ export class EditInputSchemaCommand extends ApifyCommand>; + + try { + result = await readInputSchema({ + forcePath: this.args.path, + cwd: process.cwd(), + }); + } catch (err) { + error({ message: (err as Error).message }); + return; + } + + const { inputSchema: existingSchema, inputSchemaPath } = result; if (existingSchema && !inputSchemaPath) { // If path is not returned, it means the input schema must be directly embedded as object in actor.json diff --git a/src/commands/run.ts b/src/commands/run.ts index 917f9593f..f024f5548 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -424,7 +424,14 @@ export class RunCommand extends ApifyCommand { * @param inputOverride Optional input received through command flags */ private async validateAndStoreInput(inputOverride?: { input: Record; source: string }) { - const { inputSchema } = await readInputSchema({ cwd: process.cwd() }); + let inputSchema: Record | null; + + try { + ({ inputSchema } = await readInputSchema({ cwd: process.cwd() })); + } catch (err) { + warning({ message: (err as Error).message }); + inputSchema = null; + } if (!inputSchema) { if (!inputOverride) { From e73ed244c508ad641b1b6e5e9dd78fba1279a7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 15:21:36 +0200 Subject: [PATCH 06/10] lint fix + npm install --- src/commands/validate-schema.ts | 37 ++++++++++++++++++++++++++------- src/lib/input_schema.ts | 14 +++++++++---- yarn.lock | 10 +++++++++ 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/commands/validate-schema.ts b/src/commands/validate-schema.ts index 35181b749..757631454 100644 --- a/src/commands/validate-schema.ts +++ b/src/commands/validate-schema.ts @@ -32,7 +32,8 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': static override args = { path: Args.string({ required: false, - description: 'Optional path to your INPUT_SCHEMA.json file. If not provided, validates all schemas in actor.json.', + description: + 'Optional path to your INPUT_SCHEMA.json file. If not provided, validates all schemas in actor.json.', }), }; @@ -71,9 +72,7 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': if (inputSchema) { foundAny = true; - const location = inputSchemaPath - ? `at ${inputSchemaPath}` - : `embedded in '${LOCAL_CONFIG_PATH}'`; + const location = inputSchemaPath ? `at ${inputSchemaPath}` : `embedded in '${LOCAL_CONFIG_PATH}'`; info({ message: `Validating input schema ${location}` }); const validator = new Ajv2019({ strict: false }); @@ -88,9 +87,29 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': // Storage schemas (Dataset, Output, Key-Value Store) const storageSchemas = [ - { label: 'Dataset', read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset', throwOnMissing: true }), validate: validateDatasetSchema }, - { label: 'Output', read: () => readStorageSchema({ cwd, key: 'output', label: 'Output', getRef: (config) => config?.output, throwOnMissing: true }), validate: validateOutputSchema }, - { label: 'Key-Value Store', read: () => readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store', throwOnMissing: true }), validate: validateKvsSchema }, + { + label: 'Dataset', + read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset', throwOnMissing: true }), + validate: validateDatasetSchema, + }, + { + label: 'Output', + read: () => + readStorageSchema({ + cwd, + key: 'output', + label: 'Output', + getRef: (config) => config?.output, + throwOnMissing: true, + }), + validate: validateOutputSchema, + }, + { + label: 'Key-Value Store', + read: () => + readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store', throwOnMissing: true }), + validate: validateKvsSchema, + }, ]; for (const { label, read, validate } of storageSchemas) { @@ -116,7 +135,9 @@ When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}': } if (!foundAny) { - throw new Error(`No schemas found. Make sure '${LOCAL_CONFIG_PATH}' exists and defines at least one schema.`); + throw new Error( + `No schemas found. Make sure '${LOCAL_CONFIG_PATH}' exists and defines at least one schema.`, + ); } if (hasErrors) { diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index eed86fa05..8a603791c 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -29,7 +29,15 @@ const DEFAULT_INPUT_SCHEMA_PATHS = [ * In such a case, path would be set to the location * where the input schema would be expected to be found (and e.g. can be created there). */ -export const readInputSchema = async ({ forcePath, cwd, throwOnMissing = false }: { forcePath?: string; cwd: string; throwOnMissing?: boolean }) => { +export const readInputSchema = async ({ + forcePath, + cwd, + throwOnMissing = false, +}: { + forcePath?: string; + cwd: string; + throwOnMissing?: boolean; +}) => { if (forcePath) { return { inputSchema: getJsonFileContent(forcePath), @@ -52,9 +60,7 @@ export const readInputSchema = async ({ forcePath, cwd, throwOnMissing = false } if (!schema) { if (throwOnMissing) { - throw new Error( - `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`, - ); + throw new Error(`Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`); } warning({ diff --git a/yarn.lock b/yarn.lock index c8812f4a6..751ba6213 100644 --- a/yarn.lock +++ b/yarn.lock @@ -96,6 +96,16 @@ __metadata: languageName: node linkType: hard +"@apify/json_schemas@npm:^0.13.0": + version: 0.13.0 + resolution: "@apify/json_schemas@npm:0.13.0" + dependencies: + "@apify/consts": "npm:^2.51.0" + ajv: "npm:^8.17.1" + checksum: 10c0/2612be7a73802b810c0bf82fc45ee8a5e3488b94303a66b66af1b13b8dc0b8aa8b4ca88e63fb7340fd9e82f6fa27f3b0d2ab66d51578b4a351ba47f6358887a4 + languageName: node + linkType: hard + "@apify/json_schemas@npm:^0.14.2": version: 0.14.2 resolution: "@apify/json_schemas@npm:0.14.2" From e9c3e69b8b0708a58f775daa58bb412bfcb37f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 15:27:36 +0200 Subject: [PATCH 07/10] fix ts --- src/commands/run.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index f024f5548..3e531c778 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -424,7 +424,7 @@ export class RunCommand extends ApifyCommand { * @param inputOverride Optional input received through command flags */ private async validateAndStoreInput(inputOverride?: { input: Record; source: string }) { - let inputSchema: Record | null; + let inputSchema: Record | null | undefined; try { ({ inputSchema } = await readInputSchema({ cwd: process.cwd() })); From bbcdc9603a7e5fffa22950ddbc9a0ff952e05ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 2 Apr 2026 17:32:54 +0200 Subject: [PATCH 08/10] fix import --- src/lib/input_schema.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/input_schema.ts b/src/lib/input_schema.ts index 8a603791c..8c95f15ee 100644 --- a/src/lib/input_schema.ts +++ b/src/lib/input_schema.ts @@ -1,6 +1,7 @@ import { existsSync, writeFileSync } from 'node:fs'; import { join } from 'node:path'; +import type { Ajv, ErrorObject } from 'ajv'; import { cloneDeep } from 'es-toolkit'; import { KEY_VALUE_STORE_KEYS } from '@apify/consts'; @@ -293,7 +294,7 @@ export const getDefaultsFromInputSchema = (inputSchema: any) => { return defaults; }; -function formatSchemaValidationErrors(errors: import('ajv').ErrorObject[], schemaName: string): string { +function formatSchemaValidationErrors(errors: ErrorObject[], schemaName: string): string { const details = errors .map((err) => { const path = err.instancePath ? ` at ${err.instancePath}` : ''; @@ -326,7 +327,7 @@ export function validateKvsSchema(schema: Record): void { } // Lots of code copied from @apify-packages/actor, this really should be moved to the shared input_schema package -export const getAjvValidator = (inputSchema: any, ajvInstance: import('ajv').Ajv) => { +export const getAjvValidator = (inputSchema: any, ajvInstance: Ajv) => { const copyOfSchema = cloneDeep(inputSchema); copyOfSchema.required = []; From 09f28948a19ff3c17bdd51a664cf672695c07f75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Fri, 10 Apr 2026 10:42:09 +0200 Subject: [PATCH 09/10] remove try catch --- src/commands/run.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/commands/run.ts b/src/commands/run.ts index 3e531c778..917f9593f 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -424,14 +424,7 @@ export class RunCommand extends ApifyCommand { * @param inputOverride Optional input received through command flags */ private async validateAndStoreInput(inputOverride?: { input: Record; source: string }) { - let inputSchema: Record | null | undefined; - - try { - ({ inputSchema } = await readInputSchema({ cwd: process.cwd() })); - } catch (err) { - warning({ message: (err as Error).message }); - inputSchema = null; - } + const { inputSchema } = await readInputSchema({ cwd: process.cwd() }); if (!inputSchema) { if (!inputOverride) { From 247c6b94e859b6fd033baa83791df46205d75872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Fri, 10 Apr 2026 10:44:46 +0200 Subject: [PATCH 10/10] remove try catch --- src/commands/edit-input-schema.ts | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/commands/edit-input-schema.ts b/src/commands/edit-input-schema.ts index a50b21cbc..533ed1bf9 100644 --- a/src/commands/edit-input-schema.ts +++ b/src/commands/edit-input-schema.ts @@ -41,19 +41,11 @@ export class EditInputSchemaCommand extends ApifyCommand>; - - try { - result = await readInputSchema({ - forcePath: this.args.path, - cwd: process.cwd(), - }); - } catch (err) { - error({ message: (err as Error).message }); - return; - } - - const { inputSchema: existingSchema, inputSchemaPath } = result; + // This call fails if no input schema is found on any of the default locations + const { inputSchema: existingSchema, inputSchemaPath } = await readInputSchema({ + forcePath: this.args.path, + cwd: process.cwd(), + }); if (existingSchema && !inputSchemaPath) { // If path is not returned, it means the input schema must be directly embedded as object in actor.json