Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 91 additions & 35 deletions src/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import type { ExecaError } from 'execa';
import mime from 'mime';
import { minVersion } from 'semver';

import { APIFY_ENV_VARS } from '@apify/consts';
import { ACTOR_ENV_VARS, APIFY_ENV_VARS } from '@apify/consts';
import { validateInputSchema, validateInputUsingValidator } from '@apify/input_schema';

import { ApifyCommand, StdinMode } from '../lib/command-framework/apify-command.js';
Expand All @@ -26,6 +26,7 @@ import { useActorConfig } from '../lib/hooks/useActorConfig.js';
import { ProjectLanguage, useCwdProject } from '../lib/hooks/useCwdProject.js';
import { useModuleVersion } from '../lib/hooks/useModuleVersion.js';
import { getAjvValidator, getDefaultsFromInputSchema, readInputSchema } from '../lib/input_schema.js';
import { CRAWLEE_INPUT_KEY_ENV, resolveInputKey, TEMP_INPUT_KEY_PREFIX } from '../lib/input-key.js';
import { error, info, warning } from '../lib/outputs.js';
import { replaceSecretsValue } from '../lib/secrets.js';
import {
Expand All @@ -42,6 +43,19 @@ import {
purgeDefaultQueue,
} from '../lib/utils.js';

interface TempInputResult {
tempInputKey: string;
tempInputFilePath: string;
}

interface OverwrittenInputResult {
existingInput: ReturnType<typeof getLocalInput>;
inputFilePath: string;
writtenAt: number;
}

type ValidateAndStoreInputResult = TempInputResult | OverwrittenInputResult;

enum RunType {
DirectFile = 0,
Module = 1,
Expand Down Expand Up @@ -124,6 +138,7 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
const { config: localConfig } = localConfigResult.unwrap();

const actualStoragePath = getLocalStorageDir();
const resolvedInputKey = resolveInputKey();

const projectRuntimeResult = await useCwdProject({ cwd });

Expand Down Expand Up @@ -233,13 +248,17 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
CRAWLEE_PURGE_ON_START = '1';

if (crawleeVersion.isNone()) {
await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
await Promise.all([
purgeDefaultQueue(),
purgeDefaultKeyValueStore(resolvedInputKey),
purgeDefaultDataset(),
]);
info({ message: 'All default local stores were purged.' });
}
}

if (!this.flags.purge) {
const isStorageEmpty = await checkIfStorageIsEmpty();
const isStorageEmpty = await checkIfStorageIsEmpty(resolvedInputKey);

if (!isStorageEmpty && !this.flags.resurrect) {
warning({
Expand All @@ -258,13 +277,35 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
return;
}

const storedInputResults = await this.validateAndStoreInput(inputOverride);
const storedInputResults = await this.validateAndStoreInput(inputOverride, resolvedInputKey);

// When a temp input file was created, disable crawlee's purge so it doesn't
// delete the temp file (its name doesn't match the input key regex that purge skips).
// Also determine the effective input key for env vars (temp key overrides resolved key).
let effectiveInputKey = resolvedInputKey;
if (storedInputResults && 'tempInputKey' in storedInputResults) {
if (this.flags.purge && crawleeVersion.isSome()) {
// Crawlee would have purged on start, but we need to disable that to protect
// the temp file. Purge from CLI side instead, preserving both input files.
await Promise.all([
purgeDefaultQueue(),
purgeDefaultKeyValueStore(resolvedInputKey, storedInputResults.tempInputKey),
purgeDefaultDataset(),
]);
}
CRAWLEE_PURGE_ON_START = '0';
effectiveInputKey = storedInputResults.tempInputKey;
}

// Attach env vars from local config files
// Attach env vars from local config files.
// Set all three input key env vars so both Node.js and Python SDKs pick up the resolved key.
const localEnvVars: Record<string, string> = {
[APIFY_ENV_VARS.LOCAL_STORAGE_DIR]: actualStoragePath,
CRAWLEE_STORAGE_DIR: actualStoragePath,
CRAWLEE_PURGE_ON_START,
[ACTOR_ENV_VARS.INPUT_KEY]: effectiveInputKey,
[APIFY_ENV_VARS.INPUT_KEY]: effectiveInputKey,
[CRAWLEE_INPUT_KEY_ENV]: effectiveInputKey,
};

if (proxy && proxy.password) localEnvVars[APIFY_ENV_VARS.PROXY_PASSWORD] = proxy.password;
Expand All @@ -279,8 +320,8 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
Object.assign(localEnvVars, updatedEnv);
}

// NOTE: User can overwrite env vars
const env = Object.assign(localEnvVars, process.env);
// localEnvVars must take priority so the CLI can redirect the SDK to temp input files
const env = { ...process.env, ...localEnvVars };

if (!userId) {
warning({
Expand Down Expand Up @@ -393,7 +434,10 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}
} finally {
if (storedInputResults) {
if (storedInputResults.existingInput) {
if ('tempInputKey' in storedInputResults) {
// Temp input file: just delete it, user's INPUT.json was never touched
await deleteFile(storedInputResults.tempInputFilePath);
} else if (storedInputResults.existingInput) {
// Check if the input file was modified since we modified it. If it was, we abort the re-overwrite and warn the user
const stats = await stat(storedInputResults.inputFilePath);

Expand All @@ -420,10 +464,17 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}

/**
* Ensures the input that the actor will be ran with locally matches the input schema (and prefills default values if missing)
* Validates the input against the input schema and writes to disk only when necessary.
* When the user already has an input file and no override is provided, it writes the
* merged defaults to a separate temp file so the user's file is never touched.
* The caller redirects the SDK to the temp file via the ACTOR_INPUT_KEY env var.
* @param inputOverride Optional input received through command flags
* @param resolvedInputKey The input key resolved from env vars (default "INPUT")
*/
private async validateAndStoreInput(inputOverride?: { input: Record<string, unknown>; source: string }) {
private async validateAndStoreInput(
inputOverride?: { input: Record<string, unknown>; source: string },
resolvedInputKey = 'INPUT',
): Promise<ValidateAndStoreInputResult | null> {
const { inputSchema } = await readInputSchema({ cwd: process.cwd() });

if (!inputSchema) {
Expand All @@ -432,22 +483,18 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
}

// We cannot validate input schema if it is not found -> default to no validation and overriding if flags are given
const existingInput = getLocalInput(process.cwd());
// Write the override to a temp file so the user's input file is never touched.
const defaultStorePath = join(process.cwd(), getLocalKeyValueStorePath());
await mkdir(defaultStorePath, { recursive: true });

// Prepare the file path for where we'll temporarily store the validated input
const inputFilePath = join(
process.cwd(),
getLocalKeyValueStorePath(),
existingInput?.fileName ?? 'INPUT.json',
);
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(defaultStorePath, `${tempInputKey}.json`);

await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(inputOverride.input, null, 2));
await writeFile(tempInputFilePath, JSON.stringify(inputOverride.input, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand All @@ -458,11 +505,15 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
const defaults = getDefaultsFromInputSchema(inputSchema);
const compiledInputSchema = getAjvValidator(inputSchema, validator);

// Step 2: try to fetch the existing INPUT from the local storage
const existingInput = getLocalInput(process.cwd());
// Step 2: try to fetch the existing input from the local storage
const existingInput = getLocalInput(process.cwd(), resolvedInputKey);

// Prepare the file path for where we'll temporarily store the validated input
const inputFilePath = join(process.cwd(), getLocalKeyValueStorePath(), existingInput?.fileName ?? 'INPUT.json');
const inputFilePath = join(
process.cwd(),
getLocalKeyValueStorePath(),
existingInput?.fileName ?? `${resolvedInputKey}.json`,
);

let errorHeader: string;

Expand Down Expand Up @@ -501,13 +552,16 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
);
}

// Write to a temp file so the user's input file is never touched.
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(dirname(inputFilePath), `${tempInputKey}.json`);

await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(fullInputOverride, null, 2));
await writeFile(tempInputFilePath, JSON.stringify(fullInputOverride, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand Down Expand Up @@ -546,14 +600,16 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
);
}

// Step 4: store the input
await mkdir(dirname(inputFilePath), { recursive: true });
await writeFile(inputFilePath, JSON.stringify(fullInput, null, 2));
// Write merged input to a temp file so the user's INPUT.json is never touched.
// The SDK is redirected to this file via the ACTOR_INPUT_KEY env var.
const tempInputKey = `${TEMP_INPUT_KEY_PREFIX}${resolvedInputKey}`;
const tempInputFilePath = join(dirname(inputFilePath), `${tempInputKey}.json`);

await writeFile(tempInputFilePath, JSON.stringify(fullInput, null, 2));

return {
existingInput,
inputFilePath,
writtenAt: Date.now(),
tempInputKey,
tempInputFilePath,
};
}

Expand Down
4 changes: 1 addition & 3 deletions src/lib/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { homedir } from 'node:os';
import { join } from 'node:path';

import { KEY_VALUE_STORE_KEYS, META_ORIGINS } from '@apify/consts';
import { META_ORIGINS } from '@apify/consts';

import pkg from '../../package.json' with { type: 'json' };

Expand Down Expand Up @@ -49,8 +49,6 @@ export const LOCAL_CONFIG_NAME = 'actor.json';

export const LOCAL_CONFIG_PATH = join(ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME);

export const INPUT_FILE_REG_EXP = new RegExp(`(^${KEY_VALUE_STORE_KEYS.INPUT}(?:\\.[^.]+)?$)`);

export const SUPPORTED_NODEJS_VERSION = pkg.engines.node;

export const APIFY_CLIENT_DEFAULT_HEADERS = { 'X-Apify-Request-Origin': META_ORIGINS.CLI };
Expand Down
28 changes: 28 additions & 0 deletions src/lib/input-key.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import escapeStringRegexp from 'escape-string-regexp';

import { ACTOR_ENV_VARS, APIFY_ENV_VARS } from '@apify/consts';

export const CRAWLEE_INPUT_KEY_ENV = 'CRAWLEE_INPUT_KEY';

export const TEMP_INPUT_KEY_PREFIX = '__CLI_';

/**
* Resolves the input key from environment variables in priority order:
* ACTOR_INPUT_KEY > APIFY_INPUT_KEY > CRAWLEE_INPUT_KEY > "INPUT"
*/
export function resolveInputKey(): string {
return (
process.env[ACTOR_ENV_VARS.INPUT_KEY] ||
process.env[APIFY_ENV_VARS.INPUT_KEY] ||
process.env[CRAWLEE_INPUT_KEY_ENV] ||
'INPUT'
);
}

/**
* Creates a RegExp that matches the given key with an optional file extension.
* e.g. inputFileRegExp('INPUT') matches 'INPUT', 'INPUT.json', 'INPUT.bin'
*/
export function inputFileRegExp(key: string): RegExp {
return new RegExp(`(^${escapeStringRegexp(key)}(?:\\.[^.]+)?$)`);
}
20 changes: 10 additions & 10 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ import {
CommandExitCodes,
DEFAULT_LOCAL_STORAGE_DIR,
GLOBAL_CONFIGS_FOLDER,
INPUT_FILE_REG_EXP,
LOCAL_CONFIG_PATH,
MINIMUM_SUPPORTED_PYTHON_VERSION,
SUPPORTED_NODEJS_VERSION,
} from './consts.js';
import { deleteFile, ensureFolderExistsSync, rimrafPromised } from './files.js';
import { inputFileRegExp, TEMP_INPUT_KEY_PREFIX } from './input-key.js';
import type { AuthJSON } from './types.js';
import { cliDebugPrint } from './utils/cliDebugPrint.js';

Expand Down Expand Up @@ -504,15 +504,15 @@ export const createActZip = async (zipName: string, pathsToZip: string[], cwd: s
/**
* Get Actor input from local store
*/
export const getLocalInput = (cwd: string) => {
export const getLocalInput = (cwd: string, inputKey?: string) => {
const defaultLocalStorePath = getLocalKeyValueStorePath();

const folderExists = existsSync(join(cwd, defaultLocalStorePath));

if (!folderExists) return;

const files = readdirSync(join(cwd, defaultLocalStorePath));
const inputName = files.find((file) => !!file.match(INPUT_FILE_REG_EXP));
const inputName = files.find((file) => !!file.match(inputFileRegExp(inputKey ?? 'INPUT')));

// No input file
if (!inputName) return;
Expand All @@ -536,16 +536,17 @@ export const purgeDefaultDataset = async () => {
}
};

export const purgeDefaultKeyValueStore = async () => {
export const purgeDefaultKeyValueStore = async (...inputKeys: string[]) => {
const defaultKeyValueStorePath = getLocalKeyValueStorePath();
if (!existsSync(getLocalStorageDir()) || !existsSync(defaultKeyValueStorePath)) {
return;
}
const filesToDelete = readdirSync(defaultKeyValueStorePath);
const preserveRegExps = (inputKeys.length > 0 ? inputKeys : ['INPUT']).map(inputFileRegExp);

const deletePromises: Promise<void>[] = [];
filesToDelete.forEach((file) => {
if (!file.match(INPUT_FILE_REG_EXP)) {
if (!preserveRegExps.some((re) => re.test(file))) {
deletePromises.push(deleteFile(join(defaultKeyValueStorePath, file)));
}
});
Expand Down Expand Up @@ -626,13 +627,12 @@ export const getNpmCmd = (): string => {
/**
* Returns true if apify storage is empty (expect INPUT.*)
*/
export const checkIfStorageIsEmpty = async () => {
export const checkIfStorageIsEmpty = async (inputKey?: string) => {
const key = inputKey || KEY_VALUE_STORE_KEYS.INPUT;
const filesWithoutInput = await glob([
`${getLocalStorageDir()}/**`,
// Omit INPUT.* file
`!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}.*`,
// Omit INPUT_CLI-* files
`!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}_CLI-*`,
`!${getLocalKeyValueStorePath()}/${key}.*`,
`!${getLocalKeyValueStorePath()}/${TEMP_INPUT_KEY_PREFIX}${key}.*`,
]);

return filesWithoutInput.length === 0;
Expand Down
11 changes: 7 additions & 4 deletions test/local/commands/crawlee/run.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,22 +68,25 @@ describe('apify run', () => {
expect(lastErrorMessage()).toMatch(/Field awesome is required/i);
});

it('prefills input with defaults', async () => {
it('validates input without modifying file', async () => {
await writeFile(inputPath, originalInput);

await testRunCommand(RunCommand, {});

const output = JSON.parse(await readFile(outputPath, 'utf8'));
expect(output).toStrictEqual({ awesome: true, help: 'this_maze_is_not_meant_for_you' });

const inputAfterRun = await readFile(inputPath, 'utf8');
expect(inputAfterRun).toBe(originalInput);
});

it('should restore the original input file after run', async () => {
it('does not modify input file during run', async () => {
await writeFile(inputPath, originalInputWithExtraField);

await testRunCommand(RunCommand, {});

const input = JSON.parse(await readFile(inputPath, 'utf8'));
expect(input).toStrictEqual({ awesome: true, extra: 'field' });
const inputAfterRun = await readFile(inputPath, 'utf8');
expect(inputAfterRun).toBe(originalInputWithExtraField);

const output = JSON.parse(await readFile(outputPath, 'utf8'));
expect(output).toStrictEqual({ awesome: true, help: 'this_maze_is_not_meant_for_you', extra: 'field' });
Expand Down
Loading