Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/khaki-donuts-shout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@cloudflare/shell": patch
---

fix(shell): replace LIKE pattern matching with primary-key range scans in `Workspace.rm({ recursive: true })` and the `glob` prefilter. D1 can reject the previous `LIKE ? ESCAPE ?` queries with `D1_ERROR: LIKE or GLOB pattern too complex: SQLITE_ERROR`; the range predicate (`path >= '{dir}/' AND path < '{dir}0'`) avoids that limit, scans the `path` index directly, and needs no escaping of `%`/`_` in path names.
41 changes: 25 additions & 16 deletions packages/shell/src/filesystem.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ const TEXT_DECODER = new TextDecoder();

const MAX_SYMLINK_DEPTH = 40;
const VALID_NAMESPACE = /^[a-zA-Z][a-zA-Z0-9_]*$/;
const LIKE_ESCAPE = "\\";
const MAX_STREAM_SIZE = 100 * 1024 * 1024;
const MAX_DIFF_LINES = 10_000;
const MAX_PATH_LENGTH = 4096;
Expand Down Expand Up @@ -1072,10 +1071,19 @@ export class Workspace {
await this.ensureInit();
const normalized = normalizePath(pattern);
const prefix = getGlobPrefix(normalized);
const likePattern = escapeLike(prefix) + "%";
const regex = globToRegex(normalized);
const T = this.tableName;

// Prefilter with a range scan on the path primary key instead of LIKE
// (D1 can reject LIKE/ESCAPE with "LIKE or GLOB pattern too complex",
// see #1539). getGlobPrefix returns either a directory prefix ending in
// "/" — covered by [prefix, prefix-with-trailing-"/"-bumped-to-"0")
// since "0" is the character right after "/" — or, when the pattern has
// no wildcards, the whole pattern, which the regex matches exactly.
const [whereSql, params]: [string, string[]] = prefix.endsWith("/")
? ["path >= ? AND path < ?", [prefix, `${prefix.slice(0, -1)}0`]]
: ["path = ?", [prefix]];

const rows = await this.sql.query<{
path: string;
name: string;
Expand All @@ -1088,10 +1096,9 @@ export class Workspace {
}>(
`SELECT path, name, type, mime_type, size, created_at, modified_at, target
FROM ${T}
WHERE path LIKE ? ESCAPE ?
WHERE ${whereSql}
ORDER BY path`,
likePattern,
LIKE_ESCAPE
...params
);

return rows.filter((r) => regex.test(r.path)).map(toFileInfo);
Expand Down Expand Up @@ -1525,16 +1532,22 @@ export class Workspace {
}

private async deleteDescendants(dirPath: string): Promise<void> {
const pattern = escapeLike(dirPath) + "/%";
// Range scan on the path primary key instead of LIKE: D1 can reject
// LIKE/ESCAPE with "LIKE or GLOB pattern too complex" (#1539), and the
// range predicate uses the index directly. "0" is the character right
// after "/", so [`${dirPath}/`, `${dirPath}0`) matches exactly the
// paths strictly under dirPath (dirPath is normalized and never "/").
const lower = `${dirPath}/`;
const upper = `${dirPath}0`;
const T = this.tableName;

const r2Rows = await this.sql.query<{ r2_key: string }>(
`SELECT r2_key FROM ${T}
WHERE path LIKE ? ESCAPE ?
WHERE path >= ? AND path < ?
AND storage_backend = 'r2'
AND r2_key IS NOT NULL`,
pattern,
LIKE_ESCAPE
lower,
upper
);

if (r2Rows.length > 0) {
Expand All @@ -1546,9 +1559,9 @@ export class Workspace {
}

await this.sql.run(
`DELETE FROM ${T} WHERE path LIKE ? ESCAPE ?`,
pattern,
LIKE_ESCAPE
`DELETE FROM ${T} WHERE path >= ? AND path < ?`,
lower,
upper
);
}
}
Expand Down Expand Up @@ -1583,10 +1596,6 @@ function base64ToBytes(b64: string): Uint8Array {

// ── Path helpers ─────────────────────────────────────────────────────

function escapeLike(s: string): string {
return s.replace(/[\\%_]/g, (ch) => "\\" + ch);
}

function normalizePath(path: string): string {
if (!path.startsWith("/")) path = "/" + path;
const parts = path.split("/");
Expand Down
72 changes: 72 additions & 0 deletions packages/shell/src/tests/workspace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1775,6 +1775,78 @@ describe("workspace — security: LIKE injection", () => {
});
});

// Regression tests for #1539: deleteDescendants/glob use a range scan on the
// path primary key ([`${dir}/`, `${dir}0`)) instead of LIKE/ESCAPE, which D1
// can reject with "LIKE or GLOB pattern too complex".

describe("workspace — rm recursive range-scan boundaries (#1539)", () => {
it("removes a deep skill-shaped tree entirely", async () => {
const agent = await freshAgent("rm-range-skill-tree");
await agent.write("/skill/SKILL.md", "skill");
await agent.write("/skill/scripts/setup.mjs", "setup");
await agent.write("/skill/references/readme.md", "ref");
await agent.write("/skill/assets/sample.txt", "asset");
await agent.rmCall("/skill", { recursive: true });
expect(await agent.stat("/skill")).toBeNull();
expect(await agent.stat("/skill/scripts/setup.mjs")).toBeNull();
expect(await agent.stat("/skill/assets/sample.txt")).toBeNull();
});

it("does not delete siblings sharing the directory name as a prefix", async () => {
const agent = await freshAgent("rm-range-prefix-sibling");
await agent.write("/skill/inside.txt", "in dir");
await agent.write("/skill-extra/other.txt", "sibling dir");
await agent.write("/skill.txt", "sibling file");
await agent.rmCall("/skill", { recursive: true });
expect(await agent.stat("/skill")).toBeNull();
expect((await agent.read("/skill-extra/other.txt")) as unknown).toBe(
"sibling dir"
);
expect((await agent.read("/skill.txt")) as unknown).toBe("sibling file");
});

it("does not delete siblings at the upper range boundary ('0' after '/')", async () => {
const agent = await freshAgent("rm-range-upper-boundary");
await agent.write("/dir/inside.txt", "in dir");
await agent.write("/dir0", "boundary file");
await agent.write("/dir00/nested.txt", "boundary dir");
await agent.rmCall("/dir", { recursive: true });
expect(await agent.stat("/dir")).toBeNull();
expect((await agent.read("/dir0")) as unknown).toBe("boundary file");
expect((await agent.read("/dir00/nested.txt")) as unknown).toBe(
"boundary dir"
);
});
});

describe("workspace — glob range-scan prefilter (#1539)", () => {
it("matches inside directories with LIKE special chars in the name", async () => {
const agent = await freshAgent("glob-range-special-chars");
await agent.write("/a%b/match.ts", "x");
await agent.write("/a_b/match.ts", "y");
await agent.write("/axb/decoy.ts", "z");
const percent = (await agent.globCall(
"/a%b/*.ts"
)) as unknown as FileInfo[];
expect(percent.map((f) => f.path)).toEqual(["/a%b/match.ts"]);
const underscore = (await agent.globCall(
"/a_b/*.ts"
)) as unknown as FileInfo[];
expect(underscore.map((f) => f.path)).toEqual(["/a_b/match.ts"]);
});

it("does not match prefix-sibling directories outside the glob prefix", async () => {
const agent = await freshAgent("glob-range-prefix-sibling");
await agent.write("/a/file.ts", "a");
await agent.write("/a-extra/file.ts", "b");
await agent.write("/a0/file.ts", "c");
const results = (await agent.globCall(
"/a/**/*.ts"
)) as unknown as FileInfo[];
expect(results.map((f) => f.path)).toEqual(["/a/file.ts"]);
});
});

describe("workspace — security: path normalization", () => {
it(".. is resolved so files are reachable via readDir", async () => {
const agent = await freshAgent("sec-dotdot-resolve");
Expand Down
Loading