diff --git a/core/utils/streamMarkdownUtils.ts b/core/utils/streamMarkdownUtils.ts index 88e8eabec39..673cb2ef6d0 100644 --- a/core/utils/streamMarkdownUtils.ts +++ b/core/utils/streamMarkdownUtils.ts @@ -1,5 +1,6 @@ import { LineStream } from "../diff/util"; -import { isMarkdownFile, MarkdownBlockStateTracker } from "./markdownUtils"; +import type { MarkdownBlockStateTracker } from "./markdownUtils"; +import { isMarkdownFile } from "./markdownUtils"; /** * Determines if we should stop at a markdown block based on nested markdown logic. @@ -50,48 +51,67 @@ export async function* stopAtLinesWithMarkdownSupport( return; } + // Collect all lines from the LLM stream first const allLines: string[] = []; for await (const line of lines) { allLines.push(line); } - const source = allLines.join("\n"); - if (!source.match(/```(\w*|.*)(md|markdown|gfm|github-markdown)/)) { - // No nested markdown blocks detected, check for simple ``` stopping condition - let foundStandaloneBackticks = false; - for (let i = 0; i < allLines.length; i++) { - if (allLines[i].trim() === "```") { - // Found standalone backticks, yield lines up to this point + // The LLM reply starts *inside* the outer fence (the prompt prefills the opening fence). + // Inner blocks in the markdown body open with a fence line; while one is open, everything + // except a valid closer is content (per CommonMark, fences do not nest). A fence line + // needs 3+ backticks and at most 3 spaces of indentation; a closer must use at least as + // many backticks as its opener and carry no info string. + // A bare top-level fence is ambiguous (plain inner opener vs. the outer closer): treat it + // as the outer closer only when no fence lines follow it. + const FENCE_RE = /^ {0,3}(`{3,})(.*)$/; + const fenceIndices: number[] = []; + for (let i = 0; i < allLines.length; i++) { + if (FENCE_RE.test(allLines[i])) { + fenceIndices.push(i); + } + } + const lastFenceLine = fenceIndices[fenceIndices.length - 1] ?? 
-1; + + let innerOpener: number | null = null; // Backtick count of the open inner fence + + for (const i of fenceIndices) { + const fenceMatch = allLines[i].match(FENCE_RE)!; + const backtickCount = fenceMatch[1].length; + const infoString = fenceMatch[2].trim(); + + if (innerOpener === null) { + if (infoString.length === 0 && i === lastFenceLine) { + // Bare fence at the top level with nothing after it: the outer closing fence. + // Stop before it so the wrapper never leaks into the applied file. for (let j = 0; j < i; j++) { yield allLines[j]; } - foundStandaloneBackticks = true; return; } + // Opening fence (with or without an info string) — enter an inner block + innerOpener = backtickCount; + } else if (infoString.length === 0 && backtickCount >= innerOpener) { + // Valid closer for the open inner block + innerOpener = null; } - - // No standalone backticks found, yield all lines - if (!foundStandaloneBackticks) { - for (const line of allLines) { - yield line; - } - } - return; + // Any other backtick line inside an open inner block is content } - // Use optimized state tracker for markdown block analysis - const stateTracker = new MarkdownBlockStateTracker(allLines); - - for (let i = 0; i < allLines.length; i++) { - if (stateTracker.shouldStopAtPosition(i)) { - for (let j = 0; j < i; j++) { - yield allLines[j]; - } - return; + // No outer closing fence found. If the stream ends with an unclosed inner fence + // (malformed markdown) or its final line is a fence, prefer treating that last fence + // as the outer closer so the wrapper delimiter never leaks into the edit output. 
+ if ( + lastFenceLine >= 0 && + (innerOpener !== null || lastFenceLine === allLines.length - 1) + ) { + for (let j = 0; j < lastFenceLine; j++) { + yield allLines[j]; } + return; } - // If we get here, yield all lines + // Yield everything for (const line of allLines) { yield line; } diff --git a/core/utils/streamMarkdownUtils.vitest.ts b/core/utils/streamMarkdownUtils.vitest.ts index 097b1fbd57a..87d36c9acd1 100644 --- a/core/utils/streamMarkdownUtils.vitest.ts +++ b/core/utils/streamMarkdownUtils.vitest.ts @@ -83,14 +83,14 @@ test("stopAtLinesWithMarkdownSupport handles markdown files with no nested block const filename = "test.md"; const result = stopAtLinesWithMarkdownSupport(lineStream, filename); - // Should yield everything since there's no nested markdown blocks + // Should yield everything since there's no outer closing fence const collected = []; for await (const line of result) { collected.push(line); } - // The function should stop at the standalone ``` line (not yield it or lines after) - // This is the correct behavior - it stops when it encounters standalone backticks + // The fence-tracking algorithm (fences do not nest, so no depth is counted) only stops at the outer closing fence + // Since there's no outer closing fence here, we yield everything expect(collected).toEqual([ "Some markdown text", "", @@ -98,5 +98,300 @@ test("stopAtLinesWithMarkdownSupport handles markdown files with no nested block "function test() {", " return true;", "}", + "```", + "More text", + ]); +}); + +async function* toLineStream(lines: string[]): LineStream { + for (const line of lines) { + yield line; + } +} + +test("stopAtLinesWithMarkdownSupport: markdown file with single plain fenced block yields all content", async () => { + const mockLines = [ + "# Title", + "Some example:", + "```javascript", + "const x = 1;", + "```", + "More content.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "README.md", + ); + const collected: string[] = []; + for await (const line 
of result) { + collected.push(line); + } + // Should include everything up to (but not including) the final outer ``` + expect(collected).toEqual([ + "# Title", + "Some example:", + "```javascript", + "const x = 1;", + "```", + "More content.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: markdown file with two sequential fenced blocks yields all content", async () => { + const mockLines = [ + "# Docs", + "```bash", + "npm install", + "```", + "Then:", + "```python", + "print('hello')", + "```", + "Done.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "docs.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + expect(collected).toEqual([ + "# Docs", + "```bash", + "npm install", + "```", + "Then:", + "```python", + "print('hello')", + "```", + "Done.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: markdown file with nested markdown fence still works", async () => { + const mockLines = [ + "# Nested Example", + "```markdown", + "# Inner", + "```javascript", + "code();", + "```", + "End inner", + "```", + "After nested.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "README.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // Should include all content up to but not including the final outer closing fence + expect(collected).toEqual([ + "# Nested Example", + "```markdown", + "# Inner", + "```javascript", + "code();", + "```", + "End inner", + "```", + "After nested.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: markdown file with no fenced blocks yields all lines", async () => { + const mockLines = ["# Title", "Plain text only.", "No fences here."]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "README.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + 
} + expect(collected).toEqual(["# Title", "Plain text only.", "No fences here."]); +}); + +test("stopAtLinesWithMarkdownSupport: non-markdown file still stops at first bare backticks (regression guard)", async () => { + const mockLines = ["line 1", "line 2", "```", "line 4"]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "script.js", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + expect(collected).toEqual(["line 1", "line 2"]); +}); + +test("stopAtLinesWithMarkdownSupport: four-backtick block with inner triple-backtick content", async () => { + // Test case (a): markdown file with a four-backtick opening and closing fence + // The four-backtick outer fence contains triple-backtick content inside + const mockLines = [ + "# Document", + "````markdown", + "# Inner markdown", + "```javascript", + "const x = 1;", + "```", + "End of inner", + "````", + "After block.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "example.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // Should stop at the final outer closing fence (triple backticks) + // and NOT include it in the output + expect(collected).toEqual([ + "# Document", + "````markdown", + "# Inner markdown", + "```javascript", + "const x = 1;", + "```", + "End of inner", + "````", + "After block.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: unclosed inner fence with outer closing fence", async () => { + // Test case (b): unclosed inner markdown fence followed by the outer closing fence + // This should prefer treating the final fence as the outer closer + const mockLines = [ + "# Document", + "```markdown", + "# Inner title", + "```javascript", + "const x = 1;", + "// Missing close fence for the inner block", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "README.md", + ); 
+ const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // Should NOT include the outer closing fence in output + // The outer ``` should not appear in the applied file + expect(collected).toEqual([ + "# Document", + "```markdown", + "# Inner title", + "```javascript", + "const x = 1;", + "// Missing close fence for the inner block", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: multi-backtick fence properly closes only with >= backticks", async () => { + // Additional test to ensure backtick count validation works + // When a block opens with 4 backticks, only 4 or more backticks on a bare line close it + const mockLines = [ + "Example:", + "````markdown", + "Content inside 4-backtick fence", + "```", + "Still inside (3 < 4)", + "````", + "After outer close.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "test.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // Stop at the final ``` (outer closing fence at depth 0) + expect(collected).toEqual([ + "Example:", + "````markdown", + "Content inside 4-backtick fence", + "```", + "Still inside (3 < 4)", + "````", + "After outer close.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: info-string fence inside an open inner block is content, not a new opener", async () => { + // Per CommonMark, fences do not nest: inside an open inner block, a line like + // "```bash" is code content. It must not push depth and consume the real closer. 
+ const mockLines = [ + "# Doc", + "```markdown", + "Example of a fence inside a fence:", + "```bash", + "echo hi", + "```", + "After inner close.", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "README.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // The "```bash" line is content of the markdown block; the first bare ``` closes it, + // "After inner close." is body text, and the final bare ``` is the outer closer. + expect(collected).toEqual([ + "# Doc", + "```markdown", + "Example of a fence inside a fence:", + "```bash", + "echo hi", + "```", + "After inner close.", + ]); +}); + +test("stopAtLinesWithMarkdownSupport: short or over-indented backtick lines are not fences", async () => { + // CommonMark requires 3+ fence chars and at most 3 spaces of indentation. + const mockLines = [ + "Some text", + "``not a fence``", + " ```", + "still content (over-indented fence is code)", + "```", + ]; + const result = stopAtLinesWithMarkdownSupport( + toLineStream(mockLines), + "notes.md", + ); + const collected: string[] = []; + for await (const line of result) { + collected.push(line); + } + // Only the final flush-left bare ``` is the outer closer. + expect(collected).toEqual([ + "Some text", + "``not a fence``", + " ```", + "still content (over-indented fence is code)", ]); });