From 3bf6846bef763eefcb956b26c18684da77ab2127 Mon Sep 17 00:00:00 2001 From: "Anthony Fu (via agent)" Date: Tue, 9 Jun 2026 08:39:44 +0000 Subject: [PATCH 1/3] test(comark): add skipped repro for numeric inline component names `:8100` is parsed as an inline component named `8100`, which makes renderers call createElement('8100') and crash. A purely numeric name should stay plain text. Added as a skipped SPEC repro (following the existing leaf-block-directive.md precedent) so the suite stays green until the inline-component parser rejects names that don't start with a letter. --- .../COMARK/inline-component-numeric-name.md | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 packages/comark/SPEC/COMARK/inline-component-numeric-name.md diff --git a/packages/comark/SPEC/COMARK/inline-component-numeric-name.md b/packages/comark/SPEC/COMARK/inline-component-numeric-name.md new file mode 100644 index 00000000..c81e12e4 --- /dev/null +++ b/packages/comark/SPEC/COMARK/inline-component-numeric-name.md @@ -0,0 +1,38 @@ +--- +skip: true +comment: // BUG repro — `:8100` is parsed as an inline component named `8100`, producing createElement('8100') and crashing renderers. A purely numeric name should stay plain text. Unskip once the inline-component parser rejects names that don't start with a letter. +--- + +## Input + +```md +The server is running on :8100 +``` + +## AST + +```json +{ + "frontmatter": {}, + "meta": {}, + "nodes": [ + [ + "p", + {}, + "The server is running on :8100" + ] + ] +} +``` + +## HTML + +```html +

<p>The server is running on :8100</p>

+``` + +## Markdown + +```md +The server is running on :8100 +``` From a256b9315ec6ad489336b6ee58ed2f4f738142d3 Mon Sep 17 00:00:00 2001 From: "Anthony Fu (via agent)" Date: Wed, 10 Jun 2026 02:06:07 +0000 Subject: [PATCH 2/3] fix(comark): treat colon followed by digits as plain text, not a component A component name must start with a letter or `$`, but the inline and block parsers accepted digit-led names: - inline `:8100` was captured as a component (`['8100', {}]`), making renderers call `createElement('8100')` and crash the app; - block `:8100` / `::8100` made `parseBlockParams` throw `Invalid block params` during parsing. Guard all three entry points with a shared `isComponentNameStart` helper that mirrors the existing block name grammar (`RE_BLOCK_NAME = /^[a-z$]/i`), so a colon sequence whose name doesn't start with a letter or `$` stays plain text. Replaces the earlier skipped SPEC repro with a real regression test (`test/component-name.test.ts`) covering inline and block cases plus no-regression checks for valid components. --- .../COMARK/inline-component-numeric-name.md | 38 ------------ packages/comark/src/plugins/syntax.ts | 32 +++++++++- packages/comark/test/component-name.test.ts | 62 +++++++++++++++++++ 3 files changed, 93 insertions(+), 39 deletions(-) delete mode 100644 packages/comark/SPEC/COMARK/inline-component-numeric-name.md create mode 100644 packages/comark/test/component-name.test.ts diff --git a/packages/comark/SPEC/COMARK/inline-component-numeric-name.md b/packages/comark/SPEC/COMARK/inline-component-numeric-name.md deleted file mode 100644 index c81e12e4..00000000 --- a/packages/comark/SPEC/COMARK/inline-component-numeric-name.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -skip: true -comment: // BUG repro — `:8100` is parsed as an inline component named `8100`, producing createElement('8100') and crashing renderers. A purely numeric name should stay plain text. 
Unskip once the inline-component parser rejects names that don't start with a letter. ---- - -## Input - -```md -The server is running on :8100 -``` - -## AST - -```json -{ - "frontmatter": {}, - "meta": {}, - "nodes": [ - [ - "p", - {}, - "The server is running on :8100" - ] - ] -} -``` - -## HTML - -```html -

<p>The server is running on :8100</p>

-``` - -## Markdown - -```md -The server is running on :8100 -``` diff --git a/packages/comark/src/plugins/syntax.ts b/packages/comark/src/plugins/syntax.ts index f11fc2d5..662dcae9 100644 --- a/packages/comark/src/plugins/syntax.ts +++ b/packages/comark/src/plugins/syntax.ts @@ -53,6 +53,23 @@ export interface SyntaxOptions { bindingTag?: string } +/** + * Whether a character code can start a component name. + * + * Component names must begin with an ASCII letter or `$`, mirroring the block + * name grammar (`RE_BLOCK_NAME = /^[a-z$]/i`). This prevents sequences such as + * `:8100` or `::30` from being treated as components — a purely numeric name is + * not a valid component and would otherwise produce invalid output like + * `createElement('8100')` (inline) or throw `Invalid block params` (block). + */ +function isComponentNameStart(code: number): boolean { + return ( + (code >= 0x61 && code <= 0x7a) || // a-z + (code >= 0x41 && code <= 0x5a) || // A-Z + code === 0x24 // $ + ) +} + // #region Block component plugin (`::name` and `::name ... ::`) const blockYamlLines: Record = { @@ -74,7 +91,9 @@ const markdownItComarkBlock: PluginSimple = (md) => { function comark_block_shorthand(state, startLine, _endLine, silent) { const line = state.src.slice(state.bMarks[startLine] + state.tShift[startLine], state.eMarks[startLine]) - if (!/^:\w/.test(line)) return false + // `:name` shorthand — the name must start with a letter or `$`. This also + // leaves `::` (block) and `:8100` (plain text) to other handlers. 
+ if (line[0] !== ':' || !isComponentNameStart(line.charCodeAt(1))) return false const { name, content, props, remaining } = parseBlockParams(line.slice(1)) @@ -142,6 +161,13 @@ const markdownItComarkBlock: PluginSimple = (md) => { if (marker_count < min_markers) return false const markup = state.src.slice(start, pos) + + // When a name is present it must start with a letter or `$`; otherwise + // treat the line as plain text rather than letting parseBlockParams throw + // on e.g. `::8100`. + const nameStart = state.skipSpaces(pos) + if (nameStart < max && !isComponentNameStart(state.src.charCodeAt(nameStart))) return false + const params = parseBlockParams(state.src.slice(pos, max)) if (!params.name) return false @@ -459,6 +485,10 @@ const markdownItInlineComponent: PluginSimple = (md) => { const prevChar = state.src[start - 1] if (start > 0 && !ALLOWED_PREV_CHARS.has(prevChar)) return false + // A component name must start with a letter or `$`. Without this, `:8100`, + // `:30`, etc. are captured as components (e.g. `createElement('8100')`). + if (!isComponentNameStart(state.src.charCodeAt(start + 1))) return false + let index = start + 1 let nameEnd = -1 let contentStart = -1 diff --git a/packages/comark/test/component-name.test.ts b/packages/comark/test/component-name.test.ts new file mode 100644 index 00000000..3ddf5243 --- /dev/null +++ b/packages/comark/test/component-name.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from 'vitest' +import { parse } from '../src/parse' + +// Regression tests for component-name validation. +// +// A component name must start with a letter or `$`. Before the fix, a colon +// followed by digits was captured as a component name: +// - inline `:8100` produced `['8100', {}]`, making renderers call +// `createElement('8100')` and crash the app; +// - block `:8100` / `::8100` made `parseBlockParams` throw `Invalid block +// params` during parsing. +// In all of these cases the colon sequence should stay plain text. 
+describe('component name validation', () => { + describe('inline components', () => { + it('keeps `:8100` as plain text (does not parse digits as a component)', async () => { + const tree = await parse('The server is running on :8100') + expect(tree.nodes).toEqual([['p', {}, 'The server is running on :8100']]) + }) + + it('keeps a colon followed by digits as plain text mid-sentence', async () => { + const tree = await parse('Meet me at :30 past the hour') + expect(tree.nodes).toEqual([['p', {}, 'Meet me at :30 past the hour']]) + }) + + it('still parses a valid letter-led inline component', async () => { + const tree = await parse('an :inline-component here') + expect(tree.nodes).toEqual([['p', {}, 'an ', ['inline-component', {}], ' here']]) + }) + + it('still parses an inline component with bracket content', async () => { + const tree = await parse('a :badge[New] tag') + expect(tree.nodes).toEqual([['p', {}, 'a ', ['badge', {}, 'New'], ' tag']]) + }) + + it('still allows digits after the leading letter (`:h2`)', async () => { + const tree = await parse('see :h2 below') + expect(tree.nodes).toEqual([['p', {}, 'see ', ['h2', {}], ' below']]) + }) + }) + + describe('block components', () => { + it('does not throw on a numeric `:name` shorthand', async () => { + const tree = await parse(':8100') + expect(tree.nodes).toEqual([['p', {}, ':8100']]) + }) + + it('does not throw on a numeric `:name` shorthand with following content', async () => { + const tree = await parse(':8100\nhello') + expect(tree.nodes).toEqual([['p', {}, ':8100\nhello']]) + }) + + it('does not throw on a numeric `::name` block', async () => { + const tree = await parse('::8100') + expect(tree.nodes).toEqual([['p', {}, '::8100']]) + }) + + it('still parses a valid `::name` block component', async () => { + const tree = await parse('::alert\nHello\n::') + expect(tree.nodes).toEqual([['alert', {}, 'Hello']]) + }) + }) +}) From b81ef9052801abaf1cf926cacdcff16a30319c88 Mon Sep 17 00:00:00 2001 From: 
"Anthony Fu (via agent)" Date: Wed, 10 Jun 2026 02:23:32 +0000 Subject: [PATCH 3/3] refactor(comark): validate component names with a regex helper Replace the char-code `isComponentNameStart` with a regex-based `isValidComponentName` (`/^[a-z$][\w$-]*/i`) that mirrors the block name grammar. Behaviour is unchanged; the three guards now validate the candidate name string instead of a single leading char code. --- packages/comark/src/plugins/syntax.ts | 45 ++++++++++++--------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/packages/comark/src/plugins/syntax.ts b/packages/comark/src/plugins/syntax.ts index 662dcae9..742ff224 100644 --- a/packages/comark/src/plugins/syntax.ts +++ b/packages/comark/src/plugins/syntax.ts @@ -54,20 +54,21 @@ export interface SyntaxOptions { } /** - * Whether a character code can start a component name. + * A component name must start with a letter or `$`, followed by word chars, + * `$` or `-`. Mirrors the block name grammar (`RE_BLOCK_NAME = /^[a-z$]/i`). + */ +const RE_COMPONENT_NAME = /^[a-z$][\w$-]*/i + +/** + * Whether `name` begins with a syntactically valid component name. * - * Component names must begin with an ASCII letter or `$`, mirroring the block - * name grammar (`RE_BLOCK_NAME = /^[a-z$]/i`). This prevents sequences such as - * `:8100` or `::30` from being treated as components — a purely numeric name is - * not a valid component and would otherwise produce invalid output like - * `createElement('8100')` (inline) or throw `Invalid block params` (block). + * This prevents sequences such as `:8100` or `::30` from being treated as + * components — a purely numeric name is not a valid component and would + * otherwise produce invalid output like `createElement('8100')` (inline) or + * throw `Invalid block params` (block). 
*/ -function isComponentNameStart(code: number): boolean { - return ( - (code >= 0x61 && code <= 0x7a) || // a-z - (code >= 0x41 && code <= 0x5a) || // A-Z - code === 0x24 // $ - ) +function isValidComponentName(name: string): boolean { + return RE_COMPONENT_NAME.test(name) } // #region Block component plugin (`::name` and `::name ... ::`) @@ -91,9 +92,7 @@ const markdownItComarkBlock: PluginSimple = (md) => { function comark_block_shorthand(state, startLine, _endLine, silent) { const line = state.src.slice(state.bMarks[startLine] + state.tShift[startLine], state.eMarks[startLine]) - // `:name` shorthand — the name must start with a letter or `$`. This also - // leaves `::` (block) and `:8100` (plain text) to other handlers. - if (line[0] !== ':' || !isComponentNameStart(line.charCodeAt(1))) return false + if (line[0] !== ':' || !isValidComponentName(line.slice(1))) return false const { name, content, props, remaining } = parseBlockParams(line.slice(1)) @@ -162,11 +161,10 @@ const markdownItComarkBlock: PluginSimple = (md) => { const markup = state.src.slice(start, pos) - // When a name is present it must start with a letter or `$`; otherwise - // treat the line as plain text rather than letting parseBlockParams throw - // on e.g. `::8100`. + // Bail out (plain text) on an invalid name instead of letting + // parseBlockParams throw on e.g. `::8100`. const nameStart = state.skipSpaces(pos) - if (nameStart < max && !isComponentNameStart(state.src.charCodeAt(nameStart))) return false + if (nameStart < max && !isValidComponentName(state.src.slice(nameStart, max))) return false const params = parseBlockParams(state.src.slice(pos, max)) @@ -485,10 +483,6 @@ const markdownItInlineComponent: PluginSimple = (md) => { const prevChar = state.src[start - 1] if (start > 0 && !ALLOWED_PREV_CHARS.has(prevChar)) return false - // A component name must start with a letter or `$`. Without this, `:8100`, - // `:30`, etc. are captured as components (e.g. `createElement('8100')`). 
- if (!isComponentNameStart(state.src.charCodeAt(start + 1))) return false - let index = start + 1 let nameEnd = -1 let contentStart = -1 @@ -515,12 +509,13 @@ const markdownItInlineComponent: PluginSimple = (md) => { // Empty name if (nameEnd <= start + 1) return false + const name = state.src.slice(start + 1, nameEnd) + if (!isValidComponentName(name)) return false + state.pos = index if (silent) return true - const name = state.src.slice(start + 1, nameEnd) - if (contentStart !== -1) { state.push('mdc_inline_component', name, 1)