From 570e478a495df593349616a81526aeb5f538dfe8 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 17 Feb 2026 17:51:57 -0800 Subject: [PATCH 01/10] Add support for named repeat ranges This adds the ability to add a label to a repeat range so that a subsequent expression can match the same repetition. This is intended to help with expressing things like raw strings where the `#` characters must be balanced on both sides, with a limit on the number of matches. --- dev-guide/src/grammar.md | 9 +++++++-- src/notation.md | 2 ++ tools/grammar/src/lib.rs | 6 +++++- tools/grammar/src/parser.rs | 20 ++++++++++++++++++- .../src/grammar/render_markdown.rs | 9 ++++++++- .../src/grammar/render_railroad.rs | 17 ++++++++++++++++ 6 files changed, 58 insertions(+), 5 deletions(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 7a4cdea466..8f536a2ace 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -57,6 +57,7 @@ Quantifier -> | RepeatPlusNonGreedy | RepeatRange | RepeatRangeInclusive + | RepeatRangeNamed Optional -> `?` @@ -68,9 +69,11 @@ RepeatPlus -> `+` RepeatPlusNonGreedy -> `+?` -RepeatRange -> `{` Range? `..` Range? `}` +RepeatRange -> `{` ( Name `:` )? Range? `..` Range? `}` -RepeatRangeInclusive -> `{` Range? `..=` Range `}` +RepeatRangeInclusive -> `{` ( Name `:` )? Range? `..=` Range `}` + +RepeatRangeNamed -> `{` Name `}` Range -> [0-9]+ @@ -150,6 +153,8 @@ The general format is a series of productions separated by blank lines. The expr | RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. | | RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the inclusive range of times specified. The lower bound can be omitted. 
| +| Named RepeatRangeInclusive | Expr{name:2..=4} | If a name precedes the range, then the number of repetitions are stored in a variable with that name that subsequent RepeatRangeNamed expressions can refer to. | +| RepeatRangeNamed | Expr{name} | Repeat the number of times from the previously labeled repetition. | ## Automatic linking diff --git a/src/notation.md b/src/notation.md index b74c74b22f..ce3eee2ef8 100644 --- a/src/notation.md +++ b/src/notation.md @@ -18,6 +18,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets: | x+ | _MacroMatch_+ | 1 or more of x | | xa..b | HEX_DIGIT1..6 | a to b repetitions of x, exclusive of b | | xa..=b | HEX_DIGIT1..=5 | a to b repetitions of x, inclusive of b | +| xn:a..=b | `#`n:1..=255 | a labeled repetition that a subsequent repetition can refer to | +| xn | `#`n | repeat the number of times from the previously labeled repetition | | Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order | | \| | `u8` \| `u16`, Block \| Item | Either one or another | | ! | !COMMENT | Matches if the expression does not follow, without consuming any input | diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs index 1d64e45143..5a7fb3c460 100644 --- a/tools/grammar/src/lib.rs +++ b/tools/grammar/src/lib.rs @@ -61,13 +61,16 @@ pub enum ExpressionKind { RepeatPlus(Box), /// `A+?` RepeatPlusNonGreedy(Box), - /// `A{2..4}` or `A{2..=4}` + /// `A{2..4}` or `A{2..=4}` or `A{name:2..=4}` RepeatRange { expr: Box, + name: Option, min: Option, max: Option, limit: RangeLimit, }, + /// `A{name}` + RepeatRangeNamed(Box, String), /// `NonTerminal` Nt(String), /// `` `string` `` @@ -172,6 +175,7 @@ impl Expression { | ExpressionKind::RepeatPlus(e) | ExpressionKind::RepeatPlusNonGreedy(e) | ExpressionKind::RepeatRange { expr: e, .. 
} + | ExpressionKind::RepeatRangeNamed(e, _) | ExpressionKind::NegExpression(e) | ExpressionKind::Cut(e) => { e.visit_nt(callback); diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index 828af0fb7c..5a8a756a32 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -459,9 +459,26 @@ impl Parser<'_> { }) } - /// Parse `{a..b}` | `{a..=b}` after expression. + /// Parse `{a..b}` | `{a..=b}` | `{name:a..=b}` | `{name}` after expression. + // + // `name:` before the range is a named binding. `{name}` refers to that binding. fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result { self.expect("{", "expected `{`")?; + let start = self.index; + let name = match (self.parse_name(), self.peek()) { + (Some(name), Some(b':')) => { + self.index += 1; + Some(name) + } + (Some(name), Some(b'}')) => { + self.index += 1; + return Ok(ExpressionKind::RepeatRangeNamed(box_kind(kind), name)); + } + _ => { + self.index = start; + None + } + }; let min = self.take_while(&|x| x.is_ascii_digit()); let Ok(min) = (!min.is_empty()).then(|| min.parse::()).transpose() else { bail!(self, "malformed range start"); @@ -492,6 +509,7 @@ impl Parser<'_> { self.expect("}", "expected `}`")?; Ok(ExpressionKind::RepeatRange { expr: box_kind(kind), + name, min, max, limit, diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs index 316eb9aaf3..1cc76f781b 100644 --- a/tools/mdbook-spec/src/grammar/render_markdown.rs +++ b/tools/mdbook-spec/src/grammar/render_markdown.rs @@ -73,6 +73,7 @@ fn last_expr(expr: &Expression) -> &ExpressionKind { | ExpressionKind::RepeatPlus(_) | ExpressionKind::RepeatPlusNonGreedy(_) | ExpressionKind::RepeatRange { .. 
} + | ExpressionKind::RepeatRangeNamed(_, _) | ExpressionKind::Nt(_) | ExpressionKind::Terminal(_) | ExpressionKind::Prose(_) @@ -142,6 +143,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) { } ExpressionKind::RepeatRange { expr, + name, min, max, limit, @@ -149,12 +151,17 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) { render_expression(expr, cx, output); write!( output, - "{min}{limit}{max}", + "{name}{min}{limit}{max}", + name = name.as_ref().map(|n| format!("{n}:")).unwrap_or_default(), min = min.map(|v| v.to_string()).unwrap_or_default(), max = max.map(|v| v.to_string()).unwrap_or_default(), ) .unwrap(); } + ExpressionKind::RepeatRangeNamed(e, name) => { + render_expression(e, cx, output); + write!(output, "{name}").unwrap(); + } ExpressionKind::Nt(nt) => { let dest = cx.md_link_map.get(nt).map_or("missing", |d| d.as_str()); write!(output, "[{nt}]({dest})").unwrap(); diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index ad7b291e57..94f8060962 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -81,6 +81,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Option Option Option Option Option Option Option Option Option unreachable!("closed range must have upper bound"), + ExpressionKind::RepeatRangeNamed(e, name) => { + let n = render_expression(e, cx, stack)?; + let cmt = format!("repeat exactly {name} times"); + let lbox = LabeledBox::new(n, Comment::new(cmt)); + Box::new(lbox) + } ExpressionKind::Nt(nt) => node_for_nt(cx, nt), ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())), ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())), @@ -405,6 +421,7 @@ mod tests { fn range_expr(min: Option, max: Option, limit: RangeLimit) -> Expression { Expression::new_kind(ExpressionKind::RepeatRange { expr: 
Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + name: None, min, max, limit, From 72e083ff1b96627e46cc7998c72f372d14e84054 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 17 Feb 2026 18:01:46 -0800 Subject: [PATCH 02/10] Change raw strings to use named repetitions This changes the raw string grammars to use named repetition to represent that the `#` characters need to be balanced within a specific limit. This also adds a cut after the `#` and before the `"` because rustc generates an error in this situation if a `"` is not found. It's maybe not the prettiest, and I'm on the fence whether this makes it clearer. --- src/tokens.md | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/tokens.md b/src/tokens.md index d878eabfe2..0f0964bfce 100644 --- a/src/tokens.md +++ b/src/tokens.md @@ -214,11 +214,13 @@ r[lex.token.literal.str-raw] r[lex.token.literal.str-raw.syntax] ```grammar,lexer -RAW_STRING_LITERAL -> `r` RAW_STRING_CONTENT SUFFIX? +RAW_STRING_LITERAL -> + `r` `"` ^ RAW_STRING_CONTENT `"` SUFFIX? + | `r` `#`{n:1..=255} ^ `"` RAW_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX? -RAW_STRING_CONTENT -> - `"` ^ ( ~CR )*? `"` - | `#` RAW_STRING_CONTENT `#` +RAW_STRING_CONTENT -> (!`"` ~CR )* + +RAW_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~CR )* ``` r[lex.token.literal.str-raw.intro] @@ -301,11 +303,12 @@ r[lex.token.str-byte-raw] r[lex.token.str-byte-raw.syntax] ```grammar,lexer RAW_BYTE_STRING_LITERAL -> - `br` RAW_BYTE_STRING_CONTENT SUFFIX? + `br` `"` ^ RAW_BYTE_STRING_CONTENT `"` SUFFIX? + | `br` `#`{n:1..=255} ^ `"` RAW_BYTE_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX? + +RAW_BYTE_STRING_CONTENT -> (!`"` ASCII_FOR_RAW )* -RAW_BYTE_STRING_CONTENT -> - `"` ^ ASCII_FOR_RAW*? 
`"` - | `#` RAW_BYTE_STRING_CONTENT `#` +RAW_BYTE_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ASCII_FOR_RAW )* ASCII_FOR_RAW -> !CR ASCII ``` @@ -395,11 +398,12 @@ r[lex.token.str-c-raw] r[lex.token.str-c-raw.syntax] ```grammar,lexer RAW_C_STRING_LITERAL -> - `cr` RAW_C_STRING_CONTENT SUFFIX? + `cr` `"` ^ RAW_C_STRING_CONTENT `"` SUFFIX? + | `cr` `#`{n:1..=255} ^ `"` RAW_C_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX? + +RAW_C_STRING_CONTENT -> (!`"` ~[CR NUL] )* -RAW_C_STRING_CONTENT -> - `"` ^ ( ~[CR NUL] )*? `"` - | `#` RAW_C_STRING_CONTENT `#` +RAW_C_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~[CR NUL] )* ``` r[lex.token.str-c-raw.intro] From 645f949ca5df80ba13c58f2ce31fcc74fe367f9b Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 17 Feb 2026 18:05:38 -0800 Subject: [PATCH 03/10] Remove non-greedy grammar support These non-greedy repetitions are no longer needed because we now use negative lookahead in the rules that were using them. This is intended to simplify things a little, and lean in on the negative lookahead. There were two alternate interpretations of `R1 -> A E*? S B`: R1 -> A _0 B _0 -> S | (E _0) or R1 -> A _0 _0 -> (S B) | (E _0) Rather than trying to document this subtle interpretation, this chooses to just get rid of it and be explicit about what is not allowed to follow. 
--- dev-guide/src/grammar.md | 8 -------- tools/grammar/src/lib.rs | 6 ------ tools/grammar/src/parser.rs | 16 ++++------------ tools/mdbook-spec/src/grammar/render_markdown.rs | 10 ---------- tools/mdbook-spec/src/grammar/render_railroad.rs | 12 ------------ 5 files changed, 4 insertions(+), 48 deletions(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 8f536a2ace..341c09522c 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -52,9 +52,7 @@ Footnote -> `[^` ~[`]` LF]+ `]` Quantifier -> Optional | Repeat - | RepeatNonGreedy | RepeatPlus - | RepeatPlusNonGreedy | RepeatRange | RepeatRangeInclusive | RepeatRangeNamed @@ -63,12 +61,8 @@ Optional -> `?` Repeat -> `*` -RepeatNonGreedy -> `*?` - RepeatPlus -> `+` -RepeatPlusNonGreedy -> `+?` - RepeatRange -> `{` ( Name `:` )? Range? `..` Range? `}` RepeatRangeInclusive -> `{` ( Name `:` )? Range? `..=` Range `}` @@ -148,9 +142,7 @@ The general format is a series of productions separated by blank lines. The expr | Optional | Expr? | The preceding expression is optional. | | NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. | | Repeat | Expr* | The preceding expression is repeated 0 or more times. | -| RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. | | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. | -| RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. | | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. | | RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the inclusive range of times specified. The lower bound can be omitted. 
| | Named RepeatRangeInclusive | Expr{name:2..=4} | If a name precedes the range, then the number of repetitions are stored in a variable with that name that subsequent RepeatRangeNamed expressions can refer to. | diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs index 5a7fb3c460..c16a07211c 100644 --- a/tools/grammar/src/lib.rs +++ b/tools/grammar/src/lib.rs @@ -55,12 +55,8 @@ pub enum ExpressionKind { NegativeLookahead(Box), /// `A*` Repeat(Box), - /// `A*?` - RepeatNonGreedy(Box), /// `A+` RepeatPlus(Box), - /// `A+?` - RepeatPlusNonGreedy(Box), /// `A{2..4}` or `A{2..=4}` or `A{name:2..=4}` RepeatRange { expr: Box, @@ -171,9 +167,7 @@ impl Expression { | ExpressionKind::Optional(e) | ExpressionKind::NegativeLookahead(e) | ExpressionKind::Repeat(e) - | ExpressionKind::RepeatNonGreedy(e) | ExpressionKind::RepeatPlus(e) - | ExpressionKind::RepeatPlusNonGreedy(e) | ExpressionKind::RepeatRange { expr: e, .. } | ExpressionKind::RepeatRangeNamed(e, _) | ExpressionKind::NegExpression(e) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index 5a8a756a32..b94d8d13c4 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -439,24 +439,16 @@ impl Parser<'_> { Ok(ExpressionKind::Optional(box_kind(kind))) } - /// Parse `*` | `*?` after expression. + /// Parse `*` after expression. fn parse_repeat(&mut self, kind: ExpressionKind) -> Result { self.expect("*", "expected `*`")?; - Ok(if self.take_str("?") { - ExpressionKind::RepeatNonGreedy(box_kind(kind)) - } else { - ExpressionKind::Repeat(box_kind(kind)) - }) + Ok(ExpressionKind::Repeat(box_kind(kind))) } - /// Parse `+` | `+?` after expression. + /// Parse `+` after expression. 
fn parse_repeat_plus(&mut self, kind: ExpressionKind) -> Result { self.expect("+", "expected `+`")?; - Ok(if self.take_str("?") { - ExpressionKind::RepeatPlusNonGreedy(box_kind(kind)) - } else { - ExpressionKind::RepeatPlus(box_kind(kind)) - }) + Ok(ExpressionKind::RepeatPlus(box_kind(kind))) } /// Parse `{a..b}` | `{a..=b}` | `{name:a..=b}` | `{name}` after expression. diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs index 1cc76f781b..d79c949325 100644 --- a/tools/mdbook-spec/src/grammar/render_markdown.rs +++ b/tools/mdbook-spec/src/grammar/render_markdown.rs @@ -69,9 +69,7 @@ fn last_expr(expr: &Expression) -> &ExpressionKind { | ExpressionKind::Optional(_) | ExpressionKind::NegativeLookahead(_) | ExpressionKind::Repeat(_) - | ExpressionKind::RepeatNonGreedy(_) | ExpressionKind::RepeatPlus(_) - | ExpressionKind::RepeatPlusNonGreedy(_) | ExpressionKind::RepeatRange { .. } | ExpressionKind::RepeatRangeNamed(_, _) | ExpressionKind::Nt(_) @@ -129,18 +127,10 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) { render_expression(e, cx, output); output.push_str("\\*"); } - ExpressionKind::RepeatNonGreedy(e) => { - render_expression(e, cx, output); - output.push_str("\\* (non-greedy)"); - } ExpressionKind::RepeatPlus(e) => { render_expression(e, cx, output); output.push_str("+"); } - ExpressionKind::RepeatPlusNonGreedy(e) => { - render_expression(e, cx, output); - output.push_str("+ (non-greedy)"); - } ExpressionKind::RepeatRange { expr, name, diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index 94f8060962..aed4e8f151 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -174,12 +174,6 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { - let n = render_expression(e, cx, stack)?; - let r = 
Box::new(Optional::new(Repeat::new(n, railroad::Empty))); - let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string())); - Box::new(lbox) - } // Treat `e+` and `e{1..}` equally. ExpressionKind::RepeatPlus(e) | ExpressionKind::RepeatRange { @@ -192,12 +186,6 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option { - let n = render_expression(e, cx, stack)?; - let r = Repeat::new(n, railroad::Empty); - let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string())); - Box::new(lbox) - } // For `e{..=0}` / `e{0..=0}` or `e{..1}` / `e{0..1}` render an empty node. ExpressionKind::RepeatRange { max: Some(0), .. } | ExpressionKind::RepeatRange { From 6c326c5e87845c2d776822565d59445d14c41ce9 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 03:22:16 +0000 Subject: [PATCH 04/10] Improve named-range table entries in grammar.md The table entries for named repetition ranges have a few issues worth cleaning up. The `Expression` column shows "Named RepeatRangeInclusive" with a space, breaking the pattern. Since naming is a feature of both `RepeatRange` and `RepeatRangeInclusive` (both productions accept the optional `( Name ':' )` prefix), let's present this as "RepeatRange (named)" with a note that the same applies to `RepeatRangeInclusive`. The description says "the number of repetitions are stored in a variable" -- "number" is the singular head noun, so let's say "is", and "stored in a variable" is a bit implementation-flavored. Let's say "is bound to that name". The `RepeatRangeNamed` row uses imperative voice ("Repeat the number of times...") while other entries use indicative voice ("The preceding expression is..."). Let's match the surrounding entries. 
--- dev-guide/src/grammar.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md index 341c09522c..2d9b22756d 100644 --- a/dev-guide/src/grammar.md +++ b/dev-guide/src/grammar.md @@ -145,8 +145,8 @@ The general format is a series of productions separated by blank lines. The expr | RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. | | RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. | | RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the inclusive range of times specified. The lower bound can be omitted. | -| Named RepeatRangeInclusive | Expr{name:2..=4} | If a name precedes the range, then the number of repetitions are stored in a variable with that name that subsequent RepeatRangeNamed expressions can refer to. | -| RepeatRangeNamed | Expr{name} | Repeat the number of times from the previously labeled repetition. | +| RepeatRange (named) | Expr{name:2..4} | When a name precedes the range, the number of repetitions is bound to that name so that subsequent RepeatRangeNamed expressions can refer to it. The same applies to RepeatRangeInclusive. | +| RepeatRangeNamed | Expr{name} | The preceding expression is repeated the number of times determined by a previously named RepeatRange or RepeatRangeInclusive. | ## Automatic linking From ef42cac9771296247a808b91c639010afccf2a39 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 03:22:45 +0000 Subject: [PATCH 05/10] Expand named-repetition descriptions in Notation The descriptions for the two new named-repetition rows are a bit terse compared to the surrounding entries. Let's rewrite both descriptions to be parallel with the existing range entries. 
We'll make the labeled row read "a to b repetitions of x (inclusive of b), with the count bound to the name n", echoing the structure of the "a..b" and "a..=b" rows. For the back-reference row, we'll say "x repeated the number of times bound to n by a previous labeled repetition", matching the declarative voice of the table. --- src/notation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/notation.md b/src/notation.md index ce3eee2ef8..7537c67ddc 100644 --- a/src/notation.md +++ b/src/notation.md @@ -18,8 +18,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets: | x+ | _MacroMatch_+ | 1 or more of x | | xa..b | HEX_DIGIT1..6 | a to b repetitions of x, exclusive of b | | xa..=b | HEX_DIGIT1..=5 | a to b repetitions of x, inclusive of b | -| xn:a..=b | `#`n:1..=255 | a labeled repetition that a subsequent repetition can refer to | -| xn | `#`n | repeat the number of times from the previously labeled repetition | +| xn:a..=b | `#`n:1..=255 | a to b repetitions of x (inclusive of b), with the count bound to the name n | +| xn | `#`n | x repeated the number of times bound to n by a previous labeled repetition | | Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order | | \| | `u8` \| `u16`, Block \| Item | Either one or another | | ! | !COMMENT | Matches if the expression does not follow, without consuming any input | From ad3af9fcdf86f0f46592c6cc5068eeb28cc385a9 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 05:18:15 +0000 Subject: [PATCH 06/10] Reject digit-leading names in `parse_name()` The `parse_name()` function greedily consumes any alphanumeric characters, which means `{123}` is parsed as `RepeatRangeNamed(_, "123")` instead of being rejected. With this commit, we now check that the first character is alphabetic or an underscore before consuming input. 
Digits are still allowed in subsequent positions (e.g., `n1` is valid), but a name that starts with a digit such as `123` causes `parse_name()` to return `None`, letting the parser fall through to range syntax where it correctly reports an error. Three tests verify the fix: one for the rejection of leading digits, one for a name starting with a letter followed by a digit, and one for a name starting with an underscore. --- tools/grammar/src/parser.rs | 62 +++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index b94d8d13c4..e7c66b83cd 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -152,6 +152,12 @@ impl Parser<'_> { } fn parse_name(&mut self) -> Option { + // Names must start with an alphabetic character or + // underscore. + let first = self.input[self.index..].chars().next()?; + if !first.is_alphabetic() && first != '_' { + return None; + } let name = self.take_while(&|c: char| c.is_alphanumeric() || c == '_'); if name.is_empty() { None @@ -1117,4 +1123,60 @@ mod tests { Character::Unicode((ch, _)) if *ch == '\u{7F}' )); } + + // --- `parse_name` digit rejection tests --- + + #[test] + fn parse_name_rejects_leading_digits() { + // `{123}` should not parse as a named reference. The + // digits don't form a valid name and there is no `..` + // range operator, so the parser should reject this. + let err = parse("A -> x{123}").unwrap_err(); + assert!( + err.contains("expected `..`"), + "expected range-syntax error for {{123}}, got: {err}" + ); + } + + #[test] + fn parse_name_allows_letter_then_digit() { + // `n1` is a valid name (starts with a letter). + let grammar = parse("A -> x{n1:2..5}").unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::RepeatRange { + name, + min, + max, + limit, + .. 
+ } = &rule.expression.kind + else { + panic!("expected RepeatRange, got {:?}", rule.expression.kind); + }; + assert_eq!(name.as_deref(), Some("n1")); + assert_eq!(*min, Some(2)); + assert_eq!(*max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn parse_name_allows_underscore_start() { + // `_n` is a valid name (starts with underscore). + let grammar = parse("A -> x{_n:2..5}").unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::RepeatRange { + name, + min, + max, + limit, + .. + } = &rule.expression.kind + else { + panic!("expected RepeatRange, got {:?}", rule.expression.kind); + }; + assert_eq!(name.as_deref(), Some("_n")); + assert_eq!(*min, Some(2)); + assert_eq!(*max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } } From 730c4a987e2d1888d74f5f6a6bd061d6f1c36601 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 05:18:59 +0000 Subject: [PATCH 07/10] Add parser tests for named repeat ranges The named repeat range feature adds two new AST variants -- `RepeatRange` with a `name` field and `RepeatRangeNamed`. We add a `named_repeat_range()` helper that extracts all four fields (name, min, max, limit) and nine tests covering: - Named binding with closed range (`{n:1..=255}`). - Named binding with half-open range (`{n:2..5}`). - Named binding with omitted min (`{n:..=5}`). - Named binding with omitted max (`{n:2..}`). - Named reference (`{n}`) producing `RepeatRangeNamed`. - Combined named binding and reference in one production. - Backtrack from name parsing to plain range (`{2..5}`). - Error: Name followed by colon with no range (`{n:}`). - Error: Empty braces (`{}`). 
--- tools/grammar/src/parser.rs | 143 +++++++++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs index e7c66b83cd..aef8160df9 100644 --- a/tools/grammar/src/parser.rs +++ b/tools/grammar/src/parser.rs @@ -778,7 +778,7 @@ mod tests { #[test] fn test_range_closed_exact() { - // `x{2..=2}` means exactly 2 — not empty. + // `x{2..=2}` means exactly 2 -- not empty. let (min, max, limit) = repeat_range("A -> x{2..=2}"); assert_eq!(min, Some(2)); assert_eq!(max, Some(2)); @@ -1179,4 +1179,145 @@ mod tests { assert_eq!(*max, Some(5)); assert!(matches!(limit, RangeLimit::HalfOpen)); } + + // --- Named repeat range tests --- + + /// Extract full `RepeatRange` fields including the name. + fn named_repeat_range(input: &str) -> (Option, Option, Option, RangeLimit) { + let grammar = parse(input).unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::RepeatRange { + name, + min, + max, + limit, + .. 
+ } = &rule.expression.kind + else { + panic!("expected RepeatRange, got {:?}", rule.expression.kind); + }; + (name.clone(), *min, *max, *limit) + } + + #[test] + fn named_range_closed() { + let (name, min, max, limit) = named_repeat_range("A -> x{n:1..=255}"); + assert_eq!(name.as_deref(), Some("n")); + assert_eq!(min, Some(1)); + assert_eq!(max, Some(255)); + assert!(matches!(limit, RangeLimit::Closed)); + } + + #[test] + fn named_range_half_open() { + let (name, min, max, limit) = named_repeat_range("A -> x{n:2..5}"); + assert_eq!(name.as_deref(), Some("n")); + assert_eq!(min, Some(2)); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn named_range_omitted_min() { + let (name, min, max, limit) = named_repeat_range("A -> x{n:..=5}"); + assert_eq!(name.as_deref(), Some("n")); + assert_eq!(min, None); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::Closed)); + } + + #[test] + fn named_range_omitted_max() { + let (name, min, max, limit) = named_repeat_range("A -> x{n:2..}"); + assert_eq!(name.as_deref(), Some("n")); + assert_eq!(min, Some(2)); + assert_eq!(max, None); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn named_reference() { + // `{n}` without a colon or range produces a + // RepeatRangeNamed variant. + let grammar = parse("A -> x{n}").unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::RepeatRangeNamed(_, name) = &rule.expression.kind else { + panic!("expected RepeatRangeNamed, got {:?}", rule.expression.kind); + }; + assert_eq!(name, "n"); + } + + #[test] + fn named_binding_and_reference_in_sequence() { + // A production with a named binding and a named reference. 
+ let grammar = parse("A -> x{n:1..=255} y{n}").unwrap(); + let rule = grammar.productions.get("A").unwrap(); + let ExpressionKind::Sequence(seq) = &rule.expression.kind else { + panic!("expected Sequence, got {:?}", rule.expression.kind); + }; + assert_eq!(seq.len(), 2); + + // First element: x{n:1..=255} + let ExpressionKind::RepeatRange { + name, + min, + max, + limit, + .. + } = &seq[0].kind + else { + panic!("expected RepeatRange, got {:?}", seq[0].kind); + }; + assert_eq!(name.as_deref(), Some("n")); + assert_eq!(*min, Some(1)); + assert_eq!(*max, Some(255)); + assert!(matches!(limit, RangeLimit::Closed)); + + // Second element: y{n} + let ExpressionKind::RepeatRangeNamed(_, ref_name) = &seq[1].kind else { + panic!("expected RepeatRangeNamed, got {:?}", seq[1].kind); + }; + assert_eq!(ref_name, "n"); + } + + #[test] + fn named_range_backtrack_to_plain_range() { + // When parse_name() succeeds but the next byte is + // neither `:` nor `}`, the parser backtracks and + // falls through to plain range parsing. `{2..5}` is + // such a case after the parse_name fix (digits are + // rejected), but let's test a scenario where a name is + // parsed and then backtracked. + // + // There is no single-character token after a name that + // triggers backtrack in valid grammar (the match arms + // cover `:` and `}`), but the fallback resets the index + // and tries plain range parsing. We verify that + // `{2..5}` parses correctly as a plain range even + // though it starts with a digit. + let (min, max, limit) = repeat_range("A -> x{2..5}"); + assert_eq!(min, Some(2)); + assert_eq!(max, Some(5)); + assert!(matches!(limit, RangeLimit::HalfOpen)); + } + + #[test] + fn named_range_err_colon_missing_dots() { + // `{n:}` -- name followed by colon, then no `..`. 
+ let err = parse("A -> x{n:}").unwrap_err(); + assert!( + err.contains("expected `..`"), + "expected `..` error for {{n:}}, got: {err}" + ); + } + + #[test] + fn named_range_err_empty_braces() { + // `{}` -- empty braces contain no name and no range. + let err = parse("A -> x{}").unwrap_err(); + assert!( + err.contains("expected `..`"), + "expected `..` error for {{}}, got: {err}" + ); + } } From 1deed3f2db9974643e5b8c102f97a9599974af55 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 05:19:40 +0000 Subject: [PATCH 08/10] Add markdown rendering tests for named ranges The named repeat range feature adds new rendering paths in the markdown renderer. We add three tests: - `RepeatRange` with a name renders as `n:1..=255`, verifying the `name:` prefix appears before the range. - `RepeatRange` without a name renders as `2..5` with no spurious colon. - `RepeatRangeNamed` renders as `n`, verifying that a named reference produces the expected superscript. --- .../src/grammar/render_markdown.rs | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs index d79c949325..ad26d5d8ca 100644 --- a/tools/mdbook-spec/src/grammar/render_markdown.rs +++ b/tools/mdbook-spec/src/grammar/render_markdown.rs @@ -251,6 +251,7 @@ fn markdown_escape(s: &str) -> Cow<'_, str> { #[cfg(test)] mod tests { use super::*; + use grammar::RangeLimit; use std::collections::HashMap; /// Creates a minimal `RenderCtx` for testing. @@ -424,4 +425,56 @@ mod tests { fn markdown_escape_plain() { assert_eq!(markdown_escape("abc"), "abc"); } + + // -- Named repeat range tests -- + + #[test] + fn repeat_range_with_name() { + // A RepeatRange with a name renders as `n:1..=255`. 
+ let result = render(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("x".to_string()))), + name: Some("n".to_string()), + min: Some(1), + max: Some(255), + limit: RangeLimit::Closed, + }); + assert!( + result.contains("n:1..=255"), + "expected n:1..=255, got: {result}" + ); + } + + #[test] + fn repeat_range_without_name() { + // A RepeatRange without a name renders with no spurious + // colon -- just `2..5`. + let result = render(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("x".to_string()))), + name: None, + min: Some(2), + max: Some(5), + limit: RangeLimit::HalfOpen, + }); + assert!( + result.contains("2..5"), + "expected 2..5, got: {result}" + ); + assert!( + !result.contains(":"), + "unnamed range should not contain a colon" + ); + } + + #[test] + fn repeat_range_named_reference() { + // A RepeatRangeNamed renders as `n`. + let result = render(ExpressionKind::RepeatRangeNamed( + Box::new(Expression::new_kind(ExpressionKind::Nt("x".to_string()))), + "n".to_string(), + )); + assert!( + result.contains("n"), + "expected n, got: {result}" + ); + } } From 67811caf3b66bad0b03a9b4281ed09f6b54ad2d4 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 05:20:05 +0000 Subject: [PATCH 09/10] Add railroad rendering tests for named ranges The railroad renderer handles `RepeatRangeNamed` with a labeled box showing "repeat exactly n times". We add two tests: - `RepeatRangeNamed` renders SVG containing the label "repeat exactly n times". - `RepeatRange` with a name and a closed range (`{n:2..=5}`) renders successfully and produces the expected structural elements (nonterminal and repeat comment). 
--- .../src/grammar/render_railroad.rs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index aed4e8f151..c3093823b6 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -596,4 +596,45 @@ mod tests { "neg expression should have exception label" ); } + + // -- Named repeat range tests -- + + #[test] + fn repeat_range_named_reference() { + // RepeatRangeNamed renders with a "repeat exactly n times" + // label. + let expr = Expression::new_kind(ExpressionKind::RepeatRangeNamed( + Box::new(Expression::new_kind(ExpressionKind::Nt("x".to_string()))), + "n".to_string(), + )); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("repeat exactly n times"), + "expected 'repeat exactly n times' label, got: {svg}" + ); + } + + #[test] + fn repeat_range_with_name_renders() { + // A RepeatRange with a name should render without crash. + // The name is not currently displayed in railroad diagrams, + // so we just verify that SVG output is produced and + // contains the expected structural elements. 
+ let expr = Expression::new_kind(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + name: Some("n".to_string()), + min: Some(2), + max: Some(5), + limit: RangeLimit::Closed, + }); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("nonterminal"), + "expected nonterminal in SVG output" + ); + assert!( + svg.contains("more times"), + "expected 'more times' repeat comment" + ); + } } From c646a6e34aa5ce7ca53ceb7f3ac0ec7eb917abc9 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Wed, 18 Feb 2026 06:25:03 +0000 Subject: [PATCH 10/10] Render named repeat range def-site labels in railroad When the named repeat range syntax is used, in the railroad diagrams, we were displaying the variable name at the use-site ("repeat exactly n times") but not at the def-site. Let's display these at the def-site too to help the reader tie this together. We'll add a labeled box around the def-site that says, e.g. "repeat count n". --- .../src/grammar/render_railroad.rs | 85 ++++++++++++++++--- 1 file changed, 72 insertions(+), 13 deletions(-) diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs index c3093823b6..f24d23523c 100644 --- a/tools/mdbook-spec/src/grammar/render_railroad.rs +++ b/tools/mdbook-spec/src/grammar/render_railroad.rs @@ -197,7 +197,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Option Option Option Option + } else { + n + }; if let Some(suffix) = &expr.suffix { let suffix = strip_markdown(suffix); let lbox = LabeledBox::new(n, Comment::new(suffix)); @@ -616,10 +628,7 @@ mod tests { #[test] fn repeat_range_with_name_renders() { - // A RepeatRange with a name should render without crash. - // The name is not currently displayed in railroad diagrams, - // so we just verify that SVG output is produced and - // contains the expected structural elements. 
+ // A named RepeatRange should display the name as a label. let expr = Expression::new_kind(ExpressionKind::RepeatRange { expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), name: Some("n".to_string()), @@ -629,12 +638,62 @@ mod tests { }); let svg = render_to_svg(&expr).unwrap(); assert!( - svg.contains("nonterminal"), - "expected nonterminal in SVG output" + svg.contains("repeat count n"), + "expected 'repeat count n' label, got: {svg}" ); + } + + #[test] + fn repeat_range_with_name_optional() { + // `e{k:0..=5}` decomposes to Optional(RepeatRange). The + // name label should still appear on the outermost node. + let expr = Expression::new_kind(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + name: Some("k".to_string()), + min: Some(0), + max: Some(5), + limit: RangeLimit::Closed, + }); + let svg = render_to_svg(&expr).unwrap(); assert!( - svg.contains("more times"), - "expected 'more times' repeat comment" + svg.contains("repeat count k"), + "expected 'repeat count k' label, got: {svg}" + ); + } + + #[test] + fn repeat_range_without_name_no_label() { + // An unnamed RepeatRange should not have a "repeat count" + // label. + let expr = Expression::new_kind(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + name: None, + min: Some(2), + max: Some(5), + limit: RangeLimit::Closed, + }); + let svg = render_to_svg(&expr).unwrap(); + assert!( + !svg.contains("repeat count"), + "unnamed range should not have a 'repeat count' label" + ); + } + + #[test] + fn repeat_range_with_name_identity() { + // `e{n:1..=1}` renders as plain `e` but should still + // display the name label. 
+ let expr = Expression::new_kind(ExpressionKind::RepeatRange { + expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))), + name: Some("n".to_string()), + min: Some(1), + max: Some(1), + limit: RangeLimit::Closed, + }); + let svg = render_to_svg(&expr).unwrap(); + assert!( + svg.contains("repeat count n"), + "expected 'repeat count n' label on identity range" ); } }