Skip to content

Commit 3983e4d

Browse files
authored
Merge pull request #22070 from github/tausbn/yeast-add-raw-capture-syntax
yeast: Extend `rule!` macro with support for raw captures
2 parents 3058198 + 70ca7af commit 3983e4d

7 files changed

Lines changed: 276 additions & 233 deletions

File tree

shared/yeast-macros/src/lib.rs

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -121,37 +121,3 @@ pub fn rule(input: TokenStream) -> TokenStream {
121121
Err(err) => err.to_compile_error().into(),
122122
}
123123
}
124-
125-
/// Define a desugaring rule whose transform is a hand-written Rust block.
126-
///
127-
/// Use `manual_rule!` when the transform needs control over capture
128-
/// translation timing — for example, when an outer rule needs to set
129-
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
130-
/// translation reaches inner rules that read that state.
131-
///
132-
/// ```text
133-
/// manual_rule!(
134-
/// (query_pattern field: (_) @name)
135-
/// {
136-
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
137-
/// // (`name: NodeRef`, etc.) are bound from the query.
138-
/// let translated = ctx.translate(name)?;
139-
/// Ok(translated)
140-
/// }
141-
/// )
142-
/// ```
143-
///
144-
/// Differences from [`rule!`]:
145-
/// - Captures are **not** auto-translated before the body runs; they
146-
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
147-
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
148-
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
149-
/// tree template, no `Ok(...)` wrap.
150-
#[proc_macro]
151-
pub fn manual_rule(input: TokenStream) -> TokenStream {
152-
let input2: TokenStream2 = input.into();
153-
match parse::parse_manual_rule_top(input2) {
154-
Ok(output) => output.into(),
155-
Err(err) => err.to_compile_error().into(),
156-
}
157-
}

shared/yeast-macros/src/parse.rs

Lines changed: 43 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
2222
/// Parse a single query node (possibly with a trailing `@capture`).
2323
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
2424
let base = parse_query_atom(tokens)?;
25-
// Check for trailing @capture
25+
// Check for trailing @capture or @@capture
2626
if peek_is_at(tokens) {
27-
tokens.next(); // consume @
28-
let capture_name = expect_ident(tokens, "expected capture name after @")?;
27+
let capture_name = consume_capture_marker(tokens)?;
2928
let name_str = capture_name.to_string();
3029
Ok(quote! {
3130
yeast::query::QueryNode::Capture {
@@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
159158
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
160159
} else {
161160
let child = if peek_is_at(tokens) {
162-
tokens.next();
163-
let capture_name = expect_ident(tokens, "expected capture name after @")?;
161+
let capture_name = consume_capture_marker(tokens)?;
164162
let name_str = capture_name.to_string();
165163
quote! {
166164
yeast::query::QueryNode::Capture {
@@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
650648
struct CaptureInfo {
651649
name: String,
652650
multiplicity: CaptureMultiplicity,
651+
/// `true` for `@@name` captures: the auto-translate prefix skips them,
652+
/// so the bound `NodeRef` refers to the raw (input-schema) node.
653+
raw: bool,
653654
}
654655

655656
#[derive(Clone, Copy, PartialEq)]
@@ -708,6 +709,14 @@ fn extract_captures_inner(
708709
extract_captures_inner(&mut inner, captures, child_mult);
709710
}
710711
TokenTree::Punct(p) if p.as_char() == '@' => {
712+
// `@@name` marks the capture as raw (skip auto-translate).
713+
let raw = matches!(
714+
tokens.peek(),
715+
Some(TokenTree::Punct(p)) if p.as_char() == '@'
716+
);
717+
if raw {
718+
tokens.next(); // consume the second `@`
719+
}
711720
if let Some(TokenTree::Ident(name)) = tokens.next() {
712721
let mult = if parent_mult == CaptureMultiplicity::Repeated
713722
|| last_mult == CaptureMultiplicity::Repeated
@@ -723,6 +732,7 @@ fn extract_captures_inner(
723732
captures.push(CaptureInfo {
724733
name: name.to_string(),
725734
multiplicity: mult,
735+
raw,
726736
});
727737
}
728738
last_mult = CaptureMultiplicity::Single;
@@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
776786
// Parse query
777787
let query_code = parse_query_top(query_stream.clone())?;
778788

789+
// Capture names marked `@@name` (raw) — passed to the auto-translate
790+
// prefix as a skip list so those captures keep their input-schema ids.
791+
let raw_capture_names: Vec<&str> = captures
792+
.iter()
793+
.filter(|c| c.raw)
794+
.map(|c| c.name.as_str())
795+
.collect();
796+
779797
// Generate capture bindings
780798
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
781799
let bindings: Vec<TokenStream> = captures
@@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
891909
let __query = #query_code;
892910
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
893911
// Auto-translation prefix: recursively translate every
894-
// captured node before invoking the user's transform body.
912+
// captured node before invoking the user's transform body,
913+
// except for `@@name` captures listed in `__skip` which the
914+
// body consumes raw.
895915
// For OneShot rules this preserves the legacy behaviour
896916
// (input-schema captures translated to output-schema
897917
// nodes); for Repeating rules it is a no-op.
898-
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
918+
let __skip: &[&str] = &[#(#raw_capture_names),*];
919+
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
899920
#(#bindings)*
900921
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
901922
let __result: Vec<usize> = { #transform_body };
@@ -905,106 +926,6 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
905926
})
906927
}
907928

908-
/// Parse `manual_rule!( query { body } )`.
909-
///
910-
/// Like [`parse_rule_top`] but:
911-
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
912-
/// - Generates code that does NOT auto-translate captures before
913-
/// running the body. Capture variables refer to raw (input-schema)
914-
/// nodes; the body is responsible for explicit translation via
915-
/// `ctx.translate(...)`.
916-
/// - The body is included verbatim and must evaluate to
917-
/// `Result<Vec<usize>, String>`.
918-
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
919-
let mut tokens = input.into_iter().peekable();
920-
921-
// Collect query tokens up to the body block `{ ... }`.
922-
let mut query_tokens = Vec::new();
923-
loop {
924-
match tokens.peek() {
925-
None => {
926-
return Err(syn::Error::new(
927-
Span::call_site(),
928-
"expected a Rust block `{ ... }` after the query in manual_rule!",
929-
))
930-
}
931-
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
932-
_ => {
933-
query_tokens.push(tokens.next().unwrap());
934-
}
935-
}
936-
}
937-
938-
let query_stream: TokenStream = query_tokens.into_iter().collect();
939-
940-
// Extract captures from the query (same as in `rule!`).
941-
let captures = extract_captures(&query_stream);
942-
943-
// Parse the query into the QueryNode-building expression.
944-
let query_code = parse_query_top(query_stream)?;
945-
946-
// Generate capture bindings (same as in `rule!`).
947-
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
948-
let bindings: Vec<TokenStream> = captures
949-
.iter()
950-
.map(|cap| {
951-
let name = Ident::new(&cap.name, Span::call_site());
952-
let name_str = &cap.name;
953-
match cap.multiplicity {
954-
CaptureMultiplicity::Repeated => quote! {
955-
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
956-
.into_iter()
957-
.map(yeast::NodeRef)
958-
.collect();
959-
},
960-
CaptureMultiplicity::Optional => quote! {
961-
let #name: Option<yeast::NodeRef> =
962-
__captures.get_opt(#name_str).map(yeast::NodeRef);
963-
},
964-
CaptureMultiplicity::Single => quote! {
965-
let #name: yeast::NodeRef =
966-
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
967-
},
968-
}
969-
})
970-
.collect();
971-
972-
// Consume the body block.
973-
let body_group = match tokens.next() {
974-
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
975-
other => {
976-
return Err(syn::Error::new(
977-
Span::call_site(),
978-
format!(
979-
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
980-
),
981-
))
982-
}
983-
};
984-
let body_stream = body_group.stream();
985-
986-
// No tokens should follow the body.
987-
if let Some(tok) = tokens.next() {
988-
return Err(syn::Error::new_spanned(
989-
tok,
990-
"unexpected token after manual_rule! body",
991-
));
992-
}
993-
994-
Ok(quote! {
995-
{
996-
let __query = #query_code;
997-
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
998-
// No auto-translate prefix for manual rules — the body
999-
// is responsible for translating captures explicitly.
1000-
#(#bindings)*
1001-
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
1002-
#body_stream
1003-
}))
1004-
}
1005-
})
1006-
}
1007-
1008929
// ---------------------------------------------------------------------------
1009930
// Token utilities
1010931
// ---------------------------------------------------------------------------
@@ -1013,6 +934,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
1013934
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
1014935
}
1015936

937+
/// Consume an `@` or `@@` capture marker and the following name ident.
938+
/// Caller has already verified `peek_is_at(tokens)`.
939+
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
940+
tokens.next(); // consume the first `@`
941+
if peek_is_at(tokens) {
942+
tokens.next(); // consume the second `@` of `@@`
943+
}
944+
expect_ident(tokens, "expected capture name after `@` or `@@`")
945+
}
946+
1016947
fn peek_is_literal(tokens: &mut Tokens) -> bool {
1017948
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
1018949
}
@@ -1113,8 +1044,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
11131044

11141045
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
11151046
if peek_is_at(tokens) {
1116-
tokens.next(); // consume @
1117-
let name = expect_ident(tokens, "expected capture name after @")?;
1047+
let name = consume_capture_marker(tokens)?;
11181048
let name_str = name.to_string();
11191049
Ok(quote! {
11201050
yeast::query::QueryNode::Capture {
@@ -1141,13 +1071,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
11411071
}
11421072
}
11431073

1144-
/// If `@name` follows a Repeated list element, wrap each child SingleNode
1145-
/// inside the repetition with a Capture. This matches tree-sitter semantics
1146-
/// where `(_)* @name` captures each matched node.
1074+
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
1075+
/// child SingleNode inside the repetition with a Capture. This matches
1076+
/// tree-sitter semantics where `(_)* @name` captures each matched node.
11471077
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
11481078
if peek_is_at(tokens) {
1149-
tokens.next();
1150-
let name = expect_ident(tokens, "expected capture name after @")?;
1079+
let name = consume_capture_marker(tokens)?;
11511080
let name_str = name.to_string();
11521081
// Re-parsing the element isn't practical, so we generate a wrapper
11531082
// that creates a new Repeated with each child wrapped in a capture.

shared/yeast/doc/yeast.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
292292
single capture (`Id`) and `{..name}` splices a repeated capture
293293
(`Vec<Id>`).
294294

295+
### Raw captures (`@@name`)
296+
297+
The default `@name` capture marker is *auto-translated*: in OneShot
298+
phases the macro recursively translates the captured node before
299+
binding it, so `{name}` in the output template splices a node that
300+
already conforms to the output schema.
301+
302+
For rules that need the raw (input-schema) capture — typically to read
303+
its source text or to translate it explicitly with mutable context
304+
state between calls — use `@@name` instead. The body sees the original
305+
input-schema `NodeRef`:
306+
307+
```rust
308+
yeast::rule!(
309+
(assignment left: (_) @@raw_lhs right: (_) @rhs)
310+
=>
311+
{
312+
// raw_lhs is untranslated: read its original source text.
313+
let text = ctx.ast.source_text(raw_lhs.into());
314+
// rhs is already translated by the auto-translate prefix.
315+
tree!((call
316+
method: (identifier #{text.as_str()})
317+
receiver: {rhs}))
318+
}
319+
);
320+
```
321+
322+
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
323+
markers are equivalent (auto-translation is a no-op for repeating
324+
rules).
325+
295326
## Complete example: for-loop desugaring
296327

297328
This rule rewrites Ruby's `for pat in val do body end` into

shared/yeast/src/captures.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,28 @@ impl Captures {
8080
}
8181
Ok(())
8282
}
83+
84+
/// Like [`Self::try_map_all_captures`] but leaves captures whose name appears
85+
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
86+
/// (raw) captures alongside the default auto-translated `@name`
87+
/// captures.
88+
pub fn try_map_captures_except<E>(
89+
&mut self,
90+
skip: &[&str],
91+
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
92+
) -> Result<(), E> {
93+
for (name, ids) in self.captures.iter_mut() {
94+
if skip.contains(name) {
95+
continue;
96+
}
97+
let mut new_ids = Vec::with_capacity(ids.len());
98+
for &id in ids.iter() {
99+
new_ids.extend(f(id)?);
100+
}
101+
*ids = new_ids;
102+
}
103+
Ok(())
104+
}
83105
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
84106
if let Some(from_ids) = self.captures.get(from) {
85107
let new_values = from_ids.iter().copied().map(f).collect();

0 commit comments

Comments
 (0)