@@ -9,7 +9,6 @@ mod languages;
99
1010#[ derive( Debug ) ]
1111struct CorpusCase {
12- name : String ,
1312 input : String ,
1413 raw : String ,
1514 expected : String ,
@@ -21,129 +20,43 @@ fn update_mode_enabled() -> bool {
2120 . unwrap_or ( false )
2221}
2322
24- fn is_header_rule ( line : & str ) -> bool {
25- let trimmed = line. trim ( ) ;
26- trimmed. len ( ) >= 3 && trimmed. chars ( ) . all ( |c| c == '=' )
27- }
28-
29- fn is_next_case_header ( lines : & [ & str ] , i : usize ) -> bool {
30- is_header_rule ( lines[ i] )
31- && i + 2 < lines. len ( )
32- && !lines[ i + 1 ] . trim ( ) . is_empty ( )
33- && is_header_rule ( lines[ i + 2 ] )
34- }
35-
36- fn parse_corpus ( content : & str ) -> Vec < CorpusCase > {
37- let lines: Vec < & str > = content. lines ( ) . collect ( ) ;
38- let mut i = 0 ;
39- let mut cases = Vec :: new ( ) ;
40-
41- while i < lines. len ( ) {
42- while i < lines. len ( ) && lines[ i] . trim ( ) . is_empty ( ) {
43- i += 1 ;
44- }
45- if i >= lines. len ( ) {
46- break ;
47- }
48-
49- assert ! (
50- is_header_rule( lines[ i] ) ,
51- "Expected header delimiter at line {}" ,
52- i + 1
53- ) ;
54- i += 1 ;
55-
56- assert ! ( i < lines. len( ) , "Missing test name at line {}" , i + 1 ) ;
57- let name = lines[ i] . trim ( ) . to_string ( ) ;
58- i += 1 ;
59-
60- assert ! (
61- i < lines. len( ) && is_header_rule( lines[ i] ) ,
62- "Missing closing header delimiter for case {name}"
63- ) ;
64- i += 1 ;
65-
66- let input_start = i;
67- while i < lines. len ( ) && lines[ i] . trim ( ) != "---" {
68- if is_next_case_header ( & lines, i) {
69- break ;
70- }
71- i += 1 ;
72- }
73- let input = lines[ input_start..i] . join ( "\n " ) . trim_end ( ) . to_string ( ) ;
74- let raw;
75- let expected;
76- if i >= lines. len ( ) || lines[ i] . trim ( ) != "---" {
77- // No `---` separator before next case (or EOF). Treat the
78- // remaining sections as empty.
79- raw = String :: new ( ) ;
80- expected = String :: new ( ) ;
81- } else {
82- i += 1 ;
83-
84- // Raw tree-sitter parse section. New-format files have a second
85- // `---` separator between the raw tree and the mapped AST. Legacy
86- // files (with only one separator) have no raw section — in that
87- // case `raw` stays empty and update mode will populate it.
88- let raw_start = i;
89- let mut next_sep = i;
90- while next_sep < lines. len ( ) && lines[ next_sep] . trim ( ) != "---" {
91- if is_next_case_header ( & lines, next_sep) {
92- break ;
93- }
94- next_sep += 1 ;
95- }
96- raw = if next_sep < lines. len ( ) && lines[ next_sep] . trim ( ) == "---" {
97- let raw_text = lines[ raw_start..next_sep] . join ( "\n " ) . trim ( ) . to_string ( ) ;
98- i = next_sep + 1 ;
99- raw_text
100- } else {
101- String :: new ( )
102- } ;
103-
104- let expected_start = i;
105- while i < lines. len ( ) {
106- if is_next_case_header ( & lines, i) {
107- break ;
108- }
109- i += 1 ;
110- }
111- expected = lines[ expected_start..i] . join ( "\n " ) . trim ( ) . to_string ( ) ;
112- }
23+ /// Parse a corpus `.output` file. The file holds a single test case made of
24+ /// three sections separated by `---` delimiter lines:
25+ ///
26+ /// ```text
27+ /// <test case source>
28+ ///
29+ /// ---
30+ ///
31+ /// <raw tree-sitter parse tree>
32+ ///
33+ /// ---
34+ ///
35+ /// <mapped AST>
36+ /// ```
37+ ///
38+ /// The test name is the file name, so there is no header section. Missing
39+ /// trailing sections (e.g. a freshly added file) are treated as empty.
40+ fn parse_corpus ( content : & str ) -> CorpusCase {
41+ let lines: Vec < & str > = content. split ( '\n' ) . collect ( ) ;
42+ let mut sections = lines
43+ . split ( |line| line. trim ( ) == "---" )
44+ . map ( |chunk| chunk. join ( "\n " ) . trim ( ) . to_string ( ) ) ;
11345
114- cases. push ( CorpusCase {
115- name,
116- input,
117- raw,
118- expected,
119- } ) ;
46+ CorpusCase {
47+ input : sections. next ( ) . unwrap_or_default ( ) ,
48+ raw : sections. next ( ) . unwrap_or_default ( ) ,
49+ expected : sections. next ( ) . unwrap_or_default ( ) ,
12050 }
121-
122- cases
12351}
12452
125- fn render_corpus ( cases : & [ CorpusCase ] ) -> String {
126- let mut out = String :: new ( ) ;
127-
128- for ( idx, case) in cases. iter ( ) . enumerate ( ) {
129- if idx > 0 {
130- // Blank line between cases.
131- out. push ( '\n' ) ;
132- }
133- out. push_str ( "===\n " ) ;
134- out. push_str ( case. name . trim ( ) ) ;
135- out. push_str ( "\n ===\n \n " ) ;
136- out. push_str ( case. input . trim ( ) ) ;
137- out. push_str ( "\n \n ---\n \n " ) ;
138- out. push_str ( case. raw . trim ( ) ) ;
139- out. push_str ( "\n \n ---\n \n " ) ;
140- out. push_str ( case. expected . trim ( ) ) ;
141- // Single trailing newline per case; the inter-case blank line is
142- // added by the prefix above, and the file ends with exactly one `\n`.
143- out. push ( '\n' ) ;
144- }
145-
146- out
53+ fn render_corpus ( case : & CorpusCase ) -> String {
54+ format ! (
55+ "{}\n \n ---\n \n {}\n \n ---\n \n {}\n " ,
56+ case. input. trim( ) ,
57+ case. raw. trim( ) ,
58+ case. expected. trim( )
59+ )
14760}
14861
14962fn run_desugaring ( lang : & simple:: LanguageSpec , input : & str ) -> Result < yeast:: Ast , String > {
@@ -182,6 +95,26 @@ fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result<String, St
18295 Ok ( dump_ast ( & ast, ast. get_root ( ) , input) )
18396}
18497
98+ /// Collect the set of corpus test "stems" (paths without an extension) under
99+ /// `dir`. A stem is discovered from either a `.swift` source file or a
100+ /// `.output` file, so that a `.swift` with no `.output` (a freshly added test)
101+ /// and an orphaned `.output` with no `.swift` are both surfaced.
102+ fn collect_corpus_stems ( dir : & Path , out : & mut Vec < std:: path:: PathBuf > ) {
103+ let entries = fs:: read_dir ( dir)
104+ . unwrap_or_else ( |e| panic ! ( "Failed to read corpus directory {}: {e}" , dir. display( ) ) ) ;
105+ for entry in entries {
106+ let path = entry. expect ( "Failed to read corpus entry" ) . path ( ) ;
107+ if path. is_dir ( ) {
108+ collect_corpus_stems ( & path, out) ;
109+ } else if path
110+ . extension ( )
111+ . is_some_and ( |ext| ext == "swift" || ext == "output" )
112+ {
113+ out. push ( path. with_extension ( "" ) ) ;
114+ }
115+ }
116+ }
117+
185118#[ test]
186119fn test_corpus ( ) {
187120 let update_mode = update_mode_enabled ( ) ;
@@ -200,77 +133,115 @@ fn test_corpus() {
200133 continue ;
201134 }
202135
203- let mut corpus_files: Vec < _ > = fs:: read_dir ( & lang_corpus_dir)
204- . unwrap_or_else ( |e| {
205- panic ! (
206- "Failed to read corpus directory {}: {e}" ,
207- lang_corpus_dir. display( )
208- )
209- } )
210- . map ( |entry| entry. expect ( "Failed to read corpus entry" ) . path ( ) )
211- . filter ( |path| path. extension ( ) . is_some_and ( |ext| ext == "txt" ) )
212- . collect ( ) ;
213- corpus_files. sort ( ) ;
136+ let mut stems = Vec :: new ( ) ;
137+ collect_corpus_stems ( & lang_corpus_dir, & mut stems) ;
138+ stems. sort ( ) ;
139+ stems. dedup ( ) ;
214140
215- for corpus_path in corpus_files {
216- let content = fs:: read_to_string ( & corpus_path)
217- . unwrap_or_else ( |e| panic ! ( "Failed to read {}: {e}" , corpus_path. display( ) ) ) ;
218- let mut cases = parse_corpus ( & content) ;
141+ for stem in stems {
142+ let swift_path = stem. with_extension ( "swift" ) ;
143+ let output_path = stem. with_extension ( "output" ) ;
219144 let mut failures = Vec :: new ( ) ;
220- assert ! (
221- !cases. is_empty( ) ,
222- "No corpus cases found in {}" ,
223- corpus_path. display( )
224- ) ;
225145
226- for case in & mut cases {
227- match dump_raw_parse ( & lang, & case. input ) {
146+ // The canonical test case lives in the `.swift` file and is the
147+ // source of truth. An `.output` file without a `.swift` sibling is
148+ // an orphan: there is nothing to drive it from.
149+ if !swift_path. exists ( ) {
150+ panic ! (
151+ "Found {} with no corresponding test case; add {} or remove the output file" ,
152+ output_path. display( ) ,
153+ swift_path. display( )
154+ ) ;
155+ }
156+
157+ let swift_input = fs:: read_to_string ( & swift_path)
158+ . unwrap_or_else ( |e| panic ! ( "Failed to read {}: {e}" , swift_path. display( ) ) )
159+ . trim ( )
160+ . to_string ( ) ;
161+
162+ // Build the case from the existing `.output` file when present.
163+ // When it is missing (a freshly added `.swift`), start from an
164+ // empty case: update mode will generate it, and a normal test run
165+ // reports the missing output.
166+ let mut case = if output_path. exists ( ) {
167+ let content = fs:: read_to_string ( & output_path)
168+ . unwrap_or_else ( |e| panic ! ( "Failed to read {}: {e}" , output_path. display( ) ) ) ;
169+ parse_corpus ( & content)
170+ } else {
171+ if !update_mode {
172+ failures. push ( format ! (
173+ "Missing output file {}; run scripts/update-corpus.sh to generate it" ,
174+ output_path. display( )
175+ ) ) ;
176+ }
177+ CorpusCase {
178+ input : String :: new ( ) ,
179+ raw : String :: new ( ) ,
180+ expected : String :: new ( ) ,
181+ }
182+ } ;
183+
184+ {
185+ // The input section in the `.output` file is a generated copy
186+ // of the `.swift` source, kept only so reviewers can see the
187+ // source alongside its printed ASTs.
188+ if update_mode {
189+ case. input = swift_input. clone ( ) ;
190+ } else if output_path. exists ( ) && case. input . trim ( ) != swift_input {
191+ failures. push ( format ! (
192+ "Test case copy out of date in {}; rerun update-corpus to regenerate from {}" ,
193+ output_path. display( ) ,
194+ swift_path. display( )
195+ ) ) ;
196+ }
197+ // Ensure the AST passes below operate on the source of truth.
198+ let case_input = swift_input. clone ( ) ;
199+
200+ match dump_raw_parse ( & lang, & case_input) {
228201 Err ( e) => {
229202 failures. push ( format ! (
230- "Raw parse failed for {} in {}: {}" ,
231- case. name,
232- corpus_path. display( ) ,
203+ "Raw parse failed in {}: {}" ,
204+ output_path. display( ) ,
233205 e
234206 ) ) ;
235207 }
236208 Ok ( actual_raw) => {
237209 if update_mode {
238210 case. raw = actual_raw. trim ( ) . to_string ( ) ;
239- } else if case. raw . trim ( ) != actual_raw. trim ( ) {
211+ } else if output_path . exists ( ) && case. raw . trim ( ) != actual_raw. trim ( ) {
240212 failures. push ( format ! (
241- "Raw parse mismatch in {}: \" {}\" \n EXPECTED:\n \n {}\n \n ACTUAL:\n \n {}" ,
242- corpus_path. display( ) ,
243- case. name,
213+ "Raw parse mismatch in {}:\n EXPECTED:\n \n {}\n \n ACTUAL:\n \n {}" ,
214+ output_path. display( ) ,
244215 case. raw. trim( ) ,
245216 actual_raw. trim( )
246217 ) ) ;
247218 }
248219 }
249220 }
250221
251- match run_desugaring ( & lang, & case . input ) {
222+ match run_desugaring ( & lang, & case_input ) {
252223 Err ( e) => {
253224 failures. push ( format ! (
254- "Desugaring failed for {} in {}: {}" ,
255- case. name,
256- corpus_path. display( ) ,
225+ "Desugaring failed in {}: {}" ,
226+ output_path. display( ) ,
257227 e
258228 ) ) ;
259229 }
260230 Ok ( actual) => {
261231 let actual_dump = dump_ast_with_type_errors (
262232 & actual,
263233 actual. get_root ( ) ,
264- & case . input ,
234+ & case_input ,
265235 & output_schema,
266236 ) ;
267237 if update_mode {
268238 case. expected = actual_dump. trim ( ) . to_string ( ) ;
269- } else if case. expected . trim ( ) != actual_dump. trim ( ) {
239+ } else if output_path. exists ( )
240+ && case. expected . trim ( ) != actual_dump. trim ( )
241+ {
270242 failures. push ( format ! (
271- "Test failed in {}: \" {}\" \n EXPECTED:\n \n {}\n \n ACTUAL:\n \n {}" ,
272- corpus_path. display( ) ,
273- case. name,
243+ "Test failed in {}:\n EXPECTED:\n \n {}\n \n ACTUAL:\n \n {}" ,
244+ output_path. display( ) ,
274245 case. expected. trim( ) ,
275246 actual_dump. trim( )
276247 ) ) ;
@@ -282,12 +253,12 @@ fn test_corpus() {
282253 assert ! ( failures. is_empty( ) , "{}" , failures. join( "\n \n " ) + "\n \n " ) ;
283254
284255 if update_mode {
285- let updated = render_corpus ( & cases ) ;
286- let write_result = fs:: write ( & corpus_path , updated) ;
256+ let updated = render_corpus ( & case ) ;
257+ let write_result = fs:: write ( & output_path , updated) ;
287258 assert ! (
288259 write_result. is_ok( ) ,
289260 "Failed to update corpus file {}: {}" ,
290- corpus_path . display( ) ,
261+ output_path . display( ) ,
291262 write_result
292263 . err( )
293264 . map_or_else( String :: new, |e| e. to_string( ) )
0 commit comments