//usr/bin/env sh -c 'cc "$0" -Wall -o /tmp/script && /tmp/script "$@"' "$0" "$@"; exit
// This script allows you to write NESTED_FUNCTION macros in standard C programs
// without depending on compiler extensions. It is intended to be run as a
// preprocessing step before compilation. It generates a separate file that you
// can include in your project which defines all the nested functions.
//
// The generated file also defines the NESTED_FUNCTION macro. The macro discards
// the function body and replaces it with a function pointer to one of the
// functions in the generated file. You must #define NESTED_FUNCTION_NAME at the
// top of each source file so that the functions can be uniquely identified.
//
// The script preserves the original whitespace and comments in nested functions,
// and emits #line directives so that compiler errors refer to the source location.
// The generated file also includes header guards, and the script will print a
// warning to stderr if NESTED_FUNCTION_NAME is not defined.
//
// Note that nested functions are not lambdas. They do not capture enclosing
// scope so you must provide any context that is needed via function arguments.
// This preprocessor script is MIT licensed, authored by Chris Patuzzo, 2026.
// See https://github.com/tuzz/nested_functions for more information.
//
// Example usage:
//
// ```c
// #include "my_nested_functions.c"
// #include <stdio.h>
//
// #define NESTED_FUNCTION_NAME nested_function_src_main
//
// int main(void) {
// void *fn = NESTED_FUNCTION(int, (int a, int b), {
// return a + b; // Add two numbers.
// });
//
// int (*sum)(int, int) = fn;
// printf("The sum is %d.\n", sum(3, 4));
// }
// ```
//
// ```sh
// chmod a+x extract_nested_functions.c && ./extract_nested_functions.c src/* > src/my_nested_functions.c && cc src/main.c && ./a.out
// The sum is 7.
// ```
//
// ```c
// // This file was generated by the extract_nested_functions.c script.
//
// #ifndef NESTED_FUNCTIONS_SRC_MAIN
// #define NESTED_FUNCTIONS_SRC_MAIN
//
// #ifndef NESTED_FUNCTION
// #define __NESTED_FUNCTION_CONCAT(a, b, c) a##b##c
// #define _NESTED_FUNCTION_CONCAT(a, b, c) __NESTED_FUNCTION_CONCAT(a, b, c)
// #define NESTED_FUNCTION(return_type, params, ...) _NESTED_FUNCTION_CONCAT(NESTED_FUNCTION_NAME, _line_, __LINE__)
// #endif // NESTED_FUNCTION
//
// #line 7 "src/main.c"
// static int nested_function_src_main_line_9(int a, int b) {
// #line 8 "src/main.c"
// return a + b; // Add two numbers.
// }
//
// #endif // NESTED_FUNCTIONS_SRC_MAIN
// ```
//
// Note that "line_9" refers to the closing line of the nested function. You
// should use the same compiler for running the preprocessor and compiling your
// program to avoid any differences in how the __LINE__ macro is evaluated.
#include <ctype.h>
#include <stdio.h>
#include <string.h>
// Capacity of the static input buffer. scan_file reads at most
// MAX_FILE_SIZE - 1 bytes so there is always room for a '\0' terminator.
#define MAX_FILE_SIZE (8 * 1024 * 1024) // 8 MiB
// Contents of the input file currently being scanned (NUL-terminated by scan_file).
static char file_content[MAX_FILE_SIZE];
// Scanner state shared by all helpers: byte offset into file_content, current
// source line (scan_file starts it at 1), and the number of bytes read.
static int cursor, line, file_size;
// Looks at the character under the cursor without consuming it.
// Yields '\0' once the cursor has reached the end of the loaded file.
static char peek(void) {
  if (cursor >= file_size) {
    return '\0';
  }
  return file_content[cursor];
}
// Consumes the character under the cursor and returns it. Every consumed
// newline bumps the line counter, which is what later gives generated names
// such as nested_function_test_line_57 their line number.
// No bounds check is performed here; callers guard against end of file.
static char advance(void) {
  char consumed = file_content[cursor];
  cursor++;
  if (consumed == '\n') {
    line++;
  }
  return consumed;
}
// Moves the cursor past any run of spaces, tabs, newlines and carriage
// returns. Used between the three arguments of
// NESTED_FUNCTION(return_type, (params), { body }).
static void skip_whitespace(void) {
  while (cursor < file_size) {
    switch (peek()) {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        advance();
        break;
      default:
        return;
    }
  }
}
// Consumes the remainder of a "..." string literal; the opening quote must
// already have been consumed. A backslash swallows the character after it, so
// an escaped \" does not terminate the literal. Stops at end of file if the
// literal is never closed.
static void skip_string(void) {
  while (cursor < file_size) {
    switch (advance()) {
      case '"':
        return;
      case '\\':
        if (cursor < file_size) {
          advance(); // The escaped character, whatever it is.
        }
        break;
      default:
        break;
    }
  }
}
// Consumes the remainder of a '...' character literal; the opening quote must
// already have been consumed. A backslash swallows the character after it, so
// '\'' is handled. Stops at end of file if the literal is never closed.
static void skip_char_literal(void) {
  while (cursor < file_size) {
    char ch = advance();
    if (ch == '\'') {
      return;
    }
    if (ch == '\\' && cursor < file_size) {
      advance(); // The escaped character, whatever it is.
    }
  }
}
// Moves the cursor to the end of a // line comment. The terminating newline
// is left unconsumed, so the caller's next advance() still counts the line.
static void skip_line_comment(void) {
  for (; cursor < file_size; cursor++) {
    if (file_content[cursor] == '\n') {
      break;
    }
  }
}
// Consumes the remainder of a /* block comment */ up to and including the
// closing delimiter. Newlines inside the comment go through advance() so the
// line counter stays accurate. Stops at end of file if the comment is unclosed.
static void skip_block_comment(void) {
  while (cursor < file_size) {
    int at_terminator = cursor + 1 < file_size
      && file_content[cursor] == '*'
      && file_content[cursor + 1] == '/';
    if (at_terminator) {
      cursor += 2;
      return;
    }
    advance();
  }
}
// Skips a preprocessor directive (#define, #include, etc.) up to, but not
// including, the newline that ends it. A backslash immediately followed by a
// line ending continues the directive onto the next line; both "\n" and
// "\r\n" line endings are recognised, so multi-line macros in files with
// Windows line endings are skipped in full rather than being scanned as code.
// Every character is consumed through advance() to keep the line count right.
static void skip_preprocessor_directive(void) {
  while (cursor < file_size) {
    char c = file_content[cursor];
    if (c == '\n') {
      return;
    }
    if (c == '\\') {
      // Look past an optional '\r' to see whether this backslash is a
      // line continuation.
      int newline_at = cursor + 1;
      if (newline_at < file_size && file_content[newline_at] == '\r') {
        newline_at++;
      }
      if (newline_at < file_size && file_content[newline_at] == '\n') {
        while (cursor <= newline_at) {
          advance();
        }
        continue;
      }
    }
    advance();
  }
}
// If the cursor sits at the start of a string literal, character literal,
// line comment or block comment, consumes that whole construct and returns 1.
// Otherwise leaves the cursor untouched and returns 0. Callers run this before
// every character-level check so that delimiters inside such constructs are
// never interpreted.
//
// Example: the '}' in printf("}") won't be mistaken for the end of a body.
static int skip_non_code(void) {
  switch (peek()) {
    case '"':
      advance();
      skip_string();
      return 1;
    case '\'':
      advance();
      skip_char_literal();
      return 1;
    case '/': {
      if (cursor + 1 >= file_size) {
        return 0;
      }
      char following = file_content[cursor + 1];
      if (following != '/' && following != '*') {
        return 0; // A lone '/', i.e. a division operator.
      }
      advance();
      advance();
      if (following == '/') {
        skip_line_comment();
      } else {
        skip_block_comment();
      }
      return 1;
    }
    default:
      return 0;
  }
}
// Consumes input until the delimiter that matches an already-consumed opening
// delimiter has been consumed too, e.g. '(' with ')' or '{' with '}'. Nested
// pairs are tracked, and strings, character literals and comments are skipped
// wholesale so that a ")" inside them cannot close the match early. If the
// file ends first, the cursor is simply left at the end.
//
// Example input:  cursor is just past the '{' in { printf("}"); }
//         result: cursor is just past the final '}'
static void find_balanced(char open, char close) {
  int unmatched = 1;
  while (unmatched > 0 && cursor < file_size) {
    if (!skip_non_code()) {
      char ch = advance();
      unmatched += (ch == open);
      unmatched -= (ch == close);
    }
  }
}
// Advances the cursor to the first occurrence of sep that is at the top level,
// i.e. not inside parentheses, braces, strings, character literals or
// comments. The separator itself is not consumed. Returns the cursor position
// at entry, so the caller gets the [start, cursor) range. If sep is never
// found the cursor ends up at end of file.
//
// Used to extract the return type from NESTED_FUNCTION(void, ...), which ends
// at the first top-level ','.
static int read_until(char sep) {
  const int start = cursor;
  while (cursor < file_size) {
    if (skip_non_code()) {
      continue;
    }
    char ch = peek();
    if (ch == sep) {
      break;
    }
    advance();
    if (ch == '(') {
      find_balanced('(', ')');
    } else if (ch == '{') {
      find_balanced('{', '}');
    }
  }
  return start;
}
// Turns a file path into a fragment that is safe to use inside a C identifier.
//   - The file extension (from the last '.' of the final path component) is
//     dropped.
//   - Every run of non-alphanumeric characters becomes a single underscore.
//   - Leading and trailing underscores are removed.
// At most cap - 1 characters are written to out, followed by a '\0'.
//
// Example: "src/entities/test.c"       -> "src_entities_test"
// Example: "./src/entities/my-level.c" -> "src_entities_my_level"
static void sanitize_path(const char *path, char *out, int cap) {
  // Find where to stop copying: the last '.' seen after the last path
  // separator, or the end of the string if there is no such dot.
  const char *stop = NULL;
  const char *scan = path;
  while (*scan != '\0') {
    switch (*scan) {
      case '.':
        stop = scan;
        break;
      case '/':
      case '\\':
        stop = NULL; // Any dot seen so far was in a directory name.
        break;
      default:
        break;
    }
    scan++;
  }
  if (stop == NULL) {
    stop = scan;
  }

  int len = 0;
  for (const char *src = path; src != stop && len + 1 < cap; src++) {
    if (isalnum((unsigned char)*src)) {
      out[len++] = *src;
    } else if (len > 0 && out[len - 1] != '_') {
      out[len++] = '_'; // One underscore per run; none at the very start.
    }
  }

  while (len > 0 && out[len - 1] == '_') {
    len--;
  }
  out[len] = '\0';
}
// Upper-cases a NUL-terminated string in place via toupper(). Used to build
// the header guard name.
static void to_upper(char *s) {
  char *ch = s;
  while (*ch != '\0') {
    *ch = toupper((unsigned char)*ch);
    ch++;
  }
}
// Narrows the half-open range [*start, *end) of buf in place:
//   - from the front, only '\n' and '\r' are dropped, so leading blank lines
//     go away but the first surviving line keeps its indentation;
//   - from the back, '\n', '\r', spaces and tabs are all dropped.
static void trim_body(const char *buf, int *start, int *end) {
  int first = *start;
  int last = *end;

  while (first < last && (buf[first] == '\n' || buf[first] == '\r')) {
    first++;
  }

  while (last > first) {
    char tail = buf[last - 1];
    if (tail != '\n' && tail != '\r' && tail != ' ' && tail != '\t') {
      break;
    }
    last--;
  }

  *start = first;
  *end = last;
}
// Emits the body of a nested function to stdout, one source line at a time,
// each terminated by '\n'. The range [start, end) of buf is first passed
// through trim_body so the generated function has no stray blank line right
// after its '{' or before its '}'.
//
// The text is deliberately NOT dedented. The #line directives emitted next to
// the body make the compiler report errors against the original source, and
// keeping the original leading whitespace means the reported column matches
// the source as well.
//
// Example input (8-space body indent in the source):
//
//     NESTED_FUNCTION(void, (int x), {
//         printf("hello\n");
//         if (x) {
//             printf("world\n");
//         }
//     })
//
// Output (indentation preserved verbatim):
//
//   static void nested_function_test_line_57(int x) {
//         printf("hello\n");
//         if (x) {
//             printf("world\n");
//         }
//   }
static void write_body(const char *buf, int start, int end) {
  trim_body(buf, &start, &end);
  int pos = start;
  while (pos < end) {
    // Find the end of the current line (exclusive of its newline).
    int stop = pos;
    while (stop < end && buf[stop] != '\n') {
      stop++;
    }
    fwrite(buf + pos, 1, stop - pos, stdout);
    fputc('\n', stdout);
    pos = stop + 1; // Step over the newline, or past the end on the last line.
  }
}
// Works out the name prefix for functions generated from the current file and
// stores it in function_name.
//
// The loaded file content is searched (plain substring search, so the first
// occurrence anywhere in the file wins) for
//   #define NESTED_FUNCTION_NAME <identifier>
// and the identifier that follows is used. function_name_was_explicit is set
// to 1 only in that case. When no usable identifier is found, the name falls
// back to "nested_function_<sanitized_path>".
//
// Example: #define NESTED_FUNCTION_NAME nested_function_test
//          -> function_name = "nested_function_test"
//
// Example: no define, path = "src/entities/test.c"
//          -> function_name = "nested_function_src_entities_test"
static char function_name[512];
static int function_name_was_explicit;
static void detect_function_name(const char *path) {
  static const char directive[] = "#define NESTED_FUNCTION_NAME ";

  function_name[0] = '\0';
  function_name_was_explicit = 0;

  const char *found = strstr(file_content, directive);
  if (found != NULL) {
    const char *ident = found + (sizeof(directive) - 1);
    while (*ident == ' ' || *ident == '\t') {
      ident++;
    }
    // Copy the run of identifier characters, leaving room for the terminator.
    size_t len = 0;
    while (len + 1 < sizeof(function_name)
           && (isalnum((unsigned char)ident[len]) || ident[len] == '_')) {
      function_name[len] = ident[len];
      len++;
    }
    function_name[len] = '\0';
    function_name_was_explicit = len > 0;
  }

  if (function_name[0] == '\0') {
    char sanitized[256];
    sanitize_path(path, sanitized, sizeof(sanitized));
    snprintf(function_name, sizeof(function_name), "nested_function_%s", sanitized);
  }
}
// Incremented once for every function emitted, across all input files.
static int total_function_count;
// Reads one source file and scans it for NESTED_FUNCTION(type, (params), { body })
// invocations, writing each one to stdout as a top-level static function named
// <function_name>_line_<N>, where N is the source line of the body's closing
// brace. Each function is accompanied by two #line directives so the compiler
// reports errors against the original source file, line and column rather
// than the generated file.
//
// Strings, character literals, comments and preprocessor directives are
// skipped, and the token is only recognised when it is not the tail of a
// longer identifier (so MY_NESTED_FUNCTION( is left alone).
//
// Returns 0 on success. Returns 1, after printing a message to stderr, if the
// file cannot be opened or a NESTED_FUNCTION invocation is malformed. Warnings
// (oversized input, missing NESTED_FUNCTION_NAME) go to stderr without
// affecting the return value.
static int scan_file(const char *path) {
  FILE *f = fopen(path, "rb");
  if (!f) { fprintf(stderr, "Failed to open %s\n", path); return 1; }
  file_size = (int)fread(file_content, 1, MAX_FILE_SIZE - 1, f);
  file_content[file_size] = '\0';
  // A completely full buffer with more data still pending means the input did
  // not fit. Keep scanning what was read, but say so instead of silently
  // dropping the rest.
  int truncated = file_size == MAX_FILE_SIZE - 1 && fgetc(f) != EOF;
  fclose(f);
  if (truncated) {
    fprintf(stderr, "WARNING: %s is larger than %d bytes; only the first %d bytes will be scanned\n", path, file_size, file_size);
  }

  detect_function_name(path);

  const char *token = "NESTED_FUNCTION(";
  int token_len = strlen(token);
  int file_function_count = 0;
  cursor = 0;
  line = 1;

  while (cursor < file_size) {
    if (skip_non_code()) continue;
    if (peek() == '#') { skip_preprocessor_directive(); continue; }

    int at_token = cursor + token_len <= file_size
      && memcmp(&file_content[cursor], token, token_len) == 0;
    if (at_token && cursor > 0) {
      // Reject matches that are merely the tail of a longer identifier.
      unsigned char before = (unsigned char)file_content[cursor - 1];
      if (isalnum(before) || before == '_') at_token = 0;
    }
    if (!at_token) { advance(); continue; }

    int fn_line = line; // Source line of the NESTED_FUNCTION( token; anchors the generated signature below.
    cursor += token_len;

    // Extract the return type: NESTED_FUNCTION(>void<, ...)
    skip_whitespace();
    int type_start = read_until(',');
    int type_end = cursor;
    while (type_end > type_start && file_content[type_end - 1] == ' ') type_end--;
    if (peek() != ',') { fprintf(stderr, "%s:%d: expected ',' after return type\n", path, line); return 1; }
    advance(); // Skip comma.

    // Extract the parameter list: NESTED_FUNCTION(void, >(int x)<, ...)
    skip_whitespace();
    if (peek() != '(') { fprintf(stderr, "%s:%d: expected '(' for parameter list\n", path, line); return 1; }
    advance();
    int params_start = cursor;
    find_balanced('(', ')');
    int params_end = cursor - 1; // Exclude the closing ')'.
    skip_whitespace();
    if (peek() != ',') { fprintf(stderr, "%s:%d: expected ',' after parameter list\n", path, line); return 1; }
    advance();

    // Extract the function body: NESTED_FUNCTION(void, (int x), >{ ... }<)
    skip_whitespace();
    if (peek() != '{') { fprintf(stderr, "%s:%d: expected '{' for function body\n", path, line); return 1; }
    advance();
    int body_brace_line = line; // Source line of the body's opening brace; anchors the body below.
    int body_start = cursor;
    find_balanced('{', '}');
    int body_end = cursor - 1; // Exclude the closing '}'.

    // trim_body (in write_body) skips leading blank lines by stepping over \n
    // and \r, so count those same characters here to find the source line of
    // the first line we'll actually emit. The two #line directives below then
    // map the signature and the body back to the source for the compiler.
    int body_first_line = body_brace_line;
    for (int k = body_start; k < body_end && (file_content[k] == '\n' || file_content[k] == '\r'); k++) {
      if (file_content[k] == '\n') body_first_line++;
    }

    fputc('\n', stdout);
    // Anchor the signature to the NESTED_FUNCTION( line, then the body to its first line.
    // Two directives (not one) keep both exact even when the NESTED_FUNCTION(...) header
    // spans multiple lines before the opening brace.
    fprintf(stdout, "#line %d \"%s\"\n", fn_line, path);
    fprintf(stdout, "static ");
    fwrite(&file_content[type_start], 1, type_end - type_start, stdout);
    // `line` is now the line of the body's closing brace.
    fprintf(stdout, " %s_line_%d(", function_name, line);
    fwrite(&file_content[params_start], 1, params_end - params_start, stdout);
    fprintf(stdout, ") {\n");
    fprintf(stdout, "#line %d \"%s\"\n", body_first_line, path);
    write_body(file_content, body_start, body_end);
    fprintf(stdout, "}\n");

    total_function_count++;
    file_function_count++;
  }

  if (file_function_count > 0 && !function_name_was_explicit) {
    fprintf(stderr, "WARNING: %s has %d nested function(s) but no '#define NESTED_FUNCTION_NAME ...'\n", path, file_function_count);
    fprintf(stderr, " defaulting to: %s\n", function_name);
    fprintf(stderr, " add '#define NESTED_FUNCTION_NAME %s' to the top of the file\n", function_name);
  }
  return 0;
}
// Entry point. Takes one or more source file paths, and writes a single
// generated file to stdout consisting of:
//   1. a header guard derived from the first input path,
//   2. the NESTED_FUNCTION macro, behind its own #ifndef so users can
//      override it with their own definition,
//   3. the functions extracted from every input file, in argument order.
// Returns 1 on a usage error, the first non-zero scan_file result if a file
// fails (in which case the closing #endif is not written), and 0 otherwise.
int main(int argc, char **argv) {
  if (argc < 2) {
    fprintf(stderr, "Usage: ./extract_nested_functions.c <file1.c> ...\n");
    return 1;
  }

  // Header guard derived from the first input file.
  char guard[512];
  sanitize_path(argv[1], guard, sizeof(guard));
  to_upper(guard);

  fprintf(stdout,
    "// This file was generated by the extract_nested_functions.c script.\n\n"
    "#ifndef NESTED_FUNCTIONS_%s\n"
    "#define NESTED_FUNCTIONS_%s\n",
    guard, guard);

  // Emit the NESTED_FUNCTION macro so users don't need a separate header.
  fprintf(stdout,
    "\n#ifndef NESTED_FUNCTION\n"
    "#define __NESTED_FUNCTION_CONCAT(a, b, c) a##b##c\n"
    "#define _NESTED_FUNCTION_CONCAT(a, b, c) __NESTED_FUNCTION_CONCAT(a, b, c)\n"
    "#define NESTED_FUNCTION(return_type, params, ...) _NESTED_FUNCTION_CONCAT(NESTED_FUNCTION_NAME, _line_, __LINE__)\n"
    "#endif // NESTED_FUNCTION\n");

  // Extract and emit functions from all files, stopping at the first failure.
  int status = 0;
  for (int arg = 1; arg < argc && status == 0; arg++) {
    status = scan_file(argv[arg]);
  }
  if (status != 0) {
    return status;
  }

  fprintf(stdout, "\n#endif // NESTED_FUNCTIONS_%s\n", guard);
  return 0;
}