Skip to content

Commit ccd2324

Browse files
committed
fix(ir-gen): scope IfLet binding to its then-branch via alias table
Reported by @SiyuanSun0736 on PR #19. Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent d5f25c1 commit ccd2324

3 files changed

Lines changed: 133 additions & 30 deletions

File tree

src/ir_generator.ml

Lines changed: 77 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ type ir_context = {
6767
map_origin_variables: (string, (string * ir_value * (ir_value_desc * ir_type))) Hashtbl.t; (* var_name -> (map_name, key, underlying_info) *)
6868
(* Track inferred variable types for proper lookups *)
6969
variable_types: (string, ir_type) Hashtbl.t; (* var_name -> ir_type *)
70+
(* Active IfLet bindings: source name -> synthetic IR name, for the duration
71+
of the then-branch. Reads, simple assignments, and compound assignments
72+
of the source name are rewritten to the synthetic name; the synthetic
73+
name is what was actually declared in IR, so an outer variable of the
74+
same name is never clobbered when the backend hoists declarations. *)
75+
iflet_aliases: (string, string) Hashtbl.t;
7076
mutable current_program_type: program_type option;
7177
}
7278

@@ -92,6 +98,7 @@ let create_context ?(global_variables = []) ?(helper_functions = []) symbol_tabl
9298
tbl);
9399
map_origin_variables = Hashtbl.create 32;
94100
variable_types = Hashtbl.create 32;
101+
iflet_aliases = Hashtbl.create 4;
95102
current_program_type = None;
96103
helper_functions = (let tbl = Hashtbl.create 16 in
97104
List.iter (fun helper_name -> Hashtbl.add tbl helper_name ()) helper_functions;
@@ -557,13 +564,22 @@ let expand_map_operation ctx map_name operation key_val value_val_opt pos =
557564

558565
| _ -> failwith ("Unknown map operation: " ^ operation)
559566

567+
(** Resolve a source-level identifier to its current IR-level name.
568+
Returns the synthetic name if the identifier is currently bound by an
569+
enclosing IfLet, otherwise the name unchanged. *)
570+
let resolve_iflet_alias ctx name =
571+
match Hashtbl.find_opt ctx.iflet_aliases name with
572+
| Some synth -> synth
573+
| None -> name
574+
560575
(** Lower AST expressions to IR values *)
561576
let rec lower_expression ctx (expr : Ast.expr) =
562577
match expr.expr_desc with
563578
| Ast.Literal lit ->
564579
lower_literal lit expr.expr_pos
565580

566581
| Ast.Identifier name ->
582+
let name = resolve_iflet_alias ctx name in
567583
(* Check if this is a map identifier *)
568584
if Hashtbl.mem ctx.maps name then
569585
(* For map identifiers, create a map reference *)
@@ -1450,6 +1466,7 @@ and lower_statement ctx stmt =
14501466
())
14511467

14521468
| Ast.Assignment (name, expr) ->
1469+
let name = resolve_iflet_alias ctx name in
14531470
let value = lower_expression ctx expr in
14541471

14551472
(* Track if this assignment is from a map access *)
@@ -1501,6 +1518,7 @@ and lower_statement ctx stmt =
15011518
emit_instruction ctx assign_instr
15021519

15031520
| Ast.CompoundAssignment (name, op, expr) ->
1521+
let name = resolve_iflet_alias ctx name in
15041522
let value = lower_expression ctx expr in
15051523

15061524
(* Check if this is a global variable assignment *)
@@ -2042,14 +2060,19 @@ and lower_statement ctx stmt =
20422060

20432061
| Ast.CompoundFieldIndexAssignment (map_expr, key_expr, field, op, value_expr) ->
20442062
(* Desugar `map[k].field op= rhs` to:
2045-
if (var __cidx_field_N = map[k]) {
2063+
var __cidx_field_N = map[k]
2064+
if (__cidx_field_N != null) {
20462065
__cidx_field_N.field = __cidx_field_N.field op rhs
20472066
}
2048-
The IfLet lowering (above) handles the presence check; the field-store
2049-
lowers to a pointer-checked `ptr->field = ...` via IRStructFieldAssignment.
2050-
We look up the field's AST type from the map's value-struct definition so
2051-
that the synthesized FieldAccess/BinaryOp get correct expr_type — without
2052-
this the IR generator defaults to IRU32, mis-sizing wider fields. *)
2067+
The synthetic name is fresh, so it cannot collide with any user
2068+
variable — we go straight to a plain Declaration + If rather than
2069+
routing through `Ast.IfLet` (whose alpha-rename machinery is only
2070+
needed when the binding name comes from user source). The
2071+
field-store lowers to a pointer-checked `ptr->field = ...` via
2072+
IRStructFieldAssignment. We look up the field's AST type from the
2073+
map's value-struct definition so the synthesized FieldAccess /
2074+
BinaryOp get the correct expr_type — without this the IR generator
2075+
defaults to IRU32, mis-sizing wider fields. *)
20532076
let pos = stmt.stmt_pos in
20542077
let synth_name = generate_temp_variable ctx "cidx_field" in
20552078
let map_name = match map_expr.expr_desc with
@@ -2079,31 +2102,58 @@ and lower_statement ctx stmt =
20792102
let tmp_id = mk_expr (Ast.Identifier synth_name) in
20802103
let cur_field = mk_expr ?ty:field_ast_type (Ast.FieldAccess (tmp_id, field)) in
20812104
let bin = mk_expr ?ty:field_ast_type (Ast.BinaryOp (cur_field, op, value_expr)) in
2082-
let store = { Ast.stmt_desc = Ast.FieldAssignment (tmp_id, field, bin); stmt_pos = pos } in
2083-
let iflet = { Ast.stmt_desc = Ast.IfLet (synth_name, access, [store], None); stmt_pos = pos } in
2084-
lower_statement ctx iflet
2105+
let store =
2106+
{ Ast.stmt_desc = Ast.FieldAssignment (tmp_id, field, bin); stmt_pos = pos } in
2107+
let cond =
2108+
mk_expr ~ty:Ast.Bool
2109+
(Ast.BinaryOp (tmp_id, Ast.Ne, mk_expr (Ast.Literal Ast.NullLit))) in
2110+
lower_statement ctx
2111+
{ Ast.stmt_desc = Ast.Declaration (synth_name, None, Some access);
2112+
stmt_pos = pos };
2113+
lower_statement ctx
2114+
{ Ast.stmt_desc = Ast.If (cond, [store], None); stmt_pos = pos }
20852115

20862116
| Ast.IfLet (name, expr, then_stmts, else_opt) ->
20872117
(* Desugar `if (var name = expr) { T } else { E }` into:
2088-
var name = expr
2089-
if (name != null) { T } else { E }
2090-
The eBPF/userspace codegen rule for `IRMapAccess <op> NullLit` (and
2091-
the symmetric form for raw pointers) emits a pointer presence
2092-
check, so this lowers correctly without an extra dereference. *)
2118+
var __iflet_<name>_<N> = expr
2119+
if (__iflet_<name>_<N> != null) { T } else { E }
2120+
The synthetic name is what the IR actually declares, so an outer
2121+
variable of the same name is never clobbered when the backend
2122+
hoists declarations to function scope. References to `name` inside
2123+
`T` are redirected to the synthetic name through
2124+
`ctx.iflet_aliases`, which is set up only around the lowering of
2125+
the then-branch — the else-branch sees the un-aliased name. The
2126+
codegen rule for `IRMapAccess <op> NullLit` (and the symmetric
2127+
form for raw pointers) emits a pointer presence check, so this
2128+
lowers correctly without an extra dereference. *)
20932129
let pos = stmt.stmt_pos in
2094-
let decl_stmt = { Ast.stmt_desc = Ast.Declaration (name, None, Some expr); stmt_pos = pos } in
2095-
let name_ident = { Ast.expr_desc = Ast.Identifier name; expr_type = expr.Ast.expr_type;
2096-
expr_pos = pos; type_checked = false; program_context = None;
2097-
map_scope = None } in
2098-
let null_lit = { Ast.expr_desc = Ast.Literal Ast.NullLit; expr_type = None;
2099-
expr_pos = pos; type_checked = false; program_context = None;
2100-
map_scope = None } in
2101-
let cond = { Ast.expr_desc = Ast.BinaryOp (name_ident, Ast.Ne, null_lit);
2102-
expr_type = Some Ast.Bool; expr_pos = pos; type_checked = false;
2103-
program_context = None; map_scope = None } in
2104-
let if_stmt = { Ast.stmt_desc = Ast.If (cond, then_stmts, else_opt); stmt_pos = pos } in
2105-
lower_statement ctx decl_stmt;
2106-
lower_statement ctx if_stmt
2130+
let synth = generate_temp_variable ctx ("iflet_" ^ name) in
2131+
let mk_expr ?ty d =
2132+
{ Ast.expr_desc = d; expr_pos = pos; expr_type = ty;
2133+
type_checked = false; program_context = None; map_scope = None } in
2134+
lower_statement ctx
2135+
{ Ast.stmt_desc = Ast.Declaration (synth, None, Some expr); stmt_pos = pos };
2136+
let cond_val = lower_expression ctx (mk_expr ~ty:Ast.Bool
2137+
(Ast.BinaryOp
2138+
(mk_expr ?ty:expr.Ast.expr_type (Ast.Identifier synth),
2139+
Ast.Ne,
2140+
mk_expr (Ast.Literal Ast.NullLit)))) in
2141+
let collect_block stmts =
2142+
let saved = ctx.current_block in
2143+
ctx.current_block <- [];
2144+
List.iter (lower_statement ctx) stmts;
2145+
let instrs = List.rev ctx.current_block in
2146+
ctx.current_block <- saved;
2147+
instrs in
2148+
let prev_alias = Hashtbl.find_opt ctx.iflet_aliases name in
2149+
Hashtbl.replace ctx.iflet_aliases name synth;
2150+
let then_instrs = collect_block then_stmts in
2151+
(match prev_alias with
2152+
| Some p -> Hashtbl.replace ctx.iflet_aliases name p
2153+
| None -> Hashtbl.remove ctx.iflet_aliases name);
2154+
let else_instrs_opt = Option.map collect_block else_opt in
2155+
emit_instruction ctx
2156+
(make_ir_instruction (IRIf (cond_val, then_instrs, else_instrs_opt)) pos)
21072157

21082158
| Ast.For (var, start_expr, end_expr, body_stmts) ->
21092159
(* Analyze the loop to determine if it's bounded or unbounded *)

tests/test_compound_index_assignment.ml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,12 @@ var stats : hash<u32, Stats>(1024)
465465
let contains s =
466466
try let _ = Str.search_forward (Str.regexp_string s) c 0 in true
467467
with Not_found -> false in
468-
(* (a) pointer-typed synthetic binding initialised from the lookup pointer *)
468+
(* (a) pointer-typed synthetic binding initialised from the lookup pointer.
469+
The Phase 2 desugaring emits a plain `var __cidx_field_<N> = m[k]`
470+
(the synthetic name is fresh by construction, so the IfLet alpha-
471+
rename machinery is not needed and is bypassed). The codegen then
472+
produces `struct Stats* __cidx_field_<N> = __map_lookup_<M>` via the
473+
pointer-from-map-access path in IRVariableDecl. *)
469474
check bool "synthetic binding declared as a struct pointer" true
470475
(contains "struct Stats* __cidx_field_");
471476
let bad_value_init = contains

tests/test_iflet.ml

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,13 @@ var counters : hash<u32, u64>(1024)
219219
}
220220
|} in
221221
let c = codegen_ebpf source in
222+
(* The IfLet binding is alpha-renamed to a fresh synthetic name during IR
223+
lowering (see `subst_ident_stmts` in ir_generator.ml) so that an outer
224+
variable of the same name is not silently clobbered when the backend
225+
hoists declarations to function scope. The synthetic name has the
226+
form `__iflet_<orig>_<N>`. *)
222227
check bool "scalar binding declared as value, not pointer" true
223-
(contains_substr c "__u64 c =");
228+
(contains_substr c "__u64 __iflet_c_");
224229
check bool "binding init uses the dereffed value statement-expression" true
225230
(contains_substr c "__val = *(");
226231
check bool "presence check on the underlying lookup pointer" true
@@ -259,13 +264,55 @@ var stats : hash<u32, Stats>(1024)
259264
}
260265
|} in
261266
let c = codegen_ebpf source in
267+
(* Binding is alpha-renamed to `__iflet_s_<N>` — see the comment on
268+
`test_codegen_scalar_value_binding` for why. *)
262269
check bool "binding declared with value type, not pointer" true
263-
(contains_substr c "struct Stats s =");
270+
(contains_substr c "struct Stats __iflet_s_");
264271
check bool "value-typed binding uses deref-load init" true
265272
(contains_substr c "struct Stats __val");
266273
check bool "field write goes through the underlying lookup pointer" true
267274
(contains_substr c "->count =")
268275

276+
(** 11. Codegen (shadowing): an outer binding of the same name as the IfLet
277+
binding must survive both branches and remain referenceable after the
278+
if. The branch-local invariant the frontend enforces (binding visible
279+
only inside the then-branch) has to be preserved end-to-end through
280+
IR lowering — i.e., the inner binding cannot collapse onto the outer
281+
name in the generated C. *)
282+
let test_codegen_shadow_outer_binding () =
283+
let source = {|
284+
var counters : hash<u32, u64>(1024)
285+
286+
@xdp fn probe(ctx: *xdp_md) -> xdp_action {
287+
var c : u64 = 100
288+
if (var c = counters[1]) {
289+
return XDP_DROP
290+
}
291+
if (c == 100) {
292+
return XDP_PASS
293+
}
294+
return XDP_DROP
295+
}
296+
|} in
297+
let c = codegen_ebpf source in
298+
(* The outer `c = 100` declaration must remain literally — the inner binding
299+
must not reuse the name. *)
300+
check bool "outer c declared with literal value" true
301+
(contains_substr c "__u64 c = 100");
302+
(* The outer `c` must NOT be reassigned by the IfLet's lowering. The bug
303+
symptom was a statement-expression assignment `c = ({ ... });` that
304+
clobbered the outer binding with the lookup result (or zero on miss).
305+
A bare `c = ({` (no `__u64` prefix) is the giveaway. *)
306+
let outer_clobber =
307+
try let _ = Str.search_forward
308+
(Str.regexp "[^_a-zA-Z0-9]c = ({") c 0 in true
309+
with Not_found -> false in
310+
check bool "outer c not clobbered by iflet init" false outer_clobber;
311+
(* The post-if comparison `c == 100` must reference the outer `c`, not be
312+
rewritten into another fresh map deref. *)
313+
check bool "post-if uses outer c by name" true
314+
(contains_substr c "(c == 100)")
315+
269316
let suite = [
270317
"parse_iflet_no_else", `Quick, test_parse_iflet_no_else;
271318
"parse_iflet_with_else", `Quick, test_parse_iflet_with_else;
@@ -277,6 +324,7 @@ let suite = [
277324
"codegen_struct_in_place", `Quick, test_codegen_struct_in_place;
278325
"codegen_scalar_value_binding", `Quick, test_codegen_scalar_value_binding;
279326
"codegen_struct_value_binding_shape", `Quick, test_codegen_struct_value_binding_shape;
327+
"codegen_shadow_outer_binding", `Quick, test_codegen_shadow_outer_binding;
280328
]
281329

282330
let () =

0 commit comments

Comments
 (0)