Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 87 additions & 44 deletions compiler/rustc_codegen_ssa/src/mir/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ enum MergingSucc {

/// Indicates to the call terminator codegen whether a call
/// is a normal call or an explicit tail call.
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Clone, Copy)]
enum CallKind {
Normal,
Tail,
Expand Down Expand Up @@ -1195,8 +1195,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {

// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument that is allocated in the caller's stack frame
// would be passed as a pointer into the callee's stack frame.
// Normally an indirect pointer that is allocated in the caller's stack frame
// would be passed as an argument.
// For tail calls, that would be unsound, because the caller's
// stack frame is overwritten by the callee's stack frame.
//
Expand All @@ -1216,6 +1216,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
// }
// ```
let mut tail_call_temporaries = vec![];
let mut tail_call_tupled_temporary = None;
if kind == CallKind::Tail {
tail_call_temporaries = vec![None; first_args.len()];
// Copy the arguments that use `PassMode::Indirect { on_stack: false , ..}`
Expand All @@ -1227,11 +1228,21 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {

let op = self.codegen_operand(bx, &arg.node);
let tmp = PlaceRef::alloca(bx, op.layout);
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
tmp.storage_live(bx);
op.store_with_annotation(bx, tmp);

tail_call_temporaries[i] = Some(tmp);
}
// Copy the tuple, which may be one of the caller’s arguments, to a temporary stack allocation.
if let Some(untuple) = untuple {
let tuple = self.codegen_operand(bx, &untuple.node);
if let Ref(_) = tuple.val {
let tmp = PlaceRef::alloca(bx, tuple.layout);
tmp.storage_live(bx);
tuple.store_with_annotation(bx, tmp);
tail_call_tupled_temporary = Some(tmp);
}
}
}

// When generating arguments we sometimes introduce temporary allocations with lifetime
Expand Down Expand Up @@ -1289,69 +1300,60 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
}

let by_move = if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode
let tmp = if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode
&& kind == CallKind::Tail
{
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
//
// Normally an indirect argument that is allocated in the caller's stack frame
// would be passed as a pointer into the callee's stack frame.
// Normally an indirect pointer that is allocated in the caller's stack frame
// would be passed as an argument.
// For tail calls, that would be unsound, because the caller's
// stack frame is overwritten by the callee's stack frame.
//
// To handle the case, we introduce `tail_call_temporaries` to copy arguments into
// temporaries, then copy back to the caller's argument slots.
// Finally, we pass the caller's argument slots as arguments.
//
// To do that, the argument must be MUST-by-move value.

@dianqk dianqk Jun 22, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MUST-by-move is removed in this PR.
FYI @nikic

View changes since the review

// Finally, we pass the caller's argument slots as arguments which is implemented in
// `codegen_argument`.
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(fn_span, "missing temporary for indirect tail call argument #{i}")
};

let local = self.mir.args_iter().nth(i).unwrap();

match &self.locals[local] {
Comment on lines -1311 to -1313

@dianqk dianqk Jun 22, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a reliable way to get the LLVM value for the argument. We are allowed to introduce a copy alloca in this case:

let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
.
cc @folkertdev

View changes since the review

LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
op.val = arg.val;
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};

bx.lifetime_end(tmp.val.llval, tmp.layout.size);
true
op.val = Ref(tmp.val);
Some(tmp)
} else {
matches!(arg.node, mir::Operand::Move(_))
None
};

self.codegen_argument(
bx,
op,
by_move,
matches!(arg.node, mir::Operand::Move(_)),
&mut llargs,
&fn_abi.args[i],
&mut lifetime_ends_after_call,
fn_span,
kind,
);

if let Some(tmp) = tmp {
tmp.storage_dead(bx);
}
}
let num_untupled = untuple.map(|tup| {
// For untupled arguments, it is safe to store them directly into the caller's
// argument slots without temporaries, because the untupled arguments are
// always passed through a tuple alloca. The alloca serves as a temporary
// that does not overlap with any caller argument.
// No temporaries are needed for the caller's untupled arguments either,
// because they are used last.
Comment on lines +1342 to +1347

@WaffleLapkin WaffleLapkin Jun 23, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't quite get this. First of all, this should probably mention tail calls, as otherwise it doesn't make sense why anything here wouldn't be ok.

But second of all, I checked out your branch locally and I'm getting incorrect codegen (rustc t.rs --emit=llvm-ir -O):

#![feature(unboxed_closures)]
#![feature(explicit_tail_calls)]
#![crate_type = "lib"]

type Tuple = (u64, u64, u64, u64);

trait X {
    extern "rust-call" fn f(self, args: Tuple) -> u8;
    extern "rust-call" fn g(self, args: Tuple) -> u8;
}

impl X for Tuple {
    extern "rust-call" fn f(self, args: Tuple) -> u8 {
        become args.g(self)
    }

    #[inline(never)]
    extern "rust-call" fn g(self, args: Tuple) -> u8 {
        std::hint::black_box((self, args));
        0
    }
}
; <(u64, u64, u64, u64) as t::X>::f
; Function Attrs: nounwind nonlazybind uwtable
define noundef i8 @_RNvXCs146zwHEvxau_1tTyyyyENtB2_1X1f(ptr dead_on_return noalias nofree noundef align 8 captures(none) dereferenceable(32) initializes((0, 32)) %self, i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3) unnamed_addr #0 {
start:
  store i64 %0, ptr %self, align 8
  %.sroa.4.0.self.sroa_idx = getelementptr inbounds nuw i8, ptr %self, i64 8
  store i64 %1, ptr %.sroa.4.0.self.sroa_idx, align 8
  %.sroa.5.0.self.sroa_idx = getelementptr inbounds nuw i8, ptr %self, i64 16
  store i64 %2, ptr %.sroa.5.0.self.sroa_idx, align 8
  %.sroa.6.0.self.sroa_idx = getelementptr inbounds nuw i8, ptr %self, i64 24
  store i64 %3, ptr %.sroa.6.0.self.sroa_idx, align 8
; call <(u64, u64, u64, u64) as t::X>::g
  %4 = musttail call noundef i8 @_RNvXCs146zwHEvxau_1tTyyyyENtB2_1X1g(ptr noalias nofree noundef nonnull align 8 captures(address) dereferenceable(32) %self, i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3) #4
  ret i8 %4
}

Granted, this code is weird and I couldn't come up with a more normal looking one, but I don't get the justification for why this is sound...

View changes since the review

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thanks for the case.

self.codegen_arguments_untupled(
bx,
&tup.node,
tail_call_tupled_temporary,
&mut llargs,
&fn_abi.args[first_args.len()..],
&mut lifetime_ends_after_call,
fn_span,
kind,
)
});

Expand Down Expand Up @@ -1382,6 +1384,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
&mut llargs,
last_arg,
&mut lifetime_ends_after_call,
fn_span,
kind,
);
}

Expand Down Expand Up @@ -1701,6 +1705,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
llargs: &mut Vec<Bx::Value>,
arg: &ArgAbi<'tcx, Ty<'tcx>>,
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
fn_span: Span,
kind: CallKind,
) {
match arg.mode {
PassMode::Ignore => return,
Expand Down Expand Up @@ -1753,23 +1759,34 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
PassMode::Ignore | PassMode::Pair(..) => unreachable!("handled above"),
},
Ref(op_place_val) => match arg.mode {
PassMode::Indirect { attrs, on_stack, .. } => {
PassMode::Indirect { attrs, meta_attrs, on_stack, .. } => {
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
// alignment requirements may be higher than the type's alignment, so copy
// to a higher-aligned alloca.
let required_align = match attrs.pointee_align {
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
None => arg.layout.align.abi,
};
// Copy to an alloca when the argument is neither by-val nor by-move.
if op_place_val.align < required_align || (!on_stack && !by_move) {
if kind == CallKind::Tail && !on_stack {
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
// We pass the caller's argument slots as arguments,
// because the caller's stack frame is overwritten by the callee's stack frame.
if meta_attrs.is_some() {
span_bug!(fn_span, "unsized types are not supported")
};
let caller_arg =
PlaceRef::new_sized(bx.get_param(llargs.len()), arg.layout);
op.store_with_annotation(bx, caller_arg);
(caller_arg.val.llval, caller_arg.val.align, true)
} else if op_place_val.align >= required_align && (on_stack || by_move) {
// We can skip copy to an alloca when the argument is by-val or by-move.
(op_place_val.llval, op_place_val.align, true)
} else {
let scratch = PlaceValue::alloca(bx, arg.layout.size, required_align);
bx.lifetime_start(scratch.llval, arg.layout.size);
op.store_with_annotation(bx, scratch.with_type(arg.layout));
lifetime_ends_after_call.push((scratch.llval, arg.layout.size));
(scratch.llval, scratch.align, true)
} else {
(op_place_val.llval, op_place_val.align, true)
}
}
_ => (op_place_val.llval, op_place_val.align, true),
Expand Down Expand Up @@ -1846,9 +1863,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
&mut self,
bx: &mut Bx,
operand: &mir::Operand<'tcx>,
tmp: Option<PlaceRef<'tcx, Bx::Value>>,
llargs: &mut Vec<Bx::Value>,
args: &[ArgAbi<'tcx, Ty<'tcx>>],
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
fn_span: Span,
kind: CallKind,
) -> usize {
let tuple = self.codegen_operand(bx, operand);
let by_move = matches!(operand, mir::Operand::Move(_));
Expand All @@ -1858,7 +1878,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
if place_val.llextra.is_some() {
bug!("closure arguments must be sized");
}
let tuple_ptr = place_val.with_type(tuple.layout);
let tuple_ptr = if kind == CallKind::Tail {
tmp.unwrap_or_else(|| {
span_bug!(
fn_span,
"missing temporary for indirect tail call argument #untupled"
)
})
} else {
place_val.with_type(tuple.layout)
};
for i in 0..tuple.layout.fields.count() {
let field_ptr = tuple_ptr.project_field(bx, i);
let field = bx.load_operand(field_ptr);
Expand All @@ -1869,13 +1898,27 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
llargs,
&args[i],
lifetime_ends_after_call,
fn_span,
kind,
);
}
if let Some(tmp) = tmp {
tmp.storage_dead(bx);
}
} else {
// If the tuple is immediate, the elements are as well.
for i in 0..tuple.layout.fields.count() {
let op = tuple.extract_field(self, bx, i);
self.codegen_argument(bx, op, by_move, llargs, &args[i], lifetime_ends_after_call);
self.codegen_argument(
bx,
op,
by_move,
llargs,
&args[i],
lifetime_ends_after_call,
fn_span,
kind,
);
}
}
tuple.layout.fields.count()
Expand Down
126 changes: 126 additions & 0 deletions tests/codegen-llvm/tail-call-untuple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
//! Regression test for issue <https://github.com/rust-lang/rust/issues/158017>.
// Checks that we pass the caller's argument slots as arguments at tail call,
// because the caller's stack frame is overwritten by the callee's stack frame.
// In LLVM, Calls marked 'tail' cannot read or write allocas from the current frame
// because the current frame might be destroyed by the time they run. These writes will be
// eliminated by DSE.
//@ add-minicore
//@ revisions: x64-linux i686-linux i686-windows
//@ compile-flags: -C opt-level=3
//@[x64-linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[x64-linux] needs-llvm-components: x86
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
//@[i686-linux] needs-llvm-components: x86
//@[i686-windows] compile-flags: --target i686-pc-windows-msvc
//@[i686-windows] needs-llvm-components: x86

#![crate_type = "lib"]
#![feature(explicit_tail_calls, no_core, unboxed_closures)]
#![expect(incomplete_features)]
#![no_std]
#![no_core]

extern crate minicore;

struct Indirect(u64, u64, u64, u64);

// CHECK-LABEL: @caller_untuple_1
// CHECK-SAME: (ptr {{.*}}[[A:%.*]])
// CHECK-NEXT: start:
// CHECK-NEXT: musttail call {{.*}}i64 @callee_untuple_1(ptr {{.*}}[[A]])
#[unsafe(no_mangle)]
extern "rust-call" fn caller_untuple_1((a,): (Indirect,)) -> u64 {
become callee_untuple_1((a,));
}

// CHECK-LABEL: @caller_untuple_1_const
// CHECK-SAME: (ptr {{.*}}[[A:%.*]])
// x64-linux: store i64 1, ptr [[A]]
// i686-linux: store <2 x i64> <i64 1, i64 2>, ptr [[A]]
// i686-windows: store <2 x i64> <i64 1, i64 2>, ptr [[A]]
// CHECK: musttail call {{.*}}i64 @callee_untuple_1(ptr {{.*}}[[A]])
#[unsafe(no_mangle)]
extern "rust-call" fn caller_untuple_1_const((_,): (Indirect,)) -> u64 {
become callee_untuple_1((Indirect(1, 2, 3, 4),));
}

// CHECK-LABEL: @caller_untuple_2
// CHECK-SAME: (ptr {{.*}}[[A:%.*]], ptr {{.*}}[[B:%.*]])
// CHECK-NEXT: start:
// CHECK-NEXT: musttail call {{.*}}i64 @callee_untuple_2(ptr {{.*}}[[A]], ptr {{.*}}[[B]])
#[unsafe(no_mangle)]
extern "rust-call" fn caller_untuple_2((a, b): (Indirect, Indirect)) -> u64 {
become callee_untuple_2((a, b));
}

// CHECK-LABEL: @caller_untuple_2_swapper
// CHECK-SAME: (ptr {{.*}}[[A:%.*]], ptr {{.*}}[[B:%.*]])
// CHECK: call void @llvm.memcpy.{{.+}}(ptr {{.*}}[[TMP:%.*]], ptr {{.*}}[[A]]
// CHECK: call void @llvm.memcpy.{{.+}}(ptr {{.*}}[[A]], ptr {{.*}}[[B]]
// CHECK: call void @llvm.memcpy.{{.+}}(ptr {{.*}}[[B]], ptr {{.*}}[[TMP]]
// CHECK: musttail call {{.*}}i64 @callee_untuple_2(ptr {{.*}}[[A]], ptr {{.*}}[[B]])
#[unsafe(no_mangle)]
extern "rust-call" fn caller_untuple_2_swapper((a, b): (Indirect, Indirect)) -> u64 {
become callee_untuple_2((b, a));
}

mod swap_self {
type Tuple = (u64, u64, u64, u64);

trait X {
extern "rust-call" fn swap_self(self, args: Tuple) -> u8;
extern "rust-call" fn swap_self_helper(self, args: Tuple) -> u8;
}

impl X for Tuple {
// CHECK-LABEL: @swap_self
// CHECK-SAME: (ptr {{.*}}[[SELF:%.*]], i64 {{.*}}[[TUPLE_0:%.*]], i64 {{.*}}[[TUPLE_1:%.*]], i64 {{.*}}[[TUPLE_2:%.*]], i64 {{.*}}[[TUPLE_3:%.*]])
// CHECK: [[SELF_0:%.*]] = load i64, ptr [[SELF]]
// CHECK: [[PTR_1:%.*]] = getelementptr inbounds {{.*}}i8, ptr %self, {{i32|i64}} 8
// CHECK: [[SELF_1:%.*]] = load i64, ptr [[PTR_1]]
// CHECK: [[PTR_2:%.*]] = getelementptr inbounds {{.*}}i8, ptr %self, {{i32|i64}} 16
// CHECK: [[SELF_2:%.*]] = load i64, ptr [[PTR_2]]
// CHECK: [[PTR_3:%.*]] = getelementptr inbounds {{.*}}i8, ptr %self, {{i32|i64}} 24
// CHECK: [[SELF_3:%.*]] = load i64, ptr [[PTR_3]]
// CHECK: store i64 [[TUPLE_0]], ptr [[SELF]]
// CHECK: store i64 [[TUPLE_1]], ptr [[PTR_1]]
// CHECK: store i64 [[TUPLE_2]], ptr [[PTR_2]]
// CHECK: store i64 [[TUPLE_3]], ptr [[PTR_3]]
// CHECK: musttail call {{.*}}i8 @swap_self_helper(ptr {{.*}}[[SELF]], i64 {{.*}}[[SELF_0]], i64 {{.*}}[[SELF_1]], i64 {{.*}}[[SELF_2]], i64 {{.*}}[[SELF_3]])
#[unsafe(no_mangle)]
extern "rust-call" fn swap_self(self, args: Tuple) -> u8 {
become args.swap_self_helper(self)
}

#[inline(never)]
#[unsafe(no_mangle)]
extern "rust-call" fn swap_self_helper(self, args: Tuple) -> u8 {
opaque_tuple(self);
opaque_tuple(args);
0
}
}

unsafe extern "Rust" {
safe fn opaque_tuple(_: Tuple);
}
}

unsafe extern "Rust" {
safe fn opaque(_: u64);
}

#[inline(never)]
#[unsafe(no_mangle)]
extern "rust-call" fn callee_untuple_1((a,): (Indirect,)) -> u64 {
opaque(a.0);
a.0
}

#[inline(never)]
#[unsafe(no_mangle)]
extern "rust-call" fn callee_untuple_2((a, b): (Indirect, Indirect)) -> u64 {
opaque(a.0);
opaque(b.0);
a.0
}
Loading
Loading