From 6ce0650633aeef5e17102afc94b9004df919f043 Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Mon, 16 Feb 2026 16:19:57 -0600 Subject: [PATCH 1/2] Add -Zinstrument-mcount=fentry to -Zinstrument-mcount fentry is essentially a simpler version of mcount where the counting function is called before any other function prologue actions happen. fentry is still used by the linux x86-64 kernel. It's unclear if or when patchable-function-entry will replace it. It is also used by clang for some x86-64 mingw toolchains. This is only supported on some x86, x86-64, and s390x targets. --- compiler/rustc_codegen_llvm/src/attributes.rs | 40 ++++++++++++------- compiler/rustc_codegen_ssa/src/back/link.rs | 6 +-- compiler/rustc_interface/src/tests.rs | 12 +++--- compiler/rustc_session/src/config.rs | 22 +++++++--- compiler/rustc_session/src/options.rs | 17 +++++++- compiler/rustc_session/src/session.rs | 8 +++- compiler/rustc_target/src/spec/json.rs | 3 ++ compiler/rustc_target/src/spec/mod.rs | 4 ++ .../spec/targets/i686_unknown_linux_gnu.rs | 1 + .../spec/targets/i686_unknown_linux_musl.rs | 1 + .../spec/targets/s390x_unknown_linux_gnu.rs | 1 + .../spec/targets/s390x_unknown_linux_musl.rs | 1 + .../targets/s390x_unknown_none_softfloat.rs | 1 + .../spec/targets/x86_64_unknown_linux_gnu.rs | 1 + .../spec/targets/x86_64_unknown_linux_musl.rs | 1 + .../spec/targets/x86_64_unknown_linux_none.rs | 1 + tests/assembly-llvm/fentry.rs | 25 ++++++++++++ tests/codegen-llvm/instrument-fentry.rs | 25 ++++++++++++ tests/codegen-llvm/instrument_fn.rs | 9 ++++- 19 files changed, 147 insertions(+), 32 deletions(-) create mode 100644 tests/assembly-llvm/fentry.rs create mode 100644 tests/codegen-llvm/instrument-fentry.rs diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 0176f891ddda9..fe36a9865485d 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -7,7 +7,9 @@ use 
rustc_middle::middle::codegen_fn_attrs::{ TargetFeature, }; use rustc_middle::ty::{self, Instance, TyCtxt}; -use rustc_session::config::{BranchProtection, FunctionReturn, OptLevel, PAuthKey, PacRet}; +use rustc_session::config::{ + BranchProtection, FunctionReturn, InstrumentMcount, OptLevel, PAuthKey, PacRet, +}; use rustc_span::sym; use rustc_symbol_mangling::mangle_internal_symbol; use rustc_target::spec::{Arch, FramePointer, SanitizerSet, StackProbeType, StackProtector}; @@ -177,7 +179,7 @@ pub(crate) fn frame_pointer(sess: &Session) -> FramePointer { let opts = &sess.opts; // "mcount" function relies on stack pointer. // See . - if opts.unstable_opts.instrument_mcount { + if opts.unstable_opts.instrument_mcount == InstrumentMcount::Mcount { fp.ratchet(FramePointer::Always); } fp.ratchet(opts.cg.force_frame_pointers); @@ -214,7 +216,7 @@ fn instrument_function_attr<'ll>( instrument_fn: InstrumentFnAttr, ) -> SmallVec<[&'ll Attribute; 4]> { let mut attrs = SmallVec::new(); - if sess.opts.unstable_opts.instrument_mcount { + if sess.opts.unstable_opts.instrument_mcount != InstrumentMcount::Disabled { // Similar to `clang -pg` behavior. Handled by the // `post-inline-ee-instrument` LLVM pass. @@ -224,18 +226,26 @@ fn instrument_function_attr<'ll>( }; if instrument_entry { - // The function name varies on platforms. - // See test/CodeGen/mcount.c in clang. - let mcount_name = match &sess.target.llvm_mcount_intrinsic { - Some(llvm_mcount_intrinsic) => llvm_mcount_intrinsic.as_ref(), - None => sess.target.mcount.as_ref(), - }; - - attrs.push(llvm::CreateAttrStringValue( - cx.llcx, - "instrument-function-entry-inlined", - mcount_name, - )); + match sess.opts.unstable_opts.instrument_mcount { + InstrumentMcount::Mcount => { + // The function name varies on platforms. + // See test/CodeGen/mcount.c in clang. 
+ let mcount_name = match &sess.target.llvm_mcount_intrinsic { + Some(llvm_mcount_intrinsic) => llvm_mcount_intrinsic.as_ref(), + None => sess.target.mcount.as_ref(), + }; + + attrs.push(llvm::CreateAttrStringValue( + cx.llcx, + "instrument-function-entry-inlined", + mcount_name, + )); + } + InstrumentMcount::Fentry => { + attrs.push(llvm::CreateAttrStringValue(cx.llcx, "fentry-call", "true")); + } + InstrumentMcount::Disabled => {} + } } } if let Some(options) = &sess.opts.unstable_opts.instrument_xray { diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 2c3ee1bae09f8..d5e5493bf8536 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -35,8 +35,8 @@ use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerFile; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::middle::exported_symbols::SymbolExportKind; use rustc_session::config::{ - self, CFGuard, CrateType, DebugInfo, LinkerFeaturesCli, OutFileName, OutputFilenames, - OutputType, PrintKind, SplitDwarfKind, Strip, + self, CFGuard, CrateType, DebugInfo, InstrumentMcount, LinkerFeaturesCli, OutFileName, + OutputFilenames, OutputType, PrintKind, SplitDwarfKind, Strip, }; use rustc_session::lint::builtin::LINKER_MESSAGES; use rustc_session::output::{check_file_is_writeable, invalid_output_for_target, out_filename}; @@ -2882,7 +2882,7 @@ fn add_order_independent_options( cmd.pgo_gen(); } - if sess.opts.unstable_opts.instrument_mcount { + if sess.opts.unstable_opts.instrument_mcount != InstrumentMcount::Disabled { cmd.enable_profiling(); } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index ce21a79a02dfd..5933d0fd4b356 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -13,11 +13,11 @@ use rustc_session::config::{ AnnotateMoves, AutoDiff, BranchProtection, CFGuard, Cfg, 
CodegenRetagOptions, CoverageLevel, CoverageOptions, DebugInfo, DumpMonoStatsFormat, ErrorOutputType, ExternEntry, ExternLocation, Externs, FmtDebug, FunctionReturn, IncrementalStateAssertion, InliningThreshold, Input, - InstrumentCoverage, InstrumentXRay, LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, - MirIncludeSpans, NextSolverConfig, Offload, Options, OutFileName, OutputType, OutputTypes, - PAuthKey, PacRet, Passes, PatchableFunctionEntry, Polonius, ProcMacroExecutionStrategy, Strip, - SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, build_configuration, - build_session_options, rustc_optgroups, + InstrumentCoverage, InstrumentMcount, InstrumentXRay, LinkSelfContained, LinkerPluginLto, + LocationDetail, LtoCli, MirIncludeSpans, NextSolverConfig, Offload, Options, OutFileName, + OutputType, OutputTypes, PAuthKey, PacRet, Passes, PatchableFunctionEntry, Polonius, + ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, + build_configuration, build_session_options, rustc_optgroups, }; use rustc_session::lint::Level; use rustc_session::search_paths::SearchPath; @@ -834,7 +834,7 @@ fn test_unstable_options_tracking_hash() { tracked!(inline_mir, Some(true)); tracked!(inline_mir_hint_threshold, Some(123)); tracked!(inline_mir_threshold, Some(123)); - tracked!(instrument_mcount, true); + tracked!(instrument_mcount, InstrumentMcount::Mcount); tracked!(instrument_xray, Some(InstrumentXRay::default())); tracked!(link_directives, false); tracked!(link_only, true); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 089a9322e6a3c..c7da2d9a1fc38 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -255,6 +255,17 @@ pub enum AnnotateMoves { Enabled(Option), } +/// The different settings that the `-Z instrument-mcount` flag can have.
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum InstrumentMcount { + /// `-Z instrument-mcount=no` + Disabled, + /// `-Z instrument-mcount=yes` + Mcount, + /// `-Z instrument-mcount=fentry` + Fentry, +} + /// Settings for `-Z instrument-xray` flag. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] pub struct InstrumentXRay { @@ -3085,11 +3096,11 @@ pub(crate) mod dep_tracking { use super::{ AnnotateMoves, AutoDiff, BranchProtection, CFGuard, CFProtection, CodegenRetagOptions, CoverageOptions, CrateType, DebugInfo, DebugInfoCompression, ErrorOutputType, FmtDebug, - FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay, LinkerPluginLto, - LocationDetail, LtoCli, MirStripDebugInfo, NextSolverConfig, Offload, OptLevel, - OutFileName, OutputType, OutputTypes, PatchableFunctionEntry, Polonius, ResolveDocLinks, - SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion, - WasiExecModel, + FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentMcount, InstrumentXRay, + LinkerPluginLto, LocationDetail, LtoCli, MirStripDebugInfo, NextSolverConfig, Offload, + OptLevel, OutFileName, OutputType, OutputTypes, PatchableFunctionEntry, Polonius, + ResolveDocLinks, SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, + SymbolManglingVersion, WasiExecModel, }; use crate::lint; use crate::utils::NativeLib; @@ -3151,6 +3162,7 @@ pub(crate) mod dep_tracking { TlsModel, InstrumentCoverage, CoverageOptions, + InstrumentMcount, InstrumentXRay, CrateType, MergeFunctions, diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 2f9fa09c322bb..90a2d72ea13ca 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -786,6 +786,8 @@ mod desc { pub(crate) const parse_coverage_options: &str = "`block` | `branch` | `condition`"; pub(crate) const parse_codegen_retag_options: &str = "either no value or a comma-separated list of settings: 
`no-precise-im`, `no-precise-pin`"; + pub(crate) const parse_instrument_mcount: &str = + "either a boolean (`yes`, `no`, `on`, `off`, etc), or `fentry` on supported targets."; pub(crate) const parse_instrument_xray: &str = "either a boolean (`yes`, `no`, `on`, `off`, etc), or a comma separated list of settings: `always` or `never` (mutually exclusive), `ignore-loops`, `instruction-threshold=N`, `skip-entry`, `skip-exit`"; pub(crate) const parse_unpretty: &str = "`string` or `string=string`"; pub(crate) const parse_treat_err_as_bug: &str = "either no value or a non-negative number"; @@ -1565,6 +1567,19 @@ pub mod parse { true } + pub(crate) fn parse_instrument_mcount(slot: &mut InstrumentMcount, v: Option<&str>) -> bool { + let mut use_mcount = false; + if parse_bool(&mut use_mcount, v) { + *slot = if use_mcount { InstrumentMcount::Mcount } else { InstrumentMcount::Disabled }; + true + } else if let Some("fentry") = v { + *slot = InstrumentMcount::Fentry; + true + } else { + false + } + } + pub(crate) fn parse_instrument_xray( slot: &mut Option, v: Option<&str>, @@ -2433,7 +2448,7 @@ options! 
{ "a default MIR inlining threshold (default: 50)"), input_stats: bool = (false, parse_bool, [UNTRACKED], "print some statistics about AST and HIR (default: no)"), - instrument_mcount: bool = (false, parse_bool, [TRACKED], + instrument_mcount: InstrumentMcount = (InstrumentMcount::Disabled, parse_instrument_mcount, [TRACKED], "insert function instrument code for mcount-based tracing (default: no)"), instrument_xray: Option = (None, parse_instrument_xray, [TRACKED], "insert function instrument code for XRay-based tracing (default: no) diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index c19a2d71e2078..ed69841050781 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -38,7 +38,7 @@ use crate::code_stats::CodeStats; pub use crate::code_stats::{DataTypeKind, FieldInfo, FieldKind, SizeKind, VariantInfo}; use crate::config::{ self, Cfg, CheckCfg, CoverageLevel, CoverageOptions, CrateType, DebugInfo, ErrorOutputType, - FunctionReturn, Input, InstrumentCoverage, OptLevel, OutFileName, OutputType, + FunctionReturn, Input, InstrumentCoverage, InstrumentMcount, OptLevel, OutFileName, OutputType, SwitchWithOptPath, }; use crate::filesearch::FileSearch; @@ -1340,6 +1340,12 @@ fn validate_commandline_args_with_session_available(sess: &Session) { } } + if sess.opts.unstable_opts.instrument_mcount == InstrumentMcount::Fentry + && !sess.target.options.supports_fentry + { + sess.dcx().emit_err(errors::InstrumentationNotSupported { us: "fentry".to_string() }); + } + if sess.opts.unstable_opts.instrument_xray.is_some() && !sess.target.options.supports_xray { sess.dcx().emit_err(errors::InstrumentationNotSupported { us: "XRay".to_string() }); } diff --git a/compiler/rustc_target/src/spec/json.rs b/compiler/rustc_target/src/spec/json.rs index 7156c3b08b918..922b1fb3dff8d 100644 --- a/compiler/rustc_target/src/spec/json.rs +++ b/compiler/rustc_target/src/spec/json.rs @@ -223,6 +223,7 @@ impl Target 
{ forward!(supports_stack_protector); forward!(small_data_threshold_support); forward!(entry_name); + forward!(supports_fentry); forward!(supports_xray); // we're going to run `update_from_cli`, but that won't change the target's AbiMap @@ -407,6 +408,7 @@ impl ToJson for Target { target_option_val!(small_data_threshold_support); target_option_val!(entry_name); target_option_val!(entry_abi); + target_option_val!(supports_fentry); target_option_val!(supports_xray); // Serializing `-Clink-self-contained` needs a dynamic key to support the @@ -626,6 +628,7 @@ struct TargetSpecJson { supports_stack_protector: Option, small_data_threshold_support: Option, entry_name: Option>, + supports_fentry: Option, supports_xray: Option, entry_abi: Option, } diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 87c40fa588c03..ef5c98642e9f8 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -2688,6 +2688,9 @@ pub struct TargetOptions { /// Default value is `CanonAbi::C` pub entry_abi: CanonAbi, + /// Whether the target supports fentry instrumentation. + pub supports_fentry: bool, + /// Whether the target supports XRay instrumentation. 
pub supports_xray: bool, @@ -2929,6 +2932,7 @@ impl Default for TargetOptions { supports_stack_protector: true, entry_name: "main".into(), entry_abi: CanonAbi::C, + supports_fentry: false, supports_xray: false, default_address_space: rustc_abi::AddressSpace::ZERO, small_data_threshold_support: SmallDataThresholdSupport::DefaultForArch, diff --git a/compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs b/compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs index 932f6034433e5..7bc48b1998fac 100644 --- a/compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs @@ -22,6 +22,7 @@ pub(crate) fn target() -> Target { base.supported_sanitizers = SanitizerSet::ADDRESS; base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::Inline; + base.supports_fentry = true; Target { llvm_target: "i686-unknown-linux-gnu".into(), diff --git a/compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs index 5c85a3e13820c..212c2b8db56da 100644 --- a/compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs @@ -12,6 +12,7 @@ pub(crate) fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-Wl,-melf_i386"]); base.stack_probes = StackProbeType::Inline; + base.supports_fentry = true; // FIXME(compiler-team#422): musl targets should be dynamically linked by default. 
base.crt_static_default = true; diff --git a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs index 8859e0d650a2b..e6b58bff22848 100644 --- a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_gnu.rs @@ -12,6 +12,7 @@ pub(crate) fn target() -> Target { base.stack_probes = StackProbeType::Inline; base.supported_sanitizers = SanitizerSet::ADDRESS | SanitizerSet::LEAK | SanitizerSet::MEMORY | SanitizerSet::THREAD; + base.supports_fentry = true; Target { llvm_target: "s390x-unknown-linux-gnu".into(), diff --git a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs index 21e705ebbec5d..5ff9f025ba1d9 100644 --- a/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/targets/s390x_unknown_linux_musl.rs @@ -13,6 +13,7 @@ pub(crate) fn target() -> Target { base.stack_probes = StackProbeType::Inline; base.supported_sanitizers = SanitizerSet::ADDRESS | SanitizerSet::LEAK | SanitizerSet::MEMORY | SanitizerSet::THREAD; + base.supports_fentry = true; Target { llvm_target: "s390x-unknown-linux-musl".into(), diff --git a/compiler/rustc_target/src/spec/targets/s390x_unknown_none_softfloat.rs b/compiler/rustc_target/src/spec/targets/s390x_unknown_none_softfloat.rs index 8c411091ff50a..d90b8df9f37dc 100644 --- a/compiler/rustc_target/src/spec/targets/s390x_unknown_none_softfloat.rs +++ b/compiler/rustc_target/src/spec/targets/s390x_unknown_none_softfloat.rs @@ -20,6 +20,7 @@ pub(crate) fn target() -> Target { rustc_abi: Some(RustcAbi::Softfloat), stack_probes: StackProbeType::Inline, supported_sanitizers: SanitizerSet::KERNELADDRESS, + supports_fentry: true, ..Default::default() }; diff --git a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_gnu.rs 
b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_gnu.rs index defa9f146d798..0d87a7b760c61 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_gnu.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_gnu.rs @@ -19,6 +19,7 @@ pub(crate) fn target() -> Target { | SanitizerSet::SAFESTACK | SanitizerSet::THREAD | SanitizerSet::REALTIME; + base.supports_fentry = true; base.supports_xray = true; Target { diff --git a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_musl.rs index ee883532ad4ac..a664f22479913 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_musl.rs @@ -15,6 +15,7 @@ pub(crate) fn target() -> Target { | SanitizerSet::LEAK | SanitizerSet::MEMORY | SanitizerSet::THREAD; + base.supports_fentry = true; base.supports_xray = true; // FIXME(compiler-team#422): musl targets should be dynamically linked by default. 
base.crt_static_default = true; diff --git a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_none.rs b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_none.rs index 768b1a1ba1128..c0d65818d325b 100644 --- a/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_none.rs +++ b/compiler/rustc_target/src/spec/targets/x86_64_unknown_linux_none.rs @@ -10,6 +10,7 @@ pub(crate) fn target() -> Target { base.linker_flavor = LinkerFlavor::Gnu(Cc::No, Lld::Yes); base.linker = Some("rust-lld".into()); base.panic_strategy = PanicStrategy::Abort; + base.supports_fentry = true; Target { llvm_target: "x86_64-unknown-linux-none".into(), diff --git a/tests/assembly-llvm/fentry.rs b/tests/assembly-llvm/fentry.rs new file mode 100644 index 0000000000000..598db8fb150df --- /dev/null +++ b/tests/assembly-llvm/fentry.rs @@ -0,0 +1,25 @@ +//@ assembly-output: emit-asm +//@ compile-flags: -Zinstrument-mcount=fentry +//@ add-minicore + +//@ revisions: X86 S390X +//@[X86] compile-flags: --target=x86_64-unknown-linux-gnu -Cllvm-args=-x86-asm-syntax=intel +//@[X86] needs-llvm-components: x86 +//@[S390X] compile-flags: --target=s390x-unknown-linux-gnu +//@[S390X] needs-llvm-components: systemz + +#![crate_type = "lib"] +#![feature(no_core)] +#![no_core] + +extern crate minicore; + +// CHECK-LABEL: mcount_func: +#[no_mangle] +pub fn mcount_func(a: isize, b: isize) -> isize { + // X86: call __fentry__ + // S390X: brasl %r0, __fentry__@PLT + a + b + // X86: ret + // S390X: br %r14 +} diff --git a/tests/codegen-llvm/instrument-fentry.rs b/tests/codegen-llvm/instrument-fentry.rs new file mode 100644 index 0000000000000..57cb29d7fc426 --- /dev/null +++ b/tests/codegen-llvm/instrument-fentry.rs @@ -0,0 +1,25 @@ +//@ add-minicore +//@ compile-flags: -Z instrument-mcount=fentry -Copt-level=0 +// +//@ revisions: x86_64-linux +//@[x86_64-linux] compile-flags: --target=x86_64-unknown-linux-gnu +//@[x86_64-linux] needs-llvm-components: x86 +// +//@ revisions: x86-linux 
+//@[x86-linux] compile-flags: --target=i686-unknown-linux-gnu +//@[x86-linux] needs-llvm-components: x86 +// +//@ revisions: s390x-linux +//@[s390x-linux] compile-flags: --target=s390x-unknown-linux-gnu +//@[s390x-linux] needs-llvm-components: systemz + +#![feature(no_core)] +#![no_std] +#![no_core] +#![crate_type = "lib"] + +extern crate minicore; +use minicore::*; + +// CHECK: attributes #{{.*}} "fentry-call"="true" +pub fn foo() {} diff --git a/tests/codegen-llvm/instrument_fn.rs b/tests/codegen-llvm/instrument_fn.rs index cd1e7ddb55cfd..00336d3e7b983 100644 --- a/tests/codegen-llvm/instrument_fn.rs +++ b/tests/codegen-llvm/instrument_fn.rs @@ -1,10 +1,13 @@ // Verify the #[instrument_fn] applies the correct LLVM IR function attributes. // -//@ revisions:XRAY MCOUNT +//@ revisions:XRAY MCOUNT FENTRY //@ add-minicore //@ compile-flags: -Copt-level=0 //@ [XRAY] compile-flags: -Zinstrument-xray --target=x86_64-unknown-linux-gnu //@ [XRAY] needs-llvm-components: x86 +//@ [FENTRY] compile-flags: -Zinstrument-mcount=fentry -Copt-level=0 +//@ [FENTRY] compile-flags: --target=x86_64-unknown-linux-gnu +//@ [FENTRY] needs-llvm-components: x86 //@ [MCOUNT] compile-flags: -Zinstrument-mcount #![feature(no_core)] @@ -27,12 +30,16 @@ fn instrument_off() {} #[no_mangle] #[instrument_fn = "on"] // MCOUNT: define {{.*}}void @instrument_on() {{.*}} [[DFLT_ATTR]] +// FENTRY: define void @instrument_on() {{.*}} [[DFLT_ATTR]] // XRAY: define void @instrument_on() {{.*}} [[ON_ATTR:#[0-9]+]] fn instrument_on() {} // MCOUNT: attributes [[DFLT_ATTR]] {{.*}} "instrument-function-entry-inlined"= // MCOUNT-NOT: attributes [[OFF_ATTR]] {{.*}} "instrument-function-entry-inlined"= +// FENTRY: attributes [[DFLT_ATTR]] {{.*}} "fentry-call"="true" +// FENTRY-NOT: attributes [[OFF_ATTR]] {{.*}} "fentry-call"="true" + // XRAY-NOT: attributes [[DFLT_ATTR]] {{.*}} "function-instrument"="xray-always" // XRAY-NOT: attributes [[DFLT_ATTR]] {{.*}} "function-instrument"="xray-never" // XRAY-NOT: 
attributes [[DFLT_ATTR]] {{.*}} "xray-skip-exit" From 87514a37a050452ab1222f075e6c695717b7211e Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Mon, 22 Jun 2026 16:25:33 -0500 Subject: [PATCH 2/2] doc/unstable-book: document -Zinstrument-mcount --- .../src/compiler-flags/instrument-mcount.md | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 src/doc/unstable-book/src/compiler-flags/instrument-mcount.md diff --git a/src/doc/unstable-book/src/compiler-flags/instrument-mcount.md b/src/doc/unstable-book/src/compiler-flags/instrument-mcount.md new file mode 100644 index 0000000000000..376b509048dab --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/instrument-mcount.md @@ -0,0 +1,143 @@ +# `instrument-mcount` + +Insert calls to a counting function at the entry of each function. Traditionally, the name of this function was +mcount, but the exact name may vary depending on target and option usage. + +The counting function is a special function which does not typically follow a target's ABI. It generally takes +two arguments, the address of the calling function and the address of the called function. It was initially used +to profile applications, but has expanded to other usages (for example [ftrace on Linux](https://docs.kernel.org/trace/ftrace.html)). + +Supported options: + + - `no`, `n`, `off`: Do not enable instrumentation. The default option. + - `yes`, `y`, `on`: Enable mcount based function instrumentation. This requires, and enables, frame pointer generation. + - `fentry`: Enable fentry based function instrumentation, where supported. The calling conventions for this are different than mcount, with less overhead, and no frame pointer requirements. This counting function is always named `__fentry__`. This is only available on some x86, x86-64, and s390x targets.
+ +|target |mcount function|supports fentry|ABI notes| +|--- |--- |--- |--- | +|aarch64-apple-darwin | `\u{1}mcount` | | | +|aarch64-pc-windows-msvc | `mcount` | | | +|aarch64-unknown-linux-gnu| `_mcount` | | | +|i686-pc-windows-msvc | `mcount` | | | +|i686-unknown-linux-gnu | `mcount` | x| | +|x86_64-pc-windows-gnu | `_mcount` | | | +|x86_64-pc-windows-msvc | `mcount` | | | +|x86_64-unknown-linux-gnu | `mcount` | x| 1| + +On arm eabi targets, the mcount function is usually named `__gnu_mcount_nc`, though some targets may use different names. Implementers of counting functions should consult the target-specific documentation for quirks of each ABI function. + +1. On x86-64, mcount and fentry must preserve the argument registers `rax`, `rcx`, `rdx`, `rsi`, `rdi`, `r8`, `r9`. When using fentry, the stack pointer `rsp` may need to be aligned to meet ABI requirements. + +## Implementing custom counting functions + +In essence, this is implementing the function `fn mcount(caller: *const std::ffi::c_void, callee: *const std::ffi::c_void)`. The calling convention for mcount follows its own ABI, which isn't usually the standard ABI for the target, but is enforced by preexisting convention. + +A trivial example on x86_64-unknown-linux-gnu looks something like the following. The `#[instrument_fn]` attribute can be used to disable profiling to simplify writing counting functions, but implementers must be very careful when calling other functions (or closures which fail to inline) as they may also call mcount. + +The following example can be compiled with `-Zinstrument-mcount=yes` or `-Zinstrument-mcount=fentry` on an x86_64-unknown-linux-gnu target. It is also acceptable to link objects with different usages of `-Zinstrument-mcount`; however, doing so will require implementing both `__fentry__` and `mcount` on targets which support both.
+ +```rust +#![feature(instrument_fn)] +#![feature(abi_custom)] + +fn main() { + // Ensure all the early startup occurs before attempting to call this trivial, single-threaded + // counting function. + unsafe { + PROFILING_ENABLED = true; + } + println!("main() called"); + unsafe { + PROFILING_ENABLED = false; + } +} + +// This example is not threadsafe. +pub static mut IN_MCOUNT: isize = 0; +pub static mut PROFILING_ENABLED: bool = false; + +#[unsafe(no_mangle)] +#[instrument_fn = "off"] +unsafe extern "C" fn __count_fn(caller: u64, callee: u64) { + unsafe { + if IN_MCOUNT == 0 && PROFILING_ENABLED { + IN_MCOUNT += 1; + { + println!("mcount: call from 0x{caller:x} to 0x{callee:x}"); + } + IN_MCOUNT -= 1; + } + } +} + +// Define a custom mcount function. This may partially or fully override the glibc +// implementation depending on linker options. +#[unsafe(naked)] +#[unsafe(no_mangle)] +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +unsafe extern "custom" fn mcount() { + core::arch::naked_asm!( + // A simplified version based on the glibc x86-64 mcount wrapper. + // Save register arguments to the stack, and call the mcount above. + "push rax", + "push rcx", + "push rdx", + "push rsi", + "push rdi", + "push r8", + "push r9", + "mov rsi, 56[rsp]", + "mov rdi, 8[rbp]", + "call __count_fn", + "pop r9", + "pop r8", + "pop rdi", + "pop rsi", + "pop rdx", + "pop rcx", + "pop rax", + "ret", + ) +} + +// Supply a custom __fentry__ instead of glibc's. This has the same linker +// restrictions as noted with mcount, but does not require a frame pointer. +#[unsafe(naked)] +#[unsafe(no_mangle)] +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +unsafe extern "custom" fn __fentry__() { + core::arch::naked_asm!( + // __fentry__ is called before any other prologue actions, be careful + // with stack alignment. The stack slots look something like: + // [...] 
+ // [caller return address] + // [callee return address] <- top of stack + "sub rsp, 8", + "push rax", + "push rcx", + "push rdx", + "push rsi", + "push rdi", + "push r8", + "push r9", + "mov rsi, 64[rsp]", + "mov rdi, 72[rsp]", + "call __count_fn", + "pop r9", + "pop r8", + "pop rdi", + "pop rsi", + "pop rdx", + "pop rcx", + "pop rax", + "add rsp, 8", + "ret", + ) +} +``` + +When run, the above program should produce output similar to: +```txt +mcount: call from 0x5614c97d778a to 0x5614c97d76e5 +main() called +```