From f4942c94ec400715aa8999bdb2eea6663899b881 Mon Sep 17 00:00:00 2001 From: Louis Vialar Date: Sat, 6 Jun 2026 16:07:29 +0200 Subject: [PATCH 1/2] smp: always wake up core when ready This is a partial revert of commit f0bb0a0e --- src/arch/x86_64/kernel/apic.rs | 5 +---- src/arch/x86_64/kernel/core_local.rs | 6 ------ src/arch/x86_64/kernel/interrupts.rs | 2 -- src/scheduler/mod.rs | 14 -------------- 4 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index a5fefe4fbd..b8d1b0bf4a 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -886,10 +886,7 @@ pub fn ipi_tlb_flush() { #[allow(unused_variables)] pub fn wakeup_core(core_id_to_wakeup: CoreId) { #[cfg(all(feature = "smp", not(feature = "idle-poll")))] - if core_id_to_wakeup != core_id() - && !processor::supports_mwait() - && scheduler::take_core_hlt_state(core_id_to_wakeup) - { + if core_id_to_wakeup != core_id() && !processor::supports_mwait() { without_interrupts(|| { let apic_ids = CPU_LOCAL_APIC_IDS.lock(); let local_apic_id = apic_ids[core_id_to_wakeup as usize]; diff --git a/src/arch/x86_64/kernel/core_local.rs b/src/arch/x86_64/kernel/core_local.rs index 9b89a7659b..000b07ca26 100644 --- a/src/arch/x86_64/kernel/core_local.rs +++ b/src/arch/x86_64/kernel/core_local.rs @@ -1,8 +1,6 @@ use alloc::boxed::Box; use core::arch::asm; use core::cell::Cell; -#[cfg(feature = "smp")] -use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; use core::{mem, ptr}; @@ -34,8 +32,6 @@ pub(crate) struct CoreLocal { irq_statistics: &'static IrqStatistics, /// The core-local async executor. ex: StaticLocalExecutor, - #[cfg(feature = "smp")] - pub hlt: AtomicBool, /// Queues to handle incoming requests from the other cores #[cfg(feature = "smp")] pub scheduler_input: InterruptTicketMutex, @@ -63,8 +59,6 @@ impl CoreLocal { irq_statistics, ex: StaticLocalExecutor::new(), #[cfg(feature = "smp")] - hlt: AtomicBool::new(false), - #[cfg(feature = "smp")] scheduler_input: InterruptTicketMutex::new(SchedulerInput::new()), }; let this = if core_id == 0 { diff --git a/src/arch/x86_64/kernel/interrupts.rs b/src/arch/x86_64/kernel/interrupts.rs index 6d39f076ef..02c8e3cfec 100644 --- a/src/arch/x86_64/kernel/interrupts.rs +++ b/src/arch/x86_64/kernel/interrupts.rs @@ -82,8 +82,6 @@ pub(crate) fn enable_and_wait() { ); } } else { - #[cfg(feature = "smp")] - crate::CoreLocal::get().hlt.store(true, Ordering::Relaxed); enable_and_hlt(); } } diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 8731482ace..e665e82cab 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -8,8 +8,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::cell::RefCell; use core::ptr; -#[cfg(all(target_arch = "x86_64", feature = "smp"))] -use core::sync::atomic::AtomicBool; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use ahash::RandomState; @@ -40,8 +38,6 @@ static NO_TASKS: AtomicU32 = AtomicU32::new(0); #[cfg(feature = "smp")] static SCHEDULER_INPUTS: SpinMutex>> = SpinMutex::new(Vec::new()); -#[cfg(all(target_arch = "x86_64", feature = "smp"))] -static CORE_HLT_STATE: SpinMutex> = SpinMutex::new(Vec::new()); /// Map between Task ID and Queue of waiting tasks static WAITING_TASKS: InterruptTicketMutex>> = InterruptTicketMutex::new(BTreeMap::new()); @@ -891,19 +887,9 @@ pub(crate) fn add_current_core() { core_id.try_into().unwrap(), &CoreLocal::get().scheduler_input, ); - #[cfg(target_arch = "x86_64")] - CORE_HLT_STATE - .lock() - .insert(core_id.try_into().unwrap(), &CoreLocal::get().hlt); } } -#[inline] -#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] -pub(crate) fn take_core_hlt_state(core_id: CoreId) -> bool { - CORE_HLT_STATE.lock()[usize::try_from(core_id).unwrap()].swap(false, Ordering::Acquire) -} - #[inline] #[cfg(feature = "smp")] fn get_scheduler_input(core_id: CoreId) -> &'static InterruptTicketMutex { From 8e50433eaffc935e95083effd4be2d552c8eb5a2 Mon Sep 17 00:00:00 2001 From: Louis Vialar Date: Sat, 6 Jun 2026 18:15:08 +0200 Subject: [PATCH 2/2] feat(smp): re-add an anti-spurious-wakeup feature This one has tons of comments to convince myself it works. I am relatively convinced, but concurrent processing is hard you know? --- src/arch/x86_64/kernel/apic.rs | 2 +- src/arch/x86_64/kernel/interrupts.rs | 5 ++ src/scheduler/mod.rs | 129 +++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) diff --git a/src/arch/x86_64/kernel/apic.rs b/src/arch/x86_64/kernel/apic.rs index b8d1b0bf4a..626035f2fd 100644 --- a/src/arch/x86_64/kernel/apic.rs +++ b/src/arch/x86_64/kernel/apic.rs @@ -886,7 +886,7 @@ pub fn ipi_tlb_flush() { #[allow(unused_variables)] pub fn wakeup_core(core_id_to_wakeup: CoreId) { #[cfg(all(feature = "smp", not(feature = "idle-poll")))] - if core_id_to_wakeup != core_id() && !processor::supports_mwait() { + if core_id_to_wakeup != core_id() && !processor::supports_mwait() && scheduler::core_wake_up(core_id_to_wakeup) { without_interrupts(|| { let apic_ids = CPU_LOCAL_APIC_IDS.lock(); let local_apic_id = apic_ids[core_id_to_wakeup as usize]; diff --git a/src/arch/x86_64/kernel/interrupts.rs b/src/arch/x86_64/kernel/interrupts.rs index 02c8e3cfec..c175398952 100644 --- a/src/arch/x86_64/kernel/interrupts.rs +++ b/src/arch/x86_64/kernel/interrupts.rs @@ -82,6 +82,11 @@ pub(crate) fn enable_and_wait() { ); } } else { + #[cfg(feature = "smp")] + if !scheduler::core_sleep() { + return; + } + enable_and_hlt(); } } diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index e665e82cab..0287f28fb9 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -8,6 +8,8 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::cell::RefCell; use core::ptr; +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +use core::sync::atomic::AtomicU8; use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use ahash::RandomState; @@ -44,6 +46,129 @@ static WAITING_TASKS: InterruptTicketMutex /// Map between Task ID and TaskHandle static TASKS: InterruptTicketMutex> = InterruptTicketMutex::new(BTreeMap::new()); +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +static CORE_HLT_STATE: RwSpinLock> = RwSpinLock::new(Vec::new()); + +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +struct CoreState(AtomicU8); + +/// # Race condition prevention +/// +/// Two methods matter here: [arch::kernel::interrupts::enable_and_wait] and [arch::kernel::apic::wakeup_core]. +/// `enable_and_wait` checks the status, ensuring it is set to 0+setting it to 1, then sleeps. +/// `wakeup_core` checks the status, ensuring it is set to 1+setting it to 0, then issues interrupt. +/// +/// A race would happen if `go_to_sleep` returns true, and `wake_up` returns false. +/// The result of these two methods must therefore be atomically determined in a single operation. +/// No ordering should exist in which that condition can happen. +/// Any other variant is okay: +/// - if `go_to_sleep` is false and `wake_up` is true, we have a un-necessary core wakeup (which is okay) +/// - if both are false, the core is not entering sleep and is therefore not woken up +/// - if both are true, the core sleeps and is woken up +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +impl CoreState { + /// The core is currently busy and should not be interrupted for new tasks + const STATUS_ACTIVE: u8 = 0; + + /// The core is currently halted and can be interrupted for new tasks + const STATUS_IDLE: u8 = 1; + + /// Another core tried to wake up this core but it was already active. + const STATUS_DONT_SLEEP: u8 = 2; + + pub fn new() -> Self { + Self(AtomicU8::new(CoreState::STATUS_ACTIVE)) + } + + #[inline(always)] + pub fn set_active(&self) { + self.0.store(CoreState::STATUS_ACTIVE, Ordering::SeqCst); + } + + /// Indicates that this core will go to HLT. + /// This must be called *before* entering a HLT loop, and *with interrupts disabled*. + /// Returns a boolean indicating if the core should enter the HLT loop or not + #[inline(always)] + pub fn go_to_sleep(&self) -> bool { + if self + .0 + .compare_exchange( + CoreState::STATUS_ACTIVE, + CoreState::STATUS_IDLE, + Ordering::SeqCst, + Ordering::Relaxed, + ) + .is_err() + { + // There is a possible race condition here, because the two atomic operations can be + // interleaved, but this has no effect on the final result: we're not going to sleep + // anyway. + // If the state was already set to ACTIVE, this has no effect. + // Normally, nothing can set the state to IDLE except this method. + // So the race should have no effect. + // IN ANY CASE: as per top-level comments, returning false is the safe default here. + self.set_active(); + false + } else { + // We have correctly read status ACTIVE and set STATUS_IDLE. We go to sleep. This is + // safe because: + // - Another `wake_up` could be processing, either BEFORE or AFTER the atomic `swap`. + // * BEFORE: + // We have set the state to STATUS_IDLE, so `swap` will read that value and + // wake_up will return true ==> SAFE. + // * AFTER: **We cannot be here!** Indeed, `swap` has set the status to + // `STATUS_DONT_SLEEP`, so `compare_exchange` is in the `Err` case. + true + } + } + + /// Request to wake up this core. + /// Returns a boolean indicating if an interrupt should be sent, or if the core is already active + #[inline(always)] + pub fn wake_up(&self) -> bool { + // Ask the core not to sleep. + // This makes sure that if the two atomic operations become interleaved, the core will + // not go to sleep with us assuming it was running. + let previous_state = self.0.swap(CoreState::STATUS_DONT_SLEEP, Ordering::SeqCst); + + // If the core was idle, we can actually wake it up + if previous_state == CoreState::STATUS_IDLE { + // Again, this operation is not necessarily ordered. + // - Another `go_to_sleep` could be processing, either BEFORE or AFTER the atomic op. + // * BEFORE: + // We have set the state to STATUS_DONT_SLEEP, so the atomic op will read that value + // and go_to_sleep will return false. We will send an interrupt which could have + // been avoided, but that's okay. + // * AFTER: + // The value read at the atomic OP does not matter here, in any case we WILL send + // the interrupt and wake the core up. + // IN ANY CASE: as per top-level comments, returning true is the safe default here. + self.set_active(); + true + } else { + // We don't wake up the core. This is safe, because: + // - Another `go_to_sleep` could be processing, either BEFORE or AFTER the atomic op. + // * BEFORE: + // We have set the state to STATUS_DONT_SLEEP, so the atomic op will read that value + // and go_to_sleep will return false ==> SAFE. + // * AFTER: **We cannot be here!** Indeed, the atomic operation in `go_to_sleep` + // also sets the state to STATUS_IDLE, so `previous_state` MUST BE STATUS_IDLE. + false + } + } +} + +#[inline] +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +pub fn core_sleep() -> bool { + CORE_HLT_STATE.read()[usize::try_from(core_id()).unwrap()].go_to_sleep() +} + +#[inline] +#[cfg(all(target_arch = "x86_64", feature = "smp", not(feature = "idle-poll")))] +pub fn core_wake_up(core_id: CoreId) -> bool { + CORE_HLT_STATE.read()[usize::try_from(core_id).unwrap()].wake_up() +} /// Unique identifier for a core. pub type CoreId = u32; @@ -887,6 +1012,10 @@ pub(crate) fn add_current_core() { core_id.try_into().unwrap(), &CoreLocal::get().scheduler_input, ); + #[cfg(all(target_arch = "x86_64", not(feature = "idle-poll")))] + CORE_HLT_STATE + .write() + .insert(core_id.try_into().unwrap(), CoreState::new()); } }