From 0ed3fb5629f377d008ffb097a0b72bd67a692814 Mon Sep 17 00:00:00 2001 From: "erwan.viollet" Date: Tue, 23 Jun 2026 15:25:22 +0200 Subject: [PATCH] fix(profiler): close two TOCTOU races between SIGPROF handler and JFR lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU profiler sends SIGPROF to all threads via per-thread kernel timers. The signal handler checks _enabled and, if true, calls recordSample() which accesses JFR buffers. Two races existed around the recording cycle transition (default every 60 s) where JFR structures could be in mid-init or mid-teardown while the handler was active: Race 1 — stop() side (TOCTOU on _enabled vs _jfr.stop()): A handler that passed the _enabled=true check could still be executing inside recordSample() when disableEngines() set _enabled=false and _jfr.stop() freed JFR buffers — use-after-free → SIGSEGV. Fix: add an _inflight counter (incremented on handler entry, decremented on all exits). CTimer::stop() calls drainInflight() after deleting per- thread timers, spinning until _inflight==0 before returning to the caller that proceeds to _jfr.stop(). Any handler that fires after disableEngines() sees _enabled=false and returns early without touching JFR. Race 2 — start() side (enableEngines() before _jfr.start()): enableEngines() set _enabled=true before _jfr.start() had completed. A SIGPROF in that window would see _enabled=true and call recordSample() on partially-initialized JFR structures. Fix: move enableEngines() to after _jfr.start() and _cpu_engine->start() have both returned successfully (just before _state.store(RUNNING)). Validated empirically: a controlled reproducer in DataDog/profiling-backend (AnalysisEndpointTest.testResourceExhausted with a 60 s recording period) showed ~60% failure rate without the fix (SIGSEGV / hang), 0% with both fixes applied (12/12 iterations clean). Each fix alone only partially addressed the failures, confirming both races were independently active. Co-Authored-By: Claude Sonnet 4.5 --- ddprof-lib/src/main/cpp/ctimer.h | 13 +++++++++++++ ddprof-lib/src/main/cpp/ctimer_linux.cpp | 20 +++++++++++++++++++- ddprof-lib/src/main/cpp/profiler.cpp | 12 +++++++++--- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/ddprof-lib/src/main/cpp/ctimer.h b/ddprof-lib/src/main/cpp/ctimer.h index c99d2cd0e..ce2c6b96f 100644 --- a/ddprof-lib/src/main/cpp/ctimer.h +++ b/ddprof-lib/src/main/cpp/ctimer.h @@ -28,6 +28,10 @@ class CTimer : public Engine { protected: // This is accessed from signal handlers, so must be async-signal-safe static bool _enabled; + // Count of signal handlers currently executing past the _enabled check. + // stop() drains this to zero before tearing down JFR structures, closing + // the TOCTOU window between the _enabled check and JFR buffer access. + static int _inflight; static long _interval; static CStack _cstack; static int _signal; @@ -57,6 +61,15 @@ class CTimer : public Engine { __atomic_store_n(&_enabled, enabled, __ATOMIC_RELEASE); } + // Spin until all signal handlers that passed the _enabled=true check have + // returned. Must be called with _enabled already false (after disableEngines()), + // before any JFR teardown that handlers could race against. + static void drainInflight() { + while (__atomic_load_n(&_inflight, __ATOMIC_ACQUIRE) > 0) { + sched_yield(); + } + } + // Get the signal number used by CTimer (0 if not initialized) static int getSignal() { return _signal; } }; diff --git a/ddprof-lib/src/main/cpp/ctimer_linux.cpp b/ddprof-lib/src/main/cpp/ctimer_linux.cpp index e0d2b8b0c..1d487d896 100644 --- a/ddprof-lib/src/main/cpp/ctimer_linux.cpp +++ b/ddprof-lib/src/main/cpp/ctimer_linux.cpp @@ -49,6 +49,7 @@ int CTimer::_max_timers = 0; int *CTimer::_timers = NULL; CStack CTimer::_cstack; bool CTimer::_enabled = false; +int CTimer::_inflight = 0; int CTimer::_signal; int CTimer::registerThread(int tid) { @@ -180,6 +181,10 @@ void CTimer::stop() { for (int i = 0; i < _max_timers; i++) { unregisterThread(i); } + // _enabled is already false (disableEngines() is called before stop()). + // Spin until all handlers that passed the _enabled=true check have returned, + // so it is safe to tear down JFR structures on return. + drainInflight(); } Error CTimerJvmti::check(Arguments &args) { @@ -210,12 +215,15 @@ void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { } Counters::increment(CTIMER_SIGNAL_OWN); + __atomic_fetch_add(&_inflight, 1, __ATOMIC_ACQUIRE); CriticalSection cs; if (!cs.entered()) { + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); return; } int saved_errno = errno; if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE)) { + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); errno = saved_errno; return; } @@ -245,6 +253,7 @@ void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { Profiler::instance()->recordSampleDelegated(ucontext, _interval, tid, BCI_CPU, &event); Shims::instance().setSighandlerTid(-1); + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); errno = saved_errno; } @@ -261,17 +270,24 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { } Counters::increment(CTIMER_SIGNAL_OWN); + // Increment before the _enabled check so drainInflight() reliably waits + // for all handlers currently past the check. + __atomic_fetch_add(&_inflight, 1, __ATOMIC_ACQUIRE); + // Atomically try to enter critical section - prevents all reentrancy races CriticalSection cs; if (!cs.entered()) { + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); return; // Another critical section is active, defer profiling } // Save the current errno value int saved_errno = errno; // we want to ensure memory order because of the possibility the instance gets // cleared - if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE)) + if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE)) { + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); return; + } int tid = 0; ProfiledThread *current = ProfiledThread::currentSignalSafe(); assert(current == nullptr || !current->isDeepCrashHandler()); @@ -282,6 +298,7 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { if (current != nullptr && JVMThread::isInitialized() && JVMThread::current() == nullptr && current->inInitWindow()) { current->tickInitWindow(); + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); errno = saved_errno; return; } @@ -298,6 +315,7 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, 0, &event); Shims::instance().setSighandlerTid(-1); + __atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE); // we need to avoid spoiling the value of errno (tsan report) errno = saved_errno; } diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index def717294..f14abca91 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1381,8 +1381,6 @@ Error Profiler::start(Arguments &args, bool reset) { _libs->updateBuildIds(); } - enableEngines(); - // Refresher must be running before the trap fires: dlopen_hook's // signal-context branch only marks dirty and relies on the refresher // to call refresh() within REFRESH_INTERVAL_NS (500 ms). @@ -1396,7 +1394,6 @@ Error Profiler::start(Arguments &args, bool reset) { _num_context_attributes = args._context_attributes.size(); error = _jfr.start(args, reset); if (error) { - disableEngines(); switchLibraryTrap(false); _libs->stopRefresher(); return error; @@ -1471,6 +1468,15 @@ Error Profiler::start(Arguments &args, bool reset) { // TODO: find a better way to resolve the thread name. onThreadStart(nullptr, nullptr, nullptr); + // enableEngines() (_enabled=true) is intentionally deferred until here, + // after _jfr.start() and all engine starts. Previously it was called + // before _jfr.start(), creating a TOCTOU race: a SIGPROF delivered + // between enableEngines() and _jfr.start() completing would enter + // recordSample() on partially-initialized JFR structures. + // Paired with drainInflight() in CTimer::stop() which closes the + // symmetric race on the stop side. + enableEngines(); + _state.store(RUNNING, std::memory_order_release); _start_time = time(NULL); __atomic_add_fetch(&_epoch, 1, __ATOMIC_RELAXED);