Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions ddprof-lib/src/main/cpp/ctimer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class CTimer : public Engine {
protected:
// This is accessed from signal handlers, so must be async-signal-safe
static bool _enabled;
// Count of signal handlers currently executing past the _enabled check.
// stop() drains this to zero before tearing down JFR structures, closing
// the TOCTOU window between the _enabled check and JFR buffer access.
static int _inflight;
static long _interval;
static CStack _cstack;
static int _signal;
Expand Down Expand Up @@ -57,6 +61,15 @@ class CTimer : public Engine {
__atomic_store_n(&_enabled, enabled, __ATOMIC_RELEASE);
}

// Spin until all signal handlers that passed the _enabled=true check have
// returned. Must be called with _enabled already false (after disableEngines()),
// before any JFR teardown that handlers could race against.
static void drainInflight() {
while (__atomic_load_n(&_inflight, __ATOMIC_ACQUIRE) > 0) {
sched_yield();
}
}

// Get the signal number used by CTimer (0 if not initialized)
static int getSignal() { return _signal; }
};
Expand Down
20 changes: 19 additions & 1 deletion ddprof-lib/src/main/cpp/ctimer_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ int CTimer::_max_timers = 0;
int *CTimer::_timers = NULL;
CStack CTimer::_cstack;
bool CTimer::_enabled = false;
int CTimer::_inflight = 0;
int CTimer::_signal;

int CTimer::registerThread(int tid) {
Expand Down Expand Up @@ -180,6 +181,10 @@ void CTimer::stop() {
for (int i = 0; i < _max_timers; i++) {
unregisterThread(i);
}
// _enabled is already false (disableEngines() is called before stop()).
// Spin until all handlers that passed the _enabled=true check have returned,
// so it is safe to tear down JFR structures on return.
drainInflight();
}

Error CTimerJvmti::check(Arguments &args) {
Expand Down Expand Up @@ -210,12 +215,15 @@ void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
}
Counters::increment(CTIMER_SIGNAL_OWN);

__atomic_fetch_add(&_inflight, 1, __ATOMIC_ACQUIRE);
CriticalSection cs;
if (!cs.entered()) {
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
return;
}
int saved_errno = errno;
if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE)) {
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
errno = saved_errno;
return;
}
Expand Down Expand Up @@ -245,6 +253,7 @@ void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
Profiler::instance()->recordSampleDelegated(ucontext, _interval, tid,
BCI_CPU, &event);
Shims::instance().setSighandlerTid(-1);
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
errno = saved_errno;
}

Expand All @@ -261,17 +270,24 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
}
Counters::increment(CTIMER_SIGNAL_OWN);

// Increment before the _enabled check so drainInflight() reliably waits
// for all handlers currently past the check.
__atomic_fetch_add(&_inflight, 1, __ATOMIC_ACQUIRE);

// Atomically try to enter critical section - prevents all reentrancy races
CriticalSection cs;
if (!cs.entered()) {
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
return; // Another critical section is active, defer profiling
}
// Save the current errno value
int saved_errno = errno;
// we want to ensure memory order because of the possibility the instance gets
// cleared
if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE))
if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE)) {
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
return;
}
int tid = 0;
ProfiledThread *current = ProfiledThread::currentSignalSafe();
assert(current == nullptr || !current->isDeepCrashHandler());
Expand All @@ -282,6 +298,7 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
if (current != nullptr && JVMThread::isInitialized() && JVMThread::current() == nullptr
&& current->inInitWindow()) {
current->tickInitWindow();
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
errno = saved_errno;
return;
}
Expand All @@ -298,6 +315,7 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
Profiler::instance()->recordSample(ucontext, _interval, tid, BCI_CPU, 0,
&event);
Shims::instance().setSighandlerTid(-1);
__atomic_fetch_sub(&_inflight, 1, __ATOMIC_RELEASE);
// we need to avoid spoiling the value of errno (tsan report)
errno = saved_errno;
}
Expand Down
12 changes: 9 additions & 3 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1381,8 +1381,6 @@ Error Profiler::start(Arguments &args, bool reset) {
_libs->updateBuildIds();
}

enableEngines();

// Refresher must be running before the trap fires: dlopen_hook's
// signal-context branch only marks dirty and relies on the refresher
// to call refresh() within REFRESH_INTERVAL_NS (500 ms).
Expand All @@ -1396,7 +1394,6 @@ Error Profiler::start(Arguments &args, bool reset) {
_num_context_attributes = args._context_attributes.size();
error = _jfr.start(args, reset);
if (error) {
disableEngines();
switchLibraryTrap(false);
_libs->stopRefresher();
return error;
Expand Down Expand Up @@ -1471,6 +1468,15 @@ Error Profiler::start(Arguments &args, bool reset) {
// TODO: find a better way to resolve the thread name.
onThreadStart(nullptr, nullptr, nullptr);

// enableEngines() (_enabled=true) is intentionally deferred until here,
// after _jfr.start() and all engine starts. Previously it was called
// before _jfr.start(), creating a TOCTOU race: a SIGPROF delivered
// between enableEngines() and _jfr.start() completing would enter
// recordSample() on partially-initialized JFR structures.
// Paired with drainInflight() in CTimer::stop() which closes the
// symmetric race on the stop side.
enableEngines();

_state.store(RUNNING, std::memory_order_release);
_start_time = time(NULL);
__atomic_add_fetch(&_epoch, 1, __ATOMIC_RELAXED);
Expand Down
Loading