From 47d54b265a7925dffdeff5f595cf54af63ba2310 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 14:04:25 +0200 Subject: [PATCH 1/9] feat(profiler): add async-signal-safety sanitizer (PROF-14763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a runtime sanitizer for the profiler's signal handlers via a thread-local depth counter, an RAII guard at each handler entry, and DEBUG_ASSERT_NOT_IN_SIGNAL() at known async-signal-unsafe APIs. In debug/ASAN builds the assertion aborts with a file:line diagnostic and writes /tmp/signal-safety-violation.txt for CI to upload as an artifact. Three macros encapsulate the depth counter — production code never touches the counter directly: SIGNAL_HANDLER_GUARD() — RAII increment/decrement SIGNAL_HANDLER_GUARD_RELEASE() — manual early release before chaining to handlers that may longjmp through us SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP() — decrement at setjmp landing The depth counter and isInSignalContext()/getInSignalDepth() accessors are always on (~2 thread-local writes per signal handler invocation) and live in guards.{h,cpp} so production code can query signal-context state for AS-safe deferred paths (used by Profiler::dlopen_hook in the companion commit). Only the DEBUG_ASSERT_NOT_IN_SIGNAL diagnostic macro is conditional on NDEBUG/ASAN_ENABLED; in release builds it expands to a no-op. Wired into all 10 installed signal handlers (ITimer, ITimerJvmti, CTimer, CTimerJvmti, PerfEvents, WallClockASGCT, WallClockJvmti, segvHandler, busHandler, wakeupHandler). Assertions placed at: Dictionary::lookup (inserting overloads), Dictionary::clear, Recording::writeClasses, FlightRecorder::recordDatadogSetting, FlightRecorder::recordHeapUsage, Mutex::lock. Two longjmp-aware fixes: - J9 SIGSEGV null-pointer-check handler: segv/busHandler use GUARD_RELEASE() before chaining to orig_segvHandler/orig_busHandler, since the chained handler may siglongjmp and skip our destructor. - HotSpot checkFault longjmp: walkVM's setjmp landing uses UNWIND_AFTER_LONGJMP() to undo the increment from the unwound frame. CI test_workflow.yml uploads /tmp/signal-safety-violation.txt as an artifact when test jobs fail on glibc amd64/aarch64. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .github/workflows/test_workflow.yml | 14 ++++ ddprof-lib/src/main/cpp/ctimer_linux.cpp | 2 + ddprof-lib/src/main/cpp/dictionary.cpp | 4 + ddprof-lib/src/main/cpp/flightRecorder.cpp | 10 ++- ddprof-lib/src/main/cpp/guards.cpp | 3 + ddprof-lib/src/main/cpp/guards.h | 54 ++++++++++++ .../src/main/cpp/hotspot/hotspotSupport.cpp | 4 + ddprof-lib/src/main/cpp/itimer.cpp | 2 + ddprof-lib/src/main/cpp/mutex.cpp | 2 + ddprof-lib/src/main/cpp/perfEvents_linux.cpp | 1 + ddprof-lib/src/main/cpp/profiler.cpp | 19 ++++- ddprof-lib/src/main/cpp/signalSafety.cpp | 20 +++++ ddprof-lib/src/main/cpp/signalSafety.h | 82 +++++++++++++++++++ ddprof-lib/src/main/cpp/vmEntry.cpp | 2 + ddprof-lib/src/main/cpp/wallClock.cpp | 2 + ddprof-lib/src/test/cpp/signalSafety_ut.cpp | 53 ++++++++++++ 16 files changed, 270 insertions(+), 4 deletions(-) create mode 100644 ddprof-lib/src/main/cpp/signalSafety.cpp create mode 100644 ddprof-lib/src/main/cpp/signalSafety.h create mode 100644 ddprof-lib/src/test/cpp/signalSafety_ut.cpp diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index fcda543c4..516cfc409 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -164,6 +164,13 @@ jobs: with: name: (test-reports) test-linux-glibc-amd64 (${{ matrix.java_version }}, ${{ matrix.config }}) path: test-reports + - name: Upload signal-safety violation log + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: failure() + with: + name: signal-safety-violation-glibc-${{ matrix.java_version }}-${{ matrix.config }}-amd64 + path: /tmp/signal-safety-violation.txt + if-no-files-found: ignore test-linux-musl-amd64: needs: [cache-jdks, filter-musl-configs] @@ -443,6 +450,13 @@ jobs: with: name: (test-reports) test-linux-glibc-aarch64 (${{ matrix.java_version }}, ${{ matrix.config }}) path: test-reports + - name: Upload signal-safety violation log + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: failure() + with: + name: signal-safety-violation-glibc-${{ matrix.java_version }}-${{ matrix.config }}-aarch64 + path: /tmp/signal-safety-violation.txt + if-no-files-found: ignore test-linux-musl-aarch64: needs: [cache-jdks, filter-musl-configs] diff --git a/ddprof-lib/src/main/cpp/ctimer_linux.cpp b/ddprof-lib/src/main/cpp/ctimer_linux.cpp index 2815164df..e0d2b8b0c 100644 --- a/ddprof-lib/src/main/cpp/ctimer_linux.cpp +++ b/ddprof-lib/src/main/cpp/ctimer_linux.cpp @@ -202,6 +202,7 @@ Error CTimerJvmti::start(Arguments &args) { } void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); if (!OS::shouldProcessSignal(siginfo, SI_TIMER, SignalCookie::cpu())) { Counters::increment(CTIMER_SIGNAL_FOREIGN); OS::forwardForeignSignal(signo, siginfo, ucontext); @@ -248,6 +249,7 @@ void CTimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { } void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); // Reject signals that did not originate from our timer_create timers. // This guards against Go's process-wide setitimer(ITIMER_PROF) and other // foreign SIGPROF sources that would otherwise drive our handler onto diff --git a/ddprof-lib/src/main/cpp/dictionary.cpp b/ddprof-lib/src/main/cpp/dictionary.cpp index c1ca2860d..b9066d474 100644 --- a/ddprof-lib/src/main/cpp/dictionary.cpp +++ b/ddprof-lib/src/main/cpp/dictionary.cpp @@ -17,6 +17,7 @@ #include "dictionary.h" #include "arch.h" #include "counters.h" +#include "signalSafety.h" #include #include #include @@ -41,6 +42,7 @@ Dictionary::~Dictionary() { } void Dictionary::clear() { + DEBUG_ASSERT_NOT_IN_SIGNAL(); clear(_table, _id); memset(_table, 0, sizeof(DictTable)); _table->base_index = _base_index = 1; @@ -79,10 +81,12 @@ unsigned int Dictionary::hash(const char *key, size_t length) { } unsigned int Dictionary::lookup(const char *key) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); return lookup(key, strlen(key)); } unsigned int Dictionary::lookup(const char *key, size_t length) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); return lookup(key, length, true, 0); } diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 98e8d0a87..789b9debc 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -19,6 +19,7 @@ #include "jniHelper.h" #include "os.h" #include "profiler.h" +#include "signalSafety.h" #include "rustDemangler.h" #include "safeAccess.h" #include "spinLock.h" @@ -1411,6 +1412,7 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) { } void Recording::writeClasses(Buffer *buf, Lookup *lookup) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); std::map classes; // Hold classMapSharedGuard() for the full function. The const char* pointers // stored in classes point into dictionary row storage; clear() frees that @@ -1720,10 +1722,11 @@ void Recording::recordCpuLoad(Buffer *buf, float proc_user, float proc_system, // assumption is that we hold the lock (with lock_index) void Recording::addThread(int lock_index, int tid) { int active = _active_index.load(std::memory_order_acquire); - _thread_ids[lock_index][active].insert(tid); + _thread_ids[lock_index][active].insert(tid); // ThreadIdTable::insert is signal-safe (atomics only) } Error FlightRecorder::start(Arguments &args, bool reset) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); ExclusiveLockGuard locker(&_rec_lock); const char *file = args.file(); if (file == NULL || file[0] == 0) { @@ -1751,6 +1754,7 @@ Error FlightRecorder::newRecording(bool reset) { } void FlightRecorder::stop() { + DEBUG_ASSERT_NOT_IN_SIGNAL(); ExclusiveLockGuard locker(&_rec_lock); Recording* rec = _rec; if (rec != nullptr) { @@ -1761,6 +1765,7 @@ void FlightRecorder::stop() { } Error FlightRecorder::dump(const char *filename, const int length) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); assert(length >= 0); ExclusiveLockGuard locker(&_rec_lock); Recording* rec = _rec; @@ -1781,6 +1786,7 @@ Error FlightRecorder::dump(const char *filename, const int length) { } void FlightRecorder::flush() { + DEBUG_ASSERT_NOT_IN_SIGNAL(); ExclusiveLockGuard locker(&_rec_lock); Recording* rec = _rec; if (rec != nullptr) { @@ -1845,6 +1851,7 @@ void FlightRecorder::recordQueueTime(int lock_index, int tid, void FlightRecorder::recordDatadogSetting(int lock_index, int length, const char *name, const char *value, const char *unit) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); OptionalSharedLockGuard locker(&_rec_lock); if (locker.ownsLock()) { Recording* rec = _rec; @@ -1856,6 +1863,7 @@ void FlightRecorder::recordDatadogSetting(int lock_index, int length, } void FlightRecorder::recordHeapUsage(int lock_index, long value, bool live) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); OptionalSharedLockGuard locker(&_rec_lock); if (locker.ownsLock()) { Recording* rec = _rec; diff --git a/ddprof-lib/src/main/cpp/guards.cpp b/ddprof-lib/src/main/cpp/guards.cpp index cc46e64ac..eaab4fe75 100644 --- a/ddprof-lib/src/main/cpp/guards.cpp +++ b/ddprof-lib/src/main/cpp/guards.cpp @@ -19,6 +19,9 @@ #include "os.h" #include "thread.h" +// Always-on signal-context depth counter; see comments in guards.h. +thread_local int _in_signal_handler_depth = 0; + // Static bitmap storage for fallback cases uint64_t CriticalSection::_fallback_bitmap[CriticalSection::FALLBACK_BITMAP_WORDS] = {}; diff --git a/ddprof-lib/src/main/cpp/guards.h b/ddprof-lib/src/main/cpp/guards.h index a5e4b0793..17ef97f98 100644 --- a/ddprof-lib/src/main/cpp/guards.h +++ b/ddprof-lib/src/main/cpp/guards.h @@ -24,6 +24,60 @@ class ProfiledThread; +// --------------------------------------------------------------------------- +// Signal-context depth tracking — always on. +// +// Code paths that must choose between an async-signal-safe deferred path and +// a synchronous path (e.g. Profiler::dlopen_hook, which has to defer when a +// library is lazily loaded from signal context) query isInSignalContext(). +// The debug-only DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h +// asserts on the same counter. +// +// Cost in release: ~2 thread-local writes per signal handler invocation +// (negligible). Pre-warming libgcc_s in Profiler::start() closes the +// known case; this counter is the safety net for the remainder. +// --------------------------------------------------------------------------- + +// Counter so nested signals (e.g. SIGSEGV inside SIGPROF) keep the flag +// asserted until the outermost handler returns. +extern thread_local int _in_signal_handler_depth; + +inline int getInSignalDepth() { return _in_signal_handler_depth; } +inline bool isInSignalContext() { return _in_signal_handler_depth != 0; } + +// Internal RAII type — do not instantiate directly; use the macros below. +class SignalHandlerScope { +public: + SignalHandlerScope() { ++_in_signal_handler_depth; } + ~SignalHandlerScope() { if (_active) --_in_signal_handler_depth; } + void release() { if (_active) { --_in_signal_handler_depth; _active = false; } } + SignalHandlerScope(const SignalHandlerScope&) = delete; + SignalHandlerScope& operator=(const SignalHandlerScope&) = delete; +private: + bool _active = true; +}; + +// Declare a scope guard local that increments the depth on entry and +// decrements on scope exit. Use as the very first statement in every +// installed signal handler. +#define SIGNAL_HANDLER_GUARD() SignalHandlerScope _signal_handler_scope + +// Manually release the most recent SIGNAL_HANDLER_GUARD() before chaining to +// another handler that may longjmp through us (e.g. J9's SIGSEGV null-pointer +// check handler). After release(), depth has already been decremented; the +// destructor becomes a no-op. +#define SIGNAL_HANDLER_GUARD_RELEASE() _signal_handler_scope.release() + +// Compensate for a longjmp that bypassed a SignalHandlerScope's destructor. +// Call at the setjmp landing point AFTER a known longjmp originated from +// within a signal handler frame (e.g. HotSpot's checkFault → longjmp recovery +// in walkVM). +#define SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP() \ + do { \ + if (_in_signal_handler_depth > 0) \ + --_in_signal_handler_depth; \ + } while (0) + /** * Race-free critical section using atomic compare-and-swap. * diff --git a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp index cc6c6eb60..b63b66e1d 100644 --- a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp +++ b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp @@ -13,6 +13,7 @@ #include "hotspot/vmStructs.inline.h" #include "jvmSupport.h" #include "profiler.h" +#include "guards.h" #include "stackWalker.inline.h" #include "frames.h" @@ -187,6 +188,9 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex profiled_thread->setCrashProtectionActive(true); } if (setjmp(crash_protection_ctx) != 0) { + // checkFault() does a longjmp from inside segvHandler, bypassing + // segvHandler's SignalHandlerScope destructor. Compensate. + SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP(); if (profiled_thread != nullptr) { profiled_thread->setCrashProtectionActive(false); } diff --git a/ddprof-lib/src/main/cpp/itimer.cpp b/ddprof-lib/src/main/cpp/itimer.cpp index a69634ffe..f041e1885 100644 --- a/ddprof-lib/src/main/cpp/itimer.cpp +++ b/ddprof-lib/src/main/cpp/itimer.cpp @@ -31,6 +31,7 @@ long ITimer::_interval; CStack ITimer::_cstack; void ITimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); // NOTE: ITimer uses setitimer(ITIMER_PROF) which delivers signals with // si_code==SI_KERNEL — no sival payload is available. The signal-origin // check implemented in CTimer/WallClock cannot be applied here. ITimer @@ -102,6 +103,7 @@ volatile bool ITimerJvmti::_enabled = false; long ITimerJvmti::_interval = 0; void ITimerJvmti::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); CriticalSection cs; if (!cs.entered()) { return; diff --git a/ddprof-lib/src/main/cpp/mutex.cpp b/ddprof-lib/src/main/cpp/mutex.cpp index 85228dbf6..8b9c92b64 100644 --- a/ddprof-lib/src/main/cpp/mutex.cpp +++ b/ddprof-lib/src/main/cpp/mutex.cpp @@ -4,6 +4,7 @@ */ #include "mutex.h" +#include "signalSafety.h" Mutex::Mutex() { @@ -14,6 +15,7 @@ Mutex::Mutex() { } void Mutex::lock() { + DEBUG_ASSERT_NOT_IN_SIGNAL(); pthread_mutex_lock(&_mutex); } diff --git a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp index 2edc05f46..85ec23d5e 100644 --- a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp +++ b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp @@ -730,6 +730,7 @@ u64 PerfEvents::readCounter(siginfo_t *siginfo, void *ucontext) { } void PerfEvents::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); if (siginfo->si_code <= 0) { // Looks like an external signal; don't treat as a profiling event return; diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index a328026b8..8a8195d9a 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -11,6 +11,7 @@ #include "context.h" #include "context_api.h" #include "guards.h" +#include "signalSafety.h" #include "common.h" #include "counters.h" #include "ctimer.h" @@ -801,10 +802,18 @@ void Profiler::disableEngines() { } void Profiler::segvHandler(int signo, siginfo_t *siginfo, void *ucontext) { + // J9 installs a SIGSEGV handler that uses siglongjmp() to recover from + // null-pointer-check faults during normal Java execution. When we chain to + // it, that longjmp unwinds past our stack frame and skips the RAII + // destructor, permanently leaking depth on the thread. Release the guard + // before chaining so depth is correct whether the chained handler returns + // or longjmps. + SIGNAL_HANDLER_GUARD(); if (crashHandlerInternal(signo, siginfo, ucontext)) { - return; // Handled + return; // Handled — destructor decrements depth } - // Not handled, chain to next handler + SIGNAL_HANDLER_GUARD_RELEASE(); + // Not handled, chain to next handler (may longjmp; never return through us) SigAction chain = OS::getSegvChainTarget(); if (chain != nullptr) { chain(signo, siginfo, ucontext); @@ -814,9 +823,13 @@ void Profiler::segvHandler(int signo, siginfo_t *siginfo, void *ucontext) { } void Profiler::busHandler(int signo, siginfo_t *siginfo, void *ucontext) { + // See segvHandler: release before chaining in case the chained handler + // longjmps through us. + SIGNAL_HANDLER_GUARD(); if (crashHandlerInternal(signo, siginfo, ucontext)) { - return; // Handled + return; // Handled — destructor decrements depth } + SIGNAL_HANDLER_GUARD_RELEASE(); // Not handled, chain to next handler SigAction chain = OS::getBusChainTarget(); if (chain != nullptr) { diff --git a/ddprof-lib/src/main/cpp/signalSafety.cpp b/ddprof-lib/src/main/cpp/signalSafety.cpp new file mode 100644 index 000000000..0f8dc3038 --- /dev/null +++ b/ddprof-lib/src/main/cpp/signalSafety.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2026, Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Header-only module — see signalSafety.h for DEBUG_ASSERT_NOT_IN_SIGNAL(). +// The signal-context depth counter (used by both the assertion and the +// production AS-safe deferred path) lives in guards.{h,cpp}. +#include "signalSafety.h" diff --git a/ddprof-lib/src/main/cpp/signalSafety.h b/ddprof-lib/src/main/cpp/signalSafety.h new file mode 100644 index 000000000..b2b9c47ad --- /dev/null +++ b/ddprof-lib/src/main/cpp/signalSafety.h @@ -0,0 +1,82 @@ +/* + * Copyright 2026, Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SIGNAL_SAFETY_H +#define _SIGNAL_SAFETY_H + +#include "guards.h" // _in_signal_handler_depth, SIGNAL_HANDLER_GUARD, ... + +// Detect ASAN using compiler-provided macros so the ASAN_ENABLED guard below +// works in every TU that includes this header, independent of include order. +#ifdef __has_feature +# if __has_feature(address_sanitizer) +# ifndef ASAN_ENABLED +# define ASAN_ENABLED 1 +# endif +# endif +#endif +#ifdef __SANITIZE_ADDRESS__ +# ifndef ASAN_ENABLED +# define ASAN_ENABLED 1 +# endif +#endif + +// Debug-only AS-safety assertion. Aborts with a file:line diagnostic when +// invoked from inside a signal handler in debug / ASAN builds; compiles to a +// no-op in release builds (NDEBUG). +// +// The depth counter itself lives in guards.h (always-on, needed by +// Profiler::dlopen_hook for the AS-safe deferred path). Only the diagnostic +// abort is conditional. +// +// write(2) is POSIX async-signal-safe. abort() generates a core dump and +// triggers ASAN's stack-trace symbolization, making it far more debuggable +// than _exit(1). The macro is only active in debug/ASAN builds where we +// intentionally trade AS-safety of the abort path for diagnosability. +#define _SIGNAL_SAFETY_STR(x) #x +#define _SIGNAL_SAFETY_TOSTR(x) _SIGNAL_SAFETY_STR(x) + +#if !defined(NDEBUG) || defined(ASAN_ENABLED) +#include // write, STDERR_FILENO, open, close +#include // abort +#include // O_WRONLY, O_CREAT, O_APPEND + +// Path for the diagnostic file — picked up as a CI artifact on failure. +#define _SIGNAL_SAFETY_DIAG_FILE "/tmp/signal-safety-violation.txt" + +#define DEBUG_ASSERT_NOT_IN_SIGNAL() \ + do { \ + if (_in_signal_handler_depth != 0) { \ + static const char _msg[] = \ + "[java-profiler] AS-safety violation at " \ + __FILE__ ":" _SIGNAL_SAFETY_TOSTR(__LINE__) \ + ": async-signal-unsafe call made from signal handler context\n"; \ + (void)write(STDERR_FILENO, _msg, sizeof(_msg) - 1); \ + /* Also write to a file so CI can capture it regardless of output routing. */ \ + int _diag_fd = open(_SIGNAL_SAFETY_DIAG_FILE, \ + O_WRONLY | O_CREAT | O_APPEND, 0644); \ + if (_diag_fd >= 0) { \ + (void)write(_diag_fd, _msg, sizeof(_msg) - 1); \ + close(_diag_fd); \ + } \ + abort(); \ + } \ + } while (0) +#else +#define DEBUG_ASSERT_NOT_IN_SIGNAL() ((void)0) +#endif + +#endif // _SIGNAL_SAFETY_H diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 3323cb8f8..aa5a30889 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -21,6 +21,7 @@ #include "hotspot/jitCodeCache.h" #include #include +#include "guards.h" #include #include @@ -56,6 +57,7 @@ AsyncGetCallTrace VM::_asyncGetCallTrace; JVM_GetManagement VM::_getManagement; static void wakeupHandler(int signo) { + SIGNAL_HANDLER_GUARD(); // Dummy handler for interrupting syscalls } diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index a19263f3a..959382656 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -54,6 +54,7 @@ bool BaseWallClock::inSyscall(void *ucontext) { void WallClockASGCT::sharedSignalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); // Reject any SIGVTALRM that did not originate from our rt_tgsigqueueinfo // send. Defends against stray in-process tgkill / external sigqueue that // would otherwise drive our wallclock sampling path. @@ -228,6 +229,7 @@ void WallClockASGCT::timerLoop() { void WallClockJvmti::sharedSignalHandler(int signo, siginfo_t *siginfo, void *ucontext) { + SIGNAL_HANDLER_GUARD(); WallClockJvmti *engine = reinterpret_cast(Profiler::instance()->wallEngine()); if (signo == SIGVTALRM) { diff --git a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp new file mode 100644 index 000000000..33ea8d2d5 --- /dev/null +++ b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp @@ -0,0 +1,53 @@ +/* + * Copyright 2026, Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "signalSafety.h" +#include + +class SignalSafetyTest : public ::testing::Test { +protected: + void TearDown() override { + // Catch leaked depth from a failed test so subsequent tests start clean. + ASSERT_EQ(0, getInSignalDepth()) << "depth not zero after test — check for leaked SignalHandlerScope"; + } +}; + +TEST_F(SignalSafetyTest, DepthStartsAtZero) { + EXPECT_EQ(0, getInSignalDepth()); +} + +TEST_F(SignalSafetyTest, DepthSymmetry) { + EXPECT_EQ(0, getInSignalDepth()); + { + SignalHandlerScope scope; + EXPECT_EQ(1, getInSignalDepth()); + } + EXPECT_EQ(0, getInSignalDepth()); +} + +TEST_F(SignalSafetyTest, NestedDepth) { + EXPECT_EQ(0, getInSignalDepth()); + { + SignalHandlerScope outer; + EXPECT_EQ(1, getInSignalDepth()); + { + SignalHandlerScope inner; + EXPECT_EQ(2, getInSignalDepth()); + } + EXPECT_EQ(1, getInSignalDepth()); + } + EXPECT_EQ(0, getInSignalDepth()); +} From bbe6675d0cce9087071c1aba616d5411f34ae29b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 14:04:51 +0200 Subject: [PATCH 2/9] fix(profiler): pre-warm libgcc_s + defer signal-context dlopen_hook work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dlopen_hook was performing file I/O, malloc, and mutex acquisition (parseLibraries, patch_sigaction, installHooks) potentially from async-signal context — e.g. when the JVM's DWARF unwinder lazily loads libgcc_s during stack walking on J9/aarch64, our PLT-patched dlopen hook fires from within a signal handler. Other JVM-internal lazy loads (observed under Graal on aarch64 in long-running tests) can trigger the same path. Three-pronged fix: 1. Force libgcc_s.so.1 to load during Profiler::start() via a plain dlopen on the init thread (Profiler::prewarmUnwinder). Once libgcc_s is mapped, the JVM's later resolve finds it already loaded and the known lazy-load path that would invoke our hook from signal context never runs. 2. Guard dlopen_hook with isInSignalContext() so any *other* lazy load from signal context is detected and refresh() is deferred. refresh() acquires Mutex and calls malloc, both AS-unsafe. The dlopen_hook in signal context now calls Libraries::markDirty() (a single atomic store, AS-safe) instead. 3. Background refresher thread in Libraries polls the dirty flag every REFRESH_INTERVAL_NS (5 s) and runs refresh() when it sees pending work. Bounds the window during which a lazily-loaded library is unresolvable by the stack walker — without the refresher the gap would be the entire chunk-flush interval (≥30 s). The thread runs only between Profiler::start and stop; wake-up on stop is via SIGIO/pthread_kill (same pattern as the wallclock thread) so shutdown latency is bounded by syscall interruption, not the tick interval. The SONAME "libgcc_s.so.1" is hardcoded by necessity: the release build links with -static-libgcc, so referencing _Unwind_Backtrace would not materialize libgcc_s.so as a NEEDED dependency — only dlopen by SONAME can map the shared object. libgcc_s.so.1 has been the stable SONAME since 2002; a bump would constitute a C++ ABI break. Libraries::refresh() encapsulates updateSymbols + patch_sigaction + installHooks + (optional) updateBuildIds, and clears the dirty flag on entry so concurrent dlopen_hook markDirty calls during refresh re-arm us for the next tick. remote_symbolication state moves into Libraries via setRemoteSymbolication() so the refresh path is self-contained and dlopen_hook doesn't need to reach into Profiler. Also adds Mutex::tryLock() (wraps pthread_mutex_trylock, which is on the POSIX async-signal-safe list) as a primitive available for future deferred-path work. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/libraries.cpp | 77 +++++++++++++++++++++++++++ ddprof-lib/src/main/cpp/libraries.h | 45 +++++++++++++++- ddprof-lib/src/main/cpp/mutex.h | 2 + ddprof-lib/src/main/cpp/profiler.cpp | 70 ++++++++++++++++++++---- ddprof-lib/src/main/cpp/profiler.h | 1 + ddprof-lib/src/main/cpp/wallClock.cpp | 13 +++-- 6 files changed, 193 insertions(+), 15 deletions(-) diff --git a/ddprof-lib/src/main/cpp/libraries.cpp b/ddprof-lib/src/main/cpp/libraries.cpp index 15863fcd1..321c50471 100644 --- a/ddprof-lib/src/main/cpp/libraries.cpp +++ b/ddprof-lib/src/main/cpp/libraries.cpp @@ -5,10 +5,17 @@ #include "libraries.h" #include "libraryPatcher.h" #include "log.h" +#include "mallocTracer.h" +#include "os.h" #include "symbols.h" #include "symbols_linux.h" #include "vmEntry.h" +// Cadence for the background refresher thread. Bounds the window during +// which a library lazily loaded from signal context (and therefore unable +// to call refresh() synchronously) is unresolvable by the stack walker. +static constexpr u64 REFRESH_INTERVAL_NS = 5ULL * 1'000'000'000ULL; + void Libraries::mangle(const char *name, char *buf, size_t size) { char *buf_end = buf + size; strcpy(buf, "_ZN"); @@ -41,6 +48,76 @@ void Libraries::updateSymbols(bool kernel_symbols) { LibraryPatcher::patch_libraries(); } +void Libraries::refresh() { + // Clear the flag before scanning so any concurrent markDirty() between + // now and the end of this call re-arms us for the next tick. All + // downstream operations are idempotent (parseLibraries tracks + // _parsed_inodes, patch_sigaction checks _sigaction_entries, + // installHooks uses monotonic _patched_libs, updateBuildIds tracks + // _build_id_processed), so redundant invocations are cheap. + _dirty.store(false, std::memory_order_release); + updateSymbols(false); + LibraryPatcher::patch_sigaction(); + MallocTracer::installHooks(); + if (_remote_symbolication) { + updateBuildIds(); + } +} + +void *Libraries::refresherLoop(void *arg) { + Libraries *self = static_cast(arg); + + // Publish our TID so sampler thread-list enumerations can skip us. + self->_refresher_tid.store(OS::threadId(), std::memory_order_release); + + // Block profiling signals so this thread is never sampled. Without + // this, SIGPROF can fire on the refresher and run the full stack-walk + // path here — pure overhead, and in debug builds with + // DDPROF_FORCE_STACKWALK_CRASH it inflates the SIGSEGV recovery count + // enough to starve the test work thread and break + // vmStackwalkerCrashRecoveryTest. SIGIO (WAKEUP_SIGNAL) is left + // unmasked because stopRefresher() relies on it to interrupt sleep. + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGPROF); + sigaddset(&mask, SIGVTALRM); + pthread_sigmask(SIG_BLOCK, &mask, nullptr); + + while (self->_refresher_running.load(std::memory_order_acquire)) { + // OS::sleep uses nanosleep, which returns early on EINTR — the wakeup + // signal sent from stopRefresher() will interrupt this immediately. + OS::sleep(REFRESH_INTERVAL_NS); + if (!self->_refresher_running.load(std::memory_order_acquire)) { + break; + } + if (self->_dirty.load(std::memory_order_acquire)) { + self->refresh(); + } + } + return nullptr; +} + +void Libraries::startRefresher() { + if (_refresher_running.exchange(true, std::memory_order_acq_rel)) { + return; // already running + } + if (pthread_create(&_refresher_thread, nullptr, refresherLoop, this) != 0) { + _refresher_running.store(false, std::memory_order_release); + Log::warn("Unable to start Libraries refresher thread"); + } +} + +void Libraries::stopRefresher() { + if (!_refresher_running.exchange(false, std::memory_order_acq_rel)) { + return; // not running + } + pthread_kill(_refresher_thread, WAKEUP_SIGNAL); + pthread_join(_refresher_thread, nullptr); + // Clear the published TID so a later sampler doesn't skip an unrelated + // thread that may have inherited the same TID after we exited. + _refresher_tid.store(-1, std::memory_order_release); +} + // Platform-specific implementation of updateBuildIds() is in libraries_linux.cpp (Linux) // or stub implementation for other platforms diff --git a/ddprof-lib/src/main/cpp/libraries.h b/ddprof-lib/src/main/cpp/libraries.h index e58ceaa02..18b59c963 100644 --- a/ddprof-lib/src/main/cpp/libraries.h +++ b/ddprof-lib/src/main/cpp/libraries.h @@ -1,16 +1,59 @@ #ifndef _LIBRARIES_H #define _LIBRARIES_H +#include +#include + #include "codeCache.h" class Libraries { private: CodeCacheArray _native_libs; CodeCache _runtime_stubs; + bool _remote_symbolication; // set via setRemoteSymbolication() + + // Pending-refresh flag set by dlopen_hook when it cannot call refresh() + // synchronously (signal context). Polled by the refresher thread. + std::atomic _dirty; + + // Background refresher thread: periodically (every REFRESH_INTERVAL_NS) + // checks _dirty and runs refresh() outside signal context, narrowing the + // window during which newly-loaded libraries are unresolvable. + pthread_t _refresher_thread; + std::atomic _refresher_running; + std::atomic _refresher_tid; // captured from OS::threadId() on entry + static void *refresherLoop(void *arg); static void mangle(const char *name, char *buf, size_t size); public: - Libraries() : _native_libs(), _runtime_stubs("runtime stubs") {} + Libraries() : _native_libs(), _runtime_stubs("runtime stubs"), + _remote_symbolication(false), _dirty(false), + _refresher_thread(), _refresher_running(false), + _refresher_tid(-1) {} + + void setRemoteSymbolication(bool enabled) { _remote_symbolication = enabled; } + + // Refresh symbol tables and reinstall hooks/patches for any libraries + // loaded since the last refresh. Idempotent and cheap when no new + // libraries have been loaded (parseLibraries tracks _parsed_inodes). + // Clears the dirty flag. Must be called from non-signal context: + // updateSymbols acquires a Mutex and reads /proc/self/maps. + void refresh(); + + // Async-signal-safe: just sets a flag. The refresher thread will pick + // up the change on its next tick. + void markDirty() { _dirty.store(true, std::memory_order_release); } + + // Start/stop the background refresher thread. Called from + // Profiler::start/stop. + void startRefresher(); + void stopRefresher(); + + // TID of the refresher thread once it has captured its own ID, or -1 if + // the thread is not currently running. Used by sampler thread-list + // enumeration to skip this profiler-internal thread. + int refresherTid() const { return _refresher_tid.load(std::memory_order_acquire); } + void updateSymbols(bool kernel_symbols); void updateBuildIds(); // Extract build-ids for all loaded libraries const void *resolveSymbol(const char *name); diff --git a/ddprof-lib/src/main/cpp/mutex.h b/ddprof-lib/src/main/cpp/mutex.h index 7d017536d..671e8b8fc 100644 --- a/ddprof-lib/src/main/cpp/mutex.h +++ b/ddprof-lib/src/main/cpp/mutex.h @@ -19,6 +19,8 @@ class Mutex { void lock(); void unlock(); + // async-signal-safe: pthread_mutex_trylock is on the POSIX AS-safe list + bool tryLock() { return pthread_mutex_trylock(&_mutex) == 0; } }; class WaitableMutex : public Mutex { diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 8a8195d9a..3d768d1c1 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -11,7 +11,6 @@ #include "context.h" #include "context_api.h" #include "guards.h" -#include "signalSafety.h" #include "common.h" #include "counters.h" #include "ctimer.h" @@ -749,20 +748,50 @@ void Profiler::writeHeapUsage(long value, bool live) { _locks[lock_index].unlock(); } +void Profiler::prewarmUnwinder() { +#ifdef __linux__ + // J9 on aarch64 (and other JVMs) lazily loads libgcc_s.so.1 from its DWARF + // unwinder during stack walks. When that happens inside a signal handler + // frame, our dlopen_hook fires from signal context and tries to refresh the + // library list — Mutex::lock and malloc on a signal stack. By forcing the + // load here, before any signal handler is installed, subsequent calls find + // libgcc_s already mapped and the lazy-load path never runs. + // + // The handle is intentionally leaked: keeping the refcount > 0 prevents the + // library from being unmapped for the remainder of the process lifetime. + // + // SONAME note: "libgcc_s.so.1" is hardcoded deliberately. Referencing + // _Unwind_Backtrace from C++ would normally let the linker resolve the + // SONAME implicitly, but our release build uses -static-libgcc + // (ConfigurationPresets.kt: "-static-libgcc"), which embeds the unwinder + // into libjavaProfiler.so and removes libgcc_s.so from our NEEDED entries + // — so a symbol reference would not trigger the shared-library load. + // dlopen by SONAME is the only mechanism that works under static-libgcc. + // libgcc_s.so.1 has been the stable SONAME since 2002; a bump would + // constitute a glibc/GCC C++ ABI break and is treated as a fixed contract. + (void)dlopen("libgcc_s.so.1", RTLD_LAZY | RTLD_GLOBAL); +#endif +} + void *Profiler::dlopen_hook(const char *filename, int flags) { void *result = dlopen(filename, flags); if (result != NULL) { - // Static function of Profiler -> can not use the instance variable _libs - // Since Libraries is a singleton, this does not matter - Libraries::instance()->updateSymbols(false); - // Patch sigaction in newly loaded libraries - LibraryPatcher::patch_sigaction(); - MallocTracer::installHooks(); - // Extract build-ids for newly loaded libraries if remote symbolication is enabled - Profiler* profiler = instance(); - if (profiler != nullptr && profiler->_remote_symbolication) { - Libraries::instance()->updateBuildIds(); + if (!isInSignalContext()) { + // Normal (non-signal) dlopen: refresh synchronously so symbols in + // the new library are resolved immediately. + Libraries::instance()->refresh(); + } else { + // Async-signal context. Profiler::prewarmUnwinder closes the known + // libgcc_s case, but JVM/JIT lazy loads have been observed from + // signal handlers under Graal on aarch64. refresh() must NOT run + // here — parseLibraries acquires a Mutex and calls malloc, both + // AS-unsafe. Mark the library set dirty; the Libraries refresher + // thread picks it up within REFRESH_INTERVAL_NS (5s), bounding the + // window during which the new library is unresolvable. flushJfr / + // dump / stop also call refresh() unconditionally, so this works + // even before the refresher thread starts. + Libraries::instance()->markDirty(); } } @@ -1140,6 +1169,10 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("Profiler already started"); } + // Force libgcc_s to load now (idempotent dlopen) so the JVM's DWARF + // unwinder cannot lazy-load it later from signal context. + prewarmUnwinder(); + Error error = checkJvmCapabilities(); if (error) { return error; @@ -1147,6 +1180,7 @@ Error Profiler::start(Arguments &args, bool reset) { _omit_stacktraces = args._lightweight; _remote_symbolication = args._remote_symbolication; + _libs->setRemoteSymbolication(_remote_symbolication); _event_mask = ((args._event != NULL && strcmp(args._event, EVENT_NOOP) != 0) ? EM_CPU : 0) | @@ -1282,6 +1316,11 @@ Error Profiler::start(Arguments &args, bool reset) { // Always enable library trap to catch wasmtime loading and patch its broken sigaction switchLibraryTrap(true); + // Refresher must be running before the trap fires: dlopen_hook's + // signal-context branch only marks dirty and relies on the refresher + // to call refresh() within ~5s. + _libs->startRefresher(); + JfrMetadata::reset(); JfrMetadata::initialize(args._context_attributes); _num_context_attributes = args._context_attributes.size(); @@ -1292,6 +1331,7 @@ Error Profiler::start(Arguments &args, bool reset) { if (error) { disableEngines(); switchLibraryTrap(false); + _libs->stopRefresher(); return error; } @@ -1334,6 +1374,7 @@ Error Profiler::start(Arguments &args, bool reset) { // nativemem is the only requested mode: propagate the real error disableEngines(); switchLibraryTrap(false); + _libs->stopRefresher(); lockAll(); _jfr.stop(); unlockAll(); @@ -1363,6 +1404,7 @@ Error Profiler::start(Arguments &args, bool reset) { // no engine was activated; perform cleanup disableEngines(); switchLibraryTrap(false); + _libs->stopRefresher(); lockAll(); _jfr.stop(); @@ -1390,7 +1432,11 @@ Error Profiler::stop() { _cpu_engine->stop(); switchLibraryTrap(false); + // Stop the refresher before the final refresh() so it doesn't race the + // teardown. startRefresher/stopRefresher are idempotent. + _libs->stopRefresher(); switchThreadEvents(JVMTI_DISABLE); + Libraries::instance()->refresh(); updateJavaThreadNames(); updateNativeThreadNames(); @@ -1493,6 +1539,7 @@ Error Profiler::flushJfr() { return Error("Profiler is not active"); } + Libraries::instance()->refresh(); updateJavaThreadNames(); updateNativeThreadNames(); @@ -1515,6 +1562,7 @@ Error Profiler::dump(const char *path, const int length) { // by the live objects LivenessTracker::instance()->flush(thread_ids); + Libraries::instance()->refresh(); updateJavaThreadNames(); updateNativeThreadNames(); diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index d2bf1065d..62b0b7749 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -122,6 +122,7 @@ class alignas(alignof(SpinLock)) Profiler { void **_dlopen_entry; static void *dlopen_hook(const char *filename, int flags); void switchLibraryTrap(bool enable); + static void prewarmUnwinder(); void enableEngines(); void disableEngines(); diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 959382656..ce655f5b1 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -178,14 +178,18 @@ void WallClockASGCT::timerLoop() { auto collectThreads = [&](std::vector& tids) { // Get thread IDs from the filter if it's enabled // Otherwise list all threads in the system + const int refresher_tid = Libraries::instance()->refresherTid(); if (Profiler::instance()->threadFilter()->enabled()) { Profiler::instance()->threadFilter()->collect(tids); } else { ThreadList *thread_list = OS::listThreads(); while (thread_list->hasNext()) { int tid = thread_list->next(); - // Don't include the current thread - if (tid != OS::threadId()) { + // Don't include the current thread, nor the Libraries refresher + // thread (profiler-internal — masking SIGVTALRM there is not + // enough; we also want to avoid the kill() round-trip and any + // pending-signal accumulation). + if (tid != OS::threadId() && tid != refresher_tid) { tids.push_back(tid); } } @@ -285,13 +289,16 @@ void WallClockJvmti::initialize(Arguments &args) { void WallClockJvmti::timerLoop() { auto collectThreads = [&](std::vector &tids) { + const int refresher_tid = Libraries::instance()->refresherTid(); if (Profiler::instance()->threadFilter()->enabled()) { Profiler::instance()->threadFilter()->collect(tids); } else { ThreadList *thread_list = OS::listThreads(); while (thread_list->hasNext()) { int tid = thread_list->next(); - if (tid != OS::threadId()) { + // Exclude the wallclock timer thread itself and the Libraries + // refresher (profiler-internal). + if (tid != OS::threadId() && tid != refresher_tid) { tids.push_back(tid); } } From 9103d9c3da870fdf3bc1388e35301a8ea32cb11a Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 14:05:07 +0200 Subject: [PATCH 3/9] chore(deps): cap JUnit at 5.9.x to preserve Java 8/11 CI compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JUnit 5.10+ dropped Java 8 support; 5.12+ dropped Java 11; 6.x requires Java 17. The Java 8 and 11 CI targets (both HotSpot and J9) run the Gradle test worker on the test JDK itself — the profiler attaches to its own process — so JUnit Platform classes must be loadable on Java 8/11. Revert libs.versions.toml to junit 5.9.2 / junit-platform 1.9.2 (the last known-working pair). Add Dependabot ignore rules to prevent automated bumps past 5.9.x / 1.9.x. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .github/dependabot.yml | 8 ++++++++ gradle/libs.versions.toml | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index fc7cecae8..799e64238 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,6 +12,14 @@ updates: - "no-release-notes" - "no-review" ignore: + # JUnit 5.10+ dropped Java 8 support; 5.12+ dropped Java 11; 6.x requires Java 17. + # CI targets on Java 8 and 11 (HotSpot and J9) run the Gradle test worker on the + # test JDK itself (the profiler attaches to its own process), so the JUnit Platform + # classes must be loadable by Java 8/11. Keep the entire JUnit stack at 5.9.x / 1.9.x. + - dependency-name: "org.junit.jupiter:*" + versions: [">=5.10.0"] + - dependency-name: "org.junit.platform:*" + versions: [">=1.10.0"] - dependency-name: "org.junit-pioneer:junit-pioneer" versions: [">=2.0.0"] groups: diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f6fc91356..5d8380722 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -6,8 +6,8 @@ kotlin = "1.9.22" spotless = "8.5.1" # Testing -junit = "6.1.0" -junit-platform = "6.1.0" # JUnit Platform version corresponding to JUnit Jupiter 5.9.2 +junit = "5.9.2" +junit-platform = "1.9.2" # JUnit Platform version corresponding to JUnit Jupiter 5.9.2 junit-pioneer = "1.9.1" slf4j = "2.0.18" From 7eb05224b0d6eca148ae374630d43cacfda736d3 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 19:00:58 +0200 Subject: [PATCH 4/9] build: share library compile across gtest binaries Each gtest test file was being compiled alongside all 59 library sources, so the per-config build cost was O(n_tests * n_sources). On slow CI runners this turned a debug gtest run into a multi-minute rebuild even when only one test file changed. Register a single compileGtestLibrary task that compiles the library sources once per build configuration. Each test's compile task now compiles only its own .cpp; each link task pulls in both its own objects and the shared library objects via the new GtestTaskBuilder.withSharedLibObjects() hook. Also adds a buildGtest aggregation task that runs compile + link without execution, for CI flows that invoke binaries directly. Ported from PR #539 (cherry-picked the build-cache portion only; sanitizer-runtime fixes from that PR are out of scope here). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/datadoghq/native/gtest/GtestPlugin.kt | 41 ++++++++++++++++++- .../native/gtest/GtestTaskBuilder.kt | 38 ++++++++++++++--- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestPlugin.kt b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestPlugin.kt index e7d593630..793ff75b1 100644 --- a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestPlugin.kt +++ b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestPlugin.kt @@ -188,12 +188,46 @@ class GtestPlugin : Plugin { val compiler = findCompiler(project) val includeFiles = extension.includes.plus(project.files(getGtestIncludes(extension))) - // Create per-config aggregation task + // Create per-config aggregation task (compile + link + run) val gtestConfigTask = project.tasks.register("gtest${config.capitalizedName()}") { group = "verification" description = "Run all Google Tests for the ${config.name} build of the library" } + // Per-config build-only aggregation task (compile + link, no run). + // Useful in CI where binaries are executed directly without going + // through Gradle's logging infrastructure. + val buildGtestConfigTask = project.tasks.register("buildGtest${config.capitalizedName()}") { + group = "build" + description = "Compile and link all Google Tests for the ${config.name} build (no run)" + } + + // Compile all library sources ONCE for this config. Each test + // binary only compiles its own test file and links against these + // shared objects, reducing compilations from O(n_tests × n_sources) + // to O(n_sources + n_tests). + val sharedBuilder = GtestTaskBuilder(project, extension, config) + .withCompiler(compiler) + .withIncludes(includeFiles) + .onlyIfGtest(hasGtest) + val sharedCompilerArgs = sharedBuilder.sharedCompilerArgs() + val sharedLibCompileTask = project.tasks.register( + "compileGtestLibrary${config.capitalizedName()}", + com.datadoghq.native.tasks.NativeCompileTask::class.java + ) { + onlyIf { hasGtest && !sharedBuilder.skipConditions() } + group = "build" + description = "Compile shared library sources for ${config.name} gtest binaries" + + this.compiler.set(compiler) + this.compilerArgs.set(sharedCompilerArgs) + sources.from(project.fileTree(extension.mainSourceDir.get()) { include("**/*.cpp") }) + includes.from(includeFiles) + objectFileDir.set(project.file( + "${project.layout.buildDirectory.get()}/obj/gtest/${config.name}/lib" + )) + } + // Discover and create tasks for each test file using builder val testDir = extension.testSourceDir.get().asFile if (!testDir.exists()) { @@ -206,11 +240,16 @@ class GtestPlugin : Plugin { .forTest(testFile) .withCompiler(compiler) .withIncludes(includeFiles) + .withSharedLibObjects(sharedLibCompileTask) .onlyIfGtest(hasGtest) .build() gtestConfigTask.configure { dependsOn(executeTask) } gtestAll.configure { dependsOn(executeTask) } + // buildGtest depends on the link task, not the run task + buildGtestConfigTask.configure { + dependsOn("linkGtest${config.capitalizedName()}_${testFile.nameWithoutExtension}") + } } } diff --git a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestTaskBuilder.kt b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestTaskBuilder.kt index cc4e80770..a56de4ae7 100644 --- a/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestTaskBuilder.kt +++ b/build-logic/conventions/src/main/kotlin/com/datadoghq/native/gtest/GtestTaskBuilder.kt @@ -37,6 +37,7 @@ class GtestTaskBuilder( private lateinit var compiler: String private lateinit var includeFiles: FileCollection private var hasGtest: Boolean = true + private var sharedLibCompileTask: TaskProvider? = null private val configName: String get() = config.capitalizedName() @@ -73,6 +74,23 @@ class GtestTaskBuilder( return this } + /** + * Provide the shared library compile task whose objects are linked into + * every test binary. Allows the library sources to be compiled once + * instead of once per test file. + */ + fun withSharedLibObjects(task: TaskProvider): GtestTaskBuilder { + sharedLibCompileTask = task + return this + } + + /** + * Returns the compiler args used for compiling library and test sources. + * Exposed so GtestPlugin can configure the shared library compile task + * with identical flags without duplicating the adjustment logic. + */ + fun sharedCompilerArgs(): List = adjustCompilerArgs() + /** * Build all tasks (compile, link, execute) and return the execute task provider. */ @@ -94,10 +112,16 @@ class GtestTaskBuilder( this.compiler.set(this@GtestTaskBuilder.compiler) this.compilerArgs.set(compilerArgs) - sources.from( - project.fileTree(extension.mainSourceDir.get()) { include("**/*.cpp") }, - testFile - ) + // When a shared library compile task is provided, library sources + // are compiled once there. Only compile the test file itself here. + if (sharedLibCompileTask != null) { + sources.from(testFile) + } else { + sources.from( + project.fileTree(extension.mainSourceDir.get()) { include("**/*.cpp") }, + testFile + ) + } includes.from(includeFiles) objectFileDir.set(objDir) } @@ -117,6 +141,10 @@ class GtestTaskBuilder( linker.set(compiler) this.linkerArgs.set(linkerArgs) objectFiles.from(project.fileTree(objDir) { include("*.o") }) + sharedLibCompileTask?.let { sharedTask -> + dependsOn(sharedTask) + objectFiles.from(sharedTask.map { it.objectFileDir.get().asFileTree.matching { include("*.o") } }) + } outputFile.set(binary) // Add gtest library paths @@ -171,7 +199,7 @@ class GtestTaskBuilder( } } - private fun skipConditions(): Boolean { + fun skipConditions(): Boolean { return project.hasProperty("skip-tests") || project.hasProperty("skip-native") || project.hasProperty("skip-gtest") From 683ac265128b4df9f42e86c73b3ea58d3a24a45c Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 20:48:21 +0200 Subject: [PATCH 5/9] build: add GraalVM JDK support to docker test runner The local docker test runner only handled Temurin/Bellsoft/Semeru JDKs, so the 17-graal / 21-graal / 25-graal variants used by CI could not be reproduced locally. CI installs Graal via SDKMAN, but adding SDKMAN to the docker image is heavier than needed. Add get_graal_jdk_url() pointing at the Oracle GraalVM archive URLs and route --jdk=*-graal through it, mirroring the J9/Semeru variant. Versions match the CI's JAVA__GRAAL_VERSION values so docker reproduction is faithful to what CI runs. Co-Authored-By: Claude Opus 4.7 (1M context) --- utils/run-docker-tests.sh | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/utils/run-docker-tests.sh b/utils/run-docker-tests.sh index fb1107385..a29f95192 100755 --- a/utils/run-docker-tests.sh +++ b/utils/run-docker-tests.sh @@ -8,7 +8,7 @@ # # Usage: ./utils/run-docker-tests.sh [options] # --libc=glibc|musl (default: glibc) -# --jdk=8|11|17|21|25|8-j9|11-j9|17-j9|21-j9 (default: 21) +# --jdk=8|11|17|21|25|8-j9|11-j9|17-j9|21-j9|17-graal|21-graal|25-graal (default: 21) # --arch=x64|aarch64 (default: auto-detect) # --config=debug|release|asan|tsan (default: debug) # --tests="TestPattern" (optional, specific test to run) @@ -94,6 +94,25 @@ get_glibc_jdk_url() { esac } +# JDK Download URLs (Oracle GraalVM) +# Versions match the SDKMAN-installed builds used in CI (java_setup.sh + +# cache_java.yml: JAVA__GRAAL_VERSION). Using Oracle's stable archive +# URLs (not "latest") so the docker images match the CI configuration. +get_graal_jdk_url() { + local version=$1 + local arch=$2 + # Oracle GraalVM archive arch labels: x64 / aarch64 (same as our $arch) + case "$version-$arch" in + 17-x64) echo "https://download.oracle.com/graalvm/17/archive/graalvm-jdk-17.0.12_linux-x64_bin.tar.gz" ;; + 17-aarch64) echo "https://download.oracle.com/graalvm/17/archive/graalvm-jdk-17.0.12_linux-aarch64_bin.tar.gz" ;; + 21-x64) echo "https://download.oracle.com/graalvm/21/archive/graalvm-jdk-21.0.4_linux-x64_bin.tar.gz" ;; + 21-aarch64) echo "https://download.oracle.com/graalvm/21/archive/graalvm-jdk-21.0.4_linux-aarch64_bin.tar.gz" ;; + 25-x64) echo "https://download.oracle.com/graalvm/25/archive/graalvm-jdk-25_linux-x64_bin.tar.gz" ;; + 25-aarch64) echo "https://download.oracle.com/graalvm/25/archive/graalvm-jdk-25_linux-aarch64_bin.tar.gz" ;; + *) echo "" ;; + esac +} + # JDK Download URLs (IBM Semeru OpenJ9) get_j9_jdk_url() { local version=$1 @@ -205,6 +224,12 @@ if [[ "$JDK_VARIANT" == "j9" ]]; then exit 1 fi JDK_URL=$(get_j9_jdk_url "$JDK_BASE_VERSION" "$ARCH") +elif [[ "$JDK_VARIANT" == "graal" ]]; then + if [[ "$LIBC" == "musl" ]]; then + echo "Error: GraalVM is not available for musl libc" + exit 1 + fi + JDK_URL=$(get_graal_jdk_url "$JDK_BASE_VERSION" "$ARCH") elif [[ "$LIBC" == "musl" ]]; then JDK_URL=$(get_musl_jdk_url "$JDK_BASE_VERSION" "$ARCH") else @@ -212,7 +237,7 @@ else fi if [[ -z "$JDK_URL" ]]; then - echo "Error: --jdk must be one of: 8, 11, 17, 21, 25, 8-j9, 11-j9, 17-j9, 21-j9" + echo "Error: --jdk must be one of: 8, 11, 17, 21, 25, 8-j9, 11-j9, 17-j9, 21-j9, 17-graal, 21-graal, 25-graal" exit 1 fi From e3466e9f283b56b6d5e3b9409e2b753a1ada0049 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 22:19:54 +0200 Subject: [PATCH 6/9] fix(profiler): use initial-exec TLS model for signal-depth counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The always-on _in_signal_handler_depth TLS variable, accessed first from inside our SIGPROF/SIGVTALRM handlers via SIGNAL_HANDLER_GUARD(), was declared with the default global-dynamic TLS model. On first access in a given thread glibc lazily allocates the dtv slot via malloc() and takes the heap lock — both async-signal-unsafe. Reproduced deterministically on Graal aarch64 (glibc 17-graal debug) running ClassGCTest: SIGPROF arrived on the VM Thread while Graal's JVMCI compiler held the malloc heap lock through c2v_notifyCompilerPhaseEvent. Stack: #2 __libc_malloc -- waiting on heap lock #4 allocate_dtv_entry #7 _dl_tlsdesc_dynamic #8 TLS wrapper for _in_signal_handler_depth #9 SignalHandlerScope::SignalHandlerScope #10 CTimer::signalHandler The heap holder is itself blocked at a safepoint waiting for VM Thread to check in, and VM Thread is stuck in malloc -> full process deadlock. Switch the variable to the initial-exec TLS model so the loader allocates its slot from the static TLS surplus at libjavaProfiler.so load time. Every existing thread is fixed up at dlopen and every new thread receives the slot at pthread_create. Access is then a register-relative load — async-signal-safe, lock-free, malloc-free. Also narrow the type to uint8_t (realistic max depth ~3) to make the intent explicit; alignment-wise this is the same slot. Refresher tick reduced from 5 s to 500 ms so a library lazily loaded from signal context becomes resolvable by the stack walker within half a second. Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/guards.cpp | 2 +- ddprof-lib/src/main/cpp/guards.h | 28 +++++++++++++++++++-------- ddprof-lib/src/main/cpp/libraries.cpp | 4 +++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/ddprof-lib/src/main/cpp/guards.cpp b/ddprof-lib/src/main/cpp/guards.cpp index eaab4fe75..889ed2a5e 100644 --- a/ddprof-lib/src/main/cpp/guards.cpp +++ b/ddprof-lib/src/main/cpp/guards.cpp @@ -20,7 +20,7 @@ #include "thread.h" // Always-on signal-context depth counter; see comments in guards.h. -thread_local int _in_signal_handler_depth = 0; +thread_local uint8_t _in_signal_handler_depth _PROFILER_TLS_IE = 0; // Static bitmap storage for fallback cases uint64_t CriticalSection::_fallback_bitmap[CriticalSection::FALLBACK_BITMAP_WORDS] = {}; diff --git a/ddprof-lib/src/main/cpp/guards.h b/ddprof-lib/src/main/cpp/guards.h index 17ef97f98..64346e173 100644 --- a/ddprof-lib/src/main/cpp/guards.h +++ b/ddprof-lib/src/main/cpp/guards.h @@ -28,19 +28,31 @@ class ProfiledThread; // Signal-context depth tracking — always on. // // Code paths that must choose between an async-signal-safe deferred path and -// a synchronous path (e.g. Profiler::dlopen_hook, which has to defer when a -// library is lazily loaded from signal context) query isInSignalContext(). -// The debug-only DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h -// asserts on the same counter. +// a synchronous path (e.g. Profiler::dlopen_hook, which defers refresh() when +// a library is loaded from signal context) query isInSignalContext(). The +// debug-only DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h asserts on +// the same counter. // -// Cost in release: ~2 thread-local writes per signal handler invocation -// (negligible). Pre-warming libgcc_s in Profiler::start() closes the -// known case; this counter is the safety net for the remainder. +// TLS model: initial-exec. Default (global-dynamic) lazily allocates the +// dtv slot via malloc on first access — and the *first* access happens +// inside our signal handler (SIGNAL_HANDLER_GUARD). That malloc takes the +// heap lock and deadlocks when the holder is paused for a safepoint +// (observed on Graal aarch64 / libjvmcicompiler.so). initial-exec instructs +// the loader to allocate the slot from the static TLS surplus at library +// load — every existing thread is fixed up at dlopen and every new thread +// gets the slot at pthread_create. Access is a register-relative load with +// no malloc, lock, or syscall, so it is async-signal-safe by construction. +// +// uint8_t is sufficient: realistic max depth is 3 (e.g. SIGSEGV nested in +// SIGPROF nested in something else); using a byte over an int makes the +// "tiny counter, never grows" intent explicit. Slot alignment is the same. // --------------------------------------------------------------------------- +#define _PROFILER_TLS_IE __attribute__((tls_model("initial-exec"))) + // Counter so nested signals (e.g. SIGSEGV inside SIGPROF) keep the flag // asserted until the outermost handler returns. -extern thread_local int _in_signal_handler_depth; +extern thread_local uint8_t _in_signal_handler_depth _PROFILER_TLS_IE; inline int getInSignalDepth() { return _in_signal_handler_depth; } inline bool isInSignalContext() { return _in_signal_handler_depth != 0; } diff --git a/ddprof-lib/src/main/cpp/libraries.cpp b/ddprof-lib/src/main/cpp/libraries.cpp index 321c50471..82a71e309 100644 --- a/ddprof-lib/src/main/cpp/libraries.cpp +++ b/ddprof-lib/src/main/cpp/libraries.cpp @@ -14,7 +14,9 @@ // Cadence for the background refresher thread. Bounds the window during // which a library lazily loaded from signal context (and therefore unable // to call refresh() synchronously) is unresolvable by the stack walker. -static constexpr u64 REFRESH_INTERVAL_NS = 5ULL * 1'000'000'000ULL; +// 500 ms is short enough that frame resolution gaps are barely observable +// in typical sampling, and the refresher only wakes once per tick (cheap). +static constexpr u64 REFRESH_INTERVAL_NS = 500ULL * 1'000'000ULL; void Libraries::mangle(const char *name, char *buf, size_t size) { char *buf_end = buf + size; From f77ba1a691e806fdef96d21b09de260c135551f9 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 22:35:52 +0200 Subject: [PATCH 7/9] fix(profiler): store signal-handler depth in ProfiledThread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous thread_local + initial-exec TLS approach failed two ways: - default global-dynamic TLS lazily allocates the dtv slot via malloc on first access, deadlocking when the heap lock holder waits for a safepoint (observed on Graal aarch64 in ClassGCTest) - initial-exec fixes the malloc but consumes static TLS surplus. On Graal that surplus is already exhausted by libjvmcicompiler.so, so libjavaProfiler.so fails to dlopen with "cannot allocate memory in static TLS block" Move the counter into ProfiledThread::_signal_depth, which is reached via pthread_getspecific — POSIX-guaranteed AS-safe and never allocates. Once a ProfiledThread exists for the calling thread, depth tracking is a plain byte read/write on the owning thread (signal handlers run on the interrupted thread, so no cross-thread synchronization is needed). Treat ProfiledThread == null as "assume in signal" for production gates (Profiler::dlopen_hook now defers refresh() conservatively when running on uninstrumented JVM threads — VM Thread, JIT, GC). Same threads were the deadlock source under the previous design. DEBUG_ASSERT_NOT_IN_SIGNAL skips its check when ProfiledThread is null so AS-unsafe-but-legitimate code on uninstrumented threads doesn't trip a false-positive abort. Refresher tick stays at 500 ms (set in previous commit). Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/guards.cpp | 52 +++++++++++++++++- ddprof-lib/src/main/cpp/guards.h | 59 +++++++++++---------- ddprof-lib/src/main/cpp/signalSafety.h | 16 ++++-- ddprof-lib/src/main/cpp/thread.h | 10 ++++ ddprof-lib/src/test/cpp/signalSafety_ut.cpp | 17 ++++++ 5 files changed, 118 insertions(+), 36 deletions(-) diff --git a/ddprof-lib/src/main/cpp/guards.cpp b/ddprof-lib/src/main/cpp/guards.cpp index 889ed2a5e..f771f66a1 100644 --- a/ddprof-lib/src/main/cpp/guards.cpp +++ b/ddprof-lib/src/main/cpp/guards.cpp @@ -19,8 +19,56 @@ #include "os.h" #include "thread.h" -// Always-on signal-context depth counter; see comments in guards.h. -thread_local uint8_t _in_signal_handler_depth _PROFILER_TLS_IE = 0; +// Signal-context tracking — backed by ProfiledThread::_signal_depth; see +// the comment block in guards.h for the rationale (initial-exec TLS was +// rejected because of the static TLS surplus on Graal). + +int getInSignalDepth() { + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + return pt != nullptr ? static_cast(pt->signalDepth()) : 0; +} + +bool isInSignalContext() { + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + // null ProfiledThread = no thread context yet; treat as "assume signal" + // so AS-safety gates default to the conservative deferred path. + return pt == nullptr || pt->signalDepth() != 0; +} + +SignalHandlerScope::SignalHandlerScope() : _active(true) { + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + if (pt != nullptr) { + pt->enterSignalScope(); + } else { + // No thread context: nothing to update; mark inactive so destructor + // and release() are no-ops. + _active = false; + } +} + +SignalHandlerScope::~SignalHandlerScope() { + if (!_active) return; + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + if (pt != nullptr) { + pt->exitSignalScope(); + } +} + +void SignalHandlerScope::release() { + if (!_active) return; + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + if (pt != nullptr) { + pt->exitSignalScope(); + } + _active = false; +} + +void signalHandlerUnwindAfterLongjmp() { + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + if (pt != nullptr) { + pt->exitSignalScope(); + } +} // Static bitmap storage for fallback cases uint64_t CriticalSection::_fallback_bitmap[CriticalSection::FALLBACK_BITMAP_WORDS] = {}; diff --git a/ddprof-lib/src/main/cpp/guards.h b/ddprof-lib/src/main/cpp/guards.h index 64346e173..7820353b6 100644 --- a/ddprof-lib/src/main/cpp/guards.h +++ b/ddprof-lib/src/main/cpp/guards.h @@ -25,7 +25,7 @@ class ProfiledThread; // --------------------------------------------------------------------------- -// Signal-context depth tracking — always on. +// Signal-context depth tracking. // // Code paths that must choose between an async-signal-safe deferred path and // a synchronous path (e.g. Profiler::dlopen_hook, which defers refresh() when @@ -33,40 +33,44 @@ class ProfiledThread; // debug-only DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h asserts on // the same counter. // -// TLS model: initial-exec. Default (global-dynamic) lazily allocates the -// dtv slot via malloc on first access — and the *first* access happens -// inside our signal handler (SIGNAL_HANDLER_GUARD). That malloc takes the -// heap lock and deadlocks when the holder is paused for a safepoint -// (observed on Graal aarch64 / libjvmcicompiler.so). initial-exec instructs -// the loader to allocate the slot from the static TLS surplus at library -// load — every existing thread is fixed up at dlopen and every new thread -// gets the slot at pthread_create. Access is a register-relative load with -// no malloc, lock, or syscall, so it is async-signal-safe by construction. +// Storage: the depth lives in ProfiledThread::_signal_depth. An earlier +// design used a thread_local int, but on Graal aarch64 the lazy DTV slot +// allocation triggered malloc inside our signal handler and deadlocked +// against the JVMCI compiler holding the heap lock. initial-exec fixed the +// malloc but tripped the static TLS surplus and broke dlopen on Graal. +// ProfiledThread is already AS-safe-accessible via pthread_getspecific +// (POSIX guarantees it does not allocate; returns nullptr when unset). // -// uint8_t is sufficient: realistic max depth is 3 (e.g. SIGSEGV nested in -// SIGPROF nested in something else); using a byte over an int makes the -// "tiny counter, never grows" intent explicit. Slot alignment is the same. +// When ProfiledThread is null on a thread, we don't yet have a thread +// context — uninstrumented JVM-internal threads (VM Thread, JIT, GC) fall +// into this bucket too, and they can receive signals. isInSignalContext() +// treats null as "assume signal" so dlopen_hook defers conservatively +// rather than risking a synchronous refresh from a signal frame. +// DEBUG_ASSERT_NOT_IN_SIGNAL skips the check when null so well-behaved +// non-signal code on uninstrumented threads doesn't trip a false abort. // --------------------------------------------------------------------------- -#define _PROFILER_TLS_IE __attribute__((tls_model("initial-exec"))) +// Returns the signal-handler depth for the calling thread, or 0 if the +// thread has no ProfiledThread yet. Use isInSignalContext() instead when +// you only need a boolean — it handles the "no thread context" case +// conservatively (returns true). +int getInSignalDepth(); -// Counter so nested signals (e.g. SIGSEGV inside SIGPROF) keep the flag -// asserted until the outermost handler returns. -extern thread_local uint8_t _in_signal_handler_depth _PROFILER_TLS_IE; - -inline int getInSignalDepth() { return _in_signal_handler_depth; } -inline bool isInSignalContext() { return _in_signal_handler_depth != 0; } +// True when the calling thread is either inside an installed signal +// handler (depth > 0) or has no thread context yet (treat as "unknown, +// assume signal" — fail-safe for AS-safety gating). +bool isInSignalContext(); // Internal RAII type — do not instantiate directly; use the macros below. class SignalHandlerScope { public: - SignalHandlerScope() { ++_in_signal_handler_depth; } - ~SignalHandlerScope() { if (_active) --_in_signal_handler_depth; } - void release() { if (_active) { --_in_signal_handler_depth; _active = false; } } + SignalHandlerScope(); + ~SignalHandlerScope(); + void release(); SignalHandlerScope(const SignalHandlerScope&) = delete; SignalHandlerScope& operator=(const SignalHandlerScope&) = delete; private: - bool _active = true; + bool _active; }; // Declare a scope guard local that increments the depth on entry and @@ -84,11 +88,8 @@ class SignalHandlerScope { // Call at the setjmp landing point AFTER a known longjmp originated from // within a signal handler frame (e.g. HotSpot's checkFault → longjmp recovery // in walkVM). -#define SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP() \ - do { \ - if (_in_signal_handler_depth > 0) \ - --_in_signal_handler_depth; \ - } while (0) +void signalHandlerUnwindAfterLongjmp(); +#define SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP() signalHandlerUnwindAfterLongjmp() /** * Race-free critical section using atomic compare-and-swap. diff --git a/ddprof-lib/src/main/cpp/signalSafety.h b/ddprof-lib/src/main/cpp/signalSafety.h index b2b9c47ad..e322fdcbb 100644 --- a/ddprof-lib/src/main/cpp/signalSafety.h +++ b/ddprof-lib/src/main/cpp/signalSafety.h @@ -17,7 +17,8 @@ #ifndef _SIGNAL_SAFETY_H #define _SIGNAL_SAFETY_H -#include "guards.h" // _in_signal_handler_depth, SIGNAL_HANDLER_GUARD, ... +#include "guards.h" // isInSignalContext, SIGNAL_HANDLER_GUARD, ... +#include "thread.h" // ProfiledThread::currentSignalSafe // Detect ASAN using compiler-provided macros so the ASAN_ENABLED guard below // works in every TU that includes this header, independent of include order. @@ -38,9 +39,12 @@ // invoked from inside a signal handler in debug / ASAN builds; compiles to a // no-op in release builds (NDEBUG). // -// The depth counter itself lives in guards.h (always-on, needed by -// Profiler::dlopen_hook for the AS-safe deferred path). Only the diagnostic -// abort is conditional. +// The depth counter itself lives in ProfiledThread::_signal_depth (see +// guards.h for the rationale). The check is skipped when ProfiledThread +// is null — uninstrumented threads (VM Thread, JIT, GC) have no thread +// context, so the assertion has no way to know whether they're really in +// a signal frame. Treating "unknown" as a violation would produce false +// positives every time AS-unsafe code legitimately ran on such a thread. // // write(2) is POSIX async-signal-safe. abort() generates a core dump and // triggers ASAN's stack-trace symbolization, making it far more debuggable @@ -59,7 +63,9 @@ #define DEBUG_ASSERT_NOT_IN_SIGNAL() \ do { \ - if (_in_signal_handler_depth != 0) { \ + ProfiledThread *_pt_for_assert = ProfiledThread::currentSignalSafe(); \ + /* Skip when no thread context — see comment above. */ \ + if (_pt_for_assert != nullptr && _pt_for_assert->signalDepth() != 0) { \ static const char _msg[] = \ "[java-profiler] AS-safety violation at " \ __FILE__ ":" _SIGNAL_SAFETY_TOSTR(__LINE__) \ diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index 4428aab94..e31c18bba 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -55,6 +55,7 @@ class ProfiledThread : public ThreadLocalData { u32 _misc_flags; int _filter_slot_id; // Slot ID for thread filtering uint8_t _init_window; // Countdown for JVM thread init race window (PROF-13072) + uint8_t _signal_depth; // Nested signal-handler depth (see SignalHandlerScope) UnwindFailures _unwind_failures; bool _otel_ctx_initialized; bool _crash_protection_active; @@ -73,6 +74,7 @@ class ProfiledThread : public ThreadLocalData { ProfiledThread(int tid) : ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _crash_depth(0), _tid(tid), _cpu_epoch(0), _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _init_window(0), + _signal_depth(0), _otel_ctx_initialized(false), _crash_protection_active(false), _otel_ctx_record{}, _otel_tag_encodings{}, _otel_local_root_span_id(0) {}; @@ -168,6 +170,14 @@ class ProfiledThread : public ThreadLocalData { return _crash_depth > CRASH_HANDLER_NESTING_LIMIT; } + // Signal-handler depth counter used by SignalHandlerScope (guards.h). All + // access happens on the owning thread (signal handlers are delivered to the + // thread that's interrupted), so plain reads/writes are AS-safe — no locks, + // no malloc, no syscalls. See guards.h for the public API. + inline uint8_t signalDepth() const { return _signal_depth; } + inline void enterSignalScope() { ++_signal_depth; } + inline void exitSignalScope() { if (_signal_depth > 0) --_signal_depth; } + UnwindFailures* unwindFailures(bool reset = true) { if (reset) { _unwind_failures.clear(); diff --git a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp index 33ea8d2d5..ca9c2b881 100644 --- a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp +++ b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp @@ -15,13 +15,23 @@ */ #include "signalSafety.h" +#include "thread.h" #include class SignalSafetyTest : public ::testing::Test { protected: + void SetUp() override { + // SignalHandlerScope reads/writes ProfiledThread::_signal_depth — the + // tests need a thread context to exist on the gtest thread, otherwise + // every scope is a no-op (which is the intended production behavior + // on uninstrumented threads, but not what these unit tests assert). + ProfiledThread::initCurrentThread(); + } + void TearDown() override { // Catch leaked depth from a failed test so subsequent tests start clean. ASSERT_EQ(0, getInSignalDepth()) << "depth not zero after test — check for leaked SignalHandlerScope"; + ProfiledThread::release(); } }; @@ -51,3 +61,10 @@ TEST_F(SignalSafetyTest, NestedDepth) { } EXPECT_EQ(0, getInSignalDepth()); } + +TEST(SignalSafetyTestNoContext, NullProfiledThreadIsTreatedAsSignal) { + // No ProfiledThread on this thread — isInSignalContext() must return + // true (conservative default for AS-safety gates like dlopen_hook). + EXPECT_TRUE(isInSignalContext()); + EXPECT_EQ(0, getInSignalDepth()); +} From 6ef668746c2ca49405f0fd9dfde58b3d16b57d22 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 23:15:59 +0200 Subject: [PATCH 8/9] fix(profiler): address code-review findings on PR #540 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code review surfaced 7 findings — 4 CONFIRMED, 3 PLAUSIBLE. This commit addresses all 7: #8 (CONFIRMED) — split isInSignalContext into a strict variant. dlopen_hook was treating null ProfiledThread as "in signal" and deferring refresh on every dlopen from uninstrumented JVM threads (VM Thread, JIT, GC), delaying wasmtime sigaction patching by up to 500 ms. Add isInTrackedSignalContext() that returns false on null (only true when one of our SignalHandlerScopes is positively on the stack); dlopen_hook now uses it so JVM-internal threads get synchronous refresh again. isInSignalContext() retains its conservative semantics for any future caller that wants AS-safe-by-default. #7 (CONFIRMED) — switchLibraryTrap was called before startRefresher despite the invariant comment. Reorder so the refresher is running before the trap can fire. #9 (CONFIRMED) — DEBUG_ASSERT_NOT_IN_SIGNAL was on the 1-arg and 2-arg Dictionary::lookup overloads but missed the 4-arg form that actually mallocs. bounded_lookup's runtime-decided for_insert path was uncovered. Move the assertion into the 4-arg lookup, gated on for_insert (read-only lookups are AS-safe). #4 (CONFIRMED) — Comments referenced "REFRESH_INTERVAL_NS (5s)" but the actual constant is 500 ms. Fix both stale mentions. #13 (PLAUSIBLE) — SIGNAL_HANDLER_GUARD_RELEASE before chaining leaves depth == 0 inside a chained handler that returns normally; DEBUG_ASSERT_NOT_IN_SIGNAL inside such a handler would not fire. Document the trade-off in segvHandler — the longjmp safety property is more important than the sanitizer coverage gap, which is bounded to third-party signal handler code we don't own. #2 (PLAUSIBLE) — refresherLoop used OS::sleep without an EINTR loop; any unmasked signal (SIGCHLD, SIGURG, RT signals) would cause premature ticks. Wrap the sleep in an explicit elapsed-time loop using OS::nanotime so the refresher ticks at 500 ms regardless of stray signals. #14 (PLAUSIBLE) — refresherLoop published _refresher_tid before blocking SIGPROF/SIGVTALRM; a stale per-thread timer from a previous lifecycle could fire on the refresher during the window. Block signals first, then publish the TID. Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/dictionary.cpp | 11 +++++- ddprof-lib/src/main/cpp/guards.cpp | 8 ++++ ddprof-lib/src/main/cpp/guards.h | 11 ++++++ ddprof-lib/src/main/cpp/libraries.cpp | 38 ++++++++++++------ ddprof-lib/src/main/cpp/profiler.cpp | 43 +++++++++++++-------- ddprof-lib/src/test/cpp/signalSafety_ut.cpp | 17 ++++++++ 6 files changed, 99 insertions(+), 29 deletions(-) diff --git a/ddprof-lib/src/main/cpp/dictionary.cpp b/ddprof-lib/src/main/cpp/dictionary.cpp index b9066d474..29db2eef5 100644 --- a/ddprof-lib/src/main/cpp/dictionary.cpp +++ b/ddprof-lib/src/main/cpp/dictionary.cpp @@ -81,17 +81,24 @@ unsigned int Dictionary::hash(const char *key, size_t length) { } unsigned int Dictionary::lookup(const char *key) { - DEBUG_ASSERT_NOT_IN_SIGNAL(); return lookup(key, strlen(key)); } unsigned int Dictionary::lookup(const char *key, size_t length) { - DEBUG_ASSERT_NOT_IN_SIGNAL(); return lookup(key, length, true, 0); } unsigned int Dictionary::lookup(const char *key, size_t length, bool for_insert, unsigned int sentinel) { + // The insert path mallocs (allocateKey) and may calloc a DictTable — + // both AS-unsafe. Read-only lookups (for_insert == false, used by + // check() and bounded_lookup at capacity) only touch already-allocated + // memory and are AS-safe. Assert here rather than in the overloads + // so bounded_lookup's runtime-decided for_insert is also covered. + if (for_insert) { + DEBUG_ASSERT_NOT_IN_SIGNAL(); + } + DictTable *table = _table; unsigned int h = hash(key, length); diff --git a/ddprof-lib/src/main/cpp/guards.cpp b/ddprof-lib/src/main/cpp/guards.cpp index f771f66a1..c25768e51 100644 --- a/ddprof-lib/src/main/cpp/guards.cpp +++ b/ddprof-lib/src/main/cpp/guards.cpp @@ -35,6 +35,14 @@ bool isInSignalContext() { return pt == nullptr || pt->signalDepth() != 0; } +bool isInTrackedSignalContext() { + ProfiledThread *pt = ProfiledThread::currentSignalSafe(); + // null ProfiledThread = no thread context; the SignalHandlerScope + // never ran, so we have no positive evidence of a signal frame. + // See header comment for the rationale of returning false here. + return pt != nullptr && pt->signalDepth() != 0; +} + SignalHandlerScope::SignalHandlerScope() : _active(true) { ProfiledThread *pt = ProfiledThread::currentSignalSafe(); if (pt != nullptr) { diff --git a/ddprof-lib/src/main/cpp/guards.h b/ddprof-lib/src/main/cpp/guards.h index 7820353b6..1ec6b489f 100644 --- a/ddprof-lib/src/main/cpp/guards.h +++ b/ddprof-lib/src/main/cpp/guards.h @@ -61,6 +61,17 @@ int getInSignalDepth(); // assume signal" — fail-safe for AS-safety gating). bool isInSignalContext(); +// Returns true ONLY when we have positively tracked entering one of our +// installed signal handlers on this thread. null ProfiledThread → false +// (we have no way to know, so callers that prefer to do work +// synchronously on uninstrumented threads — Profiler::dlopen_hook — +// can use this.) Accepts the small theoretical risk that an +// uninstrumented JVM-internal thread (VM Thread, JIT, GC) doing a dlopen +// from inside a *foreign* signal handler will look like regular code; +// prewarmUnwinder() covers the known libgcc_s case and JVM-internal +// signal handlers are generally AS-safe by design. +bool isInTrackedSignalContext(); + // Internal RAII type — do not instantiate directly; use the macros below. class SignalHandlerScope { public: diff --git a/ddprof-lib/src/main/cpp/libraries.cpp b/ddprof-lib/src/main/cpp/libraries.cpp index 82a71e309..17d37dd76 100644 --- a/ddprof-lib/src/main/cpp/libraries.cpp +++ b/ddprof-lib/src/main/cpp/libraries.cpp @@ -69,14 +69,13 @@ void Libraries::refresh() { void *Libraries::refresherLoop(void *arg) { Libraries *self = static_cast(arg); - // Publish our TID so sampler thread-list enumerations can skip us. - self->_refresher_tid.store(OS::threadId(), std::memory_order_release); - - // Block profiling signals so this thread is never sampled. Without - // this, SIGPROF can fire on the refresher and run the full stack-walk - // path here — pure overhead, and in debug builds with - // DDPROF_FORCE_STACKWALK_CRASH it inflates the SIGSEGV recovery count - // enough to starve the test work thread and break + // Block profiling signals BEFORE publishing our TID. Otherwise a + // SIGPROF / SIGVTALRM armed for this thread (e.g. a stale per-thread + // timer from a previous lifecycle, or an in-flight signal queued during + // pthread_create) could fire on us between TID-publish and sigmask, and + // run the full stack-walk path here — pure overhead, and in debug + // builds with DDPROF_FORCE_STACKWALK_CRASH it inflates the SIGSEGV + // recovery count enough to starve the test work thread and break // vmStackwalkerCrashRecoveryTest. SIGIO (WAKEUP_SIGNAL) is left // unmasked because stopRefresher() relies on it to interrupt sleep. sigset_t mask; @@ -85,10 +84,27 @@ void *Libraries::refresherLoop(void *arg) { sigaddset(&mask, SIGVTALRM); pthread_sigmask(SIG_BLOCK, &mask, nullptr); + // Publish our TID so sampler thread-list enumerations can skip us. + self->_refresher_tid.store(OS::threadId(), std::memory_order_release); + while (self->_refresher_running.load(std::memory_order_acquire)) { - // OS::sleep uses nanosleep, which returns early on EINTR — the wakeup - // signal sent from stopRefresher() will interrupt this immediately. - OS::sleep(REFRESH_INTERVAL_NS); + // Sleep until the next tick or until interrupted. OS::sleep wraps a + // single nanosleep that returns early on EINTR; loop on the remaining + // time so unrelated unmasked signals (SIGCHLD, debugger SIGSTOP/CONT, + // etc.) do not cause premature ticks. The stopRefresher path sends + // SIGIO and flips _refresher_running first, so the outer-loop check + // catches the wake-up and exits without sleeping again. + u64 remaining = REFRESH_INTERVAL_NS; + while (remaining > 0 && + self->_refresher_running.load(std::memory_order_acquire)) { + u64 before = OS::nanotime(); + OS::sleep(remaining); + u64 elapsed = OS::nanotime() - before; + if (elapsed >= remaining) { + break; + } + remaining -= elapsed; + } if (!self->_refresher_running.load(std::memory_order_acquire)) { break; } diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 3d768d1c1..0bb66b89b 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -777,20 +777,24 @@ void *Profiler::dlopen_hook(const char *filename, int flags) { void *result = dlopen(filename, flags); if (result != NULL) { - if (!isInSignalContext()) { - // Normal (non-signal) dlopen: refresh synchronously so symbols in - // the new library are resolved immediately. + if (!isInTrackedSignalContext()) { + // Either confirmed non-signal context, or no ProfiledThread on this + // thread (uninstrumented JVM internals — VM Thread, JIT, GC). We + // prefer synchronous refresh for the null-PT case too because (a) + // those threads call dlopen synchronously during normal JVM + // operation, and (b) wasmtime's broken sigaction patching depends + // on switchLibraryTrap running its work inline (the original reason + // for the trap). The residual risk — an uninstrumented thread + // calling dlopen from inside a foreign signal handler — is small: + // prewarmUnwinder() closes the known libgcc_s lazy-load case, and + // mainstream JVM signal handlers are AS-safe by design. Libraries::instance()->refresh(); } else { - // Async-signal context. Profiler::prewarmUnwinder closes the known - // libgcc_s case, but JVM/JIT lazy loads have been observed from - // signal handlers under Graal on aarch64. refresh() must NOT run - // here — parseLibraries acquires a Mutex and calls malloc, both - // AS-unsafe. Mark the library set dirty; the Libraries refresher - // thread picks it up within REFRESH_INTERVAL_NS (5s), bounding the - // window during which the new library is unresolvable. flushJfr / - // dump / stop also call refresh() unconditionally, so this works - // even before the refresher thread starts. + // Confirmed signal context (one of our SignalHandlerScopes is on + // the stack). refresh() must NOT run here — parseLibraries + // acquires a Mutex and calls malloc, both AS-unsafe. Mark the + // library set dirty; the Libraries refresher thread picks it up + // within REFRESH_INTERVAL_NS (500 ms). Libraries::instance()->markDirty(); } } @@ -837,6 +841,13 @@ void Profiler::segvHandler(int signo, siginfo_t *siginfo, void *ucontext) { // destructor, permanently leaking depth on the thread. Release the guard // before chaining so depth is correct whether the chained handler returns // or longjmps. + // + // Sanitizer-coverage note: this also means depth == 0 inside the chained + // handler, so DEBUG_ASSERT_NOT_IN_SIGNAL() will NOT fire for AS-unsafe + // code reachable from a chained handler that returns normally. This is + // the lesser of two evils — leaking depth on longjmp would silently + // break the production deferred-refresh gate, while the sanitizer gap + // is bounded to third-party signal handler code we don't own. SIGNAL_HANDLER_GUARD(); if (crashHandlerInternal(signo, siginfo, ucontext)) { return; // Handled — destructor decrements depth @@ -1313,14 +1324,14 @@ Error Profiler::start(Arguments &args, bool reset) { enableEngines(); - // Always enable library trap to catch wasmtime loading and patch its broken sigaction - switchLibraryTrap(true); - // Refresher must be running before the trap fires: dlopen_hook's // signal-context branch only marks dirty and relies on the refresher - // to call refresh() within ~5s. + // to call refresh() within REFRESH_INTERVAL_NS (500 ms). _libs->startRefresher(); + // Always enable library trap to catch wasmtime loading and patch its broken sigaction + switchLibraryTrap(true); + JfrMetadata::reset(); JfrMetadata::initialize(args._context_attributes); _num_context_attributes = args._context_attributes.size(); diff --git a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp index ca9c2b881..6584b38a5 100644 --- a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp +++ b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp @@ -68,3 +68,20 @@ TEST(SignalSafetyTestNoContext, NullProfiledThreadIsTreatedAsSignal) { EXPECT_TRUE(isInSignalContext()); EXPECT_EQ(0, getInSignalDepth()); } + +TEST(SignalSafetyTestNoContext, NullProfiledThreadIsNotTrackedSignal) { + // isInTrackedSignalContext() returns false on null because the + // SignalHandlerScope never ran — used by Profiler::dlopen_hook so + // uninstrumented JVM threads doing a normal dlopen take the fast + // (synchronous refresh) path instead of deferring. + EXPECT_FALSE(isInTrackedSignalContext()); +} + +TEST_F(SignalSafetyTest, TrackedSignalContextRequiresScope) { + EXPECT_FALSE(isInTrackedSignalContext()); + { + SignalHandlerScope scope; + EXPECT_TRUE(isInTrackedSignalContext()); + } + EXPECT_FALSE(isInTrackedSignalContext()); +} From 9fb7c0fd6da3b3b1627fb636f1db86c5907ae04f Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 00:13:53 +0200 Subject: [PATCH 9/9] refactor(profiler): extract OS::sleepWhile, drop unused helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move the EINTR-resilient deadline sleep out of Libraries::refresherLoop into a new OS::sleepWhile(max_nanos, std::atomic&) primitive (clock_nanosleep TIMER_ABSTIME on Linux, nanosleep + nanotime on macOS). - Delete isInSignalContext(), Mutex::tryLock(), OS::uninterruptibleSleep — zero callers after the earlier switch to isInTrackedSignalContext. - Cover SignalHandlerScope::release() and signalHandlerUnwindAfterLongjmp() with unit tests; both were stub-replaceable before. - Upload /tmp/signal-safety-violation.txt on musl CI jobs as well. - Refresh the guards.h doc block to describe the current null-PT policy. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/test_workflow.yml | 14 ++++ ddprof-lib/src/main/cpp/guards.cpp | 7 -- ddprof-lib/src/main/cpp/guards.h | 66 ++++++++--------- ddprof-lib/src/main/cpp/libraries.cpp | 21 ++---- ddprof-lib/src/main/cpp/mutex.h | 2 - ddprof-lib/src/main/cpp/os.h | 18 ++++- ddprof-lib/src/main/cpp/os_linux.cpp | 17 +++-- ddprof-lib/src/main/cpp/os_macos.cpp | 17 ++++- ddprof-lib/src/test/cpp/signalSafety_ut.cpp | 79 ++++++++++++++++++--- 9 files changed, 162 insertions(+), 79 deletions(-) diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index 516cfc409..62287dbd6 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -290,6 +290,13 @@ jobs: with: name: (test-reports) test-linux-musl-amd64 (${{ matrix.java_version }}, ${{ matrix.config }}) path: test-reports + - name: Upload signal-safety violation log + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: failure() + with: + name: signal-safety-violation-musl-${{ matrix.java_version }}-${{ matrix.config }}-amd64 + path: /tmp/signal-safety-violation.txt + if-no-files-found: ignore test-linux-glibc-aarch64: needs: cache-jdks @@ -554,3 +561,10 @@ jobs: with: name: (test-reports) test-linux-musl-aarch64 (${{ matrix.java_version }}, ${{ matrix.config }}) path: test-reports + - name: Upload signal-safety violation log + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: failure() + with: + name: signal-safety-violation-musl-${{ matrix.java_version }}-${{ matrix.config }}-aarch64 + path: /tmp/signal-safety-violation.txt + if-no-files-found: ignore diff --git a/ddprof-lib/src/main/cpp/guards.cpp b/ddprof-lib/src/main/cpp/guards.cpp index c25768e51..1bfc0b695 100644 --- a/ddprof-lib/src/main/cpp/guards.cpp +++ b/ddprof-lib/src/main/cpp/guards.cpp @@ -28,13 +28,6 @@ int getInSignalDepth() { return pt != nullptr ? static_cast(pt->signalDepth()) : 0; } -bool isInSignalContext() { - ProfiledThread *pt = ProfiledThread::currentSignalSafe(); - // null ProfiledThread = no thread context yet; treat as "assume signal" - // so AS-safety gates default to the conservative deferred path. - return pt == nullptr || pt->signalDepth() != 0; -} - bool isInTrackedSignalContext() { ProfiledThread *pt = ProfiledThread::currentSignalSafe(); // null ProfiledThread = no thread context; the SignalHandlerScope diff --git a/ddprof-lib/src/main/cpp/guards.h b/ddprof-lib/src/main/cpp/guards.h index 1ec6b489f..4addd8d39 100644 --- a/ddprof-lib/src/main/cpp/guards.h +++ b/ddprof-lib/src/main/cpp/guards.h @@ -25,51 +25,51 @@ class ProfiledThread; // --------------------------------------------------------------------------- -// Signal-context depth tracking. +// Signal-context depth tracking — always on. // -// Code paths that must choose between an async-signal-safe deferred path and -// a synchronous path (e.g. Profiler::dlopen_hook, which defers refresh() when -// a library is loaded from signal context) query isInSignalContext(). The -// debug-only DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h asserts on -// the same counter. +// Profiler::dlopen_hook is the production caller — it queries +// isInTrackedSignalContext() to decide between the synchronous refresh() +// path and the deferred markDirty() path. The debug-only +// DEBUG_ASSERT_NOT_IN_SIGNAL() macro in signalSafety.h asserts on the +// same counter. // // Storage: the depth lives in ProfiledThread::_signal_depth. An earlier // design used a thread_local int, but on Graal aarch64 the lazy DTV slot // allocation triggered malloc inside our signal handler and deadlocked -// against the JVMCI compiler holding the heap lock. initial-exec fixed the -// malloc but tripped the static TLS surplus and broke dlopen on Graal. -// ProfiledThread is already AS-safe-accessible via pthread_getspecific -// (POSIX guarantees it does not allocate; returns nullptr when unset). +// against the JVMCI compiler holding the heap lock. initial-exec fixed +// the malloc but tripped the static TLS surplus and broke dlopen on +// Graal. ProfiledThread is already AS-safe-accessible via +// pthread_getspecific (POSIX guarantees it does not allocate; returns +// nullptr when unset). // -// When ProfiledThread is null on a thread, we don't yet have a thread +// When ProfiledThread is null on a thread we don't yet have a thread // context — uninstrumented JVM-internal threads (VM Thread, JIT, GC) fall -// into this bucket too, and they can receive signals. isInSignalContext() -// treats null as "assume signal" so dlopen_hook defers conservatively -// rather than risking a synchronous refresh from a signal frame. -// DEBUG_ASSERT_NOT_IN_SIGNAL skips the check when null so well-behaved -// non-signal code on uninstrumented threads doesn't trip a false abort. +// into this bucket too, and they can receive signals. The +// SignalHandlerScope guard is a no-op on those threads (nothing to +// update), so isInTrackedSignalContext() returns false: production code +// prefers synchronous refresh() on null-PT threads because (a) those +// threads regularly call dlopen during normal JVM operation, and (b) +// wasmtime's broken sigaction patching depends on switchLibraryTrap +// running work inline. The residual risk — an uninstrumented thread +// calling dlopen from inside a foreign signal handler — is small in +// practice: prewarmUnwinder() closes the known libgcc_s lazy-load case +// and mainstream JVM signal handlers are AS-safe by design. +// +// DEBUG_ASSERT_NOT_IN_SIGNAL likewise skips its check when ProfiledThread +// is null so well-behaved non-signal code on uninstrumented threads +// doesn't trip a false abort. // --------------------------------------------------------------------------- // Returns the signal-handler depth for the calling thread, or 0 if the -// thread has no ProfiledThread yet. Use isInSignalContext() instead when -// you only need a boolean — it handles the "no thread context" case -// conservatively (returns true). +// thread has no ProfiledThread yet. Intended for tests and diagnostic +// code; production callers should use isInTrackedSignalContext(). int getInSignalDepth(); -// True when the calling thread is either inside an installed signal -// handler (depth > 0) or has no thread context yet (treat as "unknown, -// assume signal" — fail-safe for AS-safety gating). -bool isInSignalContext(); - -// Returns true ONLY when we have positively tracked entering one of our -// installed signal handlers on this thread. null ProfiledThread → false -// (we have no way to know, so callers that prefer to do work -// synchronously on uninstrumented threads — Profiler::dlopen_hook — -// can use this.) Accepts the small theoretical risk that an -// uninstrumented JVM-internal thread (VM Thread, JIT, GC) doing a dlopen -// from inside a *foreign* signal handler will look like regular code; -// prewarmUnwinder() covers the known libgcc_s case and JVM-internal -// signal handlers are generally AS-safe by design. +// Returns true only when we have positively tracked entering one of our +// installed signal handlers on this thread (depth > 0 on a non-null +// ProfiledThread). null ProfiledThread → false, matching the +// SignalHandlerScope semantics (the guard is a no-op there). +// Used by Profiler::dlopen_hook to gate the deferred-refresh branch. bool isInTrackedSignalContext(); // Internal RAII type — do not instantiate directly; use the macros below. diff --git a/ddprof-lib/src/main/cpp/libraries.cpp b/ddprof-lib/src/main/cpp/libraries.cpp index 17d37dd76..420e3b273 100644 --- a/ddprof-lib/src/main/cpp/libraries.cpp +++ b/ddprof-lib/src/main/cpp/libraries.cpp @@ -88,23 +88,10 @@ void *Libraries::refresherLoop(void *arg) { self->_refresher_tid.store(OS::threadId(), std::memory_order_release); while (self->_refresher_running.load(std::memory_order_acquire)) { - // Sleep until the next tick or until interrupted. OS::sleep wraps a - // single nanosleep that returns early on EINTR; loop on the remaining - // time so unrelated unmasked signals (SIGCHLD, debugger SIGSTOP/CONT, - // etc.) do not cause premature ticks. The stopRefresher path sends - // SIGIO and flips _refresher_running first, so the outer-loop check - // catches the wake-up and exits without sleeping again. - u64 remaining = REFRESH_INTERVAL_NS; - while (remaining > 0 && - self->_refresher_running.load(std::memory_order_acquire)) { - u64 before = OS::nanotime(); - OS::sleep(remaining); - u64 elapsed = OS::nanotime() - before; - if (elapsed >= remaining) { - break; - } - remaining -= elapsed; - } + // Absolute-deadline sleep that resumes across EINTR (SIGCHLD, debugger + // SIGSTOP/SIGCONT, etc.) and wakes early when stopRefresher() flips + // _refresher_running false and sends SIGIO. See OS::sleepWhile. + OS::sleepWhile(REFRESH_INTERVAL_NS, self->_refresher_running); if (!self->_refresher_running.load(std::memory_order_acquire)) { break; } diff --git a/ddprof-lib/src/main/cpp/mutex.h b/ddprof-lib/src/main/cpp/mutex.h index 671e8b8fc..7d017536d 100644 --- a/ddprof-lib/src/main/cpp/mutex.h +++ b/ddprof-lib/src/main/cpp/mutex.h @@ -19,8 +19,6 @@ class Mutex { void lock(); void unlock(); - // async-signal-safe: pthread_mutex_trylock is on the POSIX AS-safe list - bool tryLock() { return pthread_mutex_trylock(&_mutex) == 0; } }; class WaitableMutex : public Mutex { diff --git a/ddprof-lib/src/main/cpp/os.h b/ddprof-lib/src/main/cpp/os.h index 77d19aba3..6d50420ec 100644 --- a/ddprof-lib/src/main/cpp/os.h +++ b/ddprof-lib/src/main/cpp/os.h @@ -7,6 +7,7 @@ #ifndef _OS_H #define _OS_H +#include #include #include #include @@ -104,7 +105,22 @@ class OS { static u64 micros(); static u64 processStartTime(); static void sleep(u64 nanos); - static void uninterruptibleSleep(u64 nanos, volatile bool* flag); + + // Sleep for up to max_nanos, restarting against an absolute monotonic + // deadline on EINTR so unrelated signals (SIGCHLD, debugger + // SIGSTOP/SIGCONT, etc.) do not shorten the wait. Returns early when + // keep_sleeping becomes false — used by background threads (e.g. the + // Libraries refresher) to respond to a stop request without waiting + // out the full interval. Pair with pthread_kill(thread, SOMESIG) + + // keep_sleeping=false in the caller's stop path: the signal triggers + // EINTR, the loop re-checks the flag, and the function returns. + // + // On Linux this uses clock_nanosleep with TIMER_ABSTIME + CLOCK_MONOTONIC + // so the deadline is absolute and arithmetic-free. On macOS we fall + // back to nanosleep with a recomputed remainder (clock_nanosleep with + // TIMER_ABSTIME is unavailable there). + static void sleepWhile(u64 max_nanos, std::atomic& keep_sleeping); + static u64 overrun(siginfo_t* siginfo); static u64 hton64(u64 x); diff --git a/ddprof-lib/src/main/cpp/os_linux.cpp b/ddprof-lib/src/main/cpp/os_linux.cpp index 807ccea81..20329091f 100644 --- a/ddprof-lib/src/main/cpp/os_linux.cpp +++ b/ddprof-lib/src/main/cpp/os_linux.cpp @@ -171,11 +171,18 @@ void OS::sleep(u64 nanos) { nanosleep(&ts, NULL); } -void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { - // Workaround nanosleep bug: https://man7.org/linux/man-pages/man2/nanosleep.2.html#BUGS - u64 deadline = OS::nanotime() + nanos; - struct timespec ts = {(time_t)(deadline / 1000000000), (long)(deadline % 1000000000)}; - while (*flag && clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, &ts) == EINTR); +void OS::sleepWhile(u64 max_nanos, std::atomic& keep_sleeping) { + // Compute the deadline once and reuse it across EINTR retries so + // unrelated signals don't shorten the wait. + u64 deadline = OS::nanotime() + max_nanos; + struct timespec ts; + ts.tv_sec = (time_t)(deadline / 1000000000ULL); + ts.tv_nsec = (long)(deadline % 1000000000ULL); + while (keep_sleeping.load(std::memory_order_acquire) && + clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr) == EINTR) { + // Re-issue against the same absolute deadline. EINTR absorbs + // SIGCHLD, SIGSTOP/CONT, etc. without arithmetic on remaining time. + } } u64 OS::overrun(siginfo_t* siginfo) { diff --git a/ddprof-lib/src/main/cpp/os_macos.cpp b/ddprof-lib/src/main/cpp/os_macos.cpp index a6872b695..0aee02704 100644 --- a/ddprof-lib/src/main/cpp/os_macos.cpp +++ b/ddprof-lib/src/main/cpp/os_macos.cpp @@ -127,9 +127,20 @@ void OS::sleep(u64 nanos) { nanosleep(&ts, NULL); } -void OS::uninterruptibleSleep(u64 nanos, volatile bool* flag) { - struct timespec ts = {(time_t)(nanos / 1000000000), (long)(nanos % 1000000000)}; - while (*flag && nanosleep(&ts, &ts) < 0 && errno == EINTR); +void OS::sleepWhile(u64 max_nanos, std::atomic& keep_sleeping) { + // macOS does not expose clock_nanosleep(TIMER_ABSTIME). Recompute the + // remaining interval against OS::nanotime() each iteration so spurious + // wake-ups don't shorten the wait, mirroring the Linux semantics. + u64 deadline = OS::nanotime() + max_nanos; + while (keep_sleeping.load(std::memory_order_acquire)) { + u64 now = OS::nanotime(); + if (now >= deadline) { + return; + } + u64 remaining = deadline - now; + struct timespec ts = {(time_t)(remaining / 1000000000ULL), (long)(remaining % 1000000000ULL)}; + nanosleep(&ts, nullptr); + } } u64 OS::overrun(siginfo_t* siginfo) { diff --git a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp index 6584b38a5..a451e7439 100644 --- a/ddprof-lib/src/test/cpp/signalSafety_ut.cpp +++ b/ddprof-lib/src/test/cpp/signalSafety_ut.cpp @@ -62,10 +62,70 @@ TEST_F(SignalSafetyTest, NestedDepth) { EXPECT_EQ(0, getInSignalDepth()); } -TEST(SignalSafetyTestNoContext, NullProfiledThreadIsTreatedAsSignal) { - // No ProfiledThread on this thread — isInSignalContext() must return - // true (conservative default for AS-safety gates like dlopen_hook). - EXPECT_TRUE(isInSignalContext()); +TEST_F(SignalSafetyTest, TrackedSignalContextRequiresScope) { + EXPECT_FALSE(isInTrackedSignalContext()); + { + SignalHandlerScope scope; + EXPECT_TRUE(isInTrackedSignalContext()); + } + EXPECT_FALSE(isInTrackedSignalContext()); +} + +// SIGNAL_HANDLER_GUARD_RELEASE path: explicit early release inside a scope, +// destructor must not double-decrement. +TEST_F(SignalSafetyTest, ReleaseDecrementsAndDestructorIsNoOp) { + EXPECT_EQ(0, getInSignalDepth()); + { + SignalHandlerScope scope; + EXPECT_EQ(1, getInSignalDepth()); + scope.release(); + EXPECT_EQ(0, getInSignalDepth()); + // Destructor runs at end of scope and must NOT decrement again. + } + EXPECT_EQ(0, getInSignalDepth()); +} + +// SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP path: simulate a longjmp that bypasses +// SignalHandlerScope's destructor. signalHandlerUnwindAfterLongjmp() must +// decrement the depth even when no live scope object is around. +// +// Sequence (longjmp simulated by an extra block-scope guard that we don't +// destroy): +// outer scope ctor → depth=1 +// inner scope ctor → depth=2 +// imagine longjmp: inner scope's dtor is skipped +// signalHandlerUnwindAfterLongjmp() at landing → depth=1 +// outer scope dtor → depth=0 +TEST_F(SignalSafetyTest, SignalHandlerUnwindAfterLongjmpDecrementsOnce) { + EXPECT_EQ(0, getInSignalDepth()); + { + SignalHandlerScope outer; + EXPECT_EQ(1, getInSignalDepth()); + + // Heap-allocate the inner scope so we can drop it without running its + // destructor — emulating a longjmp that bypasses the C++ stack unwind. + SignalHandlerScope *leaked_inner = new SignalHandlerScope(); + EXPECT_EQ(2, getInSignalDepth()); + + // Pretend the longjmp landed here. Compensate for the bypassed dtor. + SIGNAL_HANDLER_UNWIND_AFTER_LONGJMP(); + EXPECT_EQ(1, getInSignalDepth()); + + // Outer scope dtor runs next and brings depth back to 0. + // Free leaked_inner without calling its destructor (placement-new + // wasn't used so a plain operator delete would invoke the destructor; + // route the storage through ::operator delete to skip dtor). + ::operator delete(leaked_inner); + } + EXPECT_EQ(0, getInSignalDepth()); +} + +// Safety property: signalHandlerUnwindAfterLongjmp() saturates at zero; +// double calls do not underflow. +TEST_F(SignalSafetyTest, SignalHandlerUnwindAfterLongjmpSaturatesAtZero) { + EXPECT_EQ(0, getInSignalDepth()); + signalHandlerUnwindAfterLongjmp(); + signalHandlerUnwindAfterLongjmp(); EXPECT_EQ(0, getInSignalDepth()); } @@ -77,11 +137,8 @@ TEST(SignalSafetyTestNoContext, NullProfiledThreadIsNotTrackedSignal) { EXPECT_FALSE(isInTrackedSignalContext()); } -TEST_F(SignalSafetyTest, TrackedSignalContextRequiresScope) { - EXPECT_FALSE(isInTrackedSignalContext()); - { - SignalHandlerScope scope; - EXPECT_TRUE(isInTrackedSignalContext()); - } - EXPECT_FALSE(isInTrackedSignalContext()); +TEST(SignalSafetyTestNoContext, NullProfiledThreadDepthIsZero) { + // Mutation guard: getInSignalDepth() must return 0 on null PT, not a + // sentinel that happens to equal zero today. + EXPECT_EQ(0, getInSignalDepth()); }