diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index 4436e80a8..4860c9a1d 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -364,6 +364,14 @@ Error Arguments::parse(const char *args) { _remote_symbolication = true; } + CASE("wallprecheck") + if (value != NULL) { + _wall_precheck = strcmp(value, "false") != 0 && strcmp(value, "0") != 0; + } else { + // No value means disable + _wall_precheck = false; + } + CASE("wallsampler") if (value != NULL) { switch (value[0]) { diff --git a/ddprof-lib/src/main/cpp/arguments.h b/ddprof-lib/src/main/cpp/arguments.h index 2d400f213..874e9daf7 100644 --- a/ddprof-lib/src/main/cpp/arguments.h +++ b/ddprof-lib/src/main/cpp/arguments.h @@ -168,6 +168,7 @@ class Arguments { long _cpu; long _wall; bool _wall_collapsing; + bool _wall_precheck; int _wall_threads_per_tick; WallclockSampler _wallclock_sampler; long _memory; @@ -204,6 +205,7 @@ class Arguments { _cpu(-1), _wall(-1), _wall_collapsing(false), + _wall_precheck(false), _wall_threads_per_tick(DEFAULT_WALL_THREADS_PER_TICK), _wallclock_sampler(ASGCT), _memory(-1), diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 7ae44f9bc..7c12478bd 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -60,6 +60,14 @@ X(AGCT_NATIVE_NO_JAVA_CONTEXT, "agct_native_no_java_context") \ X(AGCT_BLOCKED_IN_VM, "agct_blocked_in_vm") \ X(SKIPPED_WALLCLOCK_UNWINDS, "skipped_wallclock_unwinds") \ + X(WC_SIGNAL_SUPPRESSED_SAMPLED_RUN, "wc_signals_suppressed_sampled_run") \ + X(WC_SIGNAL_SKIPPED_PARKED, "wc_signals_skipped_parked") \ + X(WC_SIGNAL_SKIPPED_PARKED_SPANLESS, "wc_signals_skipped_parked_spanless") \ + X(WC_SIGNAL_SKIPPED_PARKED_ACTIVE_SPAN, "wc_signals_skipped_parked_active_span") \ + X(WC_SIGNAL_QUEUE_FULL, "wc_signals_queue_full") \ + X(TASK_BLOCK_EMITTED, "task_block_emitted") \ + X(TASK_BLOCK_SKIPPED_SPAN_ZERO, "task_block_skipped_span_zero") \ + X(TASK_BLOCK_SKIPPED_TOO_SHORT, "task_block_skipped_too_short") \ X(UNWINDING_TIME_ASYNC, "unwinding_ticks_async") \ X(UNWINDING_TIME_JVMTI, "unwinding_ticks_jvmti") \ X(CALLTRACE_STORAGE_DROPPED, "calltrace_storage_dropped_traces") \ @@ -102,7 +110,14 @@ X(CTIMER_SIGNAL_OWN, "ctimer_signal_own") \ X(CTIMER_SIGNAL_FOREIGN, "ctimer_signal_foreign") \ X(WALLCLOCK_SIGNAL_OWN, "wallclock_signal_own") \ - X(WALLCLOCK_SIGNAL_FOREIGN, "wallclock_signal_foreign") + X(WALLCLOCK_SIGNAL_FOREIGN, "wallclock_signal_foreign") \ + /* JVMTI monitor-event callbacks (synchronized contention + Object.wait). \ + * Driven by the can_generate_monitor_events capability. Zero when the agent \ + * has signalled delegate_monitor_events on JDK 21+ (Java-side modules own \ + * the populations); non-zero on JDK < 21 or when no delegate is registered. \ + * Used by MonitorEventDelegationTest to verify the handshake. */ \ + X(JVMTI_MONITOR_CONTENDED_ENTER, "jvmti_monitor_contended_enter") \ + X(JVMTI_OBJECT_WAIT_ENTER, "jvmti_object_wait_enter") #define X_ENUM(a, b) a, typedef enum CounterId : int { DD_COUNTER_TABLE(X_ENUM) DD_NUM_COUNTERS diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h index 752db842d..55269888d 100644 --- a/ddprof-lib/src/main/cpp/event.h +++ b/ddprof-lib/src/main/cpp/event.h @@ -109,12 +109,21 @@ class WallClockEpochEvent { u32 _num_failed_samples; u32 _num_exited_threads; u32 _num_permission_denied; + u64 _num_suppressed_sampled_run; + u64 _num_skipped_parked_spanless; + u64 _num_skipped_parked_active_span; + u64 _num_task_block_emitted; + u64 _num_task_block_skipped_span_zero; + u64 _num_task_block_skipped_too_short; WallClockEpochEvent(u64 start_time) : _dirty(false), _start_time(start_time), _duration_millis(0), _num_samplable_threads(0), _num_successful_samples(0), _num_failed_samples(0), _num_exited_threads(0), - _num_permission_denied(0) {} + _num_permission_denied(0), _num_suppressed_sampled_run(0), + _num_skipped_parked_spanless(0), _num_skipped_parked_active_span(0), + _num_task_block_emitted(0), _num_task_block_skipped_span_zero(0), + _num_task_block_skipped_too_short(0) {} bool hasChanged() { return _dirty; } @@ -153,6 +162,48 @@ class WallClockEpochEvent { } } + void updateNumSuppressedSampledRun(u64 n) { + if (_num_suppressed_sampled_run != n) { + _dirty = true; + _num_suppressed_sampled_run = n; + } + } + + void updateNumSkippedParkedSpanless(u64 n) { + if (_num_skipped_parked_spanless != n) { + _dirty = true; + _num_skipped_parked_spanless = n; + } + } + + void updateNumSkippedParkedActiveSpan(u64 n) { + if (_num_skipped_parked_active_span != n) { + _dirty = true; + _num_skipped_parked_active_span = n; + } + } + + void updateNumTaskBlockEmitted(u64 n) { + if (_num_task_block_emitted != n) { + _dirty = true; + _num_task_block_emitted = n; + } + } + + void updateNumTaskBlockSkippedSpanZero(u64 n) { + if (_num_task_block_skipped_span_zero != n) { + _dirty = true; + _num_task_block_skipped_span_zero = n; + } + } + + void updateNumTaskBlockSkippedTooShort(u64 n) { + if (_num_task_block_skipped_too_short != n) { + _dirty = true; + _num_task_block_skipped_too_short = n; + } + } + void endEpoch(u64 millis) { _duration_millis = millis; } void clean() { _dirty = false; } @@ -184,4 +235,13 @@ typedef struct QueueTimeEvent { u32 _queueLength; } QueueTimeEvent; +typedef struct TaskBlockEvent { + u64 _start; + u64 _end; + u64 _blocker; + u64 _unblockingSpanId; + /** Span IDs and tag encodings for JFR (park exit uses snapshot from park enter). */ + Context _ctx; +} TaskBlockEvent; + #endif // _EVENT_H diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index cefbc476b..82213677f 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -1535,6 +1535,12 @@ void Recording::recordWallClockEpoch(Buffer *buf, WallClockEpochEvent *event) { buf->putVar64(event->_num_failed_samples); buf->putVar64(event->_num_exited_threads); buf->putVar64(event->_num_permission_denied); + buf->putVar64(event->_num_suppressed_sampled_run); + buf->putVar64(event->_num_skipped_parked_spanless); + buf->putVar64(event->_num_skipped_parked_active_span); + buf->putVar64(event->_num_task_block_emitted); + buf->putVar64(event->_num_task_block_skipped_span_zero); + buf->putVar64(event->_num_task_block_skipped_too_short); writeEventSizePrefix(buf, start); flushIfNeeded(buf); } @@ -1570,6 +1576,39 @@ void Recording::recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event) { flushIfNeeded(buf); } +void Recording::recordTaskBlockLive(Buffer *buf, int tid, TaskBlockEvent *event) { + // Live capture: TaskBlock-specific fields first, then the standard context block via + // writeCurrentContext — same shape as recordQueueTime. + int start = buf->skip(1); + buf->putVar64(T_TASK_BLOCK); + buf->putVar64(event->_start); + buf->putVar64(event->_end - event->_start); + buf->putVar64(tid); + buf->putVar64(event->_blocker); + buf->putVar64(event->_unblockingSpanId); + writeCurrentContext(buf); + writeEventSizePrefix(buf, start); + flushIfNeeded(buf); +} + +void Recording::recordTaskBlockDeferred(Buffer *buf, int tid, + TaskBlockEvent *event) { + // Deferred capture: context was snapshotted earlier (parkEnter / ObjectWaitEnter from + // JVMTI MonitorWait) and is emitted from event->_ctx via writeContextSnapshot — same + // on-disk layout as the live path, distinct only in the source of the spanId/rootSpanId/tag + // triple. + int start = buf->skip(1); + buf->putVar64(T_TASK_BLOCK); + buf->putVar64(event->_start); + buf->putVar64(event->_end - event->_start); + buf->putVar64(tid); + buf->putVar64(event->_blocker); + buf->putVar64(event->_unblockingSpanId); + writeContextSnapshot(buf, event->_ctx); + writeEventSizePrefix(buf, start); + flushIfNeeded(buf); +} + void Recording::recordAllocation(RecordingBuffer *buf, int tid, u64 call_trace_id, AllocEvent *event) { int start = buf->skip(1); @@ -1772,6 +1811,7 @@ void FlightRecorder::recordTraceRoot(int lock_index, int tid, Recording* rec = _rec; if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); + rec->addThread(lock_index, tid); rec->recordTraceRoot(buf, tid, event); } } @@ -1784,11 +1824,39 @@ void FlightRecorder::recordQueueTime(int lock_index, int tid, Recording* rec = _rec; if (rec != nullptr) { Buffer *buf = rec->buffer(lock_index); + rec->addThread(lock_index, tid); + rec->addThread(lock_index, event->_origin); rec->recordQueueTime(buf, tid, event); } } } +void FlightRecorder::recordTaskBlockLive(int lock_index, int tid, + TaskBlockEvent *event) { + OptionalSharedLockGuard locker(&_rec_lock); + if (locker.ownsLock()) { + Recording* rec = _rec; + if (rec != nullptr) { + Buffer *buf = rec->buffer(lock_index); + rec->addThread(lock_index, tid); + rec->recordTaskBlockLive(buf, tid, event); + } + } +} + +void FlightRecorder::recordTaskBlockDeferred(int lock_index, int tid, + TaskBlockEvent *event) { + OptionalSharedLockGuard locker(&_rec_lock); + if (locker.ownsLock()) { + Recording* rec = _rec; + if (rec != nullptr) { + Buffer *buf = rec->buffer(lock_index); + rec->addThread(lock_index, tid); + rec->recordTaskBlockDeferred(buf, tid, event); + } + } +} + void FlightRecorder::recordDatadogSetting(int lock_index, int length, const char *name, const char *value, const char *unit) { diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index e9aa3cde1..fee6be259 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -279,6 +279,8 @@ class Recording { void recordWallClockEpoch(Buffer *buf, WallClockEpochEvent *event); void recordTraceRoot(Buffer *buf, int tid, TraceRootEvent *event); void recordQueueTime(Buffer *buf, int tid, QueueTimeEvent *event); + void recordTaskBlockLive(Buffer *buf, int tid, TaskBlockEvent *event); + void recordTaskBlockDeferred(Buffer *buf, int tid, TaskBlockEvent *event); void recordAllocation(RecordingBuffer *buf, int tid, u64 call_trace_id, AllocEvent *event); void recordMallocSample(Buffer *buf, int tid, u64 call_trace_id, @@ -347,6 +349,8 @@ class FlightRecorder { void wallClockEpoch(int lock_index, WallClockEpochEvent *event); void recordTraceRoot(int lock_index, int tid, TraceRootEvent *event); void recordQueueTime(int lock_index, int tid, QueueTimeEvent *event); + void recordTaskBlockLive(int lock_index, int tid, TaskBlockEvent *event); + void recordTaskBlockDeferred(int lock_index, int tid, TaskBlockEvent *event); bool active() const { return _rec != NULL; } diff --git a/ddprof-lib/src/main/cpp/hotspot/vmStructs.inline.h b/ddprof-lib/src/main/cpp/hotspot/vmStructs.inline.h index cbe3c9407..383b58553 100644 --- a/ddprof-lib/src/main/cpp/hotspot/vmStructs.inline.h +++ b/ddprof-lib/src/main/cpp/hotspot/vmStructs.inline.h @@ -9,12 +9,21 @@ #include "hotspot/vmStructs.h" #include "jvmThread.h" +#include "vmEntry.h" VMThread* VMThread::current() { + // JVMThread::current() is the native thread self pointer. On OpenJ9/Zing it + // is not a HotSpot JavaThread*; only HotSpot may reinterpret it as VMThread*. + if (!VM::isHotspot() || JVMThread::current() == nullptr) { + return nullptr; + } return VMThread::cast(JVMThread::current()); } VMThread* VMThread::fromJavaThread(JNIEnv* env, jthread thread) { + if (!VM::isHotspot()) { + return nullptr; + } assert(_eetop != nullptr); if (_eetop != nullptr) { jlong eetop = env->GetLongField(thread, _eetop); diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 6f70ae54e..84f4cfd59 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -24,12 +24,13 @@ #include "counters.h" #include "common.h" #include "engine.h" -#include "hotspot/vmStructs.h" +#include "hotspot/vmStructs.inline.h" #include "incbin.h" #include "jvmThread.h" #include "os.h" #include "otel_process_ctx.h" #include "profiler.h" +#include "taskBlockRecorder.h" #include "thread.h" #include "tsc.h" #include "vmEntry.h" @@ -67,9 +68,14 @@ class JniString { }; extern "C" DLLEXPORT jboolean JNICALL -Java_com_datadoghq_profiler_JavaProfiler_init0(JNIEnv *env, jclass unused) { - // JavaVM* has already been stored when the native library was loaded so we can pass nullptr here - return VM::initProfilerBridge(nullptr, true); +Java_com_datadoghq_profiler_JavaProfiler_init0(JNIEnv *env, jclass unused, + jboolean delegate_monitor_events) { + // JavaVM* has already been stored when the native library was loaded so we can pass nullptr here. + // delegate_monitor_events lets dd-trace-java signal that its Java-side instrumentation modules + // (object-wait, synchronized-contention) will cover the populations the JVMTI MonitorWait / + // MonitorContended* callbacks observe, so the native callbacks can be skipped on JDK 21+ to + // avoid the can_generate_monitor_events per-monitor instrumentation cost. + return VM::initProfilerBridge(nullptr, true, delegate_monitor_events == JNI_TRUE); } extern "C" DLLEXPORT void JNICALL @@ -150,15 +156,21 @@ JavaCritical_com_datadoghq_profiler_JavaProfiler_filterThreadAdd0() { int slot_id = current->filterSlotId(); if (unlikely(slot_id == -1)) { - // Thread doesn't have a slot ID yet (e.g., main thread), so register it - // Happens when we are not enabled before thread start + // Thread doesn't have a slot ID yet (e.g., main thread or profiler started + // after thread creation). Register now. slot_id = thread_filter->registerThread(); current->setFilterSlotId(slot_id); } - + if (unlikely(slot_id == -1)) { return; // Failed to register thread } + // Refresh HotSpot VMThread* for wall thread-filter precheck (vmStructs OS state). + // HotSpot only: VMThread::current() asserts VM::isHotspot(). OpenJ9/Zing: leave null. + thread_filter->setVMThread(slot_id, VM::isHotspot() ? VMThread::current() : nullptr); + // Refresh ProfiledThread* so wall-clock mitigations can observe per-thread parked state. + // Publish pointer fields before publishing tid via add() to preserve visibility ordering. + thread_filter->setProfiledThread(slot_id, current); thread_filter->add(tid, slot_id); } @@ -313,6 +325,47 @@ Java_com_datadoghq_profiler_JavaProfiler_recordQueueEnd0( Profiler::instance()->recordQueueTime(tid, &event); } +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_JavaProfiler_recordTaskBlock0( + JNIEnv *env, jclass unused, jlong startTicks, jlong endTicks, + jlong blocker, jlong unblockingSpanId) { + int tid = ProfiledThread::currentTid(); + if (tid < 0) { + return; + } + // Context (span ids + tags) is read natively from the OTEP TLS sidecar at JFR write time + // via writeCurrentContext, mirroring recordQueueTime. No span ids cross JNI. + recordTaskBlockLiveIfEligible(tid, (u64)startTicks, (u64)endTicks, + (u64)blocker, (u64)unblockingSpanId); +} + +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_JavaProfiler_parkEnter0(JNIEnv *env, jclass unused) { + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + current->parkEnter(TSC::ticks()); +} + +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_JavaProfiler_parkExit0( + JNIEnv *env, jclass unused, jlong blocker, jlong unblockingSpanId) { + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + u64 start_ticks = 0; + Context park_context = {}; + if (!current->parkExit(start_ticks, park_context)) { + return; + } + u64 end_ticks = TSC::ticks(); + recordTaskBlockDeferredIfEligible(current->tid(), start_ticks, end_ticks, + park_context, (u64)blocker, + (u64)unblockingSpanId); +} + extern "C" DLLEXPORT jlong JNICALL Java_com_datadoghq_profiler_JavaProfiler_currentTicks0(JNIEnv *env, jclass unused) { diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp index 54e0f6a15..114b41d37 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp +++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp @@ -142,18 +142,41 @@ void JfrMetadata::initialize( contextAttributes) << (type("datadog.WallClockSamplingEpoch", T_WALLCLOCK_SAMPLE_EPOCH, - "WallClock Sampling Epoch") + "Wall Clock Sampling Epoch") << category("Datadog", "Profiling") << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) << field("duration", T_LONG, "Duration", F_DURATION_MILLIS) << field("samplePoolSize", T_INT, "Sample Pool Size") << field("numSuccessfulSamples", T_INT, - "Number of Successful Samples") - << field("numFailedSamples", T_INT, "Number of Failed Samples") + "SIGVTALRM deliveries attempted successfully in the last timer iteration " + "that updated this field") + << field("numFailedSamples", T_INT, + "Failed signal deliveries in the last timer iteration that updated this field") << field("numExitedThreads", T_INT, - "Number of Exited Threads Before Handling Signal") + "Threads already exited (ESRCH) in the last timer iteration that updated " + "this field") << field("numPermissionDenied", T_INT, - "Number of Permission Denied Errors")) + "EPERM signal failures in the last timer iteration that updated this field") + << field("numSuppressedSampledRun", T_LONG, + "Signals suppressed by the once-per-run wall-clock filter: handler " + "drained since the previous timer iteration reset. After the first " + "MethodSample of a SLEEPING / CONDVAR_WAIT run is emitted, subsequent " + "signals on that thread are suppressed until the OS state leaves the " + "skip set. Covers sleep, park/parkNanos, JDK 21+ Thread.sleep via " + "condvar wait, etc.") + << field("numSkippedParkedSpanless", T_LONG, + "Park-flag suppressions without span: drained since the previous timer " + "iteration reset (same boundary as TaskBlock counters below)") + << field("numSkippedParkedActiveSpan", T_LONG, + "Park-flag suppressions with span: drained since the previous timer " + "iteration reset") + << field("numTaskBlockEmitted", T_LONG, + "TaskBlock emissions: drained total since the previous timer iteration reset") + << field("numTaskBlockSkippedSpanZero", T_LONG, + "TaskBlock skips (no span): drained since the previous timer iteration reset") + << field("numTaskBlockSkippedTooShort", T_LONG, + "TaskBlock skips (below min duration): drained since the previous timer " + "iteration reset")) << (type("datadog.ObjectSample", T_ALLOC, "Allocation sample") << category("Datadog", "Profiling") @@ -205,6 +228,17 @@ void JfrMetadata::initialize( << field("localRootSpanId", T_LONG, "Local Root Span ID") || contextAttributes) + << (type("datadog.TaskBlock", T_TASK_BLOCK, "Task Block") + << category("Datadog") + << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) + << field("duration", T_LONG, "Duration", F_DURATION_TICKS) + << field("eventThread", T_THREAD, "Event Thread", F_CPOOL) + << field("blocker", T_LONG, "Blocker Identity Hash") + << field("unblockingSpanId", T_LONG, "Unblocking Span ID") + << field("spanId", T_LONG, "Span ID") + << field("localRootSpanId", T_LONG, "Local Root Span ID") || + contextAttributes) + << (type("datadog.HeapUsage", T_HEAP_USAGE, "JVM Heap Usage") << category("Datadog") << field("startTime", T_LONG, "Start Time", F_TIME_TICKS) diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.h b/ddprof-lib/src/main/cpp/jfrMetadata.h index 52c2e0ae8..bd021d1a8 100644 --- a/ddprof-lib/src/main/cpp/jfrMetadata.h +++ b/ddprof-lib/src/main/cpp/jfrMetadata.h @@ -80,6 +80,7 @@ enum JfrType { T_DATADOG_COUNTER = 125, T_UNWIND_FAILURE = 126, T_MALLOC = 127, + T_TASK_BLOCK = 128, T_ANNOTATION = 200, T_LABEL = 201, T_CATEGORY = 202, diff --git a/ddprof-lib/src/main/cpp/livenessTracker.cpp b/ddprof-lib/src/main/cpp/livenessTracker.cpp index afb2c1f74..ee4371326 100644 --- a/ddprof-lib/src/main/cpp/livenessTracker.cpp +++ b/ddprof-lib/src/main/cpp/livenessTracker.cpp @@ -217,6 +217,13 @@ Error LivenessTracker::initialize(Arguments &args) { } if (_initialized) { + // Tracker settings are sticky across recordings. Preserve the historical + // table/config behavior, but allow HeapUsage recording to be enabled later + // (e.g. if an earlier test initialized liveness without ':L'). Once heap usage + // recording has been requested for this JVM lifetime, keep emitting HeapUsage + // even if a later recording omits the flag — avoids dropping JVM heap telemetry + // when liveness was turned on first without ':L'. + _record_heap_usage = _record_heap_usage || args._record_heap_usage; // if the tracker was previously initialized return the stored result for // consistency this hack also means that if the profiler is started with // different arguments for liveness tracking those will be ignored it is diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f613103ee..771202554 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -31,6 +31,8 @@ #include "stackFrame.h" #include "stackWalker.h" #include "symbols.h" +#include "jniHelper.h" +#include "taskBlockRecorder.h" #include "thread.h" #include "tsc.h" #include "utils.h" @@ -59,6 +61,22 @@ volatile bool Profiler::_need_JDK_8313796_workaround = true; static void (*orig_segvHandler)(int signo, siginfo_t *siginfo, void *ucontext); static void (*orig_busHandler)(int signo, siginfo_t *siginfo, void *ucontext); +namespace { + +/** Same 32-bit identity bits as Java {@code System.identityHashCode(Object)} (unsigned widen). */ +static u64 jvmtiBlockerIdentity(jvmtiEnv *jvmti, jobject object) { + if (object == nullptr) { + return 0; + } + jint hash_code = 0; + if (jvmti->GetObjectHashCode(object, &hash_code) != JVMTI_ERROR_NONE) { + return 0; + } + return (u64)(uint32_t)hash_code; +} + +} // namespace + static Engine noop_engine; static MallocTracer malloc_tracer; static PerfEvents perf_events; @@ -75,6 +93,9 @@ void Profiler::onThreadStart(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) { if (_thread_filter.enabled()) { int slot_id = _thread_filter.registerThread(); current->setFilterSlotId(slot_id); + // VMThread / vmStructs are HotSpot-only; VMThread::current() asserts VM::isHotspot(). + _thread_filter.setVMThread(slot_id, VM::isHotspot() ? VMThread::current() : nullptr); + _thread_filter.setProfiledThread(slot_id, current); _thread_filter.remove(slot_id); // Remove from filtering initially } if (thread != NULL) { @@ -95,6 +116,8 @@ void Profiler::onThreadEnd(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) { tid = current->tid(); if (_thread_filter.enabled()) { + _thread_filter.setVMThread(slot_id, nullptr); + _thread_filter.setProfiledThread(slot_id, nullptr); _thread_filter.unregisterThread(slot_id); current->setFilterSlotId(-1); } @@ -115,6 +138,71 @@ void Profiler::onThreadEnd(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) { _wall_engine->unregisterThread(tid); } +void JNICALL Profiler::ObjectWaitEnter(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object, jlong timeout) { + // Counter is incremented unconditionally on entry: MonitorEventDelegationTest reads it + // to verify that the JDK 21+ delegate handshake actually disables the native callbacks + // (delta must be zero when delegateMonitorEvents=true on JDK 21+). + Counters::increment(JVMTI_OBJECT_WAIT_ENTER); + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + + Context context = ContextApi::snapshot(); + // Align with park/TaskBlock Java path: blocker uses System.identityHashCode bits. + current->objectWaitEnter(TSC::ticks(), context, jvmtiBlockerIdentity(jvmti, object)); +} + +void JNICALL Profiler::ObjectWaitExit(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object, jboolean timed_out) { + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + + u64 start_ticks = 0; + Context context = {}; + u64 blocker = 0; + if (!current->objectWaitExit(start_ticks, context, blocker)) { + return; + } + + recordTaskBlockDeferredIfEligible(current->tid(), start_ticks, TSC::ticks(), + context, blocker, 0); +} + +void JNICALL Profiler::MonitorContendedEnter(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object) { + // See ObjectWaitEnter — same delegation-handshake observability counter. + Counters::increment(JVMTI_MONITOR_CONTENDED_ENTER); + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + + Context context = ContextApi::snapshot(); + current->monitorContendedEnter(TSC::ticks(), context, jvmtiBlockerIdentity(jvmti, object)); +} + +void JNICALL Profiler::MonitorContendedExit(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object) { + ProfiledThread *current = ProfiledThread::current(); + if (current == nullptr) { + return; + } + + u64 start_ticks = 0; + Context context = {}; + u64 blocker = 0; + if (!current->monitorContendedExit(start_ticks, context, blocker)) { + return; + } + + recordTaskBlockDeferredIfEligible(current->tid(), start_ticks, TSC::ticks(), + context, blocker, 0); +} + int Profiler::registerThread(int tid) { return _instance->_cpu_engine->registerThread(tid) | _instance->_wall_engine->registerThread(tid); @@ -629,6 +717,30 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) { _locks[lock_index].unlock(); } +bool Profiler::recordTaskBlockLive(int tid, TaskBlockEvent *event) { + u32 lock_index = getLockIndex(tid); + if (!_locks[lock_index].tryLock() && + !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && + !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { + return false; + } + _jfr.recordTaskBlockLive(lock_index, tid, event); + _locks[lock_index].unlock(); + return true; +} + +bool Profiler::recordTaskBlockDeferred(int tid, TaskBlockEvent *event) { + u32 lock_index = getLockIndex(tid); + if (!_locks[lock_index].tryLock() && + !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && + !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { + return false; + } + _jfr.recordTaskBlockDeferred(lock_index, tid, event); + _locks[lock_index].unlock(); + return true; +} + void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, ASGCT_CallFrame *frames, bool truncated, jint event_type, Event *event) { @@ -1142,6 +1254,8 @@ Error Profiler::start(Arguments &args, bool reset) { assert(current != nullptr); int slot_id = _thread_filter.registerThread(); current->setFilterSlotId(slot_id); + _thread_filter.setVMThread(slot_id, VM::isHotspot() ? VMThread::current() : nullptr); + _thread_filter.setProfiledThread(slot_id, current); _thread_filter.remove(slot_id); // Remove from filtering initially (matches onThreadStart behavior) } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index d9dc28ac7..12ea3ad9d 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -337,6 +337,8 @@ class alignas(alignof(SpinLock)) Profiler { void recordWallClockEpoch(int tid, WallClockEpochEvent *event); void recordTraceRoot(int tid, TraceRootEvent *event); void recordQueueTime(int tid, QueueTimeEvent *event); + bool recordTaskBlockLive(int tid, TaskBlockEvent *event); + bool recordTaskBlockDeferred(int tid, TaskBlockEvent *event); void writeLog(LogLevel level, const char *message); void writeLog(LogLevel level, const char *message, size_t len); void writeDatadogProfilerSetting(int tid, int length, const char *name, @@ -366,6 +368,20 @@ class alignas(alignof(SpinLock)) Profiler { instance()->onThreadEnd(jvmti, jni, thread); } + static void JNICALL ObjectWaitEnter(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object, jlong timeout); + static void JNICALL ObjectWaitExit(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object, jboolean timed_out); + + // Synchronized contention: thread blocks on entering a contested monitor + // (HotSpot OSThreadState::MONITOR_WAIT). Driven by JVMTI MonitorContendedEnter + // (thread joins entry queue) and MonitorContendedEntered (thread acquired the + // lock). Different population than ObjectWait* above, which observes Object.wait(). + static void JNICALL MonitorContendedEnter(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object); + static void JNICALL MonitorContendedExit(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, + jobject object); + // Keep backward compatibility with the upstream async-profiler inline CodeCache* findLibraryByAddress(const void *address) { #ifdef DEBUG diff --git a/ddprof-lib/src/main/cpp/taskBlockRecorder.cpp b/ddprof-lib/src/main/cpp/taskBlockRecorder.cpp new file mode 100644 index 000000000..a6289b8d3 --- /dev/null +++ b/ddprof-lib/src/main/cpp/taskBlockRecorder.cpp @@ -0,0 +1,97 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "taskBlockRecorder.h" + +#include "context_api.h" +#include "counters.h" +#include "event.h" +#include "profiler.h" +#include "tsc.h" +#include "wallClockCounters.h" + +namespace { + +const u64 kMinTaskBlockNanos = 1000000; // 1 ms + +bool exceedsMinTaskBlockDuration(u64 start_ticks, u64 end_ticks) { + // TSC::frequency() is stable after profiler startup; cache the converted + // threshold once per process. C++17 guarantees thread-safe initialization + // of function-local statics. + static const u64 min_ticks = + (TSC::frequency() * kMinTaskBlockNanos) / 1000000000ULL; + return end_ticks > start_ticks && (end_ticks - start_ticks) >= min_ticks; +} + +void incrementSpanZeroSkip() { + Counters::increment(TASK_BLOCK_SKIPPED_SPAN_ZERO); + WallClockCounters::incrementTaskBlockSkippedSpanZero(); +} + +void incrementTooShortSkip() { + Counters::increment(TASK_BLOCK_SKIPPED_TOO_SHORT); + WallClockCounters::incrementTaskBlockSkippedTooShort(); +} + +void incrementEmit() { + Counters::increment(TASK_BLOCK_EMITTED); + WallClockCounters::incrementTaskBlockEmitted(); +} + +} // namespace + +bool recordTaskBlockLiveIfEligible(int tid, u64 start_ticks, u64 end_ticks, + u64 blocker, u64 unblocking_span_id) { + // Peek at the OTEP TLS via the cheap span-id reader. If the sidecar is mid-detach/attach + // (valid=0) or no span is active, skip emission — matches the validity gate that + // writeCurrentContext would apply at JFR write time. + u64 span_id = 0; + u64 root_span_id = 0; + if (!ContextApi::get(span_id, root_span_id) || span_id == 0) { + incrementSpanZeroSkip(); + return false; + } + if (!exceedsMinTaskBlockDuration(start_ticks, end_ticks)) { + incrementTooShortSkip(); + return false; + } + + TaskBlockEvent event{}; + event._start = start_ticks; + event._end = end_ticks; + event._blocker = blocker; + event._unblockingSpanId = unblocking_span_id; + // _ctx is unused on the live path; the writer pulls context from TLS via writeCurrentContext. + if (Profiler::instance()->recordTaskBlockLive(tid, &event)) { + incrementEmit(); + return true; + } + return false; +} + +bool recordTaskBlockDeferredIfEligible(int tid, u64 start_ticks, u64 end_ticks, + const Context& context, u64 blocker, + u64 unblocking_span_id) { + if (context.spanId == 0) { + incrementSpanZeroSkip(); + return false; + } + if (!exceedsMinTaskBlockDuration(start_ticks, end_ticks)) { + incrementTooShortSkip(); + return false; + } + + TaskBlockEvent event{}; + event._start = start_ticks; + event._end = end_ticks; + event._blocker = blocker; + event._unblockingSpanId = unblocking_span_id; + event._ctx = context; + if (Profiler::instance()->recordTaskBlockDeferred(tid, &event)) { + incrementEmit(); + return true; + } + return false; +} diff --git a/ddprof-lib/src/main/cpp/taskBlockRecorder.h b/ddprof-lib/src/main/cpp/taskBlockRecorder.h new file mode 100644 index 000000000..e086ac782 --- /dev/null +++ b/ddprof-lib/src/main/cpp/taskBlockRecorder.h @@ -0,0 +1,28 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _TASK_BLOCK_RECORDER_H +#define _TASK_BLOCK_RECORDER_H + +#include "context.h" + +/** + * Live-capture path: span context is read from the current OTEP TLS at JFR write time + * (mirrors recordQueueTime). Used by the synchronous Java `recordTaskBlock` JNI entry. + * Eligibility (non-zero span, minimum duration) is checked using a TLS peek via ContextApi::get. + */ +bool recordTaskBlockLiveIfEligible(int tid, u64 start_ticks, u64 end_ticks, + u64 blocker, u64 unblocking_span_id); + +/** + * Deferred-capture path: span context was snapshotted earlier on this same thread (e.g. at + * parkEnter or the JVMTI MonitorWait callback, which observes Object.wait()) and is emitted + * via writeContextSnapshot. Used by parkExit and the JVMTI MonitorWaited callback. + */ +bool recordTaskBlockDeferredIfEligible(int tid, u64 start_ticks, u64 end_ticks, + const Context& context, u64 blocker, + u64 unblocking_span_id); + +#endif // _TASK_BLOCK_RECORDER_H diff --git a/ddprof-lib/src/main/cpp/thread.cpp b/ddprof-lib/src/main/cpp/thread.cpp index 0490f6a08..16f482fc0 100644 --- a/ddprof-lib/src/main/cpp/thread.cpp +++ b/ddprof-lib/src/main/cpp/thread.cpp @@ -64,7 +64,6 @@ void ProfiledThread::release() { } } - int ProfiledThread::currentTid() { ProfiledThread *tls = current(); if (tls != NULL) { diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index 53357f87c..f7e239e5b 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -1,5 +1,5 @@ /* - * Copyright 2025, Datadog, Inc. + * Copyright 2025, 2026, Datadog, Inc. * SPDX-License-Identifier: Apache-2.0 */ @@ -7,9 +7,11 @@ #define _THREAD_H #include "context.h" +#include "context_api.h" #include "otel_context.h" #include "os.h" #include "threadLocalData.h" +#include "threadState.h" #include "unwindStats.h" #include #include @@ -19,7 +21,7 @@ #include #include -class ProfiledThread : public ThreadLocalData { +class ProfiledThread : public ThreadLocalData { public: enum ThreadType : u32 { TYPE_UNKNOWN = 0, @@ -28,6 +30,15 @@ class ProfiledThread : public ThreadLocalData { TYPE_MASK = TYPE_JAVA_THREAD | TYPE_NOT_JAVA_THREAD }; + static constexpr u32 FLAG_PARKED = 0x4u; + static constexpr u32 FLAG_PARKED_WITH_SPAN = 0x8u; + + enum class ParkedWallClockState : u32 { + NOT_PARKED = 0, + PARKED_SPANLESS = 1, + PARKED_ACTIVE_SPAN = 2 + }; + private: // We are allowing several levels of nesting because we can be // eg. in a crash handler when wallclock signal kicks in, @@ -53,6 +64,21 @@ class ProfiledThread : public ThreadLocalData { u64 _call_trace_id; u32 _recording_epoch; u32 _misc_flags; + u64 _park_start_ticks; + Context _park_context; + u64 _object_wait_start_ticks; + Context _object_wait_context; + u64 _object_wait_blocker; + // Written on the waiting thread from JVMTI MonitorWait/MonitorWaited (which + // observe Object.wait()); wall-clock path reads. + bool _object_wait_active; + // JVMTI MonitorContendedEnter/MonitorContendedEntered (synchronized contention, + // OSThreadState::MONITOR_WAIT). Different population than Object.wait above — + // the thread is parked in the monitor's entry queue, not its wait set. + u64 _monitor_contended_start_ticks; + Context _monitor_contended_context; + u64 _monitor_contended_blocker; + bool _monitor_contended_active; int _filter_slot_id; // Slot ID for thread filtering uint8_t _init_window; // Countdown for JVM thread init race window (PROF-13072) UnwindFailures _unwind_failures; @@ -70,9 +96,23 @@ class ProfiledThread : public ThreadLocalData { alignas(8) u32 _otel_tag_encodings[DD_TAGS_CAPACITY]; u64 _otel_local_root_span_id; + // Wall-clock once-per-run filter state used to live here (per ProfiledThread). It now + // lives on ThreadFilter::Slot — the wall-clock timer needs to read it cross-thread for + // its IPI fast-path, and ProfiledThread instances are deleted on thread exit, which + // made the cross-thread deref a use-after-free. ThreadFilter::Slot instances are + // JVM-process stable, eliminating the hazard. The handler still writes the state + // (via current->filterSlotId() → threadFilter()->slotForId() → markSampledThisRun/ + // resetSampledRun); see WallClockASGCT::signalHandler in wallClock.cpp. + ProfiledThread(int tid) : ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _crash_depth(0), _tid(tid), _cpu_epoch(0), - _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _init_window(0), + _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), + _park_start_ticks(0), _park_context{}, + _object_wait_start_ticks(0), _object_wait_context{}, _object_wait_blocker(0), + _object_wait_active(false), + _monitor_contended_start_ticks(0), _monitor_contended_context{}, + _monitor_contended_blocker(0), _monitor_contended_active(false), + _filter_slot_id(-1), _init_window(0), _otel_ctx_initialized(false), _crash_protection_active(false), _otel_ctx_record{}, _otel_tag_encodings{}, _otel_local_root_span_id(0) {}; @@ -239,6 +279,94 @@ class ProfiledThread : public ThreadLocalData { _otel_local_root_span_id = 0; } + inline void parkEnter(u64 start_ticks) { + // Snapshot the OTEP TLS context atomically (spanId/rootSpanId/tags captured under the + // single validity gate inside ContextApi::snapshot). This replaces the prior hand-rolled + // tag loop and removes the JNI-passed spanId/rootSpanId override path. + _park_context = ContextApi::snapshot(); + _park_start_ticks = start_ticks; + // Transition both park flags in a single CAS-driven RMW so the signal + // handler can never observe a torn intermediate state (FLAG_PARKED set + // with the wrong FLAG_PARKED_WITH_SPAN value, or vice versa). Other bits + // of _misc_flags (thread type, etc.) are preserved across the update. + const bool with_span = _park_context.spanId != 0; + const u32 set_bits = with_span ? (FLAG_PARKED | FLAG_PARKED_WITH_SPAN) : FLAG_PARKED; + const u32 clear_bits = with_span ? 0u : FLAG_PARKED_WITH_SPAN; + u32 expected = __atomic_load_n(&_misc_flags, __ATOMIC_RELAXED); + u32 desired; + do { + desired = (expected & ~clear_bits) | set_bits; + } while (!__atomic_compare_exchange_n(&_misc_flags, &expected, desired, + /*weak=*/true, + __ATOMIC_RELEASE, __ATOMIC_RELAXED)); + } + + inline bool parkExit(u64 &start_ticks, Context &park_context) { + u32 prev = __atomic_fetch_and(&_misc_flags, ~(FLAG_PARKED | FLAG_PARKED_WITH_SPAN), __ATOMIC_ACQ_REL); + if ((prev & FLAG_PARKED) == 0) { + return false; + } + start_ticks = _park_start_ticks; + park_context = _park_context; + return true; + } + + inline bool isParkedForWallclock() const { + return (__atomic_load_n(&_misc_flags, __ATOMIC_ACQUIRE) & FLAG_PARKED) != 0; + } + + inline ParkedWallClockState parkedWallClockState() const { + u32 flags = __atomic_load_n(&_misc_flags, __ATOMIC_ACQUIRE); + if ((flags & FLAG_PARKED) == 0) { + return ParkedWallClockState::NOT_PARKED; + } + return (flags & FLAG_PARKED_WITH_SPAN) != 0 + ? ParkedWallClockState::PARKED_ACTIVE_SPAN + : ParkedWallClockState::PARKED_SPANLESS; + } + + inline void objectWaitEnter(u64 start_ticks, const Context& context, u64 blocker) { + _object_wait_start_ticks = start_ticks; + _object_wait_context = context; + _object_wait_blocker = blocker; + _object_wait_active = true; + } + + inline bool objectWaitExit(u64& start_ticks, Context& context, u64& blocker) { + if (!_object_wait_active) { + return false; + } + _object_wait_active = false; + start_ticks = _object_wait_start_ticks; + context = _object_wait_context; + blocker = _object_wait_blocker; + return true; + } + + inline void monitorContendedEnter(u64 start_ticks, const Context& context, u64 blocker) { + _monitor_contended_start_ticks = start_ticks; + _monitor_contended_context = context; + _monitor_contended_blocker = blocker; + _monitor_contended_active = true; + } + + inline bool monitorContendedExit(u64& start_ticks, Context& context, u64& blocker) { + if (!_monitor_contended_active) { + return false; + } + _monitor_contended_active = false; + start_ticks = _monitor_contended_start_ticks; + context = _monitor_contended_context; + blocker = _monitor_contended_blocker; + return true; + } + + // Wall-clock once-per-run filter accessors live on ThreadFilter::Slot + // (see threadFilter.h Slot::sampledThisRun/lastSampledState/markSampledThisRun/ + // resetSampledRun). Reaching them from the signal handler: + // ThreadFilter::Slot* slot = + // Profiler::instance()->threadFilter()->slotForId(current->filterSlotId()); + Context snapshotContext(size_t numAttrs); private: diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 77e6dfb53..95f457820 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -21,7 +21,9 @@ #include "threadFilter.h" #include "arch.h" +#include "hotspot/vmStructs.h" #include "os.h" +#include "thread.h" #include #include #include @@ -78,6 +80,8 @@ void ThreadFilter::initializeChunk(int chunk_idx) { ChunkStorage* new_chunk = new ChunkStorage(); for (auto& slot : new_chunk->slots) { slot.value.store(-1, std::memory_order_relaxed); + slot.vm_thread.store(nullptr, std::memory_order_relaxed); + slot.profiled_thread.store(nullptr, std::memory_order_relaxed); } // Try to install it atomically @@ -197,6 +201,8 @@ void ThreadFilter::remove(SlotID slot_id) { } chunk->slots[slot_idx].value.store(-1, std::memory_order_release); + chunk->slots[slot_idx].vm_thread.store(nullptr, std::memory_order_release); + chunk->slots[slot_idx].profiled_thread.store(nullptr, std::memory_order_release); } void ThreadFilter::unregisterThread(SlotID slot_id) { @@ -284,6 +290,58 @@ void ThreadFilter::collect(std::vector& tids) const { } } +// Cleared to nullptr on thread unregister/end before the filter slot is reclaimed so the +// wall-clock timer never follows a stale VMThread* after native teardown ordering completes. +void ThreadFilter::setVMThread(SlotID slot_id, VMThread* vm_thread) { + if (slot_id < 0) return; + int chunk_idx = slot_id >> kChunkShift; + int slot_idx = slot_id & kChunkMask; + ChunkStorage* chunk = _chunks[chunk_idx].load(std::memory_order_acquire); + if (chunk != nullptr) { + chunk->slots[slot_idx].vm_thread.store(vm_thread, std::memory_order_release); + } +} + +void ThreadFilter::setProfiledThread(SlotID slot_id, ProfiledThread* profiled_thread) { + if (slot_id < 0) return; + int chunk_idx = slot_id >> kChunkShift; + int slot_idx = slot_id & kChunkMask; + ChunkStorage* chunk = _chunks[chunk_idx].load(std::memory_order_acquire); + if (chunk != nullptr) { + Slot& slot = chunk->slots[slot_idx]; + // Slot reuse: clear stale once-per-run filter state left by the previous occupant + // BEFORE publishing the new profiled_thread. Without this, a slot whose previous + // owner exited mid-park (sampledThisRun=true, lastSampledState=CONDVAR_WAIT) could + // stuck-suppress the new owner's first park signal, since the timer's fast path + // would observe a matching state-and-flag pair the handler never gets a chance + // to reset. Done unconditionally on assignment — cheap, idempotent. + if (profiled_thread != nullptr) { + slot.resetSampledRun(OSThreadState::UNKNOWN); + } + slot.profiled_thread.store(profiled_thread, std::memory_order_release); + } +} + +void ThreadFilter::collectWithState(std::vector& entries) const { + entries.clear(); + entries.reserve(512); + + int num_chunks = _num_chunks.load(std::memory_order_relaxed); + for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { + ChunkStorage* chunk = _chunks[chunk_idx].load(std::memory_order_acquire); + if (chunk == nullptr) continue; + + for (const auto& slot : chunk->slots) { + int slot_tid = slot.value.load(std::memory_order_acquire); + if (slot_tid != -1) { + VMThread* vm = slot.vm_thread.load(std::memory_order_acquire); + ProfiledThread* pt = slot.profiled_thread.load(std::memory_order_acquire); + entries.push_back({slot_tid, vm, pt, &slot}); + } + } + } +} + void ThreadFilter::init(const char* filter) { // Simple logic: any filter value (including "0") enables filtering // Only explicitly registered threads via addThread() will be sampled diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 2440d4351..19b30e9f9 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -23,6 +23,11 @@ #include #include "arch.h" +#include "threadState.h" + +class VMThread; +class ProfiledThread; +struct ThreadEntry; // defined after ThreadFilter; carries a pointer to a ThreadFilter::Slot class ThreadFilter { public: @@ -37,6 +42,66 @@ class ThreadFilter { // High-performance free list using Treiber stack, 64 shards static constexpr int kFreeListSize = 1024; // power-of-two for fast modulo static constexpr int kShardCount = 64; // power-of-two for fast modulo + + // Optimized slot structure with padding to avoid false sharing. + // Pointers are placed before the int to avoid implicit alignment padding between them. + // + // Lifetime: Slot instances live inside ChunkStorage arrays that are allocated on first + // use and never freed for the JVM-process lifetime. A captured `Slot*` is therefore + // dereferenceable for the rest of the process, regardless of whether the thread that + // currently owns the slot has exited. This is the lifetime guarantee the wall-clock + // timer relies on for its once-per-run fast-path (sampledThisRun/lastSampledState are + // read cross-thread). + struct alignas(DEFAULT_CACHE_LINE_SIZE) Slot { + std::atomic vm_thread{nullptr}; // 8 bytes + std::atomic profiled_thread{nullptr}; // 8 bytes + std::atomic value{-1}; // 4 bytes + // Wall-clock once-per-run filter state (wallprecheck=true). Written by the + // SIGVTALRM handler on the slot's owning thread; read by both the handler and + // the wall-clock timer thread. Reset on slot reuse via resetSampledRun() in + // setProfiledThread() when a new ProfiledThread is associated with the slot. + // + // Memory ordering: the flag uses release/acquire so a reader that observes + // sampledThisRun()==true is guaranteed to also observe the lastSampledState + // value that was stored immediately before the flag was set. The state itself + // can use relaxed loads because the && in the reader short-circuits on the + // acquire-load of the flag, fencing any subsequent state read. + std::atomic last_sampled_state{OSThreadState::UNKNOWN}; // 4 bytes + std::atomic sampled_this_run{false}; // 1 byte + char padding[DEFAULT_CACHE_LINE_SIZE + - sizeof(std::atomic) + - sizeof(std::atomic) + - sizeof(std::atomic) + - sizeof(std::atomic) + - sizeof(std::atomic)]; + + inline bool sampledThisRun() const { + return sampled_this_run.load(std::memory_order_acquire); + } + inline OSThreadState lastSampledState() const { + return last_sampled_state.load(std::memory_order_relaxed); + } + // Order matters: store the state under relaxed first, then publish the flag + // with release. Readers do the inverse: acquire on the flag, then relaxed + // read on the state. This pairing makes the "state-paired-with-flag" + // invariant portable across weakly-ordered architectures (ARM64, POWER, etc.), + // not just TSO. Stale reads on a flag observed as false fall through to + // the unfenced authoritative check in the signal handler. + inline void markSampledThisRun(OSThreadState state) { + last_sampled_state.store(state, std::memory_order_relaxed); + sampled_this_run.store(true, std::memory_order_release); + } + inline void resetSampledRun(OSThreadState state) { + last_sampled_state.store(state, std::memory_order_relaxed); + sampled_this_run.store(false, std::memory_order_release); + } + }; + static_assert(sizeof(Slot) == DEFAULT_CACHE_LINE_SIZE, "Slot must be exactly one cache line"); + static_assert(std::atomic::is_always_lock_free, + "Slot::last_sampled_state must be lock-free for signal-handler safety"); + static_assert(std::atomic::is_always_lock_free, + "Slot::sampled_this_run must be lock-free for signal-handler safety"); + ThreadFilter(); ~ThreadFilter(); @@ -48,17 +113,26 @@ class ThreadFilter { void add(int tid, SlotID slot_id); void remove(SlotID slot_id); void collect(std::vector& tids) const; + void setVMThread(SlotID slot_id, VMThread* vm_thread); + void setProfiledThread(SlotID slot_id, ProfiledThread* profiled_thread); + void collectWithState(std::vector& entries) const; + + // Returns the (process-lifetime) Slot pointer for a registered slot_id, or nullptr + // if slot_id is invalid or its chunk has not been allocated. Safe to call from the + // signal handler and the wall-clock timer thread. + inline Slot* slotForId(SlotID slot_id) const { + if (slot_id < 0) return nullptr; + int chunk_idx = slot_id >> kChunkShift; + int slot_idx = slot_id & kChunkMask; + if (chunk_idx >= kMaxChunks) return nullptr; + ChunkStorage* chunk = _chunks[chunk_idx].load(std::memory_order_acquire); + return chunk != nullptr ? &chunk->slots[slot_idx] : nullptr; + } SlotID registerThread(); void unregisterThread(SlotID slot_id); private: - // Optimized slot structure with padding to avoid false sharing - struct alignas(DEFAULT_CACHE_LINE_SIZE) Slot { - std::atomic value{-1}; - char padding[DEFAULT_CACHE_LINE_SIZE - sizeof(value)]; // Pad to cache line size - }; - static_assert(sizeof(Slot) == DEFAULT_CACHE_LINE_SIZE, "Slot must be exactly one cache line"); // Lock-free free list using a stack-like structure struct FreeListNode { @@ -93,4 +167,16 @@ class ThreadFilter { SlotID popFromFreeList(); }; +// Snapshot captured by ThreadFilter::collectWithState. The `slot` pointer is JVM-process +// stable (see Slot lifetime comment above) and is the safe channel for the wall-clock +// timer to read once-per-run filter state cross-thread — `profiled_thread` is kept for +// other consumers but MUST NOT be dereferenced from the timer (its lifetime ends at +// ProfiledThread::release()). +struct ThreadEntry { + int tid; + VMThread* vm_thread; + ProfiledThread* profiled_thread; + const ThreadFilter::Slot* slot; +}; + #endif // _THREADFILTER_H diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 76fd49c68..730115505 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -375,7 +375,7 @@ bool VM::initLibrary(JavaVM *vm) { return true; } -bool VM::initProfilerBridge(JavaVM *vm, bool attach) { +bool VM::initProfilerBridge(JavaVM *vm, bool attach, bool delegate_monitor_events) { TEST_LOG("VM::initProfilerBridge"); if (!initShared(vm)) { return false; @@ -398,6 +398,30 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { jvmtiCapabilities potential_capabilities = {0}; _jvmti->GetPotentialCapabilities(&potential_capabilities); + // JVMTI MonitorWait / MonitorWaited (which observe Object.wait()) drive + // Profiler::ObjectWaitEnter / ObjectWaitExit for datadog.TaskBlock. JDK < 21 + // always uses the native callbacks because there is no Java-side replacement + // yet. On JDK >= 21 the callbacks are gated off only when the dd-java-agent + // (or another in-process consumer) has signalled via delegate_monitor_events + // that it will take over the population via Java-level instrumentation + // (object-wait module). Without that handshake we fall back to native to + // avoid silently losing coverage on standalone -agentpath: deployments. + // Note: these JVMTI events are named after the ObjectMonitor backing + // Object.wait(), NOT after OSThreadState::MONITOR_WAIT (which means "in the + // entry queue of a contested synchronized block"). + const bool enable_jvmti_object_wait_callbacks = + java_version() < 21 || !delegate_monitor_events; + + // JVMTI MonitorContendedEnter / MonitorContendedEntered observe synchronized + // contention (HotSpot OSThreadState::MONITOR_WAIT) — a different population + // than Object.wait() above. Both populations share the + // can_generate_monitor_events capability (requesting it enables per-monitor + // bytecode instrumentation on HotSpot >= 21), so the gate semantics are the + // same: native is on whenever there is no Java-side delegate, and is off + // when the agent owns the population. The flag is named separately to keep + // the populations explicit and to allow independent widening if a JDK 21+ + // replacement lands for only one of them. + const bool enable_jvmti_monitor_contended_callbacks = enable_jvmti_object_wait_callbacks; _can_sample_objects = potential_capabilities.can_generate_sampled_object_alloc_events && @@ -421,7 +445,16 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { capabilities.can_get_source_file_name = 1; capabilities.can_get_line_numbers = 1; capabilities.can_generate_compiled_method_load_events = 1; - capabilities.can_generate_monitor_events = 1; + // Match the JVMTI MonitorWait/MonitorWaited (Object.wait) and + // MonitorContendedEnter/MonitorContendedEntered (synchronized contention) + // callback gating below: requesting can_generate_monitor_events on HotSpot + // enables per-monitor bytecode instrumentation even when no notifications + // are subscribed, so JDK 21+ (where we rely on Java-level instrumentation + // for Object.wait and have no contention replacement yet) should not + // request it. + if (enable_jvmti_object_wait_callbacks || enable_jvmti_monitor_contended_callbacks) { + capabilities.can_generate_monitor_events = 1; + } capabilities.can_tag_objects = 1; _jvmti->AddCapabilities(&capabilities); @@ -435,6 +468,14 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { callbacks.DynamicCodeGenerated = JitCodeCache::DynamicCodeGenerated; callbacks.ThreadStart = Profiler::ThreadStart; callbacks.ThreadEnd = Profiler::ThreadEnd; + if (enable_jvmti_object_wait_callbacks) { + callbacks.MonitorWait = Profiler::ObjectWaitEnter; + callbacks.MonitorWaited = Profiler::ObjectWaitExit; + } + if (enable_jvmti_monitor_contended_callbacks) { + callbacks.MonitorContendedEnter = Profiler::MonitorContendedEnter; + callbacks.MonitorContendedEntered = Profiler::MonitorContendedExit; + } callbacks.SampledObjectAlloc = ObjectSampler::SampledObjectAlloc; callbacks.GarbageCollectionFinish = LivenessTracker::GarbageCollectionFinish; callbacks.NativeMethodBind = VMStructs::NativeMethodBind; @@ -448,6 +489,18 @@ bool VM::initProfilerBridge(JavaVM *vm, bool attach) { JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_NATIVE_METHOD_BIND, NULL); + if (enable_jvmti_object_wait_callbacks) { + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_WAIT, + NULL); + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_MONITOR_WAITED, + NULL); + } + if (enable_jvmti_monitor_contended_callbacks) { + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_MONITOR_CONTENDED_ENTER, NULL); + _jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_MONITOR_CONTENDED_ENTERED, NULL); + } if (hotspot_version() == 0 || !CodeHeap::available()) { // Workaround for JDK-8173361: avoid CompiledMethodLoad events when possible diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index 6be4c87bb..d0a440f2b 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -149,7 +149,13 @@ class VM { static JVM_GetManagement _getManagement; static bool initLibrary(JavaVM *vm); - static bool initProfilerBridge(JavaVM *vm, bool attach); + // delegate_monitor_events: when true on JDK 21+, the wall-clock JVMTI monitor callbacks + // (MonitorWait/Waited, MonitorContendedEnter/Entered) are suppressed because the Java-side + // dd-trace-java instrumentation modules (object-wait, synchronized-contention) cover the same + // populations without paying the can_generate_monitor_events per-monitor bytecode + // instrumentation cost. On JDK < 21 this flag is ignored — there is no Java-side fallback + // for those populations yet, so native callbacks always engage. + static bool initProfilerBridge(JavaVM *vm, bool attach, bool delegate_monitor_events = false); static jvmtiEnv *jvmti() { return _jvmti; } diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index a95aef42d..6e24b738e 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -19,6 +19,7 @@ #include "thread.h" #include "threadState.inline.h" #include "guards.h" +#include "wallClockCounters.h" #include #include #include @@ -87,6 +88,38 @@ void WallClockASGCT::signalHandler(int signo, siginfo_t *siginfo, void *ucontext current->tickInitWindow(); return; } + // Wall-clock once-per-run filter (wallprecheck=true): emit exactly one + // MethodSample at the first signal of a SLEEPING / CONDVAR_WAIT run on this + // thread, suppress subsequent signals until the OS state leaves the skip set. + // State lives on the JVM-process-stable ThreadFilter::Slot (not on ProfiledThread, + // which is deleted on thread exit), so the wall-clock timer can safely read these + // bits cross-thread for its fast-path skip without a use-after-free hazard. + // The osThreadState read is HotSpot-only; on J9/Zing getOSThreadState() returns + // UNKNOWN so the filter is a no-op (every signal falls through to emission, same + // as wallprecheck=false). + if (current != nullptr && _precheck) { + ThreadFilter::Slot* slot = + Profiler::instance()->threadFilter()->slotForId(current->filterSlotId()); + if (slot != nullptr) { + OSThreadState precheck_state = getOSThreadState(); + bool is_blocked_skip_state = (precheck_state == OSThreadState::SLEEPING || + precheck_state == OSThreadState::CONDVAR_WAIT); + if (is_blocked_skip_state) { + if (slot->sampledThisRun() && precheck_state == slot->lastSampledState()) { + Counters::increment(WC_SIGNAL_SUPPRESSED_SAMPLED_RUN); + WallClockCounters::incrementSuppressedSampledRun(); + return; // suppress emission — same blocked-state run, already armed + } + // First signal of this blocked-state run — arm the filter and emit. + // Transitions within the skip set (e.g. SLEEPING → CONDVAR_WAIT) re-arm + // because the state-equality check above gates suppression on identity. + slot->markSampledThisRun(precheck_state); + } else { + // Out of the skip set: reset so the next blocked-state entry emits. + slot->resetSampledRun(precheck_state); + } + } + } int tid = current != NULL ? current->tid() : OS::threadId(); Shims::instance().setSighandlerTid(tid); u64 call_trace_id = 0; @@ -165,6 +198,7 @@ bool BaseWallClock::isEnabled() const { void WallClockASGCT::initialize(Arguments& args) { _collapsing = args._wall_collapsing; + _precheck = args._wall_precheck; // J9WallClock uses JVMTI GetAllStackTracesExtended polling, not SIGVTALRM // signals — it has no sharedSignalHandler and needs no signal-origin gate. // Engines are started sequentially; this call is idempotent (no-op after first). @@ -173,41 +207,78 @@ void WallClockASGCT::initialize(Arguments& args) { } void WallClockASGCT::timerLoop() { - // todo: re-allocating the vector every time is not efficient - auto collectThreads = [&](std::vector& tids) { - // Get thread IDs from the filter if it's enabled - // Otherwise list all threads in the system + auto collectThreads = [&](std::vector& entries) { if (Profiler::instance()->threadFilter()->enabled()) { - Profiler::instance()->threadFilter()->collect(tids); + Profiler::instance()->threadFilter()->collectWithState(entries); } else { ThreadList *thread_list = OS::listThreads(); while (thread_list->hasNext()) { int tid = thread_list->next(); - // Don't include the current thread if (tid != OS::threadId()) { - tids.push_back(tid); + // No-filter mode: vm_thread / profiled_thread / slot are all nullptr. + // The wallprecheck fast path below gates on (vm_thread && slot) so it + // simply degrades to "always send the signal" here, matching prior behavior. + entries.push_back({tid, nullptr, nullptr, nullptr}); } } delete thread_list; } }; - auto sampleThreads = [&](int tid, int& num_failures, int& threads_already_exited, int& permission_denied) { - // Deliver SIGVTALRM with a cookie so our handler can distinguish this - // signal from any other in-process sender (see signalCookie.h / - // wallClock.cpp sharedSignalHandler). - if (!OS::sendSignalWithCookie(tid, SIGVTALRM, SignalCookie::wallclock())) { + auto sampleThreads = [&](ThreadEntry entry, int& num_failures, int& threads_already_exited, + int& permission_denied) { + // Safety of dereferencing entry.vm_thread / entry.slot here relies on: + // 1) collectWithState uses acquire loads when reading the filter slot, so once + // ThreadFilter::setVMThread(slot, nullptr) has been published the timer + // thread observes a null vm_thread for any *future* tick. The same applies + // to the slot pointer (set to nullptr in the no-filter path; never cleared + // in the filtered path because slots are JVM-lifetime stable). + // 2) On HotSpot, JavaThread instances are not freed when the underlying OS + // thread exits; they remain valid for the JVM process lifetime. The pointer + // captured into the local entry above stays dereferenceable for the duration + // of this tick even if the target thread detached after collectWithState ran. + // 3) ThreadFilter::Slot instances live inside ChunkStorage arrays that are + // allocated on first use and never freed for the JVM-process lifetime, so + // a captured Slot* is dereferenceable forever. This is the lifetime + // guarantee that makes the cross-thread once-per-run read below safe — we + // deliberately do NOT touch entry.profiled_thread here (it would be a UAF + // hazard because ProfiledThread is deleted in ProfiledThread::release() + // after the slot is cleared, but the timer has already captured the stale + // pointer into its local snapshot). + // + // Wall-clock once-per-run filter — timer-thread fast path (wallprecheck=true): + // elide pthread_kill / kernel IPI when the candidate is already in a blocked-state + // run we have armed a sample for. Mirrors the handler-side filter (which still runs + // as the authoritative correctness check for any signal that reaches it — covers the + // race window where the state read here was stale or the worker's run transitioned + // between this peek and signal delivery). The "already armed" bit is written by the + // handler on the owning thread with release ordering; the timer reads it with + // acquire ordering, fencing the subsequent relaxed read of the paired state. + if (_precheck && entry.vm_thread != nullptr && entry.slot != nullptr) { + OSThreadState peek_state = entry.vm_thread->osThreadState(); + if (peek_state == OSThreadState::SLEEPING || peek_state == OSThreadState::CONDVAR_WAIT) { + // Read order matters: sampledThisRun() (acquire) must come first so the + // && short-circuit fences the subsequent relaxed load of lastSampledState(). + if (entry.slot->sampledThisRun() && + peek_state == entry.slot->lastSampledState()) { + Counters::increment(WC_SIGNAL_SUPPRESSED_SAMPLED_RUN); + WallClockCounters::incrementSuppressedSampledRun(); + return true; // intentional skip — not a sampling failure + } + } + } + if (!OS::sendSignalWithCookie(entry.tid, SIGVTALRM, SignalCookie::wallclock())) { num_failures++; if (errno != 0) { if (errno == ESRCH) { - threads_already_exited++; + threads_already_exited++; } else if (errno == EPERM) { - permission_denied++; + permission_denied++; } else if (errno == EAGAIN) { - // Signal queue limit (RLIMIT_SIGPENDING) reached; signal was not - // delivered — count as missed sample. + // Signal queue limit (RLIMIT_SIGPENDING) reached; not a permission error. + Counters::increment(WC_SIGNAL_QUEUE_FULL); } else { - Log::debug("unexpected error %s", strerror(errno)); + Log::debug("unexpected error %s", strerror(errno)); } } return false; @@ -218,5 +289,5 @@ void WallClockASGCT::timerLoop() { auto doNothing = []() { }; - timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); + timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); } diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index 89338fbaf..28ec2eb22 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -16,6 +16,7 @@ #include "threadFilter.h" #include "threadState.h" #include "tsc.h" +#include "wallClockCounters.h" class BaseWallClock : public Engine { private: @@ -81,16 +82,24 @@ class BaseWallClock : public Engine { int num_failures = 0; int threads_already_exited = 0; int permission_denied = 0; + u32 num_successful_samples = 0; std::vector sample = reservoir.sample(threads); for (ThreadType thread : sample) { - if (!sampleThreads(thread, num_failures, threads_already_exited, permission_denied)) { - continue; + if (sampleThreads(thread, num_failures, threads_already_exited, permission_denied)) { + num_successful_samples++; } } epoch.updateNumSamplableThreads(threads.size()); epoch.updateNumFailedSamples(num_failures); - epoch.updateNumSuccessfulSamples(sample.size() - num_failures); + epoch.updateNumSuccessfulSamples(num_successful_samples); + WallClockCounterSnapshot counter_snapshot = WallClockCounters::drain(); + epoch.updateNumSuppressedSampledRun(counter_snapshot.suppressed_sampled_run); + epoch.updateNumSkippedParkedSpanless(counter_snapshot.skipped_parked_spanless); + epoch.updateNumSkippedParkedActiveSpan(counter_snapshot.skipped_parked_active_span); + epoch.updateNumTaskBlockEmitted(counter_snapshot.task_block_emitted); + epoch.updateNumTaskBlockSkippedSpanZero(counter_snapshot.task_block_skipped_span_zero); + epoch.updateNumTaskBlockSkippedTooShort(counter_snapshot.task_block_skipped_too_short); epoch.updateNumExitedThreads(threads_already_exited); epoch.updateNumPermissionDenied(permission_denied); u64 endTime = TSC::ticks(); @@ -141,6 +150,7 @@ class BaseWallClock : public Engine { class WallClockASGCT : public BaseWallClock { private: bool _collapsing; + bool _precheck; static bool inSyscall(void* ucontext); @@ -151,7 +161,7 @@ class WallClockASGCT : public BaseWallClock { void timerLoop() override; public: - WallClockASGCT() : BaseWallClock(), _collapsing(false) {} + WallClockASGCT() : BaseWallClock(), _collapsing(false), _precheck(false) {} const char* name() override { return "WallClock (ASGCT)"; } diff --git a/ddprof-lib/src/main/cpp/wallClockCounters.h b/ddprof-lib/src/main/cpp/wallClockCounters.h new file mode 100644 index 000000000..c181a2f7e --- /dev/null +++ b/ddprof-lib/src/main/cpp/wallClockCounters.h @@ -0,0 +1,84 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _WALLCLOCK_COUNTERS_H +#define _WALLCLOCK_COUNTERS_H + +#include "arch.h" + +struct WallClockCounterSnapshot { + u64 skipped_parked_spanless; + u64 skipped_parked_active_span; + u64 task_block_emitted; + u64 task_block_skipped_span_zero; + u64 task_block_skipped_too_short; + u64 suppressed_sampled_run; +}; + +class WallClockCounters { +private: + inline static volatile long long _skipped_parked_spanless = 0; + inline static volatile long long _skipped_parked_active_span = 0; + inline static volatile long long _task_block_emitted = 0; + inline static volatile long long _task_block_skipped_span_zero = 0; + inline static volatile long long _task_block_skipped_too_short = 0; + inline static volatile long long _suppressed_sampled_run = 0; + + /** + * Drains one counter to zero. These counters are best-effort diagnostics: + * increments use __ATOMIC_RELAXED on the signal-handler hot path, so on + * weakly-ordered ISAs (e.g. arm64) the acquire exchange below is not + * guaranteed to observe every increment that has not yet propagated. + * Small undercounts across a drain are acceptable and expected; values + * are intended for observability of trends, not exact accounting. + * + * The acquire tag on the exchange is retained as a cheap one-way fence on + * the draining thread so that any subsequent reads on this thread are not + * reordered before the drain. It does NOT form a release/acquire pair + * with the relaxed increments — those carry no ordering of their own — + * so visibility of recent increments relies entirely on cache coherence, + * which is sufficient for trend-level observability. + */ + static u64 drainCounter(volatile long long& counter) { + return (u64)__atomic_exchange_n(&counter, 0, __ATOMIC_ACQUIRE); + } + +public: + static void incrementSkippedParkedSpanless() { + atomicIncRelaxed(_skipped_parked_spanless); + } + + static void incrementSkippedParkedActiveSpan() { + atomicIncRelaxed(_skipped_parked_active_span); + } + + static void incrementTaskBlockEmitted() { + atomicIncRelaxed(_task_block_emitted); + } + + static void incrementTaskBlockSkippedSpanZero() { + atomicIncRelaxed(_task_block_skipped_span_zero); + } + + static void incrementTaskBlockSkippedTooShort() { + atomicIncRelaxed(_task_block_skipped_too_short); + } + + static void incrementSuppressedSampledRun() { + atomicIncRelaxed(_suppressed_sampled_run); + } + + static WallClockCounterSnapshot drain() { + return { + drainCounter(_skipped_parked_spanless), + drainCounter(_skipped_parked_active_span), + drainCounter(_task_block_emitted), + drainCounter(_task_block_skipped_span_zero), + drainCounter(_task_block_skipped_too_short), + drainCounter(_suppressed_sampled_run)}; + } +}; + +#endif // _WALLCLOCK_COUNTERS_H diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 33e0bdc13..90c497ce3 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -60,7 +60,7 @@ public static JavaProfiler getInstance() throws IOException { * @param scratchDir directory where the bundled library will be exploded before linking */ public static JavaProfiler getInstance(String scratchDir) throws IOException { - return getInstance(null, scratchDir); + return getInstance(null, scratchDir, false); } /** @@ -70,7 +70,31 @@ public static JavaProfiler getInstance(String scratchDir) throws IOException { * @param libLocation the path to the native library to be used instead of the bundled one * @param scratchDir directory where the bundled library will be exploded before linking; ignored when 'libLocation' is {@literal null} */ - public static synchronized JavaProfiler getInstance(String libLocation, String scratchDir) throws IOException { + public static JavaProfiler getInstance(String libLocation, String scratchDir) throws IOException { + return getInstance(libLocation, scratchDir, false); + } + + /** + * Get a {@linkplain JavaProfiler} instance backed by the given native library and using + * the given directory as the scratch where the bundled library will be exploded + * before linking. + * + *

{@code delegateMonitorEvents} lets the in-process Java agent (typically dd-trace-java) + * signal that it will cover the {@code Object.wait} and {@code synchronized} contention + * populations via Java-level bytecode instrumentation. When {@code true} on JDK 21+ the + * native profiler skips requesting the JVMTI {@code can_generate_monitor_events} capability + * and does not register the corresponding callbacks, avoiding HotSpot's per-monitor bytecode + * instrumentation cost. On JDK < 21 the flag is ignored (there is no Java-side fallback + * for those populations yet) and the native callbacks always engage. With the flag + * {@code false} (the default, used by standalone {@code -agentpath:} loads) the native + * callbacks engage on every JDK so coverage is preserved without a Java agent. + * + * @param libLocation the path to the native library to be used instead of the bundled one + * @param scratchDir directory where the bundled library will be exploded before linking; ignored when 'libLocation' is {@literal null} + * @param delegateMonitorEvents see above + */ + public static synchronized JavaProfiler getInstance(String libLocation, String scratchDir, + boolean delegateMonitorEvents) throws IOException { if (instance != null) { return instance; } @@ -80,7 +104,7 @@ public static synchronized JavaProfiler getInstance(String libLocation, String s if (!result.succeeded) { throw new IOException("Failed to load Datadog Java profiler library", result.error); } - init0(); + init0(delegateMonitorEvents); instance = profiler; @@ -277,6 +301,39 @@ public void recordQueueTime(long startTicks, recordQueueEnd0(startTicks, endTicks, task.getName(), scheduler.getName(), origin, queueType.getName(), queueLength); } + /** + * Records a {@code datadog.TaskBlock} interval for the current thread (direct API). + * + *

Span and root-span identity are captured natively from the OTEP TLS sidecar at the JNI + * boundary, matching the {@link #recordQueueTime} convention. No span ids need to cross JNI. + * + * @param startTicks TSC tick at block start + * @param endTicks TSC tick at block end + * @param blocker blocker identity (e.g. monitor hash), or 0 + * @param unblockingSpanId span id of unblocking thread, or 0 + */ + public void recordTaskBlock(long startTicks, + long endTicks, + long blocker, + long unblockingSpanId) { + recordTaskBlock0(startTicks, endTicks, blocker, unblockingSpanId); + } + + /** + * Called before {@code LockSupport.park}; native wall-clock sampling may skip SIGVTALRM for + * this interval. Span context is captured natively from the OTEP TLS sidecar. + */ + public void parkEnter() { + parkEnter0(); + } + + /** + * Called after {@code LockSupport.park}; clears parked state and may emit {@code datadog.TaskBlock}. + */ + public void parkExit(long blocker, long unblockingSpanId) { + parkExit0(blocker, unblockingSpanId); + } + /** * Get the ticks for the current thread. * @return ticks @@ -311,7 +368,7 @@ private static ThreadContext initializeThreadContext() { return new ThreadContext(buffer, metadata); } - private static native boolean init0(); + private static native boolean init0(boolean delegateMonitorEvents); private native void stop0() throws IllegalStateException; private native String execute0(String command) throws IllegalArgumentException, IllegalStateException, IOException; @@ -332,6 +389,12 @@ private static ThreadContext initializeThreadContext() { private static native void recordQueueEnd0(long startTicks, long endTicks, String task, String scheduler, Thread origin, String queueType, int queueLength); + private static native void recordTaskBlock0(long startTicks, long endTicks, long blocker, long unblockingSpanId); + + private static native void parkEnter0(); + + private static native void parkExit0(long blocker, long unblockingSpanId); + private static native long currentTicks0(); private static native long tscFrequency0(); diff --git a/ddprof-lib/src/test/cpp/park_state_ut.cpp b/ddprof-lib/src/test/cpp/park_state_ut.cpp new file mode 100644 index 000000000..052c73804 --- /dev/null +++ b/ddprof-lib/src/test/cpp/park_state_ut.cpp @@ -0,0 +1,144 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "thread.h" +#include "threadFilter.h" + +// ProfiledThread::parkEnter is responsible for two things: +// (1) capturing the OTEP TLS context into _park_context via ContextApi::snapshot, and +// (2) atomically transitioning the FLAG_PARKED / FLAG_PARKED_WITH_SPAN bits. +// These unit tests cover (2). The end-to-end context-capture behavior, including the +// stale-snapshot guarantee that tag encodings mutated after parkEnter do not leak into +// the recorded TaskBlock, is exercised by the Java integration test ParkTaskBlockTest +// where real OTEP TLS is set up. + +TEST(ProfiledThreadParkStateTest, ParkFlagLifecycle) { + ProfiledThread *thread = ProfiledThread::forTid(12345); + + EXPECT_FALSE(thread->isParkedForWallclock()); + + thread->parkEnter(777); + EXPECT_TRUE(thread->isParkedForWallclock()); + + u64 start_ticks = 0; + Context park_context = {}; + EXPECT_TRUE(thread->parkExit(start_ticks, park_context)); + EXPECT_EQ(777ULL, start_ticks); + // No TLS context is initialized in this unit test, so the snapshot captured at + // parkEnter is empty (validity-gated) — spanId=0 / tags=0. + EXPECT_EQ(0ULL, park_context.spanId); + EXPECT_EQ(0ULL, park_context.rootSpanId); + EXPECT_FALSE(thread->isParkedForWallclock()); + + // Second exit is a no-op once the parked bit is cleared. + EXPECT_FALSE(thread->parkExit(start_ticks, park_context)); +} + +TEST(ProfiledThreadParkStateTest, ParkedSpanlessWhenNoActiveSpan) { + ProfiledThread *thread = ProfiledThread::forTid(12346); + thread->parkEnter(888); + // ContextApi::snapshot() returns spanId=0 outside an initialized OTEP TLS, so the + // parked-with-span bit must remain clear. + EXPECT_EQ(ProfiledThread::ParkedWallClockState::PARKED_SPANLESS, + thread->parkedWallClockState()); +} + +TEST(ProfiledThreadParkStateTest, NewThreadStartsWithoutParkOrMonitorState) { + ProfiledThread *thread = ProfiledThread::forTid(12347); + + EXPECT_FALSE(thread->isParkedForWallclock()); + EXPECT_EQ(ProfiledThread::ParkedWallClockState::NOT_PARKED, + thread->parkedWallClockState()); + u64 start_ticks = 0; + u64 blocker = 0; + Context out_ctx = {}; + EXPECT_FALSE(thread->objectWaitExit(start_ticks, out_ctx, blocker)); +} + +// Once-per-run filter state lives on ThreadFilter::Slot (JVM-process-stable storage), not +// on ProfiledThread (per-thread heap, freed on exit). Both the SIGVTALRM handler (writer + +// reader on the owning thread) and the wall-clock timer thread (reader, fast-path skip) +// reach it through the slot. Behaviour invariants under wallprecheck=true: +// - First signal of a blocked-state run -> markSampledThisRun(state); handler emits MethodSample. +// - Subsequent signal in the SAME blocked state -> sampledThisRun() && state == lastSampledState(); +// handler suppresses emission. +// - Transition within the skip set (SLEEPING -> CONDVAR_WAIT) -> state != lastSampledState(); +// handler re-arms and emits. +// - Transition out of the skip set -> resetSampledRun(state); next blocked-state entry emits again. +TEST(WallClockOncePerRunFilterTest, SlotStateTransitions) { + ThreadFilter::Slot slot; + + // Initial state: never sampled; state = UNKNOWN. Mirrors a freshly-allocated slot + // (default-constructed via in-class initializers). + EXPECT_FALSE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::UNKNOWN, slot.lastSampledState()); + + // First signal of a SLEEPING run: arm + emit. + slot.markSampledThisRun(OSThreadState::SLEEPING); + EXPECT_TRUE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::SLEEPING, slot.lastSampledState()); + + // Subsequent signal in the SAME SLEEPING run: suppression decision is + // (sampledThisRun() && state == lastSampledState()). The release-acquire ordering on the + // flag pairs with the relaxed state load so an observer that sees the flag also sees + // a matching state without needing barriers between the two reads. + EXPECT_TRUE(slot.sampledThisRun() && + OSThreadState::SLEEPING == slot.lastSampledState()); + + // Within-skip-set transition SLEEPING -> CONDVAR_WAIT: identity check fails, + // so the handler re-arms and emits again. Re-arming overwrites lastSampledState. + EXPECT_FALSE(slot.sampledThisRun() && + OSThreadState::CONDVAR_WAIT == slot.lastSampledState()); + slot.markSampledThisRun(OSThreadState::CONDVAR_WAIT); + EXPECT_TRUE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::CONDVAR_WAIT, slot.lastSampledState()); + + // Transition out of skip set (RUNNABLE): resetSampledRun clears the armed bit so the + // next blocked-state entry will emit again. + slot.resetSampledRun(OSThreadState::RUNNABLE); + EXPECT_FALSE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::RUNNABLE, slot.lastSampledState()); + + // Next blocked-state entry after reset: arm + emit again. + slot.markSampledThisRun(OSThreadState::SLEEPING); + EXPECT_TRUE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::SLEEPING, slot.lastSampledState()); +} + +// Regression for the slot-reuse stuck-suppression hazard: if Slot retained +// {sampledThisRun=true, lastSampledState=CONDVAR_WAIT} from a previous owner that exited +// mid-park, the timer's fast-path mirror would match the next occupant's park state and +// elide every IPI to it — including the very first one that the handler needs to receive +// in order to reset the armed flag. ThreadFilter::setProfiledThread(slot, non-null) +// is responsible for issuing resetSampledRun(UNKNOWN) at the moment a new ProfiledThread +// is associated with the slot; that path can't be invoked directly from this unit-test +// scope (no Profiler), so the test exercises the same primitive (resetSampledRun) and +// asserts post-conditions equivalent to what new-thread association must establish. +TEST(WallClockOncePerRunFilterTest, ResetClearsArmedFlagOnSlotReuse) { + ThreadFilter::Slot slot; + slot.markSampledThisRun(OSThreadState::CONDVAR_WAIT); + EXPECT_TRUE(slot.sampledThisRun()); + + // Simulate the slot reset that setProfiledThread(slot_id, new ProfiledThread*) issues. + slot.resetSampledRun(OSThreadState::UNKNOWN); + + // Post-conditions a new occupant requires: flag clear AND state distinct from any blocked + // skip-set value, so the very next signal in any state goes through the handler's + // is-blocked-skip-state branch as "first of run" rather than a suppress decision. + EXPECT_FALSE(slot.sampledThisRun()); + EXPECT_EQ(OSThreadState::UNKNOWN, slot.lastSampledState()); +} diff --git a/ddprof-lib/src/test/cpp/wallprecheck_args_ut.cpp b/ddprof-lib/src/test/cpp/wallprecheck_args_ut.cpp new file mode 100644 index 000000000..f993afda9 --- /dev/null +++ b/ddprof-lib/src/test/cpp/wallprecheck_args_ut.cpp @@ -0,0 +1,45 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include "arguments.h" + +TEST(WallPrecheckArgsTest, DefaultsToDisabled) { + Arguments args; + + EXPECT_FALSE(args._wall_precheck); +} + +TEST(WallPrecheckArgsTest, BareFlagEnablesPrecheck) { + Arguments args; + + Error error = args.parse("wallprecheck"); + + EXPECT_FALSE(error); + EXPECT_TRUE(args._wall_precheck); +} + +TEST(WallPrecheckArgsTest, ExplicitBooleanValues) { + Arguments args; + + Error error = args.parse("wallprecheck=true"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._wall_precheck); + + args = Arguments(); + error = args.parse("wallprecheck=false"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._wall_precheck); + + args = Arguments(); + error = args.parse("wallprecheck=1"); + EXPECT_FALSE(error); + EXPECT_TRUE(args._wall_precheck); + + args = Arguments(); + error = args.parse("wallprecheck=0"); + EXPECT_FALSE(error); + EXPECT_FALSE(args._wall_precheck); +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorContendedTaskBlockTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorContendedTaskBlockTest.java new file mode 100644 index 000000000..e6596f9bc --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorContendedTaskBlockTest.java @@ -0,0 +1,139 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.openjdk.jmc.common.item.Attribute.attr; +import static org.openjdk.jmc.common.unit.UnitLookup.NUMBER; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IAttribute; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.common.unit.IQuantity; + +/** + * JVMTI {@code MonitorContendedEnter}/{@code MonitorContendedEntered} (synchronized contention, + * HotSpot {@code OSThreadState::MONITOR_WAIT}) TaskBlock path. Dispatched in our native code as + * {@code Profiler::MonitorContendedEnter} / {@code Profiler::MonitorContendedExit}. Asserts the + * emitted {@code blocker} matches {@link System#identityHashCode(Object)} on the contested monitor. + * + *

Distinct population from {@link ObjectWaitTaskBlockTest}: that path observes the thread sitting + * in the monitor's wait set after {@code obj.wait()}; this path observes the thread sitting + * in the monitor's entry queue while another thread holds the lock. + */ +public class MonitorContendedTaskBlockTest extends AbstractProfilerTest { + + private static final IAttribute BLOCKER = + attr("blocker", "blocker", "blocker", NUMBER); + + private static final long HOLD_NANOS = 200_000_000L; + + @Test + public void contendedSyncEmitsTaskBlockWithIdentityHashBlocker() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + // On JDK 21+ the native JVMTI MonitorContendedEnter/Entered capability is not requested. + // The ByteBuddy path (dd-trace-java synchronized-contention module) covers JDK 21+; + // that coverage is asserted in dd-smoke-tests/profiling-integration-tests/ + // SynchronizedContentionProfilingTest. This in-process harness does not load -javaagent. + Assumptions.assumeTrue(!Platform.isJavaVersionAtLeast(21)); + + final Object lock = new Object(); + final long expectedBlocker = Integer.toUnsignedLong(System.identityHashCode(lock)); + final long spanId = 0xc04ec04eL; + final long rootSpanId = 0xbadbabe0L; + + final CountDownLatch holderHasLock = new CountDownLatch(1); + final CountDownLatch holderMayRelease = new CountDownLatch(1); + + Thread holder = new Thread(() -> { + synchronized (lock) { + holderHasLock.countDown(); + try { + // Hold the lock long enough that the contender's entry-queue wait + // clearly exceeds taskBlockRecorder's 1 ms minimum-duration filter. + holderMayRelease.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + }, "monitor-contended-holder"); + holder.setDaemon(true); + holder.start(); + + holderHasLock.await(); + + Thread contender = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + profiler.setContext(rootSpanId, spanId, 0, 0); + try { + // Will block in the entry queue until `holder` releases. + synchronized (lock) { + // Intentionally empty: the TaskBlock interval is [contended-enter, + // contended-entered], i.e. the wait in the entry queue. + } + } finally { + profiler.clearContext(); + } + }, "monitor-contended-worker"); + contender.setDaemon(true); + contender.start(); + + // Give the contender enough scheduler time to actually be parked in the + // entry queue before we release the holder. + Thread.sleep(HOLD_NANOS / 1_000_000L); + holderMayRelease.countDown(); + + holder.join(); + contender.join(); + + stopProfiler(); + + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + boolean found = false; + for (IItemIterable items : taskBlocks) { + IMemberAccessor blockerAcc = BLOCKER.getAccessor(items.getType()); + IMemberAccessor spanAcc = SPAN_ID.getAccessor(items.getType()); + if (blockerAcc == null || spanAcc == null) { + continue; + } + for (IItem item : items) { + if (spanAcc.getMember(item).longValue() == spanId + && blockerAcc.getMember(item).longValue() == expectedBlocker) { + found = true; + break; + } + } + } + assertTrue( + found, + "Expected datadog.TaskBlock with spanId and blocker matching the contested monitor identity"); + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorEventDelegationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorEventDelegationTest.java new file mode 100644 index 000000000..af1c63362 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/MonitorEventDelegationTest.java @@ -0,0 +1,157 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; + +/** + * Regression guard for the WS3 conditional JVMTI deactivation. The test runner loads the native + * profiler via {@code JavaProfiler.getInstance()} which routes through {@code + * getInstance(null, null, false)} — i.e. {@code delegateMonitorEvents=false}, the standalone / + * {@code -agentpath:} path. With that flag false, the {@code can_generate_monitor_events} + * capability must be requested and the JVMTI {@code MonitorContendedEnter} / + * {@code MonitorContendedEntered} + {@code MonitorWait} / {@code MonitorWaited} callbacks must + * fire on every JDK, including JDK 21+. + * + *

This is exactly the coverage hole the WS3 gate fix closes: pre-WS3, the gate was {@code + * java_version() < 21} unconditionally, which silently dropped the populations on JDK 21+ even + * when no Java-side delegate was wired in. After WS3 the gate is {@code java_version() < 21 || + * !delegate_monitor_events}, so standalone runs get the native callbacks back on JDK 21+. + * + *

The assertion uses the native counters {@code jvmti_monitor_contended_enter} (incremented + * in {@link Profiler::MonitorContendedEnter}) and {@code jvmti_object_wait_enter} (incremented + * in {@link Profiler::ObjectWaitEnter}); both must be strictly positive after the workload. + */ +public class MonitorEventDelegationTest extends AbstractProfilerTest { + + private static final String COUNTER_MONITOR_CONTENDED = "jvmti_monitor_contended_enter"; + private static final String COUNTER_OBJECT_WAIT = "jvmti_object_wait_enter"; + + @Test + public void standalonePathEngagesNativeMonitorCallbacksOnEveryJdk() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + // JDK < 8 / JDK 8 vendor inconsistencies around monitor states make this test flaky + // on the oldest configurations; keep the floor at 11 to match the rest of the suite. + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + + Map before = profiler.getDebugCounters(); + // Counters are only emitted in COUNTERS-enabled builds; skip if not present rather than + // failing on a stripped build. + Assumptions.assumeTrue( + before.containsKey(COUNTER_MONITOR_CONTENDED) && before.containsKey(COUNTER_OBJECT_WAIT), + "JVMTI monitor counters not available in this build (COUNTERS disabled)"); + + final long baselineContended = before.getOrDefault(COUNTER_MONITOR_CONTENDED, 0L); + final long baselineObjectWait = before.getOrDefault(COUNTER_OBJECT_WAIT, 0L); + + // Drive synchronized contention: a holder thread keeps the lock, contender waits in the + // entry queue. JVMTI MonitorContendedEnter fires on the contender as it enters the queue. + final Object lock = new Object(); + final CountDownLatch holderHasLock = new CountDownLatch(1); + final CountDownLatch holderMayRelease = new CountDownLatch(1); + + Thread holder = + new Thread( + () -> { + synchronized (lock) { + holderHasLock.countDown(); + try { + holderMayRelease.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + }, + "delegation-holder"); + holder.setDaemon(true); + holder.start(); + holderHasLock.await(); + + Thread contender = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + synchronized (lock) { + // Just need to enter the queue and acquire after the holder releases. + } + }, + "delegation-contender"); + contender.setDaemon(true); + contender.start(); + + Thread.sleep(100); // give the contender time to land in the entry queue + holderMayRelease.countDown(); + holder.join(); + contender.join(); + + // Drive an Object.wait() to exercise MonitorWait/MonitorWaited. + final Object waitMonitor = new Object(); + Thread waiter = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + synchronized (waitMonitor) { + try { + waitMonitor.wait(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + }, + "delegation-waiter"); + waiter.setDaemon(true); + waiter.start(); + waiter.join(); + + stopProfiler(); + + Map after = profiler.getDebugCounters(); + long deltaContended = after.getOrDefault(COUNTER_MONITOR_CONTENDED, 0L) - baselineContended; + long deltaObjectWait = after.getOrDefault(COUNTER_OBJECT_WAIT, 0L) - baselineObjectWait; + + System.out.printf( + "MonitorEventDelegationTest [delegate=false]: jvmti_monitor_contended_enter +%d, jvmti_object_wait_enter +%d%n", + deltaContended, deltaObjectWait); + + // Both counters must be positive: WS3 routes standalone runs (delegate=false) through the + // native callbacks on every JDK. A zero counter on JDK 21+ would indicate the gate logic + // regressed back to the unconditional `java_version() < 21` form. + assertTrue( + deltaContended > 0, + "Expected JVMTI MonitorContendedEnter to fire (delegate=false on JDK " + + System.getProperty("java.version") + + "); saw zero new events. WS3 gate likely regressed."); + assertTrue( + deltaObjectWait > 0, + "Expected JVMTI MonitorWait to fire (delegate=false on JDK " + + System.getProperty("java.version") + + "); saw zero new events. WS3 gate likely regressed."); + } + + @Override + protected String getProfilerCommand() { + return "wall=10ms,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ObjectWaitTaskBlockTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ObjectWaitTaskBlockTest.java new file mode 100644 index 000000000..1864cbc09 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ObjectWaitTaskBlockTest.java @@ -0,0 +1,91 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.openjdk.jmc.common.item.Attribute.attr; +import static org.openjdk.jmc.common.unit.UnitLookup.NUMBER; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IAttribute; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.common.unit.IQuantity; + +/** + * JVMTI {@code MonitorWait}/{@code MonitorWaited} (which observe {@code Object.wait()}) TaskBlock + * path (registered only on Java versions before 21). Dispatched in our native code as + * {@code Profiler::ObjectWaitEnter} / {@code Profiler::ObjectWaitExit}. Asserts {@code blocker} + * matches {@link System#identityHashCode(Object)} on the waited-on object. + */ +public class ObjectWaitTaskBlockTest extends AbstractProfilerTest { + + private static final IAttribute BLOCKER = + attr("blocker", "blocker", "blocker", NUMBER); + + @Test + public void objectWaitEmitsTaskBlockWithIdentityHashBlocker() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + Assumptions.assumeTrue(!Platform.isJavaVersionAtLeast(21)); + + registerCurrentThreadForWallClockProfiling(); + + final Object lock = new Object(); + final long expectedBlocker = Integer.toUnsignedLong(System.identityHashCode(lock)); + long spanId = 0xfeedbac2L; + long rootSpanId = 0xbeef1234L; + profiler.setContext(rootSpanId, spanId, 0, 0); + + synchronized (lock) { + lock.wait(1500); + } + profiler.clearContext(); + + stopProfiler(); + + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + boolean found = false; + for (IItemIterable items : taskBlocks) { + IMemberAccessor blockerAcc = BLOCKER.getAccessor(items.getType()); + IMemberAccessor spanAcc = SPAN_ID.getAccessor(items.getType()); + if (blockerAcc == null || spanAcc == null) { + continue; + } + for (IItem item : items) { + if (spanAcc.getMember(item).longValue() == spanId + && blockerAcc.getMember(item).longValue() == expectedBlocker) { + found = true; + break; + } + } + } + assertTrue( + found, + "Expected datadog.TaskBlock with spanId and blocker matching Object.wait monitor identity"); + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ParkTaskBlockTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ParkTaskBlockTest.java new file mode 100644 index 000000000..bae1a20ab --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ParkTaskBlockTest.java @@ -0,0 +1,128 @@ +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.ContextSetter; +import com.datadoghq.profiler.Platform; +import java.util.Arrays; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.Aggregators; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; + +/** + * Approach B integration test: {@code parkEnter}/{@code parkExit} must emit {@code TaskBlock} + * events carrying the parked run's duration, while regular wall-clock sampling continues outside + * the parked interval. + * + *

Also verifies that span identity and custom attribute encodings populated in the OTEP TLS + * sidecar before {@code parkEnter} flow into the emitted {@code datadog.TaskBlock} event. This is + * the regression case for the snapshot-based context capture: {@code parkEnter} now reads + * {@code spanId}, {@code rootSpanId} and tag encodings from {@code ContextApi::snapshot()} rather + * than receiving span ids over JNI, so any future divergence between the OTEP record and what + * lands in {@code _park_context} would surface here. + */ +public class ParkTaskBlockTest extends AbstractProfilerTest { + + /** Verifies TaskBlock emission and MethodSample co-existence on the same thread. */ + @Test + public void parkIntervalEmitsTaskBlockAndSuppressesSignals() { + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + + long spanId = 0x1234L; + long rootSpanId = 0x5678L; + profiler.setContext(rootSpanId, spanId, 0, 0); + + long parkUntil = System.nanoTime() + 250_000_000L; // 250 ms + profiler.parkEnter(); + while (System.nanoTime() < parkUntil) { + // Deliberately stay runnable while "parked": this test exercises the explicit + // parkEnter/parkExit TaskBlock path, not the OS-state-driven once-per-run filter. + } + profiler.parkExit(System.identityHashCode(this), 0L); + profiler.clearContext(); + + // Keep profiler active after park interval so regular wall samples still occur. + long activeUntil = System.nanoTime() + 120_000_000L; + while (System.nanoTime() < activeUntil) { + // busy + } + + stopProfiler(); + + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + long taskBlockCount = taskBlocks.getAggregate(Aggregators.count()).longValue(); + assertTrue(taskBlockCount > 0, "Expected datadog.TaskBlock events after parkEnter/parkExit"); + + IItemCollection methodSamples = verifyEvents("datadog.MethodSample"); + long methodSampleCount = methodSamples.getAggregate(Aggregators.count()).longValue(); + assertTrue(methodSampleCount > 0, "Expected MethodSample events outside the parked interval"); + } + + /** + * Regression: span id and tag encodings active in the OTEP TLS sidecar at the moment of + * {@code parkEnter} must be captured into {@code _park_context} and emitted on the resulting + * {@code datadog.TaskBlock}. This protects against any future refactor that breaks the + * snapshot-based capture path. + */ + @Test + public void parkIntervalCarriesSpanAndCustomAttribute() { + Assumptions.assumeTrue(!Platform.isJ9()); + registerCurrentThreadForWallClockProfiling(); + + ContextSetter contextSetter = new ContextSetter(profiler, Arrays.asList("tag1", "tag2")); + + long spanId = 0xabc12345L; + long rootSpanId = 0x9876fedcL; + profiler.setContext(rootSpanId, spanId, 0, 0); + assertTrue(contextSetter.setContextValue("tag1", "park-tag-value")); + + long parkUntil = System.nanoTime() + 50_000_000L; // 50 ms — well over the 1 ms TaskBlock floor + profiler.parkEnter(); + // Mutate the live encoding after parkEnter: parkExit must surface the pre-park snapshot. + assertTrue(contextSetter.setContextValue("tag1", "after-park")); + while (System.nanoTime() < parkUntil) { + // busy + } + profiler.parkExit(System.identityHashCode(this), 0L); + contextSetter.clearContextValue("tag1"); + profiler.clearContext(); + + stopProfiler(); + + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + boolean found = false; + for (IItemIterable items : taskBlocks) { + IMemberAccessor spanAcc = SPAN_ID.getAccessor(items.getType()); + IMemberAccessor tag1Acc = TAG_1.getAccessor(items.getType()); + assertNotNull(spanAcc, "TaskBlock type missing spanId field"); + assertNotNull(tag1Acc, "TaskBlock type missing tag1 field — register attributes via profiler command"); + for (IItem item : items) { + Object spanVal = spanAcc.getMember(item); + if (spanVal == null) { + continue; + } + long span = ((org.openjdk.jmc.common.unit.IQuantity) spanVal).longValue(); + String tag1 = tag1Acc.getMember(item); + if (span == spanId && "park-tag-value".equals(tag1)) { + found = true; + break; + } + } + } + assertTrue( + found, + "Expected a TaskBlock with parkEnter-snapshotted spanId and tag1 value (got mutated/zero values)"); + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms,filter=0,attributes=tag1;tag2"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckEfficiencyTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckEfficiencyTest.java new file mode 100644 index 000000000..a7e386d6f --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckEfficiencyTest.java @@ -0,0 +1,358 @@ +package com.datadoghq.profiler.wallclock; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Measures theoretical wall-clock signal suppression from sample distributions. Runs with {@code + * wallprecheck=false} so every state is sampled; percentages estimate the upper bound on how many + * {@code SIGVTALRM} deliveries {@code wallprecheck=true} would suppress with its once-per-run + * filter (subsequent signals on the same SLEEPING / CONDVAR_WAIT run after the entry sample). The + * entry signal of each run still fires under {@code wallprecheck=true}, so the actual suppression + * is slightly below these percentages. + * + *

Once-per-run filter (opt-in via {@code wallprecheck=true}; default is {@code false}) + * emits one MethodSample at the entry signal of a {@code SLEEPING} / {@code CONDVAR_WAIT} run and + * suppresses subsequent signals on the same thread until the OS state leaves the skip set. That + * covers {@code Thread.sleep} (legacy {@code SLEEPING}; JDK 21+ sleep uses wait/park), {@code + * LockSupport.park}, and other condvar waits — not {@code Object.wait} ({@code OBJECT_WAIT}), + * which the current filter does not skip. + * + *

Sample classification prefers Java thread name ({@code EVENT_THREAD_NAME}) for the fixed worker + * threads when present, then JFR thread state, then stack strings — some JVMs (e.g. Graal on aarch64) + * omit SLEEPING / {@code Thread.sleep} in state or stacks while samples still hit the worker. + */ +public class PrecheckEfficiencyTest extends AbstractProfilerTest { + + private static final String EFFICIENCY_SLEEPING = "efficiency-sleeping"; + private static final String EFFICIENCY_PARKED = "efficiency-parked"; + private static final String EFFICIENCY_WAITING = "efficiency-waiting"; + private static final String EFFICIENCY_WORKING = "efficiency-working"; + + @Test + public void compareSuppressionRates() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + + CountDownLatch ready = new CountDownLatch(4); + AtomicBoolean stop = new AtomicBoolean(false); + Object monitor = new Object(); + + // Thread.sleep — typically SLEEPING or (JDK 21+) CONDVAR_WAIT; suppressed by once-per-run filter + Thread sleeping = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + try { Thread.sleep(10_000); } catch (InterruptedException ignored) {} + }, EFFICIENCY_SLEEPING); + + // LockSupport.parkNanos — CONDVAR_WAIT; suppressed by once-per-run filter (same as JDK 21+ sleep path) + Thread parked = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + LockSupport.parkNanos(10_000_000_000L); + }, EFFICIENCY_PARKED); + + // Object.wait — OBJECT_WAIT; not in the once-per-run filter's skip set today + Thread waiting = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + synchronized (monitor) { + try { monitor.wait(10_000); } catch (InterruptedException ignored) {} + } + }, EFFICIENCY_WAITING); + + // CPU spin — RUNNABLE; not skipped by the once-per-run filter + Thread working = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + long x = 0; + while (!stop.get()) { x++; } + }, EFFICIENCY_WORKING); + + sleeping.setDaemon(true); + parked.setDaemon(true); + waiting.setDaemon(true); + working.setDaemon(true); + + sleeping.start(); + parked.start(); + waiting.start(); + working.start(); + + ready.await(); + Thread.sleep(500); + + stop.set(true); + sleeping.interrupt(); + LockSupport.unpark(parked); + synchronized (monitor) { monitor.notifyAll(); } + + sleeping.join(1000); + parked.join(1000); + waiting.join(1000); + working.join(1000); + + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.MethodSample", false); + + long sleepSamples = 0, parkSamples = 0, objectWaitSamples = 0, runnableSamples = 0; + + for (IItemIterable batch : events) { + IMemberAccessor stackAccessor = JdkAttributes.STACK_TRACE_STRING.getAccessor(batch.getType()); + IMemberAccessor stateAccessor = THREAD_STATE.getAccessor(batch.getType()); + IMemberAccessor threadNameAccessor = + JdkAttributes.EVENT_THREAD_NAME.getAccessor(batch.getType()); + if (stackAccessor == null && stateAccessor == null && threadNameAccessor == null) { + continue; + } + for (IItem item : batch) { + if (threadNameAccessor != null) { + String threadName = threadNameAccessor.getMember(item); + if (EFFICIENCY_SLEEPING.equals(threadName)) { + sleepSamples++; + continue; + } + if (EFFICIENCY_PARKED.equals(threadName)) { + parkSamples++; + continue; + } + if (EFFICIENCY_WAITING.equals(threadName)) { + objectWaitSamples++; + continue; + } + if (EFFICIENCY_WORKING.equals(threadName)) { + runnableSamples++; + continue; + } + } + String state = stateAccessor != null ? stateAccessor.getMember(item) : null; + // Native OSThreadState is written as jdk.types.ThreadState; CONDVAR_WAIT → "PARKED" + // in JFR metadata (flightRecorder.cpp writeThreadStates). Prefer state over stacks: + // stacks often omit LockSupport/Unsafe frames after inlining. + if (state != null && !state.isEmpty()) { + switch (state) { + case "SLEEPING": + sleepSamples++; + continue; + case "PARKED": + parkSamples++; + continue; + case "WAITING": + objectWaitSamples++; + continue; + default: + break; + } + } + String stack = stackAccessor != null ? stackAccessor.getMember(item) : null; + if (stack != null && (stack.contains("Thread.sleep") || stack.contains("sleep0"))) { + sleepSamples++; + } else if (stack != null && (stack.contains("LockSupport.park") || stack.contains("Unsafe.park") + || stack.contains("parkNanos"))) { + parkSamples++; + } else if (stack != null && (stack.contains("Object.wait") || stack.contains("wait0"))) { + objectWaitSamples++; + } else { + runnableSamples++; + } + } + } + + long total = sleepSamples + parkSamples + objectWaitSamples + runnableSamples; + if (total == 0) { + System.out.println("No samples collected — skipping efficiency report"); + return; + } + + double pctSleep = 100.0 * sleepSamples / total; + double pctPark = 100.0 * parkSamples / total; + double pctObjectWait = 100.0 * objectWaitSamples / total; + double pctRunnable = 100.0 * runnableSamples / total; + + double oncePerRunSuppression = pctSleep + pctPark; + double hypotheticalIfAlsoObjectWait = pctSleep + pctPark + pctObjectWait; + + System.out.printf("%nPrecheck efficiency report (wallprecheck=false baseline, %d total samples):%n", total); + System.out.printf(" SLEEPING (Thread.sleep): %4d samples (%5.1f%%)%n", sleepSamples, pctSleep); + System.out.printf(" CONDVAR_WAIT (LockSupport.park): %4d samples (%5.1f%%)%n", parkSamples, pctPark); + System.out.printf(" OBJECT_WAIT (Object.wait): %4d samples (%5.1f%%)%n", objectWaitSamples, pctObjectWait); + System.out.printf(" RUNNABLE / other: %4d samples (%5.1f%%)%n", runnableSamples, pctRunnable); + System.out.printf( + "Once-per-run filter (SLEEPING + CONDVAR_WAIT): %.1f%% of signals suppressed (upper bound)%n", + oncePerRunSuppression); + System.out.printf( + "Hypothetical if also suppressing OBJECT_WAIT: %.1f%% of signals suppressed%n", + hypotheticalIfAlsoObjectWait); + + // Sanity: each controlled thread type should produce at least a few samples. + // JDK 8 can collapse park/wait into WAITING-only classification depending on runtime/JFR details. + assertTrue(sleepSamples > 0, "Expected samples from sleeping thread"); + if (Platform.isJavaVersion(8)) { + assertTrue(parkSamples + objectWaitSamples > 0, + "Expected WAITING/PARKED samples from parked or object-waiting threads on JDK 8"); + } else { + assertTrue(parkSamples > 0, "Expected samples from parked thread"); + assertTrue(objectWaitSamples > 0, "Expected samples from object-waiting thread"); + } + assertTrue(runnableSamples > 0, "Expected RUNNABLE samples (working thread or unidentified)"); + } + + /** + * Simulates a typical Java service: a fixed thread pool that is mostly idle (threads parked + * in {@code LinkedBlockingQueue.take()}), plus a scheduler thread doing periodic + * {@code Thread.sleep} wakeups, plus a continuously-busy computation thread. + * + * This workload is representative of real applications where {@code LockSupport.park} + * dominates the thread state distribution. The printed suppression estimate uses {@code + * pctSleep + pctPark} as an upper bound on the once-per-run filter's effect ({@code SLEEPING} + * + {@code CONDVAR_WAIT}); the entry signal of each run still fires under {@code + * wallprecheck=true}. + */ + @Test + public void realisticServiceWorkload() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + + final int POOL_SIZE = 8; + final int TASK_DURATION_MS = 20; // each submitted task takes ~20 ms + final int SCHEDULE_INTERVAL_MS = 50; // scheduler fires every 50 ms + + AtomicBoolean stop = new AtomicBoolean(false); + AtomicInteger threadIndex = new AtomicInteger(0); + + // Thread pool whose threads register themselves with the wall-clock filter. + // When idle, pool threads sit in LinkedBlockingQueue.take() → LockSupport.park (CONDVAR_WAIT). + ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE, r -> { + Thread t = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + r.run(); + }); + t.setName("realistic-pool-" + threadIndex.incrementAndGet()); + t.setDaemon(true); + return t; + }); + + // Pre-warm: submit N tasks so the executor creates all POOL_SIZE threads before measurement. + CountDownLatch primed = new CountDownLatch(POOL_SIZE); + for (int i = 0; i < POOL_SIZE; i++) { + pool.submit(primed::countDown); + } + primed.await(); + Thread.sleep(50); // let all pool threads return to idle (parked) state + + // Scheduler: sleeps between submissions. Thread.sleep → SLEEPING or (JDK 21+) CONDVAR_WAIT. + Thread scheduler = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + while (!stop.get()) { + try { + Thread.sleep(SCHEDULE_INTERVAL_MS); + } catch (InterruptedException e) { + break; + } + // Submit a short CPU task to one pool thread + pool.submit(() -> { + long x = 0; + long deadline = System.nanoTime() + TASK_DURATION_MS * 1_000_000L; + while (System.nanoTime() < deadline) { x++; } + return x; + }); + } + }, "realistic-scheduler"); + scheduler.setDaemon(true); + scheduler.start(); + + // Always-busy thread: simulates a background aggregation/analytics loop. + Thread hotThread = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + long x = 0; + while (!stop.get()) { x++; } + }, "realistic-hot"); + hotThread.setDaemon(true); + hotThread.start(); + + // Measurement window + Thread.sleep(500); + + stop.set(true); + scheduler.interrupt(); + pool.shutdownNow(); + pool.awaitTermination(2, TimeUnit.SECONDS); + hotThread.join(1000); + + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.MethodSample", false); + + long sleepSamples = 0, parkSamples = 0, otherSamples = 0; + + for (IItemIterable batch : events) { + IMemberAccessor stackAccessor = JdkAttributes.STACK_TRACE_STRING.getAccessor(batch.getType()); + if (stackAccessor == null) continue; + for (IItem item : batch) { + String stack = stackAccessor.getMember(item); + if (stack == null) { + otherSamples++; + } else if (stack.contains("Thread.sleep") || stack.contains("sleep0")) { + // SLEEPING / JDK 21+ sleep-on-condvar — suppressed by once-per-run filter + sleepSamples++; + } else if (stack.contains("LockSupport.park") || stack.contains("Unsafe.park")) { + // CONDVAR_WAIT — suppressed by once-per-run filter together with sleep buckets above + parkSamples++; + } else { + // RUNNABLE or other — not skipped by the once-per-run filter + otherSamples++; + } + } + } + + long total = sleepSamples + parkSamples + otherSamples; + if (total == 0) { + System.out.println("No samples collected — skipping realistic workload report"); + return; + } + + double pctSleep = 100.0 * sleepSamples / total; + double pctPark = 100.0 * parkSamples / total; + double pctOther = 100.0 * otherSamples / total; + + double oncePerRunSuppression = pctSleep + pctPark; + + System.out.printf("%nRealistic service workload report (%d pool threads, 1 scheduler, 1 hot thread, 500ms):%n", POOL_SIZE); + System.out.printf( + " Scheduler sleep (Thread.sleep): %4d samples (%5.1f%%)%n", sleepSamples, pctSleep); + System.out.printf( + " Idle pool park (LockSupport.park): %4d samples (%5.1f%%)%n", parkSamples, pctPark); + System.out.printf( + " RUNNABLE / other (active work): %4d samples (%5.1f%%)%n", otherSamples, pctOther); + System.out.printf( + "Once-per-run filter (SLEEPING + CONDVAR_WAIT): %.1f%% of signals suppressed (upper bound)%n", + oncePerRunSuppression); + + // The idle pool threads should dominate: most samples should be parked + assertTrue(parkSamples > otherSamples, + String.format("Expected idle pool threads (park=%d) to dominate active threads (other=%d)", parkSamples, otherSamples)); + // The scheduler must appear (it sleeps 50ms at a time over 500ms → ~10 cycles) + assertTrue(sleepSamples > 0, "Expected samples from scheduler's Thread.sleep"); + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckTest.java new file mode 100644 index 000000000..fef1789c3 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/PrecheckTest.java @@ -0,0 +1,68 @@ +package com.datadoghq.profiler.wallclock; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.Aggregators; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Verifies once-per-run signal suppression (opt-in via {@code wallprecheck=true}): the wall-clock + * SIGVTALRM handler reads {@code VMThread::osThreadState()} and arms a per-thread filter on the + * first signal of a {@code SLEEPING} / {@code CONDVAR_WAIT} run. The first signal emits a + * {@code MethodSample} (so the parked thread's stack is captured); subsequent signals on the same + * thread are suppressed until the OS state leaves the skip set. Legacy JDKs report {@code + * Thread.sleep} as {@code SLEEPING}; JDK 21+ uses a condvar wait path for sleep (often {@code + * CONDVAR_WAIT}), same broad category as {@code LockSupport.park}. + * + *

A thread in {@code Thread.sleep} for 300 ms at a 1 ms wall interval should produce a handful + * of {@code MethodSample} events (one entry sample plus residual boundary samples), not ~300. The + * suppression counter {@code wc_signals_suppressed_sampled_run} accounts for the dropped signals. + * + *

Runs only on JDK 11+: JDK 8 HotSpot often does not expose consistent OSThread states for sleep + * in vmStructs, so the once-per-run filter cannot engage reliably on JDK 8 CI. + * + *

CI should execute this test on at least one JDK 21+ runner so {@code CONDVAR_WAIT}-based sleep + * suppression stays covered alongside JDK 11–20 ({@code SLEEPING}) configurations. + */ +public class PrecheckTest extends AbstractProfilerTest { + + @Test + public void testSleepingThreadIsNotSampled() throws InterruptedException { + Assumptions.assumeTrue(!Platform.isJ9()); + // Wall precheck reads VMThread::osThreadState() -> SLEEPING / CONDVAR_WAIT (wallClock.cpp). + // JDK 8 frequently misreports vs JDK 11+ across vendors/libcs (Oracle, musl, glibc), so + // nearly every wall tick still produces a MethodSample on JDK 8. + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + registerCurrentThreadForWallClockProfiling(); + + // Sleep for 300 ms — without suppression we'd see ~300 ticks at 1 ms interval. With the + // once-per-run filter, only the entry signal of the sleep run emits a MethodSample. + Thread.sleep(300); + + stopProfiler(); + + // Once-per-run filter: handler emits at the first SIGVTALRM of the SLEEPING / CONDVAR_WAIT + // run, then suppresses the rest. We expect a small handful (entry + boundary jitter), not 300. + long sampleCount = verifyEvents("datadog.MethodSample", false) + .getAggregate(Aggregators.count()).longValue(); + assertTrue(sampleCount < 10, + "Expected nearly no MethodSample events for a sleeping thread with wallprecheck=true, got: " + sampleCount); + + // Confirm the suppression counter incremented (only available in COUNTERS-enabled builds). + Map counters = profiler.getDebugCounters(); + if (counters.containsKey("wc_signals_suppressed_sampled_run")) { + assertTrue(counters.get("wc_signals_suppressed_sampled_run") > 0, + "wc_signals_suppressed_sampled_run should be > 0 for a 300 ms Thread.sleep()"); + } + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms,wallprecheck=true"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/RegressionGateTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/RegressionGateTest.java new file mode 100644 index 000000000..7aacff8c4 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/RegressionGateTest.java @@ -0,0 +1,96 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.DynamicTest.dynamicTest; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.DynamicTest; +import org.junit.jupiter.api.TestFactory; + +/** + * Curated list of test classes that form the wall-clock / TaskBlock regression gate for the + * close-coverage-gaps work. Each entry is verified to be loadable; if any test class is renamed + * or deleted without updating this list, this gate fails fast. + * + *

The gate itself does not run the listed tests — Gradle's + * {@code --tests "com.datadoghq.profiler.wallclock.*"} pattern already does that. This class + * exists to make the curated set discoverable and reviewable from a single place so PR + * reviewers know exactly which tests must pass before the close-coverage-gaps feature ships. + * + *

Per-PR command (run from {@code java-profiler/}): + * + *

+ *   ./gradlew :ddprof-test:testDebug --tests "com.datadoghq.profiler.wallclock.*"
+ * 
+ * + * The corresponding dd-trace-java gate is the {@code *TaskBlockProfilingTest} family in + * {@code dd-smoke-tests:profiling-integration-tests}, plus the per-module unit tests: + * + *
+ *   ./gradlew \
+ *     :dd-java-agent:instrumentation:datadog:profiling:thread-sleep:test \
+ *     :dd-java-agent:instrumentation:datadog:profiling:nio-selector:test \
+ *     :dd-java-agent:instrumentation:datadog:profiling:synchronized-contention:test \
+ *     :dd-smoke-tests:profiling-integration-tests:test --tests "*TaskBlock*"
+ * 
+ */ +public class RegressionGateTest { + + /** + * Curated list of fully-qualified test class names that constitute the wall-clock / TaskBlock + * regression gate. Update this list when adding new gate-relevant tests. + */ + private static final List GATE_MEMBERS = + Arrays.asList( + // Wall-clock once-per-run filter, IPI-eliding fast path, baseline behaviour. + "com.datadoghq.profiler.wallclock.PrecheckTest", + "com.datadoghq.profiler.wallclock.WallclockPrecheckBaselineTest", + "com.datadoghq.profiler.wallclock.WallclockTimerFastPathTest", + // TaskBlock coverage across populations. + "com.datadoghq.profiler.wallclock.TaskBlockCoverageTest", + "com.datadoghq.profiler.wallclock.ParkTaskBlockTest", + "com.datadoghq.profiler.wallclock.ObjectWaitTaskBlockTest", + "com.datadoghq.profiler.wallclock.MonitorContendedTaskBlockTest", + "com.datadoghq.profiler.wallclock.SynchronizedMethodTaskBlockTest", + // WS3: JVMTI delegation handshake. + "com.datadoghq.profiler.wallclock.MonitorEventDelegationTest"); + + @TestFactory + Stream gateMembersAreLoadable() { + return GATE_MEMBERS.stream() + .map( + className -> + dynamicTest( + "loadable: " + className, + () -> { + try { + Class.forName(className); + } catch (ClassNotFoundException e) { + throw new AssertionError( + "Regression gate is broken: " + + className + + " is not present in the classpath. Either restore the test or " + + "update RegressionGateTest.GATE_MEMBERS to reflect the rename / " + + "removal.", + e); + } + })); + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/SynchronizedMethodTaskBlockTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/SynchronizedMethodTaskBlockTest.java new file mode 100644 index 000000000..483bb6d80 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/SynchronizedMethodTaskBlockTest.java @@ -0,0 +1,231 @@ +/* + * Copyright 2026 Datadog, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.openjdk.jmc.common.item.Attribute.attr; +import static org.openjdk.jmc.common.unit.UnitLookup.NUMBER; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IAttribute; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.common.unit.IQuantity; + +/** + * {@code ACC_SYNCHRONIZED} method-level contention TaskBlock path. Exhaustive companion to + * {@link MonitorContendedTaskBlockTest} (which covers block-level {@code synchronized(obj){}}). + * + *

On JDK < 21 the native JVMTI {@code MonitorContendedEnter}/{@code Entered} callbacks fire + * on every monitor acquisition regardless of whether the source was a block or a synchronized + * method, so both forms are covered by the native path on the older runtimes. + * + *

On JDK 21+ the native callbacks are gated off; the companion dd-trace-java + * {@code synchronized-contention} module covers that gap via bytecode rewriting. Coverage for + * JDK 21+ is asserted in {@code dd-smoke-tests/profiling-integration-tests/ + * SynchronizedContentionProfilingTest}; this in-process harness does not load {@code -javaagent}. + */ +public class SynchronizedMethodTaskBlockTest extends AbstractProfilerTest { + + private static final IAttribute BLOCKER = + attr("blocker", "blocker", "blocker", NUMBER); + + private static final long HOLD_NANOS = 200_000_000L; + + /** Receiver type for the instance-method contention scenario. */ + static final class Receiver { + synchronized void hold(CountDownLatch entered, CountDownLatch mayRelease) throws Exception { + entered.countDown(); + mayRelease.await(2, TimeUnit.SECONDS); + } + + synchronized void contend() { + // Body intentionally empty: the TaskBlock interval is the entry-queue wait. + } + } + + /** Class holder for the static-method contention scenario. */ + static final class StaticLockOwner { + static synchronized void hold(CountDownLatch entered, CountDownLatch mayRelease) + throws Exception { + entered.countDown(); + mayRelease.await(2, TimeUnit.SECONDS); + } + + static synchronized void contend() { + // Body intentionally empty. + } + } + + @Test + public void contendedInstanceSyncMethodEmitsTaskBlockWithIdentityHashBlocker() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + // See note on the class-level Javadoc: JDK 21+ requires the dd-java-agent + // synchronized-contention module to be attached; until ddprof-test wires it in + // we keep the same skip as the block-level test. + Assumptions.assumeTrue(!Platform.isJavaVersionAtLeast(21)); + + final Receiver receiver = new Receiver(); + final long expectedBlocker = Integer.toUnsignedLong(System.identityHashCode(receiver)); + final long spanId = 0xc04ec04eL; + final long rootSpanId = 0xbadbabe0L; + + runContention( + receiver, + spanId, + rootSpanId, + () -> { + try { + receiver.contend(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, + (entered, mayRelease) -> { + try { + receiver.hold(entered, mayRelease); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + assertBlockerEvent(spanId, expectedBlocker); + } + + @Test + public void contendedStaticSyncMethodEmitsTaskBlockWithIdentityHashBlocker() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + Assumptions.assumeTrue(!Platform.isJavaVersionAtLeast(21)); + + final long expectedBlocker = Integer.toUnsignedLong(System.identityHashCode(StaticLockOwner.class)); + final long spanId = 0xc04ec04fL; + final long rootSpanId = 0xbadbabe1L; + + runContention( + StaticLockOwner.class, + spanId, + rootSpanId, + () -> { + try { + StaticLockOwner.contend(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, + (entered, mayRelease) -> { + try { + StaticLockOwner.hold(entered, mayRelease); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + assertBlockerEvent(spanId, expectedBlocker); + } + + // --------------------------------------------------------------------- helpers + + @FunctionalInterface + private interface HolderBody { + void run(CountDownLatch entered, CountDownLatch mayRelease) throws Exception; + } + + private void runContention( + final Object lockOrOwner, + final long spanId, + final long rootSpanId, + final Runnable contenderBody, + final HolderBody holderBody) + throws Exception { + final CountDownLatch holderHasLock = new CountDownLatch(1); + final CountDownLatch holderMayRelease = new CountDownLatch(1); + + Thread holder = + new Thread( + () -> { + try { + holderBody.run(holderHasLock, holderMayRelease); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, + "sync-method-holder-" + System.identityHashCode(lockOrOwner)); + holder.setDaemon(true); + holder.start(); + + holderHasLock.await(); + + Thread contender = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + profiler.setContext(rootSpanId, spanId, 0, 0); + try { + contenderBody.run(); + } finally { + profiler.clearContext(); + } + }, + "sync-method-worker-" + System.identityHashCode(lockOrOwner)); + contender.setDaemon(true); + contender.start(); + + Thread.sleep(HOLD_NANOS / 1_000_000L); + holderMayRelease.countDown(); + + holder.join(); + contender.join(); + + stopProfiler(); + } + + private void assertBlockerEvent(final long spanId, final long expectedBlocker) throws Exception { + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + boolean found = false; + for (IItemIterable items : taskBlocks) { + IMemberAccessor blockerAcc = BLOCKER.getAccessor(items.getType()); + IMemberAccessor spanAcc = SPAN_ID.getAccessor(items.getType()); + if (blockerAcc == null || spanAcc == null) { + continue; + } + for (IItem item : items) { + if (spanAcc.getMember(item).longValue() == spanId + && blockerAcc.getMember(item).longValue() == expectedBlocker) { + found = true; + break; + } + } + } + assertTrue( + found, + "Expected datadog.TaskBlock with spanId and blocker matching the contested monitor identity"); + } + + @Override + protected String getProfilerCommand() { + return "wall=1ms,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/TaskBlockCoverageTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/TaskBlockCoverageTest.java new file mode 100644 index 000000000..10ce7f711 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/TaskBlockCoverageTest.java @@ -0,0 +1,170 @@ +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.openjdk.jmc.common.unit.UnitLookup.EPOCH_NS; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.locks.LockSupport; + +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.common.unit.IQuantity; +import org.openjdk.jmc.common.unit.IRange; +import org.openjdk.jmc.flightrecorder.JfrAttributes; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +/** + * Within-recording proof of the gap-fill property of the once-per-run filter ({@code + * wallprecheck=true}): the time spans where MethodSamples would have fired on a parked thread + * are covered by the {@code TaskBlock} intervals emitted by {@code parkEnter}/{@code parkExit} + * on that same thread. + * + *

Mechanism: {@code parkEnter} stamps {@code start_ticks} just before the kernel-side + * {@code pthread_cond_timedwait}, {@code parkExit} stamps {@code end_ticks} just after, and a + * {@code PARKED} (HotSpot {@code CONDVAR_WAIT}) {@code MethodSample} can only be emitted while + * the thread is inside that wait. So any {@code PARKED MethodSample} timestamp on the worker + * must lie inside some {@code TaskBlock} interval on the worker. + * + *

Assertion: with the filter armed, the worker's {@code TaskBlock} intervals should be + * empty of {@code PARKED MethodSamples}. The baseline counterpart (TaskBlock intervals contain + * the samples that the filter then suppresses) lives in {@link WallclockPrecheckBaselineTest}. + */ +public class TaskBlockCoverageTest extends AbstractProfilerTest { + + private static final String WORKER_NAME = "task-block-coverage-T"; + private static final int PARK_RUNS = 200; + private static final long PARK_NANOS = 50_000_000L; + + @Test + public void taskBlockCoversSuppressedMethodSamples() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + // wallprecheck reads HotSpot OSThreadState; JDK 8 misreports sleep / park states. + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + + Thread worker = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + // TaskBlock emission requires a non-zero spanId — taskBlockRecorder skips otherwise. + long spanId = 0x1234L; + long rootSpanId = 0x5678L; + profiler.setContext(rootSpanId, spanId, 0, 0); + try { + for (int i = 0; i < PARK_RUNS; i++) { + profiler.parkEnter(); + LockSupport.parkNanos(PARK_NANOS); + profiler.parkExit(System.identityHashCode(this), 0L); + } + } finally { + profiler.clearContext(); + } + }, WORKER_NAME); + worker.setDaemon(true); + worker.start(); + worker.join(); + + stopProfiler(); + + List taskBlocks = collectTaskBlocksOnWorker(); + List parkedSamples = collectParkedSamplesOnWorker(); + + // parkEnter/parkExit path is independent of the filter — should emit ~PARK_RUNS + // TaskBlocks. The 60% floor absorbs sub-millisecond parks that fall under the 1 ms + // minimum-duration filter inside taskBlockRecorder. + long minTbExpected = (PARK_RUNS * 6L) / 10L; + assertTrue(taskBlocks.size() >= minTbExpected, + "expected ≥" + minTbExpected + " TaskBlocks on " + WORKER_NAME + ", got " + + taskBlocks.size()); + + // Per-TaskBlock coverage invariant — strict expression of the once-per-run filter + // specification: "emit exactly one MethodSample at the first signal of a blocked-state + // run, suppress the rest of the run". A TaskBlock interval [parkEnter, parkExit] is one + // CONDVAR_WAIT run by construction. Therefore at most one PARKED MethodSample may fall + // inside any single TaskBlock interval — never two. (Across the recording, multiple + // TaskBlocks may each contain one sample if the handler observed a state-transition + // reset between them; that is allowed and counted by the gap-ratio check in + // WallclockPrecheckBaselineTest. The per-interval bound here is independent.) + int maxSamplesInAnyTaskBlock = 0; + int worstTaskBlockIdx = -1; + for (int i = 0; i < taskBlocks.size(); i++) { + Interval tb = taskBlocks.get(i); + int count = 0; + for (Interval s : parkedSamples) { + if (s.startNs >= tb.startNs && s.startNs <= tb.endNs) { + count++; + } + } + if (count > maxSamplesInAnyTaskBlock) { + maxSamplesInAnyTaskBlock = count; + worstTaskBlockIdx = i; + } + } + assertTrue(maxSamplesInAnyTaskBlock <= 1, + "wallprecheck=true should leave at most one PARKED MethodSample per TaskBlock " + + "interval on " + WORKER_NAME + "; TaskBlock #" + worstTaskBlockIdx + + " contained " + maxSamplesInAnyTaskBlock + " PARKED samples " + + "(PARKED total on worker: " + parkedSamples.size() + + ", TaskBlocks: " + taskBlocks.size() + ")"); + + // Counter sanity: the suppression accounting must be non-zero. + Map counters = profiler.getDebugCounters(); + if (counters.containsKey("wc_signals_suppressed_sampled_run")) { + long c = counters.get("wc_signals_suppressed_sampled_run"); + assertTrue(c > 0, + "wc_signals_suppressed_sampled_run should be > 0 after " + PARK_RUNS + " parks"); + } + } + + private static final class Interval { + final long startNs; + final long endNs; + Interval(long startNs, long endNs) { + this.startNs = startNs; + this.endNs = endNs; + } + } + + private List collectTaskBlocksOnWorker() throws Exception { + return collectIntervals(verifyEvents("datadog.TaskBlock"), WORKER_NAME, false); + } + + private List collectParkedSamplesOnWorker() throws Exception { + return collectIntervals(verifyEvents("datadog.MethodSample", false), WORKER_NAME, true); + } + + private List collectIntervals(IItemCollection events, String workerName, + boolean requirePARKEDState) throws Exception { + List out = new ArrayList<>(); + for (IItemIterable batch : events) { + IMemberAccessor threadNameAccessor = + JdkAttributes.EVENT_THREAD_NAME.getAccessor(batch.getType()); + IMemberAccessor, IItem> lifetimeAccessor = + JfrAttributes.LIFETIME.getAccessor(batch.getType()); + IMemberAccessor stateAccessor = + requirePARKEDState ? THREAD_STATE.getAccessor(batch.getType()) : null; + if (threadNameAccessor == null || lifetimeAccessor == null) continue; + if (requirePARKEDState && stateAccessor == null) continue; + for (IItem item : batch) { + if (!workerName.equals(threadNameAccessor.getMember(item))) continue; + if (requirePARKEDState && !"PARKED".equals(stateAccessor.getMember(item))) continue; + IRange lifetime = lifetimeAccessor.getMember(item); + long startNs = lifetime.getStart().longValueIn(EPOCH_NS); + long endNs = lifetime.getEnd().longValueIn(EPOCH_NS); + out.add(new Interval(startNs, endNs)); + } + } + return out; + } + + @Override + protected String getProfilerCommand() { + return "wall=10ms,wallprecheck=true,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockMitigationsCombinedTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockMitigationsCombinedTest.java new file mode 100644 index 000000000..e3511c180 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockMitigationsCombinedTest.java @@ -0,0 +1,121 @@ +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.Aggregators; +import org.openjdk.jmc.common.item.IItemCollection; + +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Combined wall-clock mitigations integration test: + * - Approach A (once-per-run signal suppression under {@code wallprecheck=true}): a sleeping thread + * should produce roughly one MethodSample (the entry sample of the SLEEPING / CONDVAR_WAIT run) + * and {@code wc_signals_suppressed_sampled_run} should be positive. + * - Approach B ({@code parkEnter}/{@code parkExit}): a runnable thread flagged via the parked API + * should emit {@code datadog.TaskBlock} events carrying the run's duration. + * - Correctness guard: purely runnable thread still produces MethodSample events. + */ +public class WallclockMitigationsCombinedTest extends AbstractProfilerTest { + + /** Verifies Approach A and B counters/events can be observed concurrently. */ + @Test + public void precheckAndParkSuppressionWorkTogether() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue( + Platform.isJavaVersionAtLeast(11), + "Sleeping-state precheck assertions are stable on JDK 11+"); + + CountDownLatch ready = new CountDownLatch(3); + AtomicBoolean stop = new AtomicBoolean(false); + + Thread sleeping = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + try { + Thread.sleep(280); + } catch (InterruptedException ignored) { + } + }, + "combined-sleeping"); + + Thread parkedBusy = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + long spanId = 0x1111L; + long rootSpanId = 0x2222L; + profiler.setContext(rootSpanId, spanId, 0, 0); + ready.countDown(); + profiler.parkEnter(); + long parkedUntil = System.nanoTime() + 280_000_000L; + while (System.nanoTime() < parkedUntil) { + // Runnable while flagged parked. + } + profiler.parkExit(System.identityHashCode(this), 0L); + profiler.clearContext(); + }, + "combined-parked"); + + Thread runnable = + new Thread( + () -> { + registerCurrentThreadForWallClockProfiling(); + ready.countDown(); + while (!stop.get()) { + // keep runnable + } + }, + "combined-runnable"); + + sleeping.setDaemon(true); + parkedBusy.setDaemon(true); + runnable.setDaemon(true); + sleeping.start(); + parkedBusy.start(); + runnable.start(); + + ready.await(); + Thread.sleep(350); + stop.set(true); + + sleeping.interrupt(); + sleeping.join(1000); + parkedBusy.join(1000); + runnable.join(1000); + + stopProfiler(); + + IItemCollection taskBlocks = verifyEvents("datadog.TaskBlock"); + assertTrue( + taskBlocks.getAggregate(Aggregators.count()).longValue() > 0, + "Expected TaskBlock events from parked interval"); + + IItemCollection methodSamples = verifyEvents("datadog.MethodSample"); + assertTrue( + methodSamples.getAggregate(Aggregators.count()).longValue() > 0, + "Expected runnable MethodSample events while mitigations are enabled"); + + Map counters = profiler.getDebugCounters(); + if (counters.containsKey("wc_signals_suppressed_sampled_run")) { + assertTrue( + counters.get("wc_signals_suppressed_sampled_run") > 0, + "Expected once-per-run suppression counter to increase"); + } + } + + /** Enables wall-clock profiling with the OS-state precheck explicitly turned on so both + * Approach A (precheck) and Approach B (park flag) are exercised in this combined test. */ + @Override + protected String getProfilerCommand() { + return "wall=1ms,filter=0,wallprecheck=true"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockPrecheckBaselineTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockPrecheckBaselineTest.java new file mode 100644 index 000000000..7e05442e0 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockPrecheckBaselineTest.java @@ -0,0 +1,243 @@ +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.openjdk.jmc.common.unit.UnitLookup.EPOCH_NS; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.locks.LockSupport; + +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.common.unit.IQuantity; +import org.openjdk.jmc.common.unit.IRange; +import org.openjdk.jmc.flightrecorder.JfrAttributes; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +/** + * Cross-recording proof of the gap-fill property. + * + *

Recording A ({@code wallprecheck=false}): every {@code SIGVTALRM} fires; almost every + * {@code PARKED MethodSample} on the worker should lie inside some {@code TaskBlock} interval + * on the worker, because {@code parkEnter}/{@code parkExit} fences exactly the kernel-wait + * time during which {@code PARKED} state is observable. + * + *

Recording B ({@code wallprecheck=true}): the same workload produces the same + * {@code TaskBlock} intervals (parkEnter/parkExit is filter-independent), but those intervals + * are now empty of {@code PARKED MethodSamples} — the once-per-run filter has suppressed every + * signal during the runs. + * + *

Together, the two checks form the gap-fill proof: each {@code TaskBlock} interval contained + * sample(s) in baseline (recording A) and is empty under the filter (recording B), so the + * {@code TaskBlock} interval covers the slot left by suppression one-for-one. + * + *

{@link TaskBlockCoverageTest} is the within-recording counterpart that exercises recording-B + * containment in isolation. + */ +public class WallclockPrecheckBaselineTest extends AbstractProfilerTest { + + private static final String WORKER_A = "wallprecheck-baseline-A"; + private static final String WORKER_B = "wallprecheck-baseline-B"; + private static final int PARK_RUNS = 200; + private static final long PARK_NANOS = 50_000_000L; + private static final long SAMPLING_INTERVAL_MS = 10L; + + @Test + public void taskBlocksAccountForSuppressedSamples() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + // wallprecheck reads HotSpot OSThreadState; JDK 8 misreports sleep / park states. + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + + // Recording A: wallprecheck=false (started by @BeforeEach via getProfilerCommand()). + runParkWorkload(WORKER_A); + stopProfiler(); + List taskBlocksA = + collectIntervals(verifyEvents("datadog.TaskBlock"), WORKER_A, false); + List parkedSamplesA = + collectIntervals(verifyEvents("datadog.MethodSample", false), WORKER_A, true); + + // Recording B: wallprecheck=true, same workload, separate JFR file. + Path recordingB = Files.createTempFile(Paths.get("/tmp/recordings"), + "WallclockPrecheckBaselineTest_B_", ".jfr"); + profiler.execute("start,wall=" + SAMPLING_INTERVAL_MS + "ms,wallprecheck=true,filter=0" + + ",attributes=tag1;tag2;tag3,jfr,file=" + recordingB.toAbsolutePath()); + runParkWorkload(WORKER_B); + profiler.stop(); + + List taskBlocksB = + collectIntervals(verifyEvents(recordingB, "datadog.TaskBlock", true), WORKER_B, false); + List parkedSamplesB = + collectIntervals(verifyEvents(recordingB, "datadog.MethodSample", false), WORKER_B, true); + + Files.deleteIfExists(recordingB); + + long coveredA = countSamplesInsideAnyInterval(parkedSamplesA, taskBlocksA); + long emptyTaskBlocksB = countIntervalsWithoutAnySample(taskBlocksB, parkedSamplesB); + + System.out.printf( + "%nWallclockPrecheckBaselineTest:%n" + + " Recording A (precheck=false): TaskBlocks=%d, PARKED MethodSamples=%d, " + + "PARKED inside any TaskBlock=%d%n" + + " Recording B (precheck=true ): TaskBlocks=%d, PARKED MethodSamples=%d, " + + "empty TaskBlock intervals=%d%n", + taskBlocksA.size(), parkedSamplesA.size(), coveredA, + taskBlocksB.size(), parkedSamplesB.size(), emptyTaskBlocksB); + + // Structural: parkEnter/parkExit must emit in both modes (filter is sample-side only). + long minTb = PARK_RUNS / 2L; + assertTrue(taskBlocksA.size() >= minTb, + "recording A: expected ≥" + minTb + " TaskBlocks, got " + taskBlocksA.size()); + assertTrue(taskBlocksB.size() >= minTb, + "recording B: expected ≥" + minTb + " TaskBlocks, got " + taskBlocksB.size()); + + // The baseline must have enough PARKED samples for the coverage ratio to be meaningful. + assertTrue(parkedSamplesA.size() >= 10, + "recording A: expected ≥10 PARKED samples to make coverage check meaningful, got " + + parkedSamplesA.size()); + + // Baseline containment: ≥80% of PARKED MethodSamples on the worker in recording A lie + // inside some TaskBlock interval on the worker. This proves the TaskBlock interval + // temporally contains the moments where signals fire on a parked thread — i.e. the + // interval physically covers what the filter would later suppress. + double coverageRatioA = (double) coveredA / parkedSamplesA.size(); + assertTrue(coverageRatioA >= 0.8, + "recording A: expected ≥80% of PARKED samples to lie inside TaskBlock intervals, " + + "got " + coveredA + " / " + parkedSamplesA.size() + + " = " + String.format("%.1f%%", 100.0 * coverageRatioA)); + + // Gap-fill under filter: ≥95% of TaskBlock intervals on the worker in recording B + // contain zero PARKED samples on the worker. Combined with the recording A check, this + // proves the filter created gaps in MethodSample emission that the TaskBlock intervals + // cover one-for-one. + double gapRatioB = (double) emptyTaskBlocksB / taskBlocksB.size(); + assertTrue(gapRatioB >= 0.95, + "recording B: expected ≥95% of TaskBlock intervals to be empty of PARKED samples, " + + "got " + emptyTaskBlocksB + " / " + taskBlocksB.size() + + " = " + String.format("%.1f%%", 100.0 * gapRatioB)); + + // Per-interval strict invariant of the once-per-run filter: a TaskBlock interval is + // one CONDVAR_WAIT run by construction, so at most one PARKED MethodSample may fall + // inside any single TaskBlock interval — never two. Complements the gap-ratio check + // above (which bounds how many intervals carry an entry sample at all). + int maxSamplesInAnyTaskBlockB = 0; + int worstTaskBlockIdxB = -1; + for (int i = 0; i < taskBlocksB.size(); i++) { + Interval tb = taskBlocksB.get(i); + int count = 0; + for (Interval s : parkedSamplesB) { + if (s.startNs >= tb.startNs && s.startNs <= tb.endNs) { + count++; + } + } + if (count > maxSamplesInAnyTaskBlockB) { + maxSamplesInAnyTaskBlockB = count; + worstTaskBlockIdxB = i; + } + } + assertTrue(maxSamplesInAnyTaskBlockB <= 1, + "recording B: expected at most one PARKED MethodSample per TaskBlock interval, " + + "TaskBlock #" + worstTaskBlockIdxB + " contained " + + maxSamplesInAnyTaskBlockB + " PARKED samples"); + } + + private void runParkWorkload(String workerName) throws Exception { + Thread worker = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + // TaskBlock emission requires a non-zero spanId — taskBlockRecorder skips otherwise. + long spanId = 0x1234L; + long rootSpanId = 0x5678L; + profiler.setContext(rootSpanId, spanId, 0, 0); + try { + for (int i = 0; i < PARK_RUNS; i++) { + profiler.parkEnter(); + LockSupport.parkNanos(PARK_NANOS); + profiler.parkExit(System.identityHashCode(this), 0L); + } + } finally { + profiler.clearContext(); + } + }, workerName); + worker.setDaemon(true); + worker.start(); + worker.join(); + } + + private static final class Interval { + final long startNs; + final long endNs; + Interval(long startNs, long endNs) { + this.startNs = startNs; + this.endNs = endNs; + } + } + + private List collectIntervals(IItemCollection events, String workerName, + boolean requirePARKEDState) throws Exception { + List out = new ArrayList<>(); + for (IItemIterable batch : events) { + IMemberAccessor threadNameAccessor = + JdkAttributes.EVENT_THREAD_NAME.getAccessor(batch.getType()); + IMemberAccessor, IItem> lifetimeAccessor = + JfrAttributes.LIFETIME.getAccessor(batch.getType()); + IMemberAccessor stateAccessor = + requirePARKEDState ? THREAD_STATE.getAccessor(batch.getType()) : null; + if (threadNameAccessor == null || lifetimeAccessor == null) continue; + if (requirePARKEDState && stateAccessor == null) continue; + for (IItem item : batch) { + if (!workerName.equals(threadNameAccessor.getMember(item))) continue; + if (requirePARKEDState && !"PARKED".equals(stateAccessor.getMember(item))) continue; + IRange lifetime = lifetimeAccessor.getMember(item); + long startNs = lifetime.getStart().longValueIn(EPOCH_NS); + long endNs = lifetime.getEnd().longValueIn(EPOCH_NS); + out.add(new Interval(startNs, endNs)); + } + } + return out; + } + + private static long countSamplesInsideAnyInterval(List samples, + List intervals) { + long count = 0; + for (Interval s : samples) { + for (Interval iv : intervals) { + if (s.startNs >= iv.startNs && s.startNs <= iv.endNs) { + count++; + break; + } + } + } + return count; + } + + private static long countIntervalsWithoutAnySample(List intervals, + List samples) { + long count = 0; + for (Interval iv : intervals) { + boolean hasSample = false; + for (Interval s : samples) { + if (s.startNs >= iv.startNs && s.startNs <= iv.endNs) { + hasSample = true; + break; + } + } + if (!hasSample) count++; + } + return count; + } + + @Override + protected String getProfilerCommand() { + // Recording A: wallprecheck explicitly disabled (every signal fires). + return "wall=" + SAMPLING_INTERVAL_MS + "ms,wallprecheck=false,filter=0"; + } +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockTimerFastPathTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockTimerFastPathTest.java new file mode 100644 index 000000000..313228273 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/WallclockTimerFastPathTest.java @@ -0,0 +1,265 @@ +package com.datadoghq.profiler.wallclock; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; + +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; + +/** + * Regression guard for the wall-clock timer-side fast path that elides + * {@code pthread_kill}/{@code SIGVTALRM} delivery (and the cross-CPU IPIs it triggers) when a + * registered thread is in a {@code SLEEPING} / {@code CONDVAR_WAIT} run and has already produced + * its one entry {@code MethodSample}. + * + *

The handler-side once-per-run filter (covered by {@link PrecheckTest} and {@link + * TaskBlockCoverageTest}) suppresses JFR events after the entry sample, but the signals + * still fire. The fast path on the timer thread reads the same slot state and skips the syscall + * entirely — that is the property that actually reduces wakeups/IPIs on the production target. + * + *

{@link PrecheckTest} and {@link WallclockPrecheckBaselineTest} only assert on emitted JFR + * events, which are unaffected by whether the signal was elided at the timer or dropped in the + * handler. So if the timer fast path regresses (e.g. someone takes the slot lookup out, or the + * memory ordering breaks visibility of the {@code sampled_this_run} flag), those tests still pass + * and the IPI savings are silently lost. This test fills that gap by comparing the {@code + * wallclock_signal_own} counter (incremented once per handler invocation == once per delivered + * signal) between a {@code wallprecheck=false} baseline run and a {@code wallprecheck=true} run. + * + *

Two complementary workloads: + *

    + *
  • {@link #timerElidesIpiForLongParkRuns()} — one worker, long {@code LockSupport.parkNanos} + * runs. Maximises the elision ratio (most ticks land inside a single park run); makes the + * check tight and easy to diagnose when it fails.
  • + *
  • {@link #timerElidesIpiForRealisticServiceWorkload()} — a fixed thread pool whose threads + * sit parked in {@code LinkedBlockingQueue.take()} plus a {@code Thread.sleep}-driven + * scheduler and a CPU-bound thread. Mirrors a typical Java service profile, where + * {@code LockSupport.park} dominates the state distribution.
  • + *
+ * + *

Both methods register the workload threads with the wall-clock thread filter and run with + * {@code filter=0} so that only those threads receive signals — keeping the counter math + * dominated by workload signals rather than JVM-internal threads. + */ +public class WallclockTimerFastPathTest extends AbstractProfilerTest { + + private static final String COUNTER_KEY = "wallclock_signal_own"; + private static final long SAMPLING_INTERVAL_MS = 10L; + + // Allow up to 50% of the baseline signal volume after the fast path engages. The theoretical + // ratio is much lower (entry sample only), but CI noise (boundary jitter when the thread + // transitions in/out of park, JVM warmup ticks before the slot is associated, etc.) means a + // 50% guard is the right balance between catching regressions and not flaking. With the + // fast path working we routinely see ratios well below 25%. + private static final double MAX_ALLOWED_RATIO = 0.5; + + @Test + public void timerElidesIpiForLongParkRuns() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + // wallprecheck reads HotSpot OSThreadState; JDK 8 misreports sleep / park states. + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + + Map beforeA = profiler.getDebugCounters(); + Assumptions.assumeTrue(beforeA.containsKey(COUNTER_KEY), + "wallclock_signal_own counter not available (build without COUNTERS)"); + + // Recording A: wallprecheck=false (started by @BeforeEach via getProfilerCommand()). + runParkWorkload("fastpath-park-A"); + stopProfiler(); + long signalsA = signalDelta(beforeA, profiler.getDebugCounters()); + + // Recording B: wallprecheck=true, same workload, separate JFR file. + Path recordingB = Files.createTempFile(Paths.get("/tmp/recordings"), + "WallclockTimerFastPathTest_park_B_", ".jfr"); + profiler.execute("start,wall=" + SAMPLING_INTERVAL_MS + "ms,wallprecheck=true,filter=0" + + ",attributes=tag1;tag2;tag3,jfr,file=" + recordingB.toAbsolutePath()); + Map beforeB = profiler.getDebugCounters(); + runParkWorkload("fastpath-park-B"); + profiler.stop(); + long signalsB = signalDelta(beforeB, profiler.getDebugCounters()); + + Files.deleteIfExists(recordingB); + + assertIpiFastPathEngaged("park", signalsA, signalsB); + } + + @Test + public void timerElidesIpiForRealisticServiceWorkload() throws Exception { + Assumptions.assumeTrue(!Platform.isJ9()); + Assumptions.assumeTrue(Platform.isJavaVersionAtLeast(11)); + + Map beforeA = profiler.getDebugCounters(); + Assumptions.assumeTrue(beforeA.containsKey(COUNTER_KEY), + "wallclock_signal_own counter not available (build without COUNTERS)"); + + runRealisticWorkload("realistic-A"); + stopProfiler(); + long signalsA = signalDelta(beforeA, profiler.getDebugCounters()); + + Path recordingB = Files.createTempFile(Paths.get("/tmp/recordings"), + "WallclockTimerFastPathTest_realistic_B_", ".jfr"); + profiler.execute("start,wall=" + SAMPLING_INTERVAL_MS + "ms,wallprecheck=true,filter=0" + + ",attributes=tag1;tag2;tag3,jfr,file=" + recordingB.toAbsolutePath()); + Map beforeB = profiler.getDebugCounters(); + runRealisticWorkload("realistic-B"); + profiler.stop(); + long signalsB = signalDelta(beforeB, profiler.getDebugCounters()); + + Files.deleteIfExists(recordingB); + + assertIpiFastPathEngaged("realistic", signalsA, signalsB); + } + + private long signalDelta(Map before, Map after) { + return after.getOrDefault(COUNTER_KEY, 0L) - before.getOrDefault(COUNTER_KEY, 0L); + } + + private void assertIpiFastPathEngaged(String tag, long signalsA, long signalsB) { + double ratio = signalsA == 0 ? Double.NaN : (double) signalsB / signalsA; + System.out.printf( + "%nWallclockTimerFastPathTest [%s]:%n" + + " Recording A (wallprecheck=false): wallclock_signal_own delta = %d%n" + + " Recording B (wallprecheck=true ): wallclock_signal_own delta = %d%n" + + " ratio B/A = %.3f (max allowed %.3f)%n", + tag, signalsA, signalsB, ratio, MAX_ALLOWED_RATIO); + + // Sanity: the baseline must actually have produced signals — otherwise the ratio is + // meaningless and the test is silently green for the wrong reason. The threshold is + // intentionally permissive (a few dozen) since CI runners vary widely; the meaningful + // assertion is the ratio check below. + assertTrue(signalsA > 20, + "recording A: baseline produced too few wallclock_signal_own ticks (" + signalsA + + "); workload may not be running long enough"); + + // Core IPI-elide invariant: the wall-clock timer must skip the syscall for parked + // threads after the entry sample. If this regresses, JP6 still suppresses the JFR + // events (so PrecheckTest passes) but every tick still wakes the worker CPU. + assertTrue(signalsB < signalsA * MAX_ALLOWED_RATIO, + "timer fast path appears disengaged: recording A delivered " + signalsA + + " signals, recording B delivered " + signalsB + + " (ratio " + String.format("%.3f", ratio) + + "; expected < " + MAX_ALLOWED_RATIO + ")"); + } + + private void runParkWorkload(String workerName) throws Exception { + // Re-park loop: LockSupport.parkNanos returns early when SIGVTALRM interrupts the + // underlying pthread_cond_timedwait, which without re-parking would collapse the + // effective parked time to near-zero (one signal per nominal park run). We loop + // until the deadline to keep the worker continuously parked for ~PARK_NANOS each + // run, which is what the timer fast path is supposed to exploit. + final int PARK_RUNS = 100; + final long PARK_NANOS = 100_000_000L; // 100 ms per run → ~10 s total parked time + AtomicBoolean stop = new AtomicBoolean(false); + + Thread worker = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + // TaskBlock emission requires a non-zero spanId — keeps the workload aligned with + // how parkEnter/parkExit are exercised in the other wall-clock tests. The counter + // we check (wallclock_signal_own) is independent of TaskBlock emission. + profiler.setContext(0x5678L, 0x1234L, 0, 0); + try { + for (int i = 0; i < PARK_RUNS && !stop.get(); i++) { + profiler.parkEnter(); + long deadline = System.nanoTime() + PARK_NANOS; + long remaining; + while (!stop.get() && (remaining = deadline - System.nanoTime()) > 0) { + LockSupport.parkNanos(remaining); + } + profiler.parkExit(System.identityHashCode(this), 0L); + } + } finally { + profiler.clearContext(); + } + }, workerName); + worker.setDaemon(true); + worker.start(); + worker.join(); + } + + private void runRealisticWorkload(String tag) throws Exception { + // Tuned for ~1s of measurement, mirroring PrecheckEfficiencyTest.realisticServiceWorkload + // but scaled up enough to give the counter a clear signal under filter=0. + final int POOL_SIZE = 8; + final int TASK_DURATION_MS = 20; + final int SCHEDULE_INTERVAL_MS = 50; + final int MEASUREMENT_MS = 1000; + + AtomicBoolean stop = new AtomicBoolean(false); + AtomicInteger threadIndex = new AtomicInteger(0); + + // Mostly-idle pool: threads park in LinkedBlockingQueue.take() between submissions. + ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE, r -> { + Thread t = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + r.run(); + }); + t.setName(tag + "-pool-" + threadIndex.incrementAndGet()); + t.setDaemon(true); + return t; + }); + + // Pre-warm: ensure all POOL_SIZE threads exist and are parked before measurement. + CountDownLatch primed = new CountDownLatch(POOL_SIZE); + for (int i = 0; i < POOL_SIZE; i++) { + pool.submit(primed::countDown); + } + primed.await(); + Thread.sleep(50); + + Thread scheduler = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + while (!stop.get()) { + try { + Thread.sleep(SCHEDULE_INTERVAL_MS); + } catch (InterruptedException e) { + break; + } + pool.submit(() -> { + long x = 0; + long deadline = System.nanoTime() + TASK_DURATION_MS * 1_000_000L; + while (System.nanoTime() < deadline) { x++; } + return x; + }); + } + }, tag + "-scheduler"); + scheduler.setDaemon(true); + scheduler.start(); + + Thread hot = new Thread(() -> { + registerCurrentThreadForWallClockProfiling(); + long x = 0; + while (!stop.get()) { x++; } + }, tag + "-hot"); + hot.setDaemon(true); + hot.start(); + + Thread.sleep(MEASUREMENT_MS); + + stop.set(true); + scheduler.interrupt(); + pool.shutdownNow(); + pool.awaitTermination(2, TimeUnit.SECONDS); + hot.join(1000); + scheduler.join(1000); + } + + @Override + protected String getProfilerCommand() { + // Recording A baseline: wallprecheck explicitly disabled, filter=0 so only registered + // threads are sampled (keeps wallclock_signal_own dominated by workload signals). + return "wall=" + SAMPLING_INTERVAL_MS + "ms,wallprecheck=false,filter=0"; + } +} diff --git a/doc/architecture/TLSContext.md b/doc/architecture/TLSContext.md index 0a17829dc..e55f3d339 100644 --- a/doc/architecture/TLSContext.md +++ b/doc/architecture/TLSContext.md @@ -500,6 +500,41 @@ Full benchmark data and analysis: `@Threads(2)` and `@Threads(4)` variants to verify linear scaling (absence of false sharing). +## Wall-clock park TLS, TaskBlock, and OS-thread precheck + +### Park TLS (`ProfiledThread`) + +`parkEnter` / `parkExit` track Java-level `LockSupport` park intervals with span context +snapshots for wall-clock collapsing and TaskBlock emission. The parked flag suppresses +wall-clock signals on filtered threads when enabled. + +### `wallprecheck` (default `false`, opt-in) + +Disabled by default. When explicitly enabled with `wallprecheck=true` *and* thread filtering +is on (`Arguments::_wall_precheck == true`), the wall-clock timer samples **HotSpot** +`VMThread::osThreadState()` **before** sending `SIGVTALRM`. Slots must hold a non-null +`VMThread*` from registration (`ThreadFilter::setVMThread`) for the precheck to run; +otherwise only Java park-flag suppression applies. + +On JDK **21+**, `Thread.sleep` often maps to **CONDVAR_WAIT** (not only legacy **SLEEPING**), +same broad category as condvar-based park — see `PrecheckTest` and `wallClock.cpp`. + +### JVMTI `MonitorWait` (observes `Object.wait`) / TaskBlock (JDK before 21) + +JVMTI `MonitorWait` / `MonitorWaited` callbacks are registered only when +`java_version() < 21` (`vmEntry.cpp`). They observe `Object.wait()` calls (HotSpot state +`OBJECT_WAIT`) — **not** `OSThreadState::MONITOR_WAIT` (which is the unrelated +synchronized-contention state). In our native code these are dispatched as +`Profiler::ObjectWaitEnter` / `Profiler::ObjectWaitExit`, snapshotting/clearing the +`_object_wait_*` TLS fields on `ProfiledThread`. They populate TaskBlock with a +**blocker** value matching **`System.identityHashCode(waitedObject)`** (same convention as +the JNI park path). + +With the default **`wallprecheck=false`**, OS-thread-state suppression is off, so parked-flag +behavior can be tested without condvar/sleep overlap (isolation tests such as +`ParkTaskBlockTest`). The precheck path is exercised by `PrecheckTest` and +`WallclockMitigationsCombinedTest`, which both pass `wallprecheck=true` explicitly. + ## OTEP References - [OTEP #4947 — Profiling Signal Conventions](https://github.com/open-telemetry/oteps/pull/4947): diff --git a/doc/reference/EventTypeSystem.md b/doc/reference/EventTypeSystem.md index f3b3d49ae..7b1b43b2f 100644 --- a/doc/reference/EventTypeSystem.md +++ b/doc/reference/EventTypeSystem.md @@ -232,3 +232,25 @@ The eventTypeFromBCI() conversion function provides a clean bridge between Datad - Preserves the semantic benefits of BCI_* identifiers When working with event types, always use the conversion function when calling code that expects EventType, and use BCI_* constants directly in Datadog-specific code. + +## Datadog JFR extensions (wall-clock / TaskBlock) + +Defined in `ddprof-lib/src/main/cpp/jfrMetadata.h` / `jfrMetadata.cpp`: + +- **`datadog.TaskBlock`** (`T_TASK_BLOCK`) — blocking intervals with `blocker` carrying the same + **32-bit identity hash bits** as `System.identityHashCode` (JVMTI wait path and JNI park path). +- **`datadog.WallClockSamplingEpoch`** (`T_WALLCLOCK_SAMPLE_EPOCH`) — timer aggregation snapshot; + appended numeric fields must remain backward-compatible for parsers that tolerate trailing varints. + +**JVMTI**: `MonitorWait` / `MonitorWaited` (which observe `Object.wait()`, dispatched in our +native code as `Profiler::ObjectWaitEnter` / `Profiler::ObjectWaitExit`) are registered only +for `java_version() < 21` (`vmEntry.cpp`). Note: the JVMTI event names are after the +`ObjectMonitor` backing `Object.wait()`, **not** after `OSThreadState::MONITOR_WAIT` (which is +the unrelated state "in the entry queue of a contested `synchronized` block"). + +### Downstream ingestion + +- Confirm profiling pipelines accept appended `WallClockSamplingEpoch` fields and `datadog.TaskBlock` + (`jfrMetadata.h`). +- Internal debug counters (`wc_*`, `task_block_*` in `counters.h`) may require metric allowlist updates + where applicable.