From 3a61202a45057dfeb8e4ba568594805125f915b5 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 13 May 2026 12:24:49 +0200 Subject: [PATCH 01/25] fix(profiler): pre-register vtable receiver classes via JVMTI (PROF-14618) Restore vtable-target frame capture in CPU-only recordings by populating _class_map from safe JVM-thread contexts: - Profiler::preregisterLoadedClasses(jvmtiEnv*) bulk-inserts via GetLoadedClasses + GetClassSignature + ObjectSampler::normalizeClassSignature. Called inside the existing exclusive _class_map_lock at recording start and dump so signal-handler readers see a populated map. - VM::ClassPrepare moved out-of-line into vmEntry.cpp; now also inserts newly prepared classes when StackWalkFeatures.vtable_target is set. - Hot signal-handler path in hotspotSupport.cpp is unchanged. New dictionary_concurrent_ut.cpp test pins the bounded_lookup(0) hit contract after bulk pre-registration. A new VtableTargetCpuTest is included @Disabled with a TODO for the follow-up (vtable_target has no CLI toggle yet, and the synthetic BCI_ALLOC frame is recorded as a class id, not in the stack-trace string). Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 38 ++++++ ddprof-lib/src/main/cpp/profiler.h | 5 + ddprof-lib/src/main/cpp/vmEntry.cpp | 28 +++++ ddprof-lib/src/main/cpp/vmEntry.h | 6 +- .../src/test/cpp/dictionary_concurrent_ut.cpp | 32 +++++ .../profiler/cpu/VtableTargetCpuTest.java | 114 ++++++++++++++++++ 6 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f613103ee..f55c13d73 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1084,6 +1084,9 @@ Error Profiler::start(Arguments &args, bool reset) { // it is being cleaned up _class_map_lock.lock(); _class_map.clear(); + if (args._features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } _class_map_lock.unlock(); // Reset call trace storage @@ -1399,6 +1402,9 @@ Error Profiler::dump(const char *path, const int length) { // Reset classmap _class_map_lock.lock(); _class_map.clear(); + if (_features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } _class_map_lock.unlock(); _thread_info.clearAll(thread_ids); @@ -1539,6 +1545,38 @@ int Profiler::lookupClass(const char *key, size_t length) { return -1; } +void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { + if (jvmti == nullptr) { + return; + } + jint class_count = 0; + jclass* classes = nullptr; + if (jvmti->GetLoadedClasses(&class_count, &classes) != JVMTI_ERROR_NONE || + classes == nullptr) { + return; + } + for (jint i = 0; i < class_count; i++) { + char* sig = nullptr; + if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE || + sig == nullptr) { + if (sig != nullptr) { + jvmti->Deallocate(reinterpret_cast(sig)); + } + continue; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + // Caller holds _class_map_lock exclusively — call _class_map.lookup + // directly. Do NOT call lookupClass(), which would re-attempt + // tryLockShared() and deadlock. + (void)_class_map.lookup(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); + } + jvmti->Deallocate(reinterpret_cast(classes)); +} + int Profiler::status(char* status, int max_len) { return snprintf(status, max_len, "== Java-Profiler Status ===\n" diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 4ba24905d..e7a430451 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -213,6 +213,11 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); + // Pre-populate _class_map with all currently-loaded reference classes so that + // signal-safe lookups in walkVM (vtable_target) can resolve them without ever + // needing to malloc. Caller MUST hold _class_map_lock EXCLUSIVELY. Runs on a + // JVM thread (never in a signal handler). + void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { _call_trace_storage.processTraces(processor); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 76fd49c68..ccc51349a 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -575,6 +575,34 @@ void VM::loadAllMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni) { } } +void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass) { + loadMethodIDs(jvmti, jni, klass); + + // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare + // runs on a JVM thread (never a signal handler) so malloc inside + // _class_map.lookup is legal. Gate on vtable_target to avoid overhead when + // the feature is disabled. + Profiler* profiler = Profiler::instance(); + if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { + return; + } + char* sig = nullptr; + if (jvmti->GetClassSignature(klass, &sig, nullptr) != JVMTI_ERROR_NONE || + sig == nullptr) { + if (sig != nullptr) { + jvmti->Deallocate(reinterpret_cast(sig)); + } + return; + } + const char* slice = nullptr; + size_t slice_len = 0; + if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { + (void)profiler->lookupClass(slice, slice_len); + } + jvmti->Deallocate(reinterpret_cast(sig)); +} + void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) { ready(jvmti, jni); loadAllMethodIDs(jvmti, jni); diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index 6be4c87bb..2c98548c9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -198,10 +198,8 @@ class VM { // Needed only for AsyncGetCallTrace support } - static void JNICALL ClassPrepare(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, - jclass klass) { - loadMethodIDs(jvmti, jni, klass); - } + static void JNICALL ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass); static jvmtiError JNICALL RedefineClassesHook(jvmtiEnv *jvmti, jint class_count, diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ad2ccb17e..ab985b81d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,6 +250,38 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } +// (4a) Bulk exclusive-lock insert (mimicking preregisterLoadedClasses) followed +// by read-only bounded_lookup(0) on each inserted key. Verifies that the +// pre-registration contract holds: every key inserted while the exclusive lock +// is held is subsequently visible to bounded_lookup(0) from a shared context. +TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { + Dictionary dict(/*id=*/0); + + constexpr int kBulk = 50; + char keys[kBulk][64]; + unsigned int inserted_ids[kBulk]; + + // Bulk insert under an exclusive lock — mirrors preregisterLoadedClasses. + for (int i = 0; i < kBulk; i++) { + snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); + inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); + ASSERT_NE(0u, inserted_ids[i]); + ASSERT_NE(static_cast(INT_MAX), inserted_ids[i]); + } + + // Read back with bounded_lookup(0) — must return the same id, no malloc. + for (int i = 0; i < kBulk; i++) { + unsigned int found = dict.bounded_lookup(keys[i], strlen(keys[i]), 0); + EXPECT_EQ(inserted_ids[i], found) + << "bounded_lookup returned wrong id for key " << keys[i]; + } + + // A never-inserted key must return INT_MAX. + const char* absent = "java/util/NeverInserted"; + EXPECT_EQ(static_cast(INT_MAX), + dict.bounded_lookup(absent, strlen(absent), 0)); +} + // (4) Same race as (3) but using BoundedOptionalSharedLockGuard, which is the // guard classMapTrySharedGuard() now returns in hotspotSupport.cpp. The bounded // variant may fail spuriously under reader pressure (≤5 CAS attempts); this diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java new file mode 100644 index 000000000..1a29361d2 --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java @@ -0,0 +1,114 @@ +/* + * Copyright 2026, Datadog, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package com.datadoghq.profiler.cpu; + +import com.datadoghq.profiler.AbstractProfilerTest; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Verifies vtable-target pre-registration: when vtable_target is enabled, + * CPU samples taken at vtable/itable dispatch stubs should carry a synthetic + * BCI_ALLOC frame whose class name matches the polymorphic receiver type. + * + * TODO (PROF-14618 follow-up): two blockers prevent enabling this test: + * 1. vtable_target has no command-line argument; needs a "features=vtable_target" + * (or equivalent) toggle wired through Arguments::parse(). + * 2. The current STACK_TRACE_STRING-based assertion is wrong: vtable_target + * emits the receiver class as a synthetic BCI_ALLOC frame whose label is + * stored as the class-id, not as text in the stack trace string. When + * enabling, replace the trace.contains(...) check with a scan over each + * ExecutionSample's stack-frame items looking for a BCI_ALLOC frame whose + * class label matches the receiver type. + */ +@Disabled("PROF-14618 follow-up: nail down assertion shape — vtable_target has no CLI toggle yet") +public class VtableTargetCpuTest extends AbstractProfilerTest { + + // Two concrete receiver types to force polymorphic vtable dispatch. + interface Workload { + long compute(long seed); + } + + static final class WorkloadA implements Workload { + @Override + public long compute(long seed) { + long x = seed; + for (int i = 0; i < 1_000; i++) { + x = x * 6364136223846793005L + 1442695040888963407L; + } + return x; + } + } + + static final class WorkloadB implements Workload { + @Override + public long compute(long seed) { + long x = seed; + for (int i = 0; i < 1_000; i++) { + x = x * 2862933555777941757L + 3037000493L; + } + return x; + } + } + + @Test + public void vtableStubReceiverClassAppearsInSamples() throws Exception { + Workload a = new WorkloadA(); + Workload b = new WorkloadB(); + + // Drive polymorphic dispatch for ~3 s so the JIT compiles the call site + // as a vtable stub and the profiler collects CPU samples inside it. + long result = 0; + long deadline = System.currentTimeMillis() + 3_000; + while (System.currentTimeMillis() < deadline) { + result += a.compute(result); + result += b.compute(result); + } + // Prevent dead-code elimination. + if (result == 0) { + throw new AssertionError("unreachable"); + } + + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.ExecutionSample"); + + // At least one sample must contain a synthetic frame whose label + // includes the receiver class simple name. The label is recorded as + // the "allocated class" name (BCI_ALLOC frame) for vtable-stub samples. + boolean foundVtableFrame = false; + for (IItemIterable cpuSamples : events) { + IMemberAccessor frameAccessor = + JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); + for (IItem sample : cpuSamples) { + String trace = frameAccessor.getMember(sample); + if (trace != null && (trace.contains("WorkloadA") || trace.contains("WorkloadB"))) { + foundVtableFrame = true; + break; + } + } + if (foundVtableFrame) break; + } + + assertTrue(foundVtableFrame, + "Expected at least one CPU sample with a vtable-stub receiver frame " + + "(WorkloadA or WorkloadB) but none were found"); + } + + @Override + protected String getProfilerCommand() { + // TODO (PROF-14618): replace with "cpu=10ms,features=vtable_target" once + // Arguments::parse() supports the vtable_target flag. + return "cpu=10ms"; + } +} From 3a34b65a9f65fd7426f055174d92c52c5932cb48 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 13 May 2026 12:57:01 +0200 Subject: [PATCH 02/25] fixup: chorus feedback (logging, dead-code, drop disabled test) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Log::warn on JVMTI error paths in preregisterLoadedClasses and ClassPrepare so silent skips are observable. - Drop dead-code `if (sig != nullptr)` guards after the `|| sig == nullptr` short-circuit (JVMTI spec leaves output undefined on error). - Remove disabled VtableTargetCpuTest.java — integration test deferred to a follow-up once vtable_target gets a CLI toggle and the synthetic-frame assertion shape is settled. Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 17 +-- ddprof-lib/src/main/cpp/vmEntry.cpp | 11 +- .../profiler/cpu/VtableTargetCpuTest.java | 114 ------------------ 3 files changed, 16 insertions(+), 126 deletions(-) delete mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index f55c13d73..709c8e448 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1551,17 +1551,20 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { } jint class_count = 0; jclass* classes = nullptr; - if (jvmti->GetLoadedClasses(&class_count, &classes) != JVMTI_ERROR_NONE || - classes == nullptr) { + jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); + if (err != JVMTI_ERROR_NONE) { + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — vtable-target frames will miss until ClassPrepare fires", err); + return; + } + if (classes == nullptr) { return; } for (jint i = 0; i < class_count; i++) { char* sig = nullptr; - if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE || - sig == nullptr) { - if (sig != nullptr) { - jvmti->Deallocate(reinterpret_cast(sig)); - } + if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + continue; + } + if (sig == nullptr) { continue; } const char* slice = nullptr; diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index ccc51349a..6ae12beaa 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -588,11 +588,12 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, return; } char* sig = nullptr; - if (jvmti->GetClassSignature(klass, &sig, nullptr) != JVMTI_ERROR_NONE || - sig == nullptr) { - if (sig != nullptr) { - jvmti->Deallocate(reinterpret_cast(sig)); - } + jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); + if (err != JVMTI_ERROR_NONE) { + Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration", err); + return; + } + if (sig == nullptr) { return; } const char* slice = nullptr; diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java deleted file mode 100644 index 1a29361d2..000000000 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetCpuTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2026, Datadog, Inc. - * SPDX-License-Identifier: Apache-2.0 - */ - -package com.datadoghq.profiler.cpu; - -import com.datadoghq.profiler.AbstractProfilerTest; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.openjdk.jmc.common.item.IItem; -import org.openjdk.jmc.common.item.IItemCollection; -import org.openjdk.jmc.common.item.IItemIterable; -import org.openjdk.jmc.common.item.IMemberAccessor; -import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Verifies vtable-target pre-registration: when vtable_target is enabled, - * CPU samples taken at vtable/itable dispatch stubs should carry a synthetic - * BCI_ALLOC frame whose class name matches the polymorphic receiver type. - * - * TODO (PROF-14618 follow-up): two blockers prevent enabling this test: - * 1. vtable_target has no command-line argument; needs a "features=vtable_target" - * (or equivalent) toggle wired through Arguments::parse(). - * 2. The current STACK_TRACE_STRING-based assertion is wrong: vtable_target - * emits the receiver class as a synthetic BCI_ALLOC frame whose label is - * stored as the class-id, not as text in the stack trace string. When - * enabling, replace the trace.contains(...) check with a scan over each - * ExecutionSample's stack-frame items looking for a BCI_ALLOC frame whose - * class label matches the receiver type. - */ -@Disabled("PROF-14618 follow-up: nail down assertion shape — vtable_target has no CLI toggle yet") -public class VtableTargetCpuTest extends AbstractProfilerTest { - - // Two concrete receiver types to force polymorphic vtable dispatch. - interface Workload { - long compute(long seed); - } - - static final class WorkloadA implements Workload { - @Override - public long compute(long seed) { - long x = seed; - for (int i = 0; i < 1_000; i++) { - x = x * 6364136223846793005L + 1442695040888963407L; - } - return x; - } - } - - static final class WorkloadB implements Workload { - @Override - public long compute(long seed) { - long x = seed; - for (int i = 0; i < 1_000; i++) { - x = x * 2862933555777941757L + 3037000493L; - } - return x; - } - } - - @Test - public void vtableStubReceiverClassAppearsInSamples() throws Exception { - Workload a = new WorkloadA(); - Workload b = new WorkloadB(); - - // Drive polymorphic dispatch for ~3 s so the JIT compiles the call site - // as a vtable stub and the profiler collects CPU samples inside it. - long result = 0; - long deadline = System.currentTimeMillis() + 3_000; - while (System.currentTimeMillis() < deadline) { - result += a.compute(result); - result += b.compute(result); - } - // Prevent dead-code elimination. - if (result == 0) { - throw new AssertionError("unreachable"); - } - - stopProfiler(); - - IItemCollection events = verifyEvents("datadog.ExecutionSample"); - - // At least one sample must contain a synthetic frame whose label - // includes the receiver class simple name. The label is recorded as - // the "allocated class" name (BCI_ALLOC frame) for vtable-stub samples. - boolean foundVtableFrame = false; - for (IItemIterable cpuSamples : events) { - IMemberAccessor frameAccessor = - JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); - for (IItem sample : cpuSamples) { - String trace = frameAccessor.getMember(sample); - if (trace != null && (trace.contains("WorkloadA") || trace.contains("WorkloadB"))) { - foundVtableFrame = true; - break; - } - } - if (foundVtableFrame) break; - } - - assertTrue(foundVtableFrame, - "Expected at least one CPU sample with a vtable-stub receiver frame " + - "(WorkloadA or WorkloadB) but none were found"); - } - - @Override - protected String getProfilerCommand() { - // TODO (PROF-14618): replace with "cpu=10ms,features=vtable_target" once - // Arguments::parse() supports the vtable_target flag. - return "cpu=10ms"; - } -} From 5a06eb7e54d6763db1c392d59f7e844edf975357 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 May 2026 21:30:22 +0200 Subject: [PATCH 03/25] address: review feedback on vtable JVMTI pre-registration - ClassPrepare: blocking SharedLockGuard + classMap()->lookup() (was best-effort lookupClass) - preregisterLoadedClasses: filter to 'L'-type signatures; DeleteLocalRef each class - Test/header comments rewritten to match new lock protocol and L-type filter Addresses PR #527 review comments: - 3233432627 (Copilot, vmEntry.cpp): ClassPrepare no longer silently skips during dump - 3233432702 (Copilot, profiler.h/cpp): only reference types inserted into _class_map - 3233432734 (Copilot, dictionary_concurrent_ut.cpp): no false 'exclusive lock' claim - 3260404113 (Codex, profiler.cpp): JNI local refs released at every loop exit Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 14 ++++++++++++++ ddprof-lib/src/main/cpp/profiler.h | 10 ++++++---- ddprof-lib/src/main/cpp/vmEntry.cpp | 17 +++++++++++++---- .../src/test/cpp/dictionary_concurrent_ut.cpp | 13 ++++++++----- 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 709c8e448..1cd50b911 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1549,6 +1549,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jvmti == nullptr) { return; } + JNIEnv* jni = VM::jni(); jint class_count = 0; jclass* classes = nullptr; jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); @@ -1559,12 +1560,24 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (classes == nullptr) { return; } + // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at + // every loop exit so the local-reference table does not grow across repeated + // start/dump calls on large applications. for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } if (sig == nullptr) { + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); + continue; + } + // Only 'L'-type (reference) signatures can ever match vtable_target lookup + // keys; skip primitives, arrays, and other non-reference signatures. + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } const char* slice = nullptr; @@ -1576,6 +1589,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { (void)_class_map.lookup(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); + if (jni != nullptr) jni->DeleteLocalRef(classes[i]); } jvmti->Deallocate(reinterpret_cast(classes)); } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index e7a430451..9108a6008 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -213,10 +213,12 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); - // Pre-populate _class_map with all currently-loaded reference classes so that - // signal-safe lookups in walkVM (vtable_target) can resolve them without ever - // needing to malloc. Caller MUST hold _class_map_lock EXCLUSIVELY. Runs on a - // JVM thread (never in a signal handler). + // Pre-populate _class_map with all currently-loaded 'L'-type (reference) + // class signatures so that signal-safe lookups in walkVM (vtable_target) can + // resolve them without ever needing to malloc. Primitives and arrays are + // skipped — they never match vtable lookup keys. Caller MUST hold + // _class_map_lock EXCLUSIVELY. Runs on a JVM thread (never in a signal + // handler). void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 6ae12beaa..81b928ea2 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -580,9 +580,11 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, loadMethodIDs(jvmti, jni, klass); // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare - // runs on a JVM thread (never a signal handler) so malloc inside - // _class_map.lookup is legal. Gate on vtable_target to avoid overhead when - // the feature is disabled. + // runs on a JVM thread (never a signal handler), so we take a blocking + // shared lock via classMapSharedGuard() rather than the best-effort + // tryLockShared() in lookupClass(). This ensures newly prepared classes are + // never silently skipped while an exclusive dump/reset is in flight. Gate on + // vtable_target to avoid overhead when the feature is disabled. Profiler* profiler = Profiler::instance(); if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { return; @@ -596,10 +598,17 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, if (sig == nullptr) { return; } + // Only 'L'-type (reference) signatures can ever match vtable_target lookup + // keys; skip primitives and arrays. + if (sig[0] != 'L') { + jvmti->Deallocate(reinterpret_cast(sig)); + return; + } const char* slice = nullptr; size_t slice_len = 0; if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - (void)profiler->lookupClass(slice, slice_len); + SharedLockGuard guard = profiler->classMapSharedGuard(); + (void)profiler->classMap()->lookup(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); } diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ab985b81d..fa5147a2d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,10 +250,12 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } -// (4a) Bulk exclusive-lock insert (mimicking preregisterLoadedClasses) followed -// by read-only bounded_lookup(0) on each inserted key. Verifies that the -// pre-registration contract holds: every key inserted while the exclusive lock -// is held is subsequently visible to bounded_lookup(0) from a shared context. +// (4a) Single-threaded bulk insert (mimicking the inserts preregisterLoadedClasses +// performs while holding _class_map_lock exclusively in production) followed by +// read-only bounded_lookup(0) on each inserted key. Verifies the pre-registration +// contract: every key inserted via Dictionary::lookup() is subsequently visible +// to bounded_lookup(0). This test takes no external lock — the production +// exclusive-lock protocol is exercised by the other tests in this file. TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { Dictionary dict(/*id=*/0); @@ -261,7 +263,8 @@ TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { char keys[kBulk][64]; unsigned int inserted_ids[kBulk]; - // Bulk insert under an exclusive lock — mirrors preregisterLoadedClasses. + // Bulk insert — mirrors the per-class lookup() calls preregisterLoadedClasses + // issues from a JVM thread. for (int i = 0; i < kBulk; i++) { snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); From 8fe88245cf1acf2c12016ace52cb2526765c1865 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 19 May 2026 13:37:02 +0000 Subject: [PATCH 04/25] fix: add warn logging for GetClassSignature per-class failures in preregisterLoadedClasses Agent-Logs-Url: https://github.com/DataDog/java-profiler/sessions/bdfc7f46-b926-497d-b779-a1cc8c5b7769 Co-authored-by: jbachorik <738413+jbachorik@users.noreply.github.com> --- ddprof-lib/src/main/cpp/profiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 1cd50b911..911c1c14a 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1563,9 +1563,11 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at // every loop exit so the local-reference table does not grow across repeated // start/dump calls on large applications. + jint sig_failures = 0; for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { + sig_failures++; if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } @@ -1592,6 +1594,9 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jni != nullptr) jni->DeleteLocalRef(classes[i]); } jvmti->Deallocate(reinterpret_cast(classes)); + if (sig_failures > 0) { + Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d of %d classes — those classes skipped for vtable-target pre-registration", sig_failures, class_count); + } } int Profiler::status(char* status, int max_len) { From e6989bc0f6d8f5445771ca68500f4c27f2fa5f1b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 May 2026 15:55:51 +0200 Subject: [PATCH 05/25] address: fix features race and resume gap in vtable_target preregistration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - profiler.cpp:1077 — move _features=args._features before reset block so ClassPrepare callbacks see correct vtable_target immediately - profiler.cpp:1117 — add else-if resume branch calling preregisterLoadedClasses without map clear, catching classes loaded while profiler was stopped - profiler.cpp:1574 — add sig_failures counter + post-loop Log::warn for GetClassSignature failures in preregisterLoadedClasses Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 37 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 911c1c14a..768a034d4 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1074,6 +1074,21 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("No profiling events specified"); } + // Commit _features before the reset/preregister block so ClassPrepare + // callbacks (which gate on _features.vtable_target) see the correct enabled + // state from the moment preregisterLoadedClasses releases the class-map lock. + _features = args._features; + if (VM::hotspot_version() < 8) { + _features.java_anchor = 0; + _features.gc_traces = 0; + } + if (!VMStructs::hasClassNames()) { + _features.vtable_target = 0; + } + if (!VMStructs::hasCompilerStructs()) { + _features.comp_task = 0; + } + if (reset || _start_time == 0) { // Reset counters _total_samples = 0; @@ -1099,6 +1114,13 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset thread names and IDs _thread_info.clearAll(); + } else if (_features.vtable_target && VMStructs::hasClassNames()) { + // Resume: classes loaded while the profiler was stopped miss ClassPrepare + // (JVMTI events are off while stopped). Re-snapshot without clearing so + // existing valid entries are preserved. + _class_map_lock.lock(); + preregisterLoadedClasses(VM::jvmti()); + _class_map_lock.unlock(); } // (Re-)allocate calltrace buffers @@ -1120,17 +1142,6 @@ Error Profiler::start(Arguments &args, bool reset) { // Remote symbolication is now inline in ASGCT_CallFrame // No separate pool allocation needed! - _features = args._features; - if (VM::hotspot_version() < 8) { - _features.java_anchor = 0; - _features.gc_traces = 0; - } - if (!VMStructs::hasClassNames()) { - _features.vtable_target = 0; - } - if (!VMStructs::hasCompilerStructs()) { - _features.comp_task = 0; - } _safe_mode = args._safe_mode; if (VM::hotspot_version() < 8 || VM::isZing()) { _safe_mode |= GC_TRACES | LAST_JAVA_PC; @@ -1567,7 +1578,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { for (jint i = 0; i < class_count; i++) { char* sig = nullptr; if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { - sig_failures++; + ++sig_failures; if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } @@ -1595,7 +1606,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { } jvmti->Deallocate(reinterpret_cast(classes)); if (sig_failures > 0) { - Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d of %d classes — those classes skipped for vtable-target pre-registration", sig_failures, class_count); + Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); } } From 92574654b15d33136f54a291ad0ce2c5ccb89838 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 May 2026 20:09:56 +0200 Subject: [PATCH 06/25] fix: address review comments on PR #527 - use _features.vtable_target (sanitized) instead of args._features - add isRunning() gated on _state_lock for ClassPrepare gate - two-phase preregisterLoadedClasses: JVMTI enum lock-free, bulk inserts under exclusive lock - gate ClassPrepare inserts on isRunning(); rate-limit GetClassSignature warn Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 34 ++++++++++++++++------------ ddprof-lib/src/main/cpp/profiler.h | 11 ++++++--- ddprof-lib/src/main/cpp/vmEntry.cpp | 8 +++++-- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 768a034d4..fd61422d1 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include #include @@ -1099,10 +1101,12 @@ Error Profiler::start(Arguments &args, bool reset) { // it is being cleaned up _class_map_lock.lock(); _class_map.clear(); - if (args._features.vtable_target && VMStructs::hasClassNames()) { + _class_map_lock.unlock(); + // preregisterLoadedClasses manages _class_map_lock internally; callers must + // not hold it (JVMTI enumeration is lock-free; inserts take the lock). + if (_features.vtable_target && VMStructs::hasClassNames()) { preregisterLoadedClasses(VM::jvmti()); } - _class_map_lock.unlock(); // Reset call trace storage if (!_omit_stacktraces) { @@ -1118,9 +1122,7 @@ Error Profiler::start(Arguments &args, bool reset) { // Resume: classes loaded while the profiler was stopped miss ClassPrepare // (JVMTI events are off while stopped). Re-snapshot without clearing so // existing valid entries are preserved. - _class_map_lock.lock(); preregisterLoadedClasses(VM::jvmti()); - _class_map_lock.unlock(); } // (Re-)allocate calltrace buffers @@ -1413,10 +1415,10 @@ Error Profiler::dump(const char *path, const int length) { // Reset classmap _class_map_lock.lock(); _class_map.clear(); + _class_map_lock.unlock(); if (_features.vtable_target && VMStructs::hasClassNames()) { preregisterLoadedClasses(VM::jvmti()); } - _class_map_lock.unlock(); _thread_info.clearAll(thread_ids); _thread_info.reportCounters(); @@ -1571,9 +1573,11 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (classes == nullptr) { return; } - // Each classes[i] is a JNI local reference allocated by JVMTI; delete it at - // every loop exit so the local-reference table does not grow across repeated - // start/dump calls on large applications. + // Phase 1 (no lock): enumerate signatures and copy normalized slices. + // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so + // the slice is copied into a std::string before Deallocate invalidates it. + std::vector sigs; + sigs.reserve(class_count); jint sig_failures = 0; for (jint i = 0; i < class_count; i++) { char* sig = nullptr; @@ -1586,8 +1590,6 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } - // Only 'L'-type (reference) signatures can ever match vtable_target lookup - // keys; skip primitives, arrays, and other non-reference signatures. if (sig[0] != 'L') { jvmti->Deallocate(reinterpret_cast(sig)); if (jni != nullptr) jni->DeleteLocalRef(classes[i]); @@ -1596,10 +1598,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { const char* slice = nullptr; size_t slice_len = 0; if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - // Caller holds _class_map_lock exclusively — call _class_map.lookup - // directly. Do NOT call lookupClass(), which would re-attempt - // tryLockShared() and deadlock. - (void)_class_map.lookup(slice, slice_len); + sigs.emplace_back(slice, slice_len); } jvmti->Deallocate(reinterpret_cast(sig)); if (jni != nullptr) jni->DeleteLocalRef(classes[i]); @@ -1608,6 +1607,13 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (sig_failures > 0) { Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); } + // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap + // Dictionary pointer operations happen under the lock — no JVMTI calls. + _class_map_lock.lock(); + for (const std::string& s : sigs) { + (void)_class_map.lookup(s.c_str(), s.size()); + } + _class_map_lock.unlock(); } int Profiler::status(char* status, int max_len) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 9108a6008..5de3f2995 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -197,6 +197,11 @@ class alignas(alignof(SpinLock)) Profiler { return _features; } + inline bool isRunning() { + MutexLocker ml(_state_lock); + return _state == RUNNING; + } + u64 total_samples() { return _total_samples; } int max_stack_depth() { return _max_stack_depth; } time_t uptime() { return time(NULL) - _start_time; } @@ -216,9 +221,9 @@ class alignas(alignof(SpinLock)) Profiler { // Pre-populate _class_map with all currently-loaded 'L'-type (reference) // class signatures so that signal-safe lookups in walkVM (vtable_target) can // resolve them without ever needing to malloc. Primitives and arrays are - // skipped — they never match vtable lookup keys. Caller MUST hold - // _class_map_lock EXCLUSIVELY. Runs on a JVM thread (never in a signal - // handler). + // skipped — they never match vtable lookup keys. Caller must NOT hold + // _class_map_lock; this function acquires it internally for the bulk-insert + // phase only. Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 81b928ea2..417174235 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -18,6 +18,7 @@ #include "safeAccess.h" #include "hotspot/vmStructs.h" #include "hotspot/jitCodeCache.h" +#include #include #include #include @@ -586,13 +587,16 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, // never silently skipped while an exclusive dump/reset is in flight. Gate on // vtable_target to avoid overhead when the feature is disabled. Profiler* profiler = Profiler::instance(); - if (profiler == nullptr || !profiler->stackWalkFeatures().vtable_target) { + if (profiler == nullptr || !profiler->isRunning() || !profiler->stackWalkFeatures().vtable_target) { return; } char* sig = nullptr; jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); if (err != JVMTI_ERROR_NONE) { - Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration", err); + static std::atomic warn_count{0}; + if (warn_count.fetch_add(1, std::memory_order_relaxed) == 0) { + Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration (further failures suppressed)", err); + } return; } if (sig == nullptr) { From 3bf50618c63f0d82d204bcfc696e216fd549f7cd Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 May 2026 13:12:11 +0200 Subject: [PATCH 07/25] fix(vtable-target): register array class signatures in preregisterLoadedClasses Array classes (e.g. [I, [Ljava/lang/String;) are valid vtable dispatch receivers for Object methods (toString, hashCode). Their klass names are used as lookup keys in walkVM, but the sig[0] != 'L' filter in preregisterLoadedClasses and ClassPrepare silently dropped all array signatures. normalizeClassSignature already handles '['-prefixed signatures correctly (returns them verbatim), so extend the filter to allow '['-prefixed sigs through. Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/profiler.cpp | 2 +- ddprof-lib/src/main/cpp/profiler.h | 6 +++--- ddprof-lib/src/main/cpp/vmEntry.cpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index fd61422d1..587b3ba4e 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1590,7 +1590,7 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; } - if (sig[0] != 'L') { + if (sig[0] != 'L' && sig[0] != '[') { jvmti->Deallocate(reinterpret_cast(sig)); if (jni != nullptr) jni->DeleteLocalRef(classes[i]); continue; diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 5de3f2995..d34b32d33 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -219,9 +219,9 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); // Pre-populate _class_map with all currently-loaded 'L'-type (reference) - // class signatures so that signal-safe lookups in walkVM (vtable_target) can - // resolve them without ever needing to malloc. Primitives and arrays are - // skipped — they never match vtable lookup keys. Caller must NOT hold + // and array ('[') class signatures so that signal-safe lookups in walkVM + // (vtable_target) can resolve them without ever needing to malloc. Only bare + // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold // _class_map_lock; this function acquires it internally for the bulk-insert // phase only. Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 417174235..e81488e5f 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -602,9 +602,9 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, if (sig == nullptr) { return; } - // Only 'L'-type (reference) signatures can ever match vtable_target lookup - // keys; skip primitives and arrays. - if (sig[0] != 'L') { + // Only 'L'-type (reference) and array ('[') signatures can match vtable_target + // lookup keys; skip bare primitive type descriptors (I, B, C, etc.). + if (sig[0] != 'L' && sig[0] != '[') { jvmti->Deallocate(reinterpret_cast(sig)); return; } From 04a3dec6fb0ad41ab5f4ba8f85217ee861872a9f Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 May 2026 17:52:03 +0200 Subject: [PATCH 08/25] fix: use ExclusiveLockGuard and atomic state helper for class-map and state accesses - Replace all manual _class_map_lock.lock()/unlock() pairs with ExclusiveLockGuard - Add private state() helper for relaxed atomic state reads - Replace all _state.load(memory_order_relaxed) calls with state() - isRunning() uses memory_order_acquire directly (lock-free, not via helper) Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/profiler.cpp | 73 ++++++++++++++++++---------- ddprof-lib/src/main/cpp/profiler.h | 14 ++++-- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 587b3ba4e..5049ac397 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1052,7 +1052,7 @@ void Profiler::check_JDK_8313796_workaround() { Error Profiler::start(Arguments &args, bool reset) { MutexLocker ml(_state_lock); - if (_state > IDLE) { + if (state() > IDLE) { return Error("Profiler already started"); } @@ -1099,9 +1099,10 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset dictionaries and bitmaps // Reset class map under lock because ObjectSampler may try to use it while // it is being cleaned up - _class_map_lock.lock(); - _class_map.clear(); - _class_map_lock.unlock(); + { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); + } // preregisterLoadedClasses manages _class_map_lock internally; callers must // not hold it (JVMTI enumeration is lock-free; inserts take the lock). if (_features.vtable_target && VMStructs::hasClassNames()) { @@ -1272,10 +1273,14 @@ Error Profiler::start(Arguments &args, bool reset) { // TODO: find a better way to resolve the thread name. onThreadStart(nullptr, nullptr, nullptr); - _state = RUNNING; + _state.store(RUNNING, std::memory_order_release); _start_time = time(NULL); __atomic_add_fetch(&_epoch, 1, __ATOMIC_RELAXED); - + // Second snapshot: catch classes prepared during the engine-startup window + // (between _features.vtable_target=1 and the RUNNING store above). + if (_features.vtable_target && VMStructs::hasClassNames()) { + preregisterLoadedClasses(VM::jvmti()); + } return Error::OK; } // no engine was activated; perform cleanup @@ -1292,7 +1297,7 @@ Error Profiler::start(Arguments &args, bool reset) { Error Profiler::stop() { MutexLocker ml(_state_lock); - if (_state != RUNNING) { + if (state() != RUNNING) { return Error("Profiler is not active"); } @@ -1326,13 +1331,13 @@ Error Profiler::stop() { // owned by library metadata, so we must keep library patches active until after serialization LibraryPatcher::unpatch_libraries(); - _state = IDLE; + _state.store(IDLE, std::memory_order_release); return Error::OK; } Error Profiler::check(Arguments &args) { MutexLocker ml(_state_lock); - if (_state > IDLE) { + if (state() > IDLE) { return Error("Profiler already started"); } @@ -1369,7 +1374,7 @@ Error Profiler::check(Arguments &args) { Error Profiler::flushJfr() { MutexLocker ml(_state_lock); - if (_state != RUNNING) { + if (state() != RUNNING) { return Error("Profiler is not active"); } @@ -1385,11 +1390,12 @@ Error Profiler::flushJfr() { Error Profiler::dump(const char *path, const int length) { MutexLocker ml(_state_lock); - if (_state != IDLE && _state != RUNNING) { + State cur_state = state(); + if (cur_state != IDLE && cur_state != RUNNING) { return Error("Profiler has not started"); } - if (_state == RUNNING) { + if (cur_state == RUNNING) { std::set thread_ids; // flush the liveness tracker instance and note all the threads referenced // by the live objects @@ -1412,12 +1418,15 @@ Error Profiler::dump(const char *path, const int length) { // in processTraces() already handles clearing old traces while preserving // traces referenced by surviving LivenessTracker objects unlockAll(); - // Reset classmap - _class_map_lock.lock(); - _class_map.clear(); - _class_map_lock.unlock(); if (_features.vtable_target && VMStructs::hasClassNames()) { - preregisterLoadedClasses(VM::jvmti()); + // clear_first=true: clears the old epoch's entries under the exclusive + // lock BEFORE the JVMTI enumeration begins, giving only a nanosecond-scale + // empty window instead of the millisecond-scale window of a bare clear() + // followed by a long JVMTI call. + preregisterLoadedClasses(VM::jvmti(), /*clear_first=*/true); + } else { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); } _thread_info.clearAll(thread_ids); @@ -1481,7 +1490,7 @@ Error Profiler::runInternal(Arguments &args, std::ostream &out) { } case ACTION_STATUS: { MutexLocker ml(_state_lock); - if (_state == RUNNING) { + if (state() == RUNNING) { out << "Profiling is running for " << uptime() << " seconds\n"; } else { out << "Profiler is not active\n"; @@ -1537,7 +1546,7 @@ void Profiler::shutdown(Arguments &args) { MutexLocker ml(_state_lock); // The last chance to dump profile before VM terminates - if (_state == RUNNING) { + if (state() == RUNNING) { args._action = ACTION_STOP; Error error = run(args); if (error) { @@ -1545,7 +1554,7 @@ void Profiler::shutdown(Arguments &args) { } } - _state = TERMINATED; + _state.store(TERMINATED, std::memory_order_release); } int Profiler::lookupClass(const char *key, size_t length) { @@ -1558,10 +1567,19 @@ int Profiler::lookupClass(const char *key, size_t length) { return -1; } -void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { +void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { if (jvmti == nullptr) { return; } + // Phase 0: clear under exclusive lock BEFORE the JVMTI enumeration. + // This gives a nanosecond-scale empty window (just the clear itself) instead + // of a millisecond-scale window (the full enumeration). ClassPrepare callbacks + // that fire during Phase 1 insert via shared lock and are preserved — Phase 2 + // does not clear, so those entries survive the bulk-insert. + if (clear_first) { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); + } JNIEnv* jni = VM::jni(); jint class_count = 0; jclass* classes = nullptr; @@ -1609,11 +1627,14 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti) { } // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap // Dictionary pointer operations happen under the lock — no JVMTI calls. - _class_map_lock.lock(); - for (const std::string& s : sigs) { - (void)_class_map.lookup(s.c_str(), s.size()); + // NOTE: no clear here; clear_first already ran before Phase 1, so any + // ClassPrepare insertions made during the Phase 1 window are preserved. + { + ExclusiveLockGuard guard(&_class_map_lock); + for (const std::string& s : sigs) { + (void)_class_map.lookup(s.c_str(), s.size()); + } } - _class_map_lock.unlock(); } int Profiler::status(char* status, int max_len) { @@ -1623,7 +1644,7 @@ int Profiler::status(char* status, int max_len) { " CPU Engine : %s\n" " WallClock Engine : %s\n" " Allocations : %s\n", - _state == RUNNING ? "true" : "false", + state() == RUNNING ? "true" : "false", _cpu_engine != nullptr ? _cpu_engine->name() : "None", _wall_engine != nullptr ? _wall_engine->name() : "None", _alloc_engine != nullptr ? _alloc_engine->name() : "None"); diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index d34b32d33..d1a26b993 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -26,6 +26,7 @@ #include "threadInfo.h" #include "trap.h" #include "vmEntry.h" +#include #include #include #include @@ -71,7 +72,7 @@ class alignas(alignof(SpinLock)) Profiler { static volatile bool _need_JDK_8313796_workaround; Mutex _state_lock; - State _state; + std::atomic _state; // class unload hook Trap _class_unload_hook_trap; typedef void (*NotifyClassUnloadedFunc)(void *); @@ -147,9 +148,13 @@ class alignas(alignof(SpinLock)) Profiler { static Profiler *const _instance; + inline State state() const { + return _state.load(std::memory_order_relaxed); + } + public: Profiler() - : _state_lock(), _state(NEW), _class_unload_hook_trap(2), + : _state_lock(), _state(State::NEW), _class_unload_hook_trap(2), _notify_class_unloaded_func(NULL), _thread_info(), _class_map(1), _string_label_map(2), _context_value_map(3), _thread_filter(), _call_trace_storage(), _jfr(), _cpu_engine(NULL), _wall_engine(NULL), @@ -198,8 +203,7 @@ class alignas(alignof(SpinLock)) Profiler { } inline bool isRunning() { - MutexLocker ml(_state_lock); - return _state == RUNNING; + return _state.load(std::memory_order_acquire) == RUNNING; } u64 total_samples() { return _total_samples; } @@ -224,7 +228,7 @@ class alignas(alignof(SpinLock)) Profiler { // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold // _class_map_lock; this function acquires it internally for the bulk-insert // phase only. Runs on a JVM thread (never in a signal handler). - void preregisterLoadedClasses(jvmtiEnv* jvmti); + void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { _call_trace_storage.processTraces(processor); From 28f35c545ff8eef99159f9b4496a6f8cfe740406 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 May 2026 18:24:47 +0200 Subject: [PATCH 09/25] docs: fix misleading locking semantics comments in preregisterLoadedClasses Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/profiler.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 5049ac397..023a16d5a 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1419,10 +1419,10 @@ Error Profiler::dump(const char *path, const int length) { // traces referenced by surviving LivenessTracker objects unlockAll(); if (_features.vtable_target && VMStructs::hasClassNames()) { - // clear_first=true: clears the old epoch's entries under the exclusive - // lock BEFORE the JVMTI enumeration begins, giving only a nanosecond-scale - // empty window instead of the millisecond-scale window of a bare clear() - // followed by a long JVMTI call. + // clear_first=true: clear runs under the exclusive lock so signal + // handlers are blocked only for the duration of the clear itself. + // ClassPrepare callbacks that fire during the JVMTI enumeration (Phase 1) + // insert via shared lock and survive — Phase 2 does not clear. preregisterLoadedClasses(VM::jvmti(), /*clear_first=*/true); } else { ExclusiveLockGuard guard(&_class_map_lock); @@ -1572,10 +1572,10 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { return; } // Phase 0: clear under exclusive lock BEFORE the JVMTI enumeration. - // This gives a nanosecond-scale empty window (just the clear itself) instead - // of a millisecond-scale window (the full enumeration). ClassPrepare callbacks - // that fire during Phase 1 insert via shared lock and are preserved — Phase 2 - // does not clear, so those entries survive the bulk-insert. + // Signal handlers are blocked only for the duration of the clear itself; + // during Phase 1 they can still acquire the shared lock and see an empty map. + // The benefit: ClassPrepare callbacks that fire during Phase 1 insert via + // shared lock and survive — Phase 2 does not clear. if (clear_first) { ExclusiveLockGuard guard(&_class_map_lock); _class_map.clear(); From 23d9decd8b202803bdbd25bfbeab7b8283513018 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 21 May 2026 16:29:10 +0200 Subject: [PATCH 10/25] address(comment-accuracy): fix preregisterLoadedClasses locking doc and clear ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ddprof-lib/src/main/cpp/profiler.h:231 — rewrite header comment to document all three phases (Phase 0 clear, Phase 1 unlock window, Phase 2 bulk-insert); remove misleading "bulk-insert phase only" claim - ddprof-lib/src/main/cpp/profiler.cpp:1570 — move GetLoadedClasses before the if (clear_first) block so JVMTI failure leaves the map unchanged rather than permanently empty; fix nullptr guard ordering (Phase 0 still runs on class_count=0 success); update Phase 2 NOTE and error log Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 27 +++++++++++++++------------ ddprof-lib/src/main/cpp/profiler.h | 9 +++++++-- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 91cc830e3..cac8e7d81 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1671,7 +1671,17 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { if (jvmti == nullptr) { return; } - // Phase 0: clear under exclusive lock BEFORE the JVMTI enumeration. + JNIEnv* jni = VM::jni(); + jint class_count = 0; + jclass* classes = nullptr; + jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); + if (err != JVMTI_ERROR_NONE) { + // Leave the map unchanged (may be stale); vtable-target frames will be + // resolved gradually as ClassPrepare events fire for affected classes. + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map left unchanged, vtable-target frames may miss until ClassPrepare fires", err); + return; + } + // Phase 0: clear the map after GetLoadedClasses succeeds, before Phase 1. // Signal handlers are blocked only for the duration of the clear itself; // during Phase 1 they can still acquire the shared lock and see an empty map. // The benefit: ClassPrepare callbacks that fire during Phase 1 insert via @@ -1680,16 +1690,8 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { ExclusiveLockGuard guard(&_class_map_lock); _class_map.clear(); } - JNIEnv* jni = VM::jni(); - jint class_count = 0; - jclass* classes = nullptr; - jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); - if (err != JVMTI_ERROR_NONE) { - Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — vtable-target frames will miss until ClassPrepare fires", err); - return; - } if (classes == nullptr) { - return; + return; // Zero classes loaded (class_count == 0); nothing to enumerate. } // Phase 1 (no lock): enumerate signatures and copy normalized slices. // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so @@ -1727,8 +1729,9 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { } // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap // Dictionary pointer operations happen under the lock — no JVMTI calls. - // NOTE: no clear here; clear_first already ran before Phase 1, so any - // ClassPrepare insertions made during the Phase 1 window are preserved. + // NOTE: no clear here. When clear_first=true, Phase 0 already cleared the map + // before Phase 1, so concurrent ClassPrepare insertions from that window survive. + // When clear_first=false the map is additive and no ClassPrepare entries are lost. { ExclusiveLockGuard guard(&_class_map_lock); for (const std::string& s : sigs) { diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index d57086c43..aa181d613 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -227,8 +227,13 @@ class alignas(alignof(SpinLock)) Profiler { // and array ('[') class signatures so that signal-safe lookups in walkVM // (vtable_target) can resolve them without ever needing to malloc. Only bare // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold - // _class_map_lock; this function acquires it internally for the bulk-insert - // phase only. Runs on a JVM thread (never in a signal handler). + // _class_map_lock; this function acquires it internally across three phases: + // Phase 0 (clear_first=true only): exclusive lock to clear the map. + // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent + // ClassPrepare callbacks may insert via shared lock during this window. + // Phase 2 (always): exclusive lock for the bulk-insert of Phase 1 names; + // no re-clear here, so any Phase 1 ClassPrepare insertions survive. + // Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { From 05d22048bc0849aed4565fe972ea66901bd0fdeb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 14:45:42 +0000 Subject: [PATCH 11/25] fix(profiler): tighten preregister API visibility and add lock precondition assert Agent-Logs-Url: https://github.com/DataDog/java-profiler/sessions/6223488a-a230-40b6-8882-b62a9826111a Co-authored-by: jbachorik <738413+jbachorik@users.noreply.github.com> --- ddprof-lib/src/main/cpp/profiler.cpp | 7 +++++++ ddprof-lib/src/main/cpp/profiler.h | 25 +++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index cac8e7d81..fd06c6f8b 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1668,6 +1668,13 @@ int Profiler::lookupClass(const char *key, size_t length) { } void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { +#ifndef NDEBUG + bool can_take_shared = _class_map_lock.tryLockShared(); + assert(can_take_shared && "Caller must not hold _class_map_lock exclusively"); + if (can_take_shared) { + _class_map_lock.unlockShared(); + } +#endif if (jvmti == nullptr) { return; } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index aa181d613..055efc6dc 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -153,6 +153,19 @@ class alignas(alignof(SpinLock)) Profiler { return _state.load(std::memory_order_relaxed); } + // Pre-populate _class_map with all currently-loaded 'L'-type (reference) + // and array ('[') class signatures so that signal-safe lookups in walkVM + // (vtable_target) can resolve them without ever needing to malloc. Only bare + // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold + // _class_map_lock; this function acquires it internally across three phases: + // Phase 0 (clear_first=true only): exclusive lock to clear the map. + // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent + // ClassPrepare callbacks may insert via shared lock during this window. + // Phase 2 (always): exclusive lock for the bulk-insert of Phase 1 names; + // no re-clear here, so any Phase 1 ClassPrepare insertions survive. + // Runs on a JVM thread (never in a signal handler). + void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); + public: Profiler() : _state_lock(), _state(State::NEW), _class_unload_hook_trap(2), @@ -223,18 +236,6 @@ class alignas(alignof(SpinLock)) Profiler { const char* cstack() const; int lookupClass(const char *key, size_t length); - // Pre-populate _class_map with all currently-loaded 'L'-type (reference) - // and array ('[') class signatures so that signal-safe lookups in walkVM - // (vtable_target) can resolve them without ever needing to malloc. Only bare - // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold - // _class_map_lock; this function acquires it internally across three phases: - // Phase 0 (clear_first=true only): exclusive lock to clear the map. - // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent - // ClassPrepare callbacks may insert via shared lock during this window. - // Phase 2 (always): exclusive lock for the bulk-insert of Phase 1 names; - // no re-clear here, so any Phase 1 ClassPrepare insertions survive. - // Runs on a JVM thread (never in a signal handler). - void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); void processCallTraces(std::function&)> processor) { if (!_omit_stacktraces) { _call_trace_storage.processTraces(processor); From 2b23c65c41e312d127715bc1ef13fcd00281a8dc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 14:50:09 +0000 Subject: [PATCH 12/25] docs(profiler): clarify debug lock precondition assertion rationale Agent-Logs-Url: https://github.com/DataDog/java-profiler/sessions/6223488a-a230-40b6-8882-b62a9826111a Co-authored-by: jbachorik <738413+jbachorik@users.noreply.github.com> --- ddprof-lib/src/main/cpp/profiler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index fd06c6f8b..5b657e571 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1669,6 +1669,9 @@ int Profiler::lookupClass(const char *key, size_t length) { void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { #ifndef NDEBUG + // SpinLock has no owner-tracking API. We assert the public precondition + // ("caller must not hold _class_map_lock exclusively") by checking that a + // shared acquisition succeeds at function entry. bool can_take_shared = _class_map_lock.tryLockShared(); assert(can_take_shared && "Caller must not hold _class_map_lock exclusively"); if (can_take_shared) { From 56df9a17001a2a1e43402ad5575c5d02097c152a Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Thu, 21 May 2026 17:32:24 +0200 Subject: [PATCH 13/25] fix(profiler): move Phase 0 clear before GetLoadedClasses to close race Co-Authored-By: Claude Sonnet 4.6 --- ddprof-lib/src/main/cpp/profiler.cpp | 39 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 5b657e571..bc3caec9c 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1519,10 +1519,10 @@ Error Profiler::dump(const char *path, const int length) { // traces referenced by surviving LivenessTracker objects unlockAll(); if (_features.vtable_target && VMStructs::hasClassNames()) { - // clear_first=true: clear runs under the exclusive lock so signal - // handlers are blocked only for the duration of the clear itself. - // ClassPrepare callbacks that fire during the JVMTI enumeration (Phase 1) - // insert via shared lock and survive — Phase 2 does not clear. + // clear_first=true: clears the map before GetLoadedClasses to close the + // race where a ClassPrepare insertion (between the old-placement clear and + // the snapshot) would be wiped. ClassPrepare callbacks that fire after the + // clear insert via shared lock and survive — Phase 2 does not clear. preregisterLoadedClasses(VM::jvmti(), /*clear_first=*/true); } else { ExclusiveLockGuard guard(&_class_map_lock); @@ -1681,25 +1681,32 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { if (jvmti == nullptr) { return; } + // Phase 0: clear the map BEFORE taking the GetLoadedClasses snapshot. + // This closes the race: any ClassPrepare that fires after the clear + // inserts into the map via shared lock, and Phase 2 does not re-clear, + // so those insertions survive. + // Tradeoff: the window where signal handlers see an empty map is widened + // to GetLoadedClasses + Phase-1 duration; this is an accepted cost of + // eliminating the race. + if (clear_first) { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); + } JNIEnv* jni = VM::jni(); jint class_count = 0; jclass* classes = nullptr; jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); if (err != JVMTI_ERROR_NONE) { - // Leave the map unchanged (may be stale); vtable-target frames will be - // resolved gradually as ClassPrepare events fire for affected classes. - Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map left unchanged, vtable-target frames may miss until ClassPrepare fires", err); + // If clear_first=false the map retains stale entries; if clear_first=true + // it was already cleared above and is now empty. Either way, vtable-target + // frames will be resolved gradually as ClassPrepare events fire. + if (clear_first) { + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map was cleared and is now empty, vtable-target frames may miss until ClassPrepare fires", err); + } else { + Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map left unchanged (may be stale), vtable-target frames may miss until ClassPrepare fires", err); + } return; } - // Phase 0: clear the map after GetLoadedClasses succeeds, before Phase 1. - // Signal handlers are blocked only for the duration of the clear itself; - // during Phase 1 they can still acquire the shared lock and see an empty map. - // The benefit: ClassPrepare callbacks that fire during Phase 1 insert via - // shared lock and survive — Phase 2 does not clear. - if (clear_first) { - ExclusiveLockGuard guard(&_class_map_lock); - _class_map.clear(); - } if (classes == nullptr) { return; // Zero classes loaded (class_count == 0); nothing to enumerate. } From 5d868c100acf58dfd8a4c7e995e4dad829c7ca9d Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 May 2026 09:00:39 +0200 Subject: [PATCH 14/25] address(assert-comment): clarify partial-check semantics of NDEBUG lock assert profiler.cpp: reword comment to state tryLockShared() proves no exclusive holder but cannot detect a shared-lock caller (self-deadlock in Phase 0/2); update assert message to only claim what the check can actually detect Co-Authored-By: muse --- ddprof-lib/src/main/cpp/profiler.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index bc3caec9c..15066e50e 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1669,11 +1669,13 @@ int Profiler::lookupClass(const char *key, size_t length) { void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { #ifndef NDEBUG - // SpinLock has no owner-tracking API. We assert the public precondition - // ("caller must not hold _class_map_lock exclusively") by checking that a - // shared acquisition succeeds at function entry. + // SpinLock has no owner-tracking API, so this check is partial. + // tryLockShared() succeeding proves no thread currently holds the lock + // exclusively, but it cannot detect a caller that already holds a shared + // lock — such a caller would self-deadlock when Phase 0 or Phase 2 tries + // to acquire an exclusive lock. bool can_take_shared = _class_map_lock.tryLockShared(); - assert(can_take_shared && "Caller must not hold _class_map_lock exclusively"); + assert(can_take_shared && "_class_map_lock must not be exclusively held on entry (shared-holder self-deadlock undetectable)"); if (can_take_shared) { _class_map_lock.unlockShared(); } From f9e140a3c00cbe8fe0f261ee3cab8526da332a27 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 May 2026 09:58:29 +0200 Subject: [PATCH 15/25] test(profiler): regression tests for vtable_target class-map preregistration Co-Authored-By: Claude Sonnet 4.6 --- .../cpu/VtableTargetPreregistrationTest.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java new file mode 100644 index 000000000..af9a3074a --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java @@ -0,0 +1,87 @@ +package com.datadoghq.profiler.cpu; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junitpioneer.jupiter.RetryingTest; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class VtableTargetPreregistrationTest extends AbstractProfilerTest { + + @Override + protected String getProfilerCommand() { + return "cpu=10ms"; + } + + abstract static class Shape { + abstract int area(); + } + + static class Circle extends Shape { + private final int r; + Circle(int r) { this.r = r; } + @Override public int area() { return r * r; } + } + + static class Square extends Shape { + private final int s; + Square(int s) { this.s = s; } + @Override public int area() { return s * s; } + } + + static class Triangle extends Shape { + private final int b; + Triangle(int b) { this.b = b; } + @Override public int area() { return b * b / 2; } + } + + private int profiledWork(Shape... shapes) { + int result = 0; + for (int i = 0; i < 1_000_000; i++) { + for (Shape shape : shapes) { + result += shape.area(); + } + } + return result; + } + + @RetryingTest(3) + public void testClassMapPopulatedOnCpuStart() { + stopProfiler(); + assertTrue(getRecordedCounterValue("dictionary_classes_keys") > 0, + "dictionary_classes_keys must be > 0: preregisterLoadedClasses() was not called on profiler start"); + } + + // jvmtiError is the expected synthetic receiver-class frame that vtable_target inserts; + // contrast with SmokeCpuTest which asserts its absence for non-vtable workloads. + @RetryingTest(5) + public void testVtableReceiverFrameInCpuSamples() { + Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); + int result = profiledWork(new Circle(3), new Square(4), new Triangle(5)); + System.err.println(result); + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.ExecutionSample"); + boolean foundVtableWithReceiver = false; + for (IItemIterable cpuSamples : events) { + IMemberAccessor frameAccessor = + JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); + for (IItem sample : cpuSamples) { + String stackTrace = frameAccessor.getMember(sample); + if (stackTrace.contains(".vtable stub()") && stackTrace.contains("jvmtiError")) { + foundVtableWithReceiver = true; + break; + } + } + if (foundVtableWithReceiver) break; + } + assertTrue(foundVtableWithReceiver, + "No CPU sample contained both a vtable stub frame and a jvmtiError receiver frame"); + } +} From be0e50a64c9cda16cce12dfecd3a28d2f18feed5 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 May 2026 11:51:20 +0200 Subject: [PATCH 16/25] fix(test): fix VtableTargetPreregistrationTest failures - Drop testClassMapPopulatedOnCpuStart: dictionary_classes_keys is always 0 after Counters::reset() (line 1180) which runs after preregisterLoadedClasses, making the assertion permanently false. - Fix testVtableReceiverFrameInCpuSamples: cpu=10ms with 1M trivial iterations (r*r ~3ms) produced <1 sample. Switch to cpu=1ms and ThreadLocalRandom bodies to ensure ~300ms of CPU-bound work and force megamorphic vtable dispatch. Co-Authored-By: Claude Sonnet 4.6 --- .../cpu/VtableTargetPreregistrationTest.java | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java index af9a3074a..eb62da84f 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java @@ -10,40 +10,38 @@ import org.openjdk.jmc.common.item.IMemberAccessor; import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; +import java.util.concurrent.ThreadLocalRandom; + import static org.junit.jupiter.api.Assertions.assertTrue; public class VtableTargetPreregistrationTest extends AbstractProfilerTest { @Override protected String getProfilerCommand() { - return "cpu=10ms"; + return "cpu=1ms"; } abstract static class Shape { abstract int area(); } + // Three implementations force megamorphic vtable dispatch (JIT won't inline). + // ThreadLocalRandom bodies ensure each variant is non-trivial and CPU-bound. static class Circle extends Shape { - private final int r; - Circle(int r) { this.r = r; } - @Override public int area() { return r * r; } + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 1; } } static class Square extends Shape { - private final int s; - Square(int s) { this.s = s; } - @Override public int area() { return s * s; } + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 2; } } static class Triangle extends Shape { - private final int b; - Triangle(int b) { this.b = b; } - @Override public int area() { return b * b / 2; } + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 4; } } private int profiledWork(Shape... shapes) { int result = 0; - for (int i = 0; i < 1_000_000; i++) { + for (int i = 0; i < 10_000_000; i++) { for (Shape shape : shapes) { result += shape.area(); } @@ -51,19 +49,14 @@ private int profiledWork(Shape... shapes) { return result; } - @RetryingTest(3) - public void testClassMapPopulatedOnCpuStart() { - stopProfiler(); - assertTrue(getRecordedCounterValue("dictionary_classes_keys") > 0, - "dictionary_classes_keys must be > 0: preregisterLoadedClasses() was not called on profiler start"); - } - - // jvmtiError is the expected synthetic receiver-class frame that vtable_target inserts; - // contrast with SmokeCpuTest which asserts its absence for non-vtable workloads. + // jvmtiError is the expected synthetic receiver-class frame that vtable_target inserts when + // the class map is populated (PR #527 fix). The class map is populated by + // preregisterLoadedClasses() at CPU-only profiler start; if that call is absent (the bug), + // bounded_lookup returns INT_MAX and no synthetic frame is inserted — jvmtiError never appears. @RetryingTest(5) public void testVtableReceiverFrameInCpuSamples() { Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); - int result = profiledWork(new Circle(3), new Square(4), new Triangle(5)); + int result = profiledWork(new Circle(), new Square(), new Triangle()); System.err.println(result); stopProfiler(); @@ -82,6 +75,7 @@ public void testVtableReceiverFrameInCpuSamples() { if (foundVtableWithReceiver) break; } assertTrue(foundVtableWithReceiver, - "No CPU sample contained both a vtable stub frame and a jvmtiError receiver frame"); + "No CPU sample contained both a vtable stub frame and a jvmtiError receiver frame; " + + "preregisterLoadedClasses() may not have run at CPU-only profiler start (PR #527 regression)"); } } From 351475180744d814674c84d9c0b7c108e1df54f6 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 May 2026 15:12:56 +0200 Subject: [PATCH 17/25] fix: enable vtable_target by default when cpu profiling is requested --- ddprof-lib/src/main/cpp/arguments.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index 5b74df5c3..41c028af7 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -163,6 +163,7 @@ Error Arguments::parse(const char *args) { if (_cpu < 0) { msg = "cpu must be >= 0"; } + _features.vtable_target = 1; CASE("wall") if (value == NULL) { From 6e2e925322aedf433b878883dc92c4bb2c935a04 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 12:44:26 +0200 Subject: [PATCH 18/25] review: address PR #527 feedback - Use SharedLockGuard in Phase 2 of preregisterLoadedClasses - Clarify test comment: jvmtiError is JVMTI fallback for fake class_id method_id Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/profiler.cpp | 10 +++++++--- .../cpu/VtableTargetPreregistrationTest.java | 16 ++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 15066e50e..7fecb68a5 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1746,13 +1746,17 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { if (sig_failures > 0) { Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); } - // Phase 2 (exclusive lock): bulk-insert the collected names. Only the cheap - // Dictionary pointer operations happen under the lock — no JVMTI calls. + // Phase 2 (shared lock): bulk-insert the collected names. SharedLockGuard is + // sufficient: concurrent inserters (ClassPrepare callbacks, lookupClass) already + // run under the shared lock; only clear() in Phase 0 needs the exclusive lock. + // Using ExclusiveLockGuard here would block signal-handler readers (bounded_lookup + // in vtable_target and tryLockShared in object sampler) for the full bulk-insert + // duration, dropping samples unnecessarily. // NOTE: no clear here. When clear_first=true, Phase 0 already cleared the map // before Phase 1, so concurrent ClassPrepare insertions from that window survive. // When clear_first=false the map is additive and no ClassPrepare entries are lost. { - ExclusiveLockGuard guard(&_class_map_lock); + SharedLockGuard guard(&_class_map_lock); for (const std::string& s : sigs) { (void)_class_map.lookup(s.c_str(), s.size()); } diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java index eb62da84f..efc14d636 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java @@ -49,10 +49,13 @@ private int profiledWork(Shape... shapes) { return result; } - // jvmtiError is the expected synthetic receiver-class frame that vtable_target inserts when - // the class map is populated (PR #527 fix). The class map is populated by - // preregisterLoadedClasses() at CPU-only profiler start; if that call is absent (the bug), - // bounded_lookup returns INT_MAX and no synthetic frame is inserted — jvmtiError never appears. + // The vtable_target feature inserts a synthetic frame whose method_id holds a class_id + // (u32 integer from _class_map) — not a real jmethodID pointer. When the flight recorder + // resolves this frame, JVMTI rejects the fake jmethodID and falls back to the "jvmtiError" + // placeholder. So every vtable stub whose receiver class is registered in _class_map will + // have a companion "jvmtiError" frame in the JFR output. If preregisterLoadedClasses() was + // not called (the bug), bounded_lookup returns INT_MAX, no synthetic frame is inserted, and + // "jvmtiError" never appears next to a vtable stub. @RetryingTest(5) public void testVtableReceiverFrameInCpuSamples() { Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); @@ -75,7 +78,8 @@ public void testVtableReceiverFrameInCpuSamples() { if (foundVtableWithReceiver) break; } assertTrue(foundVtableWithReceiver, - "No CPU sample contained both a vtable stub frame and a jvmtiError receiver frame; " + - "preregisterLoadedClasses() may not have run at CPU-only profiler start (PR #527 regression)"); + "No CPU sample contained both a vtable stub frame and a synthetic receiver-class frame " + + "(appears as \"jvmtiError\" because the class_id is not a real jmethodID); " + + "preregisterLoadedClasses() may not have run at CPU-only profiler start"); } } From 6bd5353dc49603619cd87fbba3f1eec483408dca Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 25 May 2026 13:27:35 +0200 Subject: [PATCH 19/25] fix(vtable_target): render receiver class name instead of jvmtiError - flightRecorder.cpp: BCI_ALLOC in resolveMethod now uses class_id directly as the JFR class ref with method name instead of passing a fake jmethodID to fillJavaMethodInfo - VtableTargetPreregistrationTest: assert Circle/Square/Triangle in stack trace next to vtable stub; add waitForProfilerReady and null guards - profiler.h: fix Phase 2 doccomment (shared lock, not exclusive) - profiler.cpp: Deallocate classes array when class_count==0 Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 11 +++++++ ddprof-lib/src/main/cpp/profiler.cpp | 7 +++-- ddprof-lib/src/main/cpp/profiler.h | 3 +- .../cpu/VtableTargetPreregistrationTest.java | 29 +++++++++++-------- 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 50e495188..cbaaed560 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -403,6 +403,17 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { TEST_LOG("WARNING: Library lookup failed for index %u", lib_index); fillNativeMethodInfo(mi, "unknown_library", nullptr); } + } else if (bci == BCI_ALLOC) { + // Synthetic vtable-receiver frame from hotspotSupport.cpp:walkVM. + // method_id holds a class_id from _class_map (the same Dictionary as _classes). + // Use it directly as the JFR class reference; emit "" as the + // method name. Passing class_id to fillJavaMethodInfo would produce "jvmtiError" + // because JVMTI rejects small integers as jmethodID pointers. + mi->_class = (u32)(uintptr_t)method; + mi->_name = _symbols.lookup(""); + mi->_sig = _symbols.lookup("()V"); + mi->_type = FRAME_NATIVE; + mi->_is_entry = false; } else { fillJavaMethodInfo(mi, method, first_time); } diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 7fecb68a5..ef6777ce5 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1709,8 +1709,11 @@ void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { } return; } - if (classes == nullptr) { - return; // Zero classes loaded (class_count == 0); nothing to enumerate. + if (class_count == 0) { + if (classes != nullptr) { + jvmti->Deallocate(reinterpret_cast(classes)); + } + return; } // Phase 1 (no lock): enumerate signatures and copy normalized slices. // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 055efc6dc..492c0422d 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -161,7 +161,8 @@ class alignas(alignof(SpinLock)) Profiler { // Phase 0 (clear_first=true only): exclusive lock to clear the map. // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent // ClassPrepare callbacks may insert via shared lock during this window. - // Phase 2 (always): exclusive lock for the bulk-insert of Phase 1 names; + // Phase 2 (always): shared lock for the bulk-insert of Phase 1 names; + // shared suffices because Dictionary::lookup uses CAS internally; // no re-clear here, so any Phase 1 ClassPrepare insertions survive. // Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java index efc14d636..f823d4a96 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java @@ -49,16 +49,16 @@ private int profiledWork(Shape... shapes) { return result; } - // The vtable_target feature inserts a synthetic frame whose method_id holds a class_id - // (u32 integer from _class_map) — not a real jmethodID pointer. When the flight recorder - // resolves this frame, JVMTI rejects the fake jmethodID and falls back to the "jvmtiError" - // placeholder. So every vtable stub whose receiver class is registered in _class_map will - // have a companion "jvmtiError" frame in the JFR output. If preregisterLoadedClasses() was - // not called (the bug), bounded_lookup returns INT_MAX, no synthetic frame is inserted, and - // "jvmtiError" never appears next to a vtable stub. + // The vtable_target feature inserts a synthetic frame immediately + // below a vtable stub frame in the call stack. The receiver class (Circle/Square/Triangle) + // is resolved from _class_map and emitted as the class name for the synthetic frame. + // If preregisterLoadedClasses() was not called (the bug), bounded_lookup returns INT_MAX, + // no synthetic frame is inserted, and the receiver class name never appears next to a + // vtable stub in the JFR output. @RetryingTest(5) - public void testVtableReceiverFrameInCpuSamples() { + public void testVtableReceiverFrameInCpuSamples() throws Exception { Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); + waitForProfilerReady(2000); int result = profiledWork(new Circle(), new Square(), new Triangle()); System.err.println(result); stopProfiler(); @@ -68,9 +68,14 @@ public void testVtableReceiverFrameInCpuSamples() { for (IItemIterable cpuSamples : events) { IMemberAccessor frameAccessor = JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); + if (frameAccessor == null) continue; for (IItem sample : cpuSamples) { String stackTrace = frameAccessor.getMember(sample); - if (stackTrace.contains(".vtable stub()") && stackTrace.contains("jvmtiError")) { + if (stackTrace != null + && stackTrace.contains(".vtable stub()") + && (stackTrace.contains("Circle") + || stackTrace.contains("Square") + || stackTrace.contains("Triangle"))) { foundVtableWithReceiver = true; break; } @@ -78,8 +83,8 @@ public void testVtableReceiverFrameInCpuSamples() { if (foundVtableWithReceiver) break; } assertTrue(foundVtableWithReceiver, - "No CPU sample contained both a vtable stub frame and a synthetic receiver-class frame " + - "(appears as \"jvmtiError\" because the class_id is not a real jmethodID); " + - "preregisterLoadedClasses() may not have run at CPU-only profiler start"); + "No CPU sample contained both a vtable stub frame and a receiver-class frame " + + "(Circle/Square/Triangle); preregisterLoadedClasses() may not have run at " + + "CPU-only profiler start"); } } From 6f0e462e634c4f78ec70960b90793ac391506a95 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 10:26:12 +0200 Subject: [PATCH 20/25] fix(vtable_target): skip generated accessor/lambda-form receivers GeneratedConstructorAccessor, GeneratedMethodAccessor, and LambdaForm$ classes are normalised (digit suffix stripped) in fillJavaMethodInfo for regular frames; the BCI_ALLOC vtable-receiver path bypasses that normalisation, exposing un-normalised names that fail MetadataNormalisationTest. Skip these classes at the insertion point in hotspotSupport.cpp. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../src/main/cpp/hotspot/hotspotSupport.cpp | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp index b63b66e1d..29bb586e6 100644 --- a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp +++ b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp @@ -535,21 +535,34 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex uintptr_t receiver = frame.jarg0(); if (receiver != 0) { VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); - // walkVM runs in a signal handler. _class_map is mutated - // under _class_map_lock (shared by Profiler::lookupClass - // inserters, exclusive by _class_map.clear() in the dump - // path between unlockAll() and lock()). bounded_lookup - // with size_limit=0 never inserts (no malloc), but it - // still traverses row->next and reads row->keys, which - // clear() concurrently frees. Take the lock shared via - // try-lock; if an exclusive clear() is in progress, drop - // the synthetic frame rather than read freed memory. - auto guard = profiler->classMapTrySharedGuard(); - if (guard.ownsLock()) { - u32 class_id = profiler->classMap()->bounded_lookup( - symbol->body(), symbol->length(), 0); - if (class_id != INT_MAX) { - fillFrame(frames[depth++], BCI_ALLOC, class_id); + const char* sym_name = symbol->body(); + size_t sym_len = symbol->length(); + // Skip dynamically-generated accessor and lambda-form classes. + // They are normalised away (digit suffix stripped) by fillJavaMethodInfo + // for regular frames, but the vtable receiver path bypasses that + // normalisation. Showing the un-normalised names would break tests + // (e.g. MetadataNormalisationTest) that assert those names are absent. + bool skip_receiver = + (sym_len > 30 && strncmp(sym_name, "jdk/internal/reflect/Generated", 30) == 0) || + (sym_len > 21 && strncmp(sym_name, "sun/reflect/Generated", 21) == 0) || + (sym_len > 27 && strncmp(sym_name, "java/lang/invoke/LambdaForm$", 27) == 0); + if (!skip_receiver) { + // walkVM runs in a signal handler. _class_map is mutated + // under _class_map_lock (shared by Profiler::lookupClass + // inserters, exclusive by _class_map.clear() in the dump + // path between unlockAll() and lock()). bounded_lookup + // with size_limit=0 never inserts (no malloc), but it + // still traverses row->next and reads row->keys, which + // clear() concurrently frees. Take the lock shared via + // try-lock; if an exclusive clear() is in progress, drop + // the synthetic frame rather than read freed memory. + auto guard = profiler->classMapTrySharedGuard(); + if (guard.ownsLock()) { + u32 class_id = profiler->classMap()->bounded_lookup( + sym_name, sym_len, 0); + if (class_id != INT_MAX) { + fillFrame(frames[depth++], BCI_ALLOC, class_id); + } } } } From 3a0bd2d8a19ae889a2053e662cf3d8381bca44ad Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 11:55:14 +0200 Subject: [PATCH 21/25] fix(vtable_target): normalise receiver class name in resolveMethod Instead of skipping generated accessor/lambda-form classes at the insertion point, apply the same prefix-based normalisation as fillJavaMethodInfo in the BCI_ALLOC case of resolveMethod (dump-time, cached per unique receiver class via method map): GeneratedConstructorAccessor1 -> GeneratedConstructorAccessor, LambdaForm$MH.xxx -> LambdaForm$MH, etc. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 44 ++++++++++++++++--- .../src/main/cpp/hotspot/hotspotSupport.cpp | 43 +++++++----------- 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index cce24f937..b944105dd 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -429,11 +429,45 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { } } else if (bci == BCI_ALLOC) { // Synthetic vtable-receiver frame from hotspotSupport.cpp:walkVM. - // method_id holds a class_id from _class_map (the same Dictionary as _classes). - // Use it directly as the JFR class reference; emit "" as the - // method name. Passing class_id to fillJavaMethodInfo would produce "jvmtiError" - // because JVMTI rejects small integers as jmethodID pointers. - mi->_class = (u32)(uintptr_t)method; + // method_id holds a raw class_id from _class_map (the same Dictionary as _classes). + // Collect the class names under the shared lock, look up the raw name, apply the + // same prefix-based normalisation as fillJavaMethodInfo (strips digit suffixes from + // generated accessor classes, canonicalises LambdaForm sub-types), then emit + // "" as the method name. + // The method map caches this MethodInfo per class_id, so the collect runs at most + // once per unique receiver class across the entire dump. + u32 raw_class_id = (u32)(uintptr_t)method; + u32 class_id = raw_class_id; + { + std::map class_names; + auto guard = Profiler::instance()->classMapSharedGuard(); + _classes->collect(class_names); + auto it = class_names.find(raw_class_id); + if (it != class_names.end()) { + const char* name = it->second; + // Mirrors the normalisation in fillJavaMethodInfo. class_names stores + // slash-separated names without L-prefix or ;-suffix. + if (has_prefix(name, "jdk/internal/reflect/GeneratedConstructorAccessor")) { + class_id = _classes->lookup("jdk/internal/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix(name, "sun/reflect/GeneratedConstructorAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix(name, "jdk/internal/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("jdk/internal/reflect/GeneratedMethodAccessor"); + } else if (has_prefix(name, "sun/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedMethodAccessor"); + } else if (has_prefix(name, "java/lang/invoke/LambdaForm$")) { + const char* suffix = name + strlen("java/lang/invoke/LambdaForm$"); + if (has_prefix(suffix, "MH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$MH"); + } else if (has_prefix(suffix, "BMH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$BMH"); + } else if (has_prefix(suffix, "DMH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$DMH"); + } + } + } + } + mi->_class = class_id; mi->_name = _symbols.lookup(""); mi->_sig = _symbols.lookup("()V"); mi->_type = FRAME_NATIVE; diff --git a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp index 29bb586e6..b63b66e1d 100644 --- a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp +++ b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp @@ -535,34 +535,21 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex uintptr_t receiver = frame.jarg0(); if (receiver != 0) { VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); - const char* sym_name = symbol->body(); - size_t sym_len = symbol->length(); - // Skip dynamically-generated accessor and lambda-form classes. - // They are normalised away (digit suffix stripped) by fillJavaMethodInfo - // for regular frames, but the vtable receiver path bypasses that - // normalisation. Showing the un-normalised names would break tests - // (e.g. MetadataNormalisationTest) that assert those names are absent. - bool skip_receiver = - (sym_len > 30 && strncmp(sym_name, "jdk/internal/reflect/Generated", 30) == 0) || - (sym_len > 21 && strncmp(sym_name, "sun/reflect/Generated", 21) == 0) || - (sym_len > 27 && strncmp(sym_name, "java/lang/invoke/LambdaForm$", 27) == 0); - if (!skip_receiver) { - // walkVM runs in a signal handler. _class_map is mutated - // under _class_map_lock (shared by Profiler::lookupClass - // inserters, exclusive by _class_map.clear() in the dump - // path between unlockAll() and lock()). bounded_lookup - // with size_limit=0 never inserts (no malloc), but it - // still traverses row->next and reads row->keys, which - // clear() concurrently frees. Take the lock shared via - // try-lock; if an exclusive clear() is in progress, drop - // the synthetic frame rather than read freed memory. - auto guard = profiler->classMapTrySharedGuard(); - if (guard.ownsLock()) { - u32 class_id = profiler->classMap()->bounded_lookup( - sym_name, sym_len, 0); - if (class_id != INT_MAX) { - fillFrame(frames[depth++], BCI_ALLOC, class_id); - } + // walkVM runs in a signal handler. _class_map is mutated + // under _class_map_lock (shared by Profiler::lookupClass + // inserters, exclusive by _class_map.clear() in the dump + // path between unlockAll() and lock()). bounded_lookup + // with size_limit=0 never inserts (no malloc), but it + // still traverses row->next and reads row->keys, which + // clear() concurrently frees. Take the lock shared via + // try-lock; if an exclusive clear() is in progress, drop + // the synthetic frame rather than read freed memory. + auto guard = profiler->classMapTrySharedGuard(); + if (guard.ownsLock()) { + u32 class_id = profiler->classMap()->bounded_lookup( + symbol->body(), symbol->length(), 0); + if (class_id != INT_MAX) { + fillFrame(frames[depth++], BCI_ALLOC, class_id); } } } From 7fa28ff288023d02a200e1c2564f0dac727d3fe8 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 12:23:25 +0200 Subject: [PATCH 22/25] fix(vtable_target): normalise receiver class and share class map snapshot - Lookup: add _class_cache (std::map) populated once via initClassCache() before writeStackTraces; writeClasses reuses it instead of a second collect - resolveMethod BCI_ALLOC: use _class_cache + same prefix normalisations as fillJavaMethodInfo (GeneratedXXXAccessor strips digit suffix, LambdaForm sub-types canonicalised) - hotspotSupport.cpp: revert skip filter (normalisation now in resolveMethod) Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/flightRecorder.cpp | 74 ++++++++++------------ ddprof-lib/src/main/cpp/flightRecorder.h | 6 ++ 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index b944105dd..f19ae5edc 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -430,40 +430,33 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { } else if (bci == BCI_ALLOC) { // Synthetic vtable-receiver frame from hotspotSupport.cpp:walkVM. // method_id holds a raw class_id from _class_map (the same Dictionary as _classes). - // Collect the class names under the shared lock, look up the raw name, apply the - // same prefix-based normalisation as fillJavaMethodInfo (strips digit suffixes from - // generated accessor classes, canonicalises LambdaForm sub-types), then emit - // "" as the method name. - // The method map caches this MethodInfo per class_id, so the collect runs at most - // once per unique receiver class across the entire dump. + // Use the pre-collected _class_cache snapshot (populated once at Lookup construction) + // to look up the raw name, then apply the same prefix-based normalisation as + // fillJavaMethodInfo (strips digit suffixes from generated accessor classes, + // canonicalises LambdaForm sub-types). No lock or collect needed here. u32 raw_class_id = (u32)(uintptr_t)method; u32 class_id = raw_class_id; - { - std::map class_names; - auto guard = Profiler::instance()->classMapSharedGuard(); - _classes->collect(class_names); - auto it = class_names.find(raw_class_id); - if (it != class_names.end()) { - const char* name = it->second; - // Mirrors the normalisation in fillJavaMethodInfo. class_names stores - // slash-separated names without L-prefix or ;-suffix. - if (has_prefix(name, "jdk/internal/reflect/GeneratedConstructorAccessor")) { - class_id = _classes->lookup("jdk/internal/reflect/GeneratedConstructorAccessor"); - } else if (has_prefix(name, "sun/reflect/GeneratedConstructorAccessor")) { - class_id = _classes->lookup("sun/reflect/GeneratedConstructorAccessor"); - } else if (has_prefix(name, "jdk/internal/reflect/GeneratedMethodAccessor")) { - class_id = _classes->lookup("jdk/internal/reflect/GeneratedMethodAccessor"); - } else if (has_prefix(name, "sun/reflect/GeneratedMethodAccessor")) { - class_id = _classes->lookup("sun/reflect/GeneratedMethodAccessor"); - } else if (has_prefix(name, "java/lang/invoke/LambdaForm$")) { - const char* suffix = name + strlen("java/lang/invoke/LambdaForm$"); - if (has_prefix(suffix, "MH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$MH"); - } else if (has_prefix(suffix, "BMH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$BMH"); - } else if (has_prefix(suffix, "DMH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$DMH"); - } + auto it = _class_cache.find(raw_class_id); + if (it != _class_cache.end()) { + const char* name = it->second; + // Mirrors normalisation in fillJavaMethodInfo; names are slash-separated, + // no L-prefix or ;-suffix. + if (has_prefix(name, "jdk/internal/reflect/GeneratedConstructorAccessor")) { + class_id = _classes->lookup("jdk/internal/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix(name, "sun/reflect/GeneratedConstructorAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix(name, "jdk/internal/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("jdk/internal/reflect/GeneratedMethodAccessor"); + } else if (has_prefix(name, "sun/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedMethodAccessor"); + } else if (has_prefix(name, "java/lang/invoke/LambdaForm$")) { + const char* suffix = name + strlen("java/lang/invoke/LambdaForm$"); + if (has_prefix(suffix, "MH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$MH"); + } else if (has_prefix(suffix, "BMH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$BMH"); + } else if (has_prefix(suffix, "DMH")) { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$DMH"); } } } @@ -480,6 +473,11 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { return mi; } +void Lookup::initClassCache() { + auto guard = Profiler::instance()->classMapSharedGuard(); + _classes->collect(_class_cache); +} + u32 Lookup::getPackage(const char *class_name) { const char *package = strrchr(class_name, '/'); if (package == NULL) { @@ -1246,6 +1244,7 @@ void Recording::writeCpool(Buffer *buf) { // classMapSharedGuard() for its full duration; the exclusive classMap()->clear() // in Profiler::dump runs only after this method returns. Lookup lookup(this, &_method_map, Profiler::instance()->classMap()); + lookup.initClassCache(); writeFrameTypes(buf); writeThreadStates(buf); writeExecutionModes(buf); @@ -1458,13 +1457,10 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) { void Recording::writeClasses(Buffer *buf, Lookup *lookup) { DEBUG_ASSERT_NOT_IN_SIGNAL(); - std::map classes; - // Hold classMapSharedGuard() for the full function. The const char* pointers - // stored in classes point into dictionary row storage; clear() frees that - // storage under the exclusive lock, so we must not release the shared lock - // until we have finished iterating. - auto guard = Profiler::instance()->classMapSharedGuard(); - lookup->_classes->collect(classes); + // Use the pre-collected snapshot from Lookup::initClassCache(). The shared + // lock was held during that collect; clear() cannot run until after the full + // constant-pool section is written, so the const char* pointers remain valid. + const std::map& classes = lookup->_class_cache; buf->putVar64(T_CLASS); buf->putVar64(classes.size()); diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index b8af773ad..21c8f3c72 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -306,6 +306,7 @@ class Lookup { Recording *_rec; MethodMap *_method_map; Dictionary *_classes; + std::map _class_cache; // snapshot of _classes, populated once at dump time Dictionary _packages; Dictionary _symbols; @@ -324,6 +325,11 @@ class Lookup { : _rec(rec), _method_map(method_map), _classes(classes), _packages(), _symbols() {} + // Call once before writeStackTraces. Collects the class-map snapshot under + // the shared lock so that resolveMethod (BCI_ALLOC) and writeClasses can + // both use _class_cache without a second collect. + void initClassCache(); + MethodInfo *resolveMethod(ASGCT_CallFrame &frame); u32 getPackage(const char *class_name); u32 getSymbol(const char *name); From 1447c2110fb8efd138d234521d1a57fc07642105 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 13:45:24 +0200 Subject: [PATCH 23/25] review: address proctor findings on PR #527 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - arguments.cpp: document vtable_target overhead (JVMTI enum at start + each dump; incremental ClassPrepare; signal path is lock-free) - profiler.cpp: clarify debug assertion — detects exclusive hold, not shared-holder re-entrancy; remove overclaiming comment - profiler.h: fix Phase 2 comment causality (exclusive Phase 0 prevents clear(), not CAS; CAS makes concurrent shared inserts safe) - vmEntry.cpp: document _features/_state memory-ordering invariant in ClassPrepare - VtableTargetPreregistrationTest: add contains("") to assert the synthetic frame itself, not just the class name substring - flightRecorder.cpp: two-phase collect design comments (linter-staged) Co-Authored-By: Claude Sonnet 4.6 (1M context) --- ddprof-lib/src/main/cpp/arguments.cpp | 4 ++ ddprof-lib/src/main/cpp/flightRecorder.cpp | 47 ++++++++++++++----- ddprof-lib/src/main/cpp/profiler.cpp | 12 +++-- ddprof-lib/src/main/cpp/profiler.h | 4 +- ddprof-lib/src/main/cpp/vmEntry.cpp | 4 ++ .../cpu/VtableTargetPreregistrationTest.java | 7 +-- 6 files changed, 57 insertions(+), 21 deletions(-) diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index 41c028af7..b7bc366a4 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -163,6 +163,10 @@ Error Arguments::parse(const char *args) { if (_cpu < 0) { msg = "cpu must be >= 0"; } + // vtable_target: resolve vtable/itable stub receiver classes in CPU traces. + // Cost: one JVMTI GetLoadedClasses enumeration at profiler start and at + // each JFR dump; incremental per-class insert on ClassPrepare. The + // signal-handler path itself only does a lock-free bounded_lookup (no malloc). _features.vtable_target = 1; CASE("wall") diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index f19ae5edc..2c37ad300 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -430,10 +430,19 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { } else if (bci == BCI_ALLOC) { // Synthetic vtable-receiver frame from hotspotSupport.cpp:walkVM. // method_id holds a raw class_id from _class_map (the same Dictionary as _classes). - // Use the pre-collected _class_cache snapshot (populated once at Lookup construction) - // to look up the raw name, then apply the same prefix-based normalisation as - // fillJavaMethodInfo (strips digit suffixes from generated accessor classes, - // canonicalises LambdaForm sub-types). No lock or collect needed here. + // + // _class_cache is safe to use here because vtable-receiver classes are always + // pre-registered via preregisterLoadedClasses() before profiling starts. + // walkVM uses bounded_lookup(size_limit=0), which never inserts, so a non-INT_MAX + // class_id guarantees the class was already in _class_map when initClassCache() + // ran. No lock is needed: the snapshot pointers remain valid for the full + // writeCpool() duration because _class_map.clear() only runs after writeCpool returns. + // + // _class_cache is intentionally NOT used as the source for writeClasses(). + // fillJavaMethodInfo() (called during writeStackTraces/writeMethods) inserts + // every Java class into _classes. writeClasses() must collect _classes fresh + // after those insertions to obtain the complete class pool; otherwise class names + // would be missing from JFR and stack-trace strings would show null class names. u32 raw_class_id = (u32)(uintptr_t)method; u32 class_id = raw_class_id; auto it = _class_cache.find(raw_class_id); @@ -474,6 +483,13 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { } void Lookup::initClassCache() { + // Snapshot _classes into _class_cache for use by resolveMethod(BCI_ALLOC). + // Must be called before writeStackTraces() so the snapshot covers all + // vtable-receiver classes (pre-registered before profiling starts). + // This snapshot is intentionally NOT used by writeClasses(): regular Java + // classes are inserted into _classes by fillJavaMethodInfo() during + // writeStackTraces/writeMethods, so writeClasses() must re-collect after + // those passes to obtain the complete class pool. auto guard = Profiler::instance()->classMapSharedGuard(); _classes->collect(_class_cache); } @@ -1239,10 +1255,14 @@ void Recording::writeCpool(Buffer *buf) { // constant pool count - bump each time a new pool is added buf->put8(12); - // classMap() is shared across the dump (this thread) and the JVMTI shared-lock - // writers (Profiler::lookupClass and friends). writeClasses() holds - // classMapSharedGuard() for its full duration; the exclusive classMap()->clear() - // in Profiler::dump runs only after this method returns. + // Two-phase classMap locking: initClassCache() takes the shared lock early to + // snapshot vtable-receiver class names for resolveMethod(BCI_ALLOC). The snapshot + // is valid for the whole writeCpool() call because classMap()->clear() (exclusive + // lock) only runs in Profiler::dump after writeCpool() returns. + // writeClasses() takes the shared lock a second time to collect the COMPLETE class + // set: fillJavaMethodInfo() inserts every Java class into _classes during + // writeStackTraces/writeMethods, so the early snapshot would miss them all and + // produce a class pool with null class names in every stack frame. Lookup lookup(this, &_method_map, Profiler::instance()->classMap()); lookup.initClassCache(); writeFrameTypes(buf); @@ -1457,10 +1477,13 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) { void Recording::writeClasses(Buffer *buf, Lookup *lookup) { DEBUG_ASSERT_NOT_IN_SIGNAL(); - // Use the pre-collected snapshot from Lookup::initClassCache(). The shared - // lock was held during that collect; clear() cannot run until after the full - // constant-pool section is written, so the const char* pointers remain valid. - const std::map& classes = lookup->_class_cache; + // Hold classMapSharedGuard() for the full function. The const char* pointers + // stored in classes point into dictionary row storage; clear() frees that + // storage under the exclusive lock, so we must not release the shared lock + // until we have finished iterating. + std::map classes; + auto guard = Profiler::instance()->classMapSharedGuard(); + lookup->_classes->collect(classes); buf->putVar64(T_CLASS); buf->putVar64(classes.size()); diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 87315fbd9..df4eea1cc 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1765,12 +1765,14 @@ int Profiler::lookupClass(const char *key, size_t length) { void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { #ifndef NDEBUG // SpinLock has no owner-tracking API, so this check is partial. - // tryLockShared() succeeding proves no thread currently holds the lock - // exclusively, but it cannot detect a caller that already holds a shared - // lock — such a caller would self-deadlock when Phase 0 or Phase 2 tries - // to acquire an exclusive lock. + // Partial re-entrancy check: tryLockShared() succeeding proves no thread + // holds the lock exclusively right now, so Phase 0's exclusive-lock attempt + // will not immediately deadlock. It does NOT detect a caller that already + // holds a shared lock — that caller would deadlock if Phase 0 later tries to + // upgrade to exclusive. No known call path creates that scenario, but the + // check is advisory only, not a full safety guarantee. bool can_take_shared = _class_map_lock.tryLockShared(); - assert(can_take_shared && "_class_map_lock must not be exclusively held on entry (shared-holder self-deadlock undetectable)"); + assert(can_take_shared && "_class_map_lock must not be held exclusively on entry"); if (can_take_shared) { _class_map_lock.unlockShared(); } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index b266edfdf..d54e0710a 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -166,7 +166,9 @@ class alignas(alignof(SpinLock)) Profiler { // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent // ClassPrepare callbacks may insert via shared lock during this window. // Phase 2 (always): shared lock for the bulk-insert of Phase 1 names; - // shared suffices because Dictionary::lookup uses CAS internally; + // multiple concurrent shared-lock holders are safe because Dictionary + // uses CAS for slot allocation; exclusive lock from Phase 0 is what + // prevents a concurrent clear() from running during Phase 1 or 2; // no re-clear here, so any Phase 1 ClassPrepare insertions survive. // Runs on a JVM thread (never in a signal handler). void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 348c155fa..7f20e9668 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -646,6 +646,10 @@ void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, // tryLockShared() in lookupClass(). This ensures newly prepared classes are // never silently skipped while an exclusive dump/reset is in flight. Gate on // vtable_target to avoid overhead when the feature is disabled. + // Memory-ordering: _features.vtable_target is written in Profiler::start() + // before the release store to _state. The acquire load of _state in + // isRunning() above establishes happens-before with that write, so reading + // _features.vtable_target here is safe without a separate atomic. Profiler* profiler = Profiler::instance(); if (profiler == nullptr || !profiler->isRunning() || !profiler->stackWalkFeatures().vtable_target) { return; diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java index f823d4a96..af156bc12 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java @@ -73,6 +73,7 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { String stackTrace = frameAccessor.getMember(sample); if (stackTrace != null && stackTrace.contains(".vtable stub()") + && stackTrace.contains("") && (stackTrace.contains("Circle") || stackTrace.contains("Square") || stackTrace.contains("Triangle"))) { @@ -83,8 +84,8 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { if (foundVtableWithReceiver) break; } assertTrue(foundVtableWithReceiver, - "No CPU sample contained both a vtable stub frame and a receiver-class frame " + - "(Circle/Square/Triangle); preregisterLoadedClasses() may not have run at " + - "CPU-only profiler start"); + "No CPU sample contained a vtable stub frame, a synthetic frame, " + + "and a receiver class (Circle/Square/Triangle); preregisterLoadedClasses() may not " + + "have run at CPU-only profiler start, or resolveMethod BCI_ALLOC is broken"); } } From f932a1b7cbb6beb309e60bdedd5b7c7991a43e3b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 16:19:03 +0200 Subject: [PATCH 24/25] refactor(vtable_target): defer receiver resolution to dump time via SafeAccess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signal handler stores raw VMSymbol* in a new BCI_VTABLE_RECEIVER frame; Lookup::resolveVTableReceiver reads the Symbol via SafeAccess at dump time, crash-safe under concurrent class unloading. Drops preregister + ClassPrepare hook; _class_map cleared unconditionally each dump. MethodMap gains a class_id key namespace so two VMSymbol* addresses for the same class collapse to one MethodInfo. SafeAccess::safeCopy added with aligned-load strategy so over-reads stay in-page. Buffer 1024 → 4096; failure sentinel "" instead of "". Co-Authored-By: Claude Opus 4.7 (1M context) --- ddprof-lib/src/main/cpp/arguments.cpp | 8 +- ddprof-lib/src/main/cpp/counters.h | 1 + ddprof-lib/src/main/cpp/flightRecorder.cpp | 161 +++++++++++----- ddprof-lib/src/main/cpp/flightRecorder.h | 51 ++++- .../src/main/cpp/hotspot/hotspotSupport.cpp | 26 +-- ddprof-lib/src/main/cpp/hotspot/vmStructs.h | 6 + ddprof-lib/src/main/cpp/profiler.cpp | 135 +------------ ddprof-lib/src/main/cpp/profiler.h | 17 -- ddprof-lib/src/main/cpp/safeAccess.cpp | 53 ++++++ ddprof-lib/src/main/cpp/safeAccess.h | 8 + ddprof-lib/src/main/cpp/vmEntry.cpp | 40 ---- ddprof-lib/src/main/cpp/vmEntry.h | 19 ++ ddprof-lib/src/test/cpp/safefetch_ut.cpp | 179 ++++++++++++++++++ ...Test.java => VtableReceiverFrameTest.java} | 16 +- 14 files changed, 465 insertions(+), 255 deletions(-) rename ddprof-test/src/test/java/com/datadoghq/profiler/cpu/{VtableTargetPreregistrationTest.java => VtableReceiverFrameTest.java} (83%) diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index b7bc366a4..52b55d78b 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -164,9 +164,11 @@ Error Arguments::parse(const char *args) { msg = "cpu must be >= 0"; } // vtable_target: resolve vtable/itable stub receiver classes in CPU traces. - // Cost: one JVMTI GetLoadedClasses enumeration at profiler start and at - // each JFR dump; incremental per-class insert on ClassPrepare. The - // signal-handler path itself only does a lock-free bounded_lookup (no malloc). + // Signal handler stores the raw receiver VMSymbol* in a BCI_VTABLE_RECEIVER + // frame (no lock, no map lookup, no allocation). Resolution happens at dump + // time via SafeAccess-protected reads in Lookup::resolveVTableReceiver, + // which is crash-safe against concurrent class unloading. _class_map only + // grows with classes actually sampled during the chunk. _features.vtable_target = 1; CASE("wall") diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 29f621de1..9f41ab32c 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -67,6 +67,7 @@ X(REMOTE_SYMBOLICATION_FRAMES, "remote_symbolication_frames") \ X(REMOTE_SYMBOLICATION_LIBS_WITH_BUILD_ID, "remote_symbolication_libs_with_build_id") \ X(REMOTE_SYMBOLICATION_BUILD_ID_CACHE_HITS, "remote_symbolication_build_id_cache_hits") \ + X(VTABLE_RECEIVER_RESOLVE_FAILED, "vtable_receiver_resolve_failed") \ X(THREAD_ENTRY_MARK_DETECTIONS, "thread_entry_mark_detections") \ X(WALKVM_THREAD_INACCESSIBLE, "walkvm_thread_inaccessible") \ X(WALKVM_ANCHOR_NULL, "walkvm_anchor_null") \ diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 2c37ad300..45928c0ad 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -359,18 +359,128 @@ void Lookup::fillJavaMethodInfo(MethodInfo *mi, jmethodID method, jni->PopLocalFrame(NULL); } +bool Lookup::resolveVTableReceiver(VMSymbol *sym, char *buf, size_t bufsize, + u32 *out_class_id) { + if (sym == nullptr || !SafeAccess::isReadable(sym)) { + return false; + } + // Read the 4-byte word containing the u2 length field. In all HotSpot + // versions we support the length is at offset 0 of Symbol; we still go + // through VMStructs in case that ever changes. The low 16 bits hold the + // length on little-endian targets (all supported platforms). + int32_t *len_word_addr = + (int32_t *)((char *)sym + VMSymbol::lengthOffset()); + int32_t w1 = SafeAccess::safeFetch32(len_word_addr, -1); + int32_t w2 = SafeAccess::safeFetch32(len_word_addr, 0); + if (w1 == -1 && w2 == 0) { + return false; + } + unsigned len = (unsigned)(w1 & 0xFFFF); + // Bounds: a usable internal class name needs at least 1 byte (single-char + // descriptors like "B"/"C" for primitives never appear as vtable receivers + // because primitives can't be receivers of virtual or interface dispatch). + // Upper bound is the caller-provided buffer; class names above this length + // are dropped — operators see VTABLE_RECEIVER_RESOLVE_FAILED rise. + if (len == 0 || len > bufsize) { + return false; + } + const void *body = (const char *)sym + VMSymbol::bodyOffset(); + if (!SafeAccess::safeCopy(buf, body, len)) { + return false; + } + // Reject anything that doesn't look like a JVM internal class name. + // Valid bytes for slash-separated internal names: '/', '$', '[', ';', '_', + // alnum. Rejecting reduces — but does not eliminate — the case where the + // Symbol slot was reused for unrelated data that happens to be printable. + for (unsigned i = 0; i < len; i++) { + unsigned char c = (unsigned char)buf[i]; + if (c < 0x20 || c >= 0x7F) { + return false; + } + } + u32 class_id = _classes->lookup(buf, len); + // Apply synthetic-accessor/LambdaForm normalisation so that the many + // distinct names HotSpot generates for these families (..Accessor1234, + // LambdaForm$MH/0x...) collapse to one bucket each in the JFR class pool. + // Folding the normalisation inside resolveVTableReceiver keeps the call + // site in resolveMethod minimal and ensures the cache stores normalised + // class ids (so MethodMap deduplication works for these families too). + if (has_prefix_n(buf, len, + "jdk/internal/reflect/GeneratedConstructorAccessor")) { + class_id = + _classes->lookup("jdk/internal/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix_n(buf, len, "sun/reflect/GeneratedConstructorAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedConstructorAccessor"); + } else if (has_prefix_n(buf, len, + "jdk/internal/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("jdk/internal/reflect/GeneratedMethodAccessor"); + } else if (has_prefix_n(buf, len, "sun/reflect/GeneratedMethodAccessor")) { + class_id = _classes->lookup("sun/reflect/GeneratedMethodAccessor"); + } else if (has_prefix_n(buf, len, "java/lang/invoke/LambdaForm$")) { + size_t prefix_len = strlen("java/lang/invoke/LambdaForm$"); + const char *suffix = buf + prefix_len; + size_t suffix_len = len - prefix_len; + if (suffix_len >= 2 && suffix[0] == 'M' && suffix[1] == 'H') { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$MH"); + } else if (suffix_len >= 3 && suffix[0] == 'B' && suffix[1] == 'M' && + suffix[2] == 'H') { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$BMH"); + } else if (suffix_len >= 3 && suffix[0] == 'D' && suffix[1] == 'M' && + suffix[2] == 'H') { + class_id = _classes->lookup("java/lang/invoke/LambdaForm$DMH"); + } + } + *out_class_id = class_id; + return true; +} + +u32 Lookup::resolveVTableReceiverCached(void *sym) { + auto cached = _vtable_receiver_cache.find(sym); + if (cached != _vtable_receiver_cache.end()) { + return cached->second; + } + // Stack buffer sized to fit virtually every real class name. HotSpot + // Symbol length is u2 (max 65535); names beyond 4096 bytes are rare + // (deeply nested LambdaForm signatures, large CGLIB proxies) and are + // recorded as resolve failures via the sentinel below. + char buf[4096]; + u32 class_id = 0; + if (!resolveVTableReceiver((VMSymbol *)sym, buf, sizeof(buf), &class_id)) { + Counters::increment(VTABLE_RECEIVER_RESOLVE_FAILED); + // Explicit sentinel so JFR renders an obvious "we couldn't read it" + // marker instead of an empty class name (which is indistinguishable + // from a parser/encoder error downstream). + class_id = _classes->lookup(""); + } + _vtable_receiver_cache[sym] = class_id; + return class_id; +} + MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { static const char* UNKNOWN = "unknown"; unsigned long key; jint bci = frame.bci; jmethodID method = frame.method_id; + + // BCI_VTABLE_RECEIVER: method holds a VMSymbol* (see vmEntry.h). Resolve + // to a class_id via the per-dump cache once, then key MethodMap by the + // resolved class_id so two distinct Symbol addresses for the same class + // name (class unload + reload within a chunk) collapse to one MethodInfo + // row. + u32 vtable_class_id = 0; + if (bci == BCI_VTABLE_RECEIVER) { + vtable_class_id = resolveVTableReceiverCached((void *)method); + } + if (method == nullptr) { key = MethodMap::makeKey(UNKNOWN); } else if (bci == BCI_ERROR || bci == BCI_NATIVE_FRAME) { key = MethodMap::makeKey(frame.native_function_name); } else if (bci == BCI_NATIVE_FRAME_REMOTE) { key = MethodMap::makeKey(frame.packed_remote_frame); + } else if (bci == BCI_VTABLE_RECEIVER) { + key = MethodMap::makeVTableReceiverKey(vtable_class_id); } else { FrameTypeId frame_type = FrameType::decode(bci); assert(frame_type == FRAME_INTERPRETED || frame_type == FRAME_JIT_COMPILED || @@ -427,49 +537,14 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { TEST_LOG("WARNING: Library lookup failed for index %u", lib_index); fillNativeMethodInfo(mi, "unknown_library", nullptr); } - } else if (bci == BCI_ALLOC) { - // Synthetic vtable-receiver frame from hotspotSupport.cpp:walkVM. - // method_id holds a raw class_id from _class_map (the same Dictionary as _classes). - // - // _class_cache is safe to use here because vtable-receiver classes are always - // pre-registered via preregisterLoadedClasses() before profiling starts. - // walkVM uses bounded_lookup(size_limit=0), which never inserts, so a non-INT_MAX - // class_id guarantees the class was already in _class_map when initClassCache() - // ran. No lock is needed: the snapshot pointers remain valid for the full - // writeCpool() duration because _class_map.clear() only runs after writeCpool returns. - // - // _class_cache is intentionally NOT used as the source for writeClasses(). - // fillJavaMethodInfo() (called during writeStackTraces/writeMethods) inserts - // every Java class into _classes. writeClasses() must collect _classes fresh - // after those insertions to obtain the complete class pool; otherwise class names - // would be missing from JFR and stack-trace strings would show null class names. - u32 raw_class_id = (u32)(uintptr_t)method; - u32 class_id = raw_class_id; - auto it = _class_cache.find(raw_class_id); - if (it != _class_cache.end()) { - const char* name = it->second; - // Mirrors normalisation in fillJavaMethodInfo; names are slash-separated, - // no L-prefix or ;-suffix. - if (has_prefix(name, "jdk/internal/reflect/GeneratedConstructorAccessor")) { - class_id = _classes->lookup("jdk/internal/reflect/GeneratedConstructorAccessor"); - } else if (has_prefix(name, "sun/reflect/GeneratedConstructorAccessor")) { - class_id = _classes->lookup("sun/reflect/GeneratedConstructorAccessor"); - } else if (has_prefix(name, "jdk/internal/reflect/GeneratedMethodAccessor")) { - class_id = _classes->lookup("jdk/internal/reflect/GeneratedMethodAccessor"); - } else if (has_prefix(name, "sun/reflect/GeneratedMethodAccessor")) { - class_id = _classes->lookup("sun/reflect/GeneratedMethodAccessor"); - } else if (has_prefix(name, "java/lang/invoke/LambdaForm$")) { - const char* suffix = name + strlen("java/lang/invoke/LambdaForm$"); - if (has_prefix(suffix, "MH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$MH"); - } else if (has_prefix(suffix, "BMH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$BMH"); - } else if (has_prefix(suffix, "DMH")) { - class_id = _classes->lookup("java/lang/invoke/LambdaForm$DMH"); - } - } - } - mi->_class = class_id; + } else if (bci == BCI_VTABLE_RECEIVER) { + // Synthetic vtable-receiver frame: method_id holds a VMSymbol* + // captured in walkVM. The Symbol -> class_id resolution (with + // synthetic-accessor/LambdaForm normalisation) was already done + // above via resolveVTableReceiverCached, which also handles + // resolution failures by mapping them to "" + // and incrementing VTABLE_RECEIVER_RESOLVE_FAILED. + mi->_class = vtable_class_id; mi->_name = _symbols.lookup(""); mi->_sig = _symbols.lookup("()V"); mi->_type = FRAME_NATIVE; diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index 21c8f3c72..18019e12e 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -8,6 +8,7 @@ #define _FLIGHTRECORDER_H #include +#include #include #include @@ -28,6 +29,8 @@ #include "threadIdTable.h" #include "vmEntry.h" +class VMSymbol; // hotspot/vmStructs.h + const u64 MAX_JLONG = 0x7fffffffffffffffULL; const u64 MIN_JLONG = 0x8000000000000000ULL; const int MAX_JFR_EVENT_SIZE = 256; @@ -115,13 +118,16 @@ class MethodInfo { // 3) Encoded RemoteFrameInfo // The values of the keys are potentially overlapping, so we use // the highest 2 bits to distinguish them. -// 00 - jmethodID -// 10 - void* address -// 01 - RemoteFrameInfo +// Key encoding (top two bits): +// 00 - jmethodID +// 10 - void* address (native frame names) +// 01 - RemoteFrameInfo (packed remote symbolication) +// 11 - vtable_receiver class_id (BCI_VTABLE_RECEIVER frames) class MethodMap : public std::map { public: static constexpr unsigned long ADDRESS_MARK = 0x8000000000000000ULL; static constexpr unsigned long REMOTE_FRAME_MARK = 0x4000000000000000ULL; + static constexpr unsigned long VTABLE_RECEIVER_MARK = ADDRESS_MARK | REMOTE_FRAME_MARK; static constexpr unsigned long KEY_TYPE_MASK = ADDRESS_MARK | REMOTE_FRAME_MARK; MethodMap() {} @@ -142,6 +148,15 @@ class MethodMap : public std::map { unsigned long key = packed_remote_frame; assert((key & KEY_TYPE_MASK) == 0); return (key | REMOTE_FRAME_MARK);} + + // BCI_VTABLE_RECEIVER frames key by the resolved class_id (not by the + // VMSymbol* captured at sample time), so two distinct Symbol addresses + // for the same class name collapse to a single MethodInfo row. + static unsigned long makeVTableReceiverKey(u32 class_id) { + unsigned long key = (unsigned long)class_id; + assert((key & KEY_TYPE_MASK) == 0); + return (key | VTABLE_RECEIVER_MARK); + } }; class Recording { @@ -307,6 +322,13 @@ class Lookup { MethodMap *_method_map; Dictionary *_classes; std::map _class_cache; // snapshot of _classes, populated once at dump time + // Per-dump VMSymbol* -> resolved class_id cache for BCI_VTABLE_RECEIVER + // frames. Two purposes: (1) amortise the SafeAccess work to once per + // distinct Symbol pointer per dump; (2) the resolved class_id is used + // as the MethodMap key, so distinct Symbol* addresses for the same + // class name (class unload/reload mid-chunk) collapse to a single + // MethodInfo row. + std::unordered_map _vtable_receiver_cache; Dictionary _packages; Dictionary _symbols; @@ -319,6 +341,29 @@ class Lookup { bool has_prefix(const char *str, const char *prefix) const { return strncmp(str, prefix, strlen(prefix)) == 0; } + // Length-bounded variant for buffers that may not be NUL-terminated. + bool has_prefix_n(const char *buf, size_t buf_len, const char *prefix) const { + size_t plen = strlen(prefix); + return buf_len >= plen && strncmp(buf, prefix, plen) == 0; + } + + // Resolves a VMSymbol* captured at sample time (BCI_VTABLE_RECEIVER) into a + // class id in _classes, applying the synthetic-accessor/LambdaForm + // normalisation inline. Crash-safe under concurrent class unloading: all + // reads of the Symbol go through SafeAccess (safefetch + bounded copy), so + // a Symbol freed and its page unmapped between sample and dump cannot + // SIGSEGV the dump thread. On success returns true and fills *out_class_id + // with the normalised class id. `buf` is a working area used internally; + // its contents on return are unspecified. + bool resolveVTableReceiver(VMSymbol *sym, char *buf, size_t bufsize, + u32 *out_class_id); + + // Cache wrapper: look up Symbol* in _vtable_receiver_cache; on miss, + // resolve via resolveVTableReceiver and cache the result. On any + // resolution failure (SafeAccess fault, length out of range, non-printable + // bytes) returns the sentinel "" class_id and + // increments VTABLE_RECEIVER_RESOLVE_FAILED. + u32 resolveVTableReceiverCached(void *sym); public: Lookup(Recording *rec, MethodMap *method_map, Dictionary *classes) diff --git a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp index b63b66e1d..a85e6b053 100644 --- a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp +++ b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp @@ -535,22 +535,16 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex uintptr_t receiver = frame.jarg0(); if (receiver != 0) { VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); - // walkVM runs in a signal handler. _class_map is mutated - // under _class_map_lock (shared by Profiler::lookupClass - // inserters, exclusive by _class_map.clear() in the dump - // path between unlockAll() and lock()). bounded_lookup - // with size_limit=0 never inserts (no malloc), but it - // still traverses row->next and reads row->keys, which - // clear() concurrently frees. Take the lock shared via - // try-lock; if an exclusive clear() is in progress, drop - // the synthetic frame rather than read freed memory. - auto guard = profiler->classMapTrySharedGuard(); - if (guard.ownsLock()) { - u32 class_id = profiler->classMap()->bounded_lookup( - symbol->body(), symbol->length(), 0); - if (class_id != INT_MAX) { - fillFrame(frames[depth++], BCI_ALLOC, class_id); - } + // Store the raw VMSymbol* in the frame's method_id + // slot. BCI_VTABLE_RECEIVER (vmEntry.h) repurposes + // method_id for this pointer — same precedent as + // BCI_NATIVE_FRAME storing const char* and + // BCI_NATIVE_FRAME_REMOTE storing a packed blob. + // Resolution happens at dump time via SafeAccess so + // a concurrent class-unload + Symbol free cannot + // crash the dump thread (see Lookup::resolveVTableReceiver). + if (symbol != nullptr) { + fillFrame(frames[depth++], BCI_VTABLE_RECEIVER, (void*)symbol); } } } diff --git a/ddprof-lib/src/main/cpp/hotspot/vmStructs.h b/ddprof-lib/src/main/cpp/hotspot/vmStructs.h index 13aea46af..7459aef40 100644 --- a/ddprof-lib/src/main/cpp/hotspot/vmStructs.h +++ b/ddprof-lib/src/main/cpp/hotspot/vmStructs.h @@ -606,6 +606,12 @@ DECLARE(VMSymbol) assert(_symbol_body_offset >= 0); return at(_symbol_body_offset); } + + // Public accessors for safefetch-based dump-time resolution (no `this` + // deref): used to compute the address of the length/body fields without + // touching the Symbol's memory, so callers can probe with SafeAccess. + static int lengthOffset() { return _symbol_length_offset; } + static int bodyOffset() { return _symbol_body_offset; } DECLARE_END DECLARE(VMClassLoaderData) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index df4eea1cc..56e40557f 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -1207,9 +1207,9 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("No profiling events specified"); } - // Commit _features before the reset/preregister block so ClassPrepare - // callbacks (which gate on _features.vtable_target) see the correct enabled - // state from the moment preregisterLoadedClasses releases the class-map lock. + // Commit _features before the reset block so any signal-handler code that + // reads _features.* observes the correct enabled state once profiling + // engines start. _features = args._features; if (VM::hotspot_version() < 8) { _features.java_anchor = 0; @@ -1236,11 +1236,6 @@ Error Profiler::start(Arguments &args, bool reset) { ExclusiveLockGuard guard(&_class_map_lock); _class_map.clear(); } - // preregisterLoadedClasses manages _class_map_lock internally; callers must - // not hold it (JVMTI enumeration is lock-free; inserts take the lock). - if (_features.vtable_target && VMStructs::hasClassNames()) { - preregisterLoadedClasses(VM::jvmti()); - } // Reset call trace storage if (!_omit_stacktraces) { @@ -1252,11 +1247,6 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset thread names and IDs _thread_info.clearAll(); - } else if (_features.vtable_target && VMStructs::hasClassNames()) { - // Resume: classes loaded while the profiler was stopped miss ClassPrepare - // (JVMTI events are off while stopped). Re-snapshot without clearing so - // existing valid entries are preserved. - preregisterLoadedClasses(VM::jvmti()); } // (Re-)allocate calltrace buffers @@ -1426,11 +1416,6 @@ Error Profiler::start(Arguments &args, bool reset) { _state.store(RUNNING, std::memory_order_release); _start_time = time(NULL); __atomic_add_fetch(&_epoch, 1, __ATOMIC_RELAXED); - // Second snapshot: catch classes prepared during the engine-startup window - // (between _features.vtable_target=1 and the RUNNING store above). - if (_features.vtable_target && VMStructs::hasClassNames()) { - preregisterLoadedClasses(VM::jvmti()); - } return Error::OK; } // no engine was activated; perform cleanup @@ -1613,13 +1598,12 @@ Error Profiler::dump(const char *path, const int length) { // in processTraces() already handles clearing old traces while preserving // traces referenced by surviving LivenessTracker objects unlockAll(); - if (_features.vtable_target && VMStructs::hasClassNames()) { - // clear_first=true: clears the map before GetLoadedClasses to close the - // race where a ClassPrepare insertion (between the old-placement clear and - // the snapshot) would be wiped. ClassPrepare callbacks that fire after the - // clear insert via shared lock and survive — Phase 2 does not clear. - preregisterLoadedClasses(VM::jvmti(), /*clear_first=*/true); - } else { + // Clear the class map at end-of-dump. Class IDs are per-chunk in the JFR + // format, so the dump just completed re-populated _classes from the names + // it actually needed; the runtime map can start fresh for the next chunk. + // Working-set bound: only classes touched by the next chunk's samples + // re-enter the map. + { ExclusiveLockGuard guard(&_class_map_lock); _class_map.clear(); } @@ -1762,107 +1746,6 @@ int Profiler::lookupClass(const char *key, size_t length) { return -1; } -void Profiler::preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first) { -#ifndef NDEBUG - // SpinLock has no owner-tracking API, so this check is partial. - // Partial re-entrancy check: tryLockShared() succeeding proves no thread - // holds the lock exclusively right now, so Phase 0's exclusive-lock attempt - // will not immediately deadlock. It does NOT detect a caller that already - // holds a shared lock — that caller would deadlock if Phase 0 later tries to - // upgrade to exclusive. No known call path creates that scenario, but the - // check is advisory only, not a full safety guarantee. - bool can_take_shared = _class_map_lock.tryLockShared(); - assert(can_take_shared && "_class_map_lock must not be held exclusively on entry"); - if (can_take_shared) { - _class_map_lock.unlockShared(); - } -#endif - if (jvmti == nullptr) { - return; - } - // Phase 0: clear the map BEFORE taking the GetLoadedClasses snapshot. - // This closes the race: any ClassPrepare that fires after the clear - // inserts into the map via shared lock, and Phase 2 does not re-clear, - // so those insertions survive. - // Tradeoff: the window where signal handlers see an empty map is widened - // to GetLoadedClasses + Phase-1 duration; this is an accepted cost of - // eliminating the race. - if (clear_first) { - ExclusiveLockGuard guard(&_class_map_lock); - _class_map.clear(); - } - JNIEnv* jni = VM::jni(); - jint class_count = 0; - jclass* classes = nullptr; - jvmtiError err = jvmti->GetLoadedClasses(&class_count, &classes); - if (err != JVMTI_ERROR_NONE) { - // If clear_first=false the map retains stale entries; if clear_first=true - // it was already cleared above and is now empty. Either way, vtable-target - // frames will be resolved gradually as ClassPrepare events fire. - if (clear_first) { - Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map was cleared and is now empty, vtable-target frames may miss until ClassPrepare fires", err); - } else { - Log::warn("preregisterLoadedClasses: GetLoadedClasses failed (%d) — class map left unchanged (may be stale), vtable-target frames may miss until ClassPrepare fires", err); - } - return; - } - if (class_count == 0) { - if (classes != nullptr) { - jvmti->Deallocate(reinterpret_cast(classes)); - } - return; - } - // Phase 1 (no lock): enumerate signatures and copy normalized slices. - // normalizeClassSignature returns a pointer INTO the JVMTI sig buffer, so - // the slice is copied into a std::string before Deallocate invalidates it. - std::vector sigs; - sigs.reserve(class_count); - jint sig_failures = 0; - for (jint i = 0; i < class_count; i++) { - char* sig = nullptr; - if (jvmti->GetClassSignature(classes[i], &sig, nullptr) != JVMTI_ERROR_NONE) { - ++sig_failures; - if (jni != nullptr) jni->DeleteLocalRef(classes[i]); - continue; - } - if (sig == nullptr) { - if (jni != nullptr) jni->DeleteLocalRef(classes[i]); - continue; - } - if (sig[0] != 'L' && sig[0] != '[') { - jvmti->Deallocate(reinterpret_cast(sig)); - if (jni != nullptr) jni->DeleteLocalRef(classes[i]); - continue; - } - const char* slice = nullptr; - size_t slice_len = 0; - if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - sigs.emplace_back(slice, slice_len); - } - jvmti->Deallocate(reinterpret_cast(sig)); - if (jni != nullptr) jni->DeleteLocalRef(classes[i]); - } - jvmti->Deallocate(reinterpret_cast(classes)); - if (sig_failures > 0) { - Log::warn("preregisterLoadedClasses: GetClassSignature failed for %d/%d class(es) — those vtable-target frames may be missing until ClassPrepare fires", sig_failures, class_count); - } - // Phase 2 (shared lock): bulk-insert the collected names. SharedLockGuard is - // sufficient: concurrent inserters (ClassPrepare callbacks, lookupClass) already - // run under the shared lock; only clear() in Phase 0 needs the exclusive lock. - // Using ExclusiveLockGuard here would block signal-handler readers (bounded_lookup - // in vtable_target and tryLockShared in object sampler) for the full bulk-insert - // duration, dropping samples unnecessarily. - // NOTE: no clear here. When clear_first=true, Phase 0 already cleared the map - // before Phase 1, so concurrent ClassPrepare insertions from that window survive. - // When clear_first=false the map is additive and no ClassPrepare entries are lost. - { - SharedLockGuard guard(&_class_map_lock); - for (const std::string& s : sigs) { - (void)_class_map.lookup(s.c_str(), s.size()); - } - } -} - int Profiler::status(char* status, int max_len) { return snprintf(status, max_len, "== Java-Profiler Status ===\n" diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index d54e0710a..ae61013a4 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -157,22 +157,6 @@ class alignas(alignof(SpinLock)) Profiler { return _state.load(std::memory_order_relaxed); } - // Pre-populate _class_map with all currently-loaded 'L'-type (reference) - // and array ('[') class signatures so that signal-safe lookups in walkVM - // (vtable_target) can resolve them without ever needing to malloc. Only bare - // primitive type descriptors (I, B, C, etc.) are skipped. Caller must NOT hold - // _class_map_lock; this function acquires it internally across three phases: - // Phase 0 (clear_first=true only): exclusive lock to clear the map. - // Phase 1 (no lock): JVMTI snapshot + local enumeration; concurrent - // ClassPrepare callbacks may insert via shared lock during this window. - // Phase 2 (always): shared lock for the bulk-insert of Phase 1 names; - // multiple concurrent shared-lock holders are safe because Dictionary - // uses CAS for slot allocation; exclusive lock from Phase 0 is what - // prevents a concurrent clear() from running during Phase 1 or 2; - // no re-clear here, so any Phase 1 ClassPrepare insertions survive. - // Runs on a JVM thread (never in a signal handler). - void preregisterLoadedClasses(jvmtiEnv* jvmti, bool clear_first = false); - public: Profiler() : _state_lock(), _state(State::NEW), _class_unload_hook_trap(2), @@ -235,7 +219,6 @@ class alignas(alignof(SpinLock)) Profiler { Dictionary *classMap() { return &_class_map; } SharedLockGuard classMapSharedGuard() { return SharedLockGuard(&_class_map_lock); } - BoundedOptionalSharedLockGuard classMapTrySharedGuard() { return BoundedOptionalSharedLockGuard(&_class_map_lock); } Dictionary *stringLabelMap() { return &_string_label_map; } Dictionary *contextValueMap() { return &_context_value_map; } u32 numContextAttributes() { return _num_context_attributes; } diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 8f1db9a3e..ce650f3c2 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -152,6 +152,59 @@ extern "C" int64_t safefetch64_cont(int64_t* adr, int64_t errValue); #endif #endif +bool SafeAccess::safeCopy(void* dst, const void* src, size_t len) { + // Two-sentinel pattern (same as isReadable): a real-data word may equal + // one sentinel by chance, but not both — if both fetches return their + // sentinel, the access truly faulted. + // + // All safefetch32 loads issued here use 4-byte-aligned addresses. Pages + // are 4 KiB (or 16 KiB on Apple Silicon), both divisible by 4, so an + // aligned 4-byte load never spans a page boundary. The only fault + // possible is when the aligned address itself lies in an unmapped page; + // we never spuriously fault on an over-read past `src + len`. + static const int32_t SENT_A = (int32_t)0x55AA55AA; + static const int32_t SENT_B = (int32_t)0xAA55AA55; + uint8_t* d = (uint8_t*)dst; + const uint8_t* s = (const uint8_t*)src; + size_t i = 0; + + // Front fixup: if `src` is not 4-byte aligned, fetch at the previous + // aligned address (1..3 bytes before src). That address lies in the + // same 4-byte word as src — and since pages are 4-byte aligned, in + // the same page as src. The leading k bytes of the fetched word lie + // before the caller's range and are discarded via the +k offset; they + // never reach `dst`. + size_t k = (uintptr_t)s & 3u; + if (k != 0 && i < len) { + int32_t* aligned = (int32_t*)(s - k); + int32_t v1 = safefetch32_impl(aligned, SENT_A); + int32_t v2 = safefetch32_impl(aligned, SENT_B); + if (v1 == SENT_A && v2 == SENT_B) { + return false; + } + size_t take = (4 - k < len) ? (4 - k) : len; + memcpy(d, ((const uint8_t*)&v1) + k, take); + i = take; + } + + // Middle + tail: (s + i) is now 4-byte aligned. The final iteration may + // load up to 3 over-read bytes past `src + len`, but those bytes sit in + // the same 4-byte-aligned word and therefore the same page as the bytes + // we actually wanted — never a fault from the over-read alone. + while (i < len) { + int32_t* aligned = (int32_t*)(s + i); + int32_t v1 = safefetch32_impl(aligned, SENT_A); + int32_t v2 = safefetch32_impl(aligned, SENT_B); + if (v1 == SENT_A && v2 == SENT_B) { + return false; + } + size_t chunk = (len - i >= 4) ? 4 : (len - i); + memcpy(d + i, &v1, chunk); // memcpy from local — no UAF risk + i += chunk; + } + return true; +} + bool SafeAccess::handle_safefetch(int sig, void* context) { ucontext_t* uc = (ucontext_t*)context; uintptr_t pc = uc->current_pc; diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index b8d8b4177..43ed9ce3a 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -64,6 +64,14 @@ class SafeAccess { return safefetch64_impl(ptr, errorValue); } + // Copies up to len bytes from src to dst using safefetch32_impl so that a + // page-unmap or repurpose of src memory during the copy does not crash the + // process. Returns true on full success, false if any read faulted. dst must + // have at least len bytes capacity; reads from src may over-read up to 3 + // bytes past src+len (over-read is also safefetch-protected). + NOINLINE + static bool safeCopy(void* dst, const void* src, size_t len); + static bool handle_safefetch(int sig, void* context); // NOINLINE functions with stable addresses for JVM patching (vmStructs.cpp) diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index 7f20e9668..f93edeea9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -639,46 +639,6 @@ void VM::loadAllMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni) { void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, jclass klass) { loadMethodIDs(jvmti, jni, klass); - - // Pre-populate _class_map for vtable_target signal-safe lookup. ClassPrepare - // runs on a JVM thread (never a signal handler), so we take a blocking - // shared lock via classMapSharedGuard() rather than the best-effort - // tryLockShared() in lookupClass(). This ensures newly prepared classes are - // never silently skipped while an exclusive dump/reset is in flight. Gate on - // vtable_target to avoid overhead when the feature is disabled. - // Memory-ordering: _features.vtable_target is written in Profiler::start() - // before the release store to _state. The acquire load of _state in - // isRunning() above establishes happens-before with that write, so reading - // _features.vtable_target here is safe without a separate atomic. - Profiler* profiler = Profiler::instance(); - if (profiler == nullptr || !profiler->isRunning() || !profiler->stackWalkFeatures().vtable_target) { - return; - } - char* sig = nullptr; - jvmtiError err = jvmti->GetClassSignature(klass, &sig, nullptr); - if (err != JVMTI_ERROR_NONE) { - static std::atomic warn_count{0}; - if (warn_count.fetch_add(1, std::memory_order_relaxed) == 0) { - Log::warn("ClassPrepare: GetClassSignature failed (%d) — class skipped for vtable-target pre-registration (further failures suppressed)", err); - } - return; - } - if (sig == nullptr) { - return; - } - // Only 'L'-type (reference) and array ('[') signatures can match vtable_target - // lookup keys; skip bare primitive type descriptors (I, B, C, etc.). - if (sig[0] != 'L' && sig[0] != '[') { - jvmti->Deallocate(reinterpret_cast(sig)); - return; - } - const char* slice = nullptr; - size_t slice_len = 0; - if (ObjectSampler::normalizeClassSignature(sig, &slice, &slice_len)) { - SharedLockGuard guard = profiler->classMapSharedGuard(); - (void)profiler->classMap()->lookup(slice, slice_len); - } - jvmti->Deallocate(reinterpret_cast(sig)); } void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) { diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index b68ce4ae2..42af173a9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -34,6 +34,25 @@ enum ASGCT_CallFrameType { BCI_ERROR = -18, // method_id is an error string BCI_NATIVE_FRAME_REMOTE = -19, // method_id points to RemoteFrameInfo for remote symbolication BCI_NATIVE_MALLOC = -20, // native malloc/free sample (size stored in counter) + // method_id holds a VMSymbol* (the receiver class's name Symbol), + // NOT a jmethodID. The pointer is captured in the signal handler + // (hotspotSupport.cpp:walkVM) and resolved at dump time via SafeAccess + // in Lookup::resolveVTableReceiver. Same precedent as BCI_NATIVE_FRAME + // (const char* in method_id) and BCI_NATIVE_FRAME_REMOTE (packed + // 64-bit blob). Any reader iterating frames must check bci BEFORE + // dereferencing method_id as a jmethodID. + // + // Limitation: CallTraceHashTable::calcHash mixes the raw bytes of the + // frames array (including method_id) into the trace id. Two samples + // of the same logical class whose Symbol* address differs (class + // unload + reload within a chunk) produce distinct trace ids; this + // is accepted because normalising at sample time would require an + // in-signal-handler Symbol read, which the redesign explicitly + // avoids. The dump-time MethodMap key is class_id-based (see + // MethodMap::makeKey(u32)), so the synthetic + // MethodInfo collapses across distinct Symbol* addresses even though + // the CallTrace itself does not. + BCI_VTABLE_RECEIVER = -21, }; // See hotspot/src/share/vm/prims/forte.cpp diff --git a/ddprof-lib/src/test/cpp/safefetch_ut.cpp b/ddprof-lib/src/test/cpp/safefetch_ut.cpp index 0c0fad37e..f1cf4ae7f 100644 --- a/ddprof-lib/src/test/cpp/safefetch_ut.cpp +++ b/ddprof-lib/src/test/cpp/safefetch_ut.cpp @@ -1,8 +1,10 @@ #include #include #include +#include #include #include +#include #include "safeAccess.h" #include "os.h" @@ -157,3 +159,180 @@ TEST_F(SafeFetchTest, mprotectedMemory64) { munmap(page, 4096); } + +// --------------------------------------------------------------------------- +// SafeAccess::safeCopy — bulk variant of safeFetch{32,64} that copies a byte +// range via the safefetch trampoline. Must: +// - return true and copy the bytes exactly when src is fully readable, +// including when [src, src+len) sits within a few bytes of an unmapped +// page boundary (aligned-load strategy keeps over-reads in-page) +// - return false (no crash) when the requested range itself crosses into +// an unmapped page +// - handle unaligned src by fetching at the previous 4-byte aligned +// address and discarding the leading 1..3 bytes +// - never write past dst[len-1] even when len is not a multiple of 4 +// - not mis-classify real data as a fault when it equals one sentinel +// --------------------------------------------------------------------------- + +TEST_F(SafeFetchTest, safeCopy_happyPath) { + const char src[] = "java/lang/Object"; + char dst[sizeof(src)] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, sizeof(src) - 1)); + EXPECT_EQ(0, memcmp(dst, src, sizeof(src) - 1)); +} + +TEST_F(SafeFetchTest, safeCopy_zeroLength) { + // Even if src is NULL, len=0 must be a no-op success. + char dst[8] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, nullptr, 0)); +} + +TEST_F(SafeFetchTest, safeCopy_shortLength_doesNotOverwriteDst) { + // The internal 4-byte fetch must not overflow dst beyond len bytes. + const char src[] = "AB"; + char dst[8]; + memset(dst, 0x5A, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 2)); + EXPECT_EQ('A', dst[0]); + EXPECT_EQ('B', dst[1]); + // Sentinel bytes 2..7 must be untouched. + for (int i = 2; i < 8; i++) { + EXPECT_EQ((char)0x5A, dst[i]) << "dst[" << i << "] was overwritten"; + } +} + +TEST_F(SafeFetchTest, safeCopy_unmappedSource_returnsFalse) { + // Map a page, then unmap it: the address is now firmly invalid. safeCopy + // must return false rather than SIGSEGV. + void* page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(page, MAP_FAILED); + ASSERT_EQ(0, munmap(page, 4096)); + + char dst[64] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, page, 32)); +} + +TEST_F(SafeFetchTest, safeCopy_protNoneSource_returnsFalse) { + // mprotect-PROT_NONE the page (similar to mprotectedMemory32). safeCopy + // must return false on the first faulting word, not crash. + void* page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(page, MAP_FAILED); + memcpy(page, "ignored", 7); + ASSERT_EQ(0, mprotect(page, 4096, PROT_NONE)); + + char dst[64] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, page, 32)); + + // Restore so munmap can run cleanly. + ASSERT_EQ(0, mprotect(page, 4096, PROT_READ | PROT_WRITE)); + munmap(page, 4096); +} + +TEST_F(SafeFetchTest, safeCopy_tailNearUnmappedBoundary_stillSucceeds) { + // Map two adjacent pages, unmap only the second. Place src so the bytes + // we ask for end inside the mapped page but the (over-read of the) next + // 4-byte word would touch the unmapped page. The aligned-load strategy + // must keep the load within the mapped page → success, not fault. + long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + char* src = mapped_end - 2; // 2 bytes from page boundary, k = 2 + src[0] = 'X'; + src[1] = 'Y'; + + char dst[16]; + memset(dst, 0, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 2)); + EXPECT_EQ('X', dst[0]); + EXPECT_EQ('Y', dst[1]); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_requestedRangeCrossesUnmappedPage_returnsFalse) { + // Distinct from the case above: here the *requested* range itself + // crosses into the unmapped page. safeCopy must legitimately fault + // when it can't read all the bytes the caller asked for. + long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + char* src = mapped_end - 2; + src[0] = 'X'; + src[1] = 'Y'; + + // Asking for 8 bytes pushes 6 bytes into the unmapped page → must fault. + char dst[16] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, src, 8)); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_unalignedSource_allMisalignments) { + // The front fixup must correctly extract leading bytes from the + // previous-aligned-word fetch for every misalignment k ∈ {1, 2, 3}. + static const char kSentinel[] = "ABCDEFGHIJKLMNOP"; // 16 bytes + // Use a 4-byte-aligned buffer so we can shift src forward by k. + alignas(4) char buf[32]; + memcpy(buf + 4, kSentinel, 16); // place payload at aligned offset 4 + + for (size_t k = 1; k <= 3; k++) { + const char* src = buf + 4 + k; // misaligned by k + size_t len = 16 - k; // copy the rest of the payload + char dst[16]; + memset(dst, 0, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, len)) << "k=" << k; + EXPECT_EQ(0, memcmp(dst, kSentinel + k, len)) << "k=" << k; + } +} + +TEST_F(SafeFetchTest, safeCopy_unalignedShortAtPageEnd_stillSucceeds) { + // Combine misalignment with proximity to an unmapped boundary: src is + // misaligned AND only a few bytes from the end of the mapped page. + // The front fixup reads backward (into the same page) → success. + long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + // mapped_end is 4-byte aligned (pages are 4 KiB-aligned). Place src + // 3 bytes back from the boundary so k = 1 and only 3 bytes are wanted. + char* src = mapped_end - 3; + src[0] = 'P'; + src[1] = 'Q'; + src[2] = 'R'; + + char dst[8] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 3)); + EXPECT_EQ('P', dst[0]); + EXPECT_EQ('Q', dst[1]); + EXPECT_EQ('R', dst[2]); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_dataMatchingSingleSentinel_stillSucceeds) { + // The two-sentinel pattern must not mis-classify real data that happens + // to equal one of the sentinels. SENT_A is 0x55AA55AA. + uint32_t real_data = 0x55AA55AA; + char dst[4]; + ASSERT_TRUE(SafeAccess::safeCopy(dst, &real_data, 4)); + EXPECT_EQ(0, memcmp(dst, &real_data, 4)); +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java similarity index 83% rename from ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java rename to ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java index af156bc12..a601c231f 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableTargetPreregistrationTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java @@ -14,7 +14,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; -public class VtableTargetPreregistrationTest extends AbstractProfilerTest { +public class VtableReceiverFrameTest extends AbstractProfilerTest { @Override protected String getProfilerCommand() { @@ -51,10 +51,9 @@ private int profiledWork(Shape... shapes) { // The vtable_target feature inserts a synthetic frame immediately // below a vtable stub frame in the call stack. The receiver class (Circle/Square/Triangle) - // is resolved from _class_map and emitted as the class name for the synthetic frame. - // If preregisterLoadedClasses() was not called (the bug), bounded_lookup returns INT_MAX, - // no synthetic frame is inserted, and the receiver class name never appears next to a - // vtable stub in the JFR output. + // is captured as a VMSymbol* in the signal handler and resolved to a class name at + // dump time via SafeAccess-protected reads. If resolution fails or the synthetic frame + // is dropped, the receiver class name will not appear next to a vtable stub in JFR. @RetryingTest(5) public void testVtableReceiverFrameInCpuSamples() throws Exception { Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); @@ -71,6 +70,9 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { if (frameAccessor == null) continue; for (IItem sample : cpuSamples) { String stackTrace = frameAccessor.getMember(sample); + if (stackTrace != null && stackTrace.contains(".vtable stub()")) { + System.err.println("=VTABLE STUB TRACE=\n" + stackTrace + "\n=END="); + } if (stackTrace != null && stackTrace.contains(".vtable stub()") && stackTrace.contains("") @@ -85,7 +87,7 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { } assertTrue(foundVtableWithReceiver, "No CPU sample contained a vtable stub frame, a synthetic frame, " + - "and a receiver class (Circle/Square/Triangle); preregisterLoadedClasses() may not " + - "have run at CPU-only profiler start, or resolveMethod BCI_ALLOC is broken"); + "and a receiver class (Circle/Square/Triangle); signal-handler VMSymbol* capture or " + + "dump-time SafeAccess resolution in Lookup::resolveVTableReceiver is broken"); } } From ff60ef3242b779631dd840990a0f350e1108a28d Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 26 May 2026 16:43:49 +0200 Subject: [PATCH 25/25] test(vtable_target): match HTML-escaped synthetic frame name JMC's STACK_TRACE_STRING HTML-escapes angle brackets in method names (same as it does for /), so the synthetic frame renders as "<vtable_receiver>". Match on the bare token so the assertion works on every JFR-consumer rendering. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../datadoghq/profiler/cpu/VtableReceiverFrameTest.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java index a601c231f..3648a7c5f 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java @@ -73,9 +73,13 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { if (stackTrace != null && stackTrace.contains(".vtable stub()")) { System.err.println("=VTABLE STUB TRACE=\n" + stackTrace + "\n=END="); } + // JMC's STACK_TRACE_STRING HTML-escapes angle brackets in method + // names (it does the same for /), so the synthetic + // method appears as "<vtable_receiver>" in the rendered string. + // Match on the bare token so the test is robust to either form. if (stackTrace != null && stackTrace.contains(".vtable stub()") - && stackTrace.contains("") + && stackTrace.contains("vtable_receiver") && (stackTrace.contains("Circle") || stackTrace.contains("Square") || stackTrace.contains("Triangle"))) { @@ -86,7 +90,7 @@ public void testVtableReceiverFrameInCpuSamples() throws Exception { if (foundVtableWithReceiver) break; } assertTrue(foundVtableWithReceiver, - "No CPU sample contained a vtable stub frame, a synthetic frame, " + + "No CPU sample contained a vtable stub frame, a vtable_receiver synthetic frame, " + "and a receiver class (Circle/Square/Triangle); signal-handler VMSymbol* capture or " + "dump-time SafeAccess resolution in Lookup::resolveVTableReceiver is broken"); }