diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp index 5b74df5c3..52b55d78b 100644 --- a/ddprof-lib/src/main/cpp/arguments.cpp +++ b/ddprof-lib/src/main/cpp/arguments.cpp @@ -163,6 +163,13 @@ Error Arguments::parse(const char *args) { if (_cpu < 0) { msg = "cpu must be >= 0"; } + // vtable_target: resolve vtable/itable stub receiver classes in CPU traces. + // Signal handler stores the raw receiver VMSymbol* in a BCI_VTABLE_RECEIVER + // frame (no lock, no map lookup, no allocation). Resolution happens at dump + // time via SafeAccess-protected reads in Lookup::resolveVTableReceiver, + // which is crash-safe against concurrent class unloading. _class_map only + // grows with classes actually sampled during the chunk. + _features.vtable_target = 1; CASE("wall") if (value == NULL) { diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 29f621de1..9f41ab32c 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -67,6 +67,7 @@ X(REMOTE_SYMBOLICATION_FRAMES, "remote_symbolication_frames") \ X(REMOTE_SYMBOLICATION_LIBS_WITH_BUILD_ID, "remote_symbolication_libs_with_build_id") \ X(REMOTE_SYMBOLICATION_BUILD_ID_CACHE_HITS, "remote_symbolication_build_id_cache_hits") \ + X(VTABLE_RECEIVER_RESOLVE_FAILED, "vtable_receiver_resolve_failed") \ X(THREAD_ENTRY_MARK_DETECTIONS, "thread_entry_mark_detections") \ X(WALKVM_THREAD_INACCESSIBLE, "walkvm_thread_inaccessible") \ X(WALKVM_ANCHOR_NULL, "walkvm_anchor_null") \ diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp index 789b9debc..45928c0ad 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.cpp +++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp @@ -359,18 +359,128 @@ void Lookup::fillJavaMethodInfo(MethodInfo *mi, jmethodID method, jni->PopLocalFrame(NULL); } +bool Lookup::resolveVTableReceiver(VMSymbol *sym, char *buf, size_t bufsize, + u32 *out_class_id) { + if 
(sym == nullptr || !SafeAccess::isReadable(sym)) { + return false; + } + // Read the 4-byte word containing the u2 length field. In all HotSpot + // versions we support the length is at offset 0 of Symbol; we still go + // through VMStructs in case that ever changes. The low 16 bits hold the + // length on little-endian targets (all supported platforms). + int32_t *len_word_addr = + (int32_t *)((char *)sym + VMSymbol::lengthOffset()); + int32_t w1 = SafeAccess::safeFetch32(len_word_addr, -1); + int32_t w2 = SafeAccess::safeFetch32(len_word_addr, 0); + if (w1 == -1 && w2 == 0) { // both sentinels came back: the read faulted + return false; + } + unsigned len = (unsigned)(w1 & 0xFFFF); + // Bounds: a usable internal class name needs at least 1 byte (single-char + // descriptors like "B"/"C" for primitives never appear as vtable receivers + // because primitives can't be receivers of virtual or interface dispatch). + // Upper bound is the caller-provided buffer; class names above this length + // are dropped — operators see VTABLE_RECEIVER_RESOLVE_FAILED rise. + if (len == 0 || len > bufsize) { + return false; + } + const void *body = (const char *)sym + VMSymbol::bodyOffset(); + if (!SafeAccess::safeCopy(buf, body, len)) { + return false; + } + // Reject anything that doesn't look like a JVM internal class name. + // The filter is loose: every byte must be printable ASCII (0x20..0x7E), + // so non-ASCII names fail too. This reduces — but does not eliminate — the + // case where the Symbol slot was reused for unrelated printable data. + for (unsigned i = 0; i < len; i++) { + unsigned char c = (unsigned char)buf[i]; + if (c < 0x20 || c >= 0x7F) { + return false; + } + } + const char *canonical = nullptr; + // Apply synthetic-accessor/LambdaForm normalisation so that the many + // distinct names HotSpot generates for these families (..Accessor1234, + // LambdaForm$MH/0x...) collapse to one bucket each in the JFR class pool. 
+ // Done here (not in resolveMethod) so the cache stores normalised class + // ids. The raw name is inserted into _classes only when no family + // matches; inserting it first would leave every generated name in the map. + if (has_prefix_n(buf, len, + "jdk/internal/reflect/GeneratedConstructorAccessor")) { + canonical = + "jdk/internal/reflect/GeneratedConstructorAccessor"; + } else if (has_prefix_n(buf, len, "sun/reflect/GeneratedConstructorAccessor")) { + canonical = "sun/reflect/GeneratedConstructorAccessor"; + } else if (has_prefix_n(buf, len, + "jdk/internal/reflect/GeneratedMethodAccessor")) { + canonical = "jdk/internal/reflect/GeneratedMethodAccessor"; + } else if (has_prefix_n(buf, len, "sun/reflect/GeneratedMethodAccessor")) { + canonical = "sun/reflect/GeneratedMethodAccessor"; + } else if (has_prefix_n(buf, len, "java/lang/invoke/LambdaForm$")) { + size_t prefix_len = strlen("java/lang/invoke/LambdaForm$"); + const char *suffix = buf + prefix_len; + size_t suffix_len = len - prefix_len; + if (suffix_len >= 2 && suffix[0] == 'M' && suffix[1] == 'H') { + canonical = "java/lang/invoke/LambdaForm$MH"; + } else if (suffix_len >= 3 && suffix[0] == 'B' && suffix[1] == 'M' && + suffix[2] == 'H') { + canonical = "java/lang/invoke/LambdaForm$BMH"; + } else if (suffix_len >= 3 && suffix[0] == 'D' && suffix[1] == 'M' && + suffix[2] == 'H') { + canonical = "java/lang/invoke/LambdaForm$DMH"; + } + } + *out_class_id = canonical ? _classes->lookup(canonical) : _classes->lookup(buf, len); + return true; +} + +u32 Lookup::resolveVTableReceiverCached(void *sym) { + auto cached = _vtable_receiver_cache.find(sym); + if (cached != _vtable_receiver_cache.end()) { + return cached->second; + } + // Stack buffer sized to fit virtually every real class name. 
HotSpot + // Symbol length is u2 (max 65535); names beyond 4096 bytes are rare + // (deeply nested LambdaForm signatures, large CGLIB proxies) and are + // recorded as resolve failures via the sentinel below. + char buf[4096]; + u32 class_id = 0; + if (!resolveVTableReceiver((VMSymbol *)sym, buf, sizeof(buf), &class_id)) { + Counters::increment(VTABLE_RECEIVER_RESOLVE_FAILED); + // Explicit sentinel so JFR renders an obvious "we couldn't read it" + // marker instead of an empty class name (which is indistinguishable + // from a parser/encoder error downstream). + class_id = _classes->lookup(""); + } + _vtable_receiver_cache[sym] = class_id; + return class_id; +} + MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { static const char* UNKNOWN = "unknown"; unsigned long key; jint bci = frame.bci; jmethodID method = frame.method_id; + + // BCI_VTABLE_RECEIVER: method holds a VMSymbol* (see vmEntry.h). Resolve + // to a class_id via the per-dump cache once, then key MethodMap by the + // resolved class_id so two distinct Symbol addresses for the same class + // name (class unload + reload within a chunk) collapse to one MethodInfo + // row. 
+ u32 vtable_class_id = 0; + if (bci == BCI_VTABLE_RECEIVER) { + vtable_class_id = resolveVTableReceiverCached((void *)method); + } + if (method == nullptr) { key = MethodMap::makeKey(UNKNOWN); } else if (bci == BCI_ERROR || bci == BCI_NATIVE_FRAME) { key = MethodMap::makeKey(frame.native_function_name); } else if (bci == BCI_NATIVE_FRAME_REMOTE) { key = MethodMap::makeKey(frame.packed_remote_frame); + } else if (bci == BCI_VTABLE_RECEIVER) { + key = MethodMap::makeVTableReceiverKey(vtable_class_id); } else { FrameTypeId frame_type = FrameType::decode(bci); assert(frame_type == FRAME_INTERPRETED || frame_type == FRAME_JIT_COMPILED || @@ -427,6 +537,18 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { TEST_LOG("WARNING: Library lookup failed for index %u", lib_index); fillNativeMethodInfo(mi, "unknown_library", nullptr); } + } else if (bci == BCI_VTABLE_RECEIVER) { + // Synthetic vtable-receiver frame: method_id holds a VMSymbol* + // captured in walkVM. The Symbol -> class_id resolution (with + // synthetic-accessor/LambdaForm normalisation) was already done + // above via resolveVTableReceiverCached, which also handles + // resolution failures by mapping them to "" + // and incrementing VTABLE_RECEIVER_RESOLVE_FAILED. + mi->_class = vtable_class_id; + mi->_name = _symbols.lookup(""); + mi->_sig = _symbols.lookup("()V"); + mi->_type = FRAME_NATIVE; + mi->_is_entry = false; } else { fillJavaMethodInfo(mi, method, first_time); } @@ -435,6 +557,18 @@ MethodInfo *Lookup::resolveMethod(ASGCT_CallFrame &frame) { return mi; } +void Lookup::initClassCache() { + // Snapshot _classes into _class_cache for use by resolveMethod(BCI_ALLOC). + // Must be called before writeStackTraces() so the snapshot covers all + // vtable-receiver classes (pre-registered before profiling starts). 
+ // This snapshot is intentionally NOT used by writeClasses(): regular Java + // classes are inserted into _classes by fillJavaMethodInfo() during + // writeStackTraces/writeMethods, so writeClasses() must re-collect after + // those passes to obtain the complete class pool. + auto guard = Profiler::instance()->classMapSharedGuard(); + _classes->collect(_class_cache); +} + u32 Lookup::getPackage(const char *class_name) { const char *package = strrchr(class_name, '/'); if (package == NULL) { @@ -1196,11 +1330,16 @@ void Recording::writeCpool(Buffer *buf) { // constant pool count - bump each time a new pool is added buf->put8(12); - // classMap() is shared across the dump (this thread) and the JVMTI shared-lock - // writers (Profiler::lookupClass and friends). writeClasses() holds - // classMapSharedGuard() for its full duration; the exclusive classMap()->clear() - // in Profiler::dump runs only after this method returns. + // Two-phase classMap locking: initClassCache() takes the shared lock early to + // snapshot vtable-receiver class names for resolveMethod(BCI_ALLOC). The snapshot + // is valid for the whole writeCpool() call because classMap()->clear() (exclusive + // lock) only runs in Profiler::dump after writeCpool() returns. + // writeClasses() takes the shared lock a second time to collect the COMPLETE class + // set: fillJavaMethodInfo() inserts every Java class into _classes during + // writeStackTraces/writeMethods, so the early snapshot would miss them all and + // produce a class pool with null class names in every stack frame. Lookup lookup(this, &_method_map, Profiler::instance()->classMap()); + lookup.initClassCache(); writeFrameTypes(buf); writeThreadStates(buf); writeExecutionModes(buf); @@ -1413,11 +1552,11 @@ void Recording::writeMethods(Buffer *buf, Lookup *lookup) { void Recording::writeClasses(Buffer *buf, Lookup *lookup) { DEBUG_ASSERT_NOT_IN_SIGNAL(); - std::map classes; // Hold classMapSharedGuard() for the full function. 
The const char* pointers // stored in classes point into dictionary row storage; clear() frees that // storage under the exclusive lock, so we must not release the shared lock // until we have finished iterating. + std::map classes; auto guard = Profiler::instance()->classMapSharedGuard(); lookup->_classes->collect(classes); diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h index b8af773ad..18019e12e 100644 --- a/ddprof-lib/src/main/cpp/flightRecorder.h +++ b/ddprof-lib/src/main/cpp/flightRecorder.h @@ -8,6 +8,7 @@ #define _FLIGHTRECORDER_H #include +#include #include #include @@ -28,6 +29,8 @@ #include "threadIdTable.h" #include "vmEntry.h" +class VMSymbol; // hotspot/vmStructs.h + const u64 MAX_JLONG = 0x7fffffffffffffffULL; const u64 MIN_JLONG = 0x8000000000000000ULL; const int MAX_JFR_EVENT_SIZE = 256; @@ -115,13 +118,16 @@ class MethodInfo { // 3) Encoded RemoteFrameInfo // The values of the keys are potentially overlapping, so we use // the highest 2 bits to distinguish them. 
-// 00 - jmethodID -// 10 - void* address -// 01 - RemoteFrameInfo +// Key encoding (top two bits): +// 00 - jmethodID +// 10 - void* address (native frame names) +// 01 - RemoteFrameInfo (packed remote symbolication) +// 11 - vtable_receiver class_id (BCI_VTABLE_RECEIVER frames) class MethodMap : public std::map { public: static constexpr unsigned long ADDRESS_MARK = 0x8000000000000000ULL; static constexpr unsigned long REMOTE_FRAME_MARK = 0x4000000000000000ULL; + static constexpr unsigned long VTABLE_RECEIVER_MARK = ADDRESS_MARK | REMOTE_FRAME_MARK; static constexpr unsigned long KEY_TYPE_MASK = ADDRESS_MARK | REMOTE_FRAME_MARK; MethodMap() {} @@ -142,6 +148,15 @@ class MethodMap : public std::map { unsigned long key = packed_remote_frame; assert((key & KEY_TYPE_MASK) == 0); return (key | REMOTE_FRAME_MARK);} + + // BCI_VTABLE_RECEIVER frames key by the resolved class_id (not by the + // VMSymbol* captured at sample time), so two distinct Symbol addresses + // for the same class name collapse to a single MethodInfo row. + static unsigned long makeVTableReceiverKey(u32 class_id) { + unsigned long key = (unsigned long)class_id; + assert((key & KEY_TYPE_MASK) == 0); + return (key | VTABLE_RECEIVER_MARK); + } }; class Recording { @@ -306,6 +321,14 @@ class Lookup { Recording *_rec; MethodMap *_method_map; Dictionary *_classes; + std::map _class_cache; // snapshot of _classes, populated once at dump time + // Per-dump VMSymbol* -> resolved class_id cache for BCI_VTABLE_RECEIVER + // frames. Two purposes: (1) amortise the SafeAccess work to once per + // distinct Symbol pointer per dump; (2) the resolved class_id is used + // as the MethodMap key, so distinct Symbol* addresses for the same + // class name (class unload/reload mid-chunk) collapse to a single + // MethodInfo row. 
+ std::unordered_map _vtable_receiver_cache; Dictionary _packages; Dictionary _symbols; @@ -318,12 +341,40 @@ class Lookup { bool has_prefix(const char *str, const char *prefix) const { return strncmp(str, prefix, strlen(prefix)) == 0; } + // Length-bounded variant for buffers that may not be NUL-terminated. + bool has_prefix_n(const char *buf, size_t buf_len, const char *prefix) const { + size_t plen = strlen(prefix); + return buf_len >= plen && strncmp(buf, prefix, plen) == 0; + } + + // Resolves a VMSymbol* captured at sample time (BCI_VTABLE_RECEIVER) into a + // class id in _classes, applying the synthetic-accessor/LambdaForm + // normalisation inline. Crash-safe under concurrent class unloading: all + // reads of the Symbol go through SafeAccess (safefetch + bounded copy), so + // a Symbol freed and its page unmapped between sample and dump cannot + // SIGSEGV the dump thread. On success returns true and fills *out_class_id + // with the normalised class id. `buf` is a working area used internally; + // its contents on return are unspecified. + bool resolveVTableReceiver(VMSymbol *sym, char *buf, size_t bufsize, + u32 *out_class_id); + + // Cache wrapper: look up Symbol* in _vtable_receiver_cache; on miss, + // resolve via resolveVTableReceiver and cache the result. On any + // resolution failure (SafeAccess fault, length out of range, non-printable + // bytes) returns the sentinel "" class_id and + // increments VTABLE_RECEIVER_RESOLVE_FAILED. + u32 resolveVTableReceiverCached(void *sym); public: Lookup(Recording *rec, MethodMap *method_map, Dictionary *classes) : _rec(rec), _method_map(method_map), _classes(classes), _packages(), _symbols() {} + // Call once before writeStackTraces. Collects a class-map snapshot into + // _class_cache under the shared lock for resolveMethod (BCI_ALLOC). + // writeClasses() does not use this snapshot; it re-collects _classes itself. 
+ void initClassCache(); + MethodInfo *resolveMethod(ASGCT_CallFrame &frame); u32 getPackage(const char *class_name); u32 getSymbol(const char *name); diff --git a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp index b63b66e1d..a85e6b053 100644 --- a/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp +++ b/ddprof-lib/src/main/cpp/hotspot/hotspotSupport.cpp @@ -535,22 +535,16 @@ __attribute__((no_sanitize("address"))) int HotspotSupport::walkVM(void* ucontex uintptr_t receiver = frame.jarg0(); if (receiver != 0) { VMSymbol* symbol = VMKlass::fromOop(receiver)->name(); - // walkVM runs in a signal handler. _class_map is mutated - // under _class_map_lock (shared by Profiler::lookupClass - // inserters, exclusive by _class_map.clear() in the dump - // path between unlockAll() and lock()). bounded_lookup - // with size_limit=0 never inserts (no malloc), but it - // still traverses row->next and reads row->keys, which - // clear() concurrently frees. Take the lock shared via - // try-lock; if an exclusive clear() is in progress, drop - // the synthetic frame rather than read freed memory. - auto guard = profiler->classMapTrySharedGuard(); - if (guard.ownsLock()) { - u32 class_id = profiler->classMap()->bounded_lookup( - symbol->body(), symbol->length(), 0); - if (class_id != INT_MAX) { - fillFrame(frames[depth++], BCI_ALLOC, class_id); - } + // Store the raw VMSymbol* in the frame's method_id + // slot. BCI_VTABLE_RECEIVER (vmEntry.h) repurposes + // method_id for this pointer — same precedent as + // BCI_NATIVE_FRAME storing const char* and + // BCI_NATIVE_FRAME_REMOTE storing a packed blob. + // Resolution happens at dump time via SafeAccess so + // a concurrent class-unload + Symbol free cannot + // crash the dump thread (see Lookup::resolveVTableReceiver). 
+ if (symbol != nullptr) { + fillFrame(frames[depth++], BCI_VTABLE_RECEIVER, (void*)symbol); } } } diff --git a/ddprof-lib/src/main/cpp/hotspot/vmStructs.h b/ddprof-lib/src/main/cpp/hotspot/vmStructs.h index 13aea46af..7459aef40 100644 --- a/ddprof-lib/src/main/cpp/hotspot/vmStructs.h +++ b/ddprof-lib/src/main/cpp/hotspot/vmStructs.h @@ -606,6 +606,12 @@ DECLARE(VMSymbol) assert(_symbol_body_offset >= 0); return at(_symbol_body_offset); } + + // Public accessors for safefetch-based dump-time resolution (no `this` + // deref): used to compute the address of the length/body fields without + // touching the Symbol's memory, so callers can probe with SafeAccess. + static int lengthOffset() { return _symbol_length_offset; } + static int bodyOffset() { return _symbol_body_offset; } DECLARE_END DECLARE(VMClassLoaderData) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 0bb66b89b..56e40557f 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include #include @@ -1176,7 +1178,7 @@ void Profiler::check_JDK_8313796_workaround() { Error Profiler::start(Arguments &args, bool reset) { MutexLocker ml(_state_lock); - if (_state > IDLE) { + if (state() > IDLE) { return Error("Profiler already started"); } @@ -1205,6 +1207,21 @@ Error Profiler::start(Arguments &args, bool reset) { return Error("No profiling events specified"); } + // Commit _features before the reset block so any signal-handler code that + // reads _features.* observes the correct enabled state once profiling + // engines start. + _features = args._features; + if (VM::hotspot_version() < 8) { + _features.java_anchor = 0; + _features.gc_traces = 0; + } + if (!VMStructs::hasClassNames()) { + _features.vtable_target = 0; + } + if (!VMStructs::hasCompilerStructs()) { + _features.comp_task = 0; + } + if (reset || _start_time == 0) { // Reset counters. 
_sample_seq is intentionally not reset: it is a // monotonically increasing uniqueness generator for correlation IDs and @@ -1215,9 +1232,10 @@ Error Profiler::start(Arguments &args, bool reset) { // Reset dictionaries and bitmaps // Reset class map under lock because ObjectSampler may try to use it while // it is being cleaned up - _class_map_lock.lock(); - _class_map.clear(); - _class_map_lock.unlock(); + { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); + } // Reset call trace storage if (!_omit_stacktraces) { @@ -1260,17 +1278,6 @@ Error Profiler::start(Arguments &args, bool reset) { // Remote symbolication is now inline in ASGCT_CallFrame // No separate pool allocation needed! - _features = args._features; - if (VM::hotspot_version() < 8) { - _features.java_anchor = 0; - _features.gc_traces = 0; - } - if (!VMStructs::hasClassNames()) { - _features.vtable_target = 0; - } - if (!VMStructs::hasCompilerStructs()) { - _features.comp_task = 0; - } _safe_mode = args._safe_mode; if (VM::hotspot_version() < 8 || VM::isZing()) { _safe_mode |= GC_TRACES | LAST_JAVA_PC; @@ -1406,10 +1413,9 @@ Error Profiler::start(Arguments &args, bool reset) { // TODO: find a better way to resolve the thread name. 
onThreadStart(nullptr, nullptr, nullptr); - _state = RUNNING; + _state.store(RUNNING, std::memory_order_release); _start_time = time(NULL); __atomic_add_fetch(&_epoch, 1, __ATOMIC_RELAXED); - return Error::OK; } // no engine was activated; perform cleanup @@ -1427,7 +1433,7 @@ Error Profiler::start(Arguments &args, bool reset) { Error Profiler::stop() { MutexLocker ml(_state_lock); - if (_state != RUNNING) { + if (state() != RUNNING) { return Error("Profiler is not active"); } @@ -1503,13 +1509,13 @@ Error Profiler::stop() { // owned by library metadata, so we must keep library patches active until after serialization LibraryPatcher::unpatch_libraries(); - _state = IDLE; + _state.store(IDLE, std::memory_order_release); return Error::OK; } Error Profiler::check(Arguments &args) { MutexLocker ml(_state_lock); - if (_state > IDLE) { + if (state() > IDLE) { return Error("Profiler already started"); } @@ -1546,7 +1552,7 @@ Error Profiler::check(Arguments &args) { Error Profiler::flushJfr() { MutexLocker ml(_state_lock); - if (_state != RUNNING) { + if (state() != RUNNING) { return Error("Profiler is not active"); } @@ -1563,11 +1569,12 @@ Error Profiler::flushJfr() { Error Profiler::dump(const char *path, const int length) { MutexLocker ml(_state_lock); - if (_state != IDLE && _state != RUNNING) { + State cur_state = state(); + if (cur_state != IDLE && cur_state != RUNNING) { return Error("Profiler has not started"); } - if (_state == RUNNING) { + if (cur_state == RUNNING) { std::set thread_ids; // flush the liveness tracker instance and note all the threads referenced // by the live objects @@ -1591,10 +1598,15 @@ Error Profiler::dump(const char *path, const int length) { // in processTraces() already handles clearing old traces while preserving // traces referenced by surviving LivenessTracker objects unlockAll(); - // Reset classmap - _class_map_lock.lock(); - _class_map.clear(); - _class_map_lock.unlock(); + // Clear the class map at end-of-dump. 
Class IDs are per-chunk in the JFR + // format, so the dump just completed re-populated _classes from the names + // it actually needed; the runtime map can start fresh for the next chunk. + // Working-set bound: only classes touched by the next chunk's samples + // re-enter the map. + { + ExclusiveLockGuard guard(&_class_map_lock); + _class_map.clear(); + } _thread_info.clearAll(thread_ids); _thread_info.reportCounters(); @@ -1657,7 +1669,7 @@ Error Profiler::runInternal(Arguments &args, std::ostream &out) { } case ACTION_STATUS: { MutexLocker ml(_state_lock); - if (_state == RUNNING) { + if (state() == RUNNING) { out << "Profiling is running for " << uptime() << " seconds\n"; } else { out << "Profiler is not active\n"; @@ -1713,7 +1725,7 @@ void Profiler::shutdown(Arguments &args) { MutexLocker ml(_state_lock); // The last chance to dump profile before VM terminates - if (_state == RUNNING) { + if (state() == RUNNING) { args._action = ACTION_STOP; Error error = run(args); if (error) { @@ -1721,7 +1733,7 @@ void Profiler::shutdown(Arguments &args) { } } - _state = TERMINATED; + _state.store(TERMINATED, std::memory_order_release); } int Profiler::lookupClass(const char *key, size_t length) { @@ -1741,7 +1753,7 @@ int Profiler::status(char* status, int max_len) { " CPU Engine : %s\n" " WallClock Engine : %s\n" " Allocations : %s\n", - _state == RUNNING ? "true" : "false", + state() == RUNNING ? "true" : "false", _cpu_engine != nullptr ? _cpu_engine->name() : "None", _wall_engine != nullptr ? _wall_engine->name() : "None", _alloc_engine != nullptr ? 
_alloc_engine->name() : "None"); diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 62b0b7749..ae61013a4 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -26,6 +26,7 @@ #include "threadInfo.h" #include "trap.h" #include "vmEntry.h" +#include #include #include #include @@ -71,7 +72,7 @@ class alignas(alignof(SpinLock)) Profiler { static volatile bool _need_JDK_8313796_workaround; Mutex _state_lock; - State _state; + std::atomic _state; // class unload hook Trap _class_unload_hook_trap; typedef void (*NotifyClassUnloadedFunc)(void *); @@ -152,9 +153,13 @@ class alignas(alignof(SpinLock)) Profiler { static Profiler *const _instance; + inline State state() const { + return _state.load(std::memory_order_relaxed); + } + public: Profiler() - : _state_lock(), _state(NEW), _class_unload_hook_trap(2), + : _state_lock(), _state(State::NEW), _class_unload_hook_trap(2), _notify_class_unloaded_func(NULL), _thread_info(), _class_map(1), _string_label_map(2), _context_value_map(3), _thread_filter(), _call_trace_storage(), _jfr(), _cpu_engine(NULL), _wall_engine(NULL), @@ -202,6 +207,10 @@ class alignas(alignof(SpinLock)) Profiler { return _features; } + inline bool isRunning() { + return _state.load(std::memory_order_acquire) == RUNNING; + } + u64 total_samples() { return _total_samples; } int max_stack_depth() { return _max_stack_depth; } time_t uptime() { return time(NULL) - _start_time; } @@ -210,7 +219,6 @@ class alignas(alignof(SpinLock)) Profiler { Dictionary *classMap() { return &_class_map; } SharedLockGuard classMapSharedGuard() { return SharedLockGuard(&_class_map_lock); } - BoundedOptionalSharedLockGuard classMapTrySharedGuard() { return BoundedOptionalSharedLockGuard(&_class_map_lock); } Dictionary *stringLabelMap() { return &_string_label_map; } Dictionary *contextValueMap() { return &_context_value_map; } u32 numContextAttributes() { return _num_context_attributes; } diff --git 
a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 8f1db9a3e..ce650f3c2 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -152,6 +152,59 @@ extern "C" int64_t safefetch64_cont(int64_t* adr, int64_t errValue); #endif #endif +bool SafeAccess::safeCopy(void* dst, const void* src, size_t len) { + // Two-sentinel pattern (same as isReadable): a real-data word may equal + // one sentinel by chance, but not both — if both fetches return their + // sentinel, the access truly faulted. + // + // All safefetch32 loads issued here use 4-byte-aligned addresses. Pages + // are 4 KiB (or 16 KiB on Apple Silicon), both divisible by 4, so an + // aligned 4-byte load never spans a page boundary. The only fault + // possible is when the aligned address itself lies in an unmapped page; + // we never spuriously fault on an over-read past `src + len`. + static const int32_t SENT_A = (int32_t)0x55AA55AA; + static const int32_t SENT_B = (int32_t)0xAA55AA55; + uint8_t* d = (uint8_t*)dst; + const uint8_t* s = (const uint8_t*)src; + size_t i = 0; + + // Front fixup: if `src` is not 4-byte aligned, fetch at the previous + // aligned address (1..3 bytes before src). That address lies in the + // same 4-byte word as src — and since pages are 4-byte aligned, in + // the same page as src. The leading k bytes of the fetched word lie + // before the caller's range and are discarded via the +k offset; they + // never reach `dst`. + size_t k = (uintptr_t)s & 3u; + if (k != 0 && i < len) { + int32_t* aligned = (int32_t*)(s - k); + int32_t v1 = safefetch32_impl(aligned, SENT_A); + int32_t v2 = safefetch32_impl(aligned, SENT_B); + if (v1 == SENT_A && v2 == SENT_B) { + return false; + } + size_t take = (4 - k < len) ? (4 - k) : len; + memcpy(d, ((const uint8_t*)&v1) + k, take); + i = take; + } + + // Middle + tail: (s + i) is now 4-byte aligned. 
The final iteration may + // load up to 3 over-read bytes past `src + len`, but those bytes sit in + // the same 4-byte-aligned word and therefore the same page as the bytes + // we actually wanted — never a fault from the over-read alone. + while (i < len) { + int32_t* aligned = (int32_t*)(s + i); + int32_t v1 = safefetch32_impl(aligned, SENT_A); + int32_t v2 = safefetch32_impl(aligned, SENT_B); + if (v1 == SENT_A && v2 == SENT_B) { + return false; + } + size_t chunk = (len - i >= 4) ? 4 : (len - i); + memcpy(d + i, &v1, chunk); // memcpy from local — no UAF risk + i += chunk; + } + return true; +} + bool SafeAccess::handle_safefetch(int sig, void* context) { ucontext_t* uc = (ucontext_t*)context; uintptr_t pc = uc->current_pc; diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index b8d8b4177..43ed9ce3a 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -64,6 +64,14 @@ class SafeAccess { return safefetch64_impl(ptr, errorValue); } + // Copies up to len bytes from src to dst using safefetch32_impl so that a + // page-unmap or repurpose of src memory during the copy does not crash the + // process. Returns true on full success, false if any read faulted. dst must + // have at least len bytes capacity; reads from src may over-read up to 3 + // bytes past src+len (over-read is also safefetch-protected). 
+ NOINLINE + static bool safeCopy(void* dst, const void* src, size_t len); + static bool handle_safefetch(int sig, void* context); // NOINLINE functions with stable addresses for JVM patching (vmStructs.cpp) diff --git a/ddprof-lib/src/main/cpp/vmEntry.cpp b/ddprof-lib/src/main/cpp/vmEntry.cpp index aa5a30889..f93edeea9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.cpp +++ b/ddprof-lib/src/main/cpp/vmEntry.cpp @@ -19,6 +19,7 @@ #include "safeAccess.h" #include "hotspot/vmStructs.h" #include "hotspot/jitCodeCache.h" +#include #include #include #include "guards.h" @@ -635,6 +636,11 @@ void VM::loadAllMethodIDs(jvmtiEnv *jvmti, JNIEnv *jni) { } } +void JNICALL VM::ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass) { + loadMethodIDs(jvmti, jni, klass); +} + void JNICALL VM::VMInit(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) { ready(jvmti, jni); loadAllMethodIDs(jvmti, jni); diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h index a362fc992..42af173a9 100644 --- a/ddprof-lib/src/main/cpp/vmEntry.h +++ b/ddprof-lib/src/main/cpp/vmEntry.h @@ -34,6 +34,25 @@ enum ASGCT_CallFrameType { BCI_ERROR = -18, // method_id is an error string BCI_NATIVE_FRAME_REMOTE = -19, // method_id points to RemoteFrameInfo for remote symbolication BCI_NATIVE_MALLOC = -20, // native malloc/free sample (size stored in counter) + // method_id holds a VMSymbol* (the receiver class's name Symbol), + // NOT a jmethodID. The pointer is captured in the signal handler + // (hotspotSupport.cpp:walkVM) and resolved at dump time via SafeAccess + // in Lookup::resolveVTableReceiver. Same precedent as BCI_NATIVE_FRAME + // (const char* in method_id) and BCI_NATIVE_FRAME_REMOTE (packed + // 64-bit blob). Any reader iterating frames must check bci BEFORE + // dereferencing method_id as a jmethodID. + // + // Limitation: CallTraceHashTable::calcHash mixes the raw bytes of the + // frames array (including method_id) into the trace id. 
Two samples + // of the same logical class whose Symbol* address differs (class + // unload + reload within a chunk) produce distinct trace ids; this + // is accepted because normalising at sample time would require an + // in-signal-handler Symbol read, which the redesign explicitly + // avoids. The dump-time MethodMap key is class_id-based (see + // MethodMap::makeVTableReceiverKey), so the synthetic + // MethodInfo collapses across distinct Symbol* addresses even though + // the CallTrace itself does not. + BCI_VTABLE_RECEIVER = -21, }; // See hotspot/src/share/vm/prims/forte.cpp @@ -220,10 +239,8 @@ class VM { // Needed only for AsyncGetCallTrace support } - static void JNICALL ClassPrepare(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread, - jclass klass) { - loadMethodIDs(jvmti, jni, klass); - } + static void JNICALL ClassPrepare(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread, + jclass klass); static jvmtiError JNICALL RedefineClassesHook(jvmtiEnv *jvmti, jint class_count, diff --git a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp index ad2ccb17e..fa5147a2d 100644 --- a/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp +++ b/ddprof-lib/src/test/cpp/dictionary_concurrent_ut.cpp @@ -250,6 +250,41 @@ TEST(DictionaryConcurrent, SignalHandlerBoundedLookupVsDumpClear) { EXPECT_GT(total_clears.load(), 0L); } +// (4a) Single-threaded bulk insert (mimicking the inserts preregisterLoadedClasses +// performs while holding _class_map_lock exclusively in production) followed by +// read-only bounded_lookup(0) on each inserted key. Verifies the pre-registration +// contract: every key inserted via Dictionary::lookup() is subsequently visible +// to bounded_lookup(0). This test takes no external lock — the production +// exclusive-lock protocol is exercised by the other tests in this file. 
+TEST(DictionaryConcurrent, BulkInsertThenBoundedLookupHitsMirrorInsertedIds) { + Dictionary dict(/*id=*/0); + + constexpr int kBulk = 50; + char keys[kBulk][64]; + unsigned int inserted_ids[kBulk]; + + // Bulk insert — mirrors the per-class lookup() calls preregisterLoadedClasses + // issues from a JVM thread. + for (int i = 0; i < kBulk; i++) { + snprintf(keys[i], sizeof(keys[i]), "java/util/BulkClass%d", i); + inserted_ids[i] = dict.lookup(keys[i], strlen(keys[i])); + ASSERT_NE(0u, inserted_ids[i]); + ASSERT_NE(static_cast<unsigned int>(INT_MAX), inserted_ids[i]); + } + + // Read back with bounded_lookup(0) — must return the same id, no malloc. + for (int i = 0; i < kBulk; i++) { + unsigned int found = dict.bounded_lookup(keys[i], strlen(keys[i]), 0); + EXPECT_EQ(inserted_ids[i], found) + << "bounded_lookup returned wrong id for key " << keys[i]; + } + + // A never-inserted key must return INT_MAX. + const char* absent = "java/util/NeverInserted"; + EXPECT_EQ(static_cast<unsigned int>(INT_MAX), + dict.bounded_lookup(absent, strlen(absent), 0)); +} + // (4) Same race as (3) but using BoundedOptionalSharedLockGuard, which is the // guard classMapTrySharedGuard() now returns in hotspotSupport.cpp. The bounded // variant may fail spuriously under reader pressure (≤5 CAS attempts); this diff --git a/ddprof-lib/src/test/cpp/safefetch_ut.cpp b/ddprof-lib/src/test/cpp/safefetch_ut.cpp index 0c0fad37e..f1cf4ae7f 100644 --- a/ddprof-lib/src/test/cpp/safefetch_ut.cpp +++ b/ddprof-lib/src/test/cpp/safefetch_ut.cpp @@ -1,8 +1,10 @@ #include #include #include +#include #include #include +#include #include "safeAccess.h" #include "os.h" @@ -157,3 +159,180 @@ TEST_F(SafeFetchTest, mprotectedMemory64) { munmap(page, 4096); } + +// --------------------------------------------------------------------------- +// SafeAccess::safeCopy — bulk variant of safeFetch{32,64} that copies a byte +// range via the safefetch trampoline.
Must: +// - return true and copy the bytes exactly when src is fully readable, +// including when [src, src+len) sits within a few bytes of an unmapped +// page boundary (aligned-load strategy keeps over-reads in-page) +// - return false (no crash) when the requested range itself crosses into +// an unmapped page +// - handle unaligned src by fetching at the previous 4-byte aligned +// address and discarding the leading 1..3 bytes +// - never write past dst[len-1] even when len is not a multiple of 4 +// - not mis-classify real data as a fault when it equals one sentinel +// --------------------------------------------------------------------------- + +TEST_F(SafeFetchTest, safeCopy_happyPath) { + const char src[] = "java/lang/Object"; + char dst[sizeof(src)] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, sizeof(src) - 1)); + EXPECT_EQ(0, memcmp(dst, src, sizeof(src) - 1)); +} + +TEST_F(SafeFetchTest, safeCopy_zeroLength) { + // Even if src is NULL, len=0 must be a no-op success. + char dst[8] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, nullptr, 0)); +} + +TEST_F(SafeFetchTest, safeCopy_shortLength_doesNotOverwriteDst) { + // The internal 4-byte fetch must not overflow dst beyond len bytes. + const char src[] = "AB"; + char dst[8]; + memset(dst, 0x5A, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 2)); + EXPECT_EQ('A', dst[0]); + EXPECT_EQ('B', dst[1]); + // Sentinel bytes 2..7 must be untouched. + for (int i = 2; i < 8; i++) { + EXPECT_EQ((char)0x5A, dst[i]) << "dst[" << i << "] was overwritten"; + } +} + +TEST_F(SafeFetchTest, safeCopy_unmappedSource_returnsFalse) { + // Map a page, then unmap it: the address is now firmly invalid. safeCopy + // must return false rather than SIGSEGV. 
+ void* page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(page, MAP_FAILED); + ASSERT_EQ(0, munmap(page, 4096)); + + char dst[64] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, page, 32)); +} + +TEST_F(SafeFetchTest, safeCopy_protNoneSource_returnsFalse) { + // mprotect-PROT_NONE the page (similar to mprotectedMemory32). safeCopy + // must return false on the first faulting word, not crash. + void* page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(page, MAP_FAILED); + memcpy(page, "ignored", 7); + ASSERT_EQ(0, mprotect(page, 4096, PROT_NONE)); + + char dst[64] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, page, 32)); + + // Restore so munmap can run cleanly. + ASSERT_EQ(0, mprotect(page, 4096, PROT_READ | PROT_WRITE)); + munmap(page, 4096); +} + +TEST_F(SafeFetchTest, safeCopy_tailNearUnmappedBoundary_stillSucceeds) { + // Map two adjacent pages, unmap only the second. Place src so the bytes + // we ask for end inside the mapped page but a naive 4-byte load starting + // at src would spill into the unmapped page. The aligned-load strategy + // must keep the load within the mapped page → success, not fault.
+ long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + char* src = mapped_end - 2; // 2 bytes from page boundary, k = 2 + src[0] = 'X'; + src[1] = 'Y'; + + char dst[16]; + memset(dst, 0, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 2)); + EXPECT_EQ('X', dst[0]); + EXPECT_EQ('Y', dst[1]); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_requestedRangeCrossesUnmappedPage_returnsFalse) { + // Distinct from the case above: here the *requested* range itself + // crosses into the unmapped page. safeCopy must report failure (return false) + // when it can't read all the bytes the caller asked for. + long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + char* src = mapped_end - 2; + src[0] = 'X'; + src[1] = 'Y'; + + // Asking for 8 bytes pushes 6 bytes into the unmapped page → must return false. + char dst[16] = {0}; + EXPECT_FALSE(SafeAccess::safeCopy(dst, src, 8)); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_unalignedSource_allMisalignments) { + // The front fixup must correctly extract leading bytes from the + // previous-aligned-word fetch for every misalignment k ∈ {1, 2, 3}. + static const char kSentinel[] = "ABCDEFGHIJKLMNOP"; // 16 payload bytes (+ NUL) + // Use a 4-byte-aligned buffer so we can shift src forward by k.
+ alignas(4) char buf[32]; + memcpy(buf + 4, kSentinel, 16); // place payload at aligned offset 4 + + for (size_t k = 1; k <= 3; k++) { + const char* src = buf + 4 + k; // misaligned by k + size_t len = 16 - k; // copy the rest of the payload + char dst[16]; + memset(dst, 0, sizeof(dst)); + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, len)) << "k=" << k; + EXPECT_EQ(0, memcmp(dst, kSentinel + k, len)) << "k=" << k; + } +} + +TEST_F(SafeFetchTest, safeCopy_unalignedShortAtPageEnd_stillSucceeds) { + // Combine misalignment with proximity to an unmapped boundary: src is + // misaligned AND only a few bytes from the end of the mapped page. + // The front fixup reads backward (into the same page) → success. + long page_size = sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + void* region = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(region, MAP_FAILED); + ASSERT_EQ(0, munmap((char*)region + page_size, page_size)); + + char* mapped_end = (char*)region + page_size; + // mapped_end is 4-byte aligned (it is a multiple of the page size). Place src + // 3 bytes back from the boundary so k = 1 and only 3 bytes are wanted. + char* src = mapped_end - 3; + src[0] = 'P'; + src[1] = 'Q'; + src[2] = 'R'; + + char dst[8] = {0}; + EXPECT_TRUE(SafeAccess::safeCopy(dst, src, 3)); + EXPECT_EQ('P', dst[0]); + EXPECT_EQ('Q', dst[1]); + EXPECT_EQ('R', dst[2]); + + munmap(region, page_size); +} + +TEST_F(SafeFetchTest, safeCopy_dataMatchingSingleSentinel_stillSucceeds) { + // The two-sentinel pattern must not mis-classify real data that happens + // to equal one of the sentinels. SENT_A is 0x55AA55AA.
+ uint32_t real_data = 0x55AA55AA; + char dst[4]; + ASSERT_TRUE(SafeAccess::safeCopy(dst, &real_data, 4)); + EXPECT_EQ(0, memcmp(dst, &real_data, 4)); +} diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java new file mode 100644 index 000000000..3648a7c5f --- /dev/null +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/VtableReceiverFrameTest.java @@ -0,0 +1,97 @@ +package com.datadoghq.profiler.cpu; + +import com.datadoghq.profiler.AbstractProfilerTest; +import com.datadoghq.profiler.Platform; +import org.junit.jupiter.api.Assumptions; +import org.junitpioneer.jupiter.RetryingTest; +import org.openjdk.jmc.common.item.IItem; +import org.openjdk.jmc.common.item.IItemCollection; +import org.openjdk.jmc.common.item.IItemIterable; +import org.openjdk.jmc.common.item.IMemberAccessor; +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes; + +import java.util.concurrent.ThreadLocalRandom; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class VtableReceiverFrameTest extends AbstractProfilerTest { + + @Override + protected String getProfilerCommand() { + return "cpu=1ms"; + } + + abstract static class Shape { + abstract int area(); + } + + // Three implementations force megamorphic vtable dispatch (JIT won't inline). + // ThreadLocalRandom bodies ensure each variant is non-trivial and CPU-bound. + static class Circle extends Shape { + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 1; } + } + + static class Square extends Shape { + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 2; } + } + + static class Triangle extends Shape { + @Override public int area() { return ThreadLocalRandom.current().nextInt() | 4; } + } + + private int profiledWork(Shape...
shapes) { + int result = 0; + for (int i = 0; i < 10_000_000; i++) { + for (Shape shape : shapes) { + result += shape.area(); + } + } + return result; + } + + // The vtable_target feature inserts a synthetic <vtable_receiver> frame immediately + // below a vtable stub frame in the call stack. The receiver class (Circle/Square/Triangle) + // is captured as a VMSymbol* in the signal handler and resolved to a class name at + // dump time via SafeAccess-protected reads. If resolution fails or the synthetic frame + // is dropped, the receiver class name will not appear next to a vtable stub in JFR. + @RetryingTest(5) + public void testVtableReceiverFrameInCpuSamples() throws Exception { + Assumptions.assumeFalse(Platform.isZing() || Platform.isJ9()); + waitForProfilerReady(2000); + int result = profiledWork(new Circle(), new Square(), new Triangle()); + System.err.println(result); + stopProfiler(); + + IItemCollection events = verifyEvents("datadog.ExecutionSample"); + boolean foundVtableWithReceiver = false; + for (IItemIterable cpuSamples : events) { + IMemberAccessor<String, IItem> frameAccessor = + JdkAttributes.STACK_TRACE_STRING.getAccessor(cpuSamples.getType()); + if (frameAccessor == null) continue; + for (IItem sample : cpuSamples) { + String stackTrace = frameAccessor.getMember(sample); + if (stackTrace != null && stackTrace.contains(".vtable stub()")) { + System.err.println("=VTABLE STUB TRACE=\n" + stackTrace + "\n=END="); + } + // JMC's STACK_TRACE_STRING HTML-escapes angle brackets in method + // names (it does the same for <init>/<clinit>), so the synthetic <vtable_receiver> + // method appears as "&lt;vtable_receiver&gt;" in the rendered string. + // Match on the bare token so the test is robust to either form.
+ if (stackTrace != null + && stackTrace.contains(".vtable stub()") + && stackTrace.contains("vtable_receiver") + && (stackTrace.contains("Circle") + || stackTrace.contains("Square") + || stackTrace.contains("Triangle"))) { + foundVtableWithReceiver = true; + break; + } + } + if (foundVtableWithReceiver) break; + } + assertTrue(foundVtableWithReceiver, + "No CPU sample contained a vtable stub frame, a vtable_receiver synthetic frame, " + + "and a receiver class (Circle/Square/Triangle); signal-handler VMSymbol* capture or " + + "dump-time SafeAccess resolution in Lookup::resolveVTableReceiver is broken"); + } +}