Skip to content

Commit 38e1e9c

Browse files
Merge branch 'main' into fix-150075-tar-addfile-no-offsets
2 parents 33533a1 + f621ba1 commit 38e1e9c

23 files changed

Lines changed: 932 additions & 140 deletions

Lib/profiling/sampling/jsonl_collector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ def export(self, filename):
164164
self._iter_final_agg_entries(),
165165
)
166166
self._write_message(output, self._build_end_record())
167+
print(f"JSONL profile written to {filename}")
167168

168169
def _build_meta_record(self):
169170
record = {

Lib/profiling/sampling/sample.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,33 @@ def _print_unwinder_stats(self):
327327
print(f" Hits: {code_hits:n} ({ANSIColors.GREEN}{fmt(code_hits_pct)}%{ANSIColors.RESET})")
328328
print(f" Misses: {code_misses:n} ({ANSIColors.RED}{fmt(code_misses_pct)}%{ANSIColors.RESET})")
329329

330+
batched_attempts = stats.get('batched_read_attempts', 0)
331+
batched_successes = stats.get('batched_read_successes', 0)
332+
batched_misses = stats.get('batched_read_misses', 0)
333+
segments_requested = stats.get('batched_read_segments_requested', 0)
334+
segments_completed = stats.get('batched_read_segments_completed', 0)
335+
if batched_attempts > 0:
336+
batched_success_rate = stats.get('batched_read_success_rate', 0.0)
337+
batched_miss_rate = 100.0 - batched_success_rate
338+
segment_completion_rate = stats.get(
339+
'batched_read_segment_completion_rate', 0.0
340+
)
341+
342+
print(f" {ANSIColors.CYAN}Batched Reads:{ANSIColors.RESET}")
343+
print(f" Attempts: {batched_attempts:n}")
344+
print(
345+
f" Successes: {batched_successes:n} "
346+
f"({ANSIColors.GREEN}{fmt(batched_success_rate)}%{ANSIColors.RESET})"
347+
)
348+
print(
349+
f" Misses: {batched_misses:n} "
350+
f"({ANSIColors.RED}{fmt(batched_miss_rate)}%{ANSIColors.RESET})"
351+
)
352+
print(
353+
f" Segments read: {segments_completed:n}/{segments_requested:n} "
354+
f"({ANSIColors.GREEN}{fmt(segment_completion_rate)}%{ANSIColors.RESET})"
355+
)
356+
330357
# Memory operations
331358
memory_reads = stats.get('memory_reads', 0)
332359
memory_bytes = stats.get('memory_bytes_read', 0)

Lib/test/test_dict.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,10 +1903,35 @@ def test_hash(self):
19031903
self.assertEqual(hash(frozendict(x=1, y=2)),
19041904
hash(frozendict(y=2, x=1)))
19051905

1906+
# Check that hash() computes the hash of (key, value) pairs
1907+
cases = [
1908+
frozendict(a=False, b=True, c=True),
1909+
frozendict(a=True, b=False, c=True),
1910+
frozendict(a=True, b=True, c=False),
1911+
frozendict({False: "a", "b": True, "c": True}),
1912+
frozendict({"a": "b", False: True, True: "c"}),
1913+
]
1914+
hashes = {hash(fd) for fd in cases}
1915+
self.assertEqual(len(hashes), len(cases))
1916+
19061917
fd = frozendict(x=[1], y=[2])
19071918
with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"):
19081919
hash(fd)
19091920

1921+
@support.cpython_only
1922+
def test_hash_cpython(self):
1923+
# Check that hash(frozendict) implementation is:
1924+
# hash(frozenset(fd.items()))
1925+
for fd in (
1926+
frozendict(),
1927+
frozendict(x=1, y=2),
1928+
frozendict(y=2, x=1),
1929+
frozendict(a=False, b=True, c=True),
1930+
frozendict.fromkeys('abc'),
1931+
):
1932+
with self.subTest(fd=fd):
1933+
self.assertEqual(hash(fd), hash(frozenset(fd.items())))
1934+
19101935
def test_fromkeys(self):
19111936
self.assertEqual(frozendict.fromkeys('abc'),
19121937
frozendict(a=None, b=None, c=None))

Lib/test/test_external_inspection.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,13 @@ def test_get_stats(self):
37673767
"frames_read_from_cache",
37683768
"frames_read_from_memory",
37693769
"frame_cache_hit_rate",
3770+
"batched_read_attempts",
3771+
"batched_read_successes",
3772+
"batched_read_misses",
3773+
"batched_read_segments_requested",
3774+
"batched_read_segments_completed",
3775+
"batched_read_success_rate",
3776+
"batched_read_segment_completion_rate",
37703777
]
37713778
for key in expected_keys:
37723779
self.assertIn(key, stats)

Lib/test/test_genericalias.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,14 @@
5555
from unittest.case import _AssertRaisesContext
5656
from queue import Queue, SimpleQueue
5757
from weakref import WeakSet, ReferenceType, ref
58-
import typing
59-
from typing import Unpack
6058
try:
6159
from tkinter import Event
6260
except ImportError:
6361
Event = None
6462
from string.templatelib import Template, Interpolation
6563

66-
from typing import TypeVar
64+
import typing
65+
from typing import TypeVar, Unpack
6766
T = TypeVar('T')
6867
K = TypeVar('K')
6968
V = TypeVar('V')
@@ -621,6 +620,14 @@ def test_nested_paramspec_specialization(self):
621620
self.assertEqual(deeply_nested_specialized.__args__, ([str, [float], int], float))
622621
self.assertEqual(deeply_nested_specialized.__parameters__, ())
623622

623+
def test_gh150146(self):
624+
# It used to crash:
625+
for container in [memoryview, list, tuple]:
626+
with self.subTest(container=container):
627+
x = container[TypeVar("")]
628+
with self.assertRaises(TypeError):
629+
x[*typing.Mapping[..., ...]]
630+
624631

625632
class TypeIterationTests(unittest.TestCase):
626633
_UNITERABLE_TYPES = (list, tuple)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
On Linux when Python is linked to the musl C library, use a thread stack
2+
size of at least 1 MiB instead of musl default which is 128 kiB. Patch by
3+
Victor Stinner.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix ``hash(frozendict)``: compute the hash of each ``(key, value)`` pair
2+
correctly. Patch by Victor Stinner.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a crash on a complex type variable substitution.
2+
3+
``from typing import TypeVar; memoryview[TypeVar("")][*typing.Mapping[...,
4+
...]]`` used to fail due to missing ``NULL`` check on ``_unpack_args`` C
5+
function call.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix excessive overhead in the Tachyon profiler when inspecting a remote
2+
process by avoiding repeated remote page-cache scans, batching predicted
3+
remote reads, and reusing cached profiler result objects. Patch by Pablo
4+
Galindo and Maurycy Pawłowski-Wieroński.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
#include "internal/pycore_llist.h" // struct llist_node
3131
#include "internal/pycore_long.h" // _PyLong_GetZero
3232
#include "internal/pycore_pyerrors.h" // _PyErr_FormatFromCause
33+
#include "internal/pycore_pyhash.h" // _Py_HashPointerRaw
3334
#include "internal/pycore_stackref.h" // Py_TAG_BITS
3435
#include "../../Python/remote_debug.h"
3536

@@ -215,6 +216,8 @@ typedef struct {
215216
PyObject *file_name;
216217
int first_lineno;
217218
PyObject *linetable; // bytes
219+
PyObject *last_frame_info;
220+
ptrdiff_t last_addrq;
218221
uintptr_t addr_code_adaptive;
219222
} CachedCodeMetadata;
220223

@@ -224,11 +227,41 @@ typedef struct {
224227

225228
typedef struct {
226229
uint64_t thread_id; // 0 = empty slot
230+
uintptr_t thread_state_addr;
227231
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
228232
Py_ssize_t num_addrs;
233+
PyObject *thread_id_obj; // owned reference, NULL if empty
229234
PyObject *frame_list; // owned reference, NULL if empty
230235
} FrameCacheEntry;
231236

237+
#define INTERPRETER_THREAD_CACHE_SIZE 32
238+
#if (INTERPRETER_THREAD_CACHE_SIZE & (INTERPRETER_THREAD_CACHE_SIZE - 1)) != 0
239+
# error "INTERPRETER_THREAD_CACHE_SIZE must be a power of two"
240+
#endif
241+
242+
// The two per-interpreter L2 caches below are split into per-field tables so
243+
// that a writer rebinding one slot cannot leave stale data in a field owned by
244+
// the other when the slot is reused across interpreters.
245+
typedef struct {
246+
uintptr_t interpreter_addr;
247+
uintptr_t thread_state_addr;
248+
} InterpreterTstateCacheEntry;
249+
typedef struct {
250+
uintptr_t interpreter_addr;
251+
uint64_t code_object_generation;
252+
} InterpreterGenerationCacheEntry;
253+
254+
// Carries already-read thread state and/or frame buffers across helpers so the
255+
// downstream callee can skip a remote read. Address fields are caller-supplied
256+
// inputs; buffer pointers (tstate, frame) are NULL unless a prior batched read
257+
// successfully populated them.
258+
typedef struct {
259+
const char *tstate;
260+
uintptr_t tstate_addr;
261+
const char *frame;
262+
uintptr_t frame_addr;
263+
} RemoteReadPrefetch;
264+
232265
/* Statistics for profiling performance analysis */
233266
typedef struct {
234267
uint64_t total_samples; // Total number of get_stack_trace calls
@@ -242,14 +275,44 @@ typedef struct {
242275
uint64_t code_object_cache_hits; // Code object cache hits
243276
uint64_t code_object_cache_misses; // Code object cache misses
244277
uint64_t stale_cache_invalidations; // Times stale entries were cleared
278+
uint64_t batched_read_attempts; // Batched remote-read attempts
279+
uint64_t batched_read_successes; // Attempts that read all requested segments
280+
uint64_t batched_read_misses; // Attempts that fell back or partially read
281+
uint64_t batched_read_segments_requested; // Segments requested by batched reads
282+
uint64_t batched_read_segments_completed; // Segments completed by batched reads
245283
} UnwinderStats;
246284

285+
#if defined(__GNUC__) || defined(__clang__)
286+
# define REMOTE_DEBUG_UNLIKELY(value) __builtin_expect(!!(value), 0)
287+
#else
288+
# define REMOTE_DEBUG_UNLIKELY(value) (value)
289+
#endif
290+
247291
/* Stats tracking macros - no-op when stats collection is disabled */
248292
#define STATS_INC(unwinder, field) \
249-
do { if ((unwinder)->collect_stats) (unwinder)->stats.field++; } while(0)
293+
do { if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) (unwinder)->stats.field++; } while(0)
250294

251295
#define STATS_ADD(unwinder, field, val) \
252-
do { if ((unwinder)->collect_stats) (unwinder)->stats.field += (val); } while(0)
296+
do { if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) (unwinder)->stats.field += (val); } while(0)
297+
298+
#if HAVE_PROCESS_VM_READV
299+
# define STATS_BATCHED_READ(unwinder, requested, completed) \
300+
do { \
301+
if (REMOTE_DEBUG_UNLIKELY((unwinder)->collect_stats)) { \
302+
(unwinder)->stats.batched_read_attempts++; \
303+
(unwinder)->stats.batched_read_segments_requested += (uint64_t)(requested); \
304+
(unwinder)->stats.batched_read_segments_completed += (uint64_t)(completed); \
305+
if ((completed) == (requested)) { \
306+
(unwinder)->stats.batched_read_successes++; \
307+
} \
308+
else { \
309+
(unwinder)->stats.batched_read_misses++; \
310+
} \
311+
} \
312+
} while(0)
313+
#else
314+
# define STATS_BATCHED_READ(unwinder, requested, completed) ((void)0)
315+
#endif
253316

254317
typedef struct {
255318
PyTypeObject *RemoteDebugging_Type;
@@ -290,7 +353,6 @@ typedef struct {
290353
struct _Py_AsyncioModuleDebugOffsets async_debug_offsets;
291354
uintptr_t interpreter_addr;
292355
uintptr_t tstate_addr;
293-
uint64_t code_object_generation;
294356
_Py_hashtable_t *code_object_cache;
295357
int debug;
296358
int only_active_thread;
@@ -302,9 +364,17 @@ typedef struct {
302364
int cache_frames;
303365
int collect_stats; // whether to collect statistics
304366
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
367+
// L1 single-entry shortcut over cached_tstates[]: most workloads sample one
368+
// interpreter, so check these pairs before hashing into the table below.
369+
uintptr_t cached_tstate_interpreter_addr;
370+
uintptr_t cached_tstate_addr;
371+
uintptr_t cached_generation_interpreter_addr;
372+
uint64_t cached_code_object_generation;
305373
RemoteDebuggingState *cached_state;
306374
FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries
307375
UnwinderStats stats; // statistics for performance analysis
376+
InterpreterTstateCacheEntry cached_tstates[INTERPRETER_THREAD_CACHE_SIZE];
377+
InterpreterGenerationCacheEntry cached_generations[INTERPRETER_THREAD_CACHE_SIZE];
308378
#ifdef Py_GIL_DISABLED
309379
uint32_t tlbc_generation;
310380
_Py_hashtable_t *tlbc_cache;
@@ -361,11 +431,13 @@ typedef struct {
361431
typedef struct {
362432
/* Inputs */
363433
uintptr_t frame_addr; // Starting frame address
434+
uintptr_t thread_state_addr; // Owning thread state address
364435
uintptr_t base_frame_addr; // Sentinel at bottom (for validation)
365436
uintptr_t gc_frame; // GC frame address (0 if not tracking)
366437
uintptr_t last_profiled_frame; // Last cached frame (0 if no cache)
367438
StackChunkList *chunks; // Pre-copied stack chunks
368439
int skip_first_frame; // Skip frame_addr itself (continue from its caller)
440+
RemoteReadPrefetch prefetch; // Optional already-read thread/frame buffers
369441

370442
/* Outputs */
371443
PyObject *frame_info; // List to append FrameInfo objects
@@ -548,6 +620,7 @@ extern int process_frame_chain(
548620
extern int frame_cache_init(RemoteUnwinderObject *unwinder);
549621
extern void frame_cache_cleanup(RemoteUnwinderObject *unwinder);
550622
extern FrameCacheEntry *frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id);
623+
extern FrameCacheEntry *frame_cache_find_by_tstate(RemoteUnwinderObject *unwinder, uintptr_t tstate_addr);
551624
extern int clear_last_profiled_frames(RemoteUnwinderObject *unwinder);
552625
extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result);
553626
extern int frame_cache_lookup_and_extend(
@@ -566,6 +639,7 @@ extern int frame_cache_store(
566639
PyObject *frame_list,
567640
const uintptr_t *addrs,
568641
Py_ssize_t num_addrs,
642+
uintptr_t thread_state_addr,
569643
uintptr_t base_frame_addr,
570644
uintptr_t last_frame_visited);
571645

@@ -605,7 +679,8 @@ extern PyObject* unwind_stack_for_thread(
605679
uintptr_t *current_tstate,
606680
uintptr_t gil_holder_tstate,
607681
uintptr_t gc_frame,
608-
uintptr_t main_thread_tstate
682+
uintptr_t main_thread_tstate,
683+
const RemoteReadPrefetch *prefetch
609684
);
610685

611686
/* Thread stopping functions (for blocking mode) */

0 commit comments

Comments
 (0)