From 36ffc5cb185249cce8665f8c6d6836352ee03a45 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Fri, 15 May 2026 18:26:18 +0530 Subject: [PATCH 1/7] implement per type method cache --- Include/cpython/object.h | 2 + Include/internal/pycore_interp_structs.h | 23 +-- Include/internal/pycore_object.h | 2 - Include/internal/pycore_typecache.h | 44 +++++ Lib/test/test_free_threading/test_type.py | 18 ++ Lib/test/test_sys.py | 2 +- Makefile.pre.in | 2 + Objects/typeobject.c | 209 +++------------------- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/pystate.c | 3 +- Python/typecache.c | 186 +++++++++++++++++++ Tools/ftscalingbench/ftscalingbench.py | 16 ++ 15 files changed, 314 insertions(+), 205 deletions(-) create mode 100644 Include/internal/pycore_typecache.h create mode 100644 Python/typecache.c diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 326254c335b489..4c5a677e5543ec 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -246,6 +246,8 @@ struct _typeobject { * This function must escape to any code that can result in * the GC being run, such as Py_DECREF. */ _Py_iteritemfunc _tp_iteritem; + + void *_tp_cache; }; #define _Py_ATTR_CACHE_UNUSED (30000) // (see tp_versions_used) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f13bc2178b1e7e..1c0ea07d2843bc 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -548,23 +548,6 @@ struct _types_runtime_state { }; -// Type attribute lookup cache: speed up attribute and method lookups, -// see _PyType_Lookup(). 
-struct type_cache_entry { - unsigned int version; // initialized from type->tp_version_tag -#ifdef Py_GIL_DISABLED - _PySeqLock sequence; -#endif - PyObject *name; // reference to exactly a str or None - PyObject *value; // borrowed reference or NULL -}; - -#define MCACHE_SIZE_EXP 12 - -struct type_cache { - struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP]; -}; - typedef struct { PyTypeObject *type; int isbuiltin; @@ -579,6 +562,10 @@ typedef struct { are also some diagnostic uses for the list of weakrefs, so we still keep it. */ PyObject *tp_weaklist; + /* Per-interpreter attribute lookup cache (struct type_cache *). + For static builtin types the cache must be per-interpreter + because tp_dict and the values it stores are per-interpreter. */ + void *_tp_cache; } managed_static_type_state; #define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ @@ -589,8 +576,6 @@ struct types_state { where all those lower numbers are used for core static types. */ unsigned int next_version_tag; - struct type_cache type_cache; - /* Every static builtin type is initialized for each interpreter during its own initialization, including for the main interpreter during global runtime initialization. 
This is done by calling diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c2c508c1a71c5c..8fa3b47b6c312a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -291,8 +291,6 @@ _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { return ((type->tp_flags) & feature) != 0; } -extern void _PyType_InitCache(PyInterpreterState *interp); - extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); diff --git a/Include/internal/pycore_typecache.h b/Include/internal/pycore_typecache.h new file mode 100644 index 00000000000000..2af68c20447656 --- /dev/null +++ b/Include/internal/pycore_typecache.h @@ -0,0 +1,44 @@ +#ifndef PY_INTERNAL_TYPECACHE_H +#define PY_INTERNAL_TYPECACHE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_stackref.h" + + +#define _Py_TYPECACHE_MINSIZE 8 + +struct type_cache_entry { + PyObject *name; + PyObject *value; +}; + +struct type_cache { + uint32_t mask; + uint32_t version_tag; + uint32_t available; + uint32_t used; + struct type_cache_entry hashtable[0]; +}; + +struct _PyTypeCacheLookupResult { + _PyStackRef value; + int cache_hit; + uint32_t version_tag; +}; + + +extern void _PyTypeCache_InitType(PyTypeObject *type); +extern void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value); +extern struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject *name); +extern void _PyTypeCache_Invalidate(PyTypeObject *type); + +#ifdef __cplusplus +} +#endif +#endif /* PY_INTERNAL_TYPECACHE_H */ diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 1255d842dbff48..f7bacab00846b9 100644 --- a/Lib/test/test_free_threading/test_type.py +++ 
b/Lib/test/test_free_threading/test_type.py @@ -84,6 +84,24 @@ def reader_func(): self.run_one(writer_func, reader_func) + def test_attr_cache_mortal(self): + class C: + x = object() + + class D(C): + pass + + def writer_func(): + for _ in range(3000): + C.x = object() + + def reader_func(): + for _ in range(3000): + C.x + D.x + + self.run_one(writer_func, reader_func) + def test___class___modification(self): loops = 200 diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 02c70403185f60..75347f59f4adf4 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1788,7 +1788,7 @@ def delx(self): del self.__x check((1,2,3), vsize('') + self.P + 3*self.P) # type # static type: PyTypeObject - fmt = 'P2nPI13Pl4Pn9Pn12PI2Pc' + fmt = 'P2nPI13Pl4Pn9Pn12PI2PcP' s = vsize(fmt) check(int, s) typeid = 'n' if support.Py_GIL_DISABLED else '' diff --git a/Makefile.pre.in b/Makefile.pre.in index 9435bf534fb512..eebcf45e442e88 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -506,6 +506,7 @@ PYTHON_OBJS= \ Python/thread.o \ Python/traceback.o \ Python/tracemalloc.o \ + Python/typecache.o \ Python/uniqueid.o \ Python/getopt.o \ Python/pystrcmp.o \ @@ -1411,6 +1412,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_tracemalloc.h \ $(srcdir)/Include/internal/pycore_tstate.h \ $(srcdir)/Include/internal/pycore_tuple.h \ + $(srcdir)/Include/internal/pycore_typecache.h \ $(srcdir)/Include/internal/pycore_typedefs.h \ $(srcdir)/Include/internal/pycore_typeobject.h \ $(srcdir)/Include/internal/pycore_typevarobject.h \ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7cca137f74be58..1faa30ea2ae8ae 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -21,7 +21,8 @@ #include "pycore_slots.h" // _PySlotIterator_Init #include "pycore_symtable.h" // _Py_Mangle() #include "pycore_tuple.h" // _PyTuple_FromPair -#include "pycore_typeobject.h" // struct type_cache +#include "pycore_typecache.h" // _PyTypeCache_Lookup() +#include 
"pycore_typeobject.h" // _PyTypes_InitTypes() #include "pycore_unicodeobject.h" // _PyUnicode_Copy #include "pycore_unionobject.h" // _Py_union_type_or #include "pycore_weakref.h" // _PyWeakref_GET_REF() @@ -41,21 +42,7 @@ class object "PyObject *" "&PyBaseObject_Type" /* Support type attribute lookup cache */ -/* The cache can keep references to the names alive for longer than - they normally would. This is why the maximum size is limited to - MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large - strings are used as attribute names. */ -#define MCACHE_MAX_ATTR_SIZE 100 -#define MCACHE_HASH(version, name_hash) \ - (((unsigned int)(version) ^ (unsigned int)(name_hash)) \ - & ((1 << MCACHE_SIZE_EXP) - 1)) - -#define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ - ((Py_ssize_t)(name)) >> 3) -#define MCACHE_CACHEABLE_NAME(name) \ - (PyUnicode_CheckExact(name) && \ - (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE)) +#define MCACHE_CACHEABLE_NAME(name) (PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)) #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag @@ -969,75 +956,18 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d } -static struct type_cache* -get_type_cache(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->types.type_cache; -} - - -static void -type_cache_clear(struct type_cache *cache, PyObject *value) -{ - for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; -#ifdef Py_GIL_DISABLED - _PySeqLock_LockWrite(&entry->sequence); -#endif - entry->version = 0; - Py_XSETREF(entry->name, _Py_XNewRef(value)); - entry->value = NULL; -#ifdef Py_GIL_DISABLED - _PySeqLock_UnlockWrite(&entry->sequence); -#endif - } -} - - -void -_PyType_InitCache(PyInterpreterState *interp) -{ - struct type_cache *cache = &interp->types.type_cache; - for (Py_ssize_t 
i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; - assert(entry->name == NULL); - - entry->version = 0; - // Set to None so _PyType_LookupRef() can use Py_SETREF(), - // rather than using slower Py_XSETREF(). - entry->name = Py_None; - entry->value = NULL; - } -} - - -static unsigned int -_PyType_ClearCache(PyInterpreterState *interp) -{ - struct type_cache *cache = &interp->types.type_cache; - // Set to None, rather than NULL, so _PyType_LookupRef() can - // use Py_SETREF() rather than using slower Py_XSETREF(). - type_cache_clear(cache, Py_None); - - return NEXT_VERSION_TAG(interp) - 1; -} - - unsigned int PyType_ClearCache(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - return _PyType_ClearCache(interp); + + return NEXT_VERSION_TAG(interp) - 1; } void _PyTypes_Fini(PyInterpreterState *interp) { - struct type_cache *cache = &interp->types.type_cache; - type_cache_clear(cache, NULL); - // All the managed static types should have been finalized already. 
assert(interp->types.for_extensions.num_initialized == 0); for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { @@ -1231,6 +1161,7 @@ type_modified_unlocked(PyTypeObject *type) } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + _PyTypeCache_Invalidate(type); if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -1314,6 +1245,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + _PyTypeCache_Invalidate(type); type->tp_versions_used = _Py_ATTR_CACHE_UNUSED; if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the @@ -6197,67 +6129,9 @@ is_dunder_name(PyObject *name) return 0; } -static PyObject * -update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value) -{ - _Py_atomic_store_ptr_relaxed(&entry->value, value); /* borrowed */ - assert(PyUnstable_Unicode_GET_CACHED_HASH(name) != -1); - OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); - // We're releasing this under the lock for simplicity sake because it's always a - // exact unicode object or Py_None so it's safe to do so. - PyObject *old_name = entry->name; - _Py_atomic_store_ptr_relaxed(&entry->name, Py_NewRef(name)); - // We must write the version last to avoid _Py_TryXGetStackRef() - // operating on an invalid (already deallocated) value inside - // _PyType_LookupRefAndVersion(). If we write the version first then a - // reader could pass the "entry_version == type_version" check but could - // be using the old entry value. 
- _Py_atomic_store_uint32_release(&entry->version, version_tag); - return old_name; -} - -#if Py_GIL_DISABLED - -static void -update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, - unsigned int version_tag, PyObject *value) -{ - _PySeqLock_LockWrite(&entry->sequence); - - // update the entry - if (entry->name == name && - entry->value == value && - entry->version == version_tag) { - // We raced with another update, bail and restore previous sequence. - _PySeqLock_AbandonWrite(&entry->sequence); - return; - } - - PyObject *old_value = update_cache(entry, name, version_tag, value); - - // Then update sequence to the next valid value - _PySeqLock_UnlockWrite(&entry->sequence); - - Py_DECREF(old_value); -} - -#endif - void _PyTypes_AfterFork(void) { -#ifdef Py_GIL_DISABLED - struct type_cache *cache = get_type_cache(); - for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; - if (_PySeqLock_AfterFork(&entry->sequence)) { - // Entry was in the process of updating while forking, clear it... - entry->value = NULL; - Py_SETREF(entry->name, Py_None); - entry->version = 0; - } - } -#endif } /* Internal API to look for a name through the MRO. 
@@ -6290,45 +6164,16 @@ should_assign_version_tag(PyTypeObject *type, PyObject *name, unsigned int versi unsigned int _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) { - unsigned int h = MCACHE_HASH_METHOD(type, name); - struct type_cache *cache = get_type_cache(); - struct type_cache_entry *entry = &cache->hashtable[h]; -#ifdef Py_GIL_DISABLED - // synchronize-with other writing threads by doing an acquire load on the sequence - while (1) { - uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence); - uint32_t entry_version = _Py_atomic_load_uint32_acquire(&entry->version); - uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag); - if (entry_version == type_version && - _Py_atomic_load_ptr_relaxed(&entry->name) == name) { + int cacheable = MCACHE_CACHEABLE_NAME(name); + if (cacheable) { + struct _PyTypeCacheLookupResult r = _PyTypeCache_Lookup(type, name); + if (r.cache_hit) { OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - if (_Py_TryXGetStackRef(&entry->value, out)) { - // If the sequence is still valid then we're done - if (_PySeqLock_EndRead(&entry->sequence, sequence)) { - return entry_version; - } - PyStackRef_XCLOSE(*out); - } - else { - // If we can't incref the object we need to fallback to locking - break; - } - } - else { - // cache miss - break; + *out = r.value; + return r.version_tag; } } -#else - if (entry->version == type->tp_version_tag && entry->name == name) { - assert(type->tp_version_tag); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - *out = entry->value ? 
PyStackRef_FromPyObjectNew(entry->value) : PyStackRef_NULL; - return entry->version; - } -#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); @@ -6339,14 +6184,23 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef PyInterpreterState *interp = _PyInterpreterState_GET(); unsigned int version_tag = FT_ATOMIC_LOAD_UINT(type->tp_version_tag); - if (should_assign_version_tag(type, name, version_tag)) { + if (cacheable && + (version_tag != 0 || should_assign_version_tag(type, name, version_tag))) + { BEGIN_TYPE_LOCK(); - assign_version_tag(interp, type); version_tag = type->tp_version_tag; + if (version_tag == 0) { + assign_version_tag(interp, type); + version_tag = type->tp_version_tag; + } res = find_name_in_mro(type, name, out); + if (res >= 0 && version_tag != 0) { + _PyTypeCache_Insert(type, name, PyStackRef_AsPyObjectBorrow(*out)); + } END_TYPE_LOCK(); } else { + version_tag = 0; res = find_name_in_mro(type, name, out); } @@ -6356,17 +6210,6 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef return 0; } - if (version_tag == 0 || !MCACHE_CACHEABLE_NAME(name)) { - return 0; - } - - PyObject *res_obj = PyStackRef_AsPyObjectBorrow(*out); -#if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, version_tag, res_obj); -#else - PyObject *old_value = update_cache(entry, name, version_tag, res_obj); - Py_DECREF(old_value); -#endif return version_tag; } @@ -6881,6 +6724,7 @@ clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type, if (final) { Py_CLEAR(type->tp_cache); } + _PyTypeCache_Invalidate(type); clear_tp_dict(type); clear_tp_bases(type, final); clear_tp_mro(type, final); @@ -6990,6 +6834,7 @@ type_dealloc(PyObject *self) Py_XDECREF(type->tp_bases); Py_XDECREF(type->tp_mro); Py_XDECREF(type->tp_cache); + _PyTypeCache_Invalidate(type); clear_tp_subclasses(type); /* A type's tp_doc is heap 
allocated, unlike the tp_doc slots @@ -9505,6 +9350,8 @@ type_ready(PyTypeObject *type, int initial) goto error; } + _PyTypeCache_InitType(type); + #ifdef Py_TRACE_REFS /* PyType_Ready is the closest thing we have to a choke point * for type objects, so is the best place I can think of to try diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 17b98c9d9ec345..e6c0ae16a79986 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -282,6 +282,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index af3fded0dabf2d..28bac4e8e8a5a3 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -490,6 +490,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e255ed5af19125..9b8bdde8c8d9be 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -332,6 +332,7 @@ + @@ -700,6 +701,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 649ee1859ff996..7788871ea51a3a 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -894,6 +894,9 @@ Include\internal + + Include\internal + Include\internal @@ -1613,6 +1616,9 @@ Python + + Python + Python diff --git a/Python/pystate.c b/Python/pystate.c index ff712019affbf9..0c17a9901f195a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -12,7 +12,7 @@ #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interpframe.h" // _PyThreadState_HasStackSpace() -#include "pycore_object.h" // _PyType_InitCache() +#include "pycore_object.h" // _PyObject_GC_New() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() #include "pycore_optimizer.h" // JIT_CLEANUP_THRESHOLD #include "pycore_parking_lot.h" // _PyParkingLot_AfterFork() @@ -572,7 +572,6 @@ 
init_interpreter(PyInterpreterState *interp, _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); - _PyType_InitCache(interp); #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif diff --git a/Python/typecache.c b/Python/typecache.c new file mode 100644 index 00000000000000..4d74d9d6b05f54 --- /dev/null +++ b/Python/typecache.c @@ -0,0 +1,186 @@ +#include "Python.h" + +#include "pycore_typecache.h" +#include "pycore_interp.h" // PyInterpreterState +#include "pycore_object.h" // _PyObject_XDecRefDelayed() +#include "pycore_pymem.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_pyatomic_ft_wrappers.h" +#include "pycore_typeobject.h" // _PyStaticType_GetState() + +static struct { + struct type_cache cache; + struct type_cache_entry entries[_Py_TYPECACHE_MINSIZE]; +} empty_cache_storage = { + .cache = { + .mask = _Py_TYPECACHE_MINSIZE - 1, + .available = 0, + .used = 0, + }, +}; + +#define empty_cache (empty_cache_storage.cache) + +static inline uint32_t cache_size(struct type_cache *cache) +{ + return cache->mask + 1; +} + +static inline size_t cache_nbytes(struct type_cache *cache) +{ + return sizeof(struct type_cache) + + (size_t)cache_size(cache) * sizeof(struct type_cache_entry); +} + +static struct type_cache *allocate_cache(uint32_t size) +{ + assert((size & (size - 1)) == 0); + struct type_cache *cache = PyMem_Calloc(1, sizeof(struct type_cache) + size * sizeof(struct type_cache_entry)); + if (cache == NULL) { + return NULL; + } + cache->mask = size - 1; + cache->available = size - (size >> 2); + cache->used = 0; + return cache; +} + +static void free_cache_delayed(struct type_cache *cache) +{ + if (cache == NULL || cache == &empty_cache) { + return; + } + _PyMem_FreeDelayed(cache, cache_nbytes(cache)); +} + + +static inline void **cache_slot(PyTypeObject *type) +{ + if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + 
managed_static_type_state *state = _PyStaticType_GetState(interp, type); + assert(state != NULL); + return &state->_tp_cache; + } + return &type->_tp_cache; +} + +static inline struct type_cache *get_cache(PyTypeObject *type) +{ + return (struct type_cache *)FT_ATOMIC_LOAD_PTR(*cache_slot(type)); +} + +static inline void set_cache(PyTypeObject *type, struct type_cache *cache) +{ + FT_ATOMIC_STORE_PTR(*cache_slot(type), cache); +} + +void _PyTypeCache_InitType(PyTypeObject *type) +{ + *cache_slot(type) = &empty_cache; +} + +static inline void type_cache_insert(struct type_cache *cache, PyObject *name, + PyObject *value) +{ + Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); + assert(hash != -1); + uint32_t index = hash & cache->mask; + for (;;) { + if (cache->hashtable[index].name == NULL) { + FT_ATOMIC_STORE_PTR(cache->hashtable[index].value, value); + FT_ATOMIC_STORE_PTR(cache->hashtable[index].name, name); + cache->used++; + cache->available--; + return; + } + else if (cache->hashtable[index].name == name) { + return; + } + index = (index + 1) & cache->mask; + } +} + +static inline int type_cache_resize(PyTypeObject *type, struct type_cache *cache) +{ + uint32_t old_size = cache_size(cache); + uint32_t new_size; + if (cache->used == 0) { + new_size = _Py_TYPECACHE_MINSIZE; + } + else { + new_size = old_size * 2; + } + struct type_cache *new_cache = allocate_cache(new_size); + if (new_cache == NULL) { + return -1; + } + FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); + for (uint32_t i = 0; i < old_size; i++) { + if (cache->hashtable[i].name != NULL) { + type_cache_insert(new_cache, cache->hashtable[i].name, + cache->hashtable[i].value); + } + } + set_cache(type, new_cache); + free_cache_delayed(cache); + return 0; +} + +void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) +{ + struct type_cache *cache = get_cache(type); + if (cache->available == 0) { + if 
(type_cache_resize(type, cache) == -1) { + return; + } + cache = get_cache(type); + assert(cache->available > 0); + } + type_cache_insert(cache, name, value); + FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); +} + +struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject *name) +{ + assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)); + struct _PyTypeCacheLookupResult miss = {PyStackRef_NULL, 0, 0}; + struct type_cache *cache = get_cache(type); + if (cache == NULL) { + return miss; + } + Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); + assert(hash != -1); + uint32_t index = hash & cache->mask; + _PyStackRef out_ref; + for (;;) { + PyObject *entry_name = FT_ATOMIC_LOAD_PTR(cache->hashtable[index].name); + if (entry_name == NULL) { + return miss; + } + if (entry_name == name) { +#ifdef Py_GIL_DISABLED + if (!_Py_TryXGetStackRef(&cache->hashtable[index].value, &out_ref)) { + return miss; + } +#else + PyObject *v = cache->hashtable[index].value; + out_ref = v ? 
PyStackRef_FromPyObjectNew(v) : PyStackRef_NULL; +#endif + break; + } + index = (index + 1) & cache->mask; + } + uint32_t cache_version = FT_ATOMIC_LOAD_UINT_RELAXED(cache->version_tag); + if (cache_version != FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)) { + PyStackRef_XCLOSE(out_ref); + return miss; + } + return (struct _PyTypeCacheLookupResult){out_ref, 1, cache_version}; +} + +void _PyTypeCache_Invalidate(PyTypeObject *type) { + struct type_cache *cache = get_cache(type); + set_cache(type, &empty_cache); + free_cache_delayed(cache); +} diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index c8a914c22a9e13..824dd822854449 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -326,6 +326,22 @@ def enum_attr(): MyEnum.Z +_MCACHE_NUM_TYPES = 1 << 14 +_MCACHE_PAIRS = [ + (type(f"C{i}", (), {f"m{i}": lambda self: None})(), f"m{i}") + for i in range(_MCACHE_NUM_TYPES) +] + +@register_benchmark +def type_lookup(): + pairs = _MCACHE_PAIRS + n = len(pairs) + outer = (1000 * WORK_SCALE) // n + for _ in range(outer): + for inst, name in pairs: + getattr(inst, name) + + def bench_one_thread(func): t0 = time.perf_counter_ns() func() From a4b07af14940ade0f5fe35ff1a0619e90aa78d07 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 20 May 2026 20:06:35 +0530 Subject: [PATCH 2/7] add micro benchmark --- Tools/ftscalingbench/ftscalingbench.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 824dd822854449..a79242e740371b 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -325,19 +325,16 @@ def enum_attr(): MyEnum.Y MyEnum.Z - _MCACHE_NUM_TYPES = 1 << 14 _MCACHE_PAIRS = [ - (type(f"C{i}", (), {f"m{i}": lambda self: None})(), f"m{i}") + (type(f"C{i}", (), {f"m{i}": i % 256})(), sys.intern(f"m{i}")) for i in 
range(_MCACHE_NUM_TYPES) ] @register_benchmark def type_lookup(): pairs = _MCACHE_PAIRS - n = len(pairs) - outer = (1000 * WORK_SCALE) // n - for _ in range(outer): + for _ in range(WORK_SCALE // 10): for inst, name in pairs: getattr(inst, name) From 6b55b9d73f3b3cbbdee9827529e685262e83c765 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 20 May 2026 20:35:58 +0530 Subject: [PATCH 3/7] fix issue with zero length arrays --- Include/internal/pycore_typecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_typecache.h b/Include/internal/pycore_typecache.h index 2af68c20447656..da805ca33a1cb8 100644 --- a/Include/internal/pycore_typecache.h +++ b/Include/internal/pycore_typecache.h @@ -23,7 +23,7 @@ struct type_cache { uint32_t version_tag; uint32_t available; uint32_t used; - struct type_cache_entry hashtable[0]; + struct type_cache_entry hashtable[1]; }; struct _PyTypeCacheLookupResult { From c9bdf4b2f5dff22a965683eaf49e5cb12bdcafd9 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 20 May 2026 21:08:31 +0530 Subject: [PATCH 4/7] fix for default build --- Python/typecache.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Python/typecache.c b/Python/typecache.c index 4d74d9d6b05f54..870d6ce8629795 100644 --- a/Python/typecache.c +++ b/Python/typecache.c @@ -50,6 +50,13 @@ static void free_cache_delayed(struct type_cache *cache) if (cache == NULL || cache == &empty_cache) { return; } +#ifndef Py_GIL_DISABLED + for (uint32_t i = 0; i < cache_size(cache); i++) { + if (cache->hashtable[i].name != NULL) { + Py_DECREF(cache->hashtable[i].name); + } + } +#endif _PyMem_FreeDelayed(cache, cache_nbytes(cache)); } @@ -88,6 +95,7 @@ static inline void type_cache_insert(struct type_cache *cache, PyObject *name, uint32_t index = hash & cache->mask; for (;;) { if (cache->hashtable[index].name == NULL) { + Py_INCREF(name); FT_ATOMIC_STORE_PTR(cache->hashtable[index].value, value); 
FT_ATOMIC_STORE_PTR(cache->hashtable[index].name, name); cache->used++; From bd59f2d2f6f819200c5957409c6793ec2d758414 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Thu, 21 May 2026 21:48:36 +0530 Subject: [PATCH 5/7] add comments --- Objects/typeobject.c | 5 ++++- Python/typecache.c | 20 +++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1faa30ea2ae8ae..8305dd87864277 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6652,7 +6652,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) done: Py_DECREF(name); Py_XDECREF(descr); - Py_XDECREF(old_value); + // delay decref of the old value as lock-free type cache readers may access it + if (old_value != NULL && !_Py_IsImmortal(old_value)) { + _PyObject_XDecRefDelayed(old_value); + } return res; } diff --git a/Python/typecache.c b/Python/typecache.c index 870d6ce8629795..f346b0a48f702b 100644 --- a/Python/typecache.c +++ b/Python/typecache.c @@ -1,8 +1,14 @@ -#include "Python.h" +// Lock-free per type method cache implementation. + +// The cache is used for method and attribute lookups on type objects. +// The stored names are always interned strings, and the +// stored values are borrowed references to the corresponding method or attribute object. +// For static types, the cache is stored on the per-interpreter managed_static_type_state, +// and for heap types the cache is stored in the `PyTypeObject._tp_cache` field. 
+#include "Python.h" #include "pycore_typecache.h" #include "pycore_interp.h" // PyInterpreterState -#include "pycore_object.h" // _PyObject_XDecRefDelayed() #include "pycore_pymem.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pyatomic_ft_wrappers.h" @@ -18,7 +24,10 @@ static struct { .used = 0, }, }; - +// The empty cache is statically allocated and shared across all the types, +// when a type is modified, the cache of type is set to the empty cache +// and when a cache entry is inserted to the empty cache, a new cache is +// allocated for the type and the entry is inserted to the new cache. #define empty_cache (empty_cache_storage.cache) static inline uint32_t cache_size(struct type_cache *cache) @@ -95,7 +104,10 @@ static inline void type_cache_insert(struct type_cache *cache, PyObject *name, uint32_t index = hash & cache->mask; for (;;) { if (cache->hashtable[index].name == NULL) { +#ifndef Py_GIL_DISABLED + // On free-threading, all interned strings are immortal. 
Py_INCREF(name); +#endif FT_ATOMIC_STORE_PTR(cache->hashtable[index].value, value); FT_ATOMIC_STORE_PTR(cache->hashtable[index].name, name); cache->used++; @@ -179,6 +191,7 @@ struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject } index = (index + 1) & cache->mask; } + // to maintain consistency with find_name_in_mro and prevent stale cache reads uint32_t cache_version = FT_ATOMIC_LOAD_UINT_RELAXED(cache->version_tag); if (cache_version != FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)) { PyStackRef_XCLOSE(out_ref); @@ -187,6 +200,7 @@ struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject return (struct _PyTypeCacheLookupResult){out_ref, 1, cache_version}; } + void _PyTypeCache_Invalidate(PyTypeObject *type) { struct type_cache *cache = get_cache(type); set_cache(type, &empty_cache); From 7416272906f9cde0c8f17e86c5f570f169cc6582 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Thu, 21 May 2026 22:04:27 +0530 Subject: [PATCH 6/7] consistent function naming --- Python/typecache.c | 48 +++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/Python/typecache.c b/Python/typecache.c index f346b0a48f702b..edcfb0d2e9eca2 100644 --- a/Python/typecache.c +++ b/Python/typecache.c @@ -41,31 +41,36 @@ static inline size_t cache_nbytes(struct type_cache *cache) + (size_t)cache_size(cache) * sizeof(struct type_cache_entry); } -static struct type_cache *allocate_cache(uint32_t size) +static struct type_cache *cache_allocate(uint32_t size) { + // size must be a power of two assert((size & (size - 1)) == 0); struct type_cache *cache = PyMem_Calloc(1, sizeof(struct type_cache) + size * sizeof(struct type_cache_entry)); if (cache == NULL) { return NULL; } cache->mask = size - 1; + // load factor of 0.75 cache->available = size - (size >> 2); cache->used = 0; return cache; } -static void free_cache_delayed(struct type_cache *cache) +static void cache_free_delayed(struct 
type_cache *cache) { if (cache == NULL || cache == &empty_cache) { return; } #ifndef Py_GIL_DISABLED + // On gil-enabled builds, the cache owns strong references to the interned strings, + // so we need to decref them before freeing the cache memory. for (uint32_t i = 0; i < cache_size(cache); i++) { if (cache->hashtable[i].name != NULL) { Py_DECREF(cache->hashtable[i].name); } } #endif + // Delay the freeing of old cache for concurrent lock-free readers _PyMem_FreeDelayed(cache, cache_nbytes(cache)); } @@ -81,12 +86,12 @@ static inline void **cache_slot(PyTypeObject *type) return &type->_tp_cache; } -static inline struct type_cache *get_cache(PyTypeObject *type) +static inline struct type_cache *cache_get(PyTypeObject *type) { return (struct type_cache *)FT_ATOMIC_LOAD_PTR(*cache_slot(type)); } -static inline void set_cache(PyTypeObject *type, struct type_cache *cache) +static inline void cache_set(PyTypeObject *type, struct type_cache *cache) { FT_ATOMIC_STORE_PTR(*cache_slot(type), cache); } @@ -96,7 +101,7 @@ void _PyTypeCache_InitType(PyTypeObject *type) *cache_slot(type) = &empty_cache; } -static inline void type_cache_insert(struct type_cache *cache, PyObject *name, +static inline void cache_insert(struct type_cache *cache, PyObject *name, PyObject *value) { Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); @@ -115,49 +120,55 @@ static inline void type_cache_insert(struct type_cache *cache, PyObject *name, return; } else if (cache->hashtable[index].name == name) { + /* someone else added the entry before us. 
*/ return; } index = (index + 1) & cache->mask; } } -static inline int type_cache_resize(PyTypeObject *type, struct type_cache *cache) +static inline int cache_resize(PyTypeObject *type, struct type_cache *cache) { uint32_t old_size = cache_size(cache); uint32_t new_size; if (cache->used == 0) { + // the cache is the empty cache, we need to allocate a new cache with the minimum size new_size = _Py_TYPECACHE_MINSIZE; } else { + // double the cache size when resizing new_size = old_size * 2; } - struct type_cache *new_cache = allocate_cache(new_size); + struct type_cache *new_cache = cache_allocate(new_size); if (new_cache == NULL) { return -1; } FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); for (uint32_t i = 0; i < old_size; i++) { if (cache->hashtable[i].name != NULL) { - type_cache_insert(new_cache, cache->hashtable[i].name, + cache_insert(new_cache, cache->hashtable[i].name, cache->hashtable[i].value); } } - set_cache(type, new_cache); - free_cache_delayed(cache); + cache_set(type, new_cache); + cache_free_delayed(cache); return 0; } void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) { - struct type_cache *cache = get_cache(type); + struct type_cache *cache = cache_get(type); + // If the cache is full, resize it before inserting the new entry. + // this also handles the case of empty cache where available is 0 but there are no entries. 
if (cache->available == 0) { - if (type_cache_resize(type, cache) == -1) { + if (cache_resize(type, cache) == -1) { + // out of memory, don't cache the value return; } - cache = get_cache(type); + cache = cache_get(type); assert(cache->available > 0); } - type_cache_insert(cache, name, value); + cache_insert(cache, name, value); FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); } @@ -165,7 +176,7 @@ struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject { assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)); struct _PyTypeCacheLookupResult miss = {PyStackRef_NULL, 0, 0}; - struct type_cache *cache = get_cache(type); + struct type_cache *cache = cache_get(type); if (cache == NULL) { return miss; } @@ -202,7 +213,8 @@ struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject void _PyTypeCache_Invalidate(PyTypeObject *type) { - struct type_cache *cache = get_cache(type); - set_cache(type, &empty_cache); - free_cache_delayed(cache); + struct type_cache *cache = cache_get(type); + // if the type was modified, the cache is set to the empty cache and the old cache is freed after a delay. + cache_set(type, &empty_cache); + cache_free_delayed(cache); } From 16ab4f98bcfcbcde5f03b34d69602f444fd917c3 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Thu, 21 May 2026 22:21:30 +0530 Subject: [PATCH 7/7] formatting --- Python/typecache.c | 46 +++++++++++++++++++--------- Tools/c-analyzer/cpython/ignored.tsv | 3 ++ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/Python/typecache.c b/Python/typecache.c index edcfb0d2e9eca2..a37118badeab68 100644 --- a/Python/typecache.c +++ b/Python/typecache.c @@ -30,22 +30,27 @@ static struct { // allocated for the type and the entry is inserted to the new cache. 
#define empty_cache (empty_cache_storage.cache) -static inline uint32_t cache_size(struct type_cache *cache) +static inline uint32_t +cache_size(struct type_cache *cache) { return cache->mask + 1; } -static inline size_t cache_nbytes(struct type_cache *cache) +static inline size_t +cache_nbytes(struct type_cache *cache) { return sizeof(struct type_cache) + (size_t)cache_size(cache) * sizeof(struct type_cache_entry); } -static struct type_cache *cache_allocate(uint32_t size) +static struct type_cache * +cache_allocate(uint32_t size) { // size must be a power of two assert((size & (size - 1)) == 0); - struct type_cache *cache = PyMem_Calloc(1, sizeof(struct type_cache) + size * sizeof(struct type_cache_entry)); + size_t nbytes = sizeof(struct type_cache) + + (size_t)size * sizeof(struct type_cache_entry); + struct type_cache *cache = PyMem_Calloc(1, nbytes); if (cache == NULL) { return NULL; } @@ -56,7 +61,8 @@ static struct type_cache *cache_allocate(uint32_t size) return cache; } -static void cache_free_delayed(struct type_cache *cache) +static void +cache_free_delayed(struct type_cache *cache) { if (cache == NULL || cache == &empty_cache) { return; @@ -75,7 +81,8 @@ static void cache_free_delayed(struct type_cache *cache) } -static inline void **cache_slot(PyTypeObject *type) +static inline void ** +cache_slot(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -86,23 +93,27 @@ static inline void **cache_slot(PyTypeObject *type) return &type->_tp_cache; } -static inline struct type_cache *cache_get(PyTypeObject *type) +static inline struct type_cache * +cache_get(PyTypeObject *type) { return (struct type_cache *)FT_ATOMIC_LOAD_PTR(*cache_slot(type)); } -static inline void cache_set(PyTypeObject *type, struct type_cache *cache) +static inline void +cache_set(PyTypeObject *type, struct type_cache *cache) { FT_ATOMIC_STORE_PTR(*cache_slot(type), cache); } -void 
_PyTypeCache_InitType(PyTypeObject *type) +void +_PyTypeCache_InitType(PyTypeObject *type) { *cache_slot(type) = &empty_cache; } -static inline void cache_insert(struct type_cache *cache, PyObject *name, - PyObject *value) +static inline void +cache_insert(struct type_cache *cache, PyObject *name, + PyObject *value) { Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); assert(hash != -1); @@ -127,7 +138,8 @@ static inline void cache_insert(struct type_cache *cache, PyObject *name, } } -static inline int cache_resize(PyTypeObject *type, struct type_cache *cache) +static inline int +cache_resize(PyTypeObject *type, struct type_cache *cache) { uint32_t old_size = cache_size(cache); uint32_t new_size; @@ -155,7 +167,8 @@ static inline int cache_resize(PyTypeObject *type, struct type_cache *cache) return 0; } -void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) +void +_PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) { struct type_cache *cache = cache_get(type); // If the cache is full, resize it before inserting the new entry. @@ -172,7 +185,8 @@ void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); } -struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject *name) +struct _PyTypeCacheLookupResult +_PyTypeCache_Lookup(PyTypeObject *type, PyObject *name) { assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)); struct _PyTypeCacheLookupResult miss = {PyStackRef_NULL, 0, 0}; @@ -212,7 +226,9 @@ struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject } -void _PyTypeCache_Invalidate(PyTypeObject *type) { +void +_PyTypeCache_Invalidate(PyTypeObject *type) +{ struct type_cache *cache = cache_get(type); // if the type was modified, the cache is set to the empty cache and the old cache is freed after a delay. 
cache_set(type, &empty_cache); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index ddfb93a424c018..6f09fc665e2c85 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -57,6 +57,9 @@ Python/pyhash.c - _Py_HashSecret - ## thread-safe hashtable (internal locks) Python/parking_lot.c - buckets - +## shared empty sentinel for the per-type method cache +Python/typecache.c - empty_cache_storage - + ## data needed for introspecting asyncio state from debuggers and profilers Modules/_asynciomodule.c - _Py_AsyncioDebug -