diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 326254c335b489..4c5a677e5543ec 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -246,6 +246,8 @@ struct _typeobject { * This function must escape to any code that can result in * the GC being run, such as Py_DECREF. */ _Py_iteritemfunc _tp_iteritem; + + void *_tp_cache; }; #define _Py_ATTR_CACHE_UNUSED (30000) // (see tp_versions_used) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f13bc2178b1e7e..1c0ea07d2843bc 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -548,23 +548,6 @@ struct _types_runtime_state { }; -// Type attribute lookup cache: speed up attribute and method lookups, -// see _PyType_Lookup(). -struct type_cache_entry { - unsigned int version; // initialized from type->tp_version_tag -#ifdef Py_GIL_DISABLED - _PySeqLock sequence; -#endif - PyObject *name; // reference to exactly a str or None - PyObject *value; // borrowed reference or NULL -}; - -#define MCACHE_SIZE_EXP 12 - -struct type_cache { - struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP]; -}; - typedef struct { PyTypeObject *type; int isbuiltin; @@ -579,6 +562,10 @@ typedef struct { are also some diagnostic uses for the list of weakrefs, so we still keep it. */ PyObject *tp_weaklist; + /* Per-interpreter attribute lookup cache (struct type_cache *). + For static builtin types the cache must be per-interpreter + because tp_dict and the values it stores are per-interpreter. */ + void *_tp_cache; } managed_static_type_state; #define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ @@ -589,8 +576,6 @@ struct types_state { where all those lower numbers are used for core static types. 
*/ unsigned int next_version_tag; - struct type_cache type_cache; - /* Every static builtin type is initialized for each interpreter during its own initialization, including for the main interpreter during global runtime initialization. This is done by calling diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c2c508c1a71c5c..8fa3b47b6c312a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -291,8 +291,6 @@ _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { return ((type->tp_flags) & feature) != 0; } -extern void _PyType_InitCache(PyInterpreterState *interp); - extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); diff --git a/Include/internal/pycore_typecache.h b/Include/internal/pycore_typecache.h new file mode 100644 index 00000000000000..da805ca33a1cb8 --- /dev/null +++ b/Include/internal/pycore_typecache.h @@ -0,0 +1,44 @@ +#ifndef PY_INTERNAL_TYPECACHE_H +#define PY_INTERNAL_TYPECACHE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_stackref.h" + + +#define _Py_TYPECACHE_MINSIZE 8 + +struct type_cache_entry { + PyObject *name; + PyObject *value; +}; + +struct type_cache { + uint32_t mask; + uint32_t version_tag; + uint32_t available; + uint32_t used; + struct type_cache_entry hashtable[1]; +}; + +struct _PyTypeCacheLookupResult { + _PyStackRef value; + int cache_hit; + uint32_t version_tag; +}; + + +extern void _PyTypeCache_InitType(PyTypeObject *type); +extern void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value); +extern struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject *name); +extern void _PyTypeCache_Invalidate(PyTypeObject *type); + +#ifdef __cplusplus +} +#endif 
+#endif /* PY_INTERNAL_TYPECACHE_H */ diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 1255d842dbff48..f7bacab00846b9 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -84,6 +84,24 @@ def reader_func(): self.run_one(writer_func, reader_func) + def test_attr_cache_mortal(self): + class C: + x = object() + + class D(C): + pass + + def writer_func(): + for _ in range(3000): + C.x = object() + + def reader_func(): + for _ in range(3000): + C.x + D.x + + self.run_one(writer_func, reader_func) + def test___class___modification(self): loops = 200 diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 02c70403185f60..75347f59f4adf4 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1788,7 +1788,7 @@ def delx(self): del self.__x check((1,2,3), vsize('') + self.P + 3*self.P) # type # static type: PyTypeObject - fmt = 'P2nPI13Pl4Pn9Pn12PI2Pc' + fmt = 'P2nPI13Pl4Pn9Pn12PI2PcP' s = vsize(fmt) check(int, s) typeid = 'n' if support.Py_GIL_DISABLED else '' diff --git a/Makefile.pre.in b/Makefile.pre.in index 9435bf534fb512..eebcf45e442e88 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -506,6 +506,7 @@ PYTHON_OBJS= \ Python/thread.o \ Python/traceback.o \ Python/tracemalloc.o \ + Python/typecache.o \ Python/uniqueid.o \ Python/getopt.o \ Python/pystrcmp.o \ @@ -1411,6 +1412,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_tracemalloc.h \ $(srcdir)/Include/internal/pycore_tstate.h \ $(srcdir)/Include/internal/pycore_tuple.h \ + $(srcdir)/Include/internal/pycore_typecache.h \ $(srcdir)/Include/internal/pycore_typedefs.h \ $(srcdir)/Include/internal/pycore_typeobject.h \ $(srcdir)/Include/internal/pycore_typevarobject.h \ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7cca137f74be58..8305dd87864277 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -21,7 +21,8 @@ #include "pycore_slots.h" // 
_PySlotIterator_Init #include "pycore_symtable.h" // _Py_Mangle() #include "pycore_tuple.h" // _PyTuple_FromPair -#include "pycore_typeobject.h" // struct type_cache +#include "pycore_typecache.h" // _PyTypeCache_Lookup() +#include "pycore_typeobject.h" // _PyTypes_InitTypes() #include "pycore_unicodeobject.h" // _PyUnicode_Copy #include "pycore_unionobject.h" // _Py_union_type_or #include "pycore_weakref.h" // _PyWeakref_GET_REF() @@ -41,21 +42,7 @@ class object "PyObject *" "&PyBaseObject_Type" /* Support type attribute lookup cache */ -/* The cache can keep references to the names alive for longer than - they normally would. This is why the maximum size is limited to - MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large - strings are used as attribute names. */ -#define MCACHE_MAX_ATTR_SIZE 100 -#define MCACHE_HASH(version, name_hash) \ - (((unsigned int)(version) ^ (unsigned int)(name_hash)) \ - & ((1 << MCACHE_SIZE_EXP) - 1)) - -#define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ - ((Py_ssize_t)(name)) >> 3) -#define MCACHE_CACHEABLE_NAME(name) \ - (PyUnicode_CheckExact(name) && \ - (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE)) +#define MCACHE_CACHEABLE_NAME(name) (PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)) #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag @@ -969,75 +956,18 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d } -static struct type_cache* -get_type_cache(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->types.type_cache; -} - - -static void -type_cache_clear(struct type_cache *cache, PyObject *value) -{ - for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; -#ifdef Py_GIL_DISABLED - _PySeqLock_LockWrite(&entry->sequence); -#endif - entry->version = 0; - Py_XSETREF(entry->name, _Py_XNewRef(value)); - 
entry->value = NULL; -#ifdef Py_GIL_DISABLED - _PySeqLock_UnlockWrite(&entry->sequence); -#endif - } -} - - -void -_PyType_InitCache(PyInterpreterState *interp) -{ - struct type_cache *cache = &interp->types.type_cache; - for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; - assert(entry->name == NULL); - - entry->version = 0; - // Set to None so _PyType_LookupRef() can use Py_SETREF(), - // rather than using slower Py_XSETREF(). - entry->name = Py_None; - entry->value = NULL; - } -} - - -static unsigned int -_PyType_ClearCache(PyInterpreterState *interp) -{ - struct type_cache *cache = &interp->types.type_cache; - // Set to None, rather than NULL, so _PyType_LookupRef() can - // use Py_SETREF() rather than using slower Py_XSETREF(). - type_cache_clear(cache, Py_None); - - return NEXT_VERSION_TAG(interp) - 1; -} - - unsigned int PyType_ClearCache(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - return _PyType_ClearCache(interp); + + return NEXT_VERSION_TAG(interp) - 1; } void _PyTypes_Fini(PyInterpreterState *interp) { - struct type_cache *cache = &interp->types.type_cache; - type_cache_clear(cache, NULL); - // All the managed static types should have been finalized already. 
assert(interp->types.for_extensions.num_initialized == 0); for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { @@ -1231,6 +1161,7 @@ type_modified_unlocked(PyTypeObject *type) } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + _PyTypeCache_Invalidate(type); if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -1314,6 +1245,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + _PyTypeCache_Invalidate(type); type->tp_versions_used = _Py_ATTR_CACHE_UNUSED; if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the @@ -6197,67 +6129,9 @@ is_dunder_name(PyObject *name) return 0; } -static PyObject * -update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value) -{ - _Py_atomic_store_ptr_relaxed(&entry->value, value); /* borrowed */ - assert(PyUnstable_Unicode_GET_CACHED_HASH(name) != -1); - OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); - // We're releasing this under the lock for simplicity sake because it's always a - // exact unicode object or Py_None so it's safe to do so. - PyObject *old_name = entry->name; - _Py_atomic_store_ptr_relaxed(&entry->name, Py_NewRef(name)); - // We must write the version last to avoid _Py_TryXGetStackRef() - // operating on an invalid (already deallocated) value inside - // _PyType_LookupRefAndVersion(). If we write the version first then a - // reader could pass the "entry_version == type_version" check but could - // be using the old entry value. 
- _Py_atomic_store_uint32_release(&entry->version, version_tag); - return old_name; -} - -#if Py_GIL_DISABLED - -static void -update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, - unsigned int version_tag, PyObject *value) -{ - _PySeqLock_LockWrite(&entry->sequence); - - // update the entry - if (entry->name == name && - entry->value == value && - entry->version == version_tag) { - // We raced with another update, bail and restore previous sequence. - _PySeqLock_AbandonWrite(&entry->sequence); - return; - } - - PyObject *old_value = update_cache(entry, name, version_tag, value); - - // Then update sequence to the next valid value - _PySeqLock_UnlockWrite(&entry->sequence); - - Py_DECREF(old_value); -} - -#endif - void _PyTypes_AfterFork(void) { -#ifdef Py_GIL_DISABLED - struct type_cache *cache = get_type_cache(); - for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { - struct type_cache_entry *entry = &cache->hashtable[i]; - if (_PySeqLock_AfterFork(&entry->sequence)) { - // Entry was in the process of updating while forking, clear it... - entry->value = NULL; - Py_SETREF(entry->name, Py_None); - entry->version = 0; - } - } -#endif } /* Internal API to look for a name through the MRO. 
@@ -6290,45 +6164,16 @@ should_assign_version_tag(PyTypeObject *type, PyObject *name, unsigned int versi unsigned int _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) { - unsigned int h = MCACHE_HASH_METHOD(type, name); - struct type_cache *cache = get_type_cache(); - struct type_cache_entry *entry = &cache->hashtable[h]; -#ifdef Py_GIL_DISABLED - // synchronize-with other writing threads by doing an acquire load on the sequence - while (1) { - uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence); - uint32_t entry_version = _Py_atomic_load_uint32_acquire(&entry->version); - uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag); - if (entry_version == type_version && - _Py_atomic_load_ptr_relaxed(&entry->name) == name) { + int cacheable = MCACHE_CACHEABLE_NAME(name); + if (cacheable) { + struct _PyTypeCacheLookupResult r = _PyTypeCache_Lookup(type, name); + if (r.cache_hit) { OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - if (_Py_TryXGetStackRef(&entry->value, out)) { - // If the sequence is still valid then we're done - if (_PySeqLock_EndRead(&entry->sequence, sequence)) { - return entry_version; - } - PyStackRef_XCLOSE(*out); - } - else { - // If we can't incref the object we need to fallback to locking - break; - } - } - else { - // cache miss - break; + *out = r.value; + return r.version_tag; } } -#else - if (entry->version == type->tp_version_tag && entry->name == name) { - assert(type->tp_version_tag); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - *out = entry->value ? 
PyStackRef_FromPyObjectNew(entry->value) : PyStackRef_NULL; - return entry->version; - } -#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); @@ -6339,14 +6184,23 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef PyInterpreterState *interp = _PyInterpreterState_GET(); unsigned int version_tag = FT_ATOMIC_LOAD_UINT(type->tp_version_tag); - if (should_assign_version_tag(type, name, version_tag)) { + if (cacheable && + (version_tag != 0 || should_assign_version_tag(type, name, version_tag))) + { BEGIN_TYPE_LOCK(); - assign_version_tag(interp, type); version_tag = type->tp_version_tag; + if (version_tag == 0) { + assign_version_tag(interp, type); + version_tag = type->tp_version_tag; + } res = find_name_in_mro(type, name, out); + if (res >= 0 && version_tag != 0) { + _PyTypeCache_Insert(type, name, PyStackRef_AsPyObjectBorrow(*out)); + } END_TYPE_LOCK(); } else { + version_tag = 0; res = find_name_in_mro(type, name, out); } @@ -6356,17 +6210,6 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef return 0; } - if (version_tag == 0 || !MCACHE_CACHEABLE_NAME(name)) { - return 0; - } - - PyObject *res_obj = PyStackRef_AsPyObjectBorrow(*out); -#if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, version_tag, res_obj); -#else - PyObject *old_value = update_cache(entry, name, version_tag, res_obj); - Py_DECREF(old_value); -#endif return version_tag; } @@ -6809,7 +6652,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) done: Py_DECREF(name); Py_XDECREF(descr); - Py_XDECREF(old_value); + // delay decref of the old value as lock-free type cache readers may access it + if (old_value != NULL && !_Py_IsImmortal(old_value)) { + _PyObject_XDecRefDelayed(old_value); + } return res; } @@ -6881,6 +6727,7 @@ clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type, if (final) { 
Py_CLEAR(type->tp_cache); } + _PyTypeCache_Invalidate(type); clear_tp_dict(type); clear_tp_bases(type, final); clear_tp_mro(type, final); @@ -6990,6 +6837,7 @@ type_dealloc(PyObject *self) Py_XDECREF(type->tp_bases); Py_XDECREF(type->tp_mro); Py_XDECREF(type->tp_cache); + _PyTypeCache_Invalidate(type); clear_tp_subclasses(type); /* A type's tp_doc is heap allocated, unlike the tp_doc slots @@ -9505,6 +9353,8 @@ type_ready(PyTypeObject *type, int initial) goto error; } + _PyTypeCache_InitType(type); + #ifdef Py_TRACE_REFS /* PyType_Ready is the closest thing we have to a choke point * for type objects, so is the best place I can think of to try diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 17b98c9d9ec345..e6c0ae16a79986 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -282,6 +282,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index af3fded0dabf2d..28bac4e8e8a5a3 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -490,6 +490,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e255ed5af19125..9b8bdde8c8d9be 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -332,6 +332,7 @@ + @@ -700,6 +701,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 649ee1859ff996..7788871ea51a3a 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -894,6 +894,9 @@ Include\internal + + Include\internal + Include\internal @@ -1613,6 +1616,9 @@ Python + + Python + Python diff --git a/Python/pystate.c b/Python/pystate.c index 530bd567b770be..e0272650702c86 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -12,7 +12,7 @@ #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" // _PyStatus_OK() #include 
"pycore_interpframe.h" // _PyThreadState_HasStackSpace() -#include "pycore_object.h" // _PyType_InitCache() +#include "pycore_object.h" // _PyObject_GC_New() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() #include "pycore_optimizer.h" // JIT_CLEANUP_THRESHOLD #include "pycore_parking_lot.h" // _PyParkingLot_AfterFork() @@ -572,7 +572,6 @@ init_interpreter(PyInterpreterState *interp, _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); - _PyType_InitCache(interp); #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif diff --git a/Python/typecache.c b/Python/typecache.c new file mode 100644 index 00000000000000..a37118badeab68 --- /dev/null +++ b/Python/typecache.c @@ -0,0 +1,236 @@ +// Lock-free per type method cache implementation. + +// The cache is used for method and attribute lookups on type objects. +// The stored names are always interned strings, and the +// stored values are borrowed references to the corresponding method or attribute object. +// For static types, the cache is stored on the per-interpreter managed_static_type_state, +// and for heap types the cache is stored in the `PyTypeObject._tp_cache` field. 
+ +#include "Python.h" +#include "pycore_typecache.h" +#include "pycore_interp.h" // PyInterpreterState +#include "pycore_pymem.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_pyatomic_ft_wrappers.h" +#include "pycore_typeobject.h" // _PyStaticType_GetState() + +static struct { + struct type_cache cache; + struct type_cache_entry entries[_Py_TYPECACHE_MINSIZE]; +} empty_cache_storage = { + .cache = { + .mask = _Py_TYPECACHE_MINSIZE - 1, + .available = 0, + .used = 0, + }, +}; +// The empty cache is statically allocated and shared across all the types, +// when a type is modified, the cache of type is set to the empty cache +// and when a cache entry is inserted to the empty cache, a new cache is +// allocated for the type and the entry is inserted to the new cache. +#define empty_cache (empty_cache_storage.cache) + +static inline uint32_t +cache_size(struct type_cache *cache) +{ + return cache->mask + 1; +} + +static inline size_t +cache_nbytes(struct type_cache *cache) +{ + return sizeof(struct type_cache) + + (size_t)cache_size(cache) * sizeof(struct type_cache_entry); +} + +static struct type_cache * +cache_allocate(uint32_t size) +{ + // size must be a power of two + assert((size & (size - 1)) == 0); + size_t nbytes = sizeof(struct type_cache) + + (size_t)size * sizeof(struct type_cache_entry); + struct type_cache *cache = PyMem_Calloc(1, nbytes); + if (cache == NULL) { + return NULL; + } + cache->mask = size - 1; + // load factor of 0.75 + cache->available = size - (size >> 2); + cache->used = 0; + return cache; +} + +static void +cache_free_delayed(struct type_cache *cache) +{ + if (cache == NULL || cache == &empty_cache) { + return; + } +#ifndef Py_GIL_DISABLED + // On gil-enabled builds, the cache owns strong references to the interned strings, + // so we need to decref them before freeing the cache memory. 
+ for (uint32_t i = 0; i < cache_size(cache); i++) { + if (cache->hashtable[i].name != NULL) { + Py_DECREF(cache->hashtable[i].name); + } + } +#endif + // Delay the freeing of old cache for concurrent lock-free readers + _PyMem_FreeDelayed(cache, cache_nbytes(cache)); +} + + +static inline void ** +cache_slot(PyTypeObject *type) +{ + if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + managed_static_type_state *state = _PyStaticType_GetState(interp, type); + assert(state != NULL); + return &state->_tp_cache; + } + return &type->_tp_cache; +} + +static inline struct type_cache * +cache_get(PyTypeObject *type) +{ + return (struct type_cache *)FT_ATOMIC_LOAD_PTR(*cache_slot(type)); +} + +static inline void +cache_set(PyTypeObject *type, struct type_cache *cache) +{ + FT_ATOMIC_STORE_PTR(*cache_slot(type), cache); +} + +void +_PyTypeCache_InitType(PyTypeObject *type) +{ + *cache_slot(type) = &empty_cache; +} + +static inline void +cache_insert(struct type_cache *cache, PyObject *name, + PyObject *value) +{ + Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); + assert(hash != -1); + uint32_t index = hash & cache->mask; + for (;;) { + if (cache->hashtable[index].name == NULL) { +#ifndef Py_GIL_DISABLED + // On free-threading, all interned strings are immortal. + Py_INCREF(name); +#endif + FT_ATOMIC_STORE_PTR(cache->hashtable[index].value, value); + FT_ATOMIC_STORE_PTR(cache->hashtable[index].name, name); + cache->used++; + cache->available--; + return; + } + else if (cache->hashtable[index].name == name) { + /* someone else added the entry before us. 
*/ + return; + } + index = (index + 1) & cache->mask; + } +} + +static inline int +cache_resize(PyTypeObject *type, struct type_cache *cache) +{ + uint32_t old_size = cache_size(cache); + uint32_t new_size; + if (cache->used == 0) { + // the cache is the empty cache, we need to allocate a new cache with the minimum size + new_size = _Py_TYPECACHE_MINSIZE; + } + else { + // double the cache size when resizing + new_size = old_size * 2; + } + struct type_cache *new_cache = cache_allocate(new_size); + if (new_cache == NULL) { + return -1; + } + FT_ATOMIC_STORE_UINT_RELAXED(new_cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); + for (uint32_t i = 0; i < old_size; i++) { + if (cache->hashtable[i].name != NULL) { + cache_insert(new_cache, cache->hashtable[i].name, + cache->hashtable[i].value); + } + } + cache_set(type, new_cache); + cache_free_delayed(cache); + return 0; +} + +void +_PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value) +{ + struct type_cache *cache = cache_get(type); + // If the cache is full, resize it before inserting the new entry. + // this also handles the case of empty cache where available is 0 but there are no entries. 
+ if (cache->available == 0) { + if (cache_resize(type, cache) == -1) { + // out of memory, don't cache the value + return; + } + cache = cache_get(type); + assert(cache->available > 0); + } + cache_insert(cache, name, value); + FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)); +} + +struct _PyTypeCacheLookupResult +_PyTypeCache_Lookup(PyTypeObject *type, PyObject *name) +{ + assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)); + struct _PyTypeCacheLookupResult miss = {PyStackRef_NULL, 0, 0}; + struct type_cache *cache = cache_get(type); + if (cache == NULL) { + return miss; + } + Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name); + assert(hash != -1); + uint32_t index = hash & cache->mask; + _PyStackRef out_ref; + for (;;) { + PyObject *entry_name = FT_ATOMIC_LOAD_PTR(cache->hashtable[index].name); + if (entry_name == NULL) { + return miss; + } + if (entry_name == name) { +#ifdef Py_GIL_DISABLED + if (!_Py_TryXGetStackRef(&cache->hashtable[index].value, &out_ref)) { + return miss; + } +#else + PyObject *v = cache->hashtable[index].value; + out_ref = v ? PyStackRef_FromPyObjectNew(v) : PyStackRef_NULL; +#endif + break; + } + index = (index + 1) & cache->mask; + } + // to maintain consistency with find_name_in_mro and prevent stale cache reads + uint32_t cache_version = FT_ATOMIC_LOAD_UINT_RELAXED(cache->version_tag); + if (cache_version != FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)) { + PyStackRef_XCLOSE(out_ref); + return miss; + } + return (struct _PyTypeCacheLookupResult){out_ref, 1, cache_version}; +} + + +void +_PyTypeCache_Invalidate(PyTypeObject *type) +{ + struct type_cache *cache = cache_get(type); + // if the type was modified, the cache is set to the empty cache and the old cache is freed after a delay. 
+ cache_set(type, &empty_cache); + cache_free_delayed(cache); +} diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index ddfb93a424c018..6f09fc665e2c85 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -57,6 +57,9 @@ Python/pyhash.c - _Py_HashSecret - ## thread-safe hashtable (internal locks) Python/parking_lot.c - buckets - +## shared empty sentinel for the per-type method cache +Python/typecache.c - empty_cache_storage - + ## data needed for introspecting asyncio state from debuggers and profilers Modules/_asynciomodule.c - _Py_AsyncioDebug - diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index c8a914c22a9e13..a79242e740371b 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -325,6 +325,19 @@ def enum_attr(): MyEnum.Y MyEnum.Z +_MCACHE_NUM_TYPES = 1 << 14 +_MCACHE_PAIRS = [ + (type(f"C{i}", (), {f"m{i}": i % 256})(), sys.intern(f"m{i}")) + for i in range(_MCACHE_NUM_TYPES) +] + +@register_benchmark +def type_lookup(): + pairs = _MCACHE_PAIRS + for _ in range(WORK_SCALE // 10): + for inst, name in pairs: + getattr(inst, name) + def bench_one_thread(func): t0 = time.perf_counter_ns()