diff --git a/Include/cpython/object.h b/Include/cpython/object.h
index 326254c335b489..4c5a677e5543ec 100644
--- a/Include/cpython/object.h
+++ b/Include/cpython/object.h
@@ -246,6 +246,8 @@ struct _typeobject {
* This function must escape to any code that can result in
* the GC being run, such as Py_DECREF. */
_Py_iteritemfunc _tp_iteritem;
+
+ void *_tp_cache;
};
#define _Py_ATTR_CACHE_UNUSED (30000) // (see tp_versions_used)
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index f13bc2178b1e7e..1c0ea07d2843bc 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -548,23 +548,6 @@ struct _types_runtime_state {
};
-// Type attribute lookup cache: speed up attribute and method lookups,
-// see _PyType_Lookup().
-struct type_cache_entry {
- unsigned int version; // initialized from type->tp_version_tag
-#ifdef Py_GIL_DISABLED
- _PySeqLock sequence;
-#endif
- PyObject *name; // reference to exactly a str or None
- PyObject *value; // borrowed reference or NULL
-};
-
-#define MCACHE_SIZE_EXP 12
-
-struct type_cache {
- struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
-};
-
typedef struct {
PyTypeObject *type;
int isbuiltin;
@@ -579,6 +562,10 @@ typedef struct {
are also some diagnostic uses for the list of weakrefs,
so we still keep it. */
PyObject *tp_weaklist;
+ /* Per-interpreter attribute lookup cache (struct type_cache *).
+ For static builtin types the cache must be per-interpreter
+ because tp_dict and the values it stores are per-interpreter. */
+ void *_tp_cache;
} managed_static_type_state;
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
@@ -589,8 +576,6 @@ struct types_state {
where all those lower numbers are used for core static types. */
unsigned int next_version_tag;
- struct type_cache type_cache;
-
/* Every static builtin type is initialized for each interpreter
during its own initialization, including for the main interpreter
during global runtime initialization. This is done by calling
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index c2c508c1a71c5c..8fa3b47b6c312a 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -291,8 +291,6 @@ _PyType_HasFeature(PyTypeObject *type, unsigned long feature) {
return ((type->tp_flags) & feature) != 0;
}
-extern void _PyType_InitCache(PyInterpreterState *interp);
-
extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
diff --git a/Include/internal/pycore_typecache.h b/Include/internal/pycore_typecache.h
new file mode 100644
index 00000000000000..da805ca33a1cb8
--- /dev/null
+++ b/Include/internal/pycore_typecache.h
@@ -0,0 +1,44 @@
+#ifndef PY_INTERNAL_TYPECACHE_H
+#define PY_INTERNAL_TYPECACHE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_stackref.h"
+
+
+#define _Py_TYPECACHE_MINSIZE 8
+
+// One slot of a per-type attribute cache hash table.
+struct type_cache_entry {
+ // Attribute name used as the key (compared by pointer identity).
+ // NULL marks a slot that has never been filled.
+ PyObject *name;
+ // Object cached for `name`.  May be NULL; presumably that records a
+ // lookup that found nothing in the MRO (confirm against the caller).
+ PyObject *value;
+};
+
+// Header of a per-type attribute cache: a power-of-two sized hash table that
+// is probed linearly.  The slots are stored directly after the header.
+struct type_cache {
+ // Number of slots minus one; used as a bit mask to wrap probe indices.
+ uint32_t mask;
+ // Value of the owning type's tp_version_tag when the cache was last
+ // written.  Lookups treat the cache as stale if it differs from the type.
+ uint32_t version_tag;
+ // How many more entries may be inserted before the table has to grow.
+ uint32_t available;
+ // Number of occupied slots.
+ uint32_t used;
+ // First slot of the table.  The array is declared with one element but
+ // the allocation is sized for the whole table, so it is indexed past [0].
+ struct type_cache_entry hashtable[1];
+};
+
+// Result returned by value from _PyTypeCache_Lookup().
+struct _PyTypeCacheLookupResult {
+ // Reference to the cached object; PyStackRef_NULL on a miss, and also on
+ // a hit whose cached value is NULL.
+ _PyStackRef value;
+ // 1 if the name was found in an up-to-date cache, 0 otherwise.
+ int cache_hit;
+ // Version tag the hit was validated against; 0 on a miss.
+ uint32_t version_tag;
+};
+
+
+extern void _PyTypeCache_InitType(PyTypeObject *type);
+extern void _PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value);
+extern struct _PyTypeCacheLookupResult _PyTypeCache_Lookup(PyTypeObject *type, PyObject *name);
+extern void _PyTypeCache_Invalidate(PyTypeObject *type);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PY_INTERNAL_TYPECACHE_H */
diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py
index 1255d842dbff48..f7bacab00846b9 100644
--- a/Lib/test/test_free_threading/test_type.py
+++ b/Lib/test/test_free_threading/test_type.py
@@ -84,6 +84,24 @@ def reader_func():
self.run_one(writer_func, reader_func)
+ def test_attr_cache_mortal(self):
+ class C:
+ x = object()
+
+ class D(C):
+ pass
+
+ def writer_func():
+ for _ in range(3000):
+ C.x = object()
+
+ def reader_func():
+ for _ in range(3000):
+ C.x
+ D.x
+
+ self.run_one(writer_func, reader_func)
+
def test___class___modification(self):
loops = 200
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 02c70403185f60..75347f59f4adf4 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1788,7 +1788,7 @@ def delx(self): del self.__x
check((1,2,3), vsize('') + self.P + 3*self.P)
# type
# static type: PyTypeObject
- fmt = 'P2nPI13Pl4Pn9Pn12PI2Pc'
+ fmt = 'P2nPI13Pl4Pn9Pn12PI2PcP'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 9435bf534fb512..eebcf45e442e88 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -506,6 +506,7 @@ PYTHON_OBJS= \
Python/thread.o \
Python/traceback.o \
Python/tracemalloc.o \
+ Python/typecache.o \
Python/uniqueid.o \
Python/getopt.o \
Python/pystrcmp.o \
@@ -1411,6 +1412,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_tracemalloc.h \
$(srcdir)/Include/internal/pycore_tstate.h \
$(srcdir)/Include/internal/pycore_tuple.h \
+ $(srcdir)/Include/internal/pycore_typecache.h \
$(srcdir)/Include/internal/pycore_typedefs.h \
$(srcdir)/Include/internal/pycore_typeobject.h \
$(srcdir)/Include/internal/pycore_typevarobject.h \
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 7cca137f74be58..8305dd87864277 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -21,7 +21,8 @@
#include "pycore_slots.h" // _PySlotIterator_Init
#include "pycore_symtable.h" // _Py_Mangle()
#include "pycore_tuple.h" // _PyTuple_FromPair
-#include "pycore_typeobject.h" // struct type_cache
+#include "pycore_typecache.h" // _PyTypeCache_Lookup()
+#include "pycore_typeobject.h" // _PyTypes_InitTypes()
#include "pycore_unicodeobject.h" // _PyUnicode_Copy
#include "pycore_unionobject.h" // _Py_union_type_or
#include "pycore_weakref.h" // _PyWeakref_GET_REF()
@@ -41,21 +42,7 @@ class object "PyObject *" "&PyBaseObject_Type"
/* Support type attribute lookup cache */
-/* The cache can keep references to the names alive for longer than
- they normally would. This is why the maximum size is limited to
- MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large
- strings are used as attribute names. */
-#define MCACHE_MAX_ATTR_SIZE 100
-#define MCACHE_HASH(version, name_hash) \
- (((unsigned int)(version) ^ (unsigned int)(name_hash)) \
- & ((1 << MCACHE_SIZE_EXP) - 1))
-
-#define MCACHE_HASH_METHOD(type, name) \
- MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \
- ((Py_ssize_t)(name)) >> 3)
-#define MCACHE_CACHEABLE_NAME(name) \
- (PyUnicode_CheckExact(name) && \
- (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE))
+#define MCACHE_CACHEABLE_NAME(name) (PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name))
#define NEXT_VERSION_TAG(interp) \
(interp)->types.next_version_tag
@@ -969,75 +956,18 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d
}
-static struct type_cache*
-get_type_cache(void)
-{
- PyInterpreterState *interp = _PyInterpreterState_GET();
- return &interp->types.type_cache;
-}
-
-
-static void
-type_cache_clear(struct type_cache *cache, PyObject *value)
-{
- for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
- struct type_cache_entry *entry = &cache->hashtable[i];
-#ifdef Py_GIL_DISABLED
- _PySeqLock_LockWrite(&entry->sequence);
-#endif
- entry->version = 0;
- Py_XSETREF(entry->name, _Py_XNewRef(value));
- entry->value = NULL;
-#ifdef Py_GIL_DISABLED
- _PySeqLock_UnlockWrite(&entry->sequence);
-#endif
- }
-}
-
-
-void
-_PyType_InitCache(PyInterpreterState *interp)
-{
- struct type_cache *cache = &interp->types.type_cache;
- for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
- struct type_cache_entry *entry = &cache->hashtable[i];
- assert(entry->name == NULL);
-
- entry->version = 0;
- // Set to None so _PyType_LookupRef() can use Py_SETREF(),
- // rather than using slower Py_XSETREF().
- entry->name = Py_None;
- entry->value = NULL;
- }
-}
-
-
-static unsigned int
-_PyType_ClearCache(PyInterpreterState *interp)
-{
- struct type_cache *cache = &interp->types.type_cache;
- // Set to None, rather than NULL, so _PyType_LookupRef() can
- // use Py_SETREF() rather than using slower Py_XSETREF().
- type_cache_clear(cache, Py_None);
-
- return NEXT_VERSION_TAG(interp) - 1;
-}
-
-
unsigned int
PyType_ClearCache(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- return _PyType_ClearCache(interp);
+
+ return NEXT_VERSION_TAG(interp) - 1;
}
void
_PyTypes_Fini(PyInterpreterState *interp)
{
- struct type_cache *cache = &interp->types.type_cache;
- type_cache_clear(cache, NULL);
-
// All the managed static types should have been finalized already.
assert(interp->types.for_extensions.num_initialized == 0);
for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
@@ -1231,6 +1161,7 @@ type_modified_unlocked(PyTypeObject *type)
}
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
+ _PyTypeCache_Invalidate(type);
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
@@ -1314,6 +1245,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases)
clear:
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
+ _PyTypeCache_Invalidate(type);
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
@@ -6197,67 +6129,9 @@ is_dunder_name(PyObject *name)
return 0;
}
-static PyObject *
-update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value)
-{
- _Py_atomic_store_ptr_relaxed(&entry->value, value); /* borrowed */
- assert(PyUnstable_Unicode_GET_CACHED_HASH(name) != -1);
- OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name);
- // We're releasing this under the lock for simplicity sake because it's always a
- // exact unicode object or Py_None so it's safe to do so.
- PyObject *old_name = entry->name;
- _Py_atomic_store_ptr_relaxed(&entry->name, Py_NewRef(name));
- // We must write the version last to avoid _Py_TryXGetStackRef()
- // operating on an invalid (already deallocated) value inside
- // _PyType_LookupRefAndVersion(). If we write the version first then a
- // reader could pass the "entry_version == type_version" check but could
- // be using the old entry value.
- _Py_atomic_store_uint32_release(&entry->version, version_tag);
- return old_name;
-}
-
-#if Py_GIL_DISABLED
-
-static void
-update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name,
- unsigned int version_tag, PyObject *value)
-{
- _PySeqLock_LockWrite(&entry->sequence);
-
- // update the entry
- if (entry->name == name &&
- entry->value == value &&
- entry->version == version_tag) {
- // We raced with another update, bail and restore previous sequence.
- _PySeqLock_AbandonWrite(&entry->sequence);
- return;
- }
-
- PyObject *old_value = update_cache(entry, name, version_tag, value);
-
- // Then update sequence to the next valid value
- _PySeqLock_UnlockWrite(&entry->sequence);
-
- Py_DECREF(old_value);
-}
-
-#endif
-
void
_PyTypes_AfterFork(void)
{
-#ifdef Py_GIL_DISABLED
- struct type_cache *cache = get_type_cache();
- for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
- struct type_cache_entry *entry = &cache->hashtable[i];
- if (_PySeqLock_AfterFork(&entry->sequence)) {
- // Entry was in the process of updating while forking, clear it...
- entry->value = NULL;
- Py_SETREF(entry->name, Py_None);
- entry->version = 0;
- }
- }
-#endif
}
/* Internal API to look for a name through the MRO.
@@ -6290,45 +6164,16 @@ should_assign_version_tag(PyTypeObject *type, PyObject *name, unsigned int versi
unsigned int
_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out)
{
- unsigned int h = MCACHE_HASH_METHOD(type, name);
- struct type_cache *cache = get_type_cache();
- struct type_cache_entry *entry = &cache->hashtable[h];
-#ifdef Py_GIL_DISABLED
- // synchronize-with other writing threads by doing an acquire load on the sequence
- while (1) {
- uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
- uint32_t entry_version = _Py_atomic_load_uint32_acquire(&entry->version);
- uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
- if (entry_version == type_version &&
- _Py_atomic_load_ptr_relaxed(&entry->name) == name) {
+ int cacheable = MCACHE_CACHEABLE_NAME(name);
+ if (cacheable) {
+ struct _PyTypeCacheLookupResult r = _PyTypeCache_Lookup(type, name);
+ if (r.cache_hit) {
OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
- if (_Py_TryXGetStackRef(&entry->value, out)) {
- // If the sequence is still valid then we're done
- if (_PySeqLock_EndRead(&entry->sequence, sequence)) {
- return entry_version;
- }
- PyStackRef_XCLOSE(*out);
- }
- else {
- // If we can't incref the object we need to fallback to locking
- break;
- }
- }
- else {
- // cache miss
- break;
+ *out = r.value;
+ return r.version_tag;
}
}
-#else
- if (entry->version == type->tp_version_tag && entry->name == name) {
- assert(type->tp_version_tag);
- OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
- OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
- *out = entry->value ? PyStackRef_FromPyObjectNew(entry->value) : PyStackRef_NULL;
- return entry->version;
- }
-#endif
OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name));
OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name));
@@ -6339,14 +6184,23 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
PyInterpreterState *interp = _PyInterpreterState_GET();
unsigned int version_tag = FT_ATOMIC_LOAD_UINT(type->tp_version_tag);
- if (should_assign_version_tag(type, name, version_tag)) {
+ if (cacheable &&
+ (version_tag != 0 || should_assign_version_tag(type, name, version_tag)))
+ {
BEGIN_TYPE_LOCK();
- assign_version_tag(interp, type);
version_tag = type->tp_version_tag;
+ if (version_tag == 0) {
+ assign_version_tag(interp, type);
+ version_tag = type->tp_version_tag;
+ }
res = find_name_in_mro(type, name, out);
+ if (res >= 0 && version_tag != 0) {
+ _PyTypeCache_Insert(type, name, PyStackRef_AsPyObjectBorrow(*out));
+ }
END_TYPE_LOCK();
}
else {
+ version_tag = 0;
res = find_name_in_mro(type, name, out);
}
@@ -6356,17 +6210,6 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
return 0;
}
- if (version_tag == 0 || !MCACHE_CACHEABLE_NAME(name)) {
- return 0;
- }
-
- PyObject *res_obj = PyStackRef_AsPyObjectBorrow(*out);
-#if Py_GIL_DISABLED
- update_cache_gil_disabled(entry, name, version_tag, res_obj);
-#else
- PyObject *old_value = update_cache(entry, name, version_tag, res_obj);
- Py_DECREF(old_value);
-#endif
return version_tag;
}
@@ -6809,7 +6652,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
done:
Py_DECREF(name);
Py_XDECREF(descr);
- Py_XDECREF(old_value);
+ // delay decref of the old value as lock-free type cache readers may access it
+ if (old_value != NULL && !_Py_IsImmortal(old_value)) {
+ _PyObject_XDecRefDelayed(old_value);
+ }
return res;
}
@@ -6881,6 +6727,7 @@ clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type,
if (final) {
Py_CLEAR(type->tp_cache);
}
+ _PyTypeCache_Invalidate(type);
clear_tp_dict(type);
clear_tp_bases(type, final);
clear_tp_mro(type, final);
@@ -6990,6 +6837,7 @@ type_dealloc(PyObject *self)
Py_XDECREF(type->tp_bases);
Py_XDECREF(type->tp_mro);
Py_XDECREF(type->tp_cache);
+ _PyTypeCache_Invalidate(type);
clear_tp_subclasses(type);
/* A type's tp_doc is heap allocated, unlike the tp_doc slots
@@ -9505,6 +9353,8 @@ type_ready(PyTypeObject *type, int initial)
goto error;
}
+ _PyTypeCache_InitType(type);
+
#ifdef Py_TRACE_REFS
/* PyType_Ready is the closest thing we have to a choke point
* for type objects, so is the best place I can think of to try
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index 17b98c9d9ec345..e6c0ae16a79986 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -282,6 +282,7 @@
+
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index af3fded0dabf2d..28bac4e8e8a5a3 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -490,6 +490,9 @@
Source Files
+
+ Source Files
+
Source Files
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index e255ed5af19125..9b8bdde8c8d9be 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -332,6 +332,7 @@
+
@@ -700,6 +701,7 @@
+
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 649ee1859ff996..7788871ea51a3a 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -894,6 +894,9 @@
Include\internal
+
+ Include\internal
+
Include\internal
@@ -1613,6 +1616,9 @@
Python
+
+ Python
+
Python
diff --git a/Python/pystate.c b/Python/pystate.c
index 530bd567b770be..e0272650702c86 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -12,7 +12,7 @@
#include "pycore_freelist.h" // _PyObject_ClearFreeLists()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interpframe.h" // _PyThreadState_HasStackSpace()
-#include "pycore_object.h" // _PyType_InitCache()
+#include "pycore_object.h" // _PyObject_GC_New()
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
#include "pycore_optimizer.h" // JIT_CLEANUP_THRESHOLD
#include "pycore_parking_lot.h" // _PyParkingLot_AfterFork()
@@ -572,7 +572,6 @@ init_interpreter(PyInterpreterState *interp,
_PyEval_InitState(interp);
_PyGC_InitState(&interp->gc);
PyConfig_InitPythonConfig(&interp->config);
- _PyType_InitCache(interp);
#ifdef Py_GIL_DISABLED
_Py_brc_init_state(interp);
#endif
diff --git a/Python/typecache.c b/Python/typecache.c
new file mode 100644
index 00000000000000..a37118badeab68
--- /dev/null
+++ b/Python/typecache.c
@@ -0,0 +1,236 @@
+// Lock-free per type method cache implementation.
+
+// The cache is used for method and attribute lookups on type objects.
+// The stored names are always interned strings, and the
+// stored values are borrowed references to the corresponding method or attribute object.
+// For static types, the cache is stored on the per-interpreter managed_static_type_state,
+// and for heap types the cache is stored in the `PyTypeObject._tp_cache` field.
+
+#include "Python.h"
+#include "pycore_typecache.h"
+#include "pycore_interp.h" // PyInterpreterState
+#include "pycore_pymem.h"
+#include "pycore_pystate.h" // _PyInterpreterState_GET()
+#include "pycore_pyatomic_ft_wrappers.h"
+#include "pycore_typeobject.h" // _PyStaticType_GetState()
+
+// Backing storage for the shared "empty" cache.  `entries` directly follows
+// `cache` so that probing `cache.hashtable[0 .. mask]` stays inside this
+// object; every slot is zero-initialized, so every lookup in it misses.
+static struct {
+ struct type_cache cache;
+ struct type_cache_entry entries[_Py_TYPECACHE_MINSIZE];
+} empty_cache_storage = {
+ .cache = {
+ .mask = _Py_TYPECACHE_MINSIZE - 1,
+ .available = 0,
+ .used = 0,
+ },
+};
+// The empty cache is statically allocated and shared by all types.  When a
+// type is modified, its cache pointer is reset to the empty cache.  Because
+// `available` is 0, the first insertion for a type never stores into the
+// empty cache: a new cache is allocated for the type and receives the entry.
+#define empty_cache (empty_cache_storage.cache)
+
+// Return the number of slots in `cache`'s hash table.
+static inline uint32_t
+cache_size(struct type_cache *cache)
+{
+    // `mask` is stored as (slot count - 1); undo the subtraction.
+    uint32_t nslots = cache->mask;
+    nslots += 1;
+    return nslots;
+}
+
+// Return the size in bytes of the allocation that holds `cache`
+// (the header plus one entry per slot).
+static inline size_t
+cache_nbytes(struct type_cache *cache)
+{
+    size_t nslots = (size_t)cache_size(cache);
+    size_t table_bytes = nslots * sizeof(struct type_cache_entry);
+    return sizeof(struct type_cache) + table_bytes;
+}
+
+// Allocate a zeroed cache with `size` slots (`size` must be a power of two).
+// Returns NULL if the allocation fails.
+static struct type_cache *
+cache_allocate(uint32_t size)
+{
+    // `size - 1` is used as a bit mask, which only works for powers of two.
+    assert((size & (size - 1)) == 0);
+    size_t header_bytes = sizeof(struct type_cache);
+    size_t table_bytes = (size_t)size * sizeof(struct type_cache_entry);
+    struct type_cache *fresh = PyMem_Calloc(1, header_bytes + table_bytes);
+    if (fresh != NULL) {
+        fresh->mask = size - 1;
+        // Let the table fill to 75% before it has to be resized.
+        fresh->available = size - (size >> 2);
+        fresh->used = 0;
+    }
+    return fresh;
+}
+
+// Release a cache that has been unlinked from its type.  NULL and the shared
+// empty cache are ignored.  The memory is handed to _PyMem_FreeDelayed()
+// rather than freed directly because lock-free readers may still be using it.
+static void
+cache_free_delayed(struct type_cache *cache)
+{
+    if (cache != NULL && cache != &empty_cache) {
+#ifndef Py_GIL_DISABLED
+        // On GIL-enabled builds each stored name holds a strong reference
+        // (taken in cache_insert), so drop those references first.
+        for (uint32_t slot = 0; slot < cache_size(cache); slot++) {
+            PyObject *held_name = cache->hashtable[slot].name;
+            if (held_name != NULL) {
+                Py_DECREF(held_name);
+            }
+        }
+#endif
+        _PyMem_FreeDelayed(cache, cache_nbytes(cache));
+    }
+}
+
+
+// Return the address of the pointer that holds `type`'s cache: the field on
+// the type object itself, or, for static builtin types, the field in the
+// current interpreter's managed_static_type_state.
+static inline void **
+cache_slot(PyTypeObject *type)
+{
+    if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) {
+        return &type->_tp_cache;
+    }
+    managed_static_type_state *state =
+        _PyStaticType_GetState(_PyInterpreterState_GET(), type);
+    assert(state != NULL);
+    return &state->_tp_cache;
+}
+
+// Load the cache pointer currently attached to `type`.
+static inline struct type_cache *
+cache_get(PyTypeObject *type)
+{
+    void **slot = cache_slot(type);
+    return (struct type_cache *)FT_ATOMIC_LOAD_PTR(*slot);
+}
+
+// Attach `cache` to `type`, replacing whatever pointer was stored before.
+// The previous cache is not released here; that is up to the caller.
+static inline void
+cache_set(PyTypeObject *type, struct type_cache *cache)
+{
+    void **slot = cache_slot(type);
+    FT_ATOMIC_STORE_PTR(*slot, cache);
+}
+
+// Point `type`'s cache at the shared empty cache.
+void
+_PyTypeCache_InitType(PyTypeObject *type)
+{
+    void **slot = cache_slot(type);
+    *slot = &empty_cache;
+}
+
+// Store (name, value) in `cache` using linear probing from the name's hash.
+// Does nothing if `name` is already present.  The caller must make sure the
+// table has a free slot, otherwise the probe loop cannot terminate.
+static inline void
+cache_insert(struct type_cache *cache, PyObject *name,
+             PyObject *value)
+{
+    Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name);
+    assert(hash != -1);
+    uint32_t pos = hash & cache->mask;
+    struct type_cache_entry *entry = &cache->hashtable[pos];
+    while (entry->name != NULL) {
+        if (entry->name == name) {
+            /* someone else added the entry before us. */
+            return;
+        }
+        pos = (pos + 1) & cache->mask;
+        entry = &cache->hashtable[pos];
+    }
+#ifndef Py_GIL_DISABLED
+    // On free-threading, all interned strings are immortal, so only
+    // GIL-enabled builds need the table to own a reference to the name.
+    Py_INCREF(name);
+#endif
+    // Write the value before the name: readers match on the name first.
+    FT_ATOMIC_STORE_PTR(entry->value, value);
+    FT_ATOMIC_STORE_PTR(entry->name, name);
+    cache->used++;
+    cache->available--;
+}
+
+// Replace `type`'s cache with a larger one holding the same entries.
+//
+// `cache` is the cache currently attached to `type`.  If it has no entries
+// (the shared empty cache) the replacement gets the minimum size; otherwise
+// the size is doubled.  The old cache is released with a delay so concurrent
+// lock-free readers can finish with it.
+//
+// Returns 0 on success, or -1 if the new cache could not be allocated; in
+// that case `type` keeps its current cache.
+static inline int
+cache_resize(PyTypeObject *type, struct type_cache *cache)
+{
+    uint32_t old_size = cache_size(cache);
+    uint32_t new_size;
+    if (cache->used == 0) {
+        // the cache is the empty cache, we need to allocate a new cache with the minimum size
+        new_size = _Py_TYPECACHE_MINSIZE;
+    }
+    else {
+        // double the cache size when resizing
+        new_size = old_size * 2;
+    }
+    struct type_cache *new_cache = cache_allocate(new_size);
+    if (new_cache == NULL) {
+        return -1;
+    }
+    for (uint32_t i = 0; i < old_size; i++) {
+        if (cache->hashtable[i].name != NULL) {
+            cache_insert(new_cache, cache->hashtable[i].name,
+                         cache->hashtable[i].value);
+        }
+    }
+    // Stamp the *new* cache with the type's version before publishing it.
+    // The old cache must not be written here: it may be the shared empty
+    // cache, which is used by every type and must stay unmodified.
+    FT_ATOMIC_STORE_UINT_RELAXED(new_cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag));
+    cache_set(type, new_cache);
+    cache_free_delayed(cache);
+    return 0;
+}
+
+// Record that looking up `name` on `type` yields `value`.
+//
+// `value` is stored as a borrowed reference.  The cache is grown first when
+// it has no room left, which includes the very first insertion into the
+// shared empty cache.  Failure to allocate is not an error: the entry is
+// simply not cached.
+void
+_PyTypeCache_Insert(PyTypeObject *type, PyObject *name, PyObject *value)
+{
+    struct type_cache *cache = cache_get(type);
+    if (cache == NULL) {
+        // No cache is attached to the type.  _PyTypeCache_Lookup() treats
+        // this as a miss, so treat it as "nothing to cache into" here
+        // instead of dereferencing a NULL pointer.
+        return;
+    }
+    // If the cache is full, resize it before inserting the new entry.
+    // this also handles the case of empty cache where available is 0 but there are no entries.
+    if (cache->available == 0) {
+        if (cache_resize(type, cache) == -1) {
+            // out of memory, don't cache the value
+            return;
+        }
+        cache = cache_get(type);
+        assert(cache->available > 0);
+    }
+    cache_insert(cache, name, value);
+    // Mark the cache as valid for the type's current version.
+    FT_ATOMIC_STORE_UINT_RELAXED(cache->version_tag, FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag));
+}
+
+// Look up `name` in `type`'s attribute cache without taking a lock.
+//
+// `name` must be an exact, interned str.  On a hit the result has
+// cache_hit == 1, a new reference in `value` (PyStackRef_NULL if the cached
+// value is NULL) and the version tag the hit was validated against.  On a
+// miss the result is {PyStackRef_NULL, 0, 0}.
+struct _PyTypeCacheLookupResult
+_PyTypeCache_Lookup(PyTypeObject *type, PyObject *name)
+{
+ assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name));
+ struct _PyTypeCacheLookupResult miss = {PyStackRef_NULL, 0, 0};
+ struct type_cache *cache = cache_get(type);
+ if (cache == NULL) {
+ // No cache attached to this type.
+ return miss;
+ }
+ Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(name);
+ assert(hash != -1);
+ uint32_t index = hash & cache->mask;
+ _PyStackRef out_ref;
+ // Linear probing: walk consecutive slots until the name or an empty slot
+ // is found.
+ for (;;) {
+ PyObject *entry_name = FT_ATOMIC_LOAD_PTR(cache->hashtable[index].name);
+ if (entry_name == NULL) {
+ // Reached an unused slot, so the name is not cached.
+ return miss;
+ }
+ // Names are compared by pointer identity.
+ if (entry_name == name) {
+#ifdef Py_GIL_DISABLED
+ // Could not obtain a reference to the cached value; report a miss.
+ if (!_Py_TryXGetStackRef(&cache->hashtable[index].value, &out_ref)) {
+ return miss;
+ }
+#else
+ PyObject *v = cache->hashtable[index].value;
+ out_ref = v ? PyStackRef_FromPyObjectNew(v) : PyStackRef_NULL;
+#endif
+ break;
+ }
+ index = (index + 1) & cache->mask;
+ }
+ // to maintain consistency with find_name_in_mro and prevent stale cache reads
+ // The version is checked only after the reference was taken; on a
+ // mismatch that reference is dropped and the lookup counts as a miss.
+ uint32_t cache_version = FT_ATOMIC_LOAD_UINT_RELAXED(cache->version_tag);
+ if (cache_version != FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag)) {
+ PyStackRef_XCLOSE(out_ref);
+ return miss;
+ }
+ return (struct _PyTypeCacheLookupResult){out_ref, 1, cache_version};
+}
+
+
+// Drop every cached entry for `type`: the type is switched back to the
+// shared empty cache and its previous cache is released with a delay.
+void
+_PyTypeCache_Invalidate(PyTypeObject *type)
+{
+    struct type_cache *stale = cache_get(type);
+    // Detach first, then release, so new readers never pick up `stale`.
+    cache_set(type, &empty_cache);
+    cache_free_delayed(stale);
+}
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index ddfb93a424c018..6f09fc665e2c85 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -57,6 +57,9 @@ Python/pyhash.c - _Py_HashSecret -
## thread-safe hashtable (internal locks)
Python/parking_lot.c - buckets -
+## shared empty sentinel for the per-type method cache
+Python/typecache.c - empty_cache_storage -
+
## data needed for introspecting asyncio state from debuggers and profilers
Modules/_asynciomodule.c - _Py_AsyncioDebug -
diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py
index c8a914c22a9e13..a79242e740371b 100644
--- a/Tools/ftscalingbench/ftscalingbench.py
+++ b/Tools/ftscalingbench/ftscalingbench.py
@@ -325,6 +325,19 @@ def enum_attr():
MyEnum.Y
MyEnum.Z
+_MCACHE_NUM_TYPES = 1 << 14
+_MCACHE_PAIRS = [
+ (type(f"C{i}", (), {f"m{i}": i % 256})(), sys.intern(f"m{i}"))
+ for i in range(_MCACHE_NUM_TYPES)
+]
+
+@register_benchmark
+def type_lookup():
+ pairs = _MCACHE_PAIRS
+ for _ in range(WORK_SCALE // 10):
+ for inst, name in pairs:
+ getattr(inst, name)
+
def bench_one_thread(func):
t0 = time.perf_counter_ns()