https://github.com/python/cpython/commit/9b0bfba2a265b8108610b037945c004d8e81f2b4
commit: 9b0bfba2a265b8108610b037945c004d8e81f2b4
branch: main
author: Sam Gross <[email protected]>
committer: colesbury <[email protected]>
date: 2024-10-21T12:51:29-04:00
summary:
gh-124218: Use per-thread reference counting for globals and builtins (#125713)
Use per-thread refcounting for the reference from function objects to
the globals and builtins dictionaries.
files:
M Include/cpython/dictobject.h
M Include/internal/pycore_dict.h
M Include/internal/pycore_object.h
M Include/internal/pycore_uniqueid.h
M Objects/dictobject.c
M Objects/funcobject.c
M Objects/moduleobject.c
M Python/uniqueid.c
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index b113c7fdcf6515..78473e54898fa5 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -17,7 +17,9 @@ typedef struct {
/* This is a private field for CPython's internal use.
* Bits 0-7 are for dict watchers.
* Bits 8-11 are for the watched mutation counter (used by tier2 optimization)
- * The remaining bits are not currently used. */
+ * Bits 12-31 are currently unused
+ * Bits 32-63 are a unique id in the free threading build (used for per-thread refcounting)
+ */
uint64_t _ma_watcher_tag;
PyDictKeysObject *ma_keys;
diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 1920724c1d4f57..1d185559b3ef43 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -229,6 +229,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) {
#define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS))
#define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1)
#define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1)
+#define DICT_UNIQUE_ID_SHIFT (32)
+#define DICT_UNIQUE_ID_MAX ((UINT64_C(1) << (64 - DICT_UNIQUE_ID_SHIFT)) - 1)
PyAPI_FUNC(void)
@@ -307,8 +309,40 @@ _PyInlineValuesSize(PyTypeObject *tp)
int
_PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj);
+// Enables per-thread ref counting on this dict in the free threading build
+extern void _PyDict_EnablePerThreadRefcounting(PyObject *op);
+
PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *);
+// Dict counterparts of the type refcounting helpers.
+// See `_Py_INCREF_TYPE()` in pycore_object.h
+#ifdef Py_GIL_DISABLED
+// Return the unique id recorded on the dict, or -1 if none is recorded.
+// The id is kept offset by one in the bits of `_ma_watcher_tag` at and
+// above DICT_UNIQUE_ID_SHIFT, so a zero field means "unassigned".
+static inline Py_ssize_t
+_PyDict_UniqueId(PyDictObject *mp)
+{
+    uint64_t stored = mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT;
+    return (Py_ssize_t)stored - 1;
+}
+
+// Take a reference to the dict `op`, handing the dict's unique id to
+// `_Py_THREAD_INCREF_OBJECT()`.
+static inline void
+_Py_INCREF_DICT(PyObject *op)
+{
+    assert(PyDict_Check(op));
+    PyDictObject *mp = (PyDictObject *)op;
+    Py_ssize_t unique_id = _PyDict_UniqueId(mp);
+    _Py_THREAD_INCREF_OBJECT(op, unique_id);
+}
+
+// Drop a reference to the dict `op`, handing the dict's unique id to
+// `_Py_THREAD_DECREF_OBJECT()`.
+static inline void
+_Py_DECREF_DICT(PyObject *op)
+{
+    assert(PyDict_Check(op));
+    PyDictObject *mp = (PyDictObject *)op;
+    Py_ssize_t unique_id = _PyDict_UniqueId(mp);
+    _Py_THREAD_DECREF_OBJECT(op, unique_id);
+}
+#else
+// Without Py_GIL_DISABLED the helpers are plain Py_INCREF/Py_DECREF.
+# define _Py_INCREF_DICT Py_INCREF
+# define _Py_DECREF_DICT Py_DECREF
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 96f6d61e1c620b..c7af720b1ce43d 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -293,6 +293,20 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
+// Macros used for per-thread reference counting in the free threading build.
+// They resolve to normal Py_INCREF/DECREF calls in the default build.
+//
+// The macros are used for only a few references that would otherwise cause
+// scaling bottlenecks in the free threading build:
+// - The reference from an object to `ob_type`.
+// - The reference from a function to `func_code`.
+// - The reference from a function to `func_globals` and `func_builtins`.
+//
+// It's safe, but not performant or necessary, to use these macros for other
+// references to code, type, or dict objects. It's also safe to mix their
+// usage with normal Py_INCREF/DECREF calls.
+//
+// See also Include/internal/pycore_dict.h for _Py_INCREF_DICT/_Py_DECREF_DICT.
#ifndef Py_GIL_DISABLED
# define _Py_INCREF_TYPE Py_INCREF
# define _Py_DECREF_TYPE Py_DECREF
diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h
index ad5dd38ea08483..d3db49ddb78103 100644
--- a/Include/internal/pycore_uniqueid.h
+++ b/Include/internal/pycore_uniqueid.h
@@ -48,6 +48,9 @@ struct _Py_unique_id_pool {
// Assigns the next id from the pool of ids.
extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj);
+// Releases the allocated id back to the pool.
+extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id);
+
// Clears the unique id stored on the object and, if one was assigned, releases it back to the pool.
extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 806096f5814062..c4e11a3e9c0bc7 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1636,6 +1636,24 @@ _PyDict_MaybeUntrack(PyObject *op)
_PyObject_GC_UNTRACK(op);
}
+// Opt the dict `op` into per-thread reference counting.
+//
+// In the free threading build (Py_GIL_DISABLED) this requests a unique id
+// for the dict and stores `id + 1` in the bits of `_ma_watcher_tag` at and
+// above DICT_UNIQUE_ID_SHIFT, so that a zero field means "no id assigned".
+// If no id could be assigned, or the id is too large to fit in those bits,
+// the dict is left without an id.
+// In the default build this only asserts that `op` is a dict.
+void
+_PyDict_EnablePerThreadRefcounting(PyObject *op)
+{
+    assert(PyDict_Check(op));
+#ifdef Py_GIL_DISABLED
+    Py_ssize_t id = _PyObject_AssignUniqueId(op);
+    if (id < 0) {
+        // A negative id means nothing was assigned, so there is nothing to
+        // give back.  Without this check the unsigned comparison below
+        // would treat the negative value as "too large" and pass it to
+        // _PyObject_ReleaseUniqueId(), which other callers only invoke
+        // with id >= 0.
+        return;
+    }
+    if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) {
+        // id + 1 would not fit in the tag's id field; return the id to
+        // the pool and leave the dict without one.
+        _PyObject_ReleaseUniqueId(id);
+        return;
+    }
+
+    PyDictObject *mp = (PyDictObject *)op;
+    // The id field must still be empty; an id is only stored once.
+    assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0);
+    // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id
+    mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT;
+#endif
+}
+
static inline int
is_unusable_slot(Py_ssize_t ix)
{
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 3cb247691386bf..44fb4ac0907d7b 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals()
+#include "pycore_dict.h" // _Py_INCREF_DICT()
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
@@ -112,8 +113,15 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr)
Py_XDECREF(module);
return NULL;
}
- op->func_globals = Py_NewRef(constr->fc_globals);
- op->func_builtins = Py_NewRef(constr->fc_builtins);
+ _Py_INCREF_DICT(constr->fc_globals);
+ op->func_globals = constr->fc_globals;
+ if (PyDict_Check(constr->fc_builtins)) {
+ _Py_INCREF_DICT(constr->fc_builtins);
+ }
+ else {
+ Py_INCREF(constr->fc_builtins);
+ }
+ op->func_builtins = constr->fc_builtins;
op->func_name = Py_NewRef(constr->fc_name);
op->func_qualname = Py_NewRef(constr->fc_qualname);
_Py_INCREF_CODE((PyCodeObject *)constr->fc_code);
@@ -143,7 +151,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
{
assert(globals != NULL);
assert(PyDict_Check(globals));
- Py_INCREF(globals);
+ _Py_INCREF_DICT(globals);
PyThreadState *tstate = _PyThreadState_GET();
@@ -184,7 +192,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
if (builtins == NULL) {
goto error;
}
- Py_INCREF(builtins);
+ if (PyDict_Check(builtins)) {
+ _Py_INCREF_DICT(builtins);
+ }
+ else {
+ Py_INCREF(builtins);
+ }
PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
if (op == NULL) {
@@ -1057,8 +1070,21 @@ func_clear(PyObject *self)
{
PyFunctionObject *op = _PyFunction_CAST(self);
func_clear_version(_PyInterpreterState_GET(), op);
- Py_CLEAR(op->func_globals);
- Py_CLEAR(op->func_builtins);
+ PyObject *globals = op->func_globals;
+ op->func_globals = NULL;
+ if (globals != NULL) {
+ _Py_DECREF_DICT(globals);
+ }
+ PyObject *builtins = op->func_builtins;
+ op->func_builtins = NULL;
+ if (builtins != NULL) {
+ if (PyDict_Check(builtins)) {
+ _Py_DECREF_DICT(builtins);
+ }
+ else {
+ Py_DECREF(builtins);
+ }
+ }
Py_CLEAR(op->func_module);
Py_CLEAR(op->func_defaults);
Py_CLEAR(op->func_kwdefaults);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index f63ae4e048bcd9..c06badd5f3edfe 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
+#include "pycore_dict.h" // _PyDict_EnablePerThreadRefcounting()
#include "pycore_fileutils.h" // _Py_wgetcwd
#include "pycore_interp.h" // PyInterpreterState.importlib
#include "pycore_long.h" // _PyLong_GetOne()
@@ -105,7 +106,7 @@ new_module_notrack(PyTypeObject *mt)
static void
track_module(PyModuleObject *m)
{
- _PyObject_SetDeferredRefcount(m->md_dict);
+ _PyDict_EnablePerThreadRefcounting(m->md_dict);
PyObject_GC_Track(m->md_dict);
_PyObject_SetDeferredRefcount((PyObject *)m);
diff --git a/Python/uniqueid.c b/Python/uniqueid.c
index 0cbb35c6cd2f8b..b9f30713feeb57 100644
--- a/Python/uniqueid.c
+++ b/Python/uniqueid.c
@@ -1,5 +1,6 @@
#include "Python.h"
+#include "pycore_dict.h" // _PyDict_UniqueId()
#include "pycore_lock.h" // PyMutex_LockFlags()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_object.h" // _Py_IncRefTotal
@@ -98,8 +99,8 @@ _PyObject_AssignUniqueId(PyObject *obj)
return unique_id;
}
-static void
-release_unique_id(Py_ssize_t unique_id)
+void
+_PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_unique_id_pool *pool = &interp->unique_ids;
@@ -128,6 +129,11 @@ clear_unique_id(PyObject *obj)
id = co->_co_unique_id;
co->_co_unique_id = -1;
}
+ else if (PyDict_Check(obj)) {
+ PyDictObject *mp = (PyDictObject *)obj;
+ id = _PyDict_UniqueId(mp);
+ mp->_ma_watcher_tag &= ~(UINT64_MAX << DICT_UNIQUE_ID_SHIFT);
+ }
return id;
}
@@ -136,7 +142,7 @@ _PyObject_DisablePerThreadRefcounting(PyObject *obj)
{
Py_ssize_t id = clear_unique_id(obj);
if (id >= 0) {
- release_unique_id(id);
+ _PyObject_ReleaseUniqueId(id);
}
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]