bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20… · python/cpython@35d6540
@@ -214,6 +214,22 @@ extern "C" {
214214# define OVERALLOCATE_FACTOR 4
215215#endif
216216217+/* bpo-40521: Interned strings are shared by all interpreters. */
218+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219+# define INTERNED_STRINGS
220+#endif
221+222+/* This dictionary holds all interned unicode strings. Note that references
223+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
224+ When the interned string reaches a refcnt of 0 the string deallocation
225+ function will delete the reference from this dictionary.
226+227+ Another way to look at this is to say that the actual reference
228+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229+*/
230+#ifdef INTERNED_STRINGS
231+static PyObject *interned = NULL;
232+#endif
217233218234/* Forward declaration */
219235static inline int
@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)
1950196619511967case SSTATE_INTERNED_MORTAL:
19521968 {
1953- struct _Py_unicode_state *state = get_unicode_state();
1969+#ifdef INTERNED_STRINGS
19541970/* Revive the dead object temporarily. PyDict_DelItem() removes two
19551971 references (key and value) which were ignored by
19561972 PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19571973 to prevent calling unicode_dealloc() again. Adjust refcnt after
19581974 PyDict_DelItem(). */
19591975assert(Py_REFCNT(unicode) == 0);
19601976Py_SET_REFCNT(unicode, 3);
1961-if (PyDict_DelItem(state->interned, unicode) != 0) {
1977+if (PyDict_DelItem(interned, unicode) != 0) {
19621978_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19631979NULL);
19641980 }
19651981assert(Py_REFCNT(unicode) == 1);
19661982Py_SET_REFCNT(unicode, 0);
1983+#endif
19671984break;
19681985 }
19691986@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1134211359if (PyUnicode_CHECK_INTERNED(left))
1134311360return 0;
113441136111362+#ifdef INTERNED_STRINGS
1134511363assert(_PyUnicode_HASH(right_uni) != -1);
1134611364Py_hash_t hash = _PyUnicode_HASH(left);
1134711365if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1134811366return 0;
1134911367 }
11368+#endif
11350113691135111370return unicode_compare_eq(left, right_uni);
1135211371}
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
1559115610return;
1559215611 }
155931561215613+#ifdef INTERNED_STRINGS
1559415614if (PyUnicode_READY(s) == -1) {
1559515615PyErr_Clear();
1559615616return;
1559715617 }
155981561815599-struct _Py_unicode_state *state = get_unicode_state();
15600-if (state->interned == NULL) {
15601-state->interned = PyDict_New();
15602-if (state->interned == NULL) {
15619+if (interned == NULL) {
15620+interned = PyDict_New();
15621+if (interned == NULL) {
1560315622PyErr_Clear(); /* Don't leave an exception */
1560415623return;
1560515624 }
1560615625 }
156071562615608-PyObject *t = PyDict_SetDefault(state->interned, s, s);
15627+PyObject *t = PyDict_SetDefault(interned, s, s);
1560915628if (t == NULL) {
1561015629PyErr_Clear();
1561115630return;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
1562215641 this. */
1562315642Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1562415643_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15644+#else
15645+// PyDict expects that interned strings have their hash
15646+// (PyASCIIObject.hash) already computed.
15647+ (void)unicode_hash(s);
15648+#endif
1562515649}
156261565015627-1562815651void
1562915652PyUnicode_InternImmortal(PyObject **p)
1563015653{
@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)
1565815681void
1565915682_PyUnicode_ClearInterned(PyInterpreterState *interp)
1566015683{
15661-struct _Py_unicode_state *state = &interp->unicode;
15662-if (state->interned == NULL) {
15684+if (!_Py_IsMainInterpreter(interp)) {
15685+ // interned dict is shared by all interpreters
1566315686return;
1566415687 }
15665-assert(PyDict_CheckExact(state->interned));
15688+15689+if (interned == NULL) {
15690+return;
15691+ }
15692+assert(PyDict_CheckExact(interned));
15666156931566715694/* Interned unicode strings are not forcibly deallocated; rather, we give
1566815695 them their stolen references back, and then clear and DECREF the
1566915696 interned dict. */
15670156971567115698#ifdef INTERNED_STATS
1567215699fprintf(stderr, "releasing %zd interned strings\n",
15673-PyDict_GET_SIZE(state->interned));
15700+PyDict_GET_SIZE(interned));
15674157011567515702Py_ssize_t immortal_size = 0, mortal_size = 0;
1567615703#endif
1567715704Py_ssize_t pos = 0;
1567815705PyObject *s, *ignored_value;
15679-while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15706+while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1568015707assert(PyUnicode_IS_READY(s));
15681157081568215709switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1570715734mortal_size, immortal_size);
1570815735#endif
157091573615710-PyDict_Clear(state->interned);
15711-Py_CLEAR(state->interned);
15737+PyDict_Clear(interned);
15738+Py_CLEAR(interned);
1571215739}
15713157401571415741@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1607916106static inline int
1608016107unicode_is_finalizing(void)
1608116108{
16082-struct _Py_unicode_state *state = get_unicode_state();
16083-return (state->interned == NULL);
16109+return (interned == NULL);
1608416110}
1608516111#endif
1608616112@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1609016116{
1609116117struct _Py_unicode_state *state = &interp->unicode;
160921611816093-// _PyUnicode_ClearInterned() must be called before
16094-assert(state->interned == NULL);
16119+if (_Py_IsMainInterpreter(interp)) {
16120+// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121+assert(interned == NULL);
16122+ }
16095161231609616124_PyUnicode_FiniEncodings(&state->fs_codec);
1609716125