[3.10] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings… · python/cpython@72c260c
@@ -211,6 +211,22 @@ extern "C" {
211211# define OVERALLOCATE_FACTOR 4
212212#endif
213213214+/* bpo-40521: Interned strings are shared by all interpreters. */
215+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
216+# define INTERNED_STRINGS
217+#endif
218+219+/* This dictionary holds all interned unicode strings. Note that references
220+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
221+ When the interned string reaches a refcnt of 0 the string deallocation
222+ function will delete the reference from this dictionary.
223+224+ Another way to look at this is to say that the actual reference
225+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
226+*/
227+#ifdef INTERNED_STRINGS
228+static PyObject *interned = NULL;
229+#endif
214230215231static struct _Py_unicode_state*
216232get_unicode_state(void)
@@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)
1936195219371953case SSTATE_INTERNED_MORTAL:
19381954 {
1939- struct _Py_unicode_state *state = get_unicode_state();
1955+#ifdef INTERNED_STRINGS
19401956/* Revive the dead object temporarily. PyDict_DelItem() removes two
19411957 references (key and value) which were ignored by
19421958 PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19431959 to prevent calling unicode_dealloc() again. Adjust refcnt after
19441960 PyDict_DelItem(). */
19451961assert(Py_REFCNT(unicode) == 0);
19461962Py_SET_REFCNT(unicode, 3);
1947-if (PyDict_DelItem(state->interned, unicode) != 0) {
1963+if (PyDict_DelItem(interned, unicode) != 0) {
19481964_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19491965NULL);
19501966 }
19511967assert(Py_REFCNT(unicode) == 1);
19521968Py_SET_REFCNT(unicode, 0);
1969+#endif
19531970break;
19541971 }
19551972@@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1160011617if (PyUnicode_CHECK_INTERNED(left))
1160111618return 0;
116021161911620+#ifdef INTERNED_STRINGS
1160311621assert(_PyUnicode_HASH(right_uni) != -1);
1160411622Py_hash_t hash = _PyUnicode_HASH(left);
1160511623if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1160611624return 0;
1160711625 }
11626+#endif
11608116271160911628return unicode_compare_eq(left, right_uni);
1161011629}
@@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
1583315852return;
1583415853 }
158351585415855+#ifdef INTERNED_STRINGS
1583615856if (PyUnicode_READY(s) == -1) {
1583715857PyErr_Clear();
1583815858return;
1583915859 }
158401586015841-struct _Py_unicode_state *state = get_unicode_state();
15842-if (state->interned == NULL) {
15843-state->interned = PyDict_New();
15844-if (state->interned == NULL) {
15861+if (interned == NULL) {
15862+interned = PyDict_New();
15863+if (interned == NULL) {
1584515864PyErr_Clear(); /* Don't leave an exception */
1584615865return;
1584715866 }
1584815867 }
158491586815850-PyObject *t = PyDict_SetDefault(state->interned, s, s);
15869+PyObject *t = PyDict_SetDefault(interned, s, s);
1585115870if (t == NULL) {
1585215871PyErr_Clear();
1585315872return;
@@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
1586415883 this. */
1586515884Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1586615885_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15886+#else
15887+// PyDict expects that interned strings have their hash
15888+// (PyASCIIObject.hash) already computed.
15889+ (void)unicode_hash(s);
15890+#endif
1586715891}
158681589215869-1587015893void
1587115894PyUnicode_InternImmortal(PyObject **p)
1587215895{
@@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)
1590015923void
1590115924_PyUnicode_ClearInterned(PyInterpreterState *interp)
1590215925{
15903-struct _Py_unicode_state *state = &interp->unicode;
15904-if (state->interned == NULL) {
15926+if (!_Py_IsMainInterpreter(interp)) {
15927+// interned dict is shared by all interpreters
15928+return;
15929+ }
15930+15931+if (interned == NULL) {
1590515932return;
1590615933 }
15907-assert(PyDict_CheckExact(state->interned));
15934+assert(PyDict_CheckExact(interned));
15908159351590915936/* Interned unicode strings are not forcibly deallocated; rather, we give
1591015937 them their stolen references back, and then clear and DECREF the
1591115938 interned dict. */
15912159391591315940#ifdef INTERNED_STATS
1591415941fprintf(stderr, "releasing %zd interned strings\n",
15915-PyDict_GET_SIZE(state->interned));
15942+PyDict_GET_SIZE(interned));
15916159431591715944Py_ssize_t immortal_size = 0, mortal_size = 0;
1591815945#endif
1591915946Py_ssize_t pos = 0;
1592015947PyObject *s, *ignored_value;
15921-while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15948+while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1592215949assert(PyUnicode_IS_READY(s));
15923159501592415951switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1594915976mortal_size, immortal_size);
1595015977#endif
159511597815952-PyDict_Clear(state->interned);
15953-Py_CLEAR(state->interned);
15979+PyDict_Clear(interned);
15980+Py_CLEAR(interned);
1595415981}
15955159821595615983@@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1632216349{
1632316350struct _Py_unicode_state *state = &interp->unicode;
163241635116325-// _PyUnicode_ClearInterned() must be called before
16326-assert(state->interned == NULL);
16352+if (_Py_IsMainInterpreter(interp)) {
16353+// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16354+assert(interned == NULL);
16355+ }
16327163561632816357_PyUnicode_FiniEncodings(&state->fs_codec);
1632916358