bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20… · python/cpython@35d6540

@@ -214,6 +214,22 @@ extern "C" {

214214

# define OVERALLOCATE_FACTOR 4

215215

#endif

216216217+

/* bpo-40521: Interned strings are shared by all interpreters. */

218+

#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS

219+

# define INTERNED_STRINGS

220+

#endif

221+222+

/* This dictionary holds all interned unicode strings. Note that references

223+

to strings in this dictionary are *not* counted in the string's ob_refcnt.

224+

When the interned string reaches a refcnt of 0 the string deallocation

225+

function will delete the reference from this dictionary.

226+227+

Another way to look at this is to say that the actual reference

228+

count of a string is: s->ob_refcnt + (s->state ? 2 : 0)

229+

*/

230+

#ifdef INTERNED_STRINGS

231+

static PyObject *interned = NULL;

232+

#endif

217233218234

/* Forward declaration */

219235

static inline int

@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)

1950196619511967

case SSTATE_INTERNED_MORTAL:

19521968

{

1953-

struct _Py_unicode_state *state = get_unicode_state();

1969+

#ifdef INTERNED_STRINGS

19541970

/* Revive the dead object temporarily. PyDict_DelItem() removes two

19551971

references (key and value) which were ignored by

19561972

PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2

19571973

to prevent calling unicode_dealloc() again. Adjust refcnt after

19581974

PyDict_DelItem(). */

19591975

assert(Py_REFCNT(unicode) == 0);

19601976

Py_SET_REFCNT(unicode, 3);

1961-

if (PyDict_DelItem(state->interned, unicode) != 0) {

1977+

if (PyDict_DelItem(interned, unicode) != 0) {

19621978

_PyErr_WriteUnraisableMsg("deletion of interned string failed",

19631979

NULL);

19641980

}

19651981

assert(Py_REFCNT(unicode) == 1);

19661982

Py_SET_REFCNT(unicode, 0);

1983+

#endif

19671984

break;

19681985

}

19691986

@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)

1134211359

if (PyUnicode_CHECK_INTERNED(left))

1134311360

return 0;

113441136111362+

#ifdef INTERNED_STRINGS

1134511363

assert(_PyUnicode_HASH(right_uni) != -1);

1134611364

Py_hash_t hash = _PyUnicode_HASH(left);

1134711365

if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {

1134811366

return 0;

1134911367

}

11368+

#endif

11350113691135111370

return unicode_compare_eq(left, right_uni);

1135211371

}

@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)

1559115610

return;

1559215611

}

155931561215613+

#ifdef INTERNED_STRINGS

1559415614

if (PyUnicode_READY(s) == -1) {

1559515615

PyErr_Clear();

1559615616

return;

1559715617

}

155981561815599-

struct _Py_unicode_state *state = get_unicode_state();

15600-

if (state->interned == NULL) {

15601-

state->interned = PyDict_New();

15602-

if (state->interned == NULL) {

15619+

if (interned == NULL) {

15620+

interned = PyDict_New();

15621+

if (interned == NULL) {

1560315622

PyErr_Clear(); /* Don't leave an exception */

1560415623

return;

1560515624

}

1560615625

}

156071562615608-

PyObject *t = PyDict_SetDefault(state->interned, s, s);

15627+

PyObject *t = PyDict_SetDefault(interned, s, s);

1560915628

if (t == NULL) {

1561015629

PyErr_Clear();

1561115630

return;

@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)

1562215641

this. */

1562315642

Py_SET_REFCNT(s, Py_REFCNT(s) - 2);

1562415643

_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;

15644+

#else

15645+

// PyDict expects that interned strings have their hash

15646+

// (PyASCIIObject.hash) already computed.

15647+

(void)unicode_hash(s);

15648+

#endif

1562515649

}

156261565015627-1562815651

void

1562915652

PyUnicode_InternImmortal(PyObject **p)

1563015653

{

@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)

1565815681

void

1565915682

_PyUnicode_ClearInterned(PyInterpreterState *interp)

1566015683

{

15661-

struct _Py_unicode_state *state = &interp->unicode;

15662-

if (state->interned == NULL) {

15684+

if (!_Py_IsMainInterpreter(interp)) {

15685+

// interned dict is shared by all interpreters

1566315686

return;

1566415687

}

15665-

assert(PyDict_CheckExact(state->interned));

15688+15689+

if (interned == NULL) {

15690+

return;

15691+

}

15692+

assert(PyDict_CheckExact(interned));

15666156931566715694

/* Interned unicode strings are not forcibly deallocated; rather, we give

1566815695

them their stolen references back, and then clear and DECREF the

1566915696

interned dict. */

15670156971567115698

#ifdef INTERNED_STATS

1567215699

fprintf(stderr, "releasing %zd interned strings\n",

15673-

PyDict_GET_SIZE(state->interned));

15700+

PyDict_GET_SIZE(interned));

15674157011567515702

Py_ssize_t immortal_size = 0, mortal_size = 0;

1567615703

#endif

1567715704

Py_ssize_t pos = 0;

1567815705

PyObject *s, *ignored_value;

15679-

while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {

15706+

while (PyDict_Next(interned, &pos, &s, &ignored_value)) {

1568015707

assert(PyUnicode_IS_READY(s));

15681157081568215709

switch (PyUnicode_CHECK_INTERNED(s)) {

@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)

1570715734

mortal_size, immortal_size);

1570815735

#endif

157091573615710-

PyDict_Clear(state->interned);

15711-

Py_CLEAR(state->interned);

15737+

PyDict_Clear(interned);

15738+

Py_CLEAR(interned);

1571215739

}

15713157401571415741

@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)

1607916106

static inline int

1608016107

unicode_is_finalizing(void)

1608116108

{

16082-

struct _Py_unicode_state *state = get_unicode_state();

16083-

return (state->interned == NULL);

16109+

return (interned == NULL);

1608416110

}

1608516111

#endif

1608616112

@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)

1609016116

{

1609116117

struct _Py_unicode_state *state = &interp->unicode;

160921611816093-

// _PyUnicode_ClearInterned() must be called before

16094-

assert(state->interned == NULL);

16119+

if (_Py_IsMainInterpreter(interp)) {

16120+

// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()

16121+

assert(interned == NULL);

16122+

}

16095161231609616124

_PyUnicode_FiniEncodings(&state->fs_codec);

1609716125