[3.10] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings… · python/cpython@72c260c

@@ -211,6 +211,22 @@ extern "C" {

211211

# define OVERALLOCATE_FACTOR 4

212212

#endif

213213214+

/* bpo-40521: Interned strings are shared by all interpreters. */

215+

#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS

216+

# define INTERNED_STRINGS

217+

#endif

218+219+

/* This dictionary holds all interned unicode strings. Note that references

220+

to strings in this dictionary are *not* counted in the string's ob_refcnt.

221+

When the interned string reaches a refcnt of 0 the string deallocation

222+

function will delete the reference from this dictionary.

223+224+

Another way to look at this is to say that the actual reference

225+

count of a string is: s->ob_refcnt + (s->state ? 2 : 0)

226+

*/

227+

#ifdef INTERNED_STRINGS

228+

static PyObject *interned = NULL;

229+

#endif

214230215231

static struct _Py_unicode_state*

216232

get_unicode_state(void)

@@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)

1936195219371953

case SSTATE_INTERNED_MORTAL:

19381954

{

1939-

struct _Py_unicode_state *state = get_unicode_state();

1955+

#ifdef INTERNED_STRINGS

19401956

/* Revive the dead object temporarily. PyDict_DelItem() removes two

19411957

references (key and value) which were ignored by

19421958

PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2

19431959

to prevent calling unicode_dealloc() again. Adjust refcnt after

19441960

PyDict_DelItem(). */

19451961

assert(Py_REFCNT(unicode) == 0);

19461962

Py_SET_REFCNT(unicode, 3);

1947-

if (PyDict_DelItem(state->interned, unicode) != 0) {

1963+

if (PyDict_DelItem(interned, unicode) != 0) {

19481964

_PyErr_WriteUnraisableMsg("deletion of interned string failed",

19491965

NULL);

19501966

}

19511967

assert(Py_REFCNT(unicode) == 1);

19521968

Py_SET_REFCNT(unicode, 0);

1969+

#endif

19531970

break;

19541971

}

19551972

@@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)

1160011617

if (PyUnicode_CHECK_INTERNED(left))

1160111618

return 0;

116021161911620+

#ifdef INTERNED_STRINGS

1160311621

assert(_PyUnicode_HASH(right_uni) != -1);

1160411622

Py_hash_t hash = _PyUnicode_HASH(left);

1160511623

if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {

1160611624

return 0;

1160711625

}

11626+

#endif

11608116271160911628

return unicode_compare_eq(left, right_uni);

1161011629

}

@@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)

1583315852

return;

1583415853

}

158351585415855+

#ifdef INTERNED_STRINGS

1583615856

if (PyUnicode_READY(s) == -1) {

1583715857

PyErr_Clear();

1583815858

return;

1583915859

}

158401586015841-

struct _Py_unicode_state *state = get_unicode_state();

15842-

if (state->interned == NULL) {

15843-

state->interned = PyDict_New();

15844-

if (state->interned == NULL) {

15861+

if (interned == NULL) {

15862+

interned = PyDict_New();

15863+

if (interned == NULL) {

1584515864

PyErr_Clear(); /* Don't leave an exception */

1584615865

return;

1584715866

}

1584815867

}

158491586815850-

PyObject *t = PyDict_SetDefault(state->interned, s, s);

15869+

PyObject *t = PyDict_SetDefault(interned, s, s);

1585115870

if (t == NULL) {

1585215871

PyErr_Clear();

1585315872

return;

@@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)

1586415883

this. */

1586515884

Py_SET_REFCNT(s, Py_REFCNT(s) - 2);

1586615885

_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;

15886+

#else

15887+

// PyDict expects that interned strings have their hash

15888+

// (PyASCIIObject.hash) already computed.

15889+

(void)unicode_hash(s);

15890+

#endif

1586715891

}

158681589215869-1587015893

void

1587115894

PyUnicode_InternImmortal(PyObject **p)

1587215895

{

@@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)

1590015923

void

1590115924

_PyUnicode_ClearInterned(PyInterpreterState *interp)

1590215925

{

15903-

struct _Py_unicode_state *state = &interp->unicode;

15904-

if (state->interned == NULL) {

15926+

if (!_Py_IsMainInterpreter(interp)) {

15927+

// interned dict is shared by all interpreters

15928+

return;

15929+

}

15930+15931+

if (interned == NULL) {

1590515932

return;

1590615933

}

15907-

assert(PyDict_CheckExact(state->interned));

15934+

assert(PyDict_CheckExact(interned));

15908159351590915936

/* Interned unicode strings are not forcibly deallocated; rather, we give

1591015937

them their stolen references back, and then clear and DECREF the

1591115938

interned dict. */

15912159391591315940

#ifdef INTERNED_STATS

1591415941

fprintf(stderr, "releasing %zd interned strings\n",

15915-

PyDict_GET_SIZE(state->interned));

15942+

PyDict_GET_SIZE(interned));

15916159431591715944

Py_ssize_t immortal_size = 0, mortal_size = 0;

1591815945

#endif

1591915946

Py_ssize_t pos = 0;

1592015947

PyObject *s, *ignored_value;

15921-

while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {

15948+

while (PyDict_Next(interned, &pos, &s, &ignored_value)) {

1592215949

assert(PyUnicode_IS_READY(s));

15923159501592415951

switch (PyUnicode_CHECK_INTERNED(s)) {

@@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)

1594915976

mortal_size, immortal_size);

1595015977

#endif

159511597815952-

PyDict_Clear(state->interned);

15953-

Py_CLEAR(state->interned);

15979+

PyDict_Clear(interned);

15980+

Py_CLEAR(interned);

1595415981

}

15955159821595615983

@@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)

1632216349

{

1632316350

struct _Py_unicode_state *state = &interp->unicode;

163241635116325-

// _PyUnicode_ClearInterned() must be called before

16326-

assert(state->interned == NULL);

16352+

if (_Py_IsMainInterpreter(interp)) {

16353+

// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()

16354+

assert(interned == NULL);

16355+

}

16327163561632816357

_PyUnicode_FiniEncodings(&state->fs_codec);

1632916358