bpo-40521: Disable Unicode caches in isolated subinterpreters (GH-19933) · python/cpython@607b102

@@ -198,6 +198,11 @@ extern "C" {

198198

# define OVERALLOCATE_FACTOR 4

199199

#endif

200200201+

/* bpo-40521: Interned strings are shared by all interpreters. */

202+

#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS

203+

# define INTERNED_STRINGS

204+

#endif

205+201206

/* This dictionary holds all interned unicode strings. Note that references

202207

to strings in this dictionary are *not* counted in the string's ob_refcnt.

203208

When the interned string reaches a refcnt of 0 the string deallocation

@@ -206,7 +211,9 @@ extern "C" {

206211

Another way to look at this is to say that the actual reference

207212

count of a string is: s->ob_refcnt + (s->state ? 2 : 0)

208213

*/

214+

#ifdef INTERNED_STRINGS

209215

static PyObject *interned = NULL;

216+

#endif

210217211218

/* The empty Unicode object is shared to improve performance. */

212219

static PyObject *unicode_empty = NULL;

@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,

281288

/* List of static strings. */

282289

static _Py_Identifier *static_strings = NULL;

283290291+

/* bpo-40521: Latin1 singletons are shared by all interpreters. */

292+

#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS

293+

# define LATIN1_SINGLETONS

294+

#endif

295+296+

#ifdef LATIN1_SINGLETONS

284297

/* Single character Unicode strings in the Latin-1 range are being

285298

shared as well. */

286299

static PyObject *unicode_latin1[256] = {NULL};

300+

#endif

287301288302

/* Fast detection of the most frequent whitespace characters */

289303

const unsigned char _Py_ascii_whitespace[] = {

@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)

662676

return unicode_empty;

663677

}

664678679+

#ifdef LATIN1_SINGLETONS

665680

if (length == 1) {

666681

const void *data = PyUnicode_DATA(unicode);

667682

int kind = PyUnicode_KIND(unicode);

@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)

683698

}

684699

}

685700

}

701+

#endif

686702687703

assert(_PyUnicode_CheckConsistency(unicode, 1));

688704

return unicode;

@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)

19131929

case SSTATE_INTERNED_MORTAL:

19141930

/* revive dead object temporarily for DelItem */

19151931

Py_SET_REFCNT(unicode, 3);

1932+

#ifdef INTERNED_STRINGS

19161933

if (PyDict_DelItem(interned, unicode) != 0) {

19171934

_PyErr_WriteUnraisableMsg("deletion of interned string failed",

19181935

NULL);

19191936

}

1937+

#endif

19201938

break;

1921193919221940

case SSTATE_INTERNED_IMMORTAL:

@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)

19441962

static int

19451963

unicode_is_singleton(PyObject *unicode)

19461964

{

1947-

PyASCIIObject *ascii = (PyASCIIObject *)unicode;

1948-

if (unicode == unicode_empty)

1965+

if (unicode == unicode_empty) {

19491966

return 1;

1967+

}

1968+

#ifdef LATIN1_SINGLETONS

1969+

PyASCIIObject *ascii = (PyASCIIObject *)unicode;

19501970

if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)

19511971

{

19521972

Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);

19531973

if (ch < 256 && unicode_latin1[ch] == unicode)

19541974

return 1;

19551975

}

1976+

#endif

19561977

return 0;

19571978

}

19581979

#endif

@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,

20942115

static PyObject*

20952116

get_latin1_char(unsigned char ch)

20962117

{

2097-

PyObject *unicode = unicode_latin1[ch];

2118+

PyObject *unicode;

2119+2120+

#ifdef LATIN1_SINGLETONS

2121+

unicode = unicode_latin1[ch];

2122+

if (unicode) {

2123+

Py_INCREF(unicode);

2124+

return unicode;

2125+

}

2126+

#endif

2127+2128+

unicode = PyUnicode_New(1, ch);

20982129

if (!unicode) {

2099-

unicode = PyUnicode_New(1, ch);

2100-

if (!unicode)

2101-

return NULL;

2102-

PyUnicode_1BYTE_DATA(unicode)[0] = ch;

2103-

assert(_PyUnicode_CheckConsistency(unicode, 1));

2104-

unicode_latin1[ch] = unicode;

2130+

return NULL;

21052131

}

2132+2133+

PyUnicode_1BYTE_DATA(unicode)[0] = ch;

2134+

assert(_PyUnicode_CheckConsistency(unicode, 1));

2135+2136+

#ifdef LATIN1_SINGLETONS

21062137

Py_INCREF(unicode);

2138+

unicode_latin1[ch] = unicode;

2139+

#endif

21072140

return unicode;

21082141

}

21092142

@@ -11270,7 +11303,6 @@ int

1127011303

_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)

1127111304

{

1127211305

PyObject *right_uni;

11273-

Py_hash_t hash;

11274113061127511307

assert(_PyUnicode_CHECK(left));

1127611308

assert(right->string);

@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)

1130211334

if (PyUnicode_CHECK_INTERNED(left))

1130311335

return 0;

113041133611337+

#ifdef INTERNED_STRINGS

1130511338

assert(_PyUnicode_HASH(right_uni) != -1);

11306-

hash = _PyUnicode_HASH(left);

11339+

Py_hash_t hash = _PyUnicode_HASH(left);

1130711340

if (hash != -1 && hash != _PyUnicode_HASH(right_uni))

1130811341

return 0;

11342+

#endif

11309113431131011344

return unicode_compare_eq(left, right_uni);

1131111345

}

@@ -15487,43 +15521,55 @@ void

1548715521

PyUnicode_InternInPlace(PyObject **p)

1548815522

{

1548915523

PyObject *s = *p;

15490-

PyObject *t;

1549115524

#ifdef Py_DEBUG

1549215525

assert(s != NULL);

1549315526

assert(_PyUnicode_CHECK(s));

1549415527

#else

15495-

if (s == NULL || !PyUnicode_Check(s))

15528+

if (s == NULL || !PyUnicode_Check(s)) {

1549615529

return;

15530+

}

1549715531

#endif

15532+1549815533

/* If it's a subclass, we don't really know what putting

1549915534

it in the interned dict might do. */

15500-

if (!PyUnicode_CheckExact(s))

15535+

if (!PyUnicode_CheckExact(s)) {

1550115536

return;

15502-

if (PyUnicode_CHECK_INTERNED(s))

15537+

}

15538+15539+

if (PyUnicode_CHECK_INTERNED(s)) {

1550315540

return;

15541+

}

15542+15543+

#ifdef INTERNED_STRINGS

1550415544

if (interned == NULL) {

1550515545

interned = PyDict_New();

1550615546

if (interned == NULL) {

1550715547

PyErr_Clear(); /* Don't leave an exception */

1550815548

return;

1550915549

}

1551015550

}

15551+15552+

PyObject *t;

1551115553

Py_ALLOW_RECURSION

1551215554

t = PyDict_SetDefault(interned, s, s);

1551315555

Py_END_ALLOW_RECURSION

15556+1551415557

if (t == NULL) {

1551515558

PyErr_Clear();

1551615559

return;

1551715560

}

15561+1551815562

if (t != s) {

1551915563

Py_INCREF(t);

1552015564

Py_SETREF(*p, t);

1552115565

return;

1552215566

}

15567+1552315568

/* The two references in interned are not counted by refcnt.

1552415569

The deallocator will take care of this. */

1552515570

Py_SET_REFCNT(s, Py_REFCNT(s) - 2);

1552615571

_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;

15572+

#endif

1552715573

}

15528155741552915575

void

@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)

16109161551611016156

Py_CLEAR(unicode_empty);

161111615716158+

#ifdef LATIN1_SINGLETONS

1611216159

for (Py_ssize_t i = 0; i < 256; i++) {

1611316160

Py_CLEAR(unicode_latin1[i]);

1611416161

}

16162+

#endif

1611516163

_PyUnicode_ClearStaticStrings();

1611616164

}

1611716165