bpo-24214: Fixed the UTF-8 incremental decoder. (GH-12603) (GH-12627) · python/cpython@bd48280

File tree

3 files changed

lines changed

  • Misc/NEWS.d/next/Core and Builtins

3 files changed

lines changed

Original file line number | Diff line number | Diff line change

@@ -401,6 +401,15 @@ def test_lone_surrogates(self):

401401

self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),

402402

before + backslashreplace + after)

403403
404+

def test_incremental_surrogatepass(self):

405+

# Test incremental decoder for surrogatepass handler:

406+

# see issue #24214

407+

data = '\uD901'.encode(self.encoding, 'surrogatepass')

408+

for i in range(1, len(data)):

409+

dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')

410+

self.assertEqual(dec.decode(data[:i]), '')

411+

self.assertEqual(dec.decode(data[i:], True), '\uD901')

412+
404413
405414

class UTF32Test(ReadTest, unittest.TestCase):

406415

encoding = "utf-32"

Original file line number | Diff line number | Diff line change

@@ -0,0 +1,2 @@

1+

Fixed support of the surrogatepass error handler in the UTF-8 incremental

2+

decoder.

Original file line number | Diff line number | Diff line change

@@ -4890,6 +4890,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,

48904890

case 2:

48914891

case 3:

48924892

case 4:

4893+

if (s == end || consumed) {

4894+

goto End;

4895+

}

48934896

errmsg = "invalid continuation byte";

48944897

startinpos = s - starts;

48954898

endinpos = startinpos + ch - 1;