bpo-24214: Fixed the UTF-8 incremental decoder. (GH-12603) · python/cpython@7a465cb

File tree

3 files changed

lines changed

  • Misc/NEWS.d/next/Core and Builtins

3 files changed

lines changed

Original file line number | Diff line number | Diff line change

@@ -406,6 +406,15 @@ def test_lone_surrogates(self):

406406

self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),

407407

before + backslashreplace + after)

408408
409+

def test_incremental_surrogatepass(self):

410+

# Test incremental decoder for surrogatepass handler:

411+

# see issue #24214

412+

data = '\uD901'.encode(self.encoding, 'surrogatepass')

413+

for i in range(1, len(data)):

414+

dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')

415+

self.assertEqual(dec.decode(data[:i]), '')

416+

self.assertEqual(dec.decode(data[i:], True), '\uD901')

417+
409418
410419

class UTF32Test(ReadTest, unittest.TestCase):

411420

encoding = "utf-32"

Original file line number | Diff line number | Diff line change

@@ -0,0 +1,2 @@

1+

Fixed support of the surrogatepass error handler in the UTF-8 incremental

2+

decoder.

Original file line number | Diff line number | Diff line change

@@ -4883,6 +4883,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,

48834883

case 2:

48844884

case 3:

48854885

case 4:

4886+

if (s == end || consumed) {

4887+

goto End;

4888+

}

48864889

errmsg = "invalid continuation byte";

48874890

startinpos = s - starts;

48884891

endinpos = startinpos + ch - 1;