bpo-36297: remove "unicode_internal" codec (GH-12342) · python/cpython@6a16b18

@@ -1239,16 +1239,6 @@ def test_errors(self):

12391239

self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))

12401240124112411242-

class RecodingTest(unittest.TestCase):

1243-

def test_recoding(self):

1244-

f = io.BytesIO()

1245-

with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:

1246-

f2.write("a")

1247-

# Python used to crash on this at exit because of a refcount

1248-

# bug in _codecsmodule.c

1249-1250-

self.assertTrue(f.closed)

1251-12521242

# From RFC 3492

12531243

punycode_testcases = [

12541244

# A Arabic (Egyptian):

@@ -1378,87 +1368,6 @@ def test_decode(self):

13781368

self.assertEqual(uni, puny.decode("punycode"))

13791369138013701381-

class UnicodeInternalTest(unittest.TestCase):

1382-

@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')

1383-

def test_bug1251300(self):

1384-

# Decoding with unicode_internal used to not correctly handle "code

1385-

# points" above 0x10ffff on UCS-4 builds.

1386-

ok = [

1387-

(b"\x00\x10\xff\xff", "\U0010ffff"),

1388-

(b"\x00\x00\x01\x01", "\U00000101"),

1389-

(b"", ""),

1390-

]

1391-

not_ok = [

1392-

b"\x7f\xff\xff\xff",

1393-

b"\x80\x00\x00\x00",

1394-

b"\x81\x00\x00\x00",

1395-

b"\x00",

1396-

b"\x00\x00\x00\x00\x00",

1397-

]

1398-

for internal, uni in ok:

1399-

if sys.byteorder == "little":

1400-

internal = bytes(reversed(internal))

1401-

with support.check_warnings():

1402-

self.assertEqual(uni, internal.decode("unicode_internal"))

1403-

for internal in not_ok:

1404-

if sys.byteorder == "little":

1405-

internal = bytes(reversed(internal))

1406-

with support.check_warnings(('unicode_internal codec has been '

1407-

'deprecated', DeprecationWarning)):

1408-

self.assertRaises(UnicodeDecodeError, internal.decode,

1409-

"unicode_internal")

1410-

if sys.byteorder == "little":

1411-

invalid = b"\x00\x00\x11\x00"

1412-

invalid_backslashreplace = r"\x00\x00\x11\x00"

1413-

else:

1414-

invalid = b"\x00\x11\x00\x00"

1415-

invalid_backslashreplace = r"\x00\x11\x00\x00"

1416-

with support.check_warnings():

1417-

self.assertRaises(UnicodeDecodeError,

1418-

invalid.decode, "unicode_internal")

1419-

with support.check_warnings():

1420-

self.assertEqual(invalid.decode("unicode_internal", "replace"),

1421-

'\ufffd')

1422-

with support.check_warnings():

1423-

self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),

1424-

invalid_backslashreplace)

1425-1426-

@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')

1427-

def test_decode_error_attributes(self):

1428-

try:

1429-

with support.check_warnings(('unicode_internal codec has been '

1430-

'deprecated', DeprecationWarning)):

1431-

b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")

1432-

except UnicodeDecodeError as ex:

1433-

self.assertEqual("unicode_internal", ex.encoding)

1434-

self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)

1435-

self.assertEqual(4, ex.start)

1436-

self.assertEqual(8, ex.end)

1437-

else:

1438-

self.fail()

1439-1440-

@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')

1441-

def test_decode_callback(self):

1442-

codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)

1443-

decoder = codecs.getdecoder("unicode_internal")

1444-

with support.check_warnings(('unicode_internal codec has been '

1445-

'deprecated', DeprecationWarning)):

1446-

ab = "ab".encode("unicode_internal").decode()

1447-

ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),

1448-

"ascii"),

1449-

"UnicodeInternalTest")

1450-

self.assertEqual(("ab", 12), ignored)

1451-1452-

def test_encode_length(self):

1453-

with support.check_warnings(('unicode_internal codec has been '

1454-

'deprecated', DeprecationWarning)):

1455-

# Issue 3739

1456-

encoder = codecs.getencoder("unicode_internal")

1457-

self.assertEqual(encoder("a")[1], 1)

1458-

self.assertEqual(encoder("\xe9\u0142")[1], 2)

1459-1460-

self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)

1461-14621371

# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html

14631372

nameprep_tests = [

14641373

# 3.1 Map to nothing.

@@ -1949,7 +1858,6 @@ def test_basic(self):

19491858

"shift_jisx0213",

19501859

"tis_620",

19511860

"unicode_escape",

1952-

"unicode_internal",

19531861

"utf_16",

19541862

"utf_16_be",

19551863

"utf_16_le",

@@ -1969,7 +1877,6 @@ def test_basic(self):

19691877

# The following encodings don't work in stateful mode

19701878

broken_unicode_with_stateful = [

19711879

"punycode",

1972-

"unicode_internal"

19731880

]

1974188119751882

@@ -1984,12 +1891,10 @@ def test_basics(self):

19841891

name = "latin_1"

19851892

self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))

198618931987-

with support.check_warnings():

1988-

# unicode-internal has been deprecated

1989-

(b, size) = codecs.getencoder(encoding)(s)

1990-

self.assertEqual(size, len(s), "encoding=%r" % encoding)

1991-

(chars, size) = codecs.getdecoder(encoding)(b)

1992-

self.assertEqual(chars, s, "encoding=%r" % encoding)

1894+

(b, size) = codecs.getencoder(encoding)(s)

1895+

self.assertEqual(size, len(s), "encoding=%r" % encoding)

1896+

(chars, size) = codecs.getdecoder(encoding)(b)

1897+

self.assertEqual(chars, s, "encoding=%r" % encoding)

1993189819941899

if encoding not in broken_unicode_with_stateful:

19951900

# check stream reader/writer

@@ -2116,9 +2021,7 @@ def test_bad_decode_args(self):

21162021

def test_bad_encode_args(self):

21172022

for encoding in all_unicode_encodings:

21182023

encoder = codecs.getencoder(encoding)

2119-

with support.check_warnings():

2120-

# unicode-internal has been deprecated

2121-

self.assertRaises(TypeError, encoder)

2024+

self.assertRaises(TypeError, encoder)

2122202521232026

def test_encoding_map_type_initialized(self):

21242027

from encodings import cp1140