bpo-36297: remove "unicode_internal" codec (GH-12342) · python/cpython@6a16b18
@@ -1239,16 +1239,6 @@ def test_errors(self):
12391239self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
12401240124112411242-class RecodingTest(unittest.TestCase):
1243-def test_recoding(self):
1244-f = io.BytesIO()
1245-with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
1246-f2.write("a")
1247-# Python used to crash on this at exit because of a refcount
1248-# bug in _codecsmodule.c
1249-1250-self.assertTrue(f.closed)
1251-12521242# From RFC 3492
12531243punycode_testcases = [
12541244# A Arabic (Egyptian):
@@ -1378,87 +1368,6 @@ def test_decode(self):
13781368self.assertEqual(uni, puny.decode("punycode"))
13791369138013701381-class UnicodeInternalTest(unittest.TestCase):
1382-@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
1383-def test_bug1251300(self):
1384-# Decoding with unicode_internal used to not correctly handle "code
1385-# points" above 0x10ffff on UCS-4 builds.
1386-ok = [
1387- (b"\x00\x10\xff\xff", "\U0010ffff"),
1388- (b"\x00\x00\x01\x01", "\U00000101"),
1389- (b"", ""),
1390- ]
1391-not_ok = [
1392-b"\x7f\xff\xff\xff",
1393-b"\x80\x00\x00\x00",
1394-b"\x81\x00\x00\x00",
1395-b"\x00",
1396-b"\x00\x00\x00\x00\x00",
1397- ]
1398-for internal, uni in ok:
1399-if sys.byteorder == "little":
1400-internal = bytes(reversed(internal))
1401-with support.check_warnings():
1402-self.assertEqual(uni, internal.decode("unicode_internal"))
1403-for internal in not_ok:
1404-if sys.byteorder == "little":
1405-internal = bytes(reversed(internal))
1406-with support.check_warnings(('unicode_internal codec has been '
1407-'deprecated', DeprecationWarning)):
1408-self.assertRaises(UnicodeDecodeError, internal.decode,
1409-"unicode_internal")
1410-if sys.byteorder == "little":
1411-invalid = b"\x00\x00\x11\x00"
1412-invalid_backslashreplace = r"\x00\x00\x11\x00"
1413-else:
1414-invalid = b"\x00\x11\x00\x00"
1415-invalid_backslashreplace = r"\x00\x11\x00\x00"
1416-with support.check_warnings():
1417-self.assertRaises(UnicodeDecodeError,
1418-invalid.decode, "unicode_internal")
1419-with support.check_warnings():
1420-self.assertEqual(invalid.decode("unicode_internal", "replace"),
1421-'\ufffd')
1422-with support.check_warnings():
1423-self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
1424-invalid_backslashreplace)
1425-1426-@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
1427-def test_decode_error_attributes(self):
1428-try:
1429-with support.check_warnings(('unicode_internal codec has been '
1430-'deprecated', DeprecationWarning)):
1431-b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
1432-except UnicodeDecodeError as ex:
1433-self.assertEqual("unicode_internal", ex.encoding)
1434-self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
1435-self.assertEqual(4, ex.start)
1436-self.assertEqual(8, ex.end)
1437-else:
1438-self.fail()
1439-1440-@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
1441-def test_decode_callback(self):
1442-codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
1443-decoder = codecs.getdecoder("unicode_internal")
1444-with support.check_warnings(('unicode_internal codec has been '
1445-'deprecated', DeprecationWarning)):
1446-ab = "ab".encode("unicode_internal").decode()
1447-ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
1448-"ascii"),
1449-"UnicodeInternalTest")
1450-self.assertEqual(("ab", 12), ignored)
1451-1452-def test_encode_length(self):
1453-with support.check_warnings(('unicode_internal codec has been '
1454-'deprecated', DeprecationWarning)):
1455-# Issue 3739
1456-encoder = codecs.getencoder("unicode_internal")
1457-self.assertEqual(encoder("a")[1], 1)
1458-self.assertEqual(encoder("\xe9\u0142")[1], 2)
1459-1460-self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
1461-14621371# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
14631372nameprep_tests = [
14641373# 3.1 Map to nothing.
@@ -1949,7 +1858,6 @@ def test_basic(self):
19491858"shift_jisx0213",
19501859"tis_620",
19511860"unicode_escape",
1952-"unicode_internal",
19531861"utf_16",
19541862"utf_16_be",
19551863"utf_16_le",
@@ -1969,7 +1877,6 @@ def test_basic(self):
19691877# The following encodings don't work in stateful mode
19701878broken_unicode_with_stateful = [
19711879"punycode",
1972-"unicode_internal"
19731880]
1974188119751882@@ -1984,12 +1891,10 @@ def test_basics(self):
19841891name = "latin_1"
19851892self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
198618931987-with support.check_warnings():
1988-# unicode-internal has been deprecated
1989- (b, size) = codecs.getencoder(encoding)(s)
1990-self.assertEqual(size, len(s), "encoding=%r" % encoding)
1991- (chars, size) = codecs.getdecoder(encoding)(b)
1992-self.assertEqual(chars, s, "encoding=%r" % encoding)
1894+ (b, size) = codecs.getencoder(encoding)(s)
1895+self.assertEqual(size, len(s), "encoding=%r" % encoding)
1896+ (chars, size) = codecs.getdecoder(encoding)(b)
1897+self.assertEqual(chars, s, "encoding=%r" % encoding)
1993189819941899if encoding not in broken_unicode_with_stateful:
19951900# check stream reader/writer
@@ -2116,9 +2021,7 @@ def test_bad_decode_args(self):
21162021def test_bad_encode_args(self):
21172022for encoding in all_unicode_encodings:
21182023encoder = codecs.getencoder(encoding)
2119-with support.check_warnings():
2120-# unicode-internal has been deprecated
2121-self.assertRaises(TypeError, encoder)
2024+self.assertRaises(TypeError, encoder)
2122202521232026def test_encoding_map_type_initialized(self):
21242027from encodings import cp1140