bpo-29456: Fix bugs in unicodedata.normalize: u1176, u11a7 and u11c3 …

bpo-29456: Fix bugs in unicodedata.normalize: u1176, u11a7 and u11c3 … · python/cpython@0e2b76e

4 files changed

lines changed

Original file line number	Diff line number	Diff line change
`@@ -208,6 +208,19 @@ def test_issue10254(self):`
`208`	`208`	`b = 'C\u0338' * 20 + '\xC7'`
`209`	`209`	`self.assertEqual(self.db.normalize('NFC', a), b)`
`210`	`210`
	`211`	`+def test_issue29456(self):`
	`212`	`+# Fix #29456`
	`213`	`+u1176_str_a = '\u1100\u1176\u11a8'`
	`214`	`+u1176_str_b = '\u1100\u1176\u11a8'`
	`215`	`+u11a7_str_a = '\u1100\u1175\u11a7'`
	`216`	`+u11a7_str_b = '\uae30\u11a7'`
	`217`	`+u11c3_str_a = '\u1100\u1175\u11c3'`
	`218`	`+u11c3_str_b = '\uae30\u11c3'`
	`219`	`+self.assertEqual(self.db.normalize('NFC', u1176_str_a), u1176_str_b)`
	`220`	`+self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b)`
	`221`	`+self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b)`
	`222`	`+`
	`223`	`+`
`211`	`224`	`def test_east_asian_width(self):`
`212`	`225`	`eaw = self.db.east_asian_width`
`213`	`226`	`self.assertRaises(TypeError, eaw, b'a')`

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fix bugs in hangul normalization: u1176, u11a7 and u11c3`

Original file line number	Diff line number	Diff line change
`@@ -681,15 +681,19 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)`
`681`	`681`	`if (LBase <= code && code < (LBase+LCount) &&`
`682`	`682`	`i + 1 < len &&`
`683`	`683`	`VBase <= PyUnicode_READ(kind, data, i+1) &&`
`684`		`-PyUnicode_READ(kind, data, i+1) <= (VBase+VCount)) {`
	`684`	`+PyUnicode_READ(kind, data, i+1) < (VBase+VCount)) {`
	`685`	`+/* check L character is a modern leading consonant (0x1100 ~ 0x1112)`
	`686`	`+ and V character is a modern vowel (0x1161 ~ 0x1175). */`
`685`	`687`	`int LIndex, VIndex;`
`686`	`688`	`LIndex = code - LBase;`
`687`	`689`	`VIndex = PyUnicode_READ(kind, data, i+1) - VBase;`
`688`	`690`	`code = SBase + (LIndex*VCount+VIndex)*TCount;`
`689`	`691`	`i+=2;`
`690`	`692`	`if (i < len &&`
`691`		`-TBase <= PyUnicode_READ(kind, data, i) &&`
`692`		`-PyUnicode_READ(kind, data, i) <= (TBase+TCount)) {`
	`693`	`+TBase < PyUnicode_READ(kind, data, i) &&`
	`694`	`+PyUnicode_READ(kind, data, i) < (TBase+TCount)) {`
	`695`	`+/* check T character is a modern trailing consonant`
	`696`	`+ (0x11A8 ~ 0x11C2). */`
`693`	`697`	`code += PyUnicode_READ(kind, data, i)-TBase;`
`694`	`698`	`i++;`
`695`	`699`	`}`