Avoid panicking branch in `EscapeIterInner`. · rust-lang/rust@16981ba

@@ -6,56 +6,85 @@ use crate::ops::Range;

6677

/// Lowercase hexadecimal digits as ASCII characters; indexing with a nibble
/// value (`0..=15`) yields the digit for that value.
// The literal is pure ASCII, so the `as_ascii()` conversion cannot fail here.
const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();

889-

/// Escapes a byte into provided buffer; returns length of escaped

10-

/// representation.

11-

pub(crate) fn escape_ascii_into(output: &mut [ascii::Char; 4], byte: u8) -> Range<u8> {

12-

#[inline]

13-

fn backslash(a: ascii::Char) -> ([ascii::Char; 4], u8) {

14-

([ascii::Char::ReverseSolidus, a, ascii::Char::Null, ascii::Char::Null], 2)

15-

}

9+

#[inline]

10+

const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], u8) {

11+

const { assert!(N >= 2) };

12+13+

let mut output = [ascii::Char::Null; N];

14+15+

output[0] = ascii::Char::ReverseSolidus;

16+

output[1] = a;

17+18+

(output, 2)

19+

}

20+21+

/// Escapes an ASCII character.

22+

///

23+

/// Returns a buffer and the length of the escaped representation.
///
/// Tab, carriage return, line feed, backslash, and both quote characters
/// become two-character backslash escapes. Any other byte that is ASCII and
/// not a control character is copied through as a single character; every
/// remaining byte becomes the four-character `\xNN` hex escape.

24+

const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], u8) {

25+

// The longest output, `\xNN`, needs four slots; enforce that at compile time.
const { assert!(N >= 4) };

162617-

let (data, len) = match byte {

27+

match byte {

1828

b'\t' => backslash(ascii::Char::SmallT),

1929

b'\r' => backslash(ascii::Char::SmallR),

2030

b'\n' => backslash(ascii::Char::SmallN),

2131

b'\\' => backslash(ascii::Char::ReverseSolidus),

2232

b'\'' => backslash(ascii::Char::Apostrophe),

2333

b'\"' => backslash(ascii::Char::QuotationMark),

24-

_ => {

25-

if let Some(a) = byte.as_ascii()

34+

byte => {

35+

let mut output = [ascii::Char::Null; N];

36+37+

// ASCII bytes that are not control characters are emitted unchanged.
if let Some(c) = byte.as_ascii()

2638

&& !byte.is_ascii_control()

2739

{

28-

([a, ascii::Char::Null, ascii::Char::Null, ascii::Char::Null], 1)

40+

output[0] = c;

41+

(output, 1)

2942

} else {

30-

let hi = HEX_DIGITS[usize::from(byte >> 4)];

31-

let lo = HEX_DIGITS[usize::from(byte & 0xf)];

32-

([ascii::Char::ReverseSolidus, ascii::Char::SmallX, hi, lo], 4)

43+

// The high and low nibbles of the byte select the two hex digits.
let hi = HEX_DIGITS[(byte >> 4) as usize];

44+

let lo = HEX_DIGITS[(byte & 0xf) as usize];

45+46+

output[0] = ascii::Char::ReverseSolidus;

47+

output[1] = ascii::Char::SmallX;

48+

output[2] = hi;

49+

output[3] = lo;

50+51+

(output, 4)

3352

}

3453

}

35-

};

36-

*output = data;

37-

0..len

54+

}

3855

}

395640-

/// Escapes a character into provided buffer using `\u{NNNN}` representation.

41-

pub(crate) fn escape_unicode_into(output: &mut [ascii::Char; 10], ch: char) -> Range<u8> {

42-

output[9] = ascii::Char::RightCurlyBracket;

43-44-

let ch = ch as u32;

45-

output[3] = HEX_DIGITS[((ch >> 20) & 15) as usize];

46-

output[4] = HEX_DIGITS[((ch >> 16) & 15) as usize];

47-

output[5] = HEX_DIGITS[((ch >> 12) & 15) as usize];

48-

output[6] = HEX_DIGITS[((ch >> 8) & 15) as usize];

49-

output[7] = HEX_DIGITS[((ch >> 4) & 15) as usize];

50-

output[8] = HEX_DIGITS[((ch >> 0) & 15) as usize];

51-52-

// or-ing 1 ensures that for ch==0 the code computes that one digit should

53-

// be printed.

54-

let start = (ch | 1).leading_zeros() as usize / 4 - 2;

55-

const UNICODE_ESCAPE_PREFIX: &[ascii::Char; 3] = b"\\u{".as_ascii().unwrap();

56-

output[start..][..3].copy_from_slice(UNICODE_ESCAPE_PREFIX);

57-58-

(start as u8)..10

57+

/// Escapes a character `\u{NNNN}` representation.

58+

///

59+

/// Returns a buffer and the length of the escaped representation.

60+

const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], u8) {

61+

const { assert!(N >= 10) };

62+63+

let c = c as u32;

64+65+

// OR-ing `1` ensures that for `c == 0` the code computes that

66+

// one digit should be printed.

67+

let u_len = (8 - (c | 1).leading_zeros() / 4) as usize;

68+69+

let closing_paren_offset = 3 + u_len;

70+71+

let mut output = [ascii::Char::Null; N];

72+73+

output[0] = ascii::Char::ReverseSolidus;

74+

output[1] = ascii::Char::SmallU;

75+

output[2] = ascii::Char::LeftCurlyBracket;

76+77+

output[3 + u_len.saturating_sub(6)] = HEX_DIGITS[((c >> 20) & 0x0f) as usize];

78+

output[3 + u_len.saturating_sub(5)] = HEX_DIGITS[((c >> 16) & 0x0f) as usize];

79+

output[3 + u_len.saturating_sub(4)] = HEX_DIGITS[((c >> 12) & 0x0f) as usize];

80+

output[3 + u_len.saturating_sub(3)] = HEX_DIGITS[((c >> 8) & 0x0f) as usize];

81+

output[3 + u_len.saturating_sub(2)] = HEX_DIGITS[((c >> 4) & 0x0f) as usize];

82+

output[3 + u_len.saturating_sub(1)] = HEX_DIGITS[((c >> 0) & 0x0f) as usize];

83+84+

output[closing_paren_offset] = ascii::Char::RightCurlyBracket;

85+86+

let len = (closing_paren_offset + 1) as u8;

87+

(output, len)

5988

}

60896190

/// An iterator over a fixed-size array.

@@ -65,45 +94,62 @@ pub(crate) fn escape_unicode_into(output: &mut [ascii::Char; 10], ch: char) -> R

6594

#[derive(Clone, Debug)]

6695

pub(crate) struct EscapeIterInner<const N: usize> {

6796

// The element type ensures this is always ASCII, and thus also valid UTF-8.

68-

pub(crate) data: [ascii::Char; N],

97+

// Backing buffer; only the slots whose indices lie in `alive` are exposed.
data: [ascii::Char; N],

699870-

// Invariant: alive.start <= alive.end <= N.

71-

pub(crate) alive: Range<u8>,

99+

// Invariant: `alive.start <= alive.end <= N`

100+

// Half-open range of indices into `data` that have not been yielded yet.
alive: Range<u8>,

72101

}

7310274103

impl<const N: usize> EscapeIterInner<N> {

75-

pub fn new(data: [ascii::Char; N], alive: Range<u8>) -> Self {

76-

const { assert!(N < 256) };

77-

debug_assert!(alive.start <= alive.end && usize::from(alive.end) <= N, "{alive:?}");

78-

Self { data, alive }

104+

pub const fn backslash(c: ascii::Char) -> Self {

105+

let (data, len) = backslash(c);

106+

Self { data, alive: 0..len }

79107

}

8010881-

pub fn from_array<const M: usize>(array: [ascii::Char; M]) -> Self {

82-

const { assert!(M <= N) };

109+

pub const fn ascii(c: u8) -> Self {

110+

let (data, len) = escape_ascii(c);

111+

Self { data, alive: 0..len }

112+

}

8311384-

let mut data = [ascii::Char::Null; N];

85-

data[..M].copy_from_slice(&array);

86-

Self::new(data, 0..M as u8)

114+

pub const fn unicode(c: char) -> Self {

115+

let (data, len) = escape_unicode(c);

116+

Self { data, alive: 0..len }

117+

}

118+119+

#[inline]

120+

pub const fn empty() -> Self {

121+

Self { data: [ascii::Char::Null; N], alive: 0..0 }

87122

}

8812389124

/// Returns the characters that have not been yielded yet, i.e. the slots of
/// `data` whose indices lie in `alive`.
pub fn as_ascii(&self) -> &[ascii::Char] {

90-

&self.data[usize::from(self.alive.start)..usize::from(self.alive.end)]

125+

// SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`.
// (The struct invariant is `alive.start <= alive.end <= N`, and `data` has `N` slots.)

126+

unsafe {

127+

self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))

128+

}

91129

}

92130131+

#[inline]

93132

pub fn as_str(&self) -> &str {

94133

self.as_ascii().as_str()

95134

}

96135136+

#[inline]

97137

pub fn len(&self) -> usize {

98138

usize::from(self.alive.end - self.alive.start)

99139

}

100140101141

/// Yields the next character from the front of the live range as a byte, or
/// `None` once the live range is exhausted.
pub fn next(&mut self) -> Option<u8> {

102-

self.alive.next().map(|i| self.data[usize::from(i)].to_u8())

142+

// Advancing `alive` also shrinks the range that `as_ascii` and `len` report.
let i = self.alive.next()?;

143+144+

// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
// (`i` was taken from `alive`, and the struct invariant keeps `alive.end <= N`.)

145+

unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }

103146

}

104147105148

/// Yields the next character from the back of the live range as a byte, or
/// `None` once the live range is exhausted.
pub fn next_back(&mut self) -> Option<u8> {

106-

self.alive.next_back().map(|i| self.data[usize::from(i)].to_u8())

149+

// Shrinking `alive` from the end also shrinks what `as_ascii` and `len` report.
let i = self.alive.next_back()?;

150+151+

// SAFETY: `i` is guaranteed to be a valid index for `self.data`.
// (`i` was taken from `alive`, and the struct invariant keeps `alive.end <= N`.)

152+

unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }

107153

}

108154109155

pub fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {