git.delta.rocks / jrsonnet / refs/commits / c1e14347eca8

diff | tree | log

fix decode non-bmp characters

Yaroslav Bolyukin · 2022-04-22 · parent: #11643ec · .patch · .diff
in: master

1 file changed

modified: crates/jrsonnet-parser/src/unescape.rs (diff | before | after | both)
1use std::str::Chars;
2
/// Reads exactly four hexadecimal digits from `chars` and combines them,
/// most significant digit first, into a single 16-bit value.
///
/// The result is returned as a raw `u16`; no check is made here for whether
/// it is a surrogate half or a complete code point — that is left to the
/// caller.
///
/// Returns `None` when fewer than four characters remain, or when any of the
/// four characters is not a hex digit (upper and lower case are both
/// accepted). Characters pulled from `chars` before the failure was detected
/// stay consumed.
fn decode_unicode(chars: &mut Chars) -> Option<u16> {
    // All four characters are taken up front, so a short input fails here
    // before any digit is validated.
    // `IntoIterator::into_iter` on the array yields `char`s by value.
    IntoIterator::into_iter([chars.next()?, chars.next()?, chars.next()?, chars.next()?])
        // `to_digit(16)` yields 0..=15, so narrowing to `u16` is lossless.
        .map(|c| c.to_digit(16).map(|f| f as u16))
        // Four nibbles fill the 16 bits exactly; the first `None` digit
        // aborts the fold.
        .try_fold(0u16, |acc, v| Some((acc << 4) | (v?)))
}
8
1pub fn unescape(s: &str) -> Option<String> {9pub fn unescape(s: &str) -> Option<String> {
2 let mut chars = s.chars();10 let mut chars = s.chars();
14 'n' => out.push('\n'),22 'n' => out.push('\n'),
15 'r' => out.push('\r'),23 'r' => out.push('\r'),
16 't' => out.push('\t'),24 't' => out.push('\t'),
17 'u' => {25 'u' => match decode_unicode(&mut chars)? {
18 let c = IntoIterator::into_iter([26 // May only be second byte
19 chars.next()?,27 0xDC00..=0xDFFF => return None,
28 // Surrogate pair
29 n1 @ 0xD800..=0xDBFF => {
30 if chars.next() != Some('\\') {
31 return None;
32 }
20 chars.next()?,33 if chars.next() != Some('u') {
21 chars.next()?,34 return None;
35 }
22 chars.next()?,36 let n2 = decode_unicode(&mut chars)?;
23 ])
24 .map(|c| c.to_digit(16))37 if !matches!(n2, 0xDC00..=0xDFFF) {
38 return None;
39 }
25 .try_fold(0u32, |acc, v| Some((acc << 8) | (v?)))?;40 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
41 out.push(char::from_u32(n)?);
42 }
26 out.push(char::from_u32(c)?)43 n => out.push(char::from_u32(n as u32)?),
27 }44 },
28 'x' => {45 'x' => {
29 let c = IntoIterator::into_iter([chars.next()?, chars.next()?])46 let c = IntoIterator::into_iter([chars.next()?, chars.next()?])
30 .map(|c| c.to_digit(16))47 .map(|c| c.to_digit(16))