// Post-patch state of the hunk from
// crates/jrsonnet-evaluator/src/stdlib/manifest.rs: the previous char-by-char
// `escape_string_json_buf` (a `match` over `char`s that formatted `\uXXXX`
// escapes through `write!`) is replaced by a byte-oriented, table-driven encoder.

// Json string encoding was borrowed from https://github.com/serde-rs/json

const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
static ESCAPE: [u8; 256] = [
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];

/// Appends `value` to `buf` as a double-quoted JSON string literal.
///
/// Existing contents of `buf` are kept; the opening quote, the escaped text and
/// the closing quote are appended after them.
///
/// Escaping is driven by [`ESCAPE`]: `"` and `\` are backslash-escaped, the
/// control bytes `\x08`, `\x09`, `\x0A`, `\x0C`, `\x0D` use their short forms
/// (`\b`, `\t`, `\n`, `\f`, `\r`), and every other byte below `\x20` is written
/// as `\u00XX` with lowercase hex digits. All remaining bytes are copied
/// verbatim. Note that this includes `\x7F` and every non-ASCII character: the
/// char-based implementation this hunk replaces also escaped U+007F..=U+009F,
/// this table does not.
fn escape_string_json_buf(value: &str, buf: &mut String) {
    // Perfect for ASCII strings without escapes: a single reservation covers
    // the text plus both quotes, so no reallocation happens while appending.
    buf.reserve(value.len() + 2);
    buf.push('"');

    // Byte offset where the pending run of not-yet-copied, unescaped text begins.
    let mut start = 0;

    for (i, &byte) in value.as_bytes().iter().enumerate() {
        let escape = ESCAPE[usize::from(byte)];
        if escape == __ {
            continue;
        }

        // Flush the unescaped run preceding this byte. Only ASCII bytes have a
        // non-zero table entry, so `start` (one past an ASCII byte, or 0) and
        // `i` (at an ASCII byte) are always char boundaries; the slice cannot
        // panic and no `unsafe` byte-level access to `buf` is required.
        buf.push_str(&value[start..i]);
        start = i + 1;

        match escape {
            BB | TT | NN | FF | RR | QU | BS => {
                buf.push('\\');
                buf.push(char::from(escape));
            }
            UU => {
                const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
                // Bytes reaching this arm are below 0x20, so the two leading
                // hex digits of the four-digit escape are always zero.
                buf.push_str("\\u00");
                buf.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                buf.push(char::from(HEX_DIGITS[usize::from(byte & 0xF)]));
            }
            _ => unreachable!("ESCAPE only contains the marker bytes matched above"),
        }
    }

    // Trailing unescaped run; an empty slice when the input ended on an escape.
    buf.push_str(&value[start..]);
    buf.push('"');
}