From 1b7abe84e61128fac13d8cde82ae73276d936c6a Mon Sep 17 00:00:00 2001
From: Лач
Date: Mon, 20 Jul 2020 23:40:28 +0000
Subject: [PATCH] perf: faster format

---
--- a/crates/jrsonnet-evaluator/build.rs
+++ b/crates/jrsonnet-evaluator/build.rs
@@ -39,7 +39,7 @@
 				if **name == *"join" || **name == *"manifestJsonEx" ||
 				**name == *"escapeStringJson" || **name == *"equals" ||
 				**name == *"base64" || **name == *"foldl" || **name == *"foldr" ||
-				**name == *"sortImpl" || **name == *"range"
+				**name == *"sortImpl" || **name == *"format" || **name == *"range"
 			)
 		})
 		.collect(),
--- /dev/null
+++ b/crates/jrsonnet-evaluator/src/builtin/format.rs
@@ -0,0 +1,780 @@
+//! Native (faster) implementation of `std.format`
+#![allow(clippy::too_many_arguments)]
+
+use crate::{
+	create_error, create_error_result, to_string, Error, LocError, ObjValue, Val, ValType,
+};
+use std::rc::Rc;
+
+/// Errors produced while parsing a format string or rendering its values
+#[derive(Debug)]
+pub enum FormatError {
+	TruncatedFormatCode,
+	UnrecognizedConversionType(char),
+	ValueError(LocError),
+
+	NotEnoughValues,
+
+	CannotUseStarWidthWithObject,
+	MappingKeysRequired,
+	NoSuchField(Rc<str>),
+}
+impl From<LocError> for FormatError {
+	fn from(e: LocError) -> Self {
+		Self::ValueError(e)
+	}
+}
+use FormatError::*;
+
+/// Parses the optional `(key)` mapping key at the start of a format code
+///
+/// Returns the key (empty when there is none) and the remaining input
+pub fn try_parse_mapping_key(str: &str) -> Result<(&str, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let bytes = str.as_bytes();
+	if bytes[0] == b'(' {
+		let mut i = 1;
+		while i < bytes.len() {
+			if bytes[i] == b')' {
+				return Ok((&str[1..i], &str[i + 1..]));
+			}
+			i += 1;
+		}
+		Err(TruncatedFormatCode)
+	} else {
+		Ok(("", str))
+	}
+}
+
+#[cfg(test)]
+pub mod tests_key {
+	use super::*;
+
+	#[test]
+	fn parse_key() {
+		assert_eq!(
+			try_parse_mapping_key("(hello ) world").unwrap(),
+			("hello ", " world")
+		);
+		assert_eq!(try_parse_mapping_key("() world").unwrap(), ("", " world"));
+		assert_eq!(try_parse_mapping_key(" world").unwrap(), ("", " world"));
+		assert_eq!(
+			try_parse_mapping_key(" () world").unwrap(),
+			("", " () world")
+		);
+	}
+
+	#[test]
+	#[should_panic]
+	fn parse_key_missing_start() {
+		try_parse_mapping_key("").unwrap();
+	}
+
+	#[test]
+	#[should_panic]
+	fn parse_key_missing_end() {
+		try_parse_mapping_key("( ").unwrap();
+	}
+}
+
+/// Conversion flags of a format code (`#`, `0`, `-`, ` ` and `+`)
+#[derive(Default, Debug)]
+pub struct CFlags {
+	pub alt: bool,
+	pub zero: bool,
+	pub left: bool,
+	pub blank: bool,
+	pub sign: bool,
+}
+
+/// Consumes the leading run of conversion flags
+pub fn try_parse_cflags(str: &str) -> Result<(CFlags, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let bytes = str.as_bytes();
+	let mut i = 0;
+	let mut out = CFlags::default();
+	loop {
+		if bytes.len() == i {
+			return Err(TruncatedFormatCode);
+		}
+		match bytes[i] {
+			b'#' => out.alt = true,
+			b'0' => out.zero = true,
+			b'-' => out.left = true,
+			b' ' => out.blank = true,
+			b'+' => out.sign = true,
+			_ => break,
+		}
+		i += 1;
+	}
+	Ok((out, &str[i..]))
+}
+
+/// Field width or precision: taken from the arguments (`*`) or fixed in the format string
+#[derive(Debug, PartialEq)]
+pub enum Width {
+	Star,
+	Fixed(usize),
+}
+/// Parses a field width: `*` or a possibly empty run of decimal digits
+pub fn try_parse_field_width(str: &str) -> Result<(Width, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let bytes = str.as_bytes();
+	if bytes[0] == b'*' {
+		return Ok((Width::Star, &str[1..]));
+	}
+	let mut out: usize = 0;
+	let mut digits = 0;
+	while let Some(digit) = (bytes[digits] as char).to_digit(10) {
+		out *= 10;
+		out += digit as usize;
+		digits += 1;
+		if digits == bytes.len() {
+			return Err(TruncatedFormatCode);
+		}
+	}
+	Ok((Width::Fixed(out), &str[digits..]))
+}
+
+/// Parses the optional `.precision` part of a format code
+pub fn try_parse_precision(str: &str) -> Result<(Option<Width>, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let bytes = str.as_bytes();
+	if bytes[0] == b'.' {
+		try_parse_field_width(&str[1..]).map(|(r, s)| (Some(r), s))
+	} else {
+		Ok((None, str))
+	}
+}
+
+/// Skips length modifiers (`h`, `l`, `L`), they are accepted but ignored
+pub fn try_parse_length_modifier(str: &str) -> Result<&str, FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let bytes = str.as_bytes();
+	let mut idx = 0;
+	while bytes[idx] == b'h' || bytes[idx] == b'l' || bytes[idx] == b'L' {
+		idx += 1;
+		if bytes.len() == idx {
+			return Err(TruncatedFormatCode);
+		}
+	}
+	Ok(&str[idx..])
+}
+
+/// Conversion kind selected by the last character of a format code
+#[derive(Debug)]
+pub enum ConvTypeV {
+	Decimal,
+	Octal,
+	Hexadecimal,
+	Scientific,
+	Float,
+	Shorter,
+	Char,
+	String,
+	Percent,
+}
+/// Conversion kind together with the "uppercase output" flag
+pub struct ConvType {
+	v: ConvTypeV,
+	caps: bool,
+}
+
+/// Parses the conversion character which terminates a format code
+pub fn parse_conversion_type(str: &str) -> Result<(ConvType, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+
+	let code = str.as_bytes()[0];
+	let v: (ConvTypeV, bool) = match code {
+		b'd' | b'i' | b'u' => (ConvTypeV::Decimal, false),
+		b'o' => (ConvTypeV::Octal, false),
+		b'x' => (ConvTypeV::Hexadecimal, false),
+		b'X' => (ConvTypeV::Hexadecimal, true),
+		b'e' => (ConvTypeV::Scientific, false),
+		b'E' => (ConvTypeV::Scientific, true),
+		b'f' => (ConvTypeV::Float, false),
+		b'F' => (ConvTypeV::Float, true),
+		b'g' => (ConvTypeV::Shorter, false),
+		b'G' => (ConvTypeV::Shorter, true),
+		b'c' => (ConvTypeV::Char, false),
+		b's' => (ConvTypeV::String, false),
+		b'%' => (ConvTypeV::Percent, false),
+		c => return Err(UnrecognizedConversionType(c as char)),
+	};
+
+	Ok((ConvType { v: v.0, caps: v.1 }, &str[1..]))
+}
+
+/// A fully parsed `%...` format code
+#[derive(Debug)]
+pub struct Code<'s> {
+	mkey: &'s str,
+	cflags: CFlags,
+	width: Width,
+	precision: Option<Width>,
+	convtype: ConvTypeV,
+	caps: bool,
+}
+/// Parses one format code, `str` must start right after the `%`
+pub fn parse_code(str: &str) -> Result<(Code, &str), FormatError> {
+	if str.is_empty() {
+		return Err(TruncatedFormatCode);
+	}
+	let (mkey, str) = try_parse_mapping_key(str)?;
+	let (cflags, str) = try_parse_cflags(str)?;
+	let (width, str) = try_parse_field_width(str)?;
+	let (precision, str) = try_parse_precision(str)?;
+	let str = try_parse_length_modifier(str)?;
+	let (convtype, str) = parse_conversion_type(str)?;
+
+	Ok((
+		Code {
+			mkey,
+			cflags,
+			width,
+			precision,
+			convtype: convtype.v,
+			caps: convtype.caps,
+		},
+		str,
+	))
+}
+
+/// Piece of a format string: literal text or a format code
+#[derive(Debug)]
+pub enum Element<'s> {
+	String(&'s str),
+	Code(Code<'s>),
+}
+/// Splits a format string into literal text and format codes
+pub fn parse_codes(mut str: &str) -> Result<Vec<Element>, FormatError> {
+	let mut bytes = str.as_bytes();
+	let mut out = vec![];
+	let mut offset = 0;
+
+	loop {
+		while offset != bytes.len() && bytes[offset] != b'%' {
+			offset += 1;
+		}
+		// Flush the literal first, otherwise the text after the last code would be lost
+		out.push(Element::String(&str[0..offset]));
+		if offset == bytes.len() {
+			return Ok(out);
+		}
+		str = &str[offset + 1..];
+		let (code, nstr) = parse_code(str)?;
+		str = nstr;
+		bytes = str.as_bytes();
+		offset = 0;
+
+		out.push(Element::Code(code))
+	}
+}
+
+const NUMBERS: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";
+
+/// Renders `iv` in the given `radix`, zero padded according to `padding` and `precision`
+#[inline]
+pub fn render_integer(
+	out: &mut String,
+	iv: i64,
+	padding: usize,
+	precision: usize,
+	blank: bool,
+	sign: bool,
+	radix: i64,
+	prefix: &str,
+	caps: bool,
+) {
+	// Digit char indexes in reverse order, i.e
+	// for radix = 16 and n = 12f: [15, 2, 1]
+	let digits = if iv == 0 {
+		vec![0u8]
+	} else {
+		// Unsigned magnitude, so that i64::MIN does not overflow on negation
+		let mut v = iv.wrapping_abs() as u64;
+		let radix = radix as u64;
+		let mut nums = Vec::with_capacity(1);
+		while v > 0 {
+			nums.push((v % radix) as u8);
+			v /= radix;
+		}
+		nums
+	};
+	let neg = iv < 0;
+	let zp = padding.saturating_sub(if neg || blank || sign { 1 } else { 0 });
+	let zp2 = zp
+		.max(precision)
+		.saturating_sub(prefix.len() + digits.len());
+
+	if neg {
+		out.push('-')
+	} else if sign {
+		out.push('+');
+	} else if blank {
+		out.push(' ');
+	}
+
+	out.reserve(zp2);
+	for _ in 0..zp2 {
+		out.push('0');
+	}
+	out.push_str(prefix);
+
+	for digit in digits.into_iter().rev() {
+		let ch = NUMBERS[digit as usize] as char;
+		out.push(if caps { ch.to_ascii_uppercase() } else { ch });
+	}
+}
+
+/// Renders `iv` as a decimal number
+pub fn render_decimal(
+	out: &mut String,
+	iv: i64,
+	padding: usize,
+	precision: usize,
+	blank: bool,
+	sign: bool,
+) {
+	render_integer(out, iv, padding, precision, blank, sign, 10, "", false)
+}
+/// Renders `iv` as an octal number, `alt` adds the `0` prefix to non-zero values
+pub fn render_octal(
+	out: &mut String,
+	iv: i64,
+	padding: usize,
+	precision: usize,
+	alt: bool,
+	blank: bool,
+	sign: bool,
+) {
+	render_integer(
+		out,
+		iv,
+		padding,
+		precision,
+		blank,
+		sign,
+		8,
+		if alt && iv != 0 { "0" } else { "" },
+		false,
+	)
+}
+/// Renders `iv` as a hexadecimal number, `alt` adds the `0x`/`0X` prefix
+pub fn render_hexadecimal(
+	out: &mut String,
+	iv: i64,
+	padding: usize,
+	precision: usize,
+	alt: bool,
+	blank: bool,
+	sign: bool,
+	caps: bool,
+) {
+	render_integer(
+		out,
+		iv,
+		padding,
+		precision,
+		blank,
+		sign,
+		16,
+		match (alt, caps) {
+			(true, true) => "0X",
+			(true, false) => "0x",
+			(false, _) => "",
+		},
+		caps,
+	)
+}
+
+/// Renders `n` in fixed-point notation with `precision` fractional digits
+///
+/// `ensure_pt` forces a decimal point even without fractional digits, `trailing` keeps trailing zeros
+pub fn render_float(
+	out: &mut String,
+	n: f64,
+	mut padding: usize,
+	precision: usize,
+	blank: bool,
+	sign: bool,
+	ensure_pt: bool,
+	trailing: bool,
+) {
+	let dot_size = if precision == 0 && !ensure_pt { 0 } else { 1 };
+	padding = padding.saturating_sub(dot_size + precision);
+	// Work on the magnitude: flooring a negative value rounds away from zero
+	// and leaves a negative fractional part
+	let magnitude = n.abs();
+	let whole = magnitude.floor() as i64;
+	if n < 0.0 && whole == 0 {
+		// An integer part of zero cannot carry the sign of values in (-1, 0)
+		out.push('-');
+		render_decimal(out, 0, padding.saturating_sub(1), 0, false, false);
+	} else {
+		let whole = if n < 0.0 { -whole } else { whole };
+		render_decimal(out, whole, padding, 0, blank, sign);
+	}
+	if precision == 0 {
+		if ensure_pt {
+			out.push('.');
+		}
+		return;
+	}
+	let frac = (magnitude.fract() * 10.0_f64.powf(precision as f64) + 0.5).floor();
+	if trailing || frac > 0.0 {
+		out.push('.');
+		let mut frac_str = String::new();
+		render_decimal(&mut frac_str, frac as i64, precision, 0, false, false);
+		let kept = if trailing {
+			frac_str.as_str()
+		} else {
+			// Strip only the zeros at the very end, interior zeros are significant
+			frac_str.trim_end_matches('0')
+		};
+		out.push_str(kept);
+	} else if ensure_pt {
+		out.push('.');
+	}
+}
+
+/// Renders `n` in scientific notation: mantissa, `e`/`E` and a signed exponent of at least two digits
+pub fn render_float_sci(
+	out: &mut String,
+	n: f64,
+	mut padding: usize,
+	precision: usize,
+	blank: bool,
+	sign: bool,
+	ensure_pt: bool,
+	trailing: bool,
+	caps: bool,
+) {
+	let exponent = if n == 0.0 { 0.0 } else { n.abs().log10().floor() };
+	let mantissa = if exponent as i16 == -324 {
+		n * 10.0 / 10.0_f64.powf(exponent + 1.0)
+	} else {
+		n / 10.0_f64.powf(exponent)
+	};
+	let mut exponent_str = String::new();
+	render_decimal(&mut exponent_str, exponent as i64, 3, 0, false, true);
+
+	// +1 for e
+	padding = padding.saturating_sub(exponent_str.len() + 1);
+
+	render_float(
+		out, mantissa, padding, precision, blank, sign, ensure_pt, trailing,
+	);
+	out.push(if caps { 'E' } else { 'e' });
+	out.push_str(&exponent_str);
+}
+
+/// Renders `value` according to `code` into `out`, padded with spaces up to `width`
+pub fn format_code(
+	out: &mut String,
+	value: &Val,
+	code: &Code,
+	width: usize,
+	precision: Option<usize>,
+) -> Result<(), FormatError> {
+	let cflags = &code.cflags;
+	let (fpprec, iprec) = match precision {
+		Some(v) => (v, v),
+		None => (6, 0),
+	};
+	let padding = if cflags.zero && !cflags.left {
+		width
+	} else {
+		0
+	};
+
+	// TODO: If left padded, can optimize by writing directly to out
+	let mut tmp_out = String::new();
+
+	match code.convtype {
+		ConvTypeV::String => tmp_out.push_str(&to_string(value)?),
+		ConvTypeV::Decimal => {
+			let value = value.clone().try_cast_num("%d/%u/%i requires number")?;
+			render_decimal(
+				&mut tmp_out,
+				value as i64,
+				padding,
+				iprec,
+				cflags.blank,
+				cflags.sign,
+			);
+		}
+		ConvTypeV::Octal => {
+			let value = value.clone().try_cast_num("%o requires number")?;
+			render_octal(
+				&mut tmp_out,
+				value as i64,
+				padding,
+				iprec,
+				cflags.alt,
+				cflags.blank,
+				cflags.sign,
+			);
+		}
+		ConvTypeV::Hexadecimal => {
+			let value = value.clone().try_cast_num("%x/%X requires number")?;
+			render_hexadecimal(
+				&mut tmp_out,
+				value as i64,
+				padding,
+				iprec,
+				cflags.alt,
+				cflags.blank,
+				cflags.sign,
+				code.caps,
+			);
+		}
+		ConvTypeV::Scientific => {
+			let value = value.clone().try_cast_num("%e/%E requires number")?;
+			render_float_sci(
+				&mut tmp_out,
+				value,
+				padding,
+				fpprec,
+				cflags.blank,
+				cflags.sign,
+				cflags.alt,
+				true,
+				code.caps,
+			);
+		}
+		ConvTypeV::Float => {
+			let value = value.clone().try_cast_num("%f/%F requires number")?;
+			render_float(
+				&mut tmp_out,
+				value,
+				padding,
+				fpprec,
+				cflags.blank,
+				cflags.sign,
+				cflags.alt,
+				true,
+			);
+		}
+		ConvTypeV::Shorter => {
+			let value = value.clone().try_cast_num("%g/%G requires number")?;
+			let exponent = if value == 0.0 { 0.0 } else { value.abs().log10().floor() };
+			if exponent < -4.0 || exponent >= fpprec as f64 {
+				render_float_sci(
+					&mut tmp_out,
+					value,
+					padding,
+					fpprec.saturating_sub(1),
+					cflags.blank,
+					cflags.sign,
+					cflags.alt,
+					cflags.alt,
+					code.caps,
+				);
+			} else {
+				let digits_before_pt = 1.max(exponent as usize + 1);
+				render_float(
+					&mut tmp_out,
+					value,
+					padding,
+					fpprec.saturating_sub(digits_before_pt),
+					cflags.blank,
+					cflags.sign,
+					cflags.alt,
+					cflags.alt,
+				);
+			}
+		}
+		ConvTypeV::Char => match value.clone().unwrap_if_lazy()? {
+			Val::Num(n) => tmp_out.push(
+				std::char::from_u32(n as u32)
+					.ok_or_else(|| create_error(Error::InvalidUnicodeCodepointGot(n as u32)))?,
+			),
+			Val::Str(s) => {
+				if s.chars().count() != 1 {
+					create_error_result(Error::RuntimeError(
+						format!("%c expected 1 char string, got {}", s.chars().count()).into(),
+					))?;
+				}
+				tmp_out.push_str(&s);
+			}
+			_ => {
+				create_error_result(Error::TypeMismatch(
+					"%c requires number/string",
+					vec![ValType::Num, ValType::Str],
+					value.value_type()?,
+				))?;
+			}
+		},
+		ConvTypeV::Percent => tmp_out.push('%'),
+	};
+
+	// The field width is measured in characters, not in UTF-8 bytes
+	let padding = width.saturating_sub(tmp_out.chars().count());
+
+	if !cflags.left {
+		for _ in 0..padding {
+			out.push(' ');
+		}
+	}
+	out.push_str(&tmp_out);
+	if cflags.left {
+		for _ in 0..padding {
+			out.push(' ');
+		}
+	}
+
+	Ok(())
+}
+
+/// Formats `str`, taking the values for its codes from `values` in order
+pub fn format_arr(str: &str, mut values: &[Val]) -> Result<String, FormatError> {
+	let codes = parse_codes(str)?;
+	let mut out = String::new();
+
+	for code in codes {
+		match code {
+			Element::String(s) => {
+				out.push_str(s);
+			}
+			Element::Code(c) => {
+				let width = match c.width {
+					Width::Star => {
+						if values.is_empty() {
+							return Err(FormatError::NotEnoughValues);
+						}
+						let value = &values[0];
+						values = &values[1..];
+						value.clone().try_cast_num("field width")? as usize
+					}
+					Width::Fixed(n) => n,
+				};
+				let precision = match c.precision {
+					Some(Width::Star) => {
+						if values.is_empty() {
+							return Err(FormatError::NotEnoughValues);
+						}
+						let value = &values[0];
+						values = &values[1..];
+						Some(value.clone().try_cast_num("field precision")? as usize)
+					}
+					Some(Width::Fixed(n)) => Some(n),
+					None => None,
+				};
+				// `%%` is a literal percent sign, it must not consume a value
+				if let ConvTypeV::Percent = c.convtype {
+					format_code(&mut out, &Val::Null, &c, width, precision)?;
+					continue;
+				}
+				if values.is_empty() {
+					return Err(FormatError::NotEnoughValues);
+				}
+				let value = &values[0];
+				values = &values[1..];
+
+				format_code(&mut out, value, &c, width, precision)?;
+			}
+		}
+	}
+
+	Ok(out)
+}
+
+/// Formats `str`, looking up the value of every code by its mapping key in `values`
+pub fn format_obj(str: &str, values: &ObjValue) -> Result<String, FormatError> {
+	let codes = parse_codes(str)?;
+	let mut out = String::new();
+
+	for code in codes {
+		match code {
+			Element::String(s) => {
+				out.push_str(s);
+			}
+			Element::Code(c) => {
+				let width = match c.width {
+					Width::Star => {
+						return Err(FormatError::CannotUseStarWidthWithObject);
+					}
+					Width::Fixed(n) => n,
+				};
+				let precision = match c.precision {
+					Some(Width::Star) => {
+						return Err(FormatError::CannotUseStarWidthWithObject);
+					}
+					Some(Width::Fixed(n)) => Some(n),
+					None => None,
+				};
+				// `%%` is a literal percent sign, it needs no mapping key
+				if let ConvTypeV::Percent = c.convtype {
+					format_code(&mut out, &Val::Null, &c, width, precision)?;
+					continue;
+				}
+				// TODO: Operate on ref
+				let f: Rc<str> = c.mkey.into();
+				if f.is_empty() {
+					return Err(FormatError::MappingKeysRequired);
+				}
+				let value = if let Some(v) = values.get(f.clone())? {
+					v
+				} else {
+					return Err(FormatError::NoSuchField(f));
+				};
+
+				format_code(&mut out, &value, &c, width, precision)?;
+			}
+		}
+	}
+
+	Ok(out)
+}
+
+#[cfg(test)]
+pub mod test_format {
+	use super::*;
+
+	#[test]
+	fn parse() {
+		println!("{:?}", parse_codes("Hello %s world!!! %s %(aaa)s ww"));
+	}
+
+	#[test]
+	fn octals() {
+		assert_eq!(format_arr("%#o", &[Val::Num(8.0)]).unwrap(), "010");
+		assert_eq!(format_arr("%#4o", &[Val::Num(8.0)]).unwrap(), " 010");
+		assert_eq!(format_arr("%4o", &[Val::Num(8.0)]).unwrap(), "  10");
+		assert_eq!(format_arr("%04o", &[Val::Num(8.0)]).unwrap(), "0010");
+		assert_eq!(format_arr("%+4o", &[Val::Num(8.0)]).unwrap(), " +10");
+		assert_eq!(format_arr("%+04o", &[Val::Num(8.0)]).unwrap(), "+010");
+		assert_eq!(format_arr("%-4o", &[Val::Num(8.0)]).unwrap(), "10  ");
+		assert_eq!(format_arr("%+-4o", &[Val::Num(8.0)]).unwrap(), "+10 ");
+		assert_eq!(format_arr("%+-04o", &[Val::Num(8.0)]).unwrap(), "+10 ");
+	}
+
+	#[test]
+	fn trailing_text() {
+		assert_eq!(format_arr("%d items", &[Val::Num(3.0)]).unwrap(), "3 items");
+	}
+
+	#[test]
+	fn percent_literal() {
+		assert_eq!(format_arr("100%%", &[]).unwrap(), "100%");
+	}
+
+	#[test]
+	fn floats() {
+		assert_eq!(format_arr("%.2f", &[Val::Num(-1.5)]).unwrap(), "-1.50");
+		assert_eq!(format_arr("%g", &[Val::Num(1.05)]).unwrap(), "1.05");
+	}
+}
--- a/crates/jrsonnet-evaluator/src/builtin/mod.rs
+++ b/crates/jrsonnet-evaluator/src/builtin/mod.rs
@@ -1,2 +1,4 @@
 pub mod stdlib;
 pub use stdlib::*;
+
+pub mod format;
--- a/crates/jrsonnet-evaluator/src/error.rs
+++ b/crates/jrsonnet-evaluator/src/error.rs
@@ -60,6 +60,7 @@
 	StringManifestOutputIsNotAString,
 
 	ImportCallbackError(String),
+	InvalidUnicodeCodepointGot(u32),
 }
 
 #[derive(Clone, Debug)]
--- a/crates/jrsonnet-evaluator/src/evaluate.rs
+++ b/crates/jrsonnet-evaluator/src/evaluate.rs
@@ -1,4 +1,5 @@
 use crate::{
+	builtin::format::{format_arr, format_obj, FormatError},
 	context_creator, create_error, create_error_result, equals, escape_string_json, future_wrapper,
 	lazy_val, manifest_json_ex, parse_args, primitive_equals, push, with_state, Context,
 	ContextCreator, Error, FuncDesc, LazyBinding, LazyVal, ObjMember, ObjValue, Result, Val,
@@ -634,6 +635,22 @@
 					Ok(Val::Arr(Rc::new(new_arr)))
 				}))?,
 				// faster
+				("std", "format") => parse_args!(context, "std.format", args, 2, [
+					0, str: [Val::Str]!!Val::Str, vec![ValType::Str];
+					1, vals: [Val::Arr|Val::Obj], vec![ValType::Arr, ValType::Obj];
+				], {
+					let formatted = match vals {
+						Val::Arr(vals) => format_arr(&str, &vals),
+						Val::Obj(obj) => format_obj(&str, &obj),
+						_ => unreachable!()
+					};
+					// Malformed format strings are user errors, report them instead of panicking
+					formatted.map(|s| Val::Str(s.into())).map_err(|e| match e {
+						FormatError::ValueError(e) => e,
+						e => create_error(crate::error::Error::RuntimeError(format!("{:?}", e).into())),
+					})
+				})?,
+				// faster
 				("std", "range") => parse_args!(context, "std.range", args, 2, [
 					0, from: [Val::Num]!!Val::Num, vec![ValType::Num];
 					0, to: [Val::Num]!!Val::Num, vec![ValType::Num];
@@ -648,9 +665,11 @@
 					0, n: [Val::Num]!!Val::Num, vec![ValType::Num];
 				], {
 					let mut out = String::new();
-					out.push(std::char::from_u32(n as u32).unwrap());
-					Val::Str(out.into())
-				}),
+					out.push(std::char::from_u32(n as u32).ok_or_else(||
+						create_error(crate::error::Error::InvalidUnicodeCodepointGot(n as u32))
+					)?);
+					Ok(Val::Str(out.into()))
+				})?,
 				("std", "encodeUTF8") => parse_args!(context, "std.encodeUtf8", args, 1, [
 					0, str: [Val::Str]!!Val::Str, vec![ValType::Str];
 				], {
--- a/crates/jrsonnet-evaluator/src/val.rs
+++ b/crates/jrsonnet-evaluator/src/val.rs
@@ -405,6 +405,15 @@
 	out
 }
 
+pub fn to_string(val: &Val) -> Result<Rc<str>> {
+	Ok(match val {
+		Val::Bool(true) => "true".into(),
+		Val::Null => "null".into(),
+		Val::Str(s) => s.clone(),
+		v => v.clone().into_json(0)?,
+	})
+}
+
 #[test]
 fn json_test() {
 	assert_eq!(escape_string_json("\u{001f}"), "\"\\u001f\"")
-- 
gitstuff