From 0831da3ed8d9e151fcf0804538f97e4e94c9c008 Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Sun, 07 Aug 2022 19:32:57 +0000 Subject: [PATCH] perf: reimplement AST codegen --- --- a/Cargo.lock +++ b/Cargo.lock @@ -275,6 +275,7 @@ "jrsonnet-gcmodule", "rustc-hash", "serde", + "structdump", ] [[package]] @@ -292,10 +293,10 @@ dependencies = [ "jrsonnet-gcmodule", "jrsonnet-interner", - "jrsonnet-stdlib", "peg", "serde", "static_assertions", + "structdump", ] [[package]] @@ -312,6 +313,7 @@ "serde", "serde_json", "serde_yaml_with_quirks", + "structdump", ] [[package]] @@ -576,6 +578,28 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] +name = "structdump" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0570327507bf281d8a6e6b0d4c082b12cb6bcee27efce755aa5efacd44076c1" +dependencies = [ + "proc-macro2", + "quote", + "structdump-derive", +] + +[[package]] +name = "structdump-derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29cc0b59cfa11f1bceda09a9a7e37e6a6c3138575fd24ade8aa9af6d09aedf28" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] name = "syn" version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,6 @@ opt-level = 3 lto = "fat" codegen-units = 1 -debug = 0 +# debug = 0 panic = "abort" -strip = true +# strip = true --- a/crates/jrsonnet-interner/Cargo.toml +++ b/crates/jrsonnet-interner/Cargo.toml @@ -7,12 +7,19 @@ edition = "2021" [features] -default = ["serde"] +default = [] +# Implement value serialization using structdump +structdump = ["dep:structdump"] +# Implement value serialization using serde +# +# Warning: serialized values won't be deduplicated serde = ["dep:serde"] [dependencies] jrsonnet-gcmodule = { version = "0.3.4" } serde = { version = "1.0", optional = true } +structdump = { version = "0.2.0", 
optional = true } + rustc-hash = "1.1" hashbrown = { version = "0.12.1", features = ["inline-more"] } --- a/crates/jrsonnet-interner/src/lib.rs +++ b/crates/jrsonnet-interner/src/lib.rs @@ -205,6 +205,7 @@ } } +#[cfg(feature = "serde")] impl serde::Serialize for IStr { fn serialize(&self, serializer: S) -> Result where @@ -214,6 +215,7 @@ } } +#[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for IStr { fn deserialize(deserializer: D) -> Result where @@ -224,6 +226,24 @@ } } +#[cfg(feature = "structdump")] +impl structdump::Codegen for IStr { + fn gen_code( + &self, + res: &mut structdump::CodegenResult, + _unique: bool, + ) -> structdump::TokenStream { + let s: &str = self; + res.add_code( + structdump::quote! { + structdump_import::IStr::from(#s) + }, + Some(structdump::quote![structdump_import::IStr]), + false, + ) + } +} + thread_local! { static POOL: RefCell>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default())); } --- a/crates/jrsonnet-parser/Cargo.toml +++ b/crates/jrsonnet-parser/Cargo.toml @@ -7,7 +7,23 @@ edition = "2021" [features] +default = [] exp-destruct = [] +# Implement serialization of AST using structdump +# +# Structdump generates code which exactly replicates the passed AST. +# Contrary to serde, it has no code bloat problem, and is recommended. +# +# The only limitation is that the serialized form is only usable if built from a build script +structdump = ["dep:structdump", "jrsonnet-interner/structdump"] +# Implement serialization of AST using serde +# +# Warning: as serde doesn't deduplicate strings, `Source` struct will bloat +# output binary with repeating source code.
To resolve this issue, you should either +# override serialization of this struct using a custom `Serializer`/`Deserializer`, +# avoid relying on `Source` and fill its `source_code` with an empty value, or use `structdump` +# instead +serde = ["dep:serde"] [dependencies] jrsonnet-interner = { path = "../jrsonnet-interner", version = "0.4.2" } @@ -18,6 +34,4 @@ peg = "0.8.0" serde = { version = "1.0", features = ["derive", "rc"], optional = true } - -[dev-dependencies] -jrsonnet-stdlib = { path = "../jrsonnet-stdlib", version = "0.4.2" } +structdump = { version = "0.2.0", features = ["derive"], optional = true } --- a/crates/jrsonnet-parser/src/expr.rs +++ b/crates/jrsonnet-parser/src/expr.rs @@ -8,10 +8,13 @@ use jrsonnet_interner::IStr; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +#[cfg(feature = "structdump")] +use structdump::Codegen; use crate::source::Source; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "structdump", derive(Codegen))] #[derive(Debug, PartialEq, Trace)] pub enum FieldName { /// {fixed: 2} @@ -20,6 +23,7 @@ Dyn(LocExpr), } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)] pub enum Visibility { @@ -37,10 +41,12 @@ } } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, Debug, PartialEq, Trace)] pub struct AssertStmt(pub LocExpr, pub Option); +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct FieldMember { @@ -51,6 +57,7 @@ pub value: LocExpr, } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub enum Member { @@ -59,6 +66,7 @@ AssertStmt(AssertStmt), } +#[cfg_attr(feature = "structdump", 
derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)] pub enum UnaryOpType { @@ -84,6 +92,7 @@ } } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)] pub enum BinaryOpType { @@ -150,11 +159,13 @@ } /// name, default value +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct Param(pub Destruct, pub Option); /// Defined function parameters +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Trace)] pub struct ParamsDesc(pub Rc>); @@ -166,6 +177,7 @@ } } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct ArgsDesc { @@ -187,6 +199,7 @@ Drop, } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Trace)] pub enum Destruct { @@ -216,6 +229,7 @@ } } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Trace)] pub enum BindSpec { @@ -230,14 +244,17 @@ }, } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct IfSpecData(pub LocExpr); +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct ForSpecData(pub IStr, pub LocExpr); +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, 
PartialEq, Trace)] pub enum CompSpec { @@ -245,6 +262,7 @@ ForSpec(ForSpecData), } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct ObjComp { @@ -256,6 +274,7 @@ pub compspecs: Vec, } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub enum ObjBody { @@ -263,6 +282,7 @@ ObjComp(ObjComp), } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Eq, Clone, Copy, Trace)] pub enum LiteralType { @@ -274,6 +294,7 @@ False, } +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub struct SliceDesc { @@ -283,6 +304,7 @@ } /// Syntax base +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, PartialEq, Trace)] pub enum Expr { @@ -351,6 +373,7 @@ } /// file, begin offset, end offset +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Eq, Trace)] #[trace(skip)] @@ -373,6 +396,7 @@ /// Holds AST expression and its location in source file #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "structdump", derive(Codegen))] #[derive(Clone, PartialEq, Trace)] pub struct LocExpr(pub Rc, pub ExprLocation); --- a/crates/jrsonnet-parser/src/source.rs +++ b/crates/jrsonnet-parser/src/source.rs @@ -9,9 +9,12 @@ use jrsonnet_interner::IStr; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +#[cfg(feature = "structdump")] +use structdump::Codegen; use crate::location::{location_to_offset, offset_to_location, CodeLocation}; +#[cfg_attr(feature = "structdump", derive(Codegen))] 
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(PartialEq, Eq, Debug, Hash, Clone)] pub enum SourcePath { @@ -39,9 +42,10 @@ /// Either real file, or virtual /// Hash of FileName always have same value as raw Path, to make it possible to use with raw_entry_mut +#[cfg_attr(feature = "structdump", derive(Codegen))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Eq, Debug)] -pub struct Source(Rc<(SourcePath, IStr)>); +pub struct Source(pub Rc<(SourcePath, IStr)>); static_assertions::assert_eq_size!(Source, *const ()); impl Trace for Source { --- a/crates/jrsonnet-stdlib/Cargo.toml +++ b/crates/jrsonnet-stdlib/Cargo.toml @@ -7,9 +7,10 @@ edition = "2021" [features] -default = [] -# Serializes standard library AST, and deserialize on start, instead of parsing it every run from text -serialized-stdlib = ["bincode", "jrsonnet-parser/serde"] +default = ["codegenerated-stdlib"] +# Speed-up initialization by generating code for parsed stdlib, instead +# of invoking parser for it +codegenerated-stdlib = ["jrsonnet-parser/structdump"] # Enables legacy `std.thisFile` support, at the cost of worse caching legacy-this-file = [] # Add order preservation flag to some functions @@ -45,8 +46,5 @@ serde_yaml_with_quirks = "0.8.24" [build-dependencies] -jrsonnet-parser = { path = "../jrsonnet-parser", version = "0.4.2", features = [ - "serde", -] } -serde = "1.0" -bincode = "1.3" +jrsonnet-parser = { path = "../jrsonnet-parser", version = "0.4.2" } +structdump = { version = "0.2.0", features = ["derive"] } --- a/crates/jrsonnet-stdlib/build.rs +++ b/crates/jrsonnet-stdlib/build.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, env, fs::File, io::Write, path::Path}; -use bincode::serialize; use jrsonnet_parser::{parse, ParserSettings, Source}; +use structdump::CodegenResult; fn main() { let parsed = parse( @@ -15,10 +15,15 @@ ) .expect("parse"); + let mut out = CodegenResult::default(); + + let v = out.codegen(&parsed, true); + 
{ let out_dir = env::var("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("stdlib.bincode"); + let dest_path = Path::new(&out_dir).join("stdlib.rs"); let mut f = File::create(&dest_path).unwrap(); - f.write_all(&serialize(&parsed).unwrap()).unwrap(); + f.write_all(v.to_string().replace(';', ";\n").as_bytes()) + .unwrap(); } } --- a/crates/jrsonnet-stdlib/src/expr.rs +++ b/crates/jrsonnet-stdlib/src/expr.rs @@ -1,22 +1,103 @@ -use std::borrow::Cow; +use jrsonnet_parser::LocExpr; -use jrsonnet_parser::{LocExpr, ParserSettings, Source}; +mod structdump_import { + pub(super) use std::{borrow::Cow, rc::Rc}; -pub const STDLIB_STR: &str = include_str!("./std.jsonnet"); + pub(super) use jrsonnet_parser::*; + pub(super) use vec; + pub(super) use Option; +} pub fn stdlib_expr() -> LocExpr { #[cfg(feature = "serialized-stdlib")] { + use bincode::{BincodeRead, DefaultOptions, Options}; + use serde::{Deserialize, Deserializer}; + + struct LocDeserializer { + source: Source, + wrapped: bincode::Deserializer, + } + macro_rules! delegate { + ($(fn $name:ident($($arg:ident: $ty:ty),*))+) => {$( + fn $name(mut self $(, $arg: $ty)*, visitor: V) -> Result + where V: serde::de::Visitor<'de>, + { + self.wrapped.$name($($arg,)* visitor) + } + )+}; + } + impl<'de, R, O> Deserializer<'de> for LocDeserializer + where + R: BincodeRead<'de>, + O: Options, + { + type Error = <&'de mut bincode::Deserializer as Deserializer<'de>>::Error; + + delegate! 
{ + fn deserialize_any() + fn deserialize_bool() + fn deserialize_u16() + fn deserialize_u32() + fn deserialize_u64() + fn deserialize_i16() + fn deserialize_i32() + fn deserialize_i64() + fn deserialize_f32() + fn deserialize_f64() + fn deserialize_u128() + fn deserialize_i128() + fn deserialize_u8() + fn deserialize_i8() + fn deserialize_unit() + fn deserialize_char() + fn deserialize_str() + fn deserialize_string() + fn deserialize_bytes() + fn deserialize_byte_buf() + fn deserialize_enum(name: &'static str, variants: &'static [&'static str]) + fn deserialize_tuple(len: usize) + fn deserialize_option() + fn deserialize_seq() + fn deserialize_map() + fn deserialize_struct(name: &'static str, fields: &'static [&'static str]) + fn deserialize_identifier() + fn deserialize_newtype_struct(name: &'static str) + fn deserialize_unit_struct(name: &'static str) + fn deserialize_tuple_struct(name: &'static str, len: usize) + fn deserialize_ignored_any() + } + + fn is_human_readable(&self) -> bool { + false + } + } + + // In build.rs, Source object is populated with empty values, deserializer wrapper loads correct values on deserialize + let mut deserializer = bincode::Deserializer::from_slice( + include_bytes!(concat!(env!("OUT_DIR"), "/stdlib.bincode")), + DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + ); + // Should not panic, stdlib.bincode is generated in build.rs - return bincode::deserialize(include_bytes!(concat!(env!("OUT_DIR"), "/stdlib.bincode"))) - .unwrap(); + LocExpr::deserialize(&mut deserializer).unwrap() } - jrsonnet_parser::parse( - STDLIB_STR, - &ParserSettings { - file_name: Source::new_virtual(Cow::Borrowed(""), STDLIB_STR.into()), - }, - ) - .unwrap() + #[cfg(feature = "codegenerated-stdlib")] + { + include!(concat!(env!("OUT_DIR"), "/stdlib.rs")) + } + + #[cfg(not(feature = "codegenerated-stdlib"))] + { + jrsonnet_parser::parse( + STDLIB_STR, + &ParserSettings { + file_name: Source::new_virtual(Cow::Borrowed(""), 
STDLIB_STR.into()), + }, + ) + .unwrap() + } } -- gitstuff