git.delta.rocks / jrsonnet / refs/commits / 0831da3ed8d9

difftreelog

perf reimplement AST codegen

Yaroslav Bolyukin2022-08-07parent: #b305ce3.patch.diff
in: master

10 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -275,6 +275,7 @@
  "jrsonnet-gcmodule",
  "rustc-hash",
  "serde",
+ "structdump",
 ]
 
 [[package]]
@@ -292,10 +293,10 @@
 dependencies = [
  "jrsonnet-gcmodule",
  "jrsonnet-interner",
- "jrsonnet-stdlib",
  "peg",
  "serde",
  "static_assertions",
+ "structdump",
 ]
 
 [[package]]
@@ -312,6 +313,7 @@
  "serde",
  "serde_json",
  "serde_yaml_with_quirks",
+ "structdump",
 ]
 
 [[package]]
@@ -576,6 +578,28 @@
 checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 
 [[package]]
+name = "structdump"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0570327507bf281d8a6e6b0d4c082b12cb6bcee27efce755aa5efacd44076c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "structdump-derive",
+]
+
+[[package]]
+name = "structdump-derive"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29cc0b59cfa11f1bceda09a9a7e37e6a6c3138575fd24ade8aa9af6d09aedf28"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
 name = "syn"
 version = "1.0.96"
 source = "registry+https://github.com/rust-lang/crates.io-index"
modifiedCargo.tomldiffbeforeafterboth
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,6 @@
 opt-level = 3
 lto = "fat"
 codegen-units = 1
-debug = 0
+# debug = 0
 panic = "abort"
-strip = true
+# strip = true
modifiedcrates/jrsonnet-interner/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-interner/Cargo.toml
+++ b/crates/jrsonnet-interner/Cargo.toml
@@ -7,12 +7,19 @@
 edition = "2021"
 
 [features]
-default = ["serde"]
+default = []
+# Implement value serialization using structdump
+structdump = ["dep:structdump"]
+# Implement value serialization using serde
+#
+# Warning: serialized values won't be deduplicated
 serde = ["dep:serde"]
 
 [dependencies]
 jrsonnet-gcmodule = { version = "0.3.4" }
 
 serde = { version = "1.0", optional = true }
+structdump = { version = "0.2.0", optional = true }
+
 rustc-hash = "1.1"
 hashbrown = { version = "0.12.1", features = ["inline-more"] }
modifiedcrates/jrsonnet-interner/src/lib.rsdiffbeforeafterboth
--- a/crates/jrsonnet-interner/src/lib.rs
+++ b/crates/jrsonnet-interner/src/lib.rs
@@ -205,6 +205,7 @@
 	}
 }
 
+#[cfg(feature = "serde")]
 impl serde::Serialize for IStr {
 	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
 	where
@@ -214,6 +215,7 @@
 	}
 }
 
+#[cfg(feature = "serde")]
 impl<'de> serde::Deserialize<'de> for IStr {
 	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
 	where
@@ -224,6 +226,24 @@
 	}
 }
 
+#[cfg(feature = "structdump")]
+impl structdump::Codegen for IStr { // Code generation for interned strings; only compiled when the `structdump` feature is enabled.
+	fn gen_code( // Registers an expression with `res` that rebuilds this string via `IStr::from(<text>)`, and returns what `add_code` returns.
+		&self,
+		res: &mut structdump::CodegenResult,
+		_unique: bool, // ignored by this implementation
+	) -> structdump::TokenStream {
+		let s: &str = self; // borrow the text of the interned string (relies on `&IStr` coercing to `&str`)
+		res.add_code(
+			structdump::quote! {
+				structdump_import::IStr::from(#s)
+			},
+			Some(structdump::quote![structdump_import::IStr]), // presumably the type of the generated expression; confirm against structdump's `add_code`
+			false, // NOTE(review): meaning of this flag is not visible here; check structdump's `add_code` signature
+		)
+	}
+}
+
 thread_local! {
 	static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));
 }
modifiedcrates/jrsonnet-parser/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-parser/Cargo.toml
+++ b/crates/jrsonnet-parser/Cargo.toml
@@ -7,7 +7,23 @@
 edition = "2021"
 
 [features]
+default = []
 exp-destruct = []
+# Implement serialization of AST using structdump
+#
+# Structdump generates code which exactly replicates the passed AST.
+# Unlike serde, it has no code bloat problem, and is the recommended option.
+#
+# The only limitation is that the serialized form is only usable when it is built from a build script
+structdump = ["dep:structdump", "jrsonnet-interner/structdump"]
+# Implement serialization of AST using serde
+#
+# Warning: as serde doesn't deduplicate strings, the `Source` struct will bloat
+# the output binary with repeated source code. To resolve this issue, you should either
+# override serialization of this struct using a custom `Serializer`/`Deserializer`,
+# avoid relying on `Source` and fill its `source_code` with an empty value, or use
+# `structdump` instead
+serde = ["dep:serde"]
 
 [dependencies]
 jrsonnet-interner = { path = "../jrsonnet-interner", version = "0.4.2" }
@@ -18,6 +34,4 @@
 peg = "0.8.0"
 
 serde = { version = "1.0", features = ["derive", "rc"], optional = true }
-
-[dev-dependencies]
-jrsonnet-stdlib = { path = "../jrsonnet-stdlib", version = "0.4.2" }
+structdump = { version = "0.2.0", features = ["derive"], optional = true }
modifiedcrates/jrsonnet-parser/src/expr.rsdiffbeforeafterboth
--- a/crates/jrsonnet-parser/src/expr.rs
+++ b/crates/jrsonnet-parser/src/expr.rs
@@ -8,10 +8,13 @@
 use jrsonnet_interner::IStr;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
+#[cfg(feature = "structdump")]
+use structdump::Codegen;
 
 use crate::source::Source;
 
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[derive(Debug, PartialEq, Trace)]
 pub enum FieldName {
 	/// {fixed: 2}
@@ -20,6 +23,7 @@
 	Dyn(LocExpr),
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)]
 pub enum Visibility {
@@ -37,10 +41,12 @@
 	}
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Clone, Debug, PartialEq, Trace)]
 pub struct AssertStmt(pub LocExpr, pub Option<LocExpr>);
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct FieldMember {
@@ -51,6 +57,7 @@
 	pub value: LocExpr,
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub enum Member {
@@ -59,6 +66,7 @@
 	AssertStmt(AssertStmt),
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)]
 pub enum UnaryOpType {
@@ -84,6 +92,7 @@
 	}
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Trace)]
 pub enum BinaryOpType {
@@ -150,11 +159,13 @@
 }
 
 /// name, default value
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct Param(pub Destruct, pub Option<LocExpr>);
 
 /// Defined function parameters
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Trace)]
 pub struct ParamsDesc(pub Rc<Vec<Param>>);
@@ -166,6 +177,7 @@
 	}
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct ArgsDesc {
@@ -187,6 +199,7 @@
 	Drop,
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Trace)]
 pub enum Destruct {
@@ -216,6 +229,7 @@
 	}
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Trace)]
 pub enum BindSpec {
@@ -230,14 +244,17 @@
 	},
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct IfSpecData(pub LocExpr);
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct ForSpecData(pub IStr, pub LocExpr);
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub enum CompSpec {
@@ -245,6 +262,7 @@
 	ForSpec(ForSpecData),
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct ObjComp {
@@ -256,6 +274,7 @@
 	pub compspecs: Vec<CompSpec>,
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub enum ObjBody {
@@ -263,6 +282,7 @@
 	ObjComp(ObjComp),
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Trace)]
 pub enum LiteralType {
@@ -274,6 +294,7 @@
 	False,
 }
 
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub struct SliceDesc {
@@ -283,6 +304,7 @@
 }
 
 /// Syntax base
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, PartialEq, Trace)]
 pub enum Expr {
@@ -351,6 +373,7 @@
 }
 
 /// file, begin offset, end offset
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Clone, PartialEq, Eq, Trace)]
 #[trace(skip)]
@@ -373,6 +396,7 @@
 
 /// Holds AST expression and its location in source file
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "structdump", derive(Codegen))]
 #[derive(Clone, PartialEq, Trace)]
 pub struct LocExpr(pub Rc<Expr>, pub ExprLocation);
 
modifiedcrates/jrsonnet-parser/src/source.rsdiffbeforeafterboth
before · crates/jrsonnet-parser/src/source.rs
1use std::{2	borrow::Cow,3	fmt,4	path::{Component, Path, PathBuf},5	rc::Rc,6};78use jrsonnet_gcmodule::{Trace, Tracer};9use jrsonnet_interner::IStr;10#[cfg(feature = "serde")]11use serde::{Deserialize, Serialize};1213use crate::location::{location_to_offset, offset_to_location, CodeLocation};1415#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]16#[derive(PartialEq, Eq, Debug, Hash, Clone)]17pub enum SourcePath {18	/// This file is located on disk19	Path(PathBuf),20	/// This file is located somewhere else (I.e http), but it can refer to relative paths, and is egilible for caching21	Custom(String),22	/// This file is only located in memory, and can't be cached23	Virtual(Cow<'static, str>),24}25impl Trace for SourcePath {26	fn trace(&self, _tracer: &mut Tracer) {}2728	fn is_type_tracked() -> bool {29		false30	}31}3233impl SourcePath {34	/// Should import resolver be able to read file by this path?35	pub fn can_load(&self) -> bool {36		matches!(self, Self::Path(_) | Self::Custom(_))37	}38}3940/// Either real file, or virtual41/// Hash of FileName always have same value as raw Path, to make it possible to use with raw_entry_mut42#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]43#[derive(Clone, PartialEq, Eq, Debug)]44pub struct Source(Rc<(SourcePath, IStr)>);45static_assertions::assert_eq_size!(Source, *const ());4647impl Trace for Source {48	fn trace(&self, _tracer: &mut Tracer) {}4950	fn is_type_tracked() -> bool {51		false52	}53}5455impl Source {56	/// Fails when path contains inner /../ or /./ references, or not absolute57	pub fn new(path: SourcePath, code: IStr) -> Option<Self> {58		if let SourcePath::Path(path) = &path {59			if !path.is_absolute()60				|| path61					.components()62					.any(|c| matches!(c, Component::CurDir | Component::ParentDir))63			{64				return None;65			}66		}67		Some(Self(Rc::new((path, code))))68	}6970	pub fn new_virtual(n: Cow<'static, str>, code: IStr) -> Self {71		Self(Rc::new((SourcePath::Virtual(n), 
code)))72	}7374	pub fn short_display(&self) -> ShortDisplay {75		ShortDisplay(self.clone())76	}7778	/// Returns Some if this file is loaded from FS79	pub fn path(&self) -> Option<&Path> {80		match self.source_path() {81			SourcePath::Path(r) => Some(r),82			SourcePath::Custom(_) => None,83			SourcePath::Virtual(_) => None,84		}85	}86	pub fn code(&self) -> &str {87		&self.0 .188	}8990	pub fn source_path(&self) -> &SourcePath {91		&self.0 .0 as &SourcePath92	}9394	pub fn map_source_locations(&self, locs: &[u32]) -> Vec<CodeLocation> {95		offset_to_location(&self.0 .1, locs)96	}97	pub fn map_from_source_location(&self, line: usize, column: usize) -> Option<usize> {98		location_to_offset(&self.0 .1, line, column)99	}100}101pub struct ShortDisplay(Source);102impl fmt::Display for ShortDisplay {103	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {104		match &self.0 .0 .0 as &SourcePath {105			SourcePath::Path(r) => {106				write!(107					f,108					"{}",109					r.file_name().expect("path is valid").to_string_lossy()110				)111			}112			SourcePath::Custom(r) => write!(f, "{}", r),113			SourcePath::Virtual(n) => write!(f, "{}", n),114		}115	}116}
modifiedcrates/jrsonnet-stdlib/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-stdlib/Cargo.toml
+++ b/crates/jrsonnet-stdlib/Cargo.toml
@@ -7,9 +7,10 @@
 edition = "2021"
 
 [features]
-default = []
-# Serializes standard library AST, and deserialize on start, instead of parsing it every run from text
-serialized-stdlib = ["bincode", "jrsonnet-parser/serde"]
+default = ["codegenerated-stdlib"]
+# Speed up initialization by generating code for the parsed stdlib, instead
+# of invoking the parser for it
+codegenerated-stdlib = ["jrsonnet-parser/structdump"]
 # Enables legacy `std.thisFile` support, at the cost of worse caching
 legacy-this-file = []
 # Add order preservation flag to some functions
@@ -45,8 +46,5 @@
 serde_yaml_with_quirks = "0.8.24"
 
 [build-dependencies]
-jrsonnet-parser = { path = "../jrsonnet-parser", version = "0.4.2", features = [
-    "serde",
-] }
-serde = "1.0"
-bincode = "1.3"
+jrsonnet-parser = { path = "../jrsonnet-parser", version = "0.4.2" }
+structdump = { version = "0.2.0", features = ["derive"] }
modifiedcrates/jrsonnet-stdlib/build.rsdiffbeforeafterboth
--- a/crates/jrsonnet-stdlib/build.rs
+++ b/crates/jrsonnet-stdlib/build.rs
@@ -1,7 +1,7 @@
 use std::{borrow::Cow, env, fs::File, io::Write, path::Path};
 
-use bincode::serialize;
 use jrsonnet_parser::{parse, ParserSettings, Source};
+use structdump::CodegenResult;
 
 fn main() {
 	let parsed = parse(
@@ -15,10 +15,15 @@
 	)
 	.expect("parse");
 
+	let mut out = CodegenResult::default();
+
+	let v = out.codegen(&parsed, true);
+
 	{
 		let out_dir = env::var("OUT_DIR").unwrap();
-		let dest_path = Path::new(&out_dir).join("stdlib.bincode");
+		let dest_path = Path::new(&out_dir).join("stdlib.rs");
 		let mut f = File::create(&dest_path).unwrap();
-		f.write_all(&serialize(&parsed).unwrap()).unwrap();
+		f.write_all(v.to_string().replace(';', ";\n").as_bytes())
+			.unwrap();
 	}
 }
modifiedcrates/jrsonnet-stdlib/src/expr.rsdiffbeforeafterboth
--- a/crates/jrsonnet-stdlib/src/expr.rs
+++ b/crates/jrsonnet-stdlib/src/expr.rs
@@ -1,22 +1,103 @@
-use std::borrow::Cow;
+use jrsonnet_parser::LocExpr;
 
-use jrsonnet_parser::{LocExpr, ParserSettings, Source};
+mod structdump_import { // Re-exports gathered under one module path; presumably the code included from OUT_DIR/stdlib.rs addresses names as `structdump_import::…` — confirm against the generated output.
+	pub(super) use std::{borrow::Cow, rc::Rc};
 
-pub const STDLIB_STR: &str = include_str!("./std.jsonnet");
+	pub(super) use jrsonnet_parser::*;
+	pub(super) use vec;
+	pub(super) use Option;
+}
 
 pub fn stdlib_expr() -> LocExpr { // Returns the standard library as a `LocExpr`; cargo features select which of the blocks below produces it.
 	#[cfg(feature = "serialized-stdlib")] // NOTE(review): this same change removes the `serialized-stdlib` feature from jrsonnet-stdlib's Cargo.toml and makes build.rs write stdlib.rs instead of stdlib.bincode, so this block looks like dead code — confirm and remove.
 	{
+		use bincode::{BincodeRead, DefaultOptions, Options};
+		use serde::{Deserialize, Deserializer};
+
+		struct LocDeserializer<R, O: Options> { // NOTE(review): defined here, but the deserialize call at the end of this block uses the plain bincode deserializer, not this wrapper.
+			source: Source,
+			wrapped: bincode::Deserializer<R, O>,
+		}
+		macro_rules! delegate {
+			($(fn $name:ident($($arg:ident: $ty:ty),*))+) => {$(
+				fn $name<V>(mut self $(, $arg: $ty)*, visitor: V) -> Result<V::Value, Self::Error>
+				where V: serde::de::Visitor<'de>,
+				{
+					self.wrapped.$name($($arg,)* visitor)
+				}
+			)+};
+		}
+		impl<'de, R, O> Deserializer<'de> for LocDeserializer<R, O>
+		where
+			R: BincodeRead<'de>,
+			O: Options,
+		{
+			type Error = <&'de mut bincode::Deserializer<R, O> as Deserializer<'de>>::Error;
+
+			delegate! {
+				fn deserialize_any()
+				fn deserialize_bool()
+				fn deserialize_u16()
+				fn deserialize_u32()
+				fn deserialize_u64()
+				fn deserialize_i16()
+				fn deserialize_i32()
+				fn deserialize_i64()
+				fn deserialize_f32()
+				fn deserialize_f64()
+				fn deserialize_u128()
+				fn deserialize_i128()
+				fn deserialize_u8()
+				fn deserialize_i8()
+				fn deserialize_unit()
+				fn deserialize_char()
+				fn deserialize_str()
+				fn deserialize_string()
+				fn deserialize_bytes()
+				fn deserialize_byte_buf()
+				fn deserialize_enum(name: &'static str, variants: &'static [&'static str])
+				fn deserialize_tuple(len: usize)
+				fn deserialize_option()
+				fn deserialize_seq()
+				fn deserialize_map()
+				fn deserialize_struct(name: &'static str, fields: &'static [&'static str])
+				fn deserialize_identifier()
+				fn deserialize_newtype_struct(name: &'static str)
+				fn deserialize_unit_struct(name: &'static str)
+				fn deserialize_tuple_struct(name: &'static str, len: usize)
+				fn deserialize_ignored_any()
+			}
+
+			fn is_human_readable(&self) -> bool {
+				false
+			}
+		}
+
+		// In build.rs, the Source object is populated with empty values; the deserializer wrapper loads the correct values on deserialization
+		let mut deserializer = bincode::Deserializer::from_slice(
+			include_bytes!(concat!(env!("OUT_DIR"), "/stdlib.bincode")),
+			DefaultOptions::new()
+				.with_fixint_encoding()
+				.allow_trailing_bytes(),
+		);
+
 		// Should not panic, stdlib.bincode is generated in build.rs
-		return bincode::deserialize(include_bytes!(concat!(env!("OUT_DIR"), "/stdlib.bincode")))
-			.unwrap();
+		LocExpr::deserialize(&mut deserializer).unwrap()
 	}
 
-	jrsonnet_parser::parse(
-		STDLIB_STR,
-		&ParserSettings {
-			file_name: Source::new_virtual(Cow::Borrowed("<std>"), STDLIB_STR.into()),
-		},
-	)
-	.unwrap()
+	#[cfg(feature = "codegenerated-stdlib")] // Default path: the expression is the Rust code that build.rs wrote to OUT_DIR/stdlib.rs.
+	{
+		include!(concat!(env!("OUT_DIR"), "/stdlib.rs"))
+	}
+
+	#[cfg(not(feature = "codegenerated-stdlib"))] // NOTE(review): this fallback uses STDLIB_STR, ParserSettings, Source and Cow, but this change removes the STDLIB_STR constant and narrows the top-level imports to LocExpr — verify it still builds with default features disabled.
+	{
+		jrsonnet_parser::parse(
+			STDLIB_STR,
+			&ParserSettings {
+				file_name: Source::new_virtual(Cow::Borrowed("<std>"), STDLIB_STR.into()),
+			},
+		)
+		.unwrap()
+	}
 }