git.delta.rocks / jrsonnet / refs/commits / 8eff8514f067

difftreelog

source

crates/jsonnet-parser/src/lib.rs16.2 KiBsourcehistory
1#![feature(box_syntax)]2#![feature(test)]34extern crate test;56use peg::parser;7use std::{path::PathBuf, rc::Rc};8mod expr;9mod string_processing;10pub use expr::*;11pub use peg;12use string_processing::deent;1314pub struct ParserSettings {15	pub loc_data: bool,16	pub file_name: PathBuf,17}1819parser! {20	grammar jsonnet_parser() for str {21		use peg::ParseLiteral;2223		/// Standard C-like comments24		rule comment()25			= "//" (!['\n'][_])* "\n"26			/ "/*" ((!("*/")[_][_])/("\\" "*/"))* "*/"27			/ "#" (!['\n'][_])* "\n"2829		rule _() = ([' ' | '\n' | '\t'] / comment())*3031		/// For comma-delimited elements32		rule comma() = quiet!{_ "," _} / expected!("<comma>")33		rule alpha() -> char = c:$(['_' | 'a'..='z' | 'A'..='Z']) {c.chars().next().unwrap()}34		rule digit() -> char = d:$(['0'..='9']) {d.chars().next().unwrap()}35		rule end_of_ident() = !['0'..='9' | '_' | 'a'..='z' | 'A'..='Z']36		/// Sequence of digits37		rule uint() -> u64 = a:$(digit()+) { a.parse().unwrap() }38		/// Number in scientific notation format39		rule number() -> f64 = quiet!{a:$(uint() ("." uint())? (['e'|'E'] (s:['+'|'-'])? uint())?) { a.parse().unwrap() }} / expected!("<number>")4041		/// Reserved word followed by any non-alphanumberic42		rule reserved() = ("assert" / "else" / "error" / "false" / "for" / "function" / "if" / "import" / "importstr" / "in" / "local" / "null" / "tailstrict" / "then" / "self" / "super" / "true") end_of_ident()43		rule id() -> String = quiet!{ !reserved() s:$(alpha() (alpha() / digit())*) {s.to_owned()}} / expected!("<identifier>")4445		rule keyword(id: &'static str)46			= ##parse_string_literal(id) end_of_ident()47		// Adds location data information to existing expression48		rule l(s: &ParserSettings, x: rule<Expr>) -> LocExpr49			= start:position!() v:x() end:position!() {loc_expr!(v, s.loc_data, (s.file_name.clone(), start, end))}5051		pub rule param(s: &ParserSettings) -> expr::Param = name:id() expr:(_ "=" _ expr:expr(s){expr})? { expr::Param(name, expr) }52		pub rule params(s: &ParserSettings) -> expr::ParamsDesc53			= params:(param(s) ** comma()) {54				let mut defaults_started = false;55				for param in &params {56					defaults_started = defaults_started || param.1.is_some();57					assert_eq!(defaults_started, param.1.is_some(), "defauld parameters should be used after all positionals");58				}59				expr::ParamsDesc(params)60			}61			/ { expr::ParamsDesc(Vec::new()) }6263		pub rule arg(s: &ParserSettings) -> expr::Arg64			= name:id() _ "=" _ expr:expr(s) {expr::Arg(Some(name), expr)}65			/ expr:expr(s) {expr::Arg(None, expr)}66		pub rule args(s: &ParserSettings) -> expr::ArgsDesc67			= args:arg(s) ** comma() comma()? {68				let mut named_started = false;69				for arg in &args {70					named_started = named_started || arg.0.is_some();71					assert_eq!(named_started, arg.0.is_some(), "named args should be used after all positionals");72				}73				expr::ArgsDesc(args)74			}75			/ { expr::ArgsDesc(Vec::new()) }7677		pub rule bind(s: &ParserSettings) -> expr::BindSpec78			= name:id() _ "=" _ expr:expr(s) {expr::BindSpec{name, params: None, value: expr}}79			/ name:id() _ "(" _ params:params(s) _ ")" _ "=" _ expr:expr(s) {expr::BindSpec{name, params: Some(params), value: expr}}80		pub rule assertion(s: &ParserSettings) -> expr::AssertStmt81			= keyword("assert") _ cond:expr(s) msg:(_ ":" _ e:expr(s) {e})? { expr::AssertStmt(cond, msg) }8283		pub rule whole_line() -> String84			= str:$((!['\n'][_])* "\n") {str.to_owned()}85		pub rule string() -> String86			= "\"" str:$(("\\\"" / !['"'][_])*) "\"" {str.to_owned()}87			/ "'" str:$((!['\''][_])*) "'" {str.to_owned()}88			// TODO: This is temporary workaround, i still dont know how to write this correctly btw.89			/ "|||" "\n" str:$((" "*<1, 1> whole_line())+) " "*<0, 0> "|||" {deent(str)}90			/ "|||" "\n" str:$((" "*<2, 2> whole_line())+) " "*<1, 1> "|||" {deent(str)}91			/ "|||" "\n" str:$((" "*<3, 3> whole_line())+) " "*<2, 2> "|||" {deent(str)}92			/ "|||" "\n" str:$((" "*<4, 4> whole_line())+) " "*<3, 3> "|||" {deent(str)}93			/ "|||" "\n" str:$((" "*<5, 5> whole_line())+) " "*<4, 4> "|||" {deent(str)}94			/ "|||" "\n" str:$((" "*<6, 6> whole_line())+) " "*<5, 5> "|||" {deent(str)}95			/ "|||" "\n" str:$((" "*<7, 7> whole_line())+) " "*<6, 6> "|||" {deent(str)}96			/ "|||" "\n" str:$((" "*<8, 8> whole_line())+) " "*<7, 7> "|||" {deent(str)}97			/ "|||" "\n" str:$((" "*<9, 9> whole_line())+) " "*<8, 8> "|||" {deent(str)}98			/ "|||" "\n" str:$((" "*<10, 10> whole_line())+) " "*<9, 9> "|||" {deent(str)}99			/ "|||" "\n" str:$((" "*<11, 11> whole_line())+) " "*<10, 10> "|||" {deent(str)}100			/ "|||" "\n" str:$((" "*<12, 12> whole_line())+) " "*<11, 10> "|||" {deent(str)}101102		pub rule field_name(s: &ParserSettings) -> expr::FieldName103			= name:id() {expr::FieldName::Fixed(name)}104			/ name:string() {expr::FieldName::Fixed(name)}105			/ "[" _ expr:expr(s) _ "]" {expr::FieldName::Dyn(expr)}106		pub rule visibility() -> expr::Visibility107			= ":::" {expr::Visibility::Unhide}108			/ "::" {expr::Visibility::Hidden}109			/ ":" {expr::Visibility::Normal}110		pub rule field(s: &ParserSettings) -> expr::FieldMember111			= name:field_name(s) _ plus:"+"? _ visibility:visibility() _ value:expr(s) {expr::FieldMember{112				name,113				plus: plus.is_some(),114				params: None,115				visibility,116				value,117			}}118			/ name:field_name(s) _ "(" _ params:params(s) _ ")" _ visibility:visibility() _ value:expr(s) {expr::FieldMember{119				name,120				plus: false,121				params: Some(params),122				visibility,123				value,124			}}125		pub rule obj_local(s: &ParserSettings) -> BindSpec126			= keyword("local") _ bind:bind(s) {bind}127		pub rule member(s: &ParserSettings) -> expr::Member128			= bind:obj_local(s) {expr::Member::BindStmt(bind)}129			/ assertion:assertion(s) {expr::Member::AssertStmt(assertion)}130			/ field:field(s) {expr::Member::Field(field)}131		pub rule objinside(s: &ParserSettings) -> expr::ObjBody132			= pre_locals:(b: obj_local(s) comma() {b})* "[" _ key:expr(s) _ "]" _ ":" _ value:expr(s) post_locals:(comma() b:obj_local(s) {b})* _ forspec:forspec(s) others:(_ rest:compspec(s) {rest})? {133				expr::ObjBody::ObjComp {134					pre_locals,135					key,136					value,137					post_locals,138					rest: [vec![CompSpec::ForSpec(forspec)], others.unwrap_or_default()].concat(),139				}140			}141			/ members:(member(s) ** comma()) comma()? {expr::ObjBody::MemberList(members)}142		pub rule ifspec(s: &ParserSettings) -> IfSpecData143			= keyword("if") _ expr:expr(s) {IfSpecData(expr)}144		pub rule forspec(s: &ParserSettings) -> ForSpecData145			= keyword("for") _ id:id() _ keyword("in") _ cond:expr(s) {ForSpecData(id, cond)}146		pub rule compspec(s: &ParserSettings) -> Vec<expr::CompSpec>147			= s:(i:ifspec(s) { expr::CompSpec::IfSpec(i) } / f:forspec(s) {expr::CompSpec::ForSpec(f)} ) ** _ {s}148		pub rule local_expr(s: &ParserSettings) -> LocExpr149			= l(s,<keyword("local") _ binds:bind(s) ** comma() _ ";" _ expr:expr(s) { Expr::LocalExpr(binds, expr) }>)150		pub rule string_expr(s: &ParserSettings) -> LocExpr151			= l(s, <s:string() {Expr::Str(s)}>)152		pub rule obj_expr(s: &ParserSettings) -> LocExpr153			= l(s,<"{" _ body:objinside(s) _ "}" {Expr::Obj(body)}>)154		pub rule array_expr(s: &ParserSettings) -> LocExpr155			= l(s,<"[" _ elems:(expr(s) ** comma()) _ comma()? "]" {Expr::Arr(elems)}>)156		pub rule array_comp_expr(s: &ParserSettings) -> LocExpr157			= l(s,<"[" _ expr:expr(s) _ comma()? _ forspec:forspec(s) _ others:(others: compspec(s) _ {others})? "]" {Expr::ArrComp(expr, [vec![CompSpec::ForSpec(forspec)], others.unwrap_or_default()].concat())}>)158		pub rule number_expr(s: &ParserSettings) -> LocExpr159			= l(s,<n:number() { expr::Expr::Num(n) }>)160		pub rule var_expr(s: &ParserSettings) -> LocExpr161			= l(s,<n:id() { expr::Expr::Var(n) }>)162		pub rule if_then_else_expr(s: &ParserSettings) -> LocExpr163			= l(s,<cond:ifspec(s) _ keyword("then") _ cond_then:expr(s) cond_else:(_ keyword("else") _ e:expr(s) {e})? {Expr::IfElse{164				cond,165				cond_then,166				cond_else,167			}}>)168169		pub rule literal(s: &ParserSettings) -> LocExpr170			= l(s,<v:(171				keyword("null") {LiteralType::Null}172				/ keyword("true") {LiteralType::True}173				/ keyword("false") {LiteralType::False}174				/ keyword("self") {LiteralType::This}175				/ keyword("$") {LiteralType::Dollar}176				/ keyword("super") {LiteralType::Super}177			) {Expr::Literal(v)}>)178179		pub rule expr_basic(s: &ParserSettings) -> LocExpr180			= literal(s)181182			/ string_expr(s) / number_expr(s)183			/ array_expr(s)184			/ obj_expr(s)185			/ array_expr(s)186			/ array_comp_expr(s)187188			/ var_expr(s)189			/ local_expr(s)190			/ if_then_else_expr(s)191192			/ l(s,<keyword("function") _ "(" _ params:params(s) _ ")" _ expr:expr(s) {Expr::Function(params, expr)}>)193			/ l(s,<assertion:assertion(s) _ ";" _ expr:expr(s) { Expr::AssertExpr(assertion, expr) }>)194195			/ l(s,<keyword("error") _ expr:expr(s) { Expr::Error(expr) }>)196197		pub rule slice_desc(s: &ParserSettings) -> SliceDesc198			= start:expr(s)? _ ":" _ pair:(end:expr(s)? _ step:(":" _ e:expr(s) {e})? {(end, step)})? {199				if let Some((end, step)) = pair {200					SliceDesc { start, end, step }201				}else{202					SliceDesc { start, end: None, step: None }203				}204			}205206		rule expr(s: &ParserSettings) -> LocExpr207			= start:position!() a:precedence! {208				a:(@) _ "||" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Or, b))}209				--210				a:(@) _ "&&" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::And, b))}211				--212				a:(@) _ "|" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitOr, b))}213				--214				a:@ _ "^" _ b:(@) {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitXor, b))}215				--216				a:(@) _ "&" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitAnd, b))}217				--218				a:(@) _ "==" _ b:@ {loc_expr_todo!(Expr::Apply(219					el!(Expr::Index(220						el!(Expr::Var("std".to_owned())),221						el!(Expr::Str("equals".to_owned()))222					)),223					ArgsDesc(vec![Arg(None, a), Arg(None, b)]),224					true225				))}226				a:(@) _ "!=" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Not, el!(Expr::Apply(227					el!(Expr::Index(228						el!(Expr::Var("std".to_owned())),229						el!(Expr::Str("equals".to_owned()))230					)),231					ArgsDesc(vec![Arg(None, a), Arg(None, b)]),232					true233				))))}234				--235				a:(@) _ "<" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lt, b))}236				a:(@) _ ">" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Gt, b))}237				a:(@) _ "<=" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lte, b))}238				a:(@) _ ">=" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Gte, b))}239				--240				a:(@) _ "<<" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lhs, b))}241				a:(@) _ ">>" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Rhs, b))}242				--243				a:(@) _ "+" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Add, b))}244				a:(@) _ "-" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Sub, b))}245				--246				a:(@) _ "*" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Mul, b))}247				a:(@) _ "/" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Div, b))}248				a:(@) _ "%" _ b:@ {loc_expr_todo!(Expr::Apply(249					el!(Expr::Index(250						el!(Expr::Var("std".to_owned())),251						el!(Expr::Str("mod".to_owned()))252					)), ArgsDesc(vec![Arg(None, a), Arg(None, b)]),253					true254				))}255				--256						"-" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Minus, b))}257						"!" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Not, b))}258						"~" _ b:@ { loc_expr_todo!(Expr::UnaryOp(UnaryOpType::BitNot, b)) }259				--260				a:(@) _ "[" _ s:slice_desc(s) _ "]" {loc_expr_todo!(Expr::Slice(a, s))}261				a:(@) _ "." _ s:id() {loc_expr_todo!(Expr::Index(a, el!(Expr::Str(s))))}262				a:(@) _ "[" _ s:expr(s) _ "]" {loc_expr_todo!(Expr::Index(a, s))}263				a:(@) _ "(" _ args:args(s) _ ")" ts:(_ keyword("tailstrict"))? {loc_expr_todo!(Expr::Apply(a, args, ts.is_some()))}264				a:(@) _ "{" _ body:objinside(s) _ "}" {loc_expr_todo!(Expr::ObjExtend(a, body))}265				--266				e:expr_basic(s) {e}267				"(" _ e:expr(s) _ ")" {loc_expr_todo!(Expr::Parened(e))}268			} end:position!() {269				let LocExpr(e, _) = a;270				LocExpr(e, if s.loc_data {271					Some(Rc::new(ExprLocation(s.file_name.to_owned(), start, end)))272				} else {273					None274				})275			}276			/ e:expr_basic(s) {e}277278		pub rule jsonnet(s: &ParserSettings) -> LocExpr = _ e:expr(s) _ {e}279	}280}281282pub type ParseError = peg::error::ParseError<peg::str::LineCol>;283pub fn parse(str: &str, settings: &ParserSettings) -> Result<LocExpr, ParseError> {284	jsonnet_parser::jsonnet(str, settings)285}286287#[macro_export]288macro_rules! el {289	($expr:expr) => {290		LocExpr(std::rc::Rc::new($expr), None)291	};292}293294#[cfg(test)]295pub mod tests {296	use super::{expr::*, parse};297	use crate::ParserSettings;298	use std::path::PathBuf;299300	macro_rules! parse {301		($s:expr) => {302			parse(303				$s,304				&ParserSettings {305					loc_data: false,306					file_name: PathBuf::from("/test.jsonnet"),307					},308				)309			.unwrap()310		};311	}312313	mod expressions {314		use super::*;315316		pub fn basic_math() -> LocExpr {317			el!(Expr::BinaryOp(318				el!(Expr::Num(2.0)),319				BinaryOpType::Add,320				el!(Expr::BinaryOp(321					el!(Expr::Num(2.0)),322					BinaryOpType::Mul,323					el!(Expr::Num(2.0)),324				)),325			))326		}327	}328329	#[test]330	fn multiline_string() {331		assert_eq!(332			parse!("|||\n      Hello world!\n  a\n|||"),333			el!(Expr::Str("    Hello world!\na\n".to_owned())),334		)335	}336337	#[test]338	fn empty_object() {339		assert_eq!(parse!("{}"), el!(Expr::Obj(ObjBody::MemberList(vec![]))));340	}341342	#[test]343	fn basic_math() {344		assert_eq!(345			parse!("2+2*2"),346			el!(Expr::BinaryOp(347				el!(Expr::Num(2.0)),348				BinaryOpType::Add,349				el!(Expr::BinaryOp(350					el!(Expr::Num(2.0)),351					BinaryOpType::Mul,352					el!(Expr::Num(2.0))353				))354			))355		);356	}357358	#[test]359	fn basic_math_with_indents() {360		assert_eq!(parse!("2	+ 	  2	  *	2   	"), expressions::basic_math());361	}362363	#[test]364	fn basic_math_parened() {365		assert_eq!(366			parse!("2+(2+2*2)"),367			el!(Expr::BinaryOp(368				el!(Expr::Num(2.0)),369				BinaryOpType::Add,370				el!(Expr::Parened(expressions::basic_math())),371			))372		);373	}374375	/// Comments should not affect parsing376	#[test]377	fn comments() {378		assert_eq!(379			parse!("2//comment\n+//comment\n3/*test*/*/*test*/4"),380			el!(Expr::BinaryOp(381				el!(Expr::Num(2.0)),382				BinaryOpType::Add,383				el!(Expr::BinaryOp(384					el!(Expr::Num(3.0)),385					BinaryOpType::Mul,386					el!(Expr::Num(4.0))387				))388			))389		);390	}391392	/// Comments should be able to be escaped393	#[test]394	fn comment_escaping() {395		assert_eq!(396			parse!("2/*\\*/+*/ - 22"),397			el!(Expr::BinaryOp(398				el!(Expr::Num(2.0)),399				BinaryOpType::Sub,400				el!(Expr::Num(22.0))401			))402		);403	}404405	#[test]406	fn suffix() {407		// assert_eq!(parse!("std.test"), el!(Expr::Num(2.2)));408		// assert_eq!(parse!("std(2)"), el!(Expr::Num(2.2)));409		// assert_eq!(parse!("std.test(2)"), el!(Expr::Num(2.2)));410		// assert_eq!(parse!("a[b]"), el!(Expr::Num(2.2)))411	}412413	#[test]414	fn array_comp() {415		use Expr::*;416		assert_eq!(417			parse!("[std.deepJoin(x) for x in arr]"),418			el!(ArrComp(419				el!(Apply(420					el!(Index(421						el!(Var("std".to_owned())),422						el!(Str("deepJoin".to_owned()))423					)),424					ArgsDesc(vec![Arg(None, el!(Var("x".to_owned())))]),425					false,426				)),427				vec![CompSpec::ForSpec(ForSpecData(428					"x".to_owned(),429					el!(Var("arr".to_owned()))430				))]431			)),432		)433	}434435	#[test]436	fn reserved() {437		use Expr::*;438		assert_eq!(parse!("null"), el!(Literal(LiteralType::Null)));439		assert_eq!(parse!("nulla"), el!(Var("nulla".to_owned())));440	}441442	#[test]443	fn multiple_args_buf() {444		parse!("a(b, null_fields)");445	}446447	#[test]448	fn infix_precedence() {449		use Expr::*;450		assert_eq!(451			parse!("!a && !b"),452			el!(BinaryOp(453				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned())))),454				BinaryOpType::And,455				el!(UnaryOp(UnaryOpType::Not, el!(Var("b".to_owned()))))456			))457		);458	}459460	#[test]461	fn infix_precedence_division() {462		use Expr::*;463		assert_eq!(464			parse!("!a / !b"),465			el!(BinaryOp(466				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned())))),467				BinaryOpType::Div,468				el!(UnaryOp(UnaryOpType::Not, el!(Var("b".to_owned()))))469			))470		);471	}472473	#[test]474	fn double_negation() {475		use Expr::*;476		assert_eq!(477			parse!("!!a"),478			el!(UnaryOp(479				UnaryOpType::Not,480				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned()))))481			))482		)483	}484485	#[test]486	fn array_test_error() {487		parse!("[a for a in b if c for e in f]");488		//                    ^^^^ failed code489	}490491	#[test]492	fn can_parse_stdlib() {493		parse!(jsonnet_stdlib::STDLIB_STR);494	}495496	use test::Bencher;497498	// From source code499	#[bench]500	fn bench_parse_peg(b: &mut Bencher) {501		b.iter(|| parse!(jsonnet_stdlib::STDLIB_STR))502	}503504	// From serialized blob505	#[bench]506	fn bench_parse_serde_bincode(b: &mut Bencher) {507		let serialized = bincode::serialize(&parse!(jsonnet_stdlib::STDLIB_STR)).unwrap();508		b.iter(|| bincode::deserialize::<LocExpr>(&serialized))509	}510}