git.delta.rocks / jrsonnet / refs/commits / 3f3e40506793

difftreelog

source

crates/jsonnet-parser/src/lib.rs17.5 KiBsourcehistory
1#![feature(box_syntax)]2#![feature(test)]34extern crate test;56use peg::parser;7use std::{path::PathBuf, rc::Rc};8mod expr;9mod string_processing;10pub use expr::*;11pub use peg;12use string_processing::deent;1314pub struct ParserSettings {15	pub loc_data: bool,16	pub file_name: PathBuf,17}1819parser! {20	grammar jsonnet_parser() for str {21		use peg::ParseLiteral;2223		/// Standard C-like comments24		rule comment()25			= "//" (!['\n'][_])* "\n"26			/ "/*" (!("*/")[_])* "*/"27			/ "#" (!['\n'][_])* "\n"2829		rule single_whitespace() = quiet!{([' ' | '\r' | '\n' | '\t'] / comment())} / expected!("<whitespace>")30		rule _() = single_whitespace()*3132		/// For comma-delimited elements33		rule comma() = quiet!{_ "," _} / expected!("<comma>")34		rule alpha() -> char = c:$(['_' | 'a'..='z' | 'A'..='Z']) {c.chars().next().unwrap()}35		rule digit() -> char = d:$(['0'..='9']) {d.chars().next().unwrap()}36		rule end_of_ident() = !['0'..='9' | '_' | 'a'..='z' | 'A'..='Z']37		/// Sequence of digits38		rule uint() -> u64 = a:$(digit()+) { a.parse().unwrap() }39		/// Number in scientific notation format40		rule number() -> f64 = quiet!{a:$(uint() ("." uint())? (['e'|'E'] (s:['+'|'-'])? uint())?) { a.parse().unwrap() }} / expected!("<number>")4142		/// Reserved word followed by any non-alphanumberic43		rule reserved() = ("assert" / "else" / "error" / "false" / "for" / "function" / "if" / "import" / "importstr" / "in" / "local" / "null" / "tailstrict" / "then" / "self" / "super" / "true") end_of_ident()44		rule id() -> String = quiet!{ !reserved() s:$(alpha() (alpha() / digit())*) {s.to_owned()}} / expected!("<identifier>")4546		rule keyword(id: &'static str)47			= ##parse_string_literal(id) end_of_ident()48		// Adds location data information to existing expression49		rule l(s: &ParserSettings, x: rule<Expr>) -> LocExpr50			= start:position!() v:x() end:position!() {loc_expr!(v, s.loc_data, (s.file_name.clone(), start, end))}5152		pub rule param(s: &ParserSettings) -> expr::Param = name:id() expr:(_ "=" _ expr:expr(s){expr})? { expr::Param(name, expr) }53		pub rule params(s: &ParserSettings) -> expr::ParamsDesc54			= params:(param(s) ** comma()) {55				let mut defaults_started = false;56				for param in &params {57					defaults_started = defaults_started || param.1.is_some();58					assert_eq!(defaults_started, param.1.is_some(), "defauld parameters should be used after all positionals");59				}60				expr::ParamsDesc(params)61			}62			/ { expr::ParamsDesc(Vec::new()) }6364		pub rule arg(s: &ParserSettings) -> expr::Arg65			= name:id() _ "=" _ expr:expr(s) {expr::Arg(Some(name), expr)}66			/ expr:expr(s) {expr::Arg(None, expr)}67		pub rule args(s: &ParserSettings) -> expr::ArgsDesc68			= args:arg(s) ** comma() comma()? {69				let mut named_started = false;70				for arg in &args {71					named_started = named_started || arg.0.is_some();72					assert_eq!(named_started, arg.0.is_some(), "named args should be used after all positionals");73				}74				expr::ArgsDesc(args)75			}76			/ { expr::ArgsDesc(Vec::new()) }7778		pub rule bind(s: &ParserSettings) -> expr::BindSpec79			= name:id() _ "=" _ expr:expr(s) {expr::BindSpec{name, params: None, value: expr}}80			/ name:id() _ "(" _ params:params(s) _ ")" _ "=" _ expr:expr(s) {expr::BindSpec{name, params: Some(params), value: expr}}81		pub rule assertion(s: &ParserSettings) -> expr::AssertStmt82			= keyword("assert") _ cond:expr(s) msg:(_ ":" _ e:expr(s) {e})? { expr::AssertStmt(cond, msg) }8384		pub rule whole_line() -> String85			= str:$((!['\n'][_])* "\n") {str.to_owned()}86		pub rule string() -> String87			= "\"" str:$(("\\\"" / "\\\\" / (!['"'][_]))*) "\"" {unescape::unescape(str).unwrap()}88			/ "'" str:$(("\\'" / "\\\\" / (!['\''][_]))*) "'" {unescape::unescape(str).unwrap()}89			/ "@'" str:$(("''" / (!['\''][_]))*) "'" {str.replace("''", "'")}90			/ "@\"" str:$(("\"\"" / (!['"'][_]))*) "\"" {str.replace("\"\"", "\"")}91			// TODO: This is temporary workaround, i still dont know how to write this correctly btw.92			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<1, 1> whole_line())+) " "*<0, 0> "|||" {deent(str)}93			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<2, 2> whole_line())+) " "*<1, 1> "|||" {deent(str)}94			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<3, 3> whole_line())+) " "*<2, 2> "|||" {deent(str)}95			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<4, 4> whole_line())+) " "*<3, 3> "|||" {deent(str)}96			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<5, 5> whole_line())+) " "*<4, 4> "|||" {deent(str)}97			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<6, 6> whole_line())+) " "*<5, 5> "|||" {deent(str)}98			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<7, 7> whole_line())+) " "*<6, 6> "|||" {deent(str)}99			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<8, 8> whole_line())+) " "*<7, 7> "|||" {deent(str)}100			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<9, 9> whole_line())+) " "*<8, 8> "|||" {deent(str)}101			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<10, 10> whole_line())+) " "*<9, 9> "|||" {deent(str)}102			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<11, 11> whole_line())+) " "*<10, 10> "|||" {deent(str)}103			/ "|||" (!['\n']single_whitespace())+ "\n" str:$((" "*<12, 12> whole_line())+) " "*<11, 10> "|||" {deent(str)}104105		pub rule field_name(s: &ParserSettings) -> expr::FieldName106			= name:id() {expr::FieldName::Fixed(name)}107			/ name:string() {expr::FieldName::Fixed(name)}108			/ "[" _ expr:expr(s) _ "]" {expr::FieldName::Dyn(expr)}109		pub rule visibility() -> expr::Visibility110			= ":::" {expr::Visibility::Unhide}111			/ "::" {expr::Visibility::Hidden}112			/ ":" {expr::Visibility::Normal}113		pub rule field(s: &ParserSettings) -> expr::FieldMember114			= name:field_name(s) _ plus:"+"? _ visibility:visibility() _ value:expr(s) {expr::FieldMember{115				name,116				plus: plus.is_some(),117				params: None,118				visibility,119				value,120			}}121			/ name:field_name(s) _ "(" _ params:params(s) _ ")" _ visibility:visibility() _ value:expr(s) {expr::FieldMember{122				name,123				plus: false,124				params: Some(params),125				visibility,126				value,127			}}128		pub rule obj_local(s: &ParserSettings) -> BindSpec129			= keyword("local") _ bind:bind(s) {bind}130		pub rule member(s: &ParserSettings) -> expr::Member131			= bind:obj_local(s) {expr::Member::BindStmt(bind)}132			/ assertion:assertion(s) {expr::Member::AssertStmt(assertion)}133			/ field:field(s) {expr::Member::Field(field)}134		pub rule objinside(s: &ParserSettings) -> expr::ObjBody135			= pre_locals:(b: obj_local(s) comma() {b})* "[" _ key:expr(s) _ "]" _ ":" _ value:expr(s) post_locals:(comma() b:obj_local(s) {b})* _ forspec:forspec(s) others:(_ rest:compspec(s) {rest})? {136				expr::ObjBody::ObjComp {137					pre_locals,138					key,139					value,140					post_locals,141					rest: [vec![CompSpec::ForSpec(forspec)], others.unwrap_or_default()].concat(),142				}143			}144			/ members:(member(s) ** comma()) comma()? {expr::ObjBody::MemberList(members)}145		pub rule ifspec(s: &ParserSettings) -> IfSpecData146			= keyword("if") _ expr:expr(s) {IfSpecData(expr)}147		pub rule forspec(s: &ParserSettings) -> ForSpecData148			= keyword("for") _ id:id() _ keyword("in") _ cond:expr(s) {ForSpecData(id, cond)}149		pub rule compspec(s: &ParserSettings) -> Vec<expr::CompSpec>150			= s:(i:ifspec(s) { expr::CompSpec::IfSpec(i) } / f:forspec(s) {expr::CompSpec::ForSpec(f)} ) ** _ {s}151		pub rule local_expr(s: &ParserSettings) -> LocExpr152			= l(s,<keyword("local") _ binds:bind(s) ** comma() _ ";" _ expr:expr(s) { Expr::LocalExpr(binds, expr) }>)153		pub rule string_expr(s: &ParserSettings) -> LocExpr154			= l(s, <s:string() {Expr::Str(s)}>)155		pub rule obj_expr(s: &ParserSettings) -> LocExpr156			= l(s,<"{" _ body:objinside(s) _ "}" {Expr::Obj(body)}>)157		pub rule array_expr(s: &ParserSettings) -> LocExpr158			= l(s,<"[" _ elems:(expr(s) ** comma()) _ comma()? "]" {Expr::Arr(elems)}>)159		pub rule array_comp_expr(s: &ParserSettings) -> LocExpr160			= l(s,<"[" _ expr:expr(s) _ comma()? _ forspec:forspec(s) _ others:(others: compspec(s) _ {others})? "]" {Expr::ArrComp(expr, [vec![CompSpec::ForSpec(forspec)], others.unwrap_or_default()].concat())}>)161		pub rule number_expr(s: &ParserSettings) -> LocExpr162			= l(s,<n:number() { expr::Expr::Num(n) }>)163		pub rule var_expr(s: &ParserSettings) -> LocExpr164			= l(s,<n:id() { expr::Expr::Var(n) }>)165		pub rule if_then_else_expr(s: &ParserSettings) -> LocExpr166			= l(s,<cond:ifspec(s) _ keyword("then") _ cond_then:expr(s) cond_else:(_ keyword("else") _ e:expr(s) {e})? {Expr::IfElse{167				cond,168				cond_then,169				cond_else,170			}}>)171172		pub rule literal(s: &ParserSettings) -> LocExpr173			= l(s,<v:(174				keyword("null") {LiteralType::Null}175				/ keyword("true") {LiteralType::True}176				/ keyword("false") {LiteralType::False}177				/ keyword("self") {LiteralType::This}178				/ keyword("$") {LiteralType::Dollar}179				/ keyword("super") {LiteralType::Super}180			) {Expr::Literal(v)}>)181182		pub rule expr_basic(s: &ParserSettings) -> LocExpr183			= literal(s)184185			/ string_expr(s) / number_expr(s)186			/ array_expr(s)187			/ obj_expr(s)188			/ array_expr(s)189			/ array_comp_expr(s)190191			/ var_expr(s)192			/ local_expr(s)193			/ if_then_else_expr(s)194195			/ l(s,<keyword("function") _ "(" _ params:params(s) _ ")" _ expr:expr(s) {Expr::Function(params, expr)}>)196			/ l(s,<assertion:assertion(s) _ ";" _ expr:expr(s) { Expr::AssertExpr(assertion, expr) }>)197198			/ l(s,<keyword("error") _ expr:expr(s) { Expr::Error(expr) }>)199200		pub rule slice_desc(s: &ParserSettings) -> SliceDesc201			= start:expr(s)? _ ":" _ pair:(end:expr(s)? _ step:(":" _ e:expr(s) {e})? {(end, step)})? {202				if let Some((end, step)) = pair {203					SliceDesc { start, end, step }204				}else{205					SliceDesc { start, end: None, step: None }206				}207			}208209		rule expr(s: &ParserSettings) -> LocExpr210			= start:position!() a:precedence! {211				a:(@) _ "||" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Or, b))}212				--213				a:(@) _ "&&" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::And, b))}214				--215				a:(@) _ "|" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitOr, b))}216				--217				a:@ _ "^" _ b:(@) {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitXor, b))}218				--219				a:(@) _ "&" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::BitAnd, b))}220				--221				a:(@) _ "==" _ b:@ {loc_expr_todo!(Expr::Apply(222					el!(Expr::Index(223						el!(Expr::Var("std".to_owned())),224						el!(Expr::Str("equals".to_owned()))225					)),226					ArgsDesc(vec![Arg(None, a), Arg(None, b)]),227					true228				))}229				a:(@) _ "!=" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Not, el!(Expr::Apply(230					el!(Expr::Index(231						el!(Expr::Var("std".to_owned())),232						el!(Expr::Str("equals".to_owned()))233					)),234					ArgsDesc(vec![Arg(None, a), Arg(None, b)]),235					true236				))))}237				--238				a:(@) _ "<" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lt, b))}239				a:(@) _ ">" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Gt, b))}240				a:(@) _ "<=" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lte, b))}241				a:(@) _ ">=" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Gte, b))}242				--243				a:(@) _ "<<" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Lhs, b))}244				a:(@) _ ">>" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Rhs, b))}245				--246				a:(@) _ "+" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Add, b))}247				a:(@) _ "-" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Sub, b))}248				--249				a:(@) _ "*" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Mul, b))}250				a:(@) _ "/" _ b:@ {loc_expr_todo!(Expr::BinaryOp(a, BinaryOpType::Div, b))}251				a:(@) _ "%" _ b:@ {loc_expr_todo!(Expr::Apply(252					el!(Expr::Index(253						el!(Expr::Var("std".to_owned())),254						el!(Expr::Str("mod".to_owned()))255					)), ArgsDesc(vec![Arg(None, a), Arg(None, b)]),256					true257				))}258				--259						"-" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Minus, b))}260						"!" _ b:@ {loc_expr_todo!(Expr::UnaryOp(UnaryOpType::Not, b))}261						"~" _ b:@ { loc_expr_todo!(Expr::UnaryOp(UnaryOpType::BitNot, b)) }262				--263				a:(@) _ "[" _ s:slice_desc(s) _ "]" {loc_expr_todo!(Expr::Slice(a, s))}264				a:(@) _ "." _ s:id() {loc_expr_todo!(Expr::Index(a, el!(Expr::Str(s))))}265				a:(@) _ "[" _ s:expr(s) _ "]" {loc_expr_todo!(Expr::Index(a, s))}266				a:(@) _ "(" _ args:args(s) _ ")" ts:(_ keyword("tailstrict"))? {loc_expr_todo!(Expr::Apply(a, args, ts.is_some()))}267				a:(@) _ "{" _ body:objinside(s) _ "}" {loc_expr_todo!(Expr::ObjExtend(a, body))}268				--269				e:expr_basic(s) {e}270				"(" _ e:expr(s) _ ")" {loc_expr_todo!(Expr::Parened(e))}271			} end:position!() {272				let LocExpr(e, _) = a;273				LocExpr(e, if s.loc_data {274					Some(Rc::new(ExprLocation(s.file_name.to_owned(), start, end)))275				} else {276					None277				})278			}279			/ e:expr_basic(s) {e}280281		pub rule jsonnet(s: &ParserSettings) -> LocExpr = _ e:expr(s) _ {e}282	}283}284285pub type ParseError = peg::error::ParseError<peg::str::LineCol>;286pub fn parse(str: &str, settings: &ParserSettings) -> Result<LocExpr, ParseError> {287	jsonnet_parser::jsonnet(str, settings)288}289290#[macro_export]291macro_rules! el {292	($expr:expr) => {293		LocExpr(std::rc::Rc::new($expr), None)294	};295}296297#[cfg(test)]298pub mod tests {299	use super::{expr::*, parse};300	use crate::ParserSettings;301	use std::path::PathBuf;302303	macro_rules! parse {304		($s:expr) => {305			parse(306				$s,307				&ParserSettings {308					loc_data: false,309					file_name: PathBuf::from("/test.jsonnet"),310					},311				)312			.unwrap()313		};314	}315316	mod expressions {317		use super::*;318319		pub fn basic_math() -> LocExpr {320			el!(Expr::BinaryOp(321				el!(Expr::Num(2.0)),322				BinaryOpType::Add,323				el!(Expr::BinaryOp(324					el!(Expr::Num(2.0)),325					BinaryOpType::Mul,326					el!(Expr::Num(2.0)),327				)),328			))329		}330	}331332	#[test]333	fn multiline_string() {334		assert_eq!(335			parse!("|||\n      Hello world!\n  a\n|||"),336			el!(Expr::Str("    Hello world!\na\n".to_owned())),337		)338	}339340	#[test]341	fn string_escaping() {342		assert_eq!(343			parse!(r#""Hello, \"world\"!""#),344			el!(Expr::Str(r#"Hello, "world"!"#.to_owned())),345		);346		assert_eq!(347			parse!(r#"'Hello \'world\'!'"#),348			el!(Expr::Str("Hello 'world'!".to_owned())),349		);350		assert_eq!(parse!(r#"'\\\\'"#), el!(Expr::Str("\\\\".to_owned())),);351	}352353	#[test]354	fn string_unescaping() {355		assert_eq!(356			parse!(r#""Hello\nWorld""#),357			el!(Expr::Str("Hello\nWorld".to_owned())),358		);359	}360361	#[test]362	fn string_verbantim() {363		assert_eq!(364			parse!(r#"@"Hello\n""World""""#),365			el!(Expr::Str("Hello\\n\"World\"".to_owned())),366		);367	}368369	#[test]370	fn empty_object() {371		assert_eq!(parse!("{}"), el!(Expr::Obj(ObjBody::MemberList(vec![]))));372	}373374	#[test]375	fn basic_math() {376		assert_eq!(377			parse!("2+2*2"),378			el!(Expr::BinaryOp(379				el!(Expr::Num(2.0)),380				BinaryOpType::Add,381				el!(Expr::BinaryOp(382					el!(Expr::Num(2.0)),383					BinaryOpType::Mul,384					el!(Expr::Num(2.0))385				))386			))387		);388	}389390	#[test]391	fn basic_math_with_indents() {392		assert_eq!(parse!("2	+ 	  2	  *	2   	"), expressions::basic_math());393	}394395	#[test]396	fn basic_math_parened() {397		assert_eq!(398			parse!("2+(2+2*2)"),399			el!(Expr::BinaryOp(400				el!(Expr::Num(2.0)),401				BinaryOpType::Add,402				el!(Expr::Parened(expressions::basic_math())),403			))404		);405	}406407	/// Comments should not affect parsing408	#[test]409	fn comments() {410		assert_eq!(411			parse!("2//comment\n+//comment\n3/*test*/*/*test*/4"),412			el!(Expr::BinaryOp(413				el!(Expr::Num(2.0)),414				BinaryOpType::Add,415				el!(Expr::BinaryOp(416					el!(Expr::Num(3.0)),417					BinaryOpType::Mul,418					el!(Expr::Num(4.0))419				))420			))421		);422	}423424	/// Comments should be able to be escaped425	#[test]426	fn comment_escaping() {427		assert_eq!(428			parse!("2/*\\*/+*/ - 22"),429			el!(Expr::BinaryOp(430				el!(Expr::Num(2.0)),431				BinaryOpType::Sub,432				el!(Expr::Num(22.0))433			))434		);435	}436437	#[test]438	fn suffix() {439		// assert_eq!(parse!("std.test"), el!(Expr::Num(2.2)));440		// assert_eq!(parse!("std(2)"), el!(Expr::Num(2.2)));441		// assert_eq!(parse!("std.test(2)"), el!(Expr::Num(2.2)));442		// assert_eq!(parse!("a[b]"), el!(Expr::Num(2.2)))443	}444445	#[test]446	fn array_comp() {447		use Expr::*;448		assert_eq!(449			parse!("[std.deepJoin(x) for x in arr]"),450			el!(ArrComp(451				el!(Apply(452					el!(Index(453						el!(Var("std".to_owned())),454						el!(Str("deepJoin".to_owned()))455					)),456					ArgsDesc(vec![Arg(None, el!(Var("x".to_owned())))]),457					false,458				)),459				vec![CompSpec::ForSpec(ForSpecData(460					"x".to_owned(),461					el!(Var("arr".to_owned()))462				))]463			)),464		)465	}466467	#[test]468	fn reserved() {469		use Expr::*;470		assert_eq!(parse!("null"), el!(Literal(LiteralType::Null)));471		assert_eq!(parse!("nulla"), el!(Var("nulla".to_owned())));472	}473474	#[test]475	fn multiple_args_buf() {476		parse!("a(b, null_fields)");477	}478479	#[test]480	fn infix_precedence() {481		use Expr::*;482		assert_eq!(483			parse!("!a && !b"),484			el!(BinaryOp(485				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned())))),486				BinaryOpType::And,487				el!(UnaryOp(UnaryOpType::Not, el!(Var("b".to_owned()))))488			))489		);490	}491492	#[test]493	fn infix_precedence_division() {494		use Expr::*;495		assert_eq!(496			parse!("!a / !b"),497			el!(BinaryOp(498				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned())))),499				BinaryOpType::Div,500				el!(UnaryOp(UnaryOpType::Not, el!(Var("b".to_owned()))))501			))502		);503	}504505	#[test]506	fn double_negation() {507		use Expr::*;508		assert_eq!(509			parse!("!!a"),510			el!(UnaryOp(511				UnaryOpType::Not,512				el!(UnaryOp(UnaryOpType::Not, el!(Var("a".to_owned()))))513			))514		)515	}516517	#[test]518	fn array_test_error() {519		parse!("[a for a in b if c for e in f]");520		//                    ^^^^ failed code521	}522523	#[test]524	fn can_parse_stdlib() {525		parse!(jsonnet_stdlib::STDLIB_STR);526	}527528	use test::Bencher;529530	// From source code531	#[bench]532	fn bench_parse_peg(b: &mut Bencher) {533		b.iter(|| parse!(jsonnet_stdlib::STDLIB_STR))534	}535536	// From serialized blob537	#[bench]538	fn bench_parse_serde_bincode(b: &mut Bencher) {539		let serialized = bincode::serialize(&parse!(jsonnet_stdlib::STDLIB_STR)).unwrap();540		b.iter(|| bincode::deserialize::<LocExpr>(&serialized))541	}542}