git.delta.rocks / jrsonnet / refs/commits / c678cf8e698f

difftreelog

source

crates/jrsonnet-rowan-parser/src/parser.rs16.8 KiBsourcehistory
1use std::{cell::Cell, fmt::Display, rc::Rc};23use miette::{LabeledSpan, SourceOffset, SourceSpan};4use rowan::{GreenNode, TextRange, TextSize};56use crate::{7	binary::BinaryOperator,8	event::{Event, Sink},9	lex::{lex, Lexeme},10	marker::{AsRange, CompletedMarker, Marker, Ranger},11	token_set::SyntaxKindSet,12	unary::UnaryOperator,13	SyntaxKind,14	SyntaxKind::*,15	SyntaxNode, T, TS,16};1718pub struct Parse {19	pub green_node: GreenNode,20	pub errors: Vec<SyntaxError>,21}2223#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]24pub enum ExpectedSyntax {25	Named(&'static str),26	Unnamed(SyntaxKind),27}28impl Display for ExpectedSyntax {29	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {30		match self {31			ExpectedSyntax::Named(n) => write!(f, "{}", n),32			ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),33		}34	}35}3637pub struct Parser<'i> {38	lexemes: &'i [Lexeme<'i>],39	pub offset: usize,40	pub events: Vec<Event>,41	pub entered: u32,42	pub hints: Vec<(u32, TextRange, String)>,43	pub last_error_token: usize,44	expected_syntax: Option<ExpectedSyntax>,45	expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,46}4748const DEFAULT_RECOVERY_SET: SyntaxKindSet = TS![; ')' ']' '}' local];4950#[derive(Clone, Debug, PartialEq, Eq)]51pub enum SyntaxError {52	Unexpected {53		expected: ExpectedSyntax,54		found: SyntaxKind,55		range: TextRange,56	},57	Missing {58		expected: ExpectedSyntax,59		offset: TextSize,60	},61	Custom {62		error: String,63		range: TextRange,64	},65	Hint {66		error: String,67		range: TextRange,68	},69}7071impl Into<LabeledSpan> for SyntaxError {72	fn into(self) -> LabeledSpan {73		match self {74			SyntaxError::Unexpected {75				expected,76				found,77				range,78			} => LabeledSpan::new_with_span(79				Some(format!("expected {}, found {:?}", expected, found)),80				SourceSpan::new(81					SourceOffset::from(usize::from(range.start())),82					SourceOffset::from(usize::from(range.end() - range.start())),83				),84			),85			SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(86				Some(format!("missing {}", expected)),87				SourceSpan::new(88					SourceOffset::from(usize::from(offset)),89					SourceOffset::from(0),90				),91			),92			SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {93				LabeledSpan::new_with_span(94					Some(format!("{}", error)),95					SourceSpan::new(96						SourceOffset::from(usize::from(range.start())),97						SourceOffset::from(usize::from(range.end() - range.start())),98					),99				)100			}101		}102	}103}104105impl<'i> Parser<'i> {106	fn new(lexemes: &'i [Lexeme<'i>]) -> Self {107		Self {108			lexemes,109			offset: 0,110			events: vec![],111			entered: 0,112			last_error_token: 0,113			hints: vec![],114			expected_syntax: None,115			expected_syntax_tracking_state: Rc::new(Cell::new(116				ExpectedSyntaxTrackingState::Unnamed,117			)),118		}119	}120	pub fn clear_outdated_hints(&mut self) {121		let amount = self122			.hints123			.iter()124			.rev()125			.take_while(|h| h.0 > self.entered)126			.count();127		self.hints.truncate(self.hints.len() - amount)128	}129	fn clear_expected_syntaxes(&mut self) {130		self.expected_syntax = None;131		self.expected_syntax_tracking_state132			.set(ExpectedSyntaxTrackingState::Unnamed);133	}134	pub fn start(&mut self) -> Marker {135		let start_event_idx = self.events.len();136		self.events.push(Event::Placeholder);137		self.entered += 1;138		Marker::new(start_event_idx, self.offset)139	}140	pub fn start_ranger(&mut self) -> Ranger {141		let pos = self.offset;142		Ranger { pos }143	}144	fn parse(mut self) -> Vec<Event> {145		let m = self.start();146		expr(&mut self);147		if !self.at_end() {148			let ranger = self.start_ranger();149150			while self.peek().is_some() {151				self.bump()152			}153			let end = ranger.finish(&self);154			self.custom_error(end, "unexpected input after expression");155		}156		m.complete(&mut self, SOURCE_FILE);157158		self.events159	}160161	pub(crate) fn expect(&mut self, kind: SyntaxKind) {162		self.expect_with_recovery_set(kind, TS![])163	}164165	pub(crate) fn expect_with_recovery_set(166		&mut self,167		kind: SyntaxKind,168		recovery_set: SyntaxKindSet,169	) {170		if self.at(kind) {171			self.bump();172		} else {173			self.error_with_recovery_set(recovery_set);174		}175	}176177	pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {178		if self.at(kind) {179			self.bump();180		} else {181			self.error_with_no_skip();182		}183	}184	pub(crate) fn last_token_range(&self) -> Option<TextRange> {185		self.lexemes.last().map(|Lexeme { range, .. }| *range)186	}187	fn current_token(&self) -> Lexeme<'i> {188		self.lexemes[self.offset]189	}190	fn previous_token(&mut self) -> Option<Lexeme<'i>> {191		if self.offset == 0 {192			return None;193		}194		let mut previous_token_idx = self.offset - 1;195		while self196			.lexemes197			.get(previous_token_idx)198			.map_or(false, |l| l.kind.is_trivia())199			&& previous_token_idx != 0200		{201			previous_token_idx -= 1;202		}203204		Some(self.lexemes[previous_token_idx])205	}206	pub fn start_of_token(&self, mut idx: usize) -> TextSize {207		while self.lexemes[idx].kind.is_trivia() {208			idx += 1;209		}210		self.lexemes[idx].range.start()211	}212	pub fn end_of_token(&self, mut idx: usize) -> TextSize {213		while self.lexemes[idx].kind.is_trivia() {214			idx -= 1;215		}216		self.lexemes[idx].range.end()217	}218	pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {219		self.last_error_token = marker.end_token();220		self.events.push(Event::Error(SyntaxError::Custom {221			error: error.as_ref().to_string(),222			range: marker.as_range(self),223		}));224	}225	pub(crate) fn error_with_recovery_set(226		&mut self,227		recovery_set: SyntaxKindSet,228	) -> Option<CompletedMarker> {229		self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))230	}231	pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {232		self.error_with_recovery_set_no_default(SyntaxKindSet::ALL)233	}234235	pub fn error_with_recovery_set_no_default(236		&mut self,237		recovery_set: SyntaxKindSet,238	) -> Option<CompletedMarker> {239		let expected_syntax = self.expected_syntax.take().unwrap();240		self.expected_syntax_tracking_state241			.set(ExpectedSyntaxTrackingState::Unnamed);242243		if self.at_end() || self.at_set(recovery_set) {244			let range = self245				.previous_token()246				.map(|t| t.range)247				.unwrap_or(TextRange::at(TextSize::from(0), TextSize::from(0)));248249			self.events.push(Event::Error(SyntaxError::Missing {250				expected: expected_syntax,251				offset: range.end(),252			}));253			return None;254		}255256		let current_token = self.current_token();257258		self.events.push(Event::Error(SyntaxError::Unexpected {259			expected: expected_syntax.clone(),260			found: current_token.kind,261			range: current_token.range,262		}));263		self.clear_expected_syntaxes();264		self.last_error_token = self.offset;265266		let m = self.start();267		self.bump();268		Some(m.complete(self, SyntaxKind::ERROR))269	}270271	fn bump(&mut self) {272		self.skip_trivia();273		assert_ne!(self.offset, self.lexemes.len(), "already at end");274		self.events.push(Event::Token);275		self.offset += 1;276		self.clear_expected_syntaxes();277	}278	fn peek(&mut self) -> Option<SyntaxKind> {279		self.skip_trivia();280		self.peek_raw()281	}282	pub fn peek_token(&mut self) -> Option<&Lexeme<'i>> {283		self.skip_trivia();284		self.peek_token_raw()285	}286	fn skip_trivia(&mut self) {287		while self.peek_raw().map(|c| c.is_trivia()).unwrap_or(false) {288			self.offset += 1;289		}290	}291	fn peek_raw(&mut self) -> Option<SyntaxKind> {292		self.lexemes.get(self.offset).map(|l| l.kind)293	}294	fn peek_token_raw(&mut self) -> Option<&Lexeme<'i>> {295		self.lexemes.get(self.offset)296	}297	#[must_use]298	pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {299		self.expected_syntax_tracking_state300			.set(ExpectedSyntaxTrackingState::Named);301		self.expected_syntax = Some(ExpectedSyntax::Named(name));302303		ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))304	}305	pub fn at(&mut self, kind: SyntaxKind) -> bool {306		if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {307			self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));308		}309		self.peek() == Some(kind)310	}311	pub fn at_set(&mut self, set: SyntaxKindSet) -> bool {312		self.peek().map_or(false, |k| set.contains(k))313	}314	pub fn at_end(&mut self) -> bool {315		self.peek().is_none()316	}317}318pub(crate) struct ExpectedSyntaxGuard {319	expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,320}321322impl ExpectedSyntaxGuard {323	fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {324		Self {325			expected_syntax_tracking_state,326		}327	}328}329330impl Drop for ExpectedSyntaxGuard {331	fn drop(&mut self) {332		self.expected_syntax_tracking_state333			.set(ExpectedSyntaxTrackingState::Unnamed);334	}335}336337#[derive(Debug, Clone, Copy)]338enum ExpectedSyntaxTrackingState {339	Named,340	Unnamed,341}342macro_rules! at_match {343	($p:ident {344		$($r:expr => $e:expr,)*345		_ => $else:expr $(,)?346	}) => {{347		$(348			if $p.at($r) {$e} else349		)* {350			$else351		}352	}}353}354355fn expr(p: &mut Parser) {356	expr_binding_power(p, 0);357}358fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {359	let mut lhs = lhs(p)?;360361	loop {362		let op = at_match!(p {363			T![*] => BinaryOperator::Mul,364			T![/] => BinaryOperator::Div,365			T![%] => BinaryOperator::Mod,366			T![+] => BinaryOperator::Plus,367			T![-] => BinaryOperator::Minus,368			T![<<] => BinaryOperator::ShiftLeft,369			T![>>] => BinaryOperator::ShiftRight,370			T![<] => BinaryOperator::LessThan,371			T![>] => BinaryOperator::GreaterThan,372			T![<=] => BinaryOperator::LessThanOrEqual,373			T![>=] => BinaryOperator::GreaterThanOrEqual,374			T![==] => BinaryOperator::Equal,375			T![!=] => BinaryOperator::NotEqual,376			T![&] => BinaryOperator::BitAnd,377			T![^] => BinaryOperator::BitXor,378			T![|] => BinaryOperator::BitOr,379			T![&&] => BinaryOperator::And,380			T![||] => BinaryOperator::Or,381			T![in] => BinaryOperator::In,382			T!['{'] => BinaryOperator::ObjectApply,383			_ => break,384		});385		let (left_binding_power, right_binding_power) = op.binding_power();386		if left_binding_power < minimum_binding_power {387			break;388		}389390		// Object apply is not a real operator, we dont have something to bump391		if op != BinaryOperator::ObjectApply {392			p.bump();393		}394395		let m = lhs.precede(p);396		let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();397		lhs = m.complete(398			p,399			if op == BinaryOperator::ObjectApply {400				EXPR_OBJ_EXTEND401			} else {402				EXPR_BINARY403			},404		);405406		if !parsed_rhs {407			break;408		}409	}410	Some(lhs)411}412fn compspec(p: &mut Parser) {413	assert!(p.at(T![for]) || p.at(T![if]));414	if p.at(T![for]) {415		let m = p.start();416		p.bump();417		p.expect(IDENT);418		p.expect(T![in]);419		expr(p);420		m.complete(p, FOR_SPEC);421	} else if p.at(T![in]) {422		let m = p.start();423		p.bump();424		expr(p);425		m.complete(p, IF_SPEC);426	} else {427		unreachable!()428	}429}430fn comma(p: &mut Parser) -> bool {431	if p.at(T![,]) {432		p.bump();433		true434	} else {435		false436	}437}438fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {439	if p.at(T![,]) {440		p.bump();441		true442	} else if p.at_set(set) {443		p.expect_with_no_skip(T![,]);444		p.bump();445		true446	} else {447		false448	}449}450fn field_name(p: &mut Parser) {451	let _e = p.expected_syntax_name("field name");452	if p.at(T!['[']) {453		p.bump();454		expr(p);455		p.expect(T![']']);456	} else if p.at(IDENT) {457		p.bump()458	} else {459		p.error_with_recovery_set(TS![;]);460	}461}462fn object(p: &mut Parser) -> CompletedMarker {463	assert!(p.at(T!['{']));464	let m = p.start();465	p.bump();466467	loop {468		if p.at(T!['}']) {469			p.bump();470			break;471		}472		let m = p.start();473		field_name(p);474		p.expect(T![,]);475		expr(p);476		while p.at(T![for]) || p.at(T![if]) {477			compspec(p)478		}479		m.complete(p, MEMBER);480		if comma_with_alternatives(p, SyntaxKindSet::new(&[T![=]])) {481			continue;482		}483		p.expect(R_BRACE);484		break;485	}486487	m.complete(p, OBJ_BODY)488}489490fn params(p: &mut Parser) -> CompletedMarker {491	assert!(p.at(T!['(']));492	let m = p.start();493	p.bump();494495	loop {496		if p.at(T![')']) {497			p.bump();498			break;499		}500		let m = p.start();501		p.expect(IDENT);502		if p.at(T![=]) {503			p.bump();504			expr(p);505		}506		m.complete(p, PARAM);507		if comma(p) {508			continue;509		}510		p.expect(T![')']);511		break;512	}513514	m.complete(p, PARAMS_DESC)515}516fn args(p: &mut Parser) {517	assert!(p.at(T!['(']));518	p.bump();519520	let mut error_positional_start = None::<Marker>;521	let mut started_named = Cell::new(false);522	let mut on_positional = |p: &mut Parser, m: Marker| {523		let c = m.complete(p, ARG);524		if started_named.get() && error_positional_start.is_none() {525			error_positional_start = Some(c.precede(p));526		}527	};528	loop {529		if p.at(T![')']) {530			break;531		}532533		let m = p.start();534		if p.at(IDENT) {535			p.bump();536			if p.at(T![=]) {537				p.bump();538				expr(p);539				m.complete(p, ARG);540				started_named.set(true);541			} else {542				on_positional(p, m);543			}544		} else {545			expr(p);546			on_positional(p, m);547		}548		if comma(p) {549			continue;550		}551		break;552	}553	if let Some(error_positional_start) = error_positional_start {554		let c = error_positional_start.complete(p, ERROR);555		p.custom_error(c, "positional arguments can't be placed after named")556	}557	p.expect(T![')']);558}559560fn array(p: &mut Parser) -> CompletedMarker {561	assert!(p.at(T!['[']));562	// Start the list node563	let m = p.start();564	p.bump(); // '['565566	// This vec will have at most one element in case of correct input567	let mut compspecs = Vec::with_capacity(1);568	let mut elems = 0;569570	loop {571		if p.at(T![']']) {572			p.bump();573			break;574		}575		elems += 1;576		expr(p);577		let c = p.start_ranger();578		let mut had_spec = false;579		while p.at(T![for]) || p.at(T![if]) {580			had_spec = true;581			compspec(p)582		}583		if had_spec {584			compspecs.push(c.finish(p));585		}586		if comma(p) {587			continue;588		}589		p.expect(T![']']);590		break;591	}592593	if elems > 1 && !compspecs.is_empty() {594		for spec in compspecs {595			p.custom_error(596				spec,597				"compspec may only be used if there is only one array element",598			)599		}600601		m.complete(p, EXPR_ARRAY)602	} else if !compspecs.is_empty() {603		m.complete(p, EXPR_ARRAY_COMP)604	} else {605		m.complete(p, EXPR_ARRAY)606	}607}608609fn lhs(p: &mut Parser) -> Option<CompletedMarker> {610	let mut lhs = lhs_basic(p)?;611612	loop {613		if p.at(T![.]) {614			let m = lhs.precede(p);615			p.bump();616			p.expect(IDENT);617			lhs = m.complete(p, EXPR_INDEX);618		} else if p.at(T!['[']) {619			let m = lhs.precede(p);620			p.bump();621			// Start622			if !p.at(T![:]) {623				expr(p);624			}625			if p.at(T![:]) {626				p.bump();627				// End628				if !p.at(T![']']) && !p.at(T![:]) {629					expr(p);630				}631				if p.at(T![:]) {632					p.bump();633					// Step634					if !p.at(T![']']) {635						expr(p);636					}637				}638			}639			p.expect(T![']']);640			lhs = m.complete(p, EXPR_SLICE);641		} else if p.at(T!['(']) {642			let m = lhs.precede(p);643			args(p);644			lhs = m.complete(p, EXPR_APPLY);645		} else {646			break;647		}648	}649650	Some(lhs)651}652653fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {654	let _e = p.expected_syntax_name("value");655	Some(if p.peek().map(|l| l.is_literal()).unwrap_or(false) {656		let m = p.start();657		p.bump();658		m.complete(p, EXPR_LITERAL)659	} else if p.peek().map(|l| l.is_string()).unwrap_or(false) {660		let m = p.start();661		p.bump();662		m.complete(p, EXPR_STRING)663	} else if p.peek().map(|l| l.is_number()).unwrap_or(false) {664		let m = p.start();665		p.bump();666		m.complete(p, EXPR_NUMBER)667	} else if p.at(IDENT) {668		let m = p.start();669		p.bump();670		m.complete(p, EXPR_VAR)671	} else if p.at(T!['[']) {672		array(p)673	} else if p.at(T!['{']) {674		object(p)675	} else if p.at(T![local]) {676		let m = p.start();677		p.bump();678		let mut sus_local = None;679		loop {680			p.expect_with_recovery_set(IDENT, TS![= ; local]);681			if p.at(T!['(']) {682				params(p);683			}684685			let sus_local_candidate = p.start_ranger();686			p.expect_with_recovery_set(T![=], TS![; local]);687688			sus_local = p.at(T![local]).then(|| sus_local_candidate.finish(p));689			expr(p);690691			if !comma(p) {692				break;693			}694		}695		p.expect(T![;]);696		if let Some(sus_local) = sus_local {697			if sus_local.had_error_since(p) {698				p.custom_error(sus_local, "unusal local placement, missing ';' ?")699			}700		}701		expr(p);702		m.complete(p, T![local])703	} else if p.at(T![function]) {704		let m = p.start();705		p.bump();706		args(p);707		expr(p);708		m.complete(p, EXPR_FUNCTION)709	} else if p.at(T![error]) {710		let m = p.start();711		p.bump();712		expr(p);713		m.complete(p, EXPR_ERROR)714	} else if p.at(T![assert]) {715		let m = p.start();716		p.bump();717		expr(p);718		if p.at(T![:]) {719			p.bump();720			expr(p);721		}722		m.complete(p, EXPR_ASSERT)723	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {724		let m = p.start();725		p.bump();726		expr(p);727		m.complete(p, EXPR_IMPORT)728	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {729		let op = match p.peek().unwrap() {730			T![-] => UnaryOperator::Minus,731			T![!] => UnaryOperator::Not,732			T![~] => UnaryOperator::BitNegate,733			_ => unreachable!(),734		};735		let ((), right_binding_power) = op.binding_power();736737		let m = p.start();738		p.bump();739		expr_binding_power(p, right_binding_power);740		m.complete(p, EXPR_UNARY)741	} else if p.at(T!['(']) {742		let m = p.start();743		p.bump();744		expr(p);745		assert!(p.at(T![')']));746		p.bump();747		m.complete(p, EXPR_PARENED)748	} else {749		p.error_with_no_skip();750		return None;751	})752}753754impl Parse {755	pub fn syntax(&self) -> SyntaxNode {756		SyntaxNode::new_root(self.green_node.clone())757	}758}759760pub fn parse(input: &str) -> Parse {761	let lexemes = lex(input);762	let parser = Parser::new(&lexemes);763	let events = parser.parse();764	dbg!(&events);765	let sink = Sink::new(events, &lexemes);766767	sink.finish()768}