git.delta.rocks / jrsonnet / refs/commits / 5382799864e9

difftreelog

source

crates/jrsonnet-rowan-parser/src/parser.rs18.0 KiBsourcehistory
1use std::cell::Cell;2use std::fmt::Display;3use std::rc::Rc;45use miette::Diagnostic;6use miette::LabeledSpan;7use miette::SourceOffset;8use miette::SourceSpan;9use rowan::GreenNode;1011use rowan::TextRange;12use rowan::TextSize;13use thiserror::Error;1415use crate::binary::BinaryOperator;16use crate::event::Event;17use crate::event::Sink;18use crate::lex::lex;19use crate::lex::Lang;20use crate::lex::Lexeme;21use crate::lex::SyntaxKind;22use crate::lex::SyntaxKind::*;23use crate::marker::AsRange;24use crate::marker::CompletedMarker;25use crate::marker::FinishedRanger;26use crate::marker::Marker;27use crate::marker::Ranger;28use crate::token_set::TokenSet;29use crate::unary::UnaryOperator;3031pub struct Parse {32	pub green_node: GreenNode,33	pub errors: Vec<SyntaxError>,34}3536#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]37pub enum ExpectedSyntax {38	Named(&'static str),39	Unnamed(SyntaxKind),40}41impl Display for ExpectedSyntax {42	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {43		match self {44			ExpectedSyntax::Named(n) => write!(f, "{}", n),45			ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),46		}47	}48}4950pub struct Parser<'i> {51	lexemes: &'i [Lexeme<'i>],52	pub offset: usize,53	pub events: Vec<Event>,54	pub entered: u32,55	pub hints: Vec<(u32, TextRange, String)>,56	pub last_error_token: usize,57	expected_syntax: Option<ExpectedSyntax>,58	expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,59}6061const DEFAULT_RECOVERY_SET: TokenSet = TokenSet::new(&[62	SymbolSemi,63	RParen,64	SymbolRightBracket,65	SymbolRightBrace,66	KeywordLocal,67]);6869#[derive(Clone, Debug, PartialEq, Eq)]70pub enum SyntaxError {71	Unexpected {72		expected: ExpectedSyntax,73		found: SyntaxKind,74		range: TextRange,75	},76	Missing {77		expected: ExpectedSyntax,78		offset: TextSize,79	},80	Custom {81		error: String,82		range: TextRange,83	},84	Hint {85		error: String,86		range: TextRange,87	},88}8990impl Into<LabeledSpan> for SyntaxError {91	fn into(self) -> LabeledSpan {92		match self {93			SyntaxError::Unexpected {94				expected,95				found,96				range,97			} => LabeledSpan::new_with_span(98				Some(format!("expected {}, found {:?}", expected, found)),99				SourceSpan::new(100					SourceOffset::from(usize::from(range.start())),101					SourceOffset::from(usize::from(range.end() - range.start())),102				),103			),104			SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(105				Some(format!("missing {}", expected)),106				SourceSpan::new(107					SourceOffset::from(usize::from(offset)),108					SourceOffset::from(0),109				),110			),111			SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {112				LabeledSpan::new_with_span(113					Some(format!("{}", error)),114					SourceSpan::new(115						SourceOffset::from(usize::from(range.start())),116						SourceOffset::from(usize::from(range.end() - range.start())),117					),118				)119			}120		}121	}122}123124impl<'i> Parser<'i> {125	fn new(lexemes: &'i [Lexeme<'i>]) -> Self {126		Self {127			lexemes,128			offset: 0,129			events: vec![],130			entered: 0,131			last_error_token: 0,132			hints: vec![],133			expected_syntax: None,134			expected_syntax_tracking_state: Rc::new(Cell::new(135				ExpectedSyntaxTrackingState::Unnamed,136			)),137		}138	}139	pub fn clear_outdated_hints(&mut self) {140		let amount = self141			.hints142			.iter()143			.rev()144			.take_while(|h| h.0 > self.entered)145			.count();146		self.hints.truncate(self.hints.len() - amount)147	}148	fn clear_expected_syntaxes(&mut self) {149		self.expected_syntax = None;150		self.expected_syntax_tracking_state151			.set(ExpectedSyntaxTrackingState::Unnamed);152	}153	pub fn start(&mut self) -> Marker {154		let start_event_idx = self.events.len();155		self.events.push(Event::Placeholder);156		self.entered += 1;157		Marker::new(start_event_idx, self.offset)158	}159	pub fn start_ranger(&mut self) -> Ranger {160		let pos = self.offset;161		Ranger { pos }162	}163	fn parse(mut self) -> Vec<Event> {164		let m = self.start();165		expr(&mut self);166		if !self.at_end() {167			let ranger = self.start_ranger();168169			while self.peek().is_some() {170				self.bump()171			}172			let end = ranger.finish(&self);173			self.custom_error(end, "unexpected input after expression");174		}175		m.complete(&mut self, Root);176177		self.events178	}179180	pub(crate) fn expect(&mut self, kind: SyntaxKind) {181		self.expect_with_recovery_set(kind, TokenSet::default())182	}183184	pub(crate) fn expect_with_recovery_set(&mut self, kind: SyntaxKind, recovery_set: TokenSet) {185		if self.at(kind) {186			self.bump();187		} else {188			self.error_with_recovery_set(recovery_set);189		}190	}191192	pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {193		if self.at(kind) {194			self.bump();195		} else {196			self.error_with_no_skip();197		}198	}199	pub(crate) fn last_token_range(&self) -> Option<TextRange> {200		self.lexemes.last().map(|Lexeme { range, .. }| *range)201	}202	fn current_token(&self) -> Lexeme<'i> {203		self.lexemes[self.offset]204	}205	fn previous_token(&mut self) -> Option<Lexeme<'i>> {206		if self.offset == 0 {207			return None;208		}209		let mut previous_token_idx = self.offset - 1;210		while self211			.lexemes212			.get(previous_token_idx)213			.map_or(false, |l| l.kind.is_trivia())214			&& previous_token_idx != 0215		{216			previous_token_idx -= 1;217		}218219		Some(self.lexemes[previous_token_idx])220	}221	pub fn start_of_token(&self, mut idx: usize) -> TextSize {222		while self.lexemes[idx].kind.is_trivia() {223			idx += 1;224		}225		self.lexemes[idx].range.start()226	}227	pub fn end_of_token(&self, mut idx: usize) -> TextSize {228		while self.lexemes[idx].kind.is_trivia() {229			idx -= 1;230		}231		self.lexemes[idx].range.end()232	}233	pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {234		self.last_error_token = marker.end_token();235		self.events.push(Event::Error(SyntaxError::Custom {236			error: error.as_ref().to_string(),237			range: marker.as_range(self),238		}));239	}240	pub(crate) fn error_with_recovery_set(241		&mut self,242		recovery_set: TokenSet,243	) -> Option<CompletedMarker> {244		self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))245	}246	pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {247		self.error_with_recovery_set_no_default(TokenSet::ALL)248	}249250	pub fn error_with_recovery_set_no_default(251		&mut self,252		recovery_set: TokenSet,253	) -> Option<CompletedMarker> {254		let expected_syntax = self.expected_syntax.take().unwrap();255		self.expected_syntax_tracking_state256			.set(ExpectedSyntaxTrackingState::Unnamed);257258		if self.at_end() || self.at_set(recovery_set) {259			let range = self260				.previous_token()261				.map(|t| t.range)262				.unwrap_or(TextRange::at(TextSize::from(0), TextSize::from(0)));263264			self.events.push(Event::Error(SyntaxError::Missing {265				expected: expected_syntax,266				offset: range.end(),267			}));268			return None;269		}270271		let current_token = self.current_token();272273		self.events.push(Event::Error(SyntaxError::Unexpected {274			expected: expected_syntax.clone(),275			found: current_token.kind,276			range: current_token.range,277		}));278		self.clear_expected_syntaxes();279		self.last_error_token = self.offset;280281		let m = self.start();282		self.bump();283		Some(m.complete(self, SyntaxKind::Error))284	}285286	fn bump(&mut self) {287		self.skip_trivia();288		assert_ne!(self.offset, self.lexemes.len(), "already at end");289		self.events.push(Event::Token);290		self.offset += 1;291		self.clear_expected_syntaxes();292	}293	fn peek(&mut self) -> Option<SyntaxKind> {294		self.skip_trivia();295		self.peek_raw()296	}297	pub fn peek_token(&mut self) -> Option<&Lexeme<'i>> {298		self.skip_trivia();299		self.peek_token_raw()300	}301	fn skip_trivia(&mut self) {302		while self.peek_raw().map(|c| c.is_trivia()).unwrap_or(false) {303			self.offset += 1;304		}305	}306	fn peek_raw(&mut self) -> Option<SyntaxKind> {307		self.lexemes.get(self.offset).map(|l| l.kind)308	}309	fn peek_token_raw(&mut self) -> Option<&Lexeme<'i>> {310		self.lexemes.get(self.offset)311	}312	#[must_use]313	pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {314		self.expected_syntax_tracking_state315			.set(ExpectedSyntaxTrackingState::Named);316		self.expected_syntax = Some(ExpectedSyntax::Named(name));317318		ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))319	}320	pub fn at(&mut self, kind: SyntaxKind) -> bool {321		if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {322			self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));323		}324		self.peek() == Some(kind)325	}326	pub fn at_set(&mut self, set: TokenSet) -> bool {327		self.peek().map_or(false, |k| set.contains(k))328	}329	pub fn at_end(&mut self) -> bool {330		self.peek().is_none()331	}332}333pub(crate) struct ExpectedSyntaxGuard {334	expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,335}336337impl ExpectedSyntaxGuard {338	fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {339		Self {340			expected_syntax_tracking_state,341		}342	}343}344345impl Drop for ExpectedSyntaxGuard {346	fn drop(&mut self) {347		self.expected_syntax_tracking_state348			.set(ExpectedSyntaxTrackingState::Unnamed);349	}350}351352#[derive(Debug, Clone, Copy)]353enum ExpectedSyntaxTrackingState {354	Named,355	Unnamed,356}357macro_rules! at_match {358	($p:ident {359		$($r:ident => $e:expr,)*360		_ => $else:expr $(,)?361	}) => {{362		$(363			if $p.at($r) {$e} else364		)* {365			$else366		}367	}}368}369370fn expr(p: &mut Parser) {371	expr_binding_power(p, 0);372}373fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {374	let mut lhs = lhs(p)?;375376	loop {377		let op = at_match!(p {378			OpMul => BinaryOperator::Mul,379			OpDiv => BinaryOperator::Div,380			OpMod => BinaryOperator::Mod,381			OpPlus => BinaryOperator::Plus,382			OpMinus => BinaryOperator::Minus,383			OpShiftLeft => BinaryOperator::ShiftLeft,384			OpShiftRight => BinaryOperator::ShiftRight,385			OpLessThan => BinaryOperator::LessThan,386			OpGreaterThan => BinaryOperator::GreaterThan,387			OpLessThanOrEqual => BinaryOperator::LessThanOrEqual,388			OpGreaterThanOrEqual => BinaryOperator::GreaterThanOrEqual,389			OpEqual => BinaryOperator::Equal,390			OpNotEqual => BinaryOperator::NotEqual,391			OpBitAnd => BinaryOperator::BitAnd,392			OpBitXor => BinaryOperator::BitXor,393			OpBitOr => BinaryOperator::BitOr,394			OpAnd => BinaryOperator::And,395			OpOr => BinaryOperator::Or,396			OpIn => BinaryOperator::In,397			SymbolLeftBrace => BinaryOperator::ObjectApply,398			_ => break,399		});400		let (left_binding_power, right_binding_power) = op.binding_power();401		if left_binding_power < minimum_binding_power {402			break;403		}404405		// Object apply is not a real operator, we dont have something to bump406		if op != BinaryOperator::ObjectApply {407			p.bump();408		}409410		let m = lhs.precede(p);411		let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();412		lhs = m.complete(413			p,414			if op == BinaryOperator::ObjectApply {415				ObjectApply416			} else {417				BinOp418			},419		);420421		if !parsed_rhs {422			break;423		}424	}425	Some(lhs)426}427fn compspec(p: &mut Parser) {428	assert!(p.at(KeywordFor) || p.at(KeywordIf));429	if p.at(KeywordFor) {430		let m = p.start();431		p.bump();432		p.expect(Ident);433		p.expect(OpIn);434		expr(p);435		m.complete(p, CompspecFor);436	} else if p.at(KeywordIf) {437		let m = p.start();438		p.bump();439		expr(p);440		m.complete(p, CompspecIf);441	} else {442		unreachable!()443	}444}445fn comma(p: &mut Parser) -> bool {446	if p.at(SymbolComma) {447		p.bump();448		true449	} else {450		false451	}452}453fn comma_with_alternatives(p: &mut Parser, set: TokenSet) -> bool {454	if p.at(SymbolComma) {455		p.bump();456		true457	} else if p.at_set(set) {458		p.expect_with_no_skip(SymbolComma);459		p.bump();460		true461	} else {462		false463	}464}465fn field_name(p: &mut Parser) {466	let _e = p.expected_syntax_name("field name");467	if p.at(SymbolLeftBracket) {468		p.bump();469		expr(p);470		p.expect(SymbolRightBracket);471	} else if p.at(Ident) {472		p.bump()473	} else {474		p.error_with_recovery_set(TokenSet::new(&[SymbolSemi]));475	}476}477fn object(p: &mut Parser) -> CompletedMarker {478	assert!(p.at(SymbolLeftBrace));479	let m = p.start();480	p.bump();481482	loop {483		if p.at(SymbolRightBrace) {484			p.bump();485			break;486		}487		let m = p.start();488		field_name(p);489		p.expect(SymbolColon);490		expr(p);491		while p.at(KeywordFor) || p.at(KeywordIf) {492			compspec(p)493		}494		m.complete(p, Field);495		if comma_with_alternatives(p, TokenSet::new(&[SymbolAssign])) {496			continue;497		}498		p.expect(SymbolRightBrace);499		break;500	}501502	m.complete(p, Object)503}504505fn params(p: &mut Parser) -> CompletedMarker {506	assert!(p.at(LParen));507	let m = p.start();508	p.bump();509510	loop {511		if p.at(RParen) {512			p.bump();513			break;514		}515		let m = p.start();516		p.expect(Ident);517		if p.at(SymbolAssign) {518			p.bump();519			expr(p);520		}521		m.complete(p, DefParam);522		if comma(p) {523			continue;524		}525		p.expect(RParen);526		break;527	}528529	m.complete(p, DefParams)530}531fn args(p: &mut Parser) {532	assert!(p.at(LParen));533	p.bump();534535	let mut error_positional_start = None::<Marker>;536	let mut started_named = Cell::new(false);537	let mut on_positional = |p: &mut Parser, m: Marker| {538		let c = m.complete(p, DefPositionalArg);539		if started_named.get() && error_positional_start.is_none() {540			error_positional_start = Some(c.precede(p));541		}542	};543	loop {544		if p.at(RParen) {545			break;546		}547548		let m = p.start();549		if p.at(Ident) {550			p.bump();551			if p.at(SymbolAssign) {552				p.bump();553				expr(p);554				m.complete(p, DefNamedArg);555				started_named.set(true);556			} else {557				on_positional(p, m);558			}559		} else {560			expr(p);561			on_positional(p, m);562		}563		if comma(p) {564			continue;565		}566		break;567	}568	if let Some(error_positional_start) = error_positional_start {569		let c = error_positional_start.complete(p, ErrorPositionalAfterNamed);570		p.custom_error(c, "positional arguments can't be placed after named")571	}572	p.expect(RParen);573}574575fn array(p: &mut Parser) -> CompletedMarker {576	assert!(p.at(SymbolLeftBracket));577	// Start the list node578	let m = p.start();579	p.bump(); // '['580581	// This vec will have at most one element in case of correct input582	let mut compspecs = Vec::with_capacity(1);583	let mut elems = 0;584585	loop {586		if p.at(SymbolRightBracket) {587			p.bump();588			break;589		}590		elems += 1;591		let m = p.start();592		{593			let m = p.start();594			expr(p);595			m.complete(p, BodyDef);596		}597		let c = p.start_ranger();598		let mut had_spec = false;599		while p.at(KeywordFor) || p.at(KeywordIf) {600			had_spec = true;601			compspec(p)602		}603		if had_spec {604			compspecs.push(c.finish(p));605		}606		m.complete(p, ArrayElem);607		if comma(p) {608			continue;609		}610		p.expect(SymbolRightBracket);611		break;612	}613614	if elems > 1 && !compspecs.is_empty() {615		for spec in compspecs {616			p.custom_error(617				spec,618				"compspec may only be used if there is only one array element",619			)620		}621	}622623	m.complete(p, Array)624}625626fn lhs(p: &mut Parser) -> Option<CompletedMarker> {627	let mut lhs = lhs_basic(p)?;628629	loop {630		if p.at(SymbolDot) {631			let m = lhs.precede(p);632			p.bump();633			p.expect(Ident);634			lhs = m.complete(p, FieldAccess);635		} else if p.at(SymbolLeftBracket) {636			let m = lhs.precede(p);637			p.bump();638			// Start639			if !p.at(SymbolColon) {640				expr(p);641			}642			if p.at(SymbolColon) {643				p.bump();644				// End645				if !p.at(SymbolRightBracket) && !p.at(SymbolColon) {646					expr(p);647				}648				if p.at(SymbolColon) {649					p.bump();650					// Step651					if !p.at(SymbolRightBracket) {652						expr(p);653					}654				}655			}656			p.expect(SymbolRightBracket);657			lhs = m.complete(p, Slice);658		} else if p.at(LParen) {659			let m = lhs.precede(p);660			args(p);661			lhs = m.complete(p, FunctionCall);662		} else {663			break;664		}665	}666667	Some(lhs)668}669670fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {671	let _e = p.expected_syntax_name("value");672	Some(673		if p.at(Number)674			|| p.at(StringSingleQuoted)675			|| p.at(StringDoubleQuoted)676			|| p.at(StringSingleVerbatim)677			|| p.at(StringDoubleVerbatim)678			|| p.at(StringBlock)679			|| p.at(KeywordNull)680			|| p.at(SymbolDollar)681			|| p.at(KeywordSuper)682			|| p.at(KeywordSelf)683		{684			let m = p.start();685			p.bump();686			m.complete(p, Literal)687		} else if p.at(Ident) {688			let m = p.start();689			p.bump();690			m.complete(p, Ident)691		} else if p.at(SymbolLeftBracket) {692			array(p)693		} else if p.at(SymbolLeftBrace) {694			object(p)695		} else if p.at(KeywordLocal) {696			let m = p.start();697			p.bump();698			let mut sus_local = None;699			loop {700				p.expect_with_recovery_set(701					Ident,702					TokenSet::new(&[SymbolAssign, SymbolSemi, KeywordLocal]),703				);704				if p.at(LParen) {705					params(p);706				}707708				let sus_local_candidate = p.start_ranger();709				p.expect_with_recovery_set(710					SymbolAssign,711					TokenSet::new(&[SymbolSemi, KeywordLocal]),712				);713714				sus_local = p.at(KeywordLocal).then(|| sus_local_candidate.finish(p));715				expr(p);716717				if !comma(p) {718					break;719				}720			}721			p.expect(SymbolSemi);722			if let Some(sus_local) = sus_local {723				if sus_local.had_error_since(p) {724					p.custom_error(sus_local, "unusal local placement, missing ';' ?")725				}726			}727			{728				let m = p.start();729				expr(p);730				m.complete(p, BodyDef);731			}732			m.complete(p, Local)733		} else if p.at(KeywordFunction) {734			let m = p.start();735			p.bump();736			args(p);737			{738				let m = p.start();739				expr(p);740				m.complete(p, BodyDef);741			}742			m.complete(p, FunctionDef)743		} else if p.at(KeywordError) {744			let m = p.start();745			p.bump();746			expr(p);747			m.complete(p, ExprError)748		} else if p.at(KeywordAssert) {749			let m = p.start();750			p.bump();751			expr(p);752			if p.at(SymbolColon) {753				p.bump();754				expr(p);755			}756			m.complete(p, ExprAssert)757		} else if p.at(KeywordImport) || p.at(KeywordImportStr) {758			let m = p.start();759			p.bump();760			expr(p);761			m.complete(p, ExprImport)762		} else if p.at(OpMinus) || p.at(OpNot) || p.at(OpBitNegate) {763			let op = match p.peek().unwrap() {764				OpMinus => UnaryOperator::Minus,765				OpNot => UnaryOperator::Not,766				OpBitNegate => UnaryOperator::BitNegate,767				_ => unreachable!(),768			};769			let ((), right_binding_power) = op.binding_power();770771			let m = p.start();772			p.bump();773			expr_binding_power(p, right_binding_power);774			m.complete(p, UnaryOp)775		} else if p.at(LParen) {776			let m = p.start();777			p.bump();778			expr(p);779			assert!(p.at(RParen));780			p.bump();781			m.complete(p, Parened)782		} else {783			p.error_with_no_skip();784			return None;785		},786	)787}788789type SyntaxNode = rowan::SyntaxNode<Lang>;790#[allow(unused)]791type SyntaxToken = rowan::SyntaxToken<Lang>;792#[allow(unused)]793type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;794795impl Parse {796	pub fn syntax(&self) -> SyntaxNode {797		SyntaxNode::new_root(self.green_node.clone())798	}799}800801pub fn parse(input: &str) -> Parse {802	let lexemes = lex(input);803	let parser = Parser::new(&lexemes);804	let events = parser.parse();805	dbg!(&events);806	let sink = Sink::new(events, &lexemes);807808	sink.finish()809}