1use std::{cell::Cell, fmt::Display, rc::Rc};23use miette::{LabeledSpan, SourceOffset, SourceSpan};4use rowan::{GreenNode, TextRange, TextSize};56use crate::{7 binary::BinaryOperator,8 event::{Event, Sink},9 lex::{lex, Lexeme},10 marker::{AsRange, CompletedMarker, Marker, Ranger},11 token_set::SyntaxKindSet,12 unary::UnaryOperator,13 SyntaxKind,14 SyntaxKind::*,15 SyntaxNode, T, TS,16};1718pub struct Parse {19 pub green_node: GreenNode,20 pub errors: Vec<SyntaxError>,21}2223#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]24pub enum ExpectedSyntax {25 Named(&'static str),26 Unnamed(SyntaxKind),27}28impl Display for ExpectedSyntax {29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {30 match self {31 ExpectedSyntax::Named(n) => write!(f, "{}", n),32 ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),33 }34 }35}3637pub struct Parser<'i> {38 lexemes: &'i [Lexeme<'i>],39 pub offset: usize,40 pub events: Vec<Event>,41 pub entered: u32,42 pub hints: Vec<(u32, TextRange, String)>,43 pub last_error_token: usize,44 expected_syntax: Option<ExpectedSyntax>,45 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,46}4748const DEFAULT_RECOVERY_SET: SyntaxKindSet = TS![; ')' ']' '}' local];4950#[derive(Clone, Debug, PartialEq, Eq)]51pub enum SyntaxError {52 Unexpected {53 expected: ExpectedSyntax,54 found: SyntaxKind,55 range: TextRange,56 },57 Missing {58 expected: ExpectedSyntax,59 offset: TextSize,60 },61 Custom {62 error: String,63 range: TextRange,64 },65 Hint {66 error: String,67 range: TextRange,68 },69}7071impl Into<LabeledSpan> for SyntaxError {72 fn into(self) -> LabeledSpan {73 match self {74 SyntaxError::Unexpected {75 expected,76 found,77 range,78 } => LabeledSpan::new_with_span(79 Some(format!("expected {}, found {:?}", expected, found)),80 SourceSpan::new(81 SourceOffset::from(usize::from(range.start())),82 SourceOffset::from(usize::from(range.end() - range.start())),83 ),84 ),85 SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(86 Some(format!("missing {}", expected)),87 SourceSpan::new(88 SourceOffset::from(usize::from(offset)),89 SourceOffset::from(0),90 ),91 ),92 SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {93 LabeledSpan::new_with_span(94 Some(format!("{}", error)),95 SourceSpan::new(96 SourceOffset::from(usize::from(range.start())),97 SourceOffset::from(usize::from(range.end() - range.start())),98 ),99 )100 }101 }102 }103}104105impl<'i> Parser<'i> {106 fn new(lexemes: &'i [Lexeme<'i>]) -> Self {107 Self {108 lexemes,109 offset: 0,110 events: vec![],111 entered: 0,112 last_error_token: 0,113 hints: vec![],114 expected_syntax: None,115 expected_syntax_tracking_state: Rc::new(Cell::new(116 ExpectedSyntaxTrackingState::Unnamed,117 )),118 }119 }120 pub fn clear_outdated_hints(&mut self) {121 let amount = self122 .hints123 .iter()124 .rev()125 .take_while(|h| h.0 > self.entered)126 .count();127 self.hints.truncate(self.hints.len() - amount)128 }129 fn clear_expected_syntaxes(&mut self) {130 self.expected_syntax = None;131 self.expected_syntax_tracking_state132 .set(ExpectedSyntaxTrackingState::Unnamed);133 }134 pub fn start(&mut self) -> Marker {135 let start_event_idx = self.events.len();136 self.events.push(Event::Placeholder);137 self.entered += 1;138 Marker::new(start_event_idx, self.offset)139 }140 pub fn start_ranger(&mut self) -> Ranger {141 let pos = self.offset;142 Ranger { pos }143 }144 fn parse(mut self) -> Vec<Event> {145 let m = self.start();146 expr(&mut self);147 if !self.at_end() {148 let ranger = self.start_ranger();149150 while self.peek().is_some() {151 self.bump()152 }153 let end = ranger.finish(&self);154 self.custom_error(end, "unexpected input after expression");155 }156 m.complete(&mut self, SOURCE_FILE);157158 self.events159 }160161 pub(crate) fn expect(&mut self, kind: SyntaxKind) {162 self.expect_with_recovery_set(kind, TS![])163 }164165 pub(crate) fn expect_with_recovery_set(166 &mut self,167 kind: SyntaxKind,168 recovery_set: SyntaxKindSet,169 ) {170 if self.at(kind) {171 self.bump();172 } else {173 self.error_with_recovery_set(recovery_set);174 }175 }176177 pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {178 if self.at(kind) {179 self.bump();180 } else {181 self.error_with_no_skip();182 }183 }184 pub(crate) fn last_token_range(&self) -> Option<TextRange> {185 self.lexemes.last().map(|Lexeme { range, .. }| *range)186 }187 fn current_token(&self) -> Lexeme<'i> {188 self.lexemes[self.offset]189 }190 fn previous_token(&mut self) -> Option<Lexeme<'i>> {191 if self.offset == 0 {192 return None;193 }194 let mut previous_token_idx = self.offset - 1;195 while self196 .lexemes197 .get(previous_token_idx)198 .map_or(false, |l| l.kind.is_trivia())199 && previous_token_idx != 0200 {201 previous_token_idx -= 1;202 }203204 Some(self.lexemes[previous_token_idx])205 }206 pub fn start_of_token(&self, mut idx: usize) -> TextSize {207 while self.lexemes[idx].kind.is_trivia() {208 idx += 1;209 }210 self.lexemes[idx].range.start()211 }212 pub fn end_of_token(&self, mut idx: usize) -> TextSize {213 while self.lexemes[idx].kind.is_trivia() {214 idx -= 1;215 }216 self.lexemes[idx].range.end()217 }218 pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {219 self.last_error_token = marker.end_token();220 self.events.push(Event::Error(SyntaxError::Custom {221 error: error.as_ref().to_string(),222 range: marker.as_range(self),223 }));224 }225 pub(crate) fn error_with_recovery_set(226 &mut self,227 recovery_set: SyntaxKindSet,228 ) -> Option<CompletedMarker> {229 self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))230 }231 pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {232 self.error_with_recovery_set_no_default(SyntaxKindSet::ALL)233 }234235 pub fn error_with_recovery_set_no_default(236 &mut self,237 recovery_set: SyntaxKindSet,238 ) -> Option<CompletedMarker> {239 let expected_syntax = self.expected_syntax.take().unwrap();240 self.expected_syntax_tracking_state241 .set(ExpectedSyntaxTrackingState::Unnamed);242243 if self.at_end() || self.at_set(recovery_set) {244 let range = self245 .previous_token()246 .map(|t| t.range)247 .unwrap_or(TextRange::at(TextSize::from(0), TextSize::from(0)));248249 self.events.push(Event::Error(SyntaxError::Missing {250 expected: expected_syntax,251 offset: range.end(),252 }));253 return None;254 }255256 let current_token = self.current_token();257258 self.events.push(Event::Error(SyntaxError::Unexpected {259 expected: expected_syntax.clone(),260 found: current_token.kind,261 range: current_token.range,262 }));263 self.clear_expected_syntaxes();264 self.last_error_token = self.offset;265266 let m = self.start();267 self.bump();268 Some(m.complete(self, SyntaxKind::ERROR))269 }270271 fn bump(&mut self) {272 self.skip_trivia();273 assert_ne!(self.offset, self.lexemes.len(), "already at end");274 self.events.push(Event::Token);275 self.offset += 1;276 self.clear_expected_syntaxes();277 }278 fn peek(&mut self) -> Option<SyntaxKind> {279 self.skip_trivia();280 self.peek_raw()281 }282 pub fn peek_token(&mut self) -> Option<&Lexeme<'i>> {283 self.skip_trivia();284 self.peek_token_raw()285 }286 fn skip_trivia(&mut self) {287 while self.peek_raw().map(|c| c.is_trivia()).unwrap_or(false) {288 self.offset += 1;289 }290 }291 fn peek_raw(&mut self) -> Option<SyntaxKind> {292 self.lexemes.get(self.offset).map(|l| l.kind)293 }294 fn peek_token_raw(&mut self) -> Option<&Lexeme<'i>> {295 self.lexemes.get(self.offset)296 }297 #[must_use]298 pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {299 self.expected_syntax_tracking_state300 .set(ExpectedSyntaxTrackingState::Named);301 self.expected_syntax = Some(ExpectedSyntax::Named(name));302303 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))304 }305 pub fn at(&mut self, kind: SyntaxKind) -> bool {306 if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {307 self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));308 }309 self.peek() == Some(kind)310 }311 pub fn at_set(&mut self, set: SyntaxKindSet) -> bool {312 self.peek().map_or(false, |k| set.contains(k))313 }314 pub fn at_end(&mut self) -> bool {315 self.peek().is_none()316 }317}318pub(crate) struct ExpectedSyntaxGuard {319 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,320}321322impl ExpectedSyntaxGuard {323 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {324 Self {325 expected_syntax_tracking_state,326 }327 }328}329330impl Drop for ExpectedSyntaxGuard {331 fn drop(&mut self) {332 self.expected_syntax_tracking_state333 .set(ExpectedSyntaxTrackingState::Unnamed);334 }335}336337#[derive(Debug, Clone, Copy)]338enum ExpectedSyntaxTrackingState {339 Named,340 Unnamed,341}342macro_rules! at_match {343 ($p:ident {344 $($r:expr => $e:expr,)*345 _ => $else:expr $(,)?346 }) => {{347 $(348 if $p.at($r) {$e} else349 )* {350 $else351 }352 }}353}354355fn expr(p: &mut Parser) {356 expr_binding_power(p, 0);357}358fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {359 let mut lhs = lhs(p)?;360361 loop {362 let op = at_match!(p {363 T![*] => BinaryOperator::Mul,364 T![/] => BinaryOperator::Div,365 T![%] => BinaryOperator::Mod,366 T![+] => BinaryOperator::Plus,367 T![-] => BinaryOperator::Minus,368 T![<<] => BinaryOperator::ShiftLeft,369 T![>>] => BinaryOperator::ShiftRight,370 T![<] => BinaryOperator::LessThan,371 T![>] => BinaryOperator::GreaterThan,372 T![<=] => BinaryOperator::LessThanOrEqual,373 T![>=] => BinaryOperator::GreaterThanOrEqual,374 T![==] => BinaryOperator::Equal,375 T![!=] => BinaryOperator::NotEqual,376 T![&] => BinaryOperator::BitAnd,377 T![^] => BinaryOperator::BitXor,378 T![|] => BinaryOperator::BitOr,379 T![&&] => BinaryOperator::And,380 T![||] => BinaryOperator::Or,381 T![in] => BinaryOperator::In,382 T!['{'] => BinaryOperator::ObjectApply,383 _ => break,384 });385 let (left_binding_power, right_binding_power) = op.binding_power();386 if left_binding_power < minimum_binding_power {387 break;388 }389390 391 if op != BinaryOperator::ObjectApply {392 p.bump();393 }394395 let m = lhs.precede(p);396 let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();397 lhs = m.complete(398 p,399 if op == BinaryOperator::ObjectApply {400 EXPR_OBJ_EXTEND401 } else {402 EXPR_BINARY403 },404 );405406 if !parsed_rhs {407 break;408 }409 }410 Some(lhs)411}412fn compspec(p: &mut Parser) {413 assert!(p.at(T![for]) || p.at(T![if]));414 if p.at(T![for]) {415 let m = p.start();416 p.bump();417 p.expect(IDENT);418 p.expect(T![in]);419 expr(p);420 m.complete(p, FOR_SPEC);421 } else if p.at(T![in]) {422 let m = p.start();423 p.bump();424 expr(p);425 m.complete(p, IF_SPEC);426 } else {427 unreachable!()428 }429}430fn comma(p: &mut Parser) -> bool {431 if p.at(T![,]) {432 p.bump();433 true434 } else {435 false436 }437}438fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {439 if p.at(T![,]) {440 p.bump();441 true442 } else if p.at_set(set) {443 p.expect_with_no_skip(T![,]);444 p.bump();445 true446 } else {447 false448 }449}450fn field_name(p: &mut Parser) {451 let _e = p.expected_syntax_name("field name");452 if p.at(T!['[']) {453 p.bump();454 expr(p);455 p.expect(T![']']);456 } else if p.at(IDENT) {457 p.bump()458 } else {459 p.error_with_recovery_set(TS![;]);460 }461}462fn object(p: &mut Parser) -> CompletedMarker {463 assert!(p.at(T!['{']));464 let m = p.start();465 p.bump();466467 loop {468 if p.at(T!['}']) {469 p.bump();470 break;471 }472 let m = p.start();473 field_name(p);474 p.expect(T![,]);475 expr(p);476 while p.at(T![for]) || p.at(T![if]) {477 compspec(p)478 }479 m.complete(p, MEMBER);480 if comma_with_alternatives(p, SyntaxKindSet::new(&[T![=]])) {481 continue;482 }483 p.expect(R_BRACE);484 break;485 }486487 m.complete(p, OBJ_BODY)488}489490fn params(p: &mut Parser) -> CompletedMarker {491 assert!(p.at(T!['(']));492 let m = p.start();493 p.bump();494495 loop {496 if p.at(T![')']) {497 p.bump();498 break;499 }500 let m = p.start();501 p.expect(IDENT);502 if p.at(T![=]) {503 p.bump();504 expr(p);505 }506 m.complete(p, PARAM);507 if comma(p) {508 continue;509 }510 p.expect(T![')']);511 break;512 }513514 m.complete(p, PARAMS_DESC)515}516fn args(p: &mut Parser) {517 assert!(p.at(T!['(']));518 p.bump();519520 let mut error_positional_start = None::<Marker>;521 let mut started_named = Cell::new(false);522 let mut on_positional = |p: &mut Parser, m: Marker| {523 let c = m.complete(p, ARG);524 if started_named.get() && error_positional_start.is_none() {525 error_positional_start = Some(c.precede(p));526 }527 };528 loop {529 if p.at(T![')']) {530 break;531 }532533 let m = p.start();534 if p.at(IDENT) {535 p.bump();536 if p.at(T![=]) {537 p.bump();538 expr(p);539 m.complete(p, ARG);540 started_named.set(true);541 } else {542 on_positional(p, m);543 }544 } else {545 expr(p);546 on_positional(p, m);547 }548 if comma(p) {549 continue;550 }551 break;552 }553 if let Some(error_positional_start) = error_positional_start {554 let c = error_positional_start.complete(p, ERROR);555 p.custom_error(c, "positional arguments can't be placed after named")556 }557 p.expect(T![')']);558}559560fn array(p: &mut Parser) -> CompletedMarker {561 assert!(p.at(T!['[']));562 563 let m = p.start();564 p.bump(); 565566 567 let mut compspecs = Vec::with_capacity(1);568 let mut elems = 0;569570 loop {571 if p.at(T![']']) {572 p.bump();573 break;574 }575 elems += 1;576 expr(p);577 let c = p.start_ranger();578 let mut had_spec = false;579 while p.at(T![for]) || p.at(T![if]) {580 had_spec = true;581 compspec(p)582 }583 if had_spec {584 compspecs.push(c.finish(p));585 }586 if comma(p) {587 continue;588 }589 p.expect(T![']']);590 break;591 }592593 if elems > 1 && !compspecs.is_empty() {594 for spec in compspecs {595 p.custom_error(596 spec,597 "compspec may only be used if there is only one array element",598 )599 }600601 m.complete(p, EXPR_ARRAY)602 } else if !compspecs.is_empty() {603 m.complete(p, EXPR_ARRAY_COMP)604 } else {605 m.complete(p, EXPR_ARRAY)606 }607}608609fn lhs(p: &mut Parser) -> Option<CompletedMarker> {610 let mut lhs = lhs_basic(p)?;611612 loop {613 if p.at(T![.]) {614 let m = lhs.precede(p);615 p.bump();616 p.expect(IDENT);617 lhs = m.complete(p, EXPR_INDEX);618 } else if p.at(T!['[']) {619 let m = lhs.precede(p);620 p.bump();621 622 if !p.at(T![:]) {623 expr(p);624 }625 if p.at(T![:]) {626 p.bump();627 628 if !p.at(T![']']) && !p.at(T![:]) {629 expr(p);630 }631 if p.at(T![:]) {632 p.bump();633 634 if !p.at(T![']']) {635 expr(p);636 }637 }638 }639 p.expect(T![']']);640 lhs = m.complete(p, EXPR_SLICE);641 } else if p.at(T!['(']) {642 let m = lhs.precede(p);643 args(p);644 lhs = m.complete(p, EXPR_APPLY);645 } else {646 break;647 }648 }649650 Some(lhs)651}652653fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {654 let _e = p.expected_syntax_name("value");655 Some(if p.peek().map(|l| l.is_literal()).unwrap_or(false) {656 let m = p.start();657 p.bump();658 m.complete(p, EXPR_LITERAL)659 } else if p.peek().map(|l| l.is_string()).unwrap_or(false) {660 let m = p.start();661 p.bump();662 m.complete(p, EXPR_STRING)663 } else if p.peek().map(|l| l.is_number()).unwrap_or(false) {664 let m = p.start();665 p.bump();666 m.complete(p, EXPR_NUMBER)667 } else if p.at(IDENT) {668 let m = p.start();669 p.bump();670 m.complete(p, EXPR_VAR)671 } else if p.at(T!['[']) {672 array(p)673 } else if p.at(T!['{']) {674 object(p)675 } else if p.at(T![local]) {676 let m = p.start();677 p.bump();678 let mut sus_local = None;679 loop {680 p.expect_with_recovery_set(IDENT, TS![= ; local]);681 if p.at(T!['(']) {682 params(p);683 }684685 let sus_local_candidate = p.start_ranger();686 p.expect_with_recovery_set(T![=], TS![; local]);687688 sus_local = p.at(T![local]).then(|| sus_local_candidate.finish(p));689 expr(p);690691 if !comma(p) {692 break;693 }694 }695 p.expect(T![;]);696 if let Some(sus_local) = sus_local {697 if sus_local.had_error_since(p) {698 p.custom_error(sus_local, "unusal local placement, missing ';' ?")699 }700 }701 expr(p);702 m.complete(p, T![local])703 } else if p.at(T![function]) {704 let m = p.start();705 p.bump();706 args(p);707 expr(p);708 m.complete(p, EXPR_FUNCTION)709 } else if p.at(T![error]) {710 let m = p.start();711 p.bump();712 expr(p);713 m.complete(p, EXPR_ERROR)714 } else if p.at(T![assert]) {715 let m = p.start();716 p.bump();717 expr(p);718 if p.at(T![:]) {719 p.bump();720 expr(p);721 }722 m.complete(p, EXPR_ASSERT)723 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {724 let m = p.start();725 p.bump();726 expr(p);727 m.complete(p, EXPR_IMPORT)728 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {729 let op = match p.peek().unwrap() {730 T![-] => UnaryOperator::Minus,731 T![!] => UnaryOperator::Not,732 T![~] => UnaryOperator::BitNegate,733 _ => unreachable!(),734 };735 let ((), right_binding_power) = op.binding_power();736737 let m = p.start();738 p.bump();739 expr_binding_power(p, right_binding_power);740 m.complete(p, EXPR_UNARY)741 } else if p.at(T!['(']) {742 let m = p.start();743 p.bump();744 expr(p);745 assert!(p.at(T![')']));746 p.bump();747 m.complete(p, EXPR_PARENED)748 } else {749 p.error_with_no_skip();750 return None;751 })752}753754impl Parse {755 pub fn syntax(&self) -> SyntaxNode {756 SyntaxNode::new_root(self.green_node.clone())757 }758}759760pub fn parse(input: &str) -> Parse {761 let lexemes = lex(input);762 let parser = Parser::new(&lexemes);763 let events = parser.parse();764 dbg!(&events);765 let sink = Sink::new(events, &lexemes);766767 sink.finish()768}