1use std::cell::Cell;2use std::fmt::Display;3use std::rc::Rc;45use miette::Diagnostic;6use miette::LabeledSpan;7use miette::SourceOffset;8use miette::SourceSpan;9use rowan::GreenNode;1011use rowan::TextRange;12use rowan::TextSize;13use thiserror::Error;1415use crate::binary::BinaryOperator;16use crate::event::Event;17use crate::event::Sink;18use crate::lex::lex;19use crate::lex::Lang;20use crate::lex::Lexeme;21use crate::lex::SyntaxKind;22use crate::lex::SyntaxKind::*;23use crate::marker::AsRange;24use crate::marker::CompletedMarker;25use crate::marker::FinishedRanger;26use crate::marker::Marker;27use crate::marker::Ranger;28use crate::token_set::TokenSet;29use crate::unary::UnaryOperator;3031pub struct Parse {32 pub green_node: GreenNode,33 pub errors: Vec<SyntaxError>,34}3536#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]37pub enum ExpectedSyntax {38 Named(&'static str),39 Unnamed(SyntaxKind),40}41impl Display for ExpectedSyntax {42 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {43 match self {44 ExpectedSyntax::Named(n) => write!(f, "{}", n),45 ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),46 }47 }48}4950pub struct Parser<'i> {51 lexemes: &'i [Lexeme<'i>],52 pub offset: usize,53 pub events: Vec<Event>,54 pub entered: u32,55 pub hints: Vec<(u32, TextRange, String)>,56 pub last_error_token: usize,57 expected_syntax: Option<ExpectedSyntax>,58 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,59}6061const DEFAULT_RECOVERY_SET: TokenSet = TokenSet::new(&[62 SymbolSemi,63 RParen,64 SymbolRightBracket,65 SymbolRightBrace,66 KeywordLocal,67]);6869#[derive(Clone, Debug, PartialEq, Eq)]70pub enum SyntaxError {71 Unexpected {72 expected: ExpectedSyntax,73 found: SyntaxKind,74 range: TextRange,75 },76 Missing {77 expected: ExpectedSyntax,78 offset: TextSize,79 },80 Custom {81 error: String,82 range: TextRange,83 },84 Hint {85 error: String,86 range: TextRange,87 },88}8990impl Into<LabeledSpan> for SyntaxError {91 fn into(self) -> LabeledSpan {92 match self {93 SyntaxError::Unexpected {94 expected,95 found,96 range,97 } => LabeledSpan::new_with_span(98 Some(format!("expected {}, found {:?}", expected, found)),99 SourceSpan::new(100 SourceOffset::from(usize::from(range.start())),101 SourceOffset::from(usize::from(range.end() - range.start())),102 ),103 ),104 SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(105 Some(format!("missing {}", expected)),106 SourceSpan::new(107 SourceOffset::from(usize::from(offset)),108 SourceOffset::from(0),109 ),110 ),111 SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {112 LabeledSpan::new_with_span(113 Some(format!("{}", error)),114 SourceSpan::new(115 SourceOffset::from(usize::from(range.start())),116 SourceOffset::from(usize::from(range.end() - range.start())),117 ),118 )119 }120 }121 }122}123124impl<'i> Parser<'i> {125 fn new(lexemes: &'i [Lexeme<'i>]) -> Self {126 Self {127 lexemes,128 offset: 0,129 events: vec![],130 entered: 0,131 last_error_token: 0,132 hints: vec![],133 expected_syntax: None,134 expected_syntax_tracking_state: Rc::new(Cell::new(135 ExpectedSyntaxTrackingState::Unnamed,136 )),137 }138 }139 pub fn clear_outdated_hints(&mut self) {140 let amount = self141 .hints142 .iter()143 .rev()144 .take_while(|h| h.0 > self.entered)145 .count();146 self.hints.truncate(self.hints.len() - amount)147 }148 fn clear_expected_syntaxes(&mut self) {149 self.expected_syntax = None;150 self.expected_syntax_tracking_state151 .set(ExpectedSyntaxTrackingState::Unnamed);152 }153 pub fn start(&mut self) -> Marker {154 let start_event_idx = self.events.len();155 self.events.push(Event::Placeholder);156 self.entered += 1;157 Marker::new(start_event_idx, self.offset)158 }159 pub fn start_ranger(&mut self) -> Ranger {160 let pos = self.offset;161 Ranger { pos }162 }163 fn parse(mut self) -> Vec<Event> {164 let m = self.start();165 expr(&mut self);166 if !self.at_end() {167 let ranger = self.start_ranger();168169 while self.peek().is_some() {170 self.bump()171 }172 let end = ranger.finish(&self);173 self.custom_error(end, "unexpected input after expression");174 }175 m.complete(&mut self, Root);176177 self.events178 }179180 pub(crate) fn expect(&mut self, kind: SyntaxKind) {181 self.expect_with_recovery_set(kind, TokenSet::default())182 }183184 pub(crate) fn expect_with_recovery_set(&mut self, kind: SyntaxKind, recovery_set: TokenSet) {185 if self.at(kind) {186 self.bump();187 } else {188 self.error_with_recovery_set(recovery_set);189 }190 }191192 pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {193 if self.at(kind) {194 self.bump();195 } else {196 self.error_with_no_skip();197 }198 }199 pub(crate) fn last_token_range(&self) -> Option<TextRange> {200 self.lexemes.last().map(|Lexeme { range, .. }| *range)201 }202 fn current_token(&self) -> Lexeme<'i> {203 self.lexemes[self.offset]204 }205 fn previous_token(&mut self) -> Option<Lexeme<'i>> {206 if self.offset == 0 {207 return None;208 }209 let mut previous_token_idx = self.offset - 1;210 while self211 .lexemes212 .get(previous_token_idx)213 .map_or(false, |l| l.kind.is_trivia())214 && previous_token_idx != 0215 {216 previous_token_idx -= 1;217 }218219 Some(self.lexemes[previous_token_idx])220 }221 pub fn start_of_token(&self, mut idx: usize) -> TextSize {222 while self.lexemes[idx].kind.is_trivia() {223 idx += 1;224 }225 self.lexemes[idx].range.start()226 }227 pub fn end_of_token(&self, mut idx: usize) -> TextSize {228 while self.lexemes[idx].kind.is_trivia() {229 idx -= 1;230 }231 self.lexemes[idx].range.end()232 }233 pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {234 self.last_error_token = marker.end_token();235 self.events.push(Event::Error(SyntaxError::Custom {236 error: error.as_ref().to_string(),237 range: marker.as_range(self),238 }));239 }240 pub(crate) fn error_with_recovery_set(241 &mut self,242 recovery_set: TokenSet,243 ) -> Option<CompletedMarker> {244 self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))245 }246 pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {247 self.error_with_recovery_set_no_default(TokenSet::ALL)248 }249250 pub fn error_with_recovery_set_no_default(251 &mut self,252 recovery_set: TokenSet,253 ) -> Option<CompletedMarker> {254 let expected_syntax = self.expected_syntax.take().unwrap();255 self.expected_syntax_tracking_state256 .set(ExpectedSyntaxTrackingState::Unnamed);257258 if self.at_end() || self.at_set(recovery_set) {259 let range = self260 .previous_token()261 .map(|t| t.range)262 .unwrap_or(TextRange::at(TextSize::from(0), TextSize::from(0)));263264 self.events.push(Event::Error(SyntaxError::Missing {265 expected: expected_syntax,266 offset: range.end(),267 }));268 return None;269 }270271 let current_token = self.current_token();272273 self.events.push(Event::Error(SyntaxError::Unexpected {274 expected: expected_syntax.clone(),275 found: current_token.kind,276 range: current_token.range,277 }));278 self.clear_expected_syntaxes();279 self.last_error_token = self.offset;280281 let m = self.start();282 self.bump();283 Some(m.complete(self, SyntaxKind::Error))284 }285286 fn bump(&mut self) {287 self.skip_trivia();288 assert_ne!(self.offset, self.lexemes.len(), "already at end");289 self.events.push(Event::Token);290 self.offset += 1;291 self.clear_expected_syntaxes();292 }293 fn peek(&mut self) -> Option<SyntaxKind> {294 self.skip_trivia();295 self.peek_raw()296 }297 pub fn peek_token(&mut self) -> Option<&Lexeme<'i>> {298 self.skip_trivia();299 self.peek_token_raw()300 }301 fn skip_trivia(&mut self) {302 while self.peek_raw().map(|c| c.is_trivia()).unwrap_or(false) {303 self.offset += 1;304 }305 }306 fn peek_raw(&mut self) -> Option<SyntaxKind> {307 self.lexemes.get(self.offset).map(|l| l.kind)308 }309 fn peek_token_raw(&mut self) -> Option<&Lexeme<'i>> {310 self.lexemes.get(self.offset)311 }312 #[must_use]313 pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {314 self.expected_syntax_tracking_state315 .set(ExpectedSyntaxTrackingState::Named);316 self.expected_syntax = Some(ExpectedSyntax::Named(name));317318 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))319 }320 pub fn at(&mut self, kind: SyntaxKind) -> bool {321 if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {322 self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));323 }324 self.peek() == Some(kind)325 }326 pub fn at_set(&mut self, set: TokenSet) -> bool {327 self.peek().map_or(false, |k| set.contains(k))328 }329 pub fn at_end(&mut self) -> bool {330 self.peek().is_none()331 }332}333pub(crate) struct ExpectedSyntaxGuard {334 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,335}336337impl ExpectedSyntaxGuard {338 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {339 Self {340 expected_syntax_tracking_state,341 }342 }343}344345impl Drop for ExpectedSyntaxGuard {346 fn drop(&mut self) {347 self.expected_syntax_tracking_state348 .set(ExpectedSyntaxTrackingState::Unnamed);349 }350}351352#[derive(Debug, Clone, Copy)]353enum ExpectedSyntaxTrackingState {354 Named,355 Unnamed,356}357macro_rules! at_match {358 ($p:ident {359 $($r:ident => $e:expr,)*360 _ => $else:expr $(,)?361 }) => {{362 $(363 if $p.at($r) {$e} else364 )* {365 $else366 }367 }}368}369370fn expr(p: &mut Parser) {371 expr_binding_power(p, 0);372}373fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {374 let mut lhs = lhs(p)?;375376 loop {377 let op = at_match!(p {378 OpMul => BinaryOperator::Mul,379 OpDiv => BinaryOperator::Div,380 OpMod => BinaryOperator::Mod,381 OpPlus => BinaryOperator::Plus,382 OpMinus => BinaryOperator::Minus,383 OpShiftLeft => BinaryOperator::ShiftLeft,384 OpShiftRight => BinaryOperator::ShiftRight,385 OpLessThan => BinaryOperator::LessThan,386 OpGreaterThan => BinaryOperator::GreaterThan,387 OpLessThanOrEqual => BinaryOperator::LessThanOrEqual,388 OpGreaterThanOrEqual => BinaryOperator::GreaterThanOrEqual,389 OpEqual => BinaryOperator::Equal,390 OpNotEqual => BinaryOperator::NotEqual,391 OpBitAnd => BinaryOperator::BitAnd,392 OpBitXor => BinaryOperator::BitXor,393 OpBitOr => BinaryOperator::BitOr,394 OpAnd => BinaryOperator::And,395 OpOr => BinaryOperator::Or,396 OpIn => BinaryOperator::In,397 SymbolLeftBrace => BinaryOperator::ObjectApply,398 _ => break,399 });400 let (left_binding_power, right_binding_power) = op.binding_power();401 if left_binding_power < minimum_binding_power {402 break;403 }404405 406 if op != BinaryOperator::ObjectApply {407 p.bump();408 }409410 let m = lhs.precede(p);411 let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();412 lhs = m.complete(413 p,414 if op == BinaryOperator::ObjectApply {415 ObjectApply416 } else {417 BinOp418 },419 );420421 if !parsed_rhs {422 break;423 }424 }425 Some(lhs)426}427fn compspec(p: &mut Parser) {428 assert!(p.at(KeywordFor) || p.at(KeywordIf));429 if p.at(KeywordFor) {430 let m = p.start();431 p.bump();432 p.expect(Ident);433 p.expect(OpIn);434 expr(p);435 m.complete(p, CompspecFor);436 } else if p.at(KeywordIf) {437 let m = p.start();438 p.bump();439 expr(p);440 m.complete(p, CompspecIf);441 } else {442 unreachable!()443 }444}445fn comma(p: &mut Parser) -> bool {446 if p.at(SymbolComma) {447 p.bump();448 true449 } else {450 false451 }452}453fn comma_with_alternatives(p: &mut Parser, set: TokenSet) -> bool {454 if p.at(SymbolComma) {455 p.bump();456 true457 } else if p.at_set(set) {458 p.expect_with_no_skip(SymbolComma);459 p.bump();460 true461 } else {462 false463 }464}465fn field_name(p: &mut Parser) {466 let _e = p.expected_syntax_name("field name");467 if p.at(SymbolLeftBracket) {468 p.bump();469 expr(p);470 p.expect(SymbolRightBracket);471 } else if p.at(Ident) {472 p.bump()473 } else {474 p.error_with_recovery_set(TokenSet::new(&[SymbolSemi]));475 }476}477fn object(p: &mut Parser) -> CompletedMarker {478 assert!(p.at(SymbolLeftBrace));479 let m = p.start();480 p.bump();481482 loop {483 if p.at(SymbolRightBrace) {484 p.bump();485 break;486 }487 let m = p.start();488 field_name(p);489 p.expect(SymbolColon);490 expr(p);491 while p.at(KeywordFor) || p.at(KeywordIf) {492 compspec(p)493 }494 m.complete(p, Field);495 if comma_with_alternatives(p, TokenSet::new(&[SymbolAssign])) {496 continue;497 }498 p.expect(SymbolRightBrace);499 break;500 }501502 m.complete(p, Object)503}504505fn params(p: &mut Parser) -> CompletedMarker {506 assert!(p.at(LParen));507 let m = p.start();508 p.bump();509510 loop {511 if p.at(RParen) {512 p.bump();513 break;514 }515 let m = p.start();516 p.expect(Ident);517 if p.at(SymbolAssign) {518 p.bump();519 expr(p);520 }521 m.complete(p, DefParam);522 if comma(p) {523 continue;524 }525 p.expect(RParen);526 break;527 }528529 m.complete(p, DefParams)530}531fn args(p: &mut Parser) {532 assert!(p.at(LParen));533 p.bump();534535 let mut error_positional_start = None::<Marker>;536 let mut started_named = Cell::new(false);537 let mut on_positional = |p: &mut Parser, m: Marker| {538 let c = m.complete(p, DefPositionalArg);539 if started_named.get() && error_positional_start.is_none() {540 error_positional_start = Some(c.precede(p));541 }542 };543 loop {544 if p.at(RParen) {545 break;546 }547548 let m = p.start();549 if p.at(Ident) {550 p.bump();551 if p.at(SymbolAssign) {552 p.bump();553 expr(p);554 m.complete(p, DefNamedArg);555 started_named.set(true);556 } else {557 on_positional(p, m);558 }559 } else {560 expr(p);561 on_positional(p, m);562 }563 if comma(p) {564 continue;565 }566 break;567 }568 if let Some(error_positional_start) = error_positional_start {569 let c = error_positional_start.complete(p, ErrorPositionalAfterNamed);570 p.custom_error(c, "positional arguments can't be placed after named")571 }572 p.expect(RParen);573}574575fn array(p: &mut Parser) -> CompletedMarker {576 assert!(p.at(SymbolLeftBracket));577 578 let m = p.start();579 p.bump(); 580581 582 let mut compspecs = Vec::with_capacity(1);583 let mut elems = 0;584585 loop {586 if p.at(SymbolRightBracket) {587 p.bump();588 break;589 }590 elems += 1;591 let m = p.start();592 {593 let m = p.start();594 expr(p);595 m.complete(p, BodyDef);596 }597 let c = p.start_ranger();598 let mut had_spec = false;599 while p.at(KeywordFor) || p.at(KeywordIf) {600 had_spec = true;601 compspec(p)602 }603 if had_spec {604 compspecs.push(c.finish(p));605 }606 m.complete(p, ArrayElem);607 if comma(p) {608 continue;609 }610 p.expect(SymbolRightBracket);611 break;612 }613614 if elems > 1 && !compspecs.is_empty() {615 for spec in compspecs {616 p.custom_error(617 spec,618 "compspec may only be used if there is only one array element",619 )620 }621 }622623 m.complete(p, Array)624}625626fn lhs(p: &mut Parser) -> Option<CompletedMarker> {627 let mut lhs = lhs_basic(p)?;628629 loop {630 if p.at(SymbolDot) {631 let m = lhs.precede(p);632 p.bump();633 p.expect(Ident);634 lhs = m.complete(p, FieldAccess);635 } else if p.at(SymbolLeftBracket) {636 let m = lhs.precede(p);637 p.bump();638 639 if !p.at(SymbolColon) {640 expr(p);641 }642 if p.at(SymbolColon) {643 p.bump();644 645 if !p.at(SymbolRightBracket) && !p.at(SymbolColon) {646 expr(p);647 }648 if p.at(SymbolColon) {649 p.bump();650 651 if !p.at(SymbolRightBracket) {652 expr(p);653 }654 }655 }656 p.expect(SymbolRightBracket);657 lhs = m.complete(p, Slice);658 } else if p.at(LParen) {659 let m = lhs.precede(p);660 args(p);661 lhs = m.complete(p, FunctionCall);662 } else {663 break;664 }665 }666667 Some(lhs)668}669670fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {671 let _e = p.expected_syntax_name("value");672 Some(673 if p.at(Number)674 || p.at(StringSingleQuoted)675 || p.at(StringDoubleQuoted)676 || p.at(StringSingleVerbatim)677 || p.at(StringDoubleVerbatim)678 || p.at(StringBlock)679 || p.at(KeywordNull)680 || p.at(SymbolDollar)681 || p.at(KeywordSuper)682 || p.at(KeywordSelf)683 {684 let m = p.start();685 p.bump();686 m.complete(p, Literal)687 } else if p.at(Ident) {688 let m = p.start();689 p.bump();690 m.complete(p, Ident)691 } else if p.at(SymbolLeftBracket) {692 array(p)693 } else if p.at(SymbolLeftBrace) {694 object(p)695 } else if p.at(KeywordLocal) {696 let m = p.start();697 p.bump();698 let mut sus_local = None;699 loop {700 p.expect_with_recovery_set(701 Ident,702 TokenSet::new(&[SymbolAssign, SymbolSemi, KeywordLocal]),703 );704 if p.at(LParen) {705 params(p);706 }707708 let sus_local_candidate = p.start_ranger();709 p.expect_with_recovery_set(710 SymbolAssign,711 TokenSet::new(&[SymbolSemi, KeywordLocal]),712 );713714 sus_local = p.at(KeywordLocal).then(|| sus_local_candidate.finish(p));715 expr(p);716717 if !comma(p) {718 break;719 }720 }721 p.expect(SymbolSemi);722 if let Some(sus_local) = sus_local {723 if sus_local.had_error_since(p) {724 p.custom_error(sus_local, "unusal local placement, missing ';' ?")725 }726 }727 {728 let m = p.start();729 expr(p);730 m.complete(p, BodyDef);731 }732 m.complete(p, Local)733 } else if p.at(KeywordFunction) {734 let m = p.start();735 p.bump();736 args(p);737 {738 let m = p.start();739 expr(p);740 m.complete(p, BodyDef);741 }742 m.complete(p, FunctionDef)743 } else if p.at(KeywordError) {744 let m = p.start();745 p.bump();746 expr(p);747 m.complete(p, ExprError)748 } else if p.at(KeywordAssert) {749 let m = p.start();750 p.bump();751 expr(p);752 if p.at(SymbolColon) {753 p.bump();754 expr(p);755 }756 m.complete(p, ExprAssert)757 } else if p.at(KeywordImport) || p.at(KeywordImportStr) {758 let m = p.start();759 p.bump();760 expr(p);761 m.complete(p, ExprImport)762 } else if p.at(OpMinus) || p.at(OpNot) || p.at(OpBitNegate) {763 let op = match p.peek().unwrap() {764 OpMinus => UnaryOperator::Minus,765 OpNot => UnaryOperator::Not,766 OpBitNegate => UnaryOperator::BitNegate,767 _ => unreachable!(),768 };769 let ((), right_binding_power) = op.binding_power();770771 let m = p.start();772 p.bump();773 expr_binding_power(p, right_binding_power);774 m.complete(p, UnaryOp)775 } else if p.at(LParen) {776 let m = p.start();777 p.bump();778 expr(p);779 assert!(p.at(RParen));780 p.bump();781 m.complete(p, Parened)782 } else {783 p.error_with_no_skip();784 return None;785 },786 )787}788789type SyntaxNode = rowan::SyntaxNode<Lang>;790#[allow(unused)]791type SyntaxToken = rowan::SyntaxToken<Lang>;792#[allow(unused)]793type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;794795impl Parse {796 pub fn syntax(&self) -> SyntaxNode {797 SyntaxNode::new_root(self.green_node.clone())798 }799}800801pub fn parse(input: &str) -> Parse {802 let lexemes = lex(input);803 let parser = Parser::new(&lexemes);804 let events = parser.parse();805 dbg!(&events);806 let sink = Sink::new(events, &lexemes);807808 sink.finish()809}