1use std::{cell::Cell, fmt::Display, rc::Rc};23use miette::{LabeledSpan, SourceOffset, SourceSpan};4use rowan::{GreenNode, TextRange, TextSize};56use crate::{7 event::Event,8 lex::Lexeme,9 marker::{CompletedMarker, Marker, Ranger},10 nodes::{BinaryOperatorKind, Literal, Number, Text, Trivia, UnaryOperatorKind},11 token_set::SyntaxKindSet,12 AstToken, SyntaxKind,13 SyntaxKind::*,14 SyntaxNode, T, TS,15};1617pub struct Parse {18 pub green_node: GreenNode,19 pub errors: Vec<SyntaxError>,20}2122#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]23pub enum ExpectedSyntax {24 Named(&'static str),25 Unnamed(SyntaxKind),26}27impl Display for ExpectedSyntax {28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {29 match self {30 ExpectedSyntax::Named(n) => write!(f, "{}", n),31 ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),32 }33 }34}3536pub struct Parser {37 38 kinds: Vec<SyntaxKind>,39 pub offset: usize,40 pub events: Vec<Event>,41 pub entered: u32,42 pub hints: Vec<(u32, TextRange, String)>,43 pub last_error_token: usize,44 expected_syntax: Option<ExpectedSyntax>,45 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,46 steps: Cell<u64>,47}4849const DEFAULT_RECOVERY_SET: SyntaxKindSet = TS![];5051#[derive(Clone, Debug, PartialEq, Eq)]52pub enum SyntaxError {53 Unexpected {54 expected: ExpectedSyntax,55 found: SyntaxKind,56 range: TextRange,57 },58 Missing {59 expected: ExpectedSyntax,60 offset: TextSize,61 },62 Custom {63 error: String,64 range: TextRange,65 },66 Hint {67 error: String,68 range: TextRange,69 },70}7172impl From<SyntaxError> for LabeledSpan {73 fn from(val: SyntaxError) -> Self {74 match val {75 SyntaxError::Unexpected {76 expected,77 found,78 range,79 } => LabeledSpan::new_with_span(80 Some(format!("expected {}, found {:?}", expected, found)),81 SourceSpan::new(82 SourceOffset::from(usize::from(range.start())),83 SourceOffset::from(usize::from(range.end() - range.start())),84 ),85 ),86 SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(87 Some(format!("missing {}", expected)),88 SourceSpan::new(89 SourceOffset::from(usize::from(offset)),90 SourceOffset::from(0),91 ),92 ),93 SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {94 LabeledSpan::new_with_span(95 Some(error),96 SourceSpan::new(97 SourceOffset::from(usize::from(range.start())),98 SourceOffset::from(usize::from(range.end() - range.start())),99 ),100 )101 }102 }103 }104}105106impl Parser {107 pub fn new(kinds: Vec<SyntaxKind>) -> Self {108 Self {109 kinds,110 offset: 0,111 events: vec![],112 entered: 0,113 last_error_token: 0,114 hints: vec![],115 expected_syntax: None,116 expected_syntax_tracking_state: Rc::new(Cell::new(117 ExpectedSyntaxTrackingState::Unnamed,118 )),119 steps: Cell::new(0),120 }121 }122 pub fn clear_outdated_hints(&mut self) {123 let amount = self124 .hints125 .iter()126 .rev()127 .take_while(|h| h.0 > self.entered)128 .count();129 self.hints.truncate(self.hints.len() - amount)130 }131 fn clear_expected_syntaxes(&mut self) {132 self.expected_syntax = None;133 self.expected_syntax_tracking_state134 .set(ExpectedSyntaxTrackingState::Unnamed);135 }136 pub fn start(&mut self) -> Marker {137 let start_event_idx = self.events.len();138 self.events.push(Event::Pending);139 self.entered += 1;140 Marker::new(start_event_idx)141 }142 pub fn start_ranger(&mut self) -> Ranger {143 let pos = self.offset;144 Ranger { pos }145 }146 pub fn parse(mut self) -> Vec<Event> {147 let m = self.start();148 expr(&mut self);149 self.expect(EOF);150 m.complete(&mut self, SOURCE_FILE);151152 self.events153 }154155 pub(crate) fn expect(&mut self, kind: SyntaxKind) {156 self.expect_with_recovery_set(kind, TS![])157 }158159 pub(crate) fn expect_with_recovery_set(160 &mut self,161 kind: SyntaxKind,162 recovery_set: SyntaxKindSet,163 ) {164 if self.at(kind) {165 if kind != EOF {166 self.bump();167 }168 } else {169 self.error_with_recovery_set(recovery_set);170 }171 }172173 pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {174 if self.at(kind) {175 self.bump();176 } else {177 self.error_with_no_skip();178 }179 }180 pub(crate) fn error_with_recovery_set(181 &mut self,182 recovery_set: SyntaxKindSet,183 ) -> Option<CompletedMarker> {184 self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))185 }186 pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {187 self.error_with_recovery_set_no_default(SyntaxKindSet::ALL)188 }189190 pub fn error_with_recovery_set_no_default(191 &mut self,192 recovery_set: SyntaxKindSet,193 ) -> Option<CompletedMarker> {194 let expected_syntax = self195 .expected_syntax196 .take()197 .unwrap_or(ExpectedSyntax::Named("unknown"));198 self.expected_syntax_tracking_state199 .set(ExpectedSyntaxTrackingState::Unnamed);200201 if self.at_end() || self.at_ts(recovery_set) {202 203 204 205 206207 208 209 210 211 return None;212 }213214 let current_token = self.current();215216 217 218 219 220 221 self.clear_expected_syntaxes();222 self.last_error_token = self.offset;223224 let m = self.start();225 self.bump();226 Some(m.complete(self, SyntaxKind::ERROR))227 }228 fn bump_assert(&mut self, kind: SyntaxKind) {229 assert!(self.at(kind), "expected {:?}", kind);230 self.bump_remap(self.current());231 }232 fn bump(&mut self) {233 self.bump_remap(self.current());234 }235 fn bump_remap(&mut self, kind: SyntaxKind) {236 assert_ne!(self.offset, self.kinds.len(), "already at end");237 self.events.push(Event::Token { kind });238 self.offset += 1;239 self.clear_expected_syntaxes();240 }241 fn step(&self) {242 use std::fmt::Write;243 let steps = self.steps.get();244 if steps >= 15000000 {245 let mut out = "seems like parsing is stuck".to_owned();246 {247 let last = 20;248 write!(out, "\n\nLast {} events:", last).unwrap();249 for (i, event) in self250 .events251 .iter()252 .skip(self.events.len().saturating_sub(last))253 .enumerate()254 {255 write!(out, "\n{i}. {event:?}").unwrap();256 }257 }258 {259 let next = 20;260 write!(out, "\n\nNext {next} tokens:").unwrap();261 for (i, tok) in self.kinds.iter().skip(self.offset).take(next).enumerate() {262 write!(out, "\n{i}. {tok:?}").unwrap();263 }264 }265 panic!("{out}")266 }267 self.steps.set(steps + 1);268 }269 fn nth(&self, i: usize) -> SyntaxKind {270 self.step();271 let mut offset = self.offset;272 for _ in 0..i {273 offset += 1;274 }275 self.kinds.get(offset).copied().unwrap_or(EOF)276 }277 fn current(&self) -> SyntaxKind {278 self.nth(0)279 }280 #[must_use]281 pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {282 self.expected_syntax_tracking_state283 .set(ExpectedSyntaxTrackingState::Named);284 self.expected_syntax = Some(ExpectedSyntax::Named(name));285286 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))287 }288 pub fn at(&mut self, kind: SyntaxKind) -> bool {289 self.nth_at(0, kind)290 }291 pub fn nth_at(&mut self, n: usize, kind: SyntaxKind) -> bool {292 if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {293 self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));294 }295 self.nth(n) == kind296 }297 pub fn at_ts(&mut self, set: SyntaxKindSet) -> bool {298 set.contains(self.current())299 }300 pub fn at_end(&mut self) -> bool {301 self.at(EOF)302 }303}304pub(crate) struct ExpectedSyntaxGuard {305 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,306}307308impl ExpectedSyntaxGuard {309 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {310 Self {311 expected_syntax_tracking_state,312 }313 }314}315316impl Drop for ExpectedSyntaxGuard {317 fn drop(&mut self) {318 self.expected_syntax_tracking_state319 .set(ExpectedSyntaxTrackingState::Unnamed);320 }321}322323#[derive(Debug, Clone, Copy)]324enum ExpectedSyntaxTrackingState {325 Named,326 Unnamed,327}328329fn expr(p: &mut Parser) -> Option<CompletedMarker> {330 expr_binding_power(p, 0)331}332fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {333 let mut lhs = lhs(p)?;334335 while let Some(op) = BinaryOperatorKind::cast(p.current())336 .or_else(|| p.at(T!['{']).then(|| BinaryOperatorKind::MetaObjectApply))337 {338 let (left_binding_power, right_binding_power) = op.binding_power();339 if left_binding_power < minimum_binding_power {340 break;341 }342343 344 if op != BinaryOperatorKind::MetaObjectApply {345 p.bump();346 }347348 let m = lhs.wrap(p, LHS_EXPR).precede(p);349 let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();350 lhs = m.complete(351 p,352 if op == BinaryOperatorKind::MetaObjectApply {353 EXPR_OBJ_EXTEND354 } else {355 EXPR_BINARY356 },357 );358359 if !parsed_rhs {360 break;361 }362 }363 Some(lhs)364}365fn compspec(p: &mut Parser) {366 assert!(p.at(T![for]) || p.at(T![if]));367 if p.at(T![for]) {368 let m = p.start();369 p.bump();370 name(p);371 p.expect(T![in]);372 expr(p);373 m.complete(p, FOR_SPEC);374 } else if p.at(T![if]) {375 let m = p.start();376 p.bump();377 expr(p);378 m.complete(p, IF_SPEC);379 } else {380 unreachable!()381 }382}383fn comma(p: &mut Parser) -> bool {384 if p.at(T![,]) {385 p.bump();386 true387 } else {388 false389 }390}391fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {392 if p.at(T![,]) {393 p.bump();394 true395 } else if p.at_ts(set) {396 p.expect_with_no_skip(T![,]);397 p.bump();398 true399 } else {400 false401 }402}403fn field_name(p: &mut Parser) {404 let _e = p.expected_syntax_name("field name");405 let m = p.start();406 if p.at(T!['[']) {407 p.bump();408 expr(p);409 p.expect(T![']']);410 m.complete(p, FIELD_NAME_DYNAMIC);411 } else if p.at(IDENT) {412 name(p);413 m.complete(p, FIELD_NAME_FIXED);414 } else if Text::can_cast(p.current()) {415 text(p);416 m.complete(p, FIELD_NAME_FIXED);417 } else {418 p.error_with_recovery_set(TS![;]);419 }420}421fn visibility(p: &mut Parser) {422 if p.at_ts(TS![: :: :::]) {423 p.bump()424 } else {425 p.error_with_recovery_set(TS![]);426 }427}428fn field(p: &mut Parser) {429 let m = p.start();430 field_name(p);431 let plus = if p.at(T![+]) {432 let r = p.start_ranger();433 p.bump();434 Some(r.finish(p))435 } else {436 None437 };438 let params = if p.at(T!['(']) {439 440 441 442 params_desc(p);443 444 445 446 447 448 true449 } else {450 false451 };452 visibility(p);453 expr(p);454455 if params {456 m.complete(p, FIELD_METHOD)457 } else {458 m.complete(p, FIELD_NORMAL)459 };460}461fn assertion(p: &mut Parser) {462 let m = p.start();463 p.bump_assert(T![assert]);464 expr(p).map(|c| c.wrap(p, LHS_EXPR));465 if p.at(T![:]) {466 p.bump();467 expr(p);468 }469 m.complete(p, ASSERTION);470}471fn object(p: &mut Parser) -> CompletedMarker {472 let m_t = p.start();473 let m = p.start();474 p.bump_assert(T!['{']);475476 loop {477 if p.at(T!['}']) {478 p.bump();479 break;480 }481 let m = p.start();482 if p.at(T![local]) {483 obj_local(p);484 m.complete(p, MEMBER_BIND_STMT)485 } else if p.at(T![assert]) {486 assertion(p);487 m.complete(p, MEMBER_ASSERT_STMT)488 } else {489 field(p);490 while p.at(T![for]) || p.at(T![if]) {491 compspec(p)492 }493 m.complete(p, MEMBER_FIELD)494 };495 if comma_with_alternatives(p, SyntaxKindSet::new(&[T![=]])) {496 continue;497 }498 p.expect(R_BRACE);499 break;500 }501502 m.complete(p, OBJ_BODY_MEMBER_LIST);503 m_t.complete(p, EXPR_OBJECT)504}505fn param(p: &mut Parser) {506 let m = p.start();507 destruct(p);508 if p.at(T![=]) {509 p.bump();510 expr(p);511 }512 m.complete(p, PARAM);513}514fn params_desc(p: &mut Parser) -> CompletedMarker {515 let m = p.start();516 p.bump_assert(T!['(']);517518 loop {519 if p.at(T![')']) {520 p.bump();521 break;522 }523 param(p);524 if comma(p) {525 continue;526 }527 p.expect(T![')']);528 break;529 }530531 m.complete(p, PARAMS_DESC)532}533fn args_desc(p: &mut Parser) {534 let m = p.start();535 p.bump_assert(T!['(']);536537 let started_named = Cell::new(false);538539 loop {540 if p.at(T![')']) {541 break;542 }543544 let m = p.start();545 if p.at(IDENT) && p.nth_at(1, T![=]) {546 name(p);547 p.bump();548 expr(p);549 m.complete(p, ARG);550 started_named.set(true);551 } else {552 expr(p);553 m.complete(p, ARG);554 }555 if comma(p) {556 continue;557 }558 break;559 }560 p.expect(T![')']);561 if p.at(T![tailstrict]) {562 p.bump()563 }564 m.complete(p, ARGS_DESC);565}566567fn array(p: &mut Parser) -> CompletedMarker {568 569 let m = p.start();570 p.bump_assert(T!['[']);571572 573 let mut compspecs = Vec::with_capacity(1);574 let mut elems = 0;575576 loop {577 if p.at(T![']']) {578 p.bump();579 break;580 }581 elems += 1;582 expr(p);583 let c = p.start_ranger();584 let mut had_spec = false;585 while p.at(T![for]) || p.at(T![if]) {586 had_spec = true;587 compspec(p)588 }589 if had_spec {590 compspecs.push(c.finish(p));591 }592 if comma(p) {593 continue;594 }595 p.expect(T![']']);596 break;597 }598599 if elems > 1 && !compspecs.is_empty() {600 for spec in compspecs {601 602 603 604 605 }606607 m.complete(p, EXPR_ARRAY)608 } else if !compspecs.is_empty() {609 m.complete(p, EXPR_ARRAY_COMP)610 } else {611 m.complete(p, EXPR_ARRAY)612 }613}614615#[must_use]616fn slice_desc_or_index(p: &mut Parser) -> bool {617 let m = p.start();618 p.bump();619 620 621 if !p.at(T![:]) && !p.at(T![::]) {622 expr(p);623 }624 if p.at(T![:]) {625 p.bump();626 627 if !p.at(T![']']) {628 expr(p).map(|c| c.wrap(p, SLICE_DESC_END));629 }630 if p.at(T![:]) {631 p.bump();632 633 if !p.at(T![']']) {634 expr(p).map(|c| c.wrap(p, SLICE_DESC_STEP));635 }636 }637 } else if p.at(T![::]) {638 p.bump();639 640 if !p.at(T![']']) {641 expr(p).map(|c| c.wrap(p, SLICE_DESC_END));642 }643 } else {644 645 p.expect(T![']']);646 m.forget(p);647 return false;648 }649 p.expect(T![']']);650 m.complete(p, SLICE_DESC);651 true652}653fn lhs(p: &mut Parser) -> Option<CompletedMarker> {654 let mut lhs = lhs_basic(p)?;655656 loop {657 if p.at(T![.]) {658 let m = lhs.precede(p);659 p.bump();660 name(p);661 lhs = m.complete(p, EXPR_INDEX);662 } else if p.at(T!['[']) {663 if slice_desc_or_index(p) {664 lhs = lhs.precede(p).complete(p, EXPR_SLICE);665 } else {666 lhs = lhs667 .wrap(p, LHS_EXPR)668 .precede(p)669 .complete(p, EXPR_INDEX_EXPR);670 }671 } else if p.at(T!['(']) {672 let m = lhs.precede(p);673 args_desc(p);674 lhs = m.complete(p, EXPR_APPLY);675 } else {676 break;677 }678 }679680 Some(lhs)681}682fn name(p: &mut Parser) {683 let m = p.start();684 p.expect(IDENT);685 m.complete(p, NAME);686}687fn destruct_rest(p: &mut Parser) {688 let m = p.start();689 p.bump_assert(T![...]);690 if p.at(IDENT) {691 p.bump()692 }693 m.complete(p, DESTRUCT_REST);694}695fn destruct_object_field(p: &mut Parser) {696 let m = p.start();697 name(p);698 if p.at(T![:]) {699 p.bump();700 destruct(p);701 };702 if p.at(T![=]) {703 p.bump();704 expr(p);705 }706 m.complete(p, DESTRUCT_OBJECT_FIELD);707}708fn obj_local(p: &mut Parser) {709 let m = p.start();710 p.bump_assert(T![local]);711 bind(p);712 m.complete(p, OBJ_LOCAL);713}714fn destruct(p: &mut Parser) -> CompletedMarker {715 let m = p.start();716 if p.at(T![?]) {717 p.bump();718 m.complete(p, DESTRUCT_SKIP)719 } else if p.at(T!['[']) {720 p.bump();721 let mut had_rest = false;722 loop {723 if p.at(T![']']) {724 p.bump();725 break;726 } else if p.at(T![...]) {727 let m_err = p.start_ranger();728 destruct_rest(p);729 730 731 732 had_rest = true;733 } else {734 destruct(p);735 }736 if p.at(T![,]) {737 p.bump();738 continue;739 }740 p.expect(T![']']);741 break;742 }743 m.complete(p, DESTRUCT_ARRAY)744 } else if p.at(T!['{']) {745 p.bump();746 let mut had_rest = false;747 loop {748 if p.at(T!['}']) {749 p.bump();750 break;751 } else if p.at(T![...]) {752 let m_err = p.start_ranger();753 destruct_rest(p);754 755 756 757 had_rest = true;758 } else {759 if had_rest {760 p.error_with_recovery_set(TS![]);761 }762 destruct_object_field(p);763 }764 if p.at(T![,]) {765 p.bump();766 continue;767 }768 p.expect(T!['}']);769 break;770 }771 m.complete(p, DESTRUCT_OBJECT)772 } else if p.at(IDENT) {773 name(p);774 m.complete(p, DESTRUCT_FULL)775 } else {776 m.complete(p, ERROR)777 }778}779fn bind(p: &mut Parser) {780 let m = p.start();781 if p.at(IDENT) && p.nth_at(1, T!['(']) {782 name(p);783 params_desc(p);784 p.expect(T![=]);785 expr(p);786 m.complete(p, BIND_FUNCTION)787 } else {788 destruct(p);789 p.expect(T![=]);790 expr(p);791 m.complete(p, BIND_DESTRUCT)792 };793}794fn text(p: &mut Parser) {795 assert!(Text::can_cast(p.current()));796 p.bump();797}798fn number(p: &mut Parser) {799 assert!(Number::can_cast(p.current()));800 p.bump();801}802fn literal(p: &mut Parser) {803 assert!(Literal::can_cast(p.current()));804 p.bump();805}806fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {807 let _e = p.expected_syntax_name("value");808 Some(if Literal::can_cast(p.current()) {809 let m = p.start();810 literal(p);811 m.complete(p, EXPR_LITERAL)812 } else if Text::can_cast(p.current()) {813 let m = p.start();814 text(p);815 m.complete(p, EXPR_STRING)816 } else if Number::can_cast(p.current()) {817 let m = p.start();818 number(p);819 m.complete(p, EXPR_NUMBER)820 } else if p.at(IDENT) {821 let m = p.start();822 name(p);823 m.complete(p, EXPR_VAR)824 } else if p.at(INTRINSIC_THIS_FILE) {825 let m = p.start();826 p.bump();827 m.complete(p, EXPR_INTRINSIC_THIS_FILE)828 } else if p.at(INTRINSIC_ID) {829 let m = p.start();830 p.bump();831 m.complete(p, EXPR_INTRINSIC_ID)832 } else if p.at(INTRINSIC) {833 let m = p.start();834 p.bump();835 p.expect(T!['(']);836 name(p);837 p.expect(T![')']);838 m.complete(p, EXPR_INTRINSIC)839 } else if p.at(T![if]) {840 let m = p.start();841 p.bump();842 expr(p);843 p.expect(T![then]);844 expr(p).map(|c| c.wrap(p, TRUE_EXPR));845 if p.at(T![else]) {846 p.bump();847 expr(p).map(|c| c.wrap(p, FALSE_EXPR));848 }849 m.complete(p, EXPR_IF_THEN_ELSE)850 } else if p.at(T!['[']) {851 array(p)852 } else if p.at(T!['{']) {853 object(p)854 } else if p.at(T![local]) {855 let m = p.start();856 p.bump();857 loop {858 if p.at(T![;]) {859 p.bump();860 break;861 }862 bind(p);863864 if p.at(T![,]) {865 p.bump();866 continue;867 }868 p.expect(T![;]);869 break;870 }871 expr(p);872 m.complete(p, EXPR_LOCAL)873 } else if p.at(T![function]) {874 let m = p.start();875 p.bump();876 params_desc(p);877 expr(p);878 m.complete(p, EXPR_FUNCTION)879 } else if p.at(T![error]) {880 let m = p.start();881 p.bump();882 expr(p);883 m.complete(p, EXPR_ERROR)884 } else if p.at(T![assert]) {885 let m = p.start();886 assertion(p);887 p.expect(T![;]);888 expr(p);889 m.complete(p, EXPR_ASSERT)890 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {891 let m = p.start();892 p.bump();893 text(p);894 m.complete(p, EXPR_IMPORT)895 } else if let Some(op) = UnaryOperatorKind::cast(p.current()) {896 let ((), right_binding_power) = op.binding_power();897898 let m = p.start();899 p.bump();900 expr_binding_power(p, right_binding_power);901 m.complete(p, EXPR_UNARY)902 } else if p.at(T!['(']) {903 let m = p.start();904 p.bump();905 expr(p);906 p.expect(T![')']);907 m.complete(p, EXPR_PARENED)908 } else {909 p.error_with_recovery_set(TS![]);910 return None;911 })912}913914impl Parse {915 pub fn syntax(&self) -> SyntaxNode {916 SyntaxNode::new_root(self.green_node.clone())917 }918}