1use std::{cell::Cell, fmt, rc::Rc};23use rowan::{GreenNode, TextRange};45use crate::{6 event::Event,7 marker::{CompletedMarker, Marker},8 nodes::{BinaryOperatorKind, Literal, Number, Text, UnaryOperatorKind},9 token_set::SyntaxKindSet,10 AstToken, SyntaxKind,11 SyntaxKind::*,12 SyntaxNode, T, TS,13};1415pub struct Parse {16 pub green_node: GreenNode,17 pub errors: Vec<LocatedSyntaxError>,18}1920pub struct Parser {21 22 kinds: Vec<SyntaxKind>,23 pub offset: usize,24 pub events: Vec<Event>,25 pub entered: u32,26 pub hints: Vec<(u32, TextRange, String)>,27 pub last_error_token: usize,28 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,29 steps: Cell<u64>,30}3132#[derive(Clone, Debug)]33pub enum SyntaxError {34 Unexpected {35 expected: ExpectedSyntax,36 found: SyntaxKind,37 },38 Missing {39 expected: ExpectedSyntax,40 },41 Custom {42 error: String,43 },44 Hint {45 error: String,46 },47}48impl fmt::Display for SyntaxError {49 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {50 match self {51 SyntaxError::Unexpected { expected, found } => {52 write!(f, "unexpected {found:?}, expecting {expected}")53 }54 SyntaxError::Missing { expected } => write!(f, "missing {expected}"),55 SyntaxError::Custom { error } => write!(f, "{error}"),56 SyntaxError::Hint { error } => write!(f, "{error}"),57 }58 }59}6061#[derive(Debug)]62pub struct LocatedSyntaxError {63 pub error: SyntaxError,64 pub range: TextRange,65}6667impl Parser {68 pub fn new(kinds: Vec<SyntaxKind>) -> Self {69 Self {70 kinds,71 offset: 0,72 events: vec![],73 entered: 0,74 last_error_token: 0,75 hints: vec![],76 expected_syntax_tracking_state: Rc::new(Cell::new(ExpectedSyntax::Unnamed(TS![]))),77 steps: Cell::new(0),78 }79 }80 pub fn clear_outdated_hints(&mut self) {81 let amount = self82 .hints83 .iter()84 .rev()85 .take_while(|h| h.0 > self.entered)86 .count();87 self.hints.truncate(self.hints.len() - amount);88 }89 fn clear_expected_syntaxes(&self) {90 self.expected_syntax_tracking_state91 .set(ExpectedSyntax::Unnamed(TS![]));92 }93 pub fn start(&mut self) -> Marker {94 let start_event_idx = self.events.len();95 self.events.push(Event::Pending);96 self.entered += 1;97 Marker::new(start_event_idx)98 }99 100 101 102 103 pub fn parse(mut self) -> Vec<Event> {104 let m = self.start();105 expr(&mut self);106 if !self.at(EOF) {107 let m = self.start();108 while !self.at(EOF) {109 self.bump();110 }111 m.complete_error(&mut self, "unexpected tokens after end");112 }113 m.complete(&mut self, SOURCE_FILE);114115 self.events116 }117118 pub(crate) fn expect(&mut self, kind: SyntaxKind) {119 self.expect_with_recovery_set(kind, TS![]);120 }121122 pub(crate) fn expect_with_recovery_set(123 &mut self,124 kind: SyntaxKind,125 recovery_set: SyntaxKindSet,126 ) {127 if self.at(kind) {128 if kind != EOF {129 self.bump();130 }131 } else {132 self.error_with_recovery_set(recovery_set);133 }134 }135136 137 138 139 140 141 142 143 pub fn error_with_no_skip(&mut self) -> CompletedMarker {144 self.error_with_recovery_set(SyntaxKindSet::ALL)145 }146147 pub fn error_with_recovery_set(&mut self, recovery_set: SyntaxKindSet) -> CompletedMarker {148 let expected = self.expected_syntax_tracking_state.get();149 self.expected_syntax_tracking_state150 .set(ExpectedSyntax::Unnamed(TS![]));151152 if self.at_end() || self.at_ts(recovery_set) {153 let m = self.start();154 return m.complete_missing(self, expected);155 }156157 let current_token = self.current();158159 self.last_error_token = self.offset;160161 let m = self.start();162 self.bump();163 let m = m.complete_unexpected(self, expected, current_token);164 self.clear_expected_syntaxes();165 m166 }167 fn bump_assert(&mut self, kind: SyntaxKind) {168 assert!(self.at(kind), "expected {kind:?}");169 self.bump_remap(self.current());170 }171 fn bump(&mut self) {172 self.bump_remap(self.current());173 }174 fn bump_remap(&mut self, kind: SyntaxKind) {175 assert_ne!(self.offset, self.kinds.len(), "already at end");176 self.events.push(Event::Token { kind });177 self.offset += 1;178 self.clear_expected_syntaxes();179 }180 fn step(&self) {181 use std::fmt::Write;182 let steps = self.steps.get();183 if steps >= 15_000_000 {184 let mut out = "seems like parsing is stuck".to_owned();185 {186 let last = 20;187 write!(out, "\n\nLast {last} events:").unwrap();188 for (i, event) in self189 .events190 .iter()191 .skip(self.events.len().saturating_sub(last))192 .enumerate()193 {194 write!(out, "\n{i}. {event:?}").unwrap();195 }196 }197 {198 let next = 20;199 write!(out, "\n\nNext {next} tokens:").unwrap();200 for (i, tok) in self.kinds.iter().skip(self.offset).take(next).enumerate() {201 write!(out, "\n{i}. {tok:?}").unwrap();202 }203 }204 panic!("{out}")205 }206 self.steps.set(steps + 1);207 }208 fn nth(&self, i: usize) -> SyntaxKind {209 self.step();210 let mut offset = self.offset;211 for _ in 0..i {212 offset += 1;213 }214 self.kinds.get(offset).copied().unwrap_or(EOF)215 }216 fn current(&self) -> SyntaxKind {217 self.nth(0)218 }219 #[must_use]220 pub(crate) fn expected_syntax_name(&self, name: &'static str) -> ExpectedSyntaxGuard {221 self.expected_syntax_tracking_state222 .set(ExpectedSyntax::Named(name));223224 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))225 }226 pub fn at(&self, kind: SyntaxKind) -> bool {227 self.nth_at(0, kind)228 }229 pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {230 if n == 0 {231 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {232 let kinds = kinds.with(kind);233 self.expected_syntax_tracking_state234 .set(ExpectedSyntax::Unnamed(kinds));235 }236 }237 self.nth(n) == kind238 }239 pub fn at_ts(&self, set: SyntaxKindSet) -> bool {240 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {241 let kinds = kinds.union(set);242 self.expected_syntax_tracking_state243 .set(ExpectedSyntax::Unnamed(kinds));244 }245 set.contains(self.current())246 }247 pub fn at_end(&self) -> bool {248 self.at(EOF)249 }250}251pub struct ExpectedSyntaxGuard {252 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,253}254255impl ExpectedSyntaxGuard {256 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>) -> Self {257 Self {258 expected_syntax_tracking_state,259 }260 }261}262263impl Drop for ExpectedSyntaxGuard {264 fn drop(&mut self) {265 self.expected_syntax_tracking_state266 .set(ExpectedSyntax::Unnamed(TS![]));267 }268}269270#[derive(Clone, Debug, Copy)]271pub enum ExpectedSyntax {272 Named(&'static str),273 Unnamed(SyntaxKindSet),274}275impl fmt::Display for ExpectedSyntax {276 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {277 match self {278 Self::Named(name) => write!(f, "{name}"),279 Self::Unnamed(set) => write!(f, "{set}"),280 }281 }282}283284fn expr(p: &mut Parser) -> CompletedMarker {285 let m = p.start();286 while p.at(T![local]) || p.at(T![assert]) {287 let m = p.start();288289 if p.at(T![local]) {290 p.bump();291 loop {292 if p.at(T![;]) {293 p.bump();294 break;295 }296 bind(p);297298 if p.at(T![,]) {299 p.bump();300 continue;301 }302 p.expect(T![;]);303 break;304 }305 m.complete(p, STMT_LOCAL);306 } else {307 assertion(p);308 p.expect(T![;]);309 m.complete(p, STMT_ASSERT);310 }311 }312 match expr_binding_power(p, 0) {313 Ok(m) | Err(m) => m,314 };315 m.complete(p, EXPR)316}317fn expr_binding_power(318 p: &mut Parser,319 minimum_binding_power: u8,320) -> Result<CompletedMarker, CompletedMarker> {321 let mut lhs = lhs(p)?;322323 while let Some(op) = BinaryOperatorKind::cast(p.current())324 .or_else(|| p.at(T!['{']).then_some(BinaryOperatorKind::MetaObjectApply))325 {326 let (left_binding_power, right_binding_power) = op.binding_power();327 if left_binding_power < minimum_binding_power {328 break;329 }330331 let m = lhs.wrap(p, EXPR, false);332333 334 if op != BinaryOperatorKind::MetaObjectApply {335 p.bump();336 }337338 let m = m.precede(p);339 let parsed_rhs = expr_binding_power(p, right_binding_power)340 .map(|v| v.precede(p).complete(p, EXPR))341 .is_ok();342 lhs = m.complete(343 p,344 if op == BinaryOperatorKind::MetaObjectApply {345 EXPR_OBJ_EXTEND346 } else {347 EXPR_BINARY348 },349 );350351 if !parsed_rhs {352 break;353 }354 }355 Ok(lhs)356}357358const COMPSPEC: SyntaxKindSet = TS![for if];359fn compspec(p: &mut Parser) -> CompletedMarker {360 assert!(p.at_ts(COMPSPEC));361 if p.at(T![for]) {362 let m = p.start();363 p.bump();364 destruct(p);365 p.expect(T![in]);366 expr(p);367 m.complete(p, FOR_SPEC)368 } else if p.at(T![if]) {369 let m = p.start();370 p.bump();371 expr(p);372 m.complete(p, IF_SPEC)373 } else {374 unreachable!()375 }376}377378fn comma(p: &mut Parser) -> bool {379 comma_with_alternatives(p, TS![])380}381fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {382 if p.at(T![,]) {383 p.bump();384 true385 } else if p.at_ts(set) {386 let _ex = p.expected_syntax_name("comma");387 p.expect_with_recovery_set(T![,], TS![]);388 true389 } else {390 false391 }392}393394fn field_name(p: &mut Parser) {395 let _e = p.expected_syntax_name("field name");396 let m = p.start();397 if p.at(T!['[']) {398 p.bump();399 expr(p);400 p.expect(T![']']);401 m.complete(p, FIELD_NAME_DYNAMIC);402 } else if p.at(IDENT) {403 name(p);404 m.complete(p, FIELD_NAME_FIXED);405 } else if Text::can_cast(p.current()) {406 text(p);407 m.complete(p, FIELD_NAME_FIXED);408 } else {409 m.forget(p);410 p.error_with_recovery_set(TS![; : :: ::: '(']);411 }412}413fn visibility(p: &mut Parser) {414 if p.at_ts(TS![: :: :::]) {415 p.bump();416 } else {417 p.error_with_recovery_set(TS![=]);418 }419}420fn assertion(p: &mut Parser) {421 let m = p.start();422 p.bump_assert(T![assert]);423 expr(p);424 if p.at(T![:]) {425 p.bump();426 expr(p);427 }428 m.complete(p, ASSERTION);429}430fn object(p: &mut Parser) -> CompletedMarker {431 let m_t = p.start();432 let m = p.start();433 p.bump_assert(T!['{']);434435 let mut elems = 0;436 let mut compspecs = Vec::new();437 let mut asserts = Vec::new();438 loop {439 if p.at(T!['}']) {440 p.bump();441 break;442 }443 if p.at_ts(TS![for]) {444 if elems == 0 {445 let m = p.start();446 m.complete_missing(p, ExpectedSyntax::Named("field definition"));447 }448 while p.at_ts(COMPSPEC) {449 compspecs.push(compspec(p));450 }451 if comma_with_alternatives(p, TS![;]) {452 continue;453 }454 p.expect(R_BRACE);455 break;456 }457 let m = p.start();458 if p.at(T![local]) {459 obj_local(p);460 m.complete(p, MEMBER_BIND_STMT);461 } else if p.at(T![assert]) {462 assertion(p);463 asserts.push(m.complete(p, MEMBER_ASSERT_STMT));464 } else {465 field_name(p);466 if p.at(T![+]) {467 p.bump();468 }469 let params = if p.at(T!['(']) {470 params_desc(p);471 visibility(p);472 expr(p);473 true474 } else if p.at_ts(TS![: :: :::]) && p.nth_at(1, T![function]) {475 visibility(p);476 p.bump_assert(T![function]);477 params_desc(p);478 expr(p);479 true480 } else {481 visibility(p);482 expr(p);483 false484 };485 elems += 1;486487 if params {488 m.complete(p, MEMBER_FIELD_METHOD)489 } else {490 m.complete(p, MEMBER_FIELD_NORMAL)491 };492 };493 while p.at_ts(COMPSPEC) {494 compspecs.push(compspec(p));495 }496 if comma_with_alternatives(p, TS![;]) {497 continue;498 }499 p.expect(R_BRACE);500 break;501 }502503 if elems > 1 && !compspecs.is_empty() {504 for errored in compspecs {505 errored.wrap_error(506 p,507 "compspec may only be used if there is only one object element",508 true,509 );510 }511 m.complete(p, OBJ_BODY_MEMBER_LIST);512 } else if !compspecs.is_empty() {513 for errored in asserts {514 errored.wrap_error(p, "asserts can't be used in object comprehensions", true);515 }516 m.complete(p, OBJ_BODY_COMP);517 } else {518 m.complete(p, OBJ_BODY_MEMBER_LIST);519 }520 m_t.complete(p, EXPR_OBJECT)521}522fn param(p: &mut Parser) {523 let m = p.start();524 destruct(p);525 if p.at(T![=]) {526 p.bump();527 expr(p);528 }529 m.complete(p, PARAM);530}531fn params_desc(p: &mut Parser) -> CompletedMarker {532 let m = p.start();533 p.bump_assert(T!['(']);534535 loop {536 if p.at(T![')']) {537 p.bump();538 break;539 }540 param(p);541 if comma(p) {542 continue;543 }544 p.expect(T![')']);545 break;546 }547548 m.complete(p, PARAMS_DESC)549}550fn args_desc(p: &mut Parser) {551 let m = p.start();552 p.bump_assert(T!['(']);553554 let started_named = Cell::new(false);555 let mut unnamed_after_named = Vec::new();556557 loop {558 if p.at(T![')']) {559 break;560 }561562 let m = p.start();563 if p.at(IDENT) && p.nth_at(1, T![=]) {564 name(p);565 p.bump();566 expr(p);567 m.complete(p, ARG);568 started_named.set(true);569 } else {570 expr(p);571 let arg = m.complete(p, ARG);572 if started_named.get() {573 unnamed_after_named.push(arg);574 }575 }576 if comma(p) {577 continue;578 }579 break;580 }581 p.expect(T![')']);582 if p.at(T![tailstrict]) {583 p.bump();584 }585586 for errored in unnamed_after_named {587 errored.wrap_error(p, "can't use positional arguments after named", true);588 }589590 m.complete(p, ARGS_DESC);591}592593fn array(p: &mut Parser) -> CompletedMarker {594 595 let m = p.start();596 p.bump_assert(T!['[']);597598 let mut compspecs = Vec::new();599 let mut elems = 0;600601 loop {602 if p.at(T![']']) {603 p.bump();604 break;605 }606 if elems != 0 && p.at_ts(TS![for]) {607 while p.at_ts(COMPSPEC) {608 compspecs.push(compspec(p));609 }610 if comma(p) {611 continue;612 }613 p.expect(T![']']);614 break;615 }616 expr(p);617 elems += 1;618 while p.at_ts(COMPSPEC) {619 compspecs.push(compspec(p));620 }621 if comma(p) {622 continue;623 }624 p.expect(T![']']);625 break;626 }627628 if elems > 1 && !compspecs.is_empty() {629 for spec in compspecs {630 spec.wrap_error(631 p,632 "compspec may only be used if there is only one array element",633 true,634 );635 }636637 m.complete(p, EXPR_ARRAY)638 } else if !compspecs.is_empty() {639 m.complete(p, EXPR_ARRAY_COMP)640 } else {641 m.complete(p, EXPR_ARRAY)642 }643}644645#[must_use]646fn slice_desc_or_index(p: &mut Parser) -> bool {647 let m = p.start();648 p.bump();649 650 651 if !p.at(T![:]) && !p.at(T![::]) {652 expr(p);653 }654 if p.at(T![:]) {655 p.bump();656 657 if !p.at(T![']']) {658 expr(p).wrap(p, SLICE_DESC_END, true);659 }660 if p.at(T![:]) {661 p.bump();662 663 if !p.at(T![']']) {664 expr(p).wrap(p, SLICE_DESC_STEP, true);665 }666 }667 } else if p.at(T![::]) {668 p.bump();669 670 if !p.at(T![']']) {671 expr(p).wrap(p, SLICE_DESC_END, true);672 }673 } else {674 675 p.expect(T![']']);676 m.forget(p);677 return false;678 }679 p.expect(T![']']);680 m.complete(p, SLICE_DESC);681 true682}683684fn suffix(p: &mut Parser) {685 loop {686 let start = p.start();687 let _marker: CompletedMarker = if p.at(T![?]) {688 p.bump();689 p.expect(T![.]);690 if p.at(IDENT) {691 name(p);692 start.complete(p, SUFFIX_INDEX)693 } else if p.at(T!['[']) {694 p.bump();695 expr(p);696 p.expect(T![']']);697 start.complete(p, SUFFIX_INDEX_EXPR)698 } else {699 start.complete_missing(p, ExpectedSyntax::Named("index"))700 }701 } else if p.at(T![.]) {702 p.bump();703 name(p);704 start.complete(p, SUFFIX_INDEX)705 } else if p.at(T!['[']) {706 if slice_desc_or_index(p) {707 start.complete(p, SUFFIX_SLICE)708 } else {709 start.complete(p, SUFFIX_INDEX_EXPR)710 }711 } else if p.at(T!['(']) {712 args_desc(p);713 start.complete(p, SUFFIX_APPLY)714 } else {715 start.forget(p);716 break;717 };718 }719}720721fn lhs(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {722 let lhs = lhs_basic(p)?;723724 suffix(p);725726 Ok(lhs)727}728fn name(p: &mut Parser) {729 let m = p.start();730 p.expect(IDENT);731 m.complete(p, NAME);732}733fn destruct_rest(p: &mut Parser) {734 let m = p.start();735 p.bump_assert(T![...]);736 if p.at(IDENT) {737 p.bump();738 }739 m.complete(p, DESTRUCT_REST);740}741fn destruct_object_field(p: &mut Parser) {742 let m = p.start();743 name(p);744 if p.at(T![:]) {745 p.bump();746 destruct(p);747 };748 if p.at(T![=]) {749 p.bump();750 expr(p);751 }752 m.complete(p, DESTRUCT_OBJECT_FIELD);753}754fn obj_local(p: &mut Parser) {755 let m = p.start();756 p.bump_assert(T![local]);757 bind(p);758 m.complete(p, OBJ_LOCAL);759}760fn destruct(p: &mut Parser) -> CompletedMarker {761 let m = p.start();762 let _ex = p.expected_syntax_name("destruction specifier");763 if p.at(T![?]) {764 p.bump();765 m.complete(p, DESTRUCT_SKIP)766 } else if p.at(T!['[']) {767 p.bump();768 769 loop {770 if p.at(T![']']) {771 p.bump();772 break;773 } else if p.at(T![...]) {774 775 destruct_rest(p);776 777 778 779 780 } else {781 destruct(p);782 }783 if p.at(T![,]) {784 p.bump();785 continue;786 }787 p.expect(T![']']);788 break;789 }790 m.complete(p, DESTRUCT_ARRAY)791 } else if p.at(T!['{']) {792 p.bump();793 let mut had_rest = false;794 loop {795 if p.at(T!['}']) {796 p.bump();797 break;798 } else if p.at(T![...]) {799 800 destruct_rest(p);801 802 803 804 had_rest = true;805 } else {806 if had_rest {807 p.error_with_recovery_set(TS![]);808 }809 destruct_object_field(p);810 }811 if p.at(T![,]) {812 p.bump();813 continue;814 }815 p.expect(T!['}']);816 break;817 }818 m.complete(p, DESTRUCT_OBJECT)819 } else if p.at(IDENT) {820 name(p);821 m.complete(p, DESTRUCT_FULL)822 } else {823 m.forget(p);824 p.error_with_recovery_set(TS![; , '}', '(', :])825 }826}827fn bind(p: &mut Parser) {828 let m = p.start();829 if p.at(IDENT) && p.nth_at(1, T!['(']) {830 name(p);831 params_desc(p);832 p.expect(T![=]);833 expr(p);834 m.complete(p, BIND_FUNCTION)835 } else if p.at(IDENT) && p.nth_at(1, T![=]) && p.nth_at(2, T![function]) {836 name(p);837 p.expect(T![=]);838 p.expect(T![function]);839 params_desc(p);840 expr(p);841 m.complete(p, BIND_FUNCTION)842 } else {843 destruct(p);844 p.expect(T![=]);845 expr(p);846 m.complete(p, BIND_DESTRUCT)847 };848}849fn text(p: &mut Parser) {850 assert!(Text::can_cast(p.current()));851 p.bump();852}853fn number(p: &mut Parser) {854 assert!(Number::can_cast(p.current()));855 p.bump();856}857fn literal(p: &mut Parser) {858 assert!(Literal::can_cast(p.current()));859 p.bump();860}861fn lhs_basic(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {862 let _e = p.expected_syntax_name("expression");863 Ok(if Literal::can_cast(p.current()) {864 let m = p.start();865 literal(p);866 m.complete(p, EXPR_LITERAL)867 } else if Text::can_cast(p.current()) {868 let m = p.start();869 text(p);870 m.complete(p, EXPR_STRING)871 } else if Number::can_cast(p.current()) {872 let m = p.start();873 number(p);874 m.complete(p, EXPR_NUMBER)875 } else if p.at(IDENT) {876 let m = p.start();877 name(p);878 m.complete(p, EXPR_VAR)879 } else if p.at(T![if]) {880 let m = p.start();881 p.bump();882 expr(p);883 p.expect(T![then]);884 expr(p).wrap(p, TRUE_EXPR, true);885 if p.at(T![else]) {886 p.bump();887 expr(p).wrap(p, FALSE_EXPR, true);888 }889 m.complete(p, EXPR_IF_THEN_ELSE)890 } else if p.at(T!['[']) {891 array(p)892 } else if p.at(T!['{']) {893 object(p)894 } else if p.at(T![function]) {895 let m = p.start();896 p.bump();897 params_desc(p);898 expr(p);899 m.complete(p, EXPR_FUNCTION)900 } else if p.at(T![error]) {901 let m = p.start();902 p.bump();903 expr(p);904 m.complete(p, EXPR_ERROR)905 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {906 let m = p.start();907 p.bump();908 text(p);909 m.complete(p, EXPR_IMPORT)910 } else if let Some(op) = UnaryOperatorKind::cast(p.current()) {911 let ((), right_binding_power) = op.binding_power();912913 let m = p.start();914 p.bump();915 let _ = expr_binding_power(p, right_binding_power);916 m.complete(p, EXPR_UNARY)917 } else if p.at(T!['(']) {918 let m = p.start();919 p.bump();920 expr(p);921 p.expect(T![')']);922 m.complete(p, EXPR_PARENED)923 } else {924 return Err(p.error_with_no_skip());925 })926}927928impl Parse {929 pub fn syntax(&self) -> SyntaxNode {930 SyntaxNode::new_root(self.green_node.clone())931 }932}