1use std::{cell::Cell, fmt, rc::Rc};23use rowan::{GreenNode, TextRange};45use crate::{6 event::Event,7 marker::{CompletedMarker, Marker},8 nodes::{BinaryOperatorKind, Literal, Number, Text, UnaryOperatorKind},9 token_set::SyntaxKindSet,10 AstToken, SyntaxKind,11 SyntaxKind::*,12 SyntaxNode, T, TS,13};1415pub struct Parse {16 pub green_node: GreenNode,17 pub errors: Vec<LocatedSyntaxError>,18}1920pub struct Parser {21 22 kinds: Vec<SyntaxKind>,23 pub offset: usize,24 pub events: Vec<Event>,25 pub entered: u32,26 pub hints: Vec<(u32, TextRange, String)>,27 pub last_error_token: usize,28 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,29 steps: Cell<u64>,30}3132#[derive(Clone, Debug)]33pub enum SyntaxError {34 Unexpected {35 expected: ExpectedSyntax,36 found: SyntaxKind,37 },38 Missing {39 expected: ExpectedSyntax,40 },41 Custom {42 error: String,43 },44 Hint {45 error: String,46 },47}4849#[derive(Debug)]50pub struct LocatedSyntaxError {51 pub error: SyntaxError,52 pub range: TextRange,53}5455impl Parser {56 pub fn new(kinds: Vec<SyntaxKind>) -> Self {57 Self {58 kinds,59 offset: 0,60 events: vec![],61 entered: 0,62 last_error_token: 0,63 hints: vec![],64 expected_syntax_tracking_state: Rc::new(Cell::new(ExpectedSyntax::Unnamed(TS![]))),65 steps: Cell::new(0),66 }67 }68 pub fn clear_outdated_hints(&mut self) {69 let amount = self70 .hints71 .iter()72 .rev()73 .take_while(|h| h.0 > self.entered)74 .count();75 self.hints.truncate(self.hints.len() - amount);76 }77 fn clear_expected_syntaxes(&self) {78 self.expected_syntax_tracking_state79 .set(ExpectedSyntax::Unnamed(TS![]));80 }81 pub fn start(&mut self) -> Marker {82 let start_event_idx = self.events.len();83 self.events.push(Event::Pending);84 self.entered += 1;85 Marker::new(start_event_idx)86 }87 88 89 90 91 pub fn parse(mut self) -> Vec<Event> {92 let m = self.start();93 expr(&mut self);94 if !self.at(EOF) {95 let m = self.start();96 while !self.at(EOF) {97 self.bump();98 }99 m.complete_error(&mut self, "unexpected tokens after end");100 }101 m.complete(&mut self, SOURCE_FILE);102103 self.events104 }105106 pub(crate) fn expect(&mut self, kind: SyntaxKind) {107 self.expect_with_recovery_set(kind, TS![]);108 }109110 pub(crate) fn expect_with_recovery_set(111 &mut self,112 kind: SyntaxKind,113 recovery_set: SyntaxKindSet,114 ) {115 if self.at(kind) {116 if kind != EOF {117 self.bump();118 }119 } else {120 self.error_with_recovery_set(recovery_set);121 }122 }123124 125 126 127 128 129 130 131 pub fn error_with_no_skip(&mut self) -> CompletedMarker {132 self.error_with_recovery_set(SyntaxKindSet::ALL)133 }134135 pub fn error_with_recovery_set(&mut self, recovery_set: SyntaxKindSet) -> CompletedMarker {136 let expected = self.expected_syntax_tracking_state.get();137 self.expected_syntax_tracking_state138 .set(ExpectedSyntax::Unnamed(TS![]));139140 if self.at_end() || self.at_ts(recovery_set) {141 let m = self.start();142 return m.complete_missing(self, expected);143 }144145 let current_token = self.current();146147 self.last_error_token = self.offset;148149 let m = self.start();150 self.bump();151 let m = m.complete_unexpected(self, expected, current_token);152 self.clear_expected_syntaxes();153 m154 }155 fn bump_assert(&mut self, kind: SyntaxKind) {156 assert!(self.at(kind), "expected {kind:?}");157 self.bump_remap(self.current());158 }159 fn bump(&mut self) {160 self.bump_remap(self.current());161 }162 fn bump_remap(&mut self, kind: SyntaxKind) {163 assert_ne!(self.offset, self.kinds.len(), "already at end");164 self.events.push(Event::Token { kind });165 self.offset += 1;166 self.clear_expected_syntaxes();167 }168 fn step(&self) {169 use std::fmt::Write;170 let steps = self.steps.get();171 if steps >= 15_000_000 {172 let mut out = "seems like parsing is stuck".to_owned();173 {174 let last = 20;175 write!(out, "\n\nLast {last} events:").unwrap();176 for (i, event) in self177 .events178 .iter()179 .skip(self.events.len().saturating_sub(last))180 .enumerate()181 {182 write!(out, "\n{i}. {event:?}").unwrap();183 }184 }185 {186 let next = 20;187 write!(out, "\n\nNext {next} tokens:").unwrap();188 for (i, tok) in self.kinds.iter().skip(self.offset).take(next).enumerate() {189 write!(out, "\n{i}. {tok:?}").unwrap();190 }191 }192 panic!("{out}")193 }194 self.steps.set(steps + 1);195 }196 fn nth(&self, i: usize) -> SyntaxKind {197 self.step();198 let mut offset = self.offset;199 for _ in 0..i {200 offset += 1;201 }202 self.kinds.get(offset).copied().unwrap_or(EOF)203 }204 fn current(&self) -> SyntaxKind {205 self.nth(0)206 }207 #[must_use]208 pub(crate) fn expected_syntax_name(&self, name: &'static str) -> ExpectedSyntaxGuard {209 self.expected_syntax_tracking_state210 .set(ExpectedSyntax::Named(name));211212 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))213 }214 pub fn at(&self, kind: SyntaxKind) -> bool {215 self.nth_at(0, kind)216 }217 pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {218 if n == 0 {219 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {220 let kinds = kinds.with(kind);221 self.expected_syntax_tracking_state222 .set(ExpectedSyntax::Unnamed(kinds));223 }224 }225 self.nth(n) == kind226 }227 pub fn at_ts(&self, set: SyntaxKindSet) -> bool {228 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {229 let kinds = kinds.union(set);230 self.expected_syntax_tracking_state231 .set(ExpectedSyntax::Unnamed(kinds));232 }233 set.contains(self.current())234 }235 pub fn at_end(&self) -> bool {236 self.at(EOF)237 }238}239pub struct ExpectedSyntaxGuard {240 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,241}242243impl ExpectedSyntaxGuard {244 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>) -> Self {245 Self {246 expected_syntax_tracking_state,247 }248 }249}250251impl Drop for ExpectedSyntaxGuard {252 fn drop(&mut self) {253 self.expected_syntax_tracking_state254 .set(ExpectedSyntax::Unnamed(TS![]));255 }256}257258#[derive(Clone, Debug, Copy)]259pub enum ExpectedSyntax {260 Named(&'static str),261 Unnamed(SyntaxKindSet),262}263impl fmt::Display for ExpectedSyntax {264 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {265 match self {266 Self::Named(name) => write!(f, "{name}"),267 Self::Unnamed(set) => write!(f, "{set}"),268 }269 }270}271272fn expr(p: &mut Parser) -> CompletedMarker {273 let m = p.start();274 while p.at(T![local]) || p.at(T![assert]) {275 let m = p.start();276277 if p.at(T![local]) {278 p.bump();279 loop {280 if p.at(T![;]) {281 p.bump();282 break;283 }284 bind(p);285286 if p.at(T![,]) {287 p.bump();288 continue;289 }290 p.expect(T![;]);291 break;292 }293 m.complete(p, STMT_LOCAL);294 } else {295 assertion(p);296 p.expect(T![;]);297 m.complete(p, STMT_ASSERT);298 }299 }300 match expr_binding_power(p, 0) {301 Ok(m) | Err(m) => m,302 };303 m.complete(p, EXPR)304}305fn expr_binding_power(306 p: &mut Parser,307 minimum_binding_power: u8,308) -> Result<CompletedMarker, CompletedMarker> {309 let mut lhs = lhs(p)?;310311 while let Some(op) = BinaryOperatorKind::cast(p.current())312 .or_else(|| p.at(T!['{']).then_some(BinaryOperatorKind::MetaObjectApply))313 {314 let (left_binding_power, right_binding_power) = op.binding_power();315 if left_binding_power < minimum_binding_power {316 break;317 }318319 320 if op != BinaryOperatorKind::MetaObjectApply {321 p.bump();322 }323324 let m = lhs.wrap(p, EXPR).precede(p);325 let parsed_rhs = expr_binding_power(p, right_binding_power)326 .map(|v| v.precede(p).complete(p, EXPR))327 .is_ok();328 lhs = m.complete(329 p,330 if op == BinaryOperatorKind::MetaObjectApply {331 EXPR_OBJ_EXTEND332 } else {333 EXPR_BINARY334 },335 );336337 if !parsed_rhs {338 break;339 }340 }341 Ok(lhs)342}343344const COMPSPEC: SyntaxKindSet = TS![for if];345fn compspec(p: &mut Parser) -> CompletedMarker {346 assert!(p.at_ts(COMPSPEC));347 if p.at(T![for]) {348 let m = p.start();349 p.bump();350 destruct(p);351 p.expect(T![in]);352 expr(p);353 m.complete(p, FOR_SPEC)354 } else if p.at(T![if]) {355 let m = p.start();356 p.bump();357 expr(p);358 m.complete(p, IF_SPEC)359 } else {360 unreachable!()361 }362}363364fn comma(p: &mut Parser) -> bool {365 comma_with_alternatives(p, TS![])366}367fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {368 if p.at(T![,]) {369 p.bump();370 true371 } else if p.at_ts(set) {372 let _ex = p.expected_syntax_name("comma");373 p.expect_with_recovery_set(T![,], TS![]);374 true375 } else {376 false377 }378}379380fn field_name(p: &mut Parser) {381 let _e = p.expected_syntax_name("field name");382 let m = p.start();383 if p.at(T!['[']) {384 p.bump();385 expr(p);386 p.expect(T![']']);387 m.complete(p, FIELD_NAME_DYNAMIC);388 } else if p.at(IDENT) {389 name(p);390 m.complete(p, FIELD_NAME_FIXED);391 } else if Text::can_cast(p.current()) {392 text(p);393 m.complete(p, FIELD_NAME_FIXED);394 } else {395 m.forget(p);396 p.error_with_recovery_set(TS![; : :: ::: '(']);397 }398}399fn visibility(p: &mut Parser) {400 if p.at_ts(TS![: :: :::]) {401 p.bump();402 } else {403 p.error_with_recovery_set(TS![=]);404 }405}406fn assertion(p: &mut Parser) {407 let m = p.start();408 p.bump_assert(T![assert]);409 expr(p);410 if p.at(T![:]) {411 p.bump();412 expr(p);413 }414 m.complete(p, ASSERTION);415}416fn object(p: &mut Parser) -> CompletedMarker {417 let m_t = p.start();418 let m = p.start();419 p.bump_assert(T!['{']);420421 let mut elems = 0;422 let mut compspecs = Vec::new();423 let mut asserts = Vec::new();424 loop {425 if p.at(T!['}']) {426 p.bump();427 break;428 }429 if p.at_ts(TS![for]) {430 if elems == 0 {431 let m = p.start();432 m.complete_missing(p, ExpectedSyntax::Named("field definition"));433 }434 while p.at_ts(COMPSPEC) {435 compspecs.push(compspec(p));436 }437 if comma_with_alternatives(p, TS![;]) {438 continue;439 }440 p.expect(R_BRACE);441 break;442 }443 let m = p.start();444 if p.at(T![local]) {445 obj_local(p);446 m.complete(p, MEMBER_BIND_STMT);447 } else if p.at(T![assert]) {448 assertion(p);449 asserts.push(m.complete(p, MEMBER_ASSERT_STMT));450 } else {451 field_name(p);452 if p.at(T![+]) {453 p.bump();454 }455 let params = if p.at(T!['(']) {456 params_desc(p);457 visibility(p);458 expr(p);459 true460 } else if p.at_ts(TS![: :: :::]) && p.nth_at(1, T![function]) {461 visibility(p);462 p.bump_assert(T![function]);463 params_desc(p);464 expr(p);465 true466 } else {467 visibility(p);468 expr(p);469 false470 };471 elems += 1;472473 if params {474 m.complete(p, MEMBER_FIELD_METHOD)475 } else {476 m.complete(p, MEMBER_FIELD_NORMAL)477 };478 };479 while p.at_ts(COMPSPEC) {480 compspecs.push(compspec(p));481 }482 if comma_with_alternatives(p, TS![;]) {483 continue;484 }485 p.expect(R_BRACE);486 break;487 }488489 if elems > 1 && !compspecs.is_empty() {490 for errored in compspecs {491 errored.wrap_error(492 p,493 "compspec may only be used if there is only one object element",494 );495 }496 m.complete(p, OBJ_BODY_MEMBER_LIST);497 } else if !compspecs.is_empty() {498 for errored in asserts {499 errored.wrap_error(p, "asserts can't be used in object comprehensions");500 }501 m.complete(p, OBJ_BODY_COMP);502 } else {503 m.complete(p, OBJ_BODY_MEMBER_LIST);504 }505 m_t.complete(p, EXPR_OBJECT)506}507fn param(p: &mut Parser) {508 let m = p.start();509 destruct(p);510 if p.at(T![=]) {511 p.bump();512 expr(p);513 }514 m.complete(p, PARAM);515}516fn params_desc(p: &mut Parser) -> CompletedMarker {517 let m = p.start();518 p.bump_assert(T!['(']);519520 loop {521 if p.at(T![')']) {522 p.bump();523 break;524 }525 param(p);526 if comma(p) {527 continue;528 }529 p.expect(T![')']);530 break;531 }532533 m.complete(p, PARAMS_DESC)534}535fn args_desc(p: &mut Parser) {536 let m = p.start();537 p.bump_assert(T!['(']);538539 let started_named = Cell::new(false);540 let mut unnamed_after_named = Vec::new();541542 loop {543 if p.at(T![')']) {544 break;545 }546547 let m = p.start();548 if p.at(IDENT) && p.nth_at(1, T![=]) {549 name(p);550 p.bump();551 expr(p);552 m.complete(p, ARG);553 started_named.set(true);554 } else {555 expr(p);556 let arg = m.complete(p, ARG);557 if started_named.get() {558 unnamed_after_named.push(arg);559 }560 }561 if comma(p) {562 continue;563 }564 break;565 }566 p.expect(T![')']);567 if p.at(T![tailstrict]) {568 p.bump();569 }570571 for errored in unnamed_after_named {572 errored.wrap_error(p, "can't use positional arguments after named");573 }574575 m.complete(p, ARGS_DESC);576}577578fn array(p: &mut Parser) -> CompletedMarker {579 580 let m = p.start();581 p.bump_assert(T!['[']);582583 let mut compspecs = Vec::new();584 let mut elems = 0;585586 loop {587 if p.at(T![']']) {588 p.bump();589 break;590 }591 if elems != 0 && p.at_ts(TS![for]) {592 while p.at_ts(COMPSPEC) {593 compspecs.push(compspec(p));594 }595 if comma(p) {596 continue;597 }598 p.expect(T![']']);599 break;600 }601 expr(p);602 elems += 1;603 while p.at_ts(COMPSPEC) {604 compspecs.push(compspec(p));605 }606 if comma(p) {607 continue;608 }609 p.expect(T![']']);610 break;611 }612613 if elems > 1 && !compspecs.is_empty() {614 for spec in compspecs {615 spec.wrap_error(616 p,617 "compspec may only be used if there is only one array element",618 );619 }620621 m.complete(p, EXPR_ARRAY)622 } else if !compspecs.is_empty() {623 m.complete(p, EXPR_ARRAY_COMP)624 } else {625 m.complete(p, EXPR_ARRAY)626 }627}628629#[must_use]630fn slice_desc_or_index(p: &mut Parser) -> bool {631 let m = p.start();632 p.bump();633 634 635 if !p.at(T![:]) && !p.at(T![::]) {636 expr(p);637 }638 if p.at(T![:]) {639 p.bump();640 641 if !p.at(T![']']) {642 expr(p).wrap(p, SLICE_DESC_END);643 }644 if p.at(T![:]) {645 p.bump();646 647 if !p.at(T![']']) {648 expr(p).wrap(p, SLICE_DESC_STEP);649 }650 }651 } else if p.at(T![::]) {652 p.bump();653 654 if !p.at(T![']']) {655 expr(p).wrap(p, SLICE_DESC_END);656 }657 } else {658 659 p.expect(T![']']);660 m.forget(p);661 return false;662 }663 p.expect(T![']']);664 m.complete(p, SLICE_DESC);665 true666}667668fn suffix(p: &mut Parser) {669 loop {670 let start = p.start();671 let _marker: CompletedMarker = if p.at(T![?]) {672 p.bump();673 p.expect(T![.]);674 if p.at(IDENT) {675 name(p);676 start.complete(p, SUFFIX_INDEX)677 } else if p.at(T!['[']) {678 p.bump();679 expr(p);680 p.expect(T![']']);681 start.complete(p, SUFFIX_INDEX_EXPR)682 } else {683 start.complete_missing(p, ExpectedSyntax::Named("index"))684 }685 } else if p.at(T![.]) {686 p.bump();687 name(p);688 start.complete(p, SUFFIX_INDEX)689 } else if p.at(T!['[']) {690 if slice_desc_or_index(p) {691 start.complete(p, SUFFIX_SLICE)692 } else {693 start.complete(p, SUFFIX_INDEX_EXPR)694 }695 } else if p.at(T!['(']) {696 args_desc(p);697 start.complete(p, SUFFIX_APPLY)698 } else {699 start.forget(p);700 break;701 };702 }703}704705fn lhs(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {706 let lhs = lhs_basic(p)?;707708 suffix(p);709710 Ok(lhs)711}712fn name(p: &mut Parser) {713 let m = p.start();714 p.expect(IDENT);715 m.complete(p, NAME);716}717fn destruct_rest(p: &mut Parser) {718 let m = p.start();719 p.bump_assert(T![...]);720 if p.at(IDENT) {721 p.bump();722 }723 m.complete(p, DESTRUCT_REST);724}725fn destruct_object_field(p: &mut Parser) {726 let m = p.start();727 name(p);728 if p.at(T![:]) {729 p.bump();730 destruct(p);731 };732 if p.at(T![=]) {733 p.bump();734 expr(p);735 }736 m.complete(p, DESTRUCT_OBJECT_FIELD);737}738fn obj_local(p: &mut Parser) {739 let m = p.start();740 p.bump_assert(T![local]);741 bind(p);742 m.complete(p, OBJ_LOCAL);743}744fn destruct(p: &mut Parser) -> CompletedMarker {745 let m = p.start();746 let _ex = p.expected_syntax_name("destruction specifier");747 if p.at(T![?]) {748 p.bump();749 m.complete(p, DESTRUCT_SKIP)750 } else if p.at(T!['[']) {751 p.bump();752 753 loop {754 if p.at(T![']']) {755 p.bump();756 break;757 } else if p.at(T![...]) {758 759 destruct_rest(p);760 761 762 763 764 } else {765 destruct(p);766 }767 if p.at(T![,]) {768 p.bump();769 continue;770 }771 p.expect(T![']']);772 break;773 }774 m.complete(p, DESTRUCT_ARRAY)775 } else if p.at(T!['{']) {776 p.bump();777 let mut had_rest = false;778 loop {779 if p.at(T!['}']) {780 p.bump();781 break;782 } else if p.at(T![...]) {783 784 destruct_rest(p);785 786 787 788 had_rest = true;789 } else {790 if had_rest {791 p.error_with_recovery_set(TS![]);792 }793 destruct_object_field(p);794 }795 if p.at(T![,]) {796 p.bump();797 continue;798 }799 p.expect(T!['}']);800 break;801 }802 m.complete(p, DESTRUCT_OBJECT)803 } else if p.at(IDENT) {804 name(p);805 m.complete(p, DESTRUCT_FULL)806 } else {807 m.forget(p);808 p.error_with_recovery_set(TS![; , '}', '(', :])809 }810}811fn bind(p: &mut Parser) {812 let m = p.start();813 if p.at(IDENT) && p.nth_at(1, T!['(']) {814 name(p);815 params_desc(p);816 p.expect(T![=]);817 expr(p);818 m.complete(p, BIND_FUNCTION)819 } else if p.at(IDENT) && p.nth_at(1, T![=]) && p.nth_at(2, T![function]) {820 name(p);821 p.expect(T![=]);822 p.expect(T![function]);823 params_desc(p);824 expr(p);825 m.complete(p, BIND_FUNCTION)826 } else {827 destruct(p);828 p.expect(T![=]);829 expr(p);830 m.complete(p, BIND_DESTRUCT)831 };832}833fn text(p: &mut Parser) {834 assert!(Text::can_cast(p.current()));835 p.bump();836}837fn number(p: &mut Parser) {838 assert!(Number::can_cast(p.current()));839 p.bump();840}841fn literal(p: &mut Parser) {842 assert!(Literal::can_cast(p.current()));843 p.bump();844}845fn lhs_basic(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {846 let _e = p.expected_syntax_name("expression");847 Ok(if Literal::can_cast(p.current()) {848 let m = p.start();849 literal(p);850 m.complete(p, EXPR_LITERAL)851 } else if Text::can_cast(p.current()) {852 let m = p.start();853 text(p);854 m.complete(p, EXPR_STRING)855 } else if Number::can_cast(p.current()) {856 let m = p.start();857 number(p);858 m.complete(p, EXPR_NUMBER)859 } else if p.at(IDENT) {860 let m = p.start();861 name(p);862 m.complete(p, EXPR_VAR)863 } else if p.at(T![if]) {864 let m = p.start();865 p.bump();866 expr(p);867 p.expect(T![then]);868 expr(p).wrap(p, TRUE_EXPR);869 if p.at(T![else]) {870 p.bump();871 expr(p).wrap(p, FALSE_EXPR);872 }873 m.complete(p, EXPR_IF_THEN_ELSE)874 } else if p.at(T!['[']) {875 array(p)876 } else if p.at(T!['{']) {877 object(p)878 } else if p.at(T![function]) {879 let m = p.start();880 p.bump();881 params_desc(p);882 expr(p);883 m.complete(p, EXPR_FUNCTION)884 } else if p.at(T![error]) {885 let m = p.start();886 p.bump();887 expr(p);888 m.complete(p, EXPR_ERROR)889 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {890 let m = p.start();891 p.bump();892 text(p);893 m.complete(p, EXPR_IMPORT)894 } else if let Some(op) = UnaryOperatorKind::cast(p.current()) {895 let ((), right_binding_power) = op.binding_power();896897 let m = p.start();898 p.bump();899 let _ = expr_binding_power(p, right_binding_power);900 m.complete(p, EXPR_UNARY)901 } else if p.at(T!['(']) {902 let m = p.start();903 p.bump();904 expr(p);905 p.expect(T![')']);906 m.complete(p, EXPR_PARENED)907 } else {908 return Err(p.error_with_no_skip());909 })910}911912impl Parse {913 pub fn syntax(&self) -> SyntaxNode {914 SyntaxNode::new_root(self.green_node.clone())915 }916}