1use std::{cell::Cell, fmt, rc::Rc};23use rowan::{GreenNode, TextRange};45use crate::{6 event::Event,7 marker::{CompletedMarker, Marker},8 nodes::{BinaryOperatorKind, Literal, Number, Text, UnaryOperatorKind},9 token_set::SyntaxKindSet,10 AstToken, SyntaxKind,11 SyntaxKind::*,12 SyntaxNode, T, TS,13};1415pub struct Parse {16 pub green_node: GreenNode,17 pub errors: Vec<LocatedSyntaxError>,18}1920pub struct Parser {21 22 kinds: Vec<SyntaxKind>,23 pub offset: usize,24 pub events: Vec<Event>,25 pub entered: u32,26 pub hints: Vec<(u32, TextRange, String)>,27 pub last_error_token: usize,28 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,29 steps: Cell<u64>,30}3132#[derive(Clone, Debug)]33pub enum SyntaxError {34 Unexpected {35 expected: ExpectedSyntax,36 found: SyntaxKind,37 },38 Missing {39 expected: ExpectedSyntax,40 },41 Custom {42 error: String,43 },44 Hint {45 error: String,46 },47}48impl fmt::Display for SyntaxError {49 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {50 match self {51 SyntaxError::Unexpected { expected, found } => {52 write!(f, "unexpected {found:?}, expecting {expected}")53 }54 SyntaxError::Missing { expected } => write!(f, "missing {expected}"),55 SyntaxError::Custom { error } | SyntaxError::Hint { error } => write!(f, "{error}"),56 }57 }58}5960#[derive(Debug)]61pub struct LocatedSyntaxError {62 pub error: SyntaxError,63 pub range: TextRange,64}6566impl Parser {67 pub fn new(kinds: Vec<SyntaxKind>) -> Self {68 Self {69 kinds,70 offset: 0,71 events: vec![],72 entered: 0,73 last_error_token: 0,74 hints: vec![],75 expected_syntax_tracking_state: Rc::new(Cell::new(ExpectedSyntax::Unnamed(TS![]))),76 steps: Cell::new(0),77 }78 }79 pub fn clear_outdated_hints(&mut self) {80 let amount = self81 .hints82 .iter()83 .rev()84 .take_while(|h| h.0 > self.entered)85 .count();86 self.hints.truncate(self.hints.len() - amount);87 }88 fn clear_expected_syntaxes(&self) {89 self.expected_syntax_tracking_state90 .set(ExpectedSyntax::Unnamed(TS![]));91 }92 pub fn start(&mut self) -> Marker {93 let start_event_idx = self.events.len();94 self.events.push(Event::Pending);95 self.entered += 1;96 Marker::new(start_event_idx)97 }98 99 100 101 102 pub fn parse(mut self) -> Vec<Event> {103 let m = self.start();104 expr(&mut self);105 if !self.at(EOF) {106 let m = self.start();107 while !self.at(EOF) {108 self.bump();109 }110 m.complete_error(&mut self, "unexpected tokens after end");111 }112 m.complete(&mut self, SOURCE_FILE);113114 self.events115 }116117 pub(crate) fn expect(&mut self, kind: SyntaxKind) {118 self.expect_with_recovery_set(kind, TS![]);119 }120121 pub(crate) fn expect_with_recovery_set(122 &mut self,123 kind: SyntaxKind,124 recovery_set: SyntaxKindSet,125 ) {126 if self.at(kind) {127 if kind != EOF {128 self.bump();129 }130 } else {131 self.error_with_recovery_set(recovery_set);132 }133 }134135 136 137 138 139 140 141 142 pub fn error_with_no_skip(&mut self) -> CompletedMarker {143 self.error_with_recovery_set(SyntaxKindSet::ALL)144 }145146 pub fn error_with_recovery_set(&mut self, recovery_set: SyntaxKindSet) -> CompletedMarker {147 let expected = self.expected_syntax_tracking_state.get();148 self.expected_syntax_tracking_state149 .set(ExpectedSyntax::Unnamed(TS![]));150151 if self.at_end() || self.at_ts(recovery_set) {152 let m = self.start();153 return m.complete_missing(self, expected);154 }155156 let current_token = self.current();157158 self.last_error_token = self.offset;159160 let m = self.start();161 self.bump();162 let m = m.complete_unexpected(self, expected, current_token);163 self.clear_expected_syntaxes();164 m165 }166 fn bump_assert(&mut self, kind: SyntaxKind) {167 assert!(self.at(kind), "expected {kind:?}");168 self.bump_remap(self.current());169 }170 fn bump(&mut self) {171 self.bump_remap(self.current());172 }173 fn bump_remap(&mut self, kind: SyntaxKind) {174 assert_ne!(self.offset, self.kinds.len(), "already at end");175 self.events.push(Event::Token { kind });176 self.offset += 1;177 self.clear_expected_syntaxes();178 }179 fn step(&self) {180 use std::fmt::Write;181 let steps = self.steps.get();182 if steps >= 15_000_000 {183 let mut out = "seems like parsing is stuck".to_owned();184 {185 let last = 20;186 write!(out, "\n\nLast {last} events:").unwrap();187 for (i, event) in self188 .events189 .iter()190 .skip(self.events.len().saturating_sub(last))191 .enumerate()192 {193 write!(out, "\n{i}. {event:?}").unwrap();194 }195 }196 {197 let next = 20;198 write!(out, "\n\nNext {next} tokens:").unwrap();199 for (i, tok) in self.kinds.iter().skip(self.offset).take(next).enumerate() {200 write!(out, "\n{i}. {tok:?}").unwrap();201 }202 }203 panic!("{out}")204 }205 self.steps.set(steps + 1);206 }207 fn nth(&self, i: usize) -> SyntaxKind {208 self.step();209 let mut offset = self.offset;210 for _ in 0..i {211 offset += 1;212 }213 self.kinds.get(offset).copied().unwrap_or(EOF)214 }215 fn current(&self) -> SyntaxKind {216 self.nth(0)217 }218 #[must_use]219 pub(crate) fn expected_syntax_name(&self, name: &'static str) -> ExpectedSyntaxGuard {220 self.expected_syntax_tracking_state221 .set(ExpectedSyntax::Named(name));222223 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))224 }225 pub fn at(&self, kind: SyntaxKind) -> bool {226 self.nth_at(0, kind)227 }228 pub fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {229 if n == 0 {230 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {231 let kinds = kinds.with(kind);232 self.expected_syntax_tracking_state233 .set(ExpectedSyntax::Unnamed(kinds));234 }235 }236 self.nth(n) == kind237 }238 pub fn at_ts(&self, set: SyntaxKindSet) -> bool {239 if let ExpectedSyntax::Unnamed(kinds) = self.expected_syntax_tracking_state.get() {240 let kinds = kinds.union(set);241 self.expected_syntax_tracking_state242 .set(ExpectedSyntax::Unnamed(kinds));243 }244 set.contains(self.current())245 }246 pub fn at_end(&self) -> bool {247 self.at(EOF)248 }249}250pub struct ExpectedSyntaxGuard {251 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>,252}253254impl ExpectedSyntaxGuard {255 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntax>>) -> Self {256 Self {257 expected_syntax_tracking_state,258 }259 }260}261262impl Drop for ExpectedSyntaxGuard {263 fn drop(&mut self) {264 self.expected_syntax_tracking_state265 .set(ExpectedSyntax::Unnamed(TS![]));266 }267}268269#[derive(Clone, Debug, Copy)]270pub enum ExpectedSyntax {271 Named(&'static str),272 Unnamed(SyntaxKindSet),273}274impl fmt::Display for ExpectedSyntax {275 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {276 match self {277 Self::Named(name) => write!(f, "{name}"),278 Self::Unnamed(set) => write!(f, "{set}"),279 }280 }281}282283fn expr(p: &mut Parser) -> CompletedMarker {284 let m = p.start();285 while p.at(T![local]) || p.at(T![assert]) {286 let m = p.start();287288 if p.at(T![local]) {289 p.bump();290 loop {291 if p.at(T![;]) {292 p.bump();293 break;294 }295 bind(p);296297 if p.at(T![,]) {298 p.bump();299 continue;300 }301 p.expect(T![;]);302 break;303 }304 m.complete(p, STMT_LOCAL);305 } else {306 assertion(p);307 p.expect(T![;]);308 m.complete(p, STMT_ASSERT);309 }310 }311 match expr_binding_power(p, 0) {312 Ok(m) | Err(m) => m,313 };314 m.complete(p, EXPR)315}316fn expr_binding_power(317 p: &mut Parser,318 minimum_binding_power: u8,319) -> Result<CompletedMarker, CompletedMarker> {320 let mut lhs = lhs(p)?;321322 while let Some(op) = BinaryOperatorKind::cast(p.current())323 .or_else(|| p.at(T!['{']).then_some(BinaryOperatorKind::MetaObjectApply))324 {325 let (left_binding_power, right_binding_power) = op.binding_power();326 if left_binding_power < minimum_binding_power {327 break;328 }329330 let m = lhs.wrap(p, EXPR, false);331332 333 if op != BinaryOperatorKind::MetaObjectApply {334 p.bump();335 }336337 let m = m.precede(p);338 let parsed_rhs = expr_binding_power(p, right_binding_power)339 .map(|v| v.precede(p).complete(p, EXPR))340 .is_ok();341 lhs = m.complete(342 p,343 if op == BinaryOperatorKind::MetaObjectApply {344 EXPR_OBJ_EXTEND345 } else {346 EXPR_BINARY347 },348 );349350 if !parsed_rhs {351 break;352 }353 }354 Ok(lhs)355}356357const COMPSPEC: SyntaxKindSet = TS![for if];358fn compspec(p: &mut Parser) -> CompletedMarker {359 assert!(p.at_ts(COMPSPEC));360 if p.at(T![for]) {361 let m = p.start();362 p.bump();363 destruct(p);364 p.expect(T![in]);365 expr(p);366 m.complete(p, FOR_SPEC)367 } else if p.at(T![if]) {368 let m = p.start();369 p.bump();370 expr(p);371 m.complete(p, IF_SPEC)372 } else {373 unreachable!()374 }375}376377fn comma(p: &mut Parser) -> bool {378 comma_with_alternatives(p, TS![])379}380fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {381 if p.at(T![,]) {382 p.bump();383 true384 } else if p.at_ts(set) {385 let _ex = p.expected_syntax_name("comma");386 p.expect_with_recovery_set(T![,], TS![]);387 true388 } else {389 false390 }391}392393fn field_name(p: &mut Parser) {394 let _e = p.expected_syntax_name("field name");395 let m = p.start();396 if p.at(T!['[']) {397 p.bump();398 expr(p);399 p.expect(T![']']);400 m.complete(p, FIELD_NAME_DYNAMIC);401 } else if p.at(IDENT) {402 name(p);403 m.complete(p, FIELD_NAME_FIXED);404 } else if Text::can_cast(p.current()) {405 text(p);406 m.complete(p, FIELD_NAME_FIXED);407 } else {408 m.forget(p);409 410 p.error_with_recovery_set(TS![; : :: '('].with(T![:::]));411 }412}413fn visibility(p: &mut Parser) {414 415 if p.at_ts(TS![: ::].with(T![:::])) {416 p.bump();417 } else {418 p.error_with_recovery_set(TS![=]);419 }420}421fn assertion(p: &mut Parser) {422 let m = p.start();423 p.bump_assert(T![assert]);424 expr(p);425 if p.at(T![:]) {426 p.bump();427 expr(p);428 }429 m.complete(p, ASSERTION);430}431fn object(p: &mut Parser) -> CompletedMarker {432 let m_t = p.start();433 let m = p.start();434 p.bump_assert(T!['{']);435436 let mut elems = 0;437 let mut compspecs = Vec::new();438 let mut asserts = Vec::new();439 loop {440 if p.at(T!['}']) {441 p.bump();442 break;443 }444 if p.at_ts(TS![for]) {445 if elems == 0 {446 let m = p.start();447 m.complete_missing(p, ExpectedSyntax::Named("field definition"));448 }449 while p.at_ts(COMPSPEC) {450 compspecs.push(compspec(p));451 }452 if comma_with_alternatives(p, TS![;]) {453 continue;454 }455 p.expect(R_BRACE);456 break;457 }458 let m = p.start();459 if p.at(T![local]) {460 obj_local(p);461 m.complete(p, MEMBER_BIND_STMT);462 } else if p.at(T![assert]) {463 assertion(p);464 asserts.push(m.complete(p, MEMBER_ASSERT_STMT));465 } else {466 field_name(p);467 if p.at(T![+]) {468 p.bump();469 }470 let params = if p.at(T!['(']) {471 params_desc(p);472 visibility(p);473 expr(p);474 true475 476 } else if p.at_ts(TS![: ::].with(T![:::])) && p.nth_at(1, T![function]) {477 visibility(p);478 p.bump_assert(T![function]);479 params_desc(p);480 expr(p);481 true482 } else {483 visibility(p);484 expr(p);485 false486 };487 elems += 1;488489 if params {490 m.complete(p, MEMBER_FIELD_METHOD)491 } else {492 m.complete(p, MEMBER_FIELD_NORMAL)493 };494 }495 while p.at_ts(COMPSPEC) {496 compspecs.push(compspec(p));497 }498 if comma_with_alternatives(p, TS![;]) {499 continue;500 }501 p.expect(R_BRACE);502 break;503 }504505 if elems > 1 && !compspecs.is_empty() {506 for errored in compspecs {507 errored.wrap_error(508 p,509 "compspec may only be used if there is only one object element",510 true,511 );512 }513 m.complete(p, OBJ_BODY_MEMBER_LIST);514 } else if !compspecs.is_empty() {515 for errored in asserts {516 errored.wrap_error(p, "asserts can't be used in object comprehensions", true);517 }518 m.complete(p, OBJ_BODY_COMP);519 } else {520 m.complete(p, OBJ_BODY_MEMBER_LIST);521 }522 m_t.complete(p, EXPR_OBJECT)523}524fn param(p: &mut Parser) {525 let m = p.start();526 destruct(p);527 if p.at(T![=]) {528 p.bump();529 expr(p);530 }531 m.complete(p, PARAM);532}533fn params_desc(p: &mut Parser) -> CompletedMarker {534 let m = p.start();535 p.bump_assert(T!['(']);536537 loop {538 if p.at(T![')']) {539 p.bump();540 break;541 }542 param(p);543 if comma(p) {544 continue;545 }546 p.expect(T![')']);547 break;548 }549550 m.complete(p, PARAMS_DESC)551}552fn args_desc(p: &mut Parser) {553 let m = p.start();554 p.bump_assert(T!['(']);555556 let started_named = Cell::new(false);557 let mut unnamed_after_named = Vec::new();558559 loop {560 if p.at(T![')']) {561 break;562 }563564 let m = p.start();565 if p.at(IDENT) && p.nth_at(1, T![=]) {566 name(p);567 p.bump();568 expr(p);569 m.complete(p, ARG);570 started_named.set(true);571 } else {572 expr(p);573 let arg = m.complete(p, ARG);574 if started_named.get() {575 unnamed_after_named.push(arg);576 }577 }578 if comma(p) {579 continue;580 }581 break;582 }583 p.expect(T![')']);584 if p.at(T![tailstrict]) {585 p.bump();586 }587588 for errored in unnamed_after_named {589 errored.wrap_error(p, "can't use positional arguments after named", true);590 }591592 m.complete(p, ARGS_DESC);593}594595fn array(p: &mut Parser) -> CompletedMarker {596 597 let m = p.start();598 p.bump_assert(T!['[']);599600 let mut compspecs = Vec::new();601 let mut elems = 0;602603 loop {604 if p.at(T![']']) {605 p.bump();606 break;607 }608 if elems != 0 && p.at_ts(TS![for]) {609 while p.at_ts(COMPSPEC) {610 compspecs.push(compspec(p));611 }612 if comma(p) {613 continue;614 }615 p.expect(T![']']);616 break;617 }618 expr(p);619 elems += 1;620 while p.at_ts(COMPSPEC) {621 compspecs.push(compspec(p));622 }623 if comma(p) {624 continue;625 }626 p.expect(T![']']);627 break;628 }629630 if elems > 1 && !compspecs.is_empty() {631 for spec in compspecs {632 spec.wrap_error(633 p,634 "compspec may only be used if there is only one array element",635 true,636 );637 }638639 m.complete(p, EXPR_ARRAY)640 } else if !compspecs.is_empty() {641 m.complete(p, EXPR_ARRAY_COMP)642 } else {643 m.complete(p, EXPR_ARRAY)644 }645}646647#[must_use]648fn slice_desc_or_index(p: &mut Parser) -> bool {649 let m = p.start();650 p.bump();651 652 653 if !p.at(T![:]) && !p.at(T![::]) {654 expr(p);655 }656 if p.at(T![:]) {657 p.bump();658 659 if !p.at(T![']']) {660 expr(p).wrap(p, SLICE_DESC_END, true);661 }662 if p.at(T![:]) {663 p.bump();664 665 if !p.at(T![']']) {666 expr(p).wrap(p, SLICE_DESC_STEP, true);667 }668 }669 } else if p.at(T![::]) {670 p.bump();671 672 if !p.at(T![']']) {673 expr(p).wrap(p, SLICE_DESC_END, true);674 }675 } else {676 677 p.expect(T![']']);678 m.forget(p);679 return false;680 }681 p.expect(T![']']);682 m.complete(p, SLICE_DESC);683 true684}685686fn suffix(p: &mut Parser) {687 loop {688 let start = p.start();689 let _marker: CompletedMarker = if p.at(T![?]) {690 p.bump();691 p.expect(T![.]);692 if p.at(IDENT) {693 name(p);694 start.complete(p, SUFFIX_INDEX)695 } else if p.at(T!['[']) {696 p.bump();697 expr(p);698 p.expect(T![']']);699 start.complete(p, SUFFIX_INDEX_EXPR)700 } else {701 start.complete_missing(p, ExpectedSyntax::Named("index"))702 }703 } else if p.at(T![.]) {704 p.bump();705 name(p);706 start.complete(p, SUFFIX_INDEX)707 } else if p.at(T!['[']) {708 if slice_desc_or_index(p) {709 start.complete(p, SUFFIX_SLICE)710 } else {711 start.complete(p, SUFFIX_INDEX_EXPR)712 }713 } else if p.at(T!['(']) {714 args_desc(p);715 start.complete(p, SUFFIX_APPLY)716 } else {717 start.forget(p);718 break;719 };720 }721}722723fn lhs(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {724 let lhs = lhs_basic(p)?;725726 suffix(p);727728 Ok(lhs)729}730fn name(p: &mut Parser) {731 let m = p.start();732 p.expect(IDENT);733 m.complete(p, NAME);734}735fn destruct_rest(p: &mut Parser) {736 let m = p.start();737 p.bump_assert(T![...]);738 if p.at(IDENT) {739 p.bump();740 }741 m.complete(p, DESTRUCT_REST);742}743fn destruct_object_field(p: &mut Parser) {744 let m = p.start();745 name(p);746 if p.at(T![:]) {747 p.bump();748 destruct(p);749 }750 if p.at(T![=]) {751 p.bump();752 expr(p);753 }754 m.complete(p, DESTRUCT_OBJECT_FIELD);755}756fn obj_local(p: &mut Parser) {757 let m = p.start();758 p.bump_assert(T![local]);759 bind(p);760 m.complete(p, OBJ_LOCAL);761}762fn destruct(p: &mut Parser) -> CompletedMarker {763 let m = p.start();764 let _ex = p.expected_syntax_name("destruction specifier");765 if p.at(T![?]) {766 p.bump();767 m.complete(p, DESTRUCT_SKIP)768 } else if p.at(T!['[']) {769 p.bump();770 771 loop {772 if p.at(T![']']) {773 p.bump();774 break;775 } else if p.at(T![...]) {776 777 destruct_rest(p);778 779 780 781 782 } else {783 destruct(p);784 }785 if p.at(T![,]) {786 p.bump();787 continue;788 }789 p.expect(T![']']);790 break;791 }792 m.complete(p, DESTRUCT_ARRAY)793 } else if p.at(T!['{']) {794 p.bump();795 let mut had_rest = false;796 loop {797 if p.at(T!['}']) {798 p.bump();799 break;800 } else if p.at(T![...]) {801 802 destruct_rest(p);803 804 805 806 had_rest = true;807 } else {808 if had_rest {809 p.error_with_recovery_set(TS![]);810 }811 destruct_object_field(p);812 }813 if p.at(T![,]) {814 p.bump();815 continue;816 }817 p.expect(T!['}']);818 break;819 }820 m.complete(p, DESTRUCT_OBJECT)821 } else if p.at(IDENT) {822 name(p);823 m.complete(p, DESTRUCT_FULL)824 } else {825 m.forget(p);826 p.error_with_recovery_set(TS![; , '}', '(', :])827 }828}829fn bind(p: &mut Parser) {830 let m = p.start();831 if p.at(IDENT) && p.nth_at(1, T!['(']) {832 name(p);833 params_desc(p);834 p.expect(T![=]);835 expr(p);836 m.complete(p, BIND_FUNCTION)837 } else if p.at(IDENT) && p.nth_at(1, T![=]) && p.nth_at(2, T![function]) {838 name(p);839 p.expect(T![=]);840 p.expect(T![function]);841 params_desc(p);842 expr(p);843 m.complete(p, BIND_FUNCTION)844 } else {845 destruct(p);846 p.expect(T![=]);847 expr(p);848 m.complete(p, BIND_DESTRUCT)849 };850}851fn text(p: &mut Parser) {852 assert!(Text::can_cast(p.current()));853 p.bump();854}855fn number(p: &mut Parser) {856 assert!(Number::can_cast(p.current()));857 p.bump();858}859fn literal(p: &mut Parser) {860 assert!(Literal::can_cast(p.current()));861 p.bump();862}863fn lhs_basic(p: &mut Parser) -> Result<CompletedMarker, CompletedMarker> {864 let _e = p.expected_syntax_name("expression");865 Ok(if Literal::can_cast(p.current()) {866 let m = p.start();867 literal(p);868 m.complete(p, EXPR_LITERAL)869 } else if Text::can_cast(p.current()) {870 let m = p.start();871 text(p);872 m.complete(p, EXPR_STRING)873 } else if Number::can_cast(p.current()) {874 let m = p.start();875 number(p);876 m.complete(p, EXPR_NUMBER)877 } else if p.at(IDENT) {878 let m = p.start();879 name(p);880 m.complete(p, EXPR_VAR)881 } else if p.at(T![if]) {882 let m = p.start();883 p.bump();884 expr(p);885 p.expect(T![then]);886 expr(p).wrap(p, TRUE_EXPR, true);887 if p.at(T![else]) {888 p.bump();889 expr(p).wrap(p, FALSE_EXPR, true);890 }891 m.complete(p, EXPR_IF_THEN_ELSE)892 } else if p.at(T!['[']) {893 array(p)894 } else if p.at(T!['{']) {895 object(p)896 } else if p.at(T![function]) {897 let m = p.start();898 p.bump();899 params_desc(p);900 expr(p);901 m.complete(p, EXPR_FUNCTION)902 } else if p.at(T![error]) {903 let m = p.start();904 p.bump();905 expr(p);906 m.complete(p, EXPR_ERROR)907 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {908 let m = p.start();909 p.bump();910 text(p);911 m.complete(p, EXPR_IMPORT)912 } else if let Some(op) = UnaryOperatorKind::cast(p.current()) {913 let ((), right_binding_power) = op.binding_power();914915 let m = p.start();916 p.bump();917 let _ = expr_binding_power(p, right_binding_power);918 m.complete(p, EXPR_UNARY)919 } else if p.at(T!['(']) {920 let m = p.start();921 p.bump();922 expr(p);923 p.expect(T![')']);924 m.complete(p, EXPR_PARENED)925 } else {926 return Err(p.error_with_no_skip());927 })928}929930impl Parse {931 pub fn syntax(&self) -> SyntaxNode {932 SyntaxNode::new_root(self.green_node.clone())933 }934}