difftreelog
refactor use grammar to classify tokens
in: master
13 files changed
cmds/jrsonnet-fmt/src/main.rsdiffbeforeafterboth--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
nodes::{
ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
- Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+ Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
UnaryOperator,
},
AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
}
}
-impl Printable for String {
+impl Printable for Text {
fn print(&self) -> PrintItems {
p!(new: str(&format!("{}", self)))
}
@@ -168,7 +168,7 @@
FieldName::FieldNameFixed(f) => {
if let Some(id) = f.id() {
p!(new: {id})
- } else if let Some(str) = f.string() {
+ } else if let Some(str) = f.text() {
p!(new: {str})
} else {
p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
- Expr::ExprString(s) => p!(new: {s.string()}),
+ Expr::ExprString(s) => p!(new: {s.text()}),
Expr::ExprNumber(n) => p!(new: {n.number()}),
Expr::ExprArray(a) => {
let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
pi
}
Expr::ExprImport(v) => {
- p!(new: {v.import_kind()} str(" ") {v.string()})
+ p!(new: {v.import_kind()} str(" ") {v.text()})
}
Expr::ExprVar(n) => p!(new: {n.name()}),
Expr::ExprLocal(l) => {
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
name:Name
')'
ExprString =
- String
+ Text
ExprNumber =
Number
ExprArray =
@@ -67,7 +67,7 @@
']'
ExprImport =
- ImportKind String
+ ImportKind Text
ImportKind =
'importstr'
@@ -217,7 +217,7 @@
FieldNameFixed =
id:Name
-| String
+| Text
FieldNameDynamic =
'['
Expr
@@ -239,16 +239,27 @@
| '$'
| 'super'
-String =
+Text =
'LIT_STRING_DOUBLE!'
+| 'ERROR_STRING_DOUBLE_UNTERMINATED!'
| 'LIT_STRING_SINGLE!'
+| 'ERROR_STRING_SINGLE_UNTERMINATED!'
| 'LIT_STRING_DOUBLE_VERBATIM!'
+| 'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
| 'LIT_STRING_SINGLE_VERBATIM!'
+| 'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+| 'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
| 'LIT_STRING_BLOCK!'
+| 'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+| 'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+| 'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+| 'ERROR_STRING_BLOCK_MISSING_INDENT!'
Number =
'LIT_FLOAT!'
-| 'META_FORCE_ENUM!'
+| 'ERROR_FLOAT_JUNK_AFTER_POINT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
ForSpec =
'for'
@@ -347,3 +358,12 @@
TrueExpr=Expr
FalseExpr=Expr
LhsExpr=Expr
+
+// Trivia - tokens which are implicitly skipped during parsing
+Trivia =
+ 'LIT_WHITESPACE!'
+| 'LIT_MULTI_LINE_COMMENT!'
+| 'ERROR_COMMENT_TOO_SHORT!'
+| 'ERROR_COMMENT_UNTERMINATED!'
+| 'LIT_SINGLE_LINE_HASH_COMMENT!'
+| 'LIT_SINGLE_LINE_SLASH_COMMENT!'
crates/jrsonnet-rowan-parser/src/classify.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
- pub fn is_trivia(self) -> bool {
- matches!(
- self,
- Self::WHITESPACE
- | Self::MULTI_LINE_COMMENT
- | Self::ERROR_COMMENT_TOO_SHORT
- | Self::ERROR_COMMENT_UNTERMINATED
- | Self::SINGLE_LINE_HASH_COMMENT
- | Self::SINGLE_LINE_SLASH_COMMENT
- )
- }
- pub fn is_string(self) -> bool {
- matches!(
- self,
- Self::STRING_SINGLE
- | Self::ERROR_STRING_SINGLE_UNTERMINATED
- | Self::STRING_DOUBLE
- | Self::ERROR_STRING_DOUBLE_UNTERMINATED
- | Self::STRING_SINGLE_VERBATIM
- | Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
- | Self::STRING_DOUBLE_VERBATIM
- | Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
- | Self::STRING_BLOCK
- | Self::ERROR_STRING_BLOCK_UNEXPECTED_END
- | Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
- | Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
- | Self::ERROR_STRING_BLOCK_MISSING_INDENT
- )
- }
- pub fn is_number(self) -> bool {
- matches!(
- self,
- Self::FLOAT
- | Self::ERROR_FLOAT_JUNK_AFTER_POINT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
- )
- }
- pub fn is_literal(self) -> bool {
- matches!(
- self,
- Self::NULL_KW
- | Self::TRUE_KW | Self::FALSE_KW
- | Self::SELF_KW | Self::DOLLAR
- | Self::SUPER_KW
- )
- }
-}
crates/jrsonnet-rowan-parser/src/event.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
use crate::{
lex::Lexeme,
+ nodes::Trivia,
parser::{Parse, SyntaxError},
- JsonnetLanguage, SyntaxKind,
+ AstToken, JsonnetLanguage, SyntaxKind,
};
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
}
fn skip_whitespace(&mut self) {
while let Some(lexeme) = self.lexemes.get(self.offset) {
- if !lexeme.kind.is_trivia() {
+ if !Trivia::can_cast(lexeme.kind) {
break;
}
crates/jrsonnet-rowan-parser/src/generated/nodes.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
pub(crate) syntax: SyntaxNode,
}
impl ExprString {
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -332,7 +332,7 @@
pub fn import_kind(&self) -> Option<ImportKind> {
support::token_child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -692,7 +692,7 @@
pub fn id(&self) -> Option<Name> {
support::child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -1038,18 +1038,27 @@
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
syntax: SyntaxToken,
- kind: StringKind,
+ kind: TextKind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
StringDouble,
+ ErrorStringDoubleUnterminated,
StringSingle,
+ ErrorStringSingleUnterminated,
StringDoubleVerbatim,
+ ErrorStringDoubleVerbatimUnterminated,
StringSingleVerbatim,
+ ErrorStringSingleVerbatimUnterminated,
+ ErrorStringVerbatimMissingQuotes,
StringBlock,
+ ErrorStringBlockUnexpectedEnd,
+ ErrorStringBlockMissingNewLine,
+ ErrorStringBlockMissingTermination,
+ ErrorStringBlockMissingIndent,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberKind {
Float,
- MetaForceEnum,
+ ErrorFloatJunkAfterPoint,
+ ErrorFloatJunkAfterExponent,
+ ErrorFloatJunkAfterExponentSign,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
Coloncolon,
Colon,
}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+ syntax: SyntaxToken,
+ kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+ Whitespace,
+ MultiLineComment,
+ ErrorCommentTooShort,
+ ErrorCommentUnterminated,
+ SingleLineHashComment,
+ SingleLineSlashComment,
+}
impl AstNode for SourceFile {
fn can_cast(kind: SyntaxKind) -> bool {
kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
-impl AstToken for String {
+impl AstToken for Text {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
STRING_DOUBLE
+ | ERROR_STRING_DOUBLE_UNTERMINATED
| STRING_SINGLE
+ | ERROR_STRING_SINGLE_UNTERMINATED
| STRING_DOUBLE_VERBATIM
+ | ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
| STRING_SINGLE_VERBATIM
- | STRING_BLOCK => true,
+ | ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+ | ERROR_STRING_VERBATIM_MISSING_QUOTES
+ | STRING_BLOCK
+ | ERROR_STRING_BLOCK_UNEXPECTED_END
+ | ERROR_STRING_BLOCK_MISSING_NEW_LINE
+ | ERROR_STRING_BLOCK_MISSING_TERMINATION
+ | ERROR_STRING_BLOCK_MISSING_INDENT => true,
_ => false,
}
}
fn cast(syntax: SyntaxToken) -> Option<Self> {
let res = match syntax.kind() {
- STRING_DOUBLE => String {
+ STRING_DOUBLE => Text {
syntax,
- kind: StringKind::StringDouble,
+ kind: TextKind::StringDouble,
},
- STRING_SINGLE => String {
+ ERROR_STRING_DOUBLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingle,
+ kind: TextKind::ErrorStringDoubleUnterminated,
},
- STRING_DOUBLE_VERBATIM => String {
+ STRING_SINGLE => Text {
syntax,
- kind: StringKind::StringDoubleVerbatim,
+ kind: TextKind::StringSingle,
},
- STRING_SINGLE_VERBATIM => String {
+ ERROR_STRING_SINGLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingleVerbatim,
+ kind: TextKind::ErrorStringSingleUnterminated,
},
- STRING_BLOCK => String {
+ STRING_DOUBLE_VERBATIM => Text {
syntax,
- kind: StringKind::StringBlock,
+ kind: TextKind::StringDoubleVerbatim,
+ },
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+ },
+ STRING_SINGLE_VERBATIM => Text {
+ syntax,
+ kind: TextKind::StringSingleVerbatim,
},
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+ },
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+ syntax,
+ kind: TextKind::ErrorStringVerbatimMissingQuotes,
+ },
+ STRING_BLOCK => Text {
+ syntax,
+ kind: TextKind::StringBlock,
+ },
+ ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockUnexpectedEnd,
+ },
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingNewLine,
+ },
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingTermination,
+ },
+ ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingIndent,
+ },
_ => return None,
};
Some(res)
@@ -2718,12 +2790,12 @@
&self.syntax
}
}
-impl String {
- pub fn kind(&self) -> StringKind {
+impl Text {
+ pub fn kind(&self) -> TextKind {
self.kind
}
}
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
@@ -2731,7 +2803,10 @@
impl AstToken for Number {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
- FLOAT | META_FORCE_ENUM => true,
+ FLOAT
+ | ERROR_FLOAT_JUNK_AFTER_POINT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
_ => false,
}
}
@@ -2741,10 +2816,18 @@
syntax,
kind: NumberKind::Float,
},
- META_FORCE_ENUM => Number {
+ ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterPoint,
+ },
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
syntax,
- kind: NumberKind::MetaForceEnum,
+ kind: NumberKind::ErrorFloatJunkAfterExponent,
},
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+ },
_ => return None,
};
Some(res)
@@ -2841,6 +2924,62 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
+impl AstToken for Trivia {
+ fn can_cast(kind: SyntaxKind) -> bool {
+ match kind {
+ WHITESPACE
+ | MULTI_LINE_COMMENT
+ | ERROR_COMMENT_TOO_SHORT
+ | ERROR_COMMENT_UNTERMINATED
+ | SINGLE_LINE_HASH_COMMENT
+ | SINGLE_LINE_SLASH_COMMENT => true,
+ _ => false,
+ }
+ }
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ let res = match syntax.kind() {
+ WHITESPACE => Trivia {
+ syntax,
+ kind: TriviaKind::Whitespace,
+ },
+ MULTI_LINE_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::MultiLineComment,
+ },
+ ERROR_COMMENT_TOO_SHORT => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentTooShort,
+ },
+ ERROR_COMMENT_UNTERMINATED => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentUnterminated,
+ },
+ SINGLE_LINE_HASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineHashComment,
+ },
+ SINGLE_LINE_SLASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineSlashComment,
+ },
+ _ => return None,
+ };
+ Some(res)
+ }
+ fn syntax(&self) -> &SyntaxToken {
+ &self.syntax
+ }
+}
+impl Trivia {
+ pub fn kind(&self) -> TriviaKind {
+ self.kind
+ }
+}
+impl std::fmt::Display for Trivia {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ std::fmt::Display::fmt(self.syntax(), f)
+ }
+}
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
SELF_KW,
#[token("super")]
SUPER_KW,
- META_FORCE_ENUM,
#[token("for")]
FOR_KW,
#[token("assert")]
@@ -253,10 +252,11 @@
BINARY_OPERATOR,
UNARY_OPERATOR,
LITERAL,
- STRING,
+ TEXT,
NUMBER,
IMPORT_KIND,
VISIBILITY,
+ TRIVIA,
#[doc(hidden)]
__LAST,
}
@@ -277,8 +277,8 @@
pub fn is_enum(self) -> bool {
match self {
EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
- | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
- | NUMBER | IMPORT_KIND | VISIBILITY => true,
+ | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+ | IMPORT_KIND | VISIBILITY | TRIVIA => true,
_ => false,
}
}
crates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
use logos::Logos;
use rowan::{TextRange, TextSize};
-use crate::SyntaxKind;
+use crate::{
+ string_block::{lex_str_block, StringBlockError},
+ SyntaxKind,
+};
pub struct Lexer<'a> {
inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
type Item = Lexeme<'a>;
fn next(&mut self) -> Option<Self::Item> {
- let kind = self.inner.next()?;
+ use SyntaxKind::*;
+
+ let mut kind = self.inner.next()?;
let text = self.inner.slice();
+ if kind == STRING_BLOCK {
+ // We use a custom lexer, which skips enough bytes but does not return an error.
+ // Instead, we call the lexer again to check whether something is wrong with the string block.
+ let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+ // In kinds, a string block is matched at least as `|||`, so skip those three bytes
+ lexer.bump(3);
+ let res = lex_str_block(&mut lexer);
+ debug_assert!(lexer.next().is_none(), "str_block is lexed");
+ match res {
+ Ok(_) => {}
+ Err(e) => {
+ kind = match e {
+ StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+ StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+ StringBlockError::MissingTermination => {
+ ERROR_STRING_BLOCK_MISSING_TERMINATION
+ }
+ StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+ }
+ }
+ }
+ }
+
Some(Self::Item {
kind,
text,
crates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
mod ast;
mod binary;
-mod classify;
mod event;
mod generated;
mod language;
crates/jrsonnet-rowan-parser/src/marker.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
!kind.is_enum(),
"{kind:?} is a enum kind, you should use variant kinds instead"
);
- // TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+ // TODO: is_lexer should return true if the enum variant has #[regex]/#[token] on it, or if it is explicitly defined as a lexer error
// debug_assert!(
- // !kind.is_parser(),
- // "{kind:?} should be only emitted by parser, not used directly"
+ // !kind.is_lexer(),
+ // "{kind:?} should be only emitted by lexer, not used directly"
// );
let event_at_pos = &mut p.events[self.start_event_idx];
assert_eq!(*event_at_pos, Event::Pending);
crates/jrsonnet-rowan-parser/src/parser.rsdiffbeforeafterboth1use std::{cell::Cell, fmt::Display, rc::Rc};23use miette::{LabeledSpan, SourceOffset, SourceSpan};4use rowan::{GreenNode, TextRange, TextSize};56use crate::{7 binary::BinaryOperator,8 event::Event,9 lex::Lexeme,10 marker::{AsRange, CompletedMarker, Marker, Ranger},11 string_block::{lex_str_block, StringBlockError},12 token_set::SyntaxKindSet,13 unary::UnaryOperator,14 SyntaxKind,15 SyntaxKind::*,16 SyntaxNode, T, TS,17};1819pub struct Parse {20 pub green_node: GreenNode,21 pub errors: Vec<SyntaxError>,22}2324#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]25pub enum ExpectedSyntax {26 Named(&'static str),27 Unnamed(SyntaxKind),28}29impl Display for ExpectedSyntax {30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {31 match self {32 ExpectedSyntax::Named(n) => write!(f, "{}", n),33 ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),34 }35 }36}3738pub struct Parser<'i> {39 lexemes: &'i [Lexeme<'i>],40 pub offset: usize,41 pub events: Vec<Event>,42 pub entered: u32,43 pub hints: Vec<(u32, TextRange, String)>,44 pub last_error_token: usize,45 expected_syntax: Option<ExpectedSyntax>,46 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,47 steps: Cell<u64>,48}4950const DEFAULT_RECOVERY_SET: SyntaxKindSet = TS![];5152#[derive(Clone, Debug, PartialEq, Eq)]53pub enum SyntaxError {54 Unexpected {55 expected: ExpectedSyntax,56 found: SyntaxKind,57 range: TextRange,58 },59 Missing {60 expected: ExpectedSyntax,61 offset: TextSize,62 },63 Custom {64 error: String,65 range: TextRange,66 },67 Hint {68 error: String,69 range: TextRange,70 },71}7273impl From<SyntaxError> for LabeledSpan {74 fn from(val: SyntaxError) -> Self {75 match val {76 SyntaxError::Unexpected {77 expected,78 found,79 range,80 } => LabeledSpan::new_with_span(81 Some(format!("expected {}, found {:?}", expected, found)),82 SourceSpan::new(83 SourceOffset::from(usize::from(range.start())),84 
SourceOffset::from(usize::from(range.end() - range.start())),85 ),86 ),87 SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(88 Some(format!("missing {}", expected)),89 SourceSpan::new(90 SourceOffset::from(usize::from(offset)),91 SourceOffset::from(0),92 ),93 ),94 SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {95 LabeledSpan::new_with_span(96 Some(error),97 SourceSpan::new(98 SourceOffset::from(usize::from(range.start())),99 SourceOffset::from(usize::from(range.end() - range.start())),100 ),101 )102 }103 }104 }105}106107impl<'i> Parser<'i> {108 pub fn new(lexemes: &'i [Lexeme<'i>]) -> Self {109 Self {110 lexemes,111 offset: 0,112 events: vec![],113 entered: 0,114 last_error_token: 0,115 hints: vec![],116 expected_syntax: None,117 expected_syntax_tracking_state: Rc::new(Cell::new(118 ExpectedSyntaxTrackingState::Unnamed,119 )),120 steps: Cell::new(0),121 }122 }123 pub fn clear_outdated_hints(&mut self) {124 let amount = self125 .hints126 .iter()127 .rev()128 .take_while(|h| h.0 > self.entered)129 .count();130 self.hints.truncate(self.hints.len() - amount)131 }132 fn clear_expected_syntaxes(&mut self) {133 self.expected_syntax = None;134 self.expected_syntax_tracking_state135 .set(ExpectedSyntaxTrackingState::Unnamed);136 }137 pub fn start(&mut self) -> Marker {138 self.skip_trivia();139 let start_event_idx = self.events.len();140 self.events.push(Event::Pending);141 self.entered += 1;142 Marker::new(start_event_idx)143 }144 pub fn start_ranger(&mut self) -> Ranger {145 self.skip_trivia();146 let pos = self.offset;147 Ranger { pos }148 }149 pub fn parse(mut self) -> Vec<Event> {150 let m = self.start();151 expr(&mut self);152 self.expect(EOF);153 m.complete(&mut self, SOURCE_FILE);154155 self.events156 }157158 pub(crate) fn expect(&mut self, kind: SyntaxKind) {159 self.expect_with_recovery_set(kind, TS![])160 }161162 pub(crate) fn expect_with_recovery_set(163 &mut self,164 kind: SyntaxKind,165 recovery_set: 
SyntaxKindSet,166 ) {167 if self.at(kind) {168 if kind != EOF {169 self.bump();170 }171 } else {172 self.error_with_recovery_set(recovery_set);173 }174 }175176 pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {177 if self.at(kind) {178 self.bump();179 } else {180 self.error_with_no_skip();181 }182 }183 fn current_token(&self) -> Lexeme<'i> {184 self.lexemes[self.offset]185 }186 fn previous_token(&mut self) -> Option<Lexeme<'i>> {187 if self.offset == 0 {188 return None;189 }190 let mut previous_token_idx = self.offset - 1;191 while self192 .lexemes193 .get(previous_token_idx)194 .map_or(false, |l| l.kind.is_trivia())195 && previous_token_idx != 0196 {197 previous_token_idx -= 1;198 }199200 Some(self.lexemes[previous_token_idx])201 }202 pub fn start_of_token(&self, mut idx: usize) -> TextSize {203 while self.lexemes[idx].kind.is_trivia() {204 idx += 1;205 }206 self.lexemes[idx].range.start()207 }208 pub fn end_of_token(&self, mut idx: usize) -> TextSize {209 while self.lexemes[idx].kind.is_trivia() {210 idx -= 1;211 }212 self.lexemes[idx].range.end()213 }214 pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {215 self.last_error_token = marker.end_token();216 self.events.push(Event::Error(SyntaxError::Custom {217 error: error.as_ref().to_string(),218 range: marker.as_range(self),219 }));220 }221 pub(crate) fn error_with_recovery_set(222 &mut self,223 recovery_set: SyntaxKindSet,224 ) -> Option<CompletedMarker> {225 self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))226 }227 pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {228 self.error_with_recovery_set_no_default(SyntaxKindSet::ALL)229 }230231 pub fn error_with_recovery_set_no_default(232 &mut self,233 recovery_set: SyntaxKindSet,234 ) -> Option<CompletedMarker> {235 let expected_syntax = self236 .expected_syntax237 .take()238 .unwrap_or(ExpectedSyntax::Named("unknown"));239 self.expected_syntax_tracking_state240 
.set(ExpectedSyntaxTrackingState::Unnamed);241242 self.skip_trivia();243 if self.at_end() || self.at_ts(recovery_set) {244 let range = self245 .previous_token()246 .map(|t| t.range)247 .unwrap_or_else(|| TextRange::at(TextSize::from(0), TextSize::from(0)));248249 self.events.push(Event::Error(SyntaxError::Missing {250 expected: expected_syntax,251 offset: range.end(),252 }));253 return None;254 }255256 let current_token = self.current_token();257258 self.events.push(Event::Error(SyntaxError::Unexpected {259 expected: expected_syntax,260 found: current_token.kind,261 range: current_token.range,262 }));263 self.clear_expected_syntaxes();264 self.last_error_token = self.offset;265266 let m = self.start();267 self.bump();268 Some(m.complete(self, SyntaxKind::ERROR))269 }270271 fn bump(&mut self) {272 self.skip_trivia();273 self.bump_remap(self.current());274 }275 fn bump_remap(&mut self, kind: SyntaxKind) {276 self.skip_trivia();277 assert_ne!(self.offset, self.lexemes.len(), "already at end");278 self.events.push(Event::Token { kind });279 self.offset += 1;280 self.clear_expected_syntaxes();281 }282 fn step(&self) {283 use std::fmt::Write;284 let steps = self.steps.get();285 if steps >= 15000000 {286 let mut out = "seems like parsing is stuck".to_owned();287 {288 let last = 20;289 write!(out, "\n\nLast {} events:", last).unwrap();290 for (i, event) in self291 .events292 .iter()293 .skip(self.events.len().saturating_sub(last))294 .enumerate()295 {296 write!(out, "\n{i}. {event:?}").unwrap();297 }298 }299 {300 let next = 20;301 write!(out, "\n\nNext {next} tokens:").unwrap();302 for (i, tok) in self.lexemes.iter().skip(self.offset).take(next).enumerate() {303 write!(out, "\n{i}. 
{tok:?}").unwrap();304 }305 }306 panic!("{out}")307 }308 self.steps.set(steps + 1);309 }310 fn nth(&self, i: usize) -> SyntaxKind {311 self.step();312 let mut offset = self.offset;313 for _ in 0..i {314 while self315 .lexemes316 .get(offset)317 .map(|l| l.kind.is_trivia())318 .unwrap_or(false)319 {320 offset += 1;321 }322 offset += 1;323 }324 while self325 .lexemes326 .get(offset)327 .map(|l| l.kind.is_trivia())328 .unwrap_or(false)329 {330 offset += 1;331 }332 self.lexemes.get(offset).map(|l| l.kind).unwrap_or(EOF)333 }334 fn current(&self) -> SyntaxKind {335 self.nth(0)336 }337 fn skip_trivia(&mut self) {338 while self.peek_raw().is_trivia() {339 self.offset += 1;340 }341 }342 fn current_lexeme(&mut self) -> Option<&Lexeme> {343 self.skip_trivia();344 self.lexemes.get(self.offset)345 }346 fn peek_raw(&mut self) -> SyntaxKind {347 self.lexemes348 .get(self.offset)349 .map(|l| l.kind)350 .unwrap_or(SyntaxKind::EOF)351 }352 #[must_use]353 pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {354 self.expected_syntax_tracking_state355 .set(ExpectedSyntaxTrackingState::Named);356 self.expected_syntax = Some(ExpectedSyntax::Named(name));357358 ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))359 }360 pub fn at(&mut self, kind: SyntaxKind) -> bool {361 self.nth_at(0, kind)362 }363 pub fn nth_at(&mut self, n: usize, kind: SyntaxKind) -> bool {364 if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {365 self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));366 }367 self.nth(n) == kind368 }369 pub fn at_ts(&mut self, set: SyntaxKindSet) -> bool {370 set.contains(self.current())371 }372 pub fn at_end(&mut self) -> bool {373 self.at(EOF)374 }375}376pub(crate) struct ExpectedSyntaxGuard {377 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,378}379380impl ExpectedSyntaxGuard {381 fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) 
-> Self {382 Self {383 expected_syntax_tracking_state,384 }385 }386}387388impl Drop for ExpectedSyntaxGuard {389 fn drop(&mut self) {390 self.expected_syntax_tracking_state391 .set(ExpectedSyntaxTrackingState::Unnamed);392 }393}394395#[derive(Debug, Clone, Copy)]396enum ExpectedSyntaxTrackingState {397 Named,398 Unnamed,399}400macro_rules! at_match {401 ($p:ident {402 $($r:expr => $e:expr,)*403 _ => $else:expr $(,)?404 }) => {{405 $(406 if $p.at($r) {$e} else407 )* {408 $else409 }410 }}411}412413fn expr(p: &mut Parser) -> Option<CompletedMarker> {414 expr_binding_power(p, 0)415}416fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {417 let mut lhs = lhs(p)?;418419 loop {420 let op = at_match!(p {421 T![*] => BinaryOperator::Mul,422 T![/] => BinaryOperator::Div,423 T![%] => BinaryOperator::Mod,424 T![+] => BinaryOperator::Plus,425 T![-] => BinaryOperator::Minus,426 T![<<] => BinaryOperator::ShiftLeft,427 T![>>] => BinaryOperator::ShiftRight,428 T![<] => BinaryOperator::LessThan,429 T![>] => BinaryOperator::GreaterThan,430 T![<=] => BinaryOperator::LessThanOrEqual,431 T![>=] => BinaryOperator::GreaterThanOrEqual,432 T![==] => BinaryOperator::Equal,433 T![!=] => BinaryOperator::NotEqual,434 T![&] => BinaryOperator::BitAnd,435 T![^] => BinaryOperator::BitXor,436 T![|] => BinaryOperator::BitOr,437 T![&&] => BinaryOperator::And,438 T![||] => BinaryOperator::Or,439 T![in] => BinaryOperator::In,440 T!['{'] => BinaryOperator::ObjectApply,441 _ => break,442 });443 let (left_binding_power, right_binding_power) = op.binding_power();444 if left_binding_power < minimum_binding_power {445 break;446 }447448 // Object apply is not a real operator, we dont have something to bump449 if op != BinaryOperator::ObjectApply {450 p.bump();451 }452453 let m = lhs.wrap(p, LHS_EXPR).precede(p);454 let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();455 lhs = m.complete(456 p,457 if op == BinaryOperator::ObjectApply {458 
EXPR_OBJ_EXTEND459 } else {460 EXPR_BINARY461 },462 );463464 if !parsed_rhs {465 break;466 }467 }468 Some(lhs)469}470fn compspec(p: &mut Parser) {471 assert!(p.at(T![for]) || p.at(T![if]));472 if p.at(T![for]) {473 let m = p.start();474 p.bump();475 name(p);476 p.expect(T![in]);477 expr(p);478 m.complete(p, FOR_SPEC);479 } else if p.at(T![if]) {480 let m = p.start();481 p.bump();482 expr(p);483 m.complete(p, IF_SPEC);484 } else {485 unreachable!()486 }487}488fn comma(p: &mut Parser) -> bool {489 if p.at(T![,]) {490 p.bump();491 true492 } else {493 false494 }495}496fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {497 if p.at(T![,]) {498 p.bump();499 true500 } else if p.at_ts(set) {501 p.expect_with_no_skip(T![,]);502 p.bump();503 true504 } else {505 false506 }507}508fn field_name(p: &mut Parser) {509 let _e = p.expected_syntax_name("field name");510 let m = p.start();511 if p.at(T!['[']) {512 p.bump();513 expr(p);514 p.expect(T![']']);515 m.complete(p, FIELD_NAME_DYNAMIC);516 } else if p.at(IDENT) {517 name(p);518 m.complete(p, FIELD_NAME_FIXED);519 } else if p.current().is_string() {520 string(p);521 m.complete(p, FIELD_NAME_FIXED);522 } else {523 p.error_with_recovery_set(TS![;]);524 }525}526fn visibility(p: &mut Parser) {527 if p.at_ts(TS![: :: :::]) {528 p.bump()529 } else {530 p.error_with_recovery_set(TS![]);531 }532}533fn field(p: &mut Parser) {534 let m = p.start();535 field_name(p);536 let plus = if p.at(T![+]) {537 let r = p.start_ranger();538 p.bump();539 Some(r.finish(p))540 } else {541 None542 };543 let params = if p.at(T!['(']) {544 if let Some(plus) = plus {545 p.custom_error(plus, "can't extend with method");546 }547 params_desc(p);548 if p.at(T![+]) {549 let r = p.start_ranger();550 p.bump();551 p.custom_error(r.finish(p), "can't extend with method");552 }553 true554 } else {555 false556 };557 visibility(p);558 expr(p);559560 if params {561 m.complete(p, FIELD_METHOD)562 } else {563 m.complete(p, FIELD_NORMAL)564 };565}566fn 
assertion(p: &mut Parser) {567 assert!(p.at(T![assert]));568 let m = p.start();569 p.bump();570 expr(p).map(|c| c.wrap(p, LHS_EXPR));571 if p.at(T![:]) {572 p.bump();573 expr(p);574 }575 m.complete(p, ASSERTION);576}577fn object(p: &mut Parser) -> CompletedMarker {578 assert!(p.at(T!['{']));579 let m_t = p.start();580 let m = p.start();581 p.bump();582583 loop {584 if p.at(T!['}']) {585 p.bump();586 break;587 }588 let m = p.start();589 if p.at(T![local]) {590 obj_local(p);591 m.complete(p, MEMBER_BIND_STMT)592 } else if p.at(T![assert]) {593 assertion(p);594 m.complete(p, MEMBER_ASSERT_STMT)595 } else {596 field(p);597 while p.at(T![for]) || p.at(T![if]) {598 compspec(p)599 }600 m.complete(p, MEMBER_FIELD)601 };602 if comma_with_alternatives(p, SyntaxKindSet::new(&[T![=]])) {603 continue;604 }605 p.expect(R_BRACE);606 break;607 }608609 m.complete(p, OBJ_BODY_MEMBER_LIST);610 m_t.complete(p, EXPR_OBJECT)611}612fn param(p: &mut Parser) {613 let m = p.start();614 destruct(p);615 if p.at(T![=]) {616 p.bump();617 expr(p);618 }619 m.complete(p, PARAM);620}621fn params_desc(p: &mut Parser) -> CompletedMarker {622 assert!(p.at(T!['(']));623 let m = p.start();624 p.bump();625626 loop {627 if p.at(T![')']) {628 p.bump();629 break;630 }631 param(p);632 if comma(p) {633 continue;634 }635 p.expect(T![')']);636 break;637 }638639 m.complete(p, PARAMS_DESC)640}641fn args_desc(p: &mut Parser) {642 let m = p.start();643 assert!(p.at(T!['(']));644 p.bump();645646 let started_named = Cell::new(false);647648 loop {649 if p.at(T![')']) {650 break;651 }652653 let m = p.start();654 if p.at(IDENT) && p.nth_at(1, T![=]) {655 name(p);656 p.bump();657 expr(p);658 m.complete(p, ARG);659 started_named.set(true);660 } else {661 expr(p);662 m.complete(p, ARG);663 }664 if comma(p) {665 continue;666 }667 break;668 }669 p.expect(T![')']);670 if p.at(T![tailstrict]) {671 p.bump()672 }673 m.complete(p, ARGS_DESC);674}675676fn array(p: &mut Parser) -> CompletedMarker {677 assert!(p.at(T!['[']));678 // 
Start the list node679 let m = p.start();680 p.bump(); // '['681682 // This vec will have at most one element in case of correct input683 let mut compspecs = Vec::with_capacity(1);684 let mut elems = 0;685686 loop {687 if p.at(T![']']) {688 p.bump();689 break;690 }691 elems += 1;692 expr(p);693 let c = p.start_ranger();694 let mut had_spec = false;695 while p.at(T![for]) || p.at(T![if]) {696 had_spec = true;697 compspec(p)698 }699 if had_spec {700 compspecs.push(c.finish(p));701 }702 if comma(p) {703 continue;704 }705 p.expect(T![']']);706 break;707 }708709 if elems > 1 && !compspecs.is_empty() {710 for spec in compspecs {711 p.custom_error(712 spec,713 "compspec may only be used if there is only one array element",714 )715 }716717 m.complete(p, EXPR_ARRAY)718 } else if !compspecs.is_empty() {719 m.complete(p, EXPR_ARRAY_COMP)720 } else {721 m.complete(p, EXPR_ARRAY)722 }723}724/// Returns true if it was slice, false if just index725#[must_use]726fn slice_desc_or_index(p: &mut Parser) -> bool {727 let m = p.start();728 p.bump();729 // TODO: do not treat :, ::, ::: as full tokens?730 // Start731 if !p.at(T![:]) && !p.at(T![::]) {732 expr(p);733 }734 if p.at(T![:]) {735 p.bump();736 // End737 if !p.at(T![']']) {738 expr(p).map(|c| c.wrap(p, SLICE_DESC_END));739 }740 if p.at(T![:]) {741 p.bump();742 // Step743 if !p.at(T![']']) {744 expr(p).map(|c| c.wrap(p, SLICE_DESC_STEP));745 }746 }747 } else if p.at(T![::]) {748 p.bump();749 // End750 if !p.at(T![']']) {751 expr(p).map(|c| c.wrap(p, SLICE_DESC_END));752 }753 } else {754 // It was not a slice755 p.expect(T![']']);756 m.forget(p);757 return false;758 }759 p.expect(T![']']);760 m.complete(p, SLICE_DESC);761 true762}763fn lhs(p: &mut Parser) -> Option<CompletedMarker> {764 let mut lhs = lhs_basic(p)?;765766 loop {767 if p.at(T![.]) {768 let m = lhs.precede(p);769 p.bump();770 name(p);771 lhs = m.complete(p, EXPR_INDEX);772 } else if p.at(T!['[']) {773 if slice_desc_or_index(p) {774 lhs = lhs.precede(p).complete(p, 
EXPR_SLICE);775 } else {776 lhs = lhs777 .wrap(p, LHS_EXPR)778 .precede(p)779 .complete(p, EXPR_INDEX_EXPR);780 }781 } else if p.at(T!['(']) {782 let m = lhs.precede(p);783 args_desc(p);784 lhs = m.complete(p, EXPR_APPLY);785 } else {786 break;787 }788 }789790 Some(lhs)791}792fn name(p: &mut Parser) {793 let m = p.start();794 p.expect(IDENT);795 m.complete(p, NAME);796}797fn destruct_rest(p: &mut Parser) {798 assert!(p.at(T![...]));799 p.bump();800 let m = p.start();801 if p.at(IDENT) {802 p.bump()803 }804 m.complete(p, DESTRUCT_REST);805}806fn destruct_object_field(p: &mut Parser) {807 let m = p.start();808 name(p);809 if p.at(T![:]) {810 p.bump();811 destruct(p);812 };813 if p.at(T![=]) {814 p.bump();815 expr(p);816 }817 m.complete(p, DESTRUCT_OBJECT_FIELD);818}819fn obj_local(p: &mut Parser) {820 assert!(p.at(T![local]));821 let m = p.start();822 p.bump();823 bind(p);824 m.complete(p, OBJ_LOCAL);825}826fn destruct(p: &mut Parser) -> CompletedMarker {827 let m = p.start();828 if p.at(T![?]) {829 p.bump();830 m.complete(p, DESTRUCT_SKIP)831 } else if p.at(T!['[']) {832 p.bump();833 let mut had_rest = false;834 loop {835 if p.at(T![']']) {836 p.bump();837 break;838 } else if p.at(T![...]) {839 let m_err = p.start_ranger();840 destruct_rest(p);841 if had_rest {842 p.custom_error(m_err.finish(p), "only one rest can be present in array");843 }844 had_rest = true;845 } else {846 destruct(p);847 }848 if p.at(T![,]) {849 p.bump();850 continue;851 }852 p.expect(T![']']);853 break;854 }855 m.complete(p, DESTRUCT_ARRAY)856 } else if p.at(T!['{']) {857 p.bump();858 let mut had_rest = false;859 loop {860 if p.at(T!['}']) {861 p.bump();862 break;863 } else if p.at(T![...]) {864 let m_err = p.start_ranger();865 destruct_rest(p);866 if had_rest {867 p.custom_error(m_err.finish(p), "only one rest can be present in object");868 }869 had_rest = true;870 } else {871 if had_rest {872 p.error_with_recovery_set(TS![]);873 }874 destruct_object_field(p);875 }876 if p.at(T![,]) {877 
p.bump();878 continue;879 }880 p.expect(T!['}']);881 break;882 }883 m.complete(p, DESTRUCT_OBJECT)884 } else if p.at(IDENT) {885 name(p);886 m.complete(p, DESTRUCT_FULL)887 } else {888 m.complete(p, ERROR)889 }890}891fn bind(p: &mut Parser) {892 let m = p.start();893 if p.at(IDENT) && p.nth_at(1, T!['(']) {894 name(p);895 params_desc(p);896 p.expect(T![=]);897 expr(p);898 m.complete(p, BIND_FUNCTION)899 } else {900 destruct(p);901 p.expect(T![=]);902 expr(p);903 m.complete(p, BIND_DESTRUCT)904 };905}906fn string(p: &mut Parser) {907 assert!(p.current().is_string());908 if p.at(STRING_BLOCK) {909 // We use custom lexer, which skips enough bytes, but not returns error910 // Instead we should call lexer again to verify if there is something wrong with string block911 let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(912 &p.current_lexeme().expect("parser is at string block").text913 ));914 // In kinds, string blocks is parsed at least as `|||`915 lexer.bump(3);916 let res = lex_str_block(&mut lexer);917 debug_assert!(lexer.next().is_none(), "str_block is lexed");918 match res {919 Ok(_) => {920 p.bump();921 }922 Err(e) => p.bump_remap(match e {923 StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,924 StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,925 StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,926 StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,927 }),928 }929 } else {930 p.bump();931 }932}933fn number(p: &mut Parser) {934 assert!(p.current().is_number());935 p.bump();936}937fn literal(p: &mut Parser) {938 assert!(p.current().is_literal());939 p.bump();940}941fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {942 let _e = p.expected_syntax_name("value");943 Some(if p.current().is_literal() {944 let m = p.start();945 literal(p);946 m.complete(p, EXPR_LITERAL)947 } else if p.current().is_string() {948 let m = p.start();949 string(p);950 m.complete(p, 
EXPR_STRING)951 } else if p.current().is_number() {952 let m = p.start();953 number(p);954 m.complete(p, EXPR_NUMBER)955 } else if p.at(IDENT) {956 let m = p.start();957 name(p);958 m.complete(p, EXPR_VAR)959 } else if p.at(INTRINSIC_THIS_FILE) {960 let m = p.start();961 p.bump();962 m.complete(p, EXPR_INTRINSIC_THIS_FILE)963 } else if p.at(INTRINSIC_ID) {964 let m = p.start();965 p.bump();966 m.complete(p, EXPR_INTRINSIC_ID)967 } else if p.at(INTRINSIC) {968 let m = p.start();969 p.bump();970 p.expect(T!['(']);971 name(p);972 p.expect(T![')']);973 m.complete(p, EXPR_INTRINSIC)974 } else if p.at(T![if]) {975 let m = p.start();976 p.bump();977 expr(p);978 p.expect(T![then]);979 expr(p).map(|c| c.wrap(p, TRUE_EXPR));980 if p.at(T![else]) {981 p.bump();982 expr(p).map(|c| c.wrap(p, FALSE_EXPR));983 }984 m.complete(p, EXPR_IF_THEN_ELSE)985 } else if p.at(T!['[']) {986 array(p)987 } else if p.at(T!['{']) {988 object(p)989 } else if p.at(T![local]) {990 let m = p.start();991 p.bump();992 loop {993 if p.at(T![;]) {994 p.bump();995 break;996 }997 bind(p);998999 if p.at(T![,]) {1000 p.bump();1001 continue;1002 }1003 p.expect(T![;]);1004 break;1005 }1006 expr(p);1007 m.complete(p, EXPR_LOCAL)1008 } else if p.at(T![function]) {1009 let m = p.start();1010 p.bump();1011 params_desc(p);1012 expr(p);1013 m.complete(p, EXPR_FUNCTION)1014 } else if p.at(T![error]) {1015 let m = p.start();1016 p.bump();1017 expr(p);1018 m.complete(p, EXPR_ERROR)1019 } else if p.at(T![assert]) {1020 let m = p.start();1021 assertion(p);1022 p.expect(T![;]);1023 expr(p);1024 m.complete(p, EXPR_ASSERT)1025 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {1026 let m = p.start();1027 p.bump();1028 string(p);1029 m.complete(p, EXPR_IMPORT)1030 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {1031 let op = match p.current() {1032 T![-] => UnaryOperator::Minus,1033 T![!] 
=> UnaryOperator::Not,1034 T![~] => UnaryOperator::BitNegate,1035 _ => unreachable!(),1036 };1037 let ((), right_binding_power) = op.binding_power();10381039 let m = p.start();1040 p.bump();1041 expr_binding_power(p, right_binding_power);1042 m.complete(p, EXPR_UNARY)1043 } else if p.at(T!['(']) {1044 let m = p.start();1045 p.bump();1046 expr(p);1047 assert!(p.at(T![')']));1048 p.bump();1049 m.complete(p, EXPR_PARENED)1050 } else {1051 p.error_with_recovery_set(TS![]);1052 return None;1053 })1054}10551056impl Parse {1057 pub fn syntax(&self) -> SyntaxNode {1058 SyntaxNode::new_root(self.green_node.clone())1059 }1060}1use std::{cell::Cell, fmt::Display, rc::Rc};23use miette::{LabeledSpan, SourceOffset, SourceSpan};4use rowan::{GreenNode, TextRange, TextSize};56use crate::{7 binary::BinaryOperator,8 event::Event,9 lex::Lexeme,10 marker::{AsRange, CompletedMarker, Marker, Ranger},11 nodes::{Literal, Number, Text, Trivia},12 token_set::SyntaxKindSet,13 unary::UnaryOperator,14 AstToken, SyntaxKind,15 SyntaxKind::*,16 SyntaxNode, T, TS,17};1819pub struct Parse {20 pub green_node: GreenNode,21 pub errors: Vec<SyntaxError>,22}2324#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]25pub enum ExpectedSyntax {26 Named(&'static str),27 Unnamed(SyntaxKind),28}29impl Display for ExpectedSyntax {30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {31 match self {32 ExpectedSyntax::Named(n) => write!(f, "{}", n),33 ExpectedSyntax::Unnamed(u) => write!(f, "{:?}", u),34 }35 }36}3738pub struct Parser<'i> {39 // TODO: remove all trivia before feeding to parser?40 lexemes: &'i [Lexeme<'i>],41 pub offset: usize,42 pub events: Vec<Event>,43 pub entered: u32,44 pub hints: Vec<(u32, TextRange, String)>,45 pub last_error_token: usize,46 expected_syntax: Option<ExpectedSyntax>,47 expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,48 steps: Cell<u64>,49}5051const DEFAULT_RECOVERY_SET: SyntaxKindSet = TS![];5253#[derive(Clone, Debug, PartialEq, 
Eq)]54pub enum SyntaxError {55 Unexpected {56 expected: ExpectedSyntax,57 found: SyntaxKind,58 range: TextRange,59 },60 Missing {61 expected: ExpectedSyntax,62 offset: TextSize,63 },64 Custom {65 error: String,66 range: TextRange,67 },68 Hint {69 error: String,70 range: TextRange,71 },72}7374impl From<SyntaxError> for LabeledSpan {75 fn from(val: SyntaxError) -> Self {76 match val {77 SyntaxError::Unexpected {78 expected,79 found,80 range,81 } => LabeledSpan::new_with_span(82 Some(format!("expected {}, found {:?}", expected, found)),83 SourceSpan::new(84 SourceOffset::from(usize::from(range.start())),85 SourceOffset::from(usize::from(range.end() - range.start())),86 ),87 ),88 SyntaxError::Missing { expected, offset } => LabeledSpan::new_with_span(89 Some(format!("missing {}", expected)),90 SourceSpan::new(91 SourceOffset::from(usize::from(offset)),92 SourceOffset::from(0),93 ),94 ),95 SyntaxError::Custom { error, range } | SyntaxError::Hint { error, range } => {96 LabeledSpan::new_with_span(97 Some(error),98 SourceSpan::new(99 SourceOffset::from(usize::from(range.start())),100 SourceOffset::from(usize::from(range.end() - range.start())),101 ),102 )103 }104 }105 }106}107108impl<'i> Parser<'i> {109 pub fn new(lexemes: &'i [Lexeme<'i>]) -> Self {110 Self {111 lexemes,112 offset: 0,113 events: vec![],114 entered: 0,115 last_error_token: 0,116 hints: vec![],117 expected_syntax: None,118 expected_syntax_tracking_state: Rc::new(Cell::new(119 ExpectedSyntaxTrackingState::Unnamed,120 )),121 steps: Cell::new(0),122 }123 }124 pub fn clear_outdated_hints(&mut self) {125 let amount = self126 .hints127 .iter()128 .rev()129 .take_while(|h| h.0 > self.entered)130 .count();131 self.hints.truncate(self.hints.len() - amount)132 }133 fn clear_expected_syntaxes(&mut self) {134 self.expected_syntax = None;135 self.expected_syntax_tracking_state136 .set(ExpectedSyntaxTrackingState::Unnamed);137 }138 pub fn start(&mut self) -> Marker {139 self.skip_trivia();140 let start_event_idx = 
self.events.len();141 self.events.push(Event::Pending);142 self.entered += 1;143 Marker::new(start_event_idx)144 }145 pub fn start_ranger(&mut self) -> Ranger {146 self.skip_trivia();147 let pos = self.offset;148 Ranger { pos }149 }150 pub fn parse(mut self) -> Vec<Event> {151 let m = self.start();152 expr(&mut self);153 self.expect(EOF);154 m.complete(&mut self, SOURCE_FILE);155156 self.events157 }158159 pub(crate) fn expect(&mut self, kind: SyntaxKind) {160 self.expect_with_recovery_set(kind, TS![])161 }162163 pub(crate) fn expect_with_recovery_set(164 &mut self,165 kind: SyntaxKind,166 recovery_set: SyntaxKindSet,167 ) {168 if self.at(kind) {169 if kind != EOF {170 self.bump();171 }172 } else {173 self.error_with_recovery_set(recovery_set);174 }175 }176177 pub(crate) fn expect_with_no_skip(&mut self, kind: SyntaxKind) {178 if self.at(kind) {179 self.bump();180 } else {181 self.error_with_no_skip();182 }183 }184 fn current_token(&self) -> Lexeme<'i> {185 self.lexemes[self.offset]186 }187 fn previous_token(&mut self) -> Option<Lexeme<'i>> {188 if self.offset == 0 {189 return None;190 }191 let mut previous_token_idx = self.offset - 1;192 while self193 .lexemes194 .get(previous_token_idx)195 .map_or(false, |l| Trivia::can_cast(l.kind))196 && previous_token_idx != 0197 {198 previous_token_idx -= 1;199 }200201 Some(self.lexemes[previous_token_idx])202 }203 pub fn start_of_token(&self, mut idx: usize) -> TextSize {204 while Trivia::can_cast(self.lexemes[idx].kind) {205 idx += 1;206 }207 self.lexemes[idx].range.start()208 }209 pub fn end_of_token(&self, mut idx: usize) -> TextSize {210 while Trivia::can_cast(self.lexemes[idx].kind) {211 idx -= 1;212 }213 self.lexemes[idx].range.end()214 }215 pub(crate) fn custom_error(&mut self, marker: impl AsRange, error: impl AsRef<str>) {216 self.last_error_token = marker.end_token();217 self.events.push(Event::Error(SyntaxError::Custom {218 error: error.as_ref().to_string(),219 range: marker.as_range(self),220 }));221 }222 
// The methods below continue the parser's `impl` block, whose header precedes this excerpt.
	/// Reports a syntax error at the current position.
	///
	/// `recovery_set` is unioned with `DEFAULT_RECOVERY_SET` before delegating to
	/// [`Self::error_with_recovery_set_no_default`].
	pub(crate) fn error_with_recovery_set(
		&mut self,
		recovery_set: SyntaxKindSet,
	) -> Option<CompletedMarker> {
		self.error_with_recovery_set_no_default(recovery_set.union(DEFAULT_RECOVERY_SET))
	}
	/// Reports a syntax error using `SyntaxKindSet::ALL` as the recovery set.
	pub fn error_with_no_skip(&mut self) -> Option<CompletedMarker> {
		self.error_with_recovery_set_no_default(SyntaxKindSet::ALL)
	}

	/// Reports a syntax error using exactly `recovery_set` (no defaults added).
	///
	/// If the parser is at the end of input or at a token from `recovery_set`, a
	/// `SyntaxError::Missing` event is pushed, nothing is consumed and `None` is
	/// returned. Otherwise a `SyntaxError::Unexpected` event is pushed and the
	/// current token is consumed into an `ERROR` node, which is returned.
	pub fn error_with_recovery_set_no_default(
		&mut self,
		recovery_set: SyntaxKindSet,
	) -> Option<CompletedMarker> {
		let expected_syntax = self
			.expected_syntax
			.take()
			.unwrap_or(ExpectedSyntax::Named("unknown"));
		self.expected_syntax_tracking_state
			.set(ExpectedSyntaxTrackingState::Unnamed);

		self.skip_trivia();
		if self.at_end() || self.at_ts(recovery_set) {
			// Point the "missing" error at the end of the previous token,
			// or at offset 0 when there is no previous token.
			let range = self
				.previous_token()
				.map(|t| t.range)
				.unwrap_or_else(|| TextRange::at(TextSize::from(0), TextSize::from(0)));

			self.events.push(Event::Error(SyntaxError::Missing {
				expected: expected_syntax,
				offset: range.end(),
			}));
			return None;
		}

		let current_token = self.current_token();

		self.events.push(Event::Error(SyntaxError::Unexpected {
			expected: expected_syntax,
			found: current_token.kind,
			range: current_token.range,
		}));
		self.clear_expected_syntaxes();
		self.last_error_token = self.offset;

		let m = self.start();
		self.bump();
		Some(m.complete(self, SyntaxKind::ERROR))
	}
	/// Consumes the current token, panicking if it is not of `kind`.
	fn bump_assert(&mut self, kind: SyntaxKind) {
		self.skip_trivia();
		assert!(self.at(kind), "expected {:?}", kind);
		self.bump_remap(self.current());
	}
	/// Consumes the current non-trivia token, keeping its kind.
	fn bump(&mut self) {
		self.skip_trivia();
		self.bump_remap(self.current());
	}
	/// Consumes the current non-trivia token, emitting it with `kind`.
	///
	/// Panics if the parser is already past the last lexeme.
	fn bump_remap(&mut self, kind: SyntaxKind) {
		self.skip_trivia();
		assert_ne!(self.offset, self.lexemes.len(), "already at end");
		self.events.push(Event::Token { kind });
		self.offset += 1;
		self.clear_expected_syntaxes();
	}
	/// Counts one lookahead step; panics with a dump of the last 20 events and
	/// the next 20 tokens once the step counter reaches the limit.
	fn step(&self) {
		use std::fmt::Write;
		let steps = self.steps.get();
		if steps >= 15000000 {
			let mut out = "seems like parsing is stuck".to_owned();
			{
				let last = 20;
				write!(out, "\n\nLast {} events:", last).unwrap();
				for (i, event) in self
					.events
					.iter()
					.skip(self.events.len().saturating_sub(last))
					.enumerate()
				{
					write!(out, "\n{i}. {event:?}").unwrap();
				}
			}
			{
				let next = 20;
				write!(out, "\n\nNext {next} tokens:").unwrap();
				for (i, tok) in self.lexemes.iter().skip(self.offset).take(next).enumerate() {
					write!(out, "\n{i}. {tok:?}").unwrap();
				}
			}
			panic!("{out}")
		}
		self.steps.set(steps + 1);
	}
	/// Returns the kind of the `i`-th non-trivia token ahead of the current
	/// offset (0 = current), or `EOF` when the input is exhausted.
	fn nth(&self, i: usize) -> SyntaxKind {
		self.step();
		let mut offset = self.offset;
		for _ in 0..i {
			// Skip trivia in front of the token, then the token itself.
			while self
				.lexemes
				.get(offset)
				.map(|l| Trivia::can_cast(l.kind))
				.unwrap_or(false)
			{
				offset += 1;
			}
			offset += 1;
		}
		while self
			.lexemes
			.get(offset)
			.map(|l| Trivia::can_cast(l.kind))
			.unwrap_or(false)
		{
			offset += 1;
		}
		self.lexemes.get(offset).map(|l| l.kind).unwrap_or(EOF)
	}
	/// Kind of the current non-trivia token.
	fn current(&self) -> SyntaxKind {
		self.nth(0)
	}
	/// Advances the offset past any trivia lexemes.
	fn skip_trivia(&mut self) {
		while Trivia::can_cast(self.peek_raw()) {
			self.offset += 1;
		}
	}
	/// Kind of the lexeme exactly at the current offset (trivia included),
	/// or `EOF` past the end.
	fn peek_raw(&mut self) -> SyntaxKind {
		self.lexemes
			.get(self.offset)
			.map(|l| l.kind)
			.unwrap_or(SyntaxKind::EOF)
	}
	/// Sets a human-readable name for the syntax expected next.
	///
	/// While the returned guard is alive the tracking state is `Named`, so
	/// `nth_at` does not overwrite the expected syntax; dropping the guard
	/// switches the state back to `Unnamed`.
	#[must_use]
	pub(crate) fn expected_syntax_name(&mut self, name: &'static str) -> ExpectedSyntaxGuard {
		self.expected_syntax_tracking_state
			.set(ExpectedSyntaxTrackingState::Named);
		self.expected_syntax = Some(ExpectedSyntax::Named(name));

		ExpectedSyntaxGuard::new(Rc::clone(&self.expected_syntax_tracking_state))
	}
	/// Whether the current non-trivia token is of `kind`.
	pub fn at(&mut self, kind: SyntaxKind) -> bool {
		self.nth_at(0, kind)
	}
	/// Whether the `n`-th non-trivia token ahead is of `kind`.
	///
	/// In the `Unnamed` tracking state this also records `kind` as the
	/// expected syntax for a later error report.
	pub fn nth_at(&mut self, n: usize, kind: SyntaxKind) -> bool {
		if let ExpectedSyntaxTrackingState::Unnamed = self.expected_syntax_tracking_state.get() {
			self.expected_syntax = Some(ExpectedSyntax::Unnamed(kind));
		}
		self.nth(n) == kind
	}
	/// Whether the current non-trivia token belongs to `set`.
	pub fn at_ts(&mut self, set: SyntaxKindSet) -> bool {
		set.contains(self.current())
	}
	/// Whether the parser has reached `EOF`.
	pub fn at_end(&mut self) -> bool {
		self.at(EOF)
	}
}
/// Guard returned by `expected_syntax_name`; resets the shared tracking state
/// to `Unnamed` when dropped.
pub(crate) struct ExpectedSyntaxGuard {
	expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>,
}

impl ExpectedSyntaxGuard {
	fn new(expected_syntax_tracking_state: Rc<Cell<ExpectedSyntaxTrackingState>>) -> Self {
		Self {
			expected_syntax_tracking_state,
		}
	}
}

impl Drop for ExpectedSyntaxGuard {
	fn drop(&mut self) {
		self.expected_syntax_tracking_state
			.set(ExpectedSyntaxTrackingState::Unnamed);
	}
}

/// Whether the expected syntax was named explicitly (`Named`) or is derived
/// from the kinds passed to `nth_at` (`Unnamed`).
#[derive(Debug, Clone, Copy)]
enum ExpectedSyntaxTrackingState {
	Named,
	Unnamed,
}
// `match`-like helper: evaluates the expression of the first arm whose token
// kind the parser is currently at, or the `_` arm if none matches.
macro_rules! at_match {
	($p:ident {
		$($r:expr => $e:expr,)*
		_ => $else:expr $(,)?
	}) => {{
		$(
			if $p.at($r) {$e} else
		)* {
			$else
		}
	}}
}

/// Parses an expression with no minimum binding power.
fn expr(p: &mut Parser) -> Option<CompletedMarker> {
	expr_binding_power(p, 0)
}
/// Parses a binary-operator chain, stopping at the first operator whose left
/// binding power is below `minimum_binding_power`.
///
/// Returns `None` only if the leftmost operand could not be parsed.
fn expr_binding_power(p: &mut Parser, minimum_binding_power: u8) -> Option<CompletedMarker> {
	let mut lhs = lhs(p)?;

	loop {
		let op = at_match!(p {
			T![*] => BinaryOperator::Mul,
			T![/] => BinaryOperator::Div,
			T![%] => BinaryOperator::Mod,
			T![+] => BinaryOperator::Plus,
			T![-] => BinaryOperator::Minus,
			T![<<] => BinaryOperator::ShiftLeft,
			T![>>] => BinaryOperator::ShiftRight,
			T![<] => BinaryOperator::LessThan,
			T![>] => BinaryOperator::GreaterThan,
			T![<=] => BinaryOperator::LessThanOrEqual,
			T![>=] => BinaryOperator::GreaterThanOrEqual,
			T![==] => BinaryOperator::Equal,
			T![!=] => BinaryOperator::NotEqual,
			T![&] => BinaryOperator::BitAnd,
			T![^] => BinaryOperator::BitXor,
			T![|] => BinaryOperator::BitOr,
			T![&&] => BinaryOperator::And,
			T![||] => BinaryOperator::Or,
			T![in] => BinaryOperator::In,
			T!['{'] => BinaryOperator::ObjectApply,
			_ => break,
		});
		let (left_binding_power, right_binding_power) = op.binding_power();
		if left_binding_power < minimum_binding_power {
			break;
		}

		// Object apply is not a real operator, we don't have something to bump
		if op != BinaryOperator::ObjectApply {
			p.bump();
		}

		let m = lhs.wrap(p, LHS_EXPR).precede(p);
		let parsed_rhs = expr_binding_power(p, right_binding_power).is_some();
		lhs = m.complete(
			p,
			if op == BinaryOperator::ObjectApply {
				EXPR_OBJ_EXTEND
			} else {
				EXPR_BINARY
			},
		);

		if !parsed_rhs {
			break;
		}
	}
	Some(lhs)
}
/// Parses one `for name in expr` or `if expr` comprehension spec.
///
/// Panics if the parser is not at `for` or `if`.
fn compspec(p: &mut Parser) {
	assert!(p.at(T![for]) || p.at(T![if]));
	if p.at(T![for]) {
		let m = p.start();
		p.bump();
		name(p);
		p.expect(T![in]);
		expr(p);
		m.complete(p, FOR_SPEC);
	} else if p.at(T![if]) {
		let m = p.start();
		p.bump();
		expr(p);
		m.complete(p, IF_SPEC);
	} else {
		unreachable!()
	}
}
/// Consumes a `,` if present; returns whether one was consumed.
fn comma(p: &mut Parser) -> bool {
	if p.at(T![,]) {
		p.bump();
		true
	} else {
		false
	}
}
/// Like [`comma`], but a token from `set` is also accepted in place of the
/// comma: an "expected `,`" error is reported and the token is consumed.
fn comma_with_alternatives(p: &mut Parser, set: SyntaxKindSet) -> bool {
	if p.at(T![,]) {
		p.bump();
		true
	} else if p.at_ts(set) {
		p.expect_with_no_skip(T![,]);
		p.bump();
		true
	} else {
		false
	}
}
/// Parses a field name: `[expr]`, an identifier, or a text token.
fn field_name(p: &mut Parser) {
	let _e = p.expected_syntax_name("field name");
	let m = p.start();
	if p.at(T!['[']) {
		p.bump();
		expr(p);
		p.expect(T![']']);
		m.complete(p, FIELD_NAME_DYNAMIC);
	} else if p.at(IDENT) {
		name(p);
		m.complete(p, FIELD_NAME_FIXED);
	} else if Text::can_cast(p.current()) {
		text(p);
		m.complete(p, FIELD_NAME_FIXED);
	} else {
		// NOTE(review): `m` is neither completed nor forgotten on this path;
		// confirm the marker type tolerates being dropped.
		p.error_with_recovery_set(TS![;]);
	}
}
/// Consumes a field visibility token (`:`, `::` or `:::`), or reports an error.
fn visibility(p: &mut Parser) {
	if p.at_ts(TS![: :: :::]) {
		p.bump()
	} else {
		p.error_with_recovery_set(TS![]);
	}
}
/// Parses an object field: name, optional `+`, optional parameter list,
/// visibility and value. A `+` combined with a parameter list is reported as
/// "can't extend with method".
fn field(p: &mut Parser) {
	let m = p.start();
	field_name(p);
	let plus = if p.at(T![+]) {
		let r = p.start_ranger();
		p.bump();
		Some(r.finish(p))
	} else {
		None
	};
	let params = if p.at(T!['(']) {
		if let Some(plus) = plus {
			p.custom_error(plus, "can't extend with method");
		}
		params_desc(p);
		if p.at(T![+]) {
			let r = p.start_ranger();
			p.bump();
			p.custom_error(r.finish(p), "can't extend with method");
		}
		true
	} else {
		false
	};
	visibility(p);
	expr(p);

	if params {
		m.complete(p, FIELD_METHOD)
	} else {
		m.complete(p, FIELD_NORMAL)
	};
}
/// Parses `assert expr [: expr]`. Panics if the parser is not at `assert`.
fn assertion(p: &mut Parser) {
	let m = p.start();
	p.bump_assert(T![assert]);
	if let Some(c) = expr(p) {
		c.wrap(p, LHS_EXPR);
	}
	if p.at(T![:]) {
		p.bump();
		expr(p);
	}
	m.complete(p, ASSERTION);
}
/// Parses an object literal. Panics if the parser is not at `{`.
fn object(p: &mut Parser) -> CompletedMarker {
	let m_t = p.start();
	let m = p.start();
	p.bump_assert(T!['{']);

	loop {
		if p.at(T!['}']) {
			p.bump();
			break;
		}
		let m = p.start();
		if p.at(T![local]) {
			obj_local(p);
			m.complete(p, MEMBER_BIND_STMT)
		} else if p.at(T![assert]) {
			assertion(p);
			m.complete(p, MEMBER_ASSERT_STMT)
		} else {
			field(p);
			while p.at(T![for]) || p.at(T![if]) {
				compspec(p)
			}
			m.complete(p, MEMBER_FIELD)
		};
		if comma_with_alternatives(p, SyntaxKindSet::new(&[T![=]])) {
			continue;
		}
		p.expect(R_BRACE);
		break;
	}

	m.complete(p, OBJ_BODY_MEMBER_LIST);
	m_t.complete(p, EXPR_OBJECT)
}
/// Parses one function parameter: a destructuring pattern with an optional
/// `= default`.
fn param(p: &mut Parser) {
	let m = p.start();
	destruct(p);
	if p.at(T![=]) {
		p.bump();
		expr(p);
	}
	m.complete(p, PARAM);
}
/// Parses a parenthesised, comma-separated parameter list.
/// Panics if the parser is not at `(`.
fn params_desc(p: &mut Parser) -> CompletedMarker {
	let m = p.start();
	p.bump_assert(T!['(']);

	loop {
		if p.at(T![')']) {
			p.bump();
			break;
		}
		param(p);
		if comma(p) {
			continue;
		}
		p.expect(T![')']);
		break;
	}

	m.complete(p, PARAMS_DESC)
}
/// Parses a call argument list, including an optional trailing `tailstrict`.
/// Panics if the parser is not at `(`.
fn args_desc(p: &mut Parser) {
	let m = p.start();
	p.bump_assert(T!['(']);

	// NOTE(review): recorded but never read in this function; presumably
	// intended for rejecting positional arguments after named ones.
	let started_named = Cell::new(false);

	loop {
		if p.at(T![')']) {
			break;
		}

		let m = p.start();
		if p.at(IDENT) && p.nth_at(1, T![=]) {
			// Named argument: `name = expr`.
			name(p);
			p.bump();
			expr(p);
			m.complete(p, ARG);
			started_named.set(true);
		} else {
			expr(p);
			m.complete(p, ARG);
		}
		if comma(p) {
			continue;
		}
		break;
	}
	p.expect(T![')']);
	if p.at(T![tailstrict]) {
		p.bump()
	}
	m.complete(p, ARGS_DESC);
}

/// Parses an array literal or array comprehension.
///
/// Comprehension specs on an array with more than one element are reported as
/// errors and the node is completed as a plain `EXPR_ARRAY`.
/// Panics if the parser is not at `[`.
fn array(p: &mut Parser) -> CompletedMarker {
	// Start the list node
	let m = p.start();
	p.bump_assert(T!['[']);

	// This vec will have at most one element in case of correct input
	let mut compspecs = Vec::with_capacity(1);
	let mut elems = 0;

	loop {
		if p.at(T![']']) {
			p.bump();
			break;
		}
		elems += 1;
		expr(p);
		let c = p.start_ranger();
		let mut had_spec = false;
		while p.at(T![for]) || p.at(T![if]) {
			had_spec = true;
			compspec(p)
		}
		if had_spec {
			compspecs.push(c.finish(p));
		}
		if comma(p) {
			continue;
		}
		p.expect(T![']']);
		break;
	}

	if elems > 1 && !compspecs.is_empty() {
		for spec in compspecs {
			p.custom_error(
				spec,
				"compspec may only be used if there is only one array element",
			);
		}

		m.complete(p, EXPR_ARRAY)
	} else if !compspecs.is_empty() {
		m.complete(p, EXPR_ARRAY_COMP)
	} else {
		m.complete(p, EXPR_ARRAY)
	}
}
/// Parses the bracketed part of `a[...]`, starting at `[`.
///
/// Returns true if it was slice, false if just index
#[must_use]
fn slice_desc_or_index(p: &mut Parser) -> bool {
	let m = p.start();
	p.bump();
	// TODO: do not treat :, ::, ::: as full tokens?
	// Start
	if !p.at(T![:]) && !p.at(T![::]) {
		expr(p);
	}
	if p.at(T![:]) {
		p.bump();
		// End: optional, and may be directly followed by the second `:`
		// (e.g. `a[0: :2]`), in which case there is no end expression to parse.
		if !p.at(T![']']) && !p.at(T![:]) {
			if let Some(c) = expr(p) {
				c.wrap(p, SLICE_DESC_END);
			}
		}
		if p.at(T![:]) {
			p.bump();
			// Step
			if !p.at(T![']']) {
				if let Some(c) = expr(p) {
					c.wrap(p, SLICE_DESC_STEP);
				}
			}
		}
	} else if p.at(T![::]) {
		p.bump();
		// Step: `::` stands for both colons with the end omitted, so whatever
		// follows is the step, not the end.
		if !p.at(T![']']) {
			if let Some(c) = expr(p) {
				c.wrap(p, SLICE_DESC_STEP);
			}
		}
	} else {
		// It was not a slice
		p.expect(T![']']);
		m.forget(p);
		return false;
	}
	p.expect(T![']']);
	m.complete(p, SLICE_DESC);
	true
}
/// Parses a basic expression followed by any number of postfix forms:
/// `.name`, `[index]` / `[slice]`, and `(args)`.
fn lhs(p: &mut Parser) -> Option<CompletedMarker> {
	let mut lhs = lhs_basic(p)?;

	loop {
		if p.at(T![.]) {
			let m = lhs.precede(p);
			p.bump();
			name(p);
			lhs = m.complete(p, EXPR_INDEX);
		} else if p.at(T!['[']) {
			if slice_desc_or_index(p) {
				lhs = lhs.precede(p).complete(p, EXPR_SLICE);
			} else {
				lhs = lhs
					.wrap(p, LHS_EXPR)
					.precede(p)
					.complete(p, EXPR_INDEX_EXPR);
			}
		} else if p.at(T!['(']) {
			let m = lhs.precede(p);
			args_desc(p);
			lhs = m.complete(p, EXPR_APPLY);
		} else {
			break;
		}
	}

	Some(lhs)
}
/// Parses an identifier into a `NAME` node.
fn name(p: &mut Parser) {
	let m = p.start();
	p.expect(IDENT);
	m.complete(p, NAME);
}
/// Parses `...` with an optional identifier. Panics if not at `...`.
fn destruct_rest(p: &mut Parser) {
	let m = p.start();
	p.bump_assert(T![...]);
	if p.at(IDENT) {
		p.bump()
	}
	m.complete(p, DESTRUCT_REST);
}
/// Parses one field of an object destructuring pattern:
/// `name [: destruct] [= default]`.
fn destruct_object_field(p: &mut Parser) {
	let m = p.start();
	name(p);
	if p.at(T![:]) {
		p.bump();
		destruct(p);
	};
	if p.at(T![=]) {
		p.bump();
		expr(p);
	}
	m.complete(p, DESTRUCT_OBJECT_FIELD);
}
/// Parses `local bind` inside an object body. Panics if not at `local`.
fn obj_local(p: &mut Parser) {
	let m = p.start();
	p.bump_assert(T![local]);
	bind(p);
	m.complete(p, OBJ_LOCAL);
}
/// Parses a destructuring pattern: `?`, `[...]`, `{...}` or a plain name.
/// Anything else completes an empty `ERROR` node.
fn destruct(p: &mut Parser) -> CompletedMarker {
	let m = p.start();
	if p.at(T![?]) {
		p.bump();
		m.complete(p, DESTRUCT_SKIP)
	} else if p.at(T!['[']) {
		p.bump();
		let mut had_rest = false;
		loop {
			if p.at(T![']']) {
				p.bump();
				break;
			} else if p.at(T![...]) {
				let m_err = p.start_ranger();
				destruct_rest(p);
				if had_rest {
					p.custom_error(m_err.finish(p), "only one rest can be present in array");
				}
				had_rest = true;
			} else {
				destruct(p);
			}
			if p.at(T![,]) {
				p.bump();
				continue;
			}
			p.expect(T![']']);
			break;
		}
		m.complete(p, DESTRUCT_ARRAY)
	} else if p.at(T!['{']) {
		p.bump();
		let mut had_rest = false;
		loop {
			if p.at(T!['}']) {
				p.bump();
				break;
			} else if p.at(T![...]) {
				let m_err = p.start_ranger();
				destruct_rest(p);
				if had_rest {
					p.custom_error(m_err.finish(p), "only one rest can be present in object");
				}
				had_rest = true;
			} else {
				// A field after the rest element is reported as an error.
				if had_rest {
					p.error_with_recovery_set(TS![]);
				}
				destruct_object_field(p);
			}
			if p.at(T![,]) {
				p.bump();
				continue;
			}
			p.expect(T!['}']);
			break;
		}
		m.complete(p, DESTRUCT_OBJECT)
	} else if p.at(IDENT) {
		name(p);
		m.complete(p, DESTRUCT_FULL)
	} else {
		m.complete(p, ERROR)
	}
}
/// Parses a binding: `name(params) = expr` or `destruct = expr`.
fn bind(p: &mut Parser) {
	let m = p.start();
	if p.at(IDENT) && p.nth_at(1, T!['(']) {
		name(p);
		params_desc(p);
		p.expect(T![=]);
		expr(p);
		m.complete(p, BIND_FUNCTION)
	} else {
		destruct(p);
		p.expect(T![=]);
		expr(p);
		m.complete(p, BIND_DESTRUCT)
	};
}
/// Consumes a text token. Panics if the current token is not one.
fn text(p: &mut Parser) {
	assert!(Text::can_cast(p.current()));
	p.bump();
}
/// Consumes a number token. Panics if the current token is not one.
fn number(p: &mut Parser) {
	assert!(Number::can_cast(p.current()));
	p.bump();
}
/// Consumes a literal token. Panics if the current token is not one.
fn literal(p: &mut Parser) {
	assert!(Literal::can_cast(p.current()));
	p.bump();
}
/// Parses a primary expression (literal, text, number, variable, intrinsic,
/// `if`, array, object, `local`, `function`, `error`, `assert`, import, unary
/// operator or parenthesised expression).
///
/// Reports an error and returns `None` if the current token starts none of them.
fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
	let _e = p.expected_syntax_name("value");
	Some(if Literal::can_cast(p.current()) {
		let m = p.start();
		literal(p);
		m.complete(p, EXPR_LITERAL)
	} else if Text::can_cast(p.current()) {
		let m = p.start();
		text(p);
		m.complete(p, EXPR_STRING)
	} else if Number::can_cast(p.current()) {
		let m = p.start();
		number(p);
		m.complete(p, EXPR_NUMBER)
	} else if p.at(IDENT) {
		let m = p.start();
		name(p);
		m.complete(p, EXPR_VAR)
	} else if p.at(INTRINSIC_THIS_FILE) {
		let m = p.start();
		p.bump();
		m.complete(p, EXPR_INTRINSIC_THIS_FILE)
	} else if p.at(INTRINSIC_ID) {
		let m = p.start();
		p.bump();
		m.complete(p, EXPR_INTRINSIC_ID)
	} else if p.at(INTRINSIC) {
		let m = p.start();
		p.bump();
		p.expect(T!['(']);
		name(p);
		p.expect(T![')']);
		m.complete(p, EXPR_INTRINSIC)
	} else if p.at(T![if]) {
		let m = p.start();
		p.bump();
		expr(p);
		p.expect(T![then]);
		if let Some(c) = expr(p) {
			c.wrap(p, TRUE_EXPR);
		}
		if p.at(T![else]) {
			p.bump();
			if let Some(c) = expr(p) {
				c.wrap(p, FALSE_EXPR);
			}
		}
		m.complete(p, EXPR_IF_THEN_ELSE)
	} else if p.at(T!['[']) {
		array(p)
	} else if p.at(T!['{']) {
		object(p)
	} else if p.at(T![local]) {
		let m = p.start();
		p.bump();
		loop {
			if p.at(T![;]) {
				p.bump();
				break;
			}
			bind(p);

			if p.at(T![,]) {
				p.bump();
				continue;
			}
			p.expect(T![;]);
			break;
		}
		expr(p);
		m.complete(p, EXPR_LOCAL)
	} else if p.at(T![function]) {
		let m = p.start();
		p.bump();
		params_desc(p);
		expr(p);
		m.complete(p, EXPR_FUNCTION)
	} else if p.at(T![error]) {
		let m = p.start();
		p.bump();
		expr(p);
		m.complete(p, EXPR_ERROR)
	} else if p.at(T![assert]) {
		let m = p.start();
		assertion(p);
		p.expect(T![;]);
		expr(p);
		m.complete(p, EXPR_ASSERT)
	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
		let m = p.start();
		p.bump();
		// `text` asserts on the token kind, so calling it unconditionally would
		// panic on input such as `import 1`; report a syntax error instead.
		if Text::can_cast(p.current()) {
			text(p);
		} else {
			let _e = p.expected_syntax_name("import path");
			p.error_with_recovery_set(TS![]);
		}
		m.complete(p, EXPR_IMPORT)
	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
		let op = match p.current() {
			T![-] => UnaryOperator::Minus,
			T![!] => UnaryOperator::Not,
			T![~] => UnaryOperator::BitNegate,
			_ => unreachable!(),
		};
		let ((), right_binding_power) = op.binding_power();

		let m = p.start();
		p.bump();
		expr_binding_power(p, right_binding_power);
		m.complete(p, EXPR_UNARY)
	} else if p.at(T!['(']) {
		let m = p.start();
		p.bump();
		expr(p);
		p.expect(T![')']);
		m.complete(p, EXPR_PARENED)
	} else {
		p.error_with_recovery_set(TS![]);
		return None;
	})
}

impl Parse {
	/// Builds a fresh root `SyntaxNode` from a clone of the stored green node.
	pub fn syntax(&self) -> SyntaxNode {
		SyntaxNode::new_root(self.green_node.clone())
	}
}
xtask/src/sourcegen/kinds.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
pub enum TokenKind {
/// May exist in token tree, but never in source code
Meta { grammar_name: String, name: String },
- /// Specific parsing errors may be emitted as this type of kind
+ /// Specific parsing/lexing errors may be emitted as this type of kind
Error {
grammar_name: String,
name: String,
+ /// Is this error returned by lexer directly, or from lex.rs
+ is_lexer_error: bool,
regex: Option<String>,
priority: Option<u32>,
},
@@ -133,13 +135,18 @@
});
$(define_kinds!($into = $($rest)*))?
}};
- ($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
- $into.define_token(TokenKind::Error {
- grammar_name: format!("ERROR_{}!", $name),
- name: format!("ERROR_{}", $name),
- regex: None$(.or(Some($regex.to_owned())))?,
- priority: None$(.or(Some($priority)))?,
- });
+ ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+ {
+ let regex = None$(.or(Some($regex.to_owned())))?;
+ let priority = None$(.or(Some($priority)))?;
+ $into.define_token(TokenKind::Error {
+ grammar_name: format!("ERROR_{}!", $name),
+ name: format!("ERROR_{}", $name),
+ is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+ regex,
+ priority,
+ });
+ }
$(define_kinds!($into = $($rest)*))?
}};
($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
- error("STRING_BLOCK_UNEXPECTED_END");
- error("STRING_BLOCK_MISSING_NEW_LINE");
- error("STRING_BLOCK_MISSING_TERMINATION");
- error("STRING_BLOCK_MISSING_INDENT");
+ error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+ error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+ error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+ error("STRING_BLOCK_MISSING_INDENT", lexer = true);
lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
lit("WHITESPACE") => r"[ \t\n\r]+";
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
xtask/src/sourcegen/mod.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
if let Some((special, name)) = classify_special(token) {
match special {
SpecialName::Literal => panic!("literal is not defined: {name}"),
- SpecialName::Meta => kinds.define_token(TokenKind::Meta {
- grammar_name: token.to_owned(),
- name: format!("META_{}", name),
- }),
- SpecialName::Error => kinds.define_token(TokenKind::Error {
- grammar_name: token.to_owned(),
- name: format!("ERROR_{}", name),
- regex: None,
- priority: None,
- }),
+ SpecialName::Meta => {
+ eprintln!("implicit meta: {}", name);
+ kinds.define_token(TokenKind::Meta {
+ grammar_name: token.to_owned(),
+ name: format!("META_{}", name),
+ })
+ }
+ SpecialName::Error => {
+ eprintln!("implicit error: {}", name);
+ kinds.define_token(TokenKind::Error {
+ grammar_name: token.to_owned(),
+ name: format!("ERROR_{}", name),
+ regex: None,
+ priority: None,
+ is_lexer_error: true,
+ })
+ }
};
continue;
};
let name = to_upper_snake_case(token);
+ eprintln!("implicit kw: {}", token);
kinds.define_token(TokenKind::Keyword {
code: token.to_owned(),
name: format!("{name}_KW"),
xtask/src/sourcegen/util.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
}
}
- eprintln!(" {} was not up-to-date, updating\n", file.display());
- if std::env::var("CI").is_ok() {
- eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
- }
+ eprintln!("{} was not up-to-date, updating", file.display());
if let Some(parent) = file.parent() {
let _ = fs::create_dir_all(parent);
}