difftreelog
refactor use grammar to classify tokens
in: master
13 files changed
cmds/jrsonnet-fmt/src/main.rsdiffbeforeafterboth--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
nodes::{
ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
- Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+ Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
UnaryOperator,
},
AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
}
}
-impl Printable for String {
+impl Printable for Text {
fn print(&self) -> PrintItems {
p!(new: str(&format!("{}", self)))
}
@@ -168,7 +168,7 @@
FieldName::FieldNameFixed(f) => {
if let Some(id) = f.id() {
p!(new: {id})
- } else if let Some(str) = f.string() {
+ } else if let Some(str) = f.text() {
p!(new: {str})
} else {
p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
- Expr::ExprString(s) => p!(new: {s.string()}),
+ Expr::ExprString(s) => p!(new: {s.text()}),
Expr::ExprNumber(n) => p!(new: {n.number()}),
Expr::ExprArray(a) => {
let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
pi
}
Expr::ExprImport(v) => {
- p!(new: {v.import_kind()} str(" ") {v.string()})
+ p!(new: {v.import_kind()} str(" ") {v.text()})
}
Expr::ExprVar(n) => p!(new: {n.name()}),
Expr::ExprLocal(l) => {
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
name:Name
')'
ExprString =
- String
+ Text
ExprNumber =
Number
ExprArray =
@@ -67,7 +67,7 @@
']'
ExprImport =
- ImportKind String
+ ImportKind Text
ImportKind =
'importstr'
@@ -217,7 +217,7 @@
FieldNameFixed =
id:Name
-| String
+| Text
FieldNameDynamic =
'['
Expr
@@ -239,16 +239,27 @@
| '$'
| 'super'
-String =
+Text =
'LIT_STRING_DOUBLE!'
+| 'ERROR_STRING_DOUBLE_UNTERMINATED!'
| 'LIT_STRING_SINGLE!'
+| 'ERROR_STRING_SINGLE_UNTERMINATED!'
| 'LIT_STRING_DOUBLE_VERBATIM!'
+| 'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
| 'LIT_STRING_SINGLE_VERBATIM!'
+| 'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+| 'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
| 'LIT_STRING_BLOCK!'
+| 'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+| 'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+| 'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+| 'ERROR_STRING_BLOCK_MISSING_INDENT!'
Number =
'LIT_FLOAT!'
-| 'META_FORCE_ENUM!'
+| 'ERROR_FLOAT_JUNK_AFTER_POINT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
ForSpec =
'for'
@@ -347,3 +358,12 @@
TrueExpr=Expr
FalseExpr=Expr
LhsExpr=Expr
+
+// Trivia - tokens which are implicitly skipped by the parser
+Trivia =
+ 'LIT_WHITESPACE!'
+| 'LIT_MULTI_LINE_COMMENT!'
+| 'ERROR_COMMENT_TOO_SHORT!'
+| 'ERROR_COMMENT_UNTERMINATED!'
+| 'LIT_SINGLE_LINE_HASH_COMMENT!'
+| 'LIT_SINGLE_LINE_SLASH_COMMENT!'
crates/jrsonnet-rowan-parser/src/classify.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
- pub fn is_trivia(self) -> bool {
- matches!(
- self,
- Self::WHITESPACE
- | Self::MULTI_LINE_COMMENT
- | Self::ERROR_COMMENT_TOO_SHORT
- | Self::ERROR_COMMENT_UNTERMINATED
- | Self::SINGLE_LINE_HASH_COMMENT
- | Self::SINGLE_LINE_SLASH_COMMENT
- )
- }
- pub fn is_string(self) -> bool {
- matches!(
- self,
- Self::STRING_SINGLE
- | Self::ERROR_STRING_SINGLE_UNTERMINATED
- | Self::STRING_DOUBLE
- | Self::ERROR_STRING_DOUBLE_UNTERMINATED
- | Self::STRING_SINGLE_VERBATIM
- | Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
- | Self::STRING_DOUBLE_VERBATIM
- | Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
- | Self::STRING_BLOCK
- | Self::ERROR_STRING_BLOCK_UNEXPECTED_END
- | Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
- | Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
- | Self::ERROR_STRING_BLOCK_MISSING_INDENT
- )
- }
- pub fn is_number(self) -> bool {
- matches!(
- self,
- Self::FLOAT
- | Self::ERROR_FLOAT_JUNK_AFTER_POINT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
- )
- }
- pub fn is_literal(self) -> bool {
- matches!(
- self,
- Self::NULL_KW
- | Self::TRUE_KW | Self::FALSE_KW
- | Self::SELF_KW | Self::DOLLAR
- | Self::SUPER_KW
- )
- }
-}
crates/jrsonnet-rowan-parser/src/event.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
use crate::{
lex::Lexeme,
+ nodes::Trivia,
parser::{Parse, SyntaxError},
- JsonnetLanguage, SyntaxKind,
+ AstToken, JsonnetLanguage, SyntaxKind,
};
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
}
fn skip_whitespace(&mut self) {
while let Some(lexeme) = self.lexemes.get(self.offset) {
- if !lexeme.kind.is_trivia() {
+ if !Trivia::can_cast(lexeme.kind) {
break;
}
crates/jrsonnet-rowan-parser/src/generated/nodes.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
pub(crate) syntax: SyntaxNode,
}
impl ExprString {
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -332,7 +332,7 @@
pub fn import_kind(&self) -> Option<ImportKind> {
support::token_child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -692,7 +692,7 @@
pub fn id(&self) -> Option<Name> {
support::child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -1038,18 +1038,27 @@
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
syntax: SyntaxToken,
- kind: StringKind,
+ kind: TextKind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
StringDouble,
+ ErrorStringDoubleUnterminated,
StringSingle,
+ ErrorStringSingleUnterminated,
StringDoubleVerbatim,
+ ErrorStringDoubleVerbatimUnterminated,
StringSingleVerbatim,
+ ErrorStringSingleVerbatimUnterminated,
+ ErrorStringVerbatimMissingQuotes,
StringBlock,
+ ErrorStringBlockUnexpectedEnd,
+ ErrorStringBlockMissingNewLine,
+ ErrorStringBlockMissingTermination,
+ ErrorStringBlockMissingIndent,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberKind {
Float,
- MetaForceEnum,
+ ErrorFloatJunkAfterPoint,
+ ErrorFloatJunkAfterExponent,
+ ErrorFloatJunkAfterExponentSign,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
Coloncolon,
Colon,
}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+ syntax: SyntaxToken,
+ kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+ Whitespace,
+ MultiLineComment,
+ ErrorCommentTooShort,
+ ErrorCommentUnterminated,
+ SingleLineHashComment,
+ SingleLineSlashComment,
+}
impl AstNode for SourceFile {
fn can_cast(kind: SyntaxKind) -> bool {
kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
-impl AstToken for String {
+impl AstToken for Text {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
STRING_DOUBLE
+ | ERROR_STRING_DOUBLE_UNTERMINATED
| STRING_SINGLE
+ | ERROR_STRING_SINGLE_UNTERMINATED
| STRING_DOUBLE_VERBATIM
+ | ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
| STRING_SINGLE_VERBATIM
- | STRING_BLOCK => true,
+ | ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+ | ERROR_STRING_VERBATIM_MISSING_QUOTES
+ | STRING_BLOCK
+ | ERROR_STRING_BLOCK_UNEXPECTED_END
+ | ERROR_STRING_BLOCK_MISSING_NEW_LINE
+ | ERROR_STRING_BLOCK_MISSING_TERMINATION
+ | ERROR_STRING_BLOCK_MISSING_INDENT => true,
_ => false,
}
}
fn cast(syntax: SyntaxToken) -> Option<Self> {
let res = match syntax.kind() {
- STRING_DOUBLE => String {
+ STRING_DOUBLE => Text {
syntax,
- kind: StringKind::StringDouble,
+ kind: TextKind::StringDouble,
},
- STRING_SINGLE => String {
+ ERROR_STRING_DOUBLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingle,
+ kind: TextKind::ErrorStringDoubleUnterminated,
},
- STRING_DOUBLE_VERBATIM => String {
+ STRING_SINGLE => Text {
syntax,
- kind: StringKind::StringDoubleVerbatim,
+ kind: TextKind::StringSingle,
},
- STRING_SINGLE_VERBATIM => String {
+ ERROR_STRING_SINGLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingleVerbatim,
+ kind: TextKind::ErrorStringSingleUnterminated,
},
- STRING_BLOCK => String {
+ STRING_DOUBLE_VERBATIM => Text {
syntax,
- kind: StringKind::StringBlock,
+ kind: TextKind::StringDoubleVerbatim,
+ },
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+ },
+ STRING_SINGLE_VERBATIM => Text {
+ syntax,
+ kind: TextKind::StringSingleVerbatim,
},
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+ },
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+ syntax,
+ kind: TextKind::ErrorStringVerbatimMissingQuotes,
+ },
+ STRING_BLOCK => Text {
+ syntax,
+ kind: TextKind::StringBlock,
+ },
+ ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockUnexpectedEnd,
+ },
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingNewLine,
+ },
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingTermination,
+ },
+ ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingIndent,
+ },
_ => return None,
};
Some(res)
@@ -2718,12 +2790,12 @@
&self.syntax
}
}
-impl String {
- pub fn kind(&self) -> StringKind {
+impl Text {
+ pub fn kind(&self) -> TextKind {
self.kind
}
}
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
@@ -2731,7 +2803,10 @@
impl AstToken for Number {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
- FLOAT | META_FORCE_ENUM => true,
+ FLOAT
+ | ERROR_FLOAT_JUNK_AFTER_POINT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
_ => false,
}
}
@@ -2741,10 +2816,18 @@
syntax,
kind: NumberKind::Float,
},
- META_FORCE_ENUM => Number {
+ ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterPoint,
+ },
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
syntax,
- kind: NumberKind::MetaForceEnum,
+ kind: NumberKind::ErrorFloatJunkAfterExponent,
},
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+ },
_ => return None,
};
Some(res)
@@ -2841,6 +2924,62 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
+impl AstToken for Trivia {
+ fn can_cast(kind: SyntaxKind) -> bool {
+ match kind {
+ WHITESPACE
+ | MULTI_LINE_COMMENT
+ | ERROR_COMMENT_TOO_SHORT
+ | ERROR_COMMENT_UNTERMINATED
+ | SINGLE_LINE_HASH_COMMENT
+ | SINGLE_LINE_SLASH_COMMENT => true,
+ _ => false,
+ }
+ }
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ let res = match syntax.kind() {
+ WHITESPACE => Trivia {
+ syntax,
+ kind: TriviaKind::Whitespace,
+ },
+ MULTI_LINE_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::MultiLineComment,
+ },
+ ERROR_COMMENT_TOO_SHORT => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentTooShort,
+ },
+ ERROR_COMMENT_UNTERMINATED => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentUnterminated,
+ },
+ SINGLE_LINE_HASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineHashComment,
+ },
+ SINGLE_LINE_SLASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineSlashComment,
+ },
+ _ => return None,
+ };
+ Some(res)
+ }
+ fn syntax(&self) -> &SyntaxToken {
+ &self.syntax
+ }
+}
+impl Trivia {
+ pub fn kind(&self) -> TriviaKind {
+ self.kind
+ }
+}
+impl std::fmt::Display for Trivia {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ std::fmt::Display::fmt(self.syntax(), f)
+ }
+}
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
SELF_KW,
#[token("super")]
SUPER_KW,
- META_FORCE_ENUM,
#[token("for")]
FOR_KW,
#[token("assert")]
@@ -253,10 +252,11 @@
BINARY_OPERATOR,
UNARY_OPERATOR,
LITERAL,
- STRING,
+ TEXT,
NUMBER,
IMPORT_KIND,
VISIBILITY,
+ TRIVIA,
#[doc(hidden)]
__LAST,
}
@@ -277,8 +277,8 @@
pub fn is_enum(self) -> bool {
match self {
EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
- | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
- | NUMBER | IMPORT_KIND | VISIBILITY => true,
+ | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+ | IMPORT_KIND | VISIBILITY | TRIVIA => true,
_ => false,
}
}
crates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
use logos::Logos;
use rowan::{TextRange, TextSize};
-use crate::SyntaxKind;
+use crate::{
+ string_block::{lex_str_block, StringBlockError},
+ SyntaxKind,
+};
pub struct Lexer<'a> {
inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
type Item = Lexeme<'a>;
fn next(&mut self) -> Option<Self::Item> {
- let kind = self.inner.next()?;
+ use SyntaxKind::*;
+
+ let mut kind = self.inner.next()?;
let text = self.inner.slice();
+ if kind == STRING_BLOCK {
+ // We use a custom lexer, which skips enough bytes but does not return an error.
+ // Instead, we call the string block lexer again to verify whether something is wrong with the string block
+ let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+ // In kinds, a string block is matched at least as `|||`, so skip those three bytes
+ lexer.bump(3);
+ let res = lex_str_block(&mut lexer);
+ debug_assert!(lexer.next().is_none(), "str_block is lexed");
+ match res {
+ Ok(_) => {}
+ Err(e) => {
+ kind = match e {
+ StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+ StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+ StringBlockError::MissingTermination => {
+ ERROR_STRING_BLOCK_MISSING_TERMINATION
+ }
+ StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+ }
+ }
+ }
+ }
+
Some(Self::Item {
kind,
text,
crates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
mod ast;
mod binary;
-mod classify;
mod event;
mod generated;
mod language;
crates/jrsonnet-rowan-parser/src/marker.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
!kind.is_enum(),
"{kind:?} is a enum kind, you should use variant kinds instead"
);
- // TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+ // TODO: is_lexer should return true if the enum variant has #[regex]/#[token] on it, or if it is explicitly defined as a lexer error
// debug_assert!(
- // !kind.is_parser(),
- // "{kind:?} should be only emitted by parser, not used directly"
+ // !kind.is_lexer(),
+ // "{kind:?} should be only emitted by lexer, not used directly"
// );
let event_at_pos = &mut p.events[self.start_event_idx];
assert_eq!(*event_at_pos, Event::Pending);
crates/jrsonnet-rowan-parser/src/parser.rsdiffbeforeafterboth8 event::Event,8 event::Event,9 lex::Lexeme,9 lex::Lexeme,10 marker::{AsRange, CompletedMarker, Marker, Ranger},10 marker::{AsRange, CompletedMarker, Marker, Ranger},11 string_block::{lex_str_block, StringBlockError},11 nodes::{Literal, Number, Text, Trivia},12 token_set::SyntaxKindSet,12 token_set::SyntaxKindSet,13 unary::UnaryOperator,13 unary::UnaryOperator,14 SyntaxKind,14 AstToken, SyntaxKind,15 SyntaxKind::*,15 SyntaxKind::*,16 SyntaxNode, T, TS,16 SyntaxNode, T, TS,17};17};36}36}373738pub struct Parser<'i> {38pub struct Parser<'i> {39 // TODO: remove all trivia before feeding to parser?39 lexemes: &'i [Lexeme<'i>],40 lexemes: &'i [Lexeme<'i>],40 pub offset: usize,41 pub offset: usize,41 pub events: Vec<Event>,42 pub events: Vec<Event>,191 while self192 while self192 .lexemes193 .lexemes193 .get(previous_token_idx)194 .get(previous_token_idx)194 .map_or(false, |l| l.kind.is_trivia())195 .map_or(false, |l| Trivia::can_cast(l.kind))195 && previous_token_idx != 0196 && previous_token_idx != 0196 {197 {197 previous_token_idx -= 1;198 previous_token_idx -= 1;200 Some(self.lexemes[previous_token_idx])201 Some(self.lexemes[previous_token_idx])201 }202 }202 pub fn start_of_token(&self, mut idx: usize) -> TextSize {203 pub fn start_of_token(&self, mut idx: usize) -> TextSize {203 while self.lexemes[idx].kind.is_trivia() {204 while Trivia::can_cast(self.lexemes[idx].kind) {204 idx += 1;205 idx += 1;205 }206 }206 self.lexemes[idx].range.start()207 self.lexemes[idx].range.start()207 }208 }208 pub fn end_of_token(&self, mut idx: usize) -> TextSize {209 pub fn end_of_token(&self, mut idx: usize) -> TextSize {209 while self.lexemes[idx].kind.is_trivia() {210 while Trivia::can_cast(self.lexemes[idx].kind) {210 idx -= 1;211 idx -= 1;211 }212 }212 self.lexemes[idx].range.end()213 self.lexemes[idx].range.end()267 self.bump();268 self.bump();268 Some(m.complete(self, SyntaxKind::ERROR))269 Some(m.complete(self, 
SyntaxKind::ERROR))269 }270 }270271 fn bump_assert(&mut self, kind: SyntaxKind) {272 self.skip_trivia();273 assert!(self.at(kind), "expected {:?}", kind);274 self.bump_remap(self.current());275 }271 fn bump(&mut self) {276 fn bump(&mut self) {272 self.skip_trivia();277 self.skip_trivia();273 self.bump_remap(self.current());278 self.bump_remap(self.current());314 while self319 while self315 .lexemes320 .lexemes316 .get(offset)321 .get(offset)317 .map(|l| l.kind.is_trivia())322 .map(|l| Trivia::can_cast(l.kind))318 .unwrap_or(false)323 .unwrap_or(false)319 {324 {320 offset += 1;325 offset += 1;324 while self329 while self325 .lexemes330 .lexemes326 .get(offset)331 .get(offset)327 .map(|l| l.kind.is_trivia())332 .map(|l| Trivia::can_cast(l.kind))328 .unwrap_or(false)333 .unwrap_or(false)329 {334 {330 offset += 1;335 offset += 1;335 self.nth(0)340 self.nth(0)336 }341 }337 fn skip_trivia(&mut self) {342 fn skip_trivia(&mut self) {338 while self.peek_raw().is_trivia() {343 while Trivia::can_cast(self.peek_raw()) {339 self.offset += 1;344 self.offset += 1;340 }345 }341 }346 }342 fn current_lexeme(&mut self) -> Option<&Lexeme> {343 self.skip_trivia();344 self.lexemes.get(self.offset)345 }346 fn peek_raw(&mut self) -> SyntaxKind {347 fn peek_raw(&mut self) -> SyntaxKind {347 self.lexemes348 self.lexemes348 .get(self.offset)349 .get(self.offset)516 } else if p.at(IDENT) {517 } else if p.at(IDENT) {517 name(p);518 name(p);518 m.complete(p, FIELD_NAME_FIXED);519 m.complete(p, FIELD_NAME_FIXED);519 } else if p.current().is_string() {520 } else if Text::can_cast(p.current()) {520 string(p);521 text(p);521 m.complete(p, FIELD_NAME_FIXED);522 m.complete(p, FIELD_NAME_FIXED);522 } else {523 } else {523 p.error_with_recovery_set(TS![;]);524 p.error_with_recovery_set(TS![;]);564 };565 };565}566}566fn assertion(p: &mut Parser) {567fn assertion(p: &mut Parser) {567 assert!(p.at(T![assert]));568 let m = p.start();568 let m = p.start();569 p.bump();569 p.bump_assert(T![assert]);570 
expr(p).map(|c| c.wrap(p, LHS_EXPR));570 expr(p).map(|c| c.wrap(p, LHS_EXPR));571 if p.at(T![:]) {571 if p.at(T![:]) {572 p.bump();572 p.bump();575 m.complete(p, ASSERTION);575 m.complete(p, ASSERTION);576}576}577fn object(p: &mut Parser) -> CompletedMarker {577fn object(p: &mut Parser) -> CompletedMarker {578 assert!(p.at(T!['{']));579 let m_t = p.start();578 let m_t = p.start();580 let m = p.start();579 let m = p.start();581 p.bump();580 p.bump_assert(T!['{']);582581583 loop {582 loop {584 if p.at(T!['}']) {583 if p.at(T!['}']) {619 m.complete(p, PARAM);618 m.complete(p, PARAM);620}619}621fn params_desc(p: &mut Parser) -> CompletedMarker {620fn params_desc(p: &mut Parser) -> CompletedMarker {622 assert!(p.at(T!['(']));623 let m = p.start();621 let m = p.start();624 p.bump();622 p.bump_assert(T!['(']);625623626 loop {624 loop {627 if p.at(T![')']) {625 if p.at(T![')']) {640}638}641fn args_desc(p: &mut Parser) {639fn args_desc(p: &mut Parser) {642 let m = p.start();640 let m = p.start();643 assert!(p.at(T!['(']));641 p.bump_assert(T!['(']);644 p.bump();645642646 let started_named = Cell::new(false);643 let started_named = Cell::new(false);647644674}671}675672676fn array(p: &mut Parser) -> CompletedMarker {673fn array(p: &mut Parser) -> CompletedMarker {677 assert!(p.at(T!['[']));678 // Start the list node674 // Start the list node679 let m = p.start();675 let m = p.start();680 p.bump(); // '['676 p.bump_assert(T!['[']);681677682 // This vec will have at most one element in case of correct input678 // This vec will have at most one element in case of correct input683 let mut compspecs = Vec::with_capacity(1);679 let mut compspecs = Vec::with_capacity(1);795 m.complete(p, NAME);791 m.complete(p, NAME);796}792}797fn destruct_rest(p: &mut Parser) {793fn destruct_rest(p: &mut Parser) {798 assert!(p.at(T![...]));799 p.bump();800 let m = p.start();794 let m = p.start();795 p.bump_assert(T![...]);801 if p.at(IDENT) {796 if p.at(IDENT) {802 p.bump()797 p.bump()803 }798 }817 
m.complete(p, DESTRUCT_OBJECT_FIELD);812 m.complete(p, DESTRUCT_OBJECT_FIELD);818}813}819fn obj_local(p: &mut Parser) {814fn obj_local(p: &mut Parser) {820 assert!(p.at(T![local]));821 let m = p.start();815 let m = p.start();822 p.bump();816 p.bump_assert(T![local]);823 bind(p);817 bind(p);824 m.complete(p, OBJ_LOCAL);818 m.complete(p, OBJ_LOCAL);825}819}903 m.complete(p, BIND_DESTRUCT)897 m.complete(p, BIND_DESTRUCT)904 };898 };905}899}906fn string(p: &mut Parser) {900fn text(p: &mut Parser) {907 assert!(p.current().is_string());901 assert!(Text::can_cast(p.current()));908 if p.at(STRING_BLOCK) {909 // We use custom lexer, which skips enough bytes, but not returns error910 // Instead we should call lexer again to verify if there is something wrong with string block911 let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(912 &p.current_lexeme().expect("parser is at string block").text913 ));914 // In kinds, string blocks is parsed at least as `|||`902 p.bump();915 lexer.bump(3);916 let res = lex_str_block(&mut lexer);917 debug_assert!(lexer.next().is_none(), "str_block is lexed");918 match res {919 Ok(_) => {920 p.bump();921 }922 Err(e) => p.bump_remap(match e {923 StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,924 StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,925 StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,926 StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,927 }),928 }929 } else {930 p.bump();931 }932}903}933fn number(p: &mut Parser) {904fn number(p: &mut Parser) {934 assert!(p.current().is_number());905 assert!(Number::can_cast(p.current()));935 p.bump();906 p.bump();936}907}937fn literal(p: &mut Parser) {908fn literal(p: &mut Parser) {938 assert!(p.current().is_literal());909 assert!(Literal::can_cast(p.current()));939 p.bump();910 p.bump();940}911}941fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {912fn lhs_basic(p: &mut Parser) -> 
Option<CompletedMarker> {942 let _e = p.expected_syntax_name("value");913 let _e = p.expected_syntax_name("value");943 Some(if p.current().is_literal() {914 Some(if Literal::can_cast(p.current()) {944 let m = p.start();915 let m = p.start();945 literal(p);916 literal(p);946 m.complete(p, EXPR_LITERAL)917 m.complete(p, EXPR_LITERAL)947 } else if p.current().is_string() {918 } else if Text::can_cast(p.current()) {948 let m = p.start();919 let m = p.start();949 string(p);920 text(p);950 m.complete(p, EXPR_STRING)921 m.complete(p, EXPR_STRING)951 } else if p.current().is_number() {922 } else if Number::can_cast(p.current()) {952 let m = p.start();923 let m = p.start();953 number(p);924 number(p);954 m.complete(p, EXPR_NUMBER)925 m.complete(p, EXPR_NUMBER)1025 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {996 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {1026 let m = p.start();997 let m = p.start();1027 p.bump();998 p.bump();1028 string(p);999 text(p);1029 m.complete(p, EXPR_IMPORT)1000 m.complete(p, EXPR_IMPORT)1030 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {1001 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {1031 let op = match p.current() {1002 let op = match p.current() {1044 let m = p.start();1015 let m = p.start();1045 p.bump();1016 p.bump();1046 expr(p);1017 expr(p);1047 assert!(p.at(T![')']));1018 p.expect(T![')']);1048 p.bump();1049 m.complete(p, EXPR_PARENED)1019 m.complete(p, EXPR_PARENED)1050 } else {1020 } else {1051 p.error_with_recovery_set(TS![]);1021 p.error_with_recovery_set(TS![]);xtask/src/sourcegen/kinds.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
pub enum TokenKind {
/// May exist in token tree, but never in source code
Meta { grammar_name: String, name: String },
- /// Specific parsing errors may be emitted as this type of kind
+ /// Specific parsing/lexing errors may be emitted as this type of kind
Error {
grammar_name: String,
name: String,
+ /// Whether this error is produced at the lexing stage: either returned by the lexer directly, or emitted from lex.rs
+ is_lexer_error: bool,
regex: Option<String>,
priority: Option<u32>,
},
@@ -133,13 +135,18 @@
});
$(define_kinds!($into = $($rest)*))?
}};
- ($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
- $into.define_token(TokenKind::Error {
- grammar_name: format!("ERROR_{}!", $name),
- name: format!("ERROR_{}", $name),
- regex: None$(.or(Some($regex.to_owned())))?,
- priority: None$(.or(Some($priority)))?,
- });
+ ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+ {
+ let regex = None$(.or(Some($regex.to_owned())))?;
+ let priority = None$(.or(Some($priority)))?;
+ $into.define_token(TokenKind::Error {
+ grammar_name: format!("ERROR_{}!", $name),
+ name: format!("ERROR_{}", $name),
+ is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+ regex,
+ priority,
+ });
+ }
$(define_kinds!($into = $($rest)*))?
}};
($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
- error("STRING_BLOCK_UNEXPECTED_END");
- error("STRING_BLOCK_MISSING_NEW_LINE");
- error("STRING_BLOCK_MISSING_TERMINATION");
- error("STRING_BLOCK_MISSING_INDENT");
+ error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+ error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+ error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+ error("STRING_BLOCK_MISSING_INDENT", lexer = true);
lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
lit("WHITESPACE") => r"[ \t\n\r]+";
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
xtask/src/sourcegen/mod.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
if let Some((special, name)) = classify_special(token) {
match special {
SpecialName::Literal => panic!("literal is not defined: {name}"),
- SpecialName::Meta => kinds.define_token(TokenKind::Meta {
- grammar_name: token.to_owned(),
- name: format!("META_{}", name),
- }),
- SpecialName::Error => kinds.define_token(TokenKind::Error {
- grammar_name: token.to_owned(),
- name: format!("ERROR_{}", name),
- regex: None,
- priority: None,
- }),
+ SpecialName::Meta => {
+ eprintln!("implicit meta: {}", name);
+ kinds.define_token(TokenKind::Meta {
+ grammar_name: token.to_owned(),
+ name: format!("META_{}", name),
+ })
+ }
+ SpecialName::Error => {
+ eprintln!("implicit error: {}", name);
+ kinds.define_token(TokenKind::Error {
+ grammar_name: token.to_owned(),
+ name: format!("ERROR_{}", name),
+ regex: None,
+ priority: None,
+ is_lexer_error: true,
+ })
+ }
};
continue;
};
let name = to_upper_snake_case(token);
+ eprintln!("implicit kw: {}", token);
kinds.define_token(TokenKind::Keyword {
code: token.to_owned(),
name: format!("{name}_KW"),
xtask/src/sourcegen/util.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
}
}
- eprintln!(" {} was not up-to-date, updating\n", file.display());
- if std::env::var("CI").is_ok() {
- eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
- }
+ eprintln!("{} was not up-to-date, updating", file.display());
if let Some(parent) = file.parent() {
let _ = fs::create_dir_all(parent);
}