`5ad3c0601af8`

13 files changed

modifiedcmds/jrsonnet-fmt/src/main.rsdiff before after both

--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
 	nodes::{
 		ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
 		DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
-		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
 		UnaryOperator,
 	},
 	AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
 	}
 }
 
-impl Printable for String {
+impl Printable for Text {
 	fn print(&self) -> PrintItems {
 		p!(new: str(&format!("{}", self)))
 	}
@@ -168,7 +168,7 @@
 			FieldName::FieldNameFixed(f) => {
 				if let Some(id) = f.id() {
 					p!(new: {id})
-				} else if let Some(str) = f.string() {
+				} else if let Some(str) = f.text() {
 					p!(new: {str})
 				} else {
 					p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
 			Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
 			Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
 			Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
-			Expr::ExprString(s) => p!(new: {s.string()}),
+			Expr::ExprString(s) => p!(new: {s.text()}),
 			Expr::ExprNumber(n) => p!(new: {n.number()}),
 			Expr::ExprArray(a) => {
 				let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
 				pi
 			}
 			Expr::ExprImport(v) => {
-				p!(new: {v.import_kind()} str(" ") {v.string()})
+				p!(new: {v.import_kind()} str(" ") {v.text()})
 			}
 			Expr::ExprVar(n) => p!(new: {n.name()}),
 			Expr::ExprLocal(l) => {

modifiedcrates/jrsonnet-rowan-parser/jsonnet.ungramdiff before after both

--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
     name:Name
     ')'
 ExprString =
-    String
+    Text
 ExprNumber =
     Number
 ExprArray =
@@ -67,7 +67,7 @@
     ']'
 
 ExprImport =
-    ImportKind String
+    ImportKind Text
 
 ImportKind =
     'importstr'
@@ -217,7 +217,7 @@
 
 FieldNameFixed =
     id:Name
-|   String
+|   Text
 FieldNameDynamic =
     '['
     Expr
@@ -239,16 +239,27 @@
 |   '$'
 |   'super'
 
-String =
+Text =
     'LIT_STRING_DOUBLE!'
+|   'ERROR_STRING_DOUBLE_UNTERMINATED!'
 |   'LIT_STRING_SINGLE!'
+|   'ERROR_STRING_SINGLE_UNTERMINATED!'
 |   'LIT_STRING_DOUBLE_VERBATIM!'
+|   'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
 |   'LIT_STRING_SINGLE_VERBATIM!'
+|   'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+|   'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
 |   'LIT_STRING_BLOCK!'
+|   'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+|   'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+|   'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+|   'ERROR_STRING_BLOCK_MISSING_INDENT!'
 
 Number =
     'LIT_FLOAT!'
-|   'META_FORCE_ENUM!'
+|   'ERROR_FLOAT_JUNK_AFTER_POINT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
 
 ForSpec =
     'for'
@@ -347,3 +358,12 @@
 TrueExpr=Expr
 FalseExpr=Expr
 LhsExpr=Expr
+
+// Trivia: tokens that the parser skips implicitly
+Trivia =
+    'LIT_WHITESPACE!'
+|   'LIT_MULTI_LINE_COMMENT!'
+|   'ERROR_COMMENT_TOO_SHORT!'
+|   'ERROR_COMMENT_UNTERMINATED!'
+|   'LIT_SINGLE_LINE_HASH_COMMENT!'
+|   'LIT_SINGLE_LINE_SLASH_COMMENT!'

deletedcrates/jrsonnet-rowan-parser/src/classify.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
-	pub fn is_trivia(self) -> bool {
-		matches!(
-			self,
-			Self::WHITESPACE
-				| Self::MULTI_LINE_COMMENT
-				| Self::ERROR_COMMENT_TOO_SHORT
-				| Self::ERROR_COMMENT_UNTERMINATED
-				| Self::SINGLE_LINE_HASH_COMMENT
-				| Self::SINGLE_LINE_SLASH_COMMENT
-		)
-	}
-	pub fn is_string(self) -> bool {
-		matches!(
-			self,
-			Self::STRING_SINGLE
-				| Self::ERROR_STRING_SINGLE_UNTERMINATED
-				| Self::STRING_DOUBLE
-				| Self::ERROR_STRING_DOUBLE_UNTERMINATED
-				| Self::STRING_SINGLE_VERBATIM
-				| Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
-				| Self::STRING_DOUBLE_VERBATIM
-				| Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
-				| Self::STRING_BLOCK
-				| Self::ERROR_STRING_BLOCK_UNEXPECTED_END
-				| Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
-				| Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
-				| Self::ERROR_STRING_BLOCK_MISSING_INDENT
-		)
-	}
-	pub fn is_number(self) -> bool {
-		matches!(
-			self,
-			Self::FLOAT
-				| Self::ERROR_FLOAT_JUNK_AFTER_POINT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
-		)
-	}
-	pub fn is_literal(self) -> bool {
-		matches!(
-			self,
-			Self::NULL_KW
-				| Self::TRUE_KW | Self::FALSE_KW
-				| Self::SELF_KW | Self::DOLLAR
-				| Self::SUPER_KW
-		)
-	}
-}

modifiedcrates/jrsonnet-rowan-parser/src/event.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
 
 use crate::{
 	lex::Lexeme,
+	nodes::Trivia,
 	parser::{Parse, SyntaxError},
-	JsonnetLanguage, SyntaxKind,
+	AstToken, JsonnetLanguage, SyntaxKind,
 };
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
 	}
 	fn skip_whitespace(&mut self) {
 		while let Some(lexeme) = self.lexemes.get(self.offset) {
-			if !lexeme.kind.is_trivia() {
+			if !Trivia::can_cast(lexeme.kind) {
 				break;
 			}

modifiedcrates/jrsonnet-rowan-parser/src/generated/nodes.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
 	pub(crate) syntax: SyntaxNode,
 }
 impl ExprString {
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -332,7 +332,7 @@
 	pub fn import_kind(&self) -> Option<ImportKind> {
 		support::token_child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -692,7 +692,7 @@
 	pub fn id(&self) -> Option<Name> {
 		support::child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -1038,18 +1038,27 @@
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
 	syntax: SyntaxToken,
-	kind: StringKind,
+	kind: TextKind,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
 	StringDouble,
+	ErrorStringDoubleUnterminated,
 	StringSingle,
+	ErrorStringSingleUnterminated,
 	StringDoubleVerbatim,
+	ErrorStringDoubleVerbatimUnterminated,
 	StringSingleVerbatim,
+	ErrorStringSingleVerbatimUnterminated,
+	ErrorStringVerbatimMissingQuotes,
 	StringBlock,
+	ErrorStringBlockUnexpectedEnd,
+	ErrorStringBlockMissingNewLine,
+	ErrorStringBlockMissingTermination,
+	ErrorStringBlockMissingIndent,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum NumberKind {
 	Float,
-	MetaForceEnum,
+	ErrorFloatJunkAfterPoint,
+	ErrorFloatJunkAfterExponent,
+	ErrorFloatJunkAfterExponentSign,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
 	Coloncolon,
 	Colon,
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+	syntax: SyntaxToken,
+	kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+	Whitespace,
+	MultiLineComment,
+	ErrorCommentTooShort,
+	ErrorCommentUnterminated,
+	SingleLineHashComment,
+	SingleLineSlashComment,
+}
 impl AstNode for SourceFile {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
-impl AstToken for String {
+impl AstToken for Text {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
 			STRING_DOUBLE
+			| ERROR_STRING_DOUBLE_UNTERMINATED
 			| STRING_SINGLE
+			| ERROR_STRING_SINGLE_UNTERMINATED
 			| STRING_DOUBLE_VERBATIM
+			| ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
 			| STRING_SINGLE_VERBATIM
-			| STRING_BLOCK => true,
+			| ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+			| ERROR_STRING_VERBATIM_MISSING_QUOTES
+			| STRING_BLOCK
+			| ERROR_STRING_BLOCK_UNEXPECTED_END
+			| ERROR_STRING_BLOCK_MISSING_NEW_LINE
+			| ERROR_STRING_BLOCK_MISSING_TERMINATION
+			| ERROR_STRING_BLOCK_MISSING_INDENT => true,
 			_ => false,
 		}
 	}
 	fn cast(syntax: SyntaxToken) -> Option<Self> {
 		let res = match syntax.kind() {
-			STRING_DOUBLE => String {
+			STRING_DOUBLE => Text {
 				syntax,
-				kind: StringKind::StringDouble,
+				kind: TextKind::StringDouble,
 			},
-			STRING_SINGLE => String {
+			ERROR_STRING_DOUBLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingle,
+				kind: TextKind::ErrorStringDoubleUnterminated,
 			},
-			STRING_DOUBLE_VERBATIM => String {
+			STRING_SINGLE => Text {
 				syntax,
-				kind: StringKind::StringDoubleVerbatim,
+				kind: TextKind::StringSingle,
 			},
-			STRING_SINGLE_VERBATIM => String {
+			ERROR_STRING_SINGLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingleVerbatim,
+				kind: TextKind::ErrorStringSingleUnterminated,
 			},
-			STRING_BLOCK => String {
+			STRING_DOUBLE_VERBATIM => Text {
 				syntax,
-				kind: StringKind::StringBlock,
+				kind: TextKind::StringDoubleVerbatim,
+			},
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+			},
+			STRING_SINGLE_VERBATIM => Text {
+				syntax,
+				kind: TextKind::StringSingleVerbatim,
 			},
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+			},
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+				syntax,
+				kind: TextKind::ErrorStringVerbatimMissingQuotes,
+			},
+			STRING_BLOCK => Text {
+				syntax,
+				kind: TextKind::StringBlock,
+			},
+			ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockUnexpectedEnd,
+			},
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingNewLine,
+			},
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingTermination,
+			},
+			ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingIndent,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2718,12 +2790,12 @@
 		&self.syntax
 	}
 }
-impl String {
-	pub fn kind(&self) -> StringKind {
+impl Text {
+	pub fn kind(&self) -> TextKind {
 		self.kind
 	}
 }
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
@@ -2731,7 +2803,10 @@
 impl AstToken for Number {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
-			FLOAT | META_FORCE_ENUM => true,
+			FLOAT
+			| ERROR_FLOAT_JUNK_AFTER_POINT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
 			_ => false,
 		}
 	}
@@ -2741,10 +2816,18 @@
 				syntax,
 				kind: NumberKind::Float,
 			},
-			META_FORCE_ENUM => Number {
+			ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterPoint,
+			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
 				syntax,
-				kind: NumberKind::MetaForceEnum,
+				kind: NumberKind::ErrorFloatJunkAfterExponent,
 			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2841,6 +2924,62 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
+impl AstToken for Trivia {
+	fn can_cast(kind: SyntaxKind) -> bool {
+		match kind {
+			WHITESPACE
+			| MULTI_LINE_COMMENT
+			| ERROR_COMMENT_TOO_SHORT
+			| ERROR_COMMENT_UNTERMINATED
+			| SINGLE_LINE_HASH_COMMENT
+			| SINGLE_LINE_SLASH_COMMENT => true,
+			_ => false,
+		}
+	}
+	fn cast(syntax: SyntaxToken) -> Option<Self> {
+		let res = match syntax.kind() {
+			WHITESPACE => Trivia {
+				syntax,
+				kind: TriviaKind::Whitespace,
+			},
+			MULTI_LINE_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::MultiLineComment,
+			},
+			ERROR_COMMENT_TOO_SHORT => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentTooShort,
+			},
+			ERROR_COMMENT_UNTERMINATED => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentUnterminated,
+			},
+			SINGLE_LINE_HASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineHashComment,
+			},
+			SINGLE_LINE_SLASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineSlashComment,
+			},
+			_ => return None,
+		};
+		Some(res)
+	}
+	fn syntax(&self) -> &SyntaxToken {
+		&self.syntax
+	}
+}
+impl Trivia {
+	pub fn kind(&self) -> TriviaKind {
+		self.kind
+	}
+}
+impl std::fmt::Display for Trivia {
+	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		std::fmt::Display::fmt(self.syntax(), f)
+	}
+}
 impl std::fmt::Display for Expr {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)

modifiedcrates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
 	SELF_KW,
 	#[token("super")]
 	SUPER_KW,
-	META_FORCE_ENUM,
 	#[token("for")]
 	FOR_KW,
 	#[token("assert")]
@@ -253,10 +252,11 @@
 	BINARY_OPERATOR,
 	UNARY_OPERATOR,
 	LITERAL,
-	STRING,
+	TEXT,
 	NUMBER,
 	IMPORT_KIND,
 	VISIBILITY,
+	TRIVIA,
 	#[doc(hidden)]
 	__LAST,
 }
@@ -277,8 +277,8 @@
 	pub fn is_enum(self) -> bool {
 		match self {
 			EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
-			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
-			| NUMBER | IMPORT_KIND | VISIBILITY => true,
+			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+			| IMPORT_KIND | VISIBILITY | TRIVIA => true,
 			_ => false,
 		}
 	}

modifiedcrates/jrsonnet-rowan-parser/src/lex.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
 use logos::Logos;
 use rowan::{TextRange, TextSize};
 
-use crate::SyntaxKind;
+use crate::{
+	string_block::{lex_str_block, StringBlockError},
+	SyntaxKind,
+};
 
 pub struct Lexer<'a> {
 	inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
 	type Item = Lexeme<'a>;
 
 	fn next(&mut self) -> Option<Self::Item> {
-		let kind = self.inner.next()?;
+		use SyntaxKind::*;
+
+		let mut kind = self.inner.next()?;
 		let text = self.inner.slice();
 
+		if kind == STRING_BLOCK {
+			// We use a custom lexer callback, which skips enough bytes but does not return an error.
+			// Instead, we call the string-block lexer again to check whether something is wrong with the string block.
+			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+			// In kinds, a string block is matched as at least `|||`
+			lexer.bump(3);
+			let res = lex_str_block(&mut lexer);
+			debug_assert!(lexer.next().is_none(), "str_block is lexed");
+			match res {
+				Ok(_) => {}
+				Err(e) => {
+					kind = match e {
+						StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+						StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+						StringBlockError::MissingTermination => {
+							ERROR_STRING_BLOCK_MISSING_TERMINATION
+						}
+						StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+					}
+				}
+			}
+		}
+
 		Some(Self::Item {
 			kind,
 			text,

modifiedcrates/jrsonnet-rowan-parser/src/lib.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
 
 mod ast;
 mod binary;
-mod classify;
 mod event;
 mod generated;
 mod language;

modifiedcrates/jrsonnet-rowan-parser/src/marker.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
 			!kind.is_enum(),
 			"{kind:?} is a enum kind, you should use variant kinds instead"
 		);
-		// TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+		// TODO: is_lexer should return true if the enum variant has #[regex]/#[token] on it, or if it is explicitly defined as a lexer error
 		// debug_assert!(
-		// 	!kind.is_parser(),
-		// 	"{kind:?} should be only emitted by parser, not used directly"
+		// 	!kind.is_lexer(),
+		// 	"{kind:?} should be only emitted by lexer, not used directly"
 		// );
 		let event_at_pos = &mut p.events[self.start_event_idx];
 		assert_eq!(*event_at_pos, Event::Pending);

modifiedcrates/jrsonnet-rowan-parser/src/parser.rsdiff before after both

8	event::Event,8	event::Event,
9	lex::Lexeme,9	lex::Lexeme,
10	marker::{AsRange, CompletedMarker, Marker, Ranger},10	marker::{AsRange, CompletedMarker, Marker, Ranger},
11	string_block::{lex_str_block, StringBlockError},11	nodes::{Literal, Number, Text, Trivia},
12	token_set::SyntaxKindSet,12	token_set::SyntaxKindSet,
13	unary::UnaryOperator,13	unary::UnaryOperator,
14	SyntaxKind,14	AstToken, SyntaxKind,
15	SyntaxKind::*,15	SyntaxKind::*,
16	SyntaxNode, T, TS,16	SyntaxNode, T, TS,
17};17};
36}36}
3737
38pub struct Parser<'i> {38pub struct Parser<'i> {
39	// TODO: remove all trivia before feeding to parser?
39	lexemes: &'i [Lexeme<'i>],40	lexemes: &'i [Lexeme<'i>],
40	pub offset: usize,41	pub offset: usize,
41	pub events: Vec<Event>,42	pub events: Vec<Event>,
191		while self192		while self
192			.lexemes193			.lexemes
193			.get(previous_token_idx)194			.get(previous_token_idx)
194			.map_or(false, |l| l.kind.is_trivia())195			.map_or(false, |l| Trivia::can_cast(l.kind))
195			&& previous_token_idx != 0196			&& previous_token_idx != 0
196		{197		{
197			previous_token_idx -= 1;198			previous_token_idx -= 1;
200		Some(self.lexemes[previous_token_idx])201		Some(self.lexemes[previous_token_idx])
201	}202	}
202	pub fn start_of_token(&self, mut idx: usize) -> TextSize {203	pub fn start_of_token(&self, mut idx: usize) -> TextSize {
203		while self.lexemes[idx].kind.is_trivia() {204		while Trivia::can_cast(self.lexemes[idx].kind) {
204			idx += 1;205			idx += 1;
205		}206		}
206		self.lexemes[idx].range.start()207		self.lexemes[idx].range.start()
207	}208	}
208	pub fn end_of_token(&self, mut idx: usize) -> TextSize {209	pub fn end_of_token(&self, mut idx: usize) -> TextSize {
209		while self.lexemes[idx].kind.is_trivia() {210		while Trivia::can_cast(self.lexemes[idx].kind) {
210			idx -= 1;211			idx -= 1;
211		}212		}
212		self.lexemes[idx].range.end()213		self.lexemes[idx].range.end()
267		self.bump();268		self.bump();
268		Some(m.complete(self, SyntaxKind::ERROR))269		Some(m.complete(self, SyntaxKind::ERROR))
269	}270	}
270271	fn bump_assert(&mut self, kind: SyntaxKind) {
272		self.skip_trivia();
273		assert!(self.at(kind), "expected {:?}", kind);
274		self.bump_remap(self.current());
275	}
271	fn bump(&mut self) {276	fn bump(&mut self) {
272		self.skip_trivia();277		self.skip_trivia();
273		self.bump_remap(self.current());278		self.bump_remap(self.current());
314			while self319			while self
315				.lexemes320				.lexemes
316				.get(offset)321				.get(offset)
317				.map(|l| l.kind.is_trivia())322				.map(|l| Trivia::can_cast(l.kind))
318				.unwrap_or(false)323				.unwrap_or(false)
319			{324			{
320				offset += 1;325				offset += 1;
324		while self329		while self
325			.lexemes330			.lexemes
326			.get(offset)331			.get(offset)
327			.map(|l| l.kind.is_trivia())332			.map(|l| Trivia::can_cast(l.kind))
328			.unwrap_or(false)333			.unwrap_or(false)
329		{334		{
330			offset += 1;335			offset += 1;
335		self.nth(0)340		self.nth(0)
336	}341	}
337	fn skip_trivia(&mut self) {342	fn skip_trivia(&mut self) {
338		while self.peek_raw().is_trivia() {343		while Trivia::can_cast(self.peek_raw()) {
339			self.offset += 1;344			self.offset += 1;
340		}345		}
341	}346	}
342	fn current_lexeme(&mut self) -> Option<&Lexeme> {
343		self.skip_trivia();
344		self.lexemes.get(self.offset)
345	}
346	fn peek_raw(&mut self) -> SyntaxKind {347	fn peek_raw(&mut self) -> SyntaxKind {
347		self.lexemes348		self.lexemes
348			.get(self.offset)349			.get(self.offset)
516	} else if p.at(IDENT) {517	} else if p.at(IDENT) {
517		name(p);518		name(p);
518		m.complete(p, FIELD_NAME_FIXED);519		m.complete(p, FIELD_NAME_FIXED);
519	} else if p.current().is_string() {520	} else if Text::can_cast(p.current()) {
520		string(p);521		text(p);
521		m.complete(p, FIELD_NAME_FIXED);522		m.complete(p, FIELD_NAME_FIXED);
522	} else {523	} else {
523		p.error_with_recovery_set(TS![;]);524		p.error_with_recovery_set(TS![;]);
564	};565	};
565}566}
566fn assertion(p: &mut Parser) {567fn assertion(p: &mut Parser) {
567	assert!(p.at(T![assert]));
568	let m = p.start();568	let m = p.start();
569	p.bump();569	p.bump_assert(T![assert]);
570	expr(p).map(|c| c.wrap(p, LHS_EXPR));570	expr(p).map(|c| c.wrap(p, LHS_EXPR));
571	if p.at(T![:]) {571	if p.at(T![:]) {
572		p.bump();572		p.bump();
575	m.complete(p, ASSERTION);575	m.complete(p, ASSERTION);
576}576}
577fn object(p: &mut Parser) -> CompletedMarker {577fn object(p: &mut Parser) -> CompletedMarker {
578	assert!(p.at(T!['{']));
579	let m_t = p.start();578	let m_t = p.start();
580	let m = p.start();579	let m = p.start();
581	p.bump();580	p.bump_assert(T!['{']);
582581
583	loop {582	loop {
584		if p.at(T!['}']) {583		if p.at(T!['}']) {
619	m.complete(p, PARAM);618	m.complete(p, PARAM);
620}619}
621fn params_desc(p: &mut Parser) -> CompletedMarker {620fn params_desc(p: &mut Parser) -> CompletedMarker {
622	assert!(p.at(T!['(']));
623	let m = p.start();621	let m = p.start();
624	p.bump();622	p.bump_assert(T!['(']);
625623
626	loop {624	loop {
627		if p.at(T![')']) {625		if p.at(T![')']) {
640}638}
641fn args_desc(p: &mut Parser) {639fn args_desc(p: &mut Parser) {
642	let m = p.start();640	let m = p.start();
643	assert!(p.at(T!['(']));641	p.bump_assert(T!['(']);
644	p.bump();
645642
646	let started_named = Cell::new(false);643	let started_named = Cell::new(false);
647644
674}671}
675672
676fn array(p: &mut Parser) -> CompletedMarker {673fn array(p: &mut Parser) -> CompletedMarker {
677	assert!(p.at(T!['[']));
678	// Start the list node674	// Start the list node
679	let m = p.start();675	let m = p.start();
680	p.bump(); // '['676	p.bump_assert(T!['[']);
681677
682	// This vec will have at most one element in case of correct input678	// This vec will have at most one element in case of correct input
683	let mut compspecs = Vec::with_capacity(1);679	let mut compspecs = Vec::with_capacity(1);
795	m.complete(p, NAME);791	m.complete(p, NAME);
796}792}
797fn destruct_rest(p: &mut Parser) {793fn destruct_rest(p: &mut Parser) {
798	assert!(p.at(T![...]));
799	p.bump();
800	let m = p.start();794	let m = p.start();
795	p.bump_assert(T![...]);
801	if p.at(IDENT) {796	if p.at(IDENT) {
802		p.bump()797		p.bump()
803	}798	}
817	m.complete(p, DESTRUCT_OBJECT_FIELD);812	m.complete(p, DESTRUCT_OBJECT_FIELD);
818}813}
819fn obj_local(p: &mut Parser) {814fn obj_local(p: &mut Parser) {
820	assert!(p.at(T![local]));
821	let m = p.start();815	let m = p.start();
822	p.bump();816	p.bump_assert(T![local]);
823	bind(p);817	bind(p);
824	m.complete(p, OBJ_LOCAL);818	m.complete(p, OBJ_LOCAL);
825}819}
903		m.complete(p, BIND_DESTRUCT)897		m.complete(p, BIND_DESTRUCT)
904	};898	};
905}899}
906fn string(p: &mut Parser) {900fn text(p: &mut Parser) {
907	assert!(p.current().is_string());901	assert!(Text::can_cast(p.current()));
908	if p.at(STRING_BLOCK) {
909		// We use custom lexer, which skips enough bytes, but not returns error
910		// Instead we should call lexer again to verify if there is something wrong with string block
911		let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
912			&p.current_lexeme().expect("parser is at string block").text
913		));
914		// In kinds, string blocks is parsed at least as `|||`902	p.bump();
915		lexer.bump(3);
916		let res = lex_str_block(&mut lexer);
917		debug_assert!(lexer.next().is_none(), "str_block is lexed");
918		match res {
919			Ok(_) => {
920				p.bump();
921			}
922			Err(e) => p.bump_remap(match e {
923				StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
924				StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
925				StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
926				StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
927			}),
928		}
929	} else {
930		p.bump();
931	}
932}903}
933fn number(p: &mut Parser) {904fn number(p: &mut Parser) {
934	assert!(p.current().is_number());905	assert!(Number::can_cast(p.current()));
935	p.bump();906	p.bump();
936}907}
937fn literal(p: &mut Parser) {908fn literal(p: &mut Parser) {
938	assert!(p.current().is_literal());909	assert!(Literal::can_cast(p.current()));
939	p.bump();910	p.bump();
940}911}
941fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {912fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
942	let _e = p.expected_syntax_name("value");913	let _e = p.expected_syntax_name("value");
943	Some(if p.current().is_literal() {914	Some(if Literal::can_cast(p.current()) {
944		let m = p.start();915		let m = p.start();
945		literal(p);916		literal(p);
946		m.complete(p, EXPR_LITERAL)917		m.complete(p, EXPR_LITERAL)
947	} else if p.current().is_string() {918	} else if Text::can_cast(p.current()) {
948		let m = p.start();919		let m = p.start();
949		string(p);920		text(p);
950		m.complete(p, EXPR_STRING)921		m.complete(p, EXPR_STRING)
951	} else if p.current().is_number() {922	} else if Number::can_cast(p.current()) {
952		let m = p.start();923		let m = p.start();
953		number(p);924		number(p);
954		m.complete(p, EXPR_NUMBER)925		m.complete(p, EXPR_NUMBER)
1025	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {996	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
1026		let m = p.start();997		let m = p.start();
1027		p.bump();998		p.bump();
1028		string(p);999		text(p);
1029		m.complete(p, EXPR_IMPORT)1000		m.complete(p, EXPR_IMPORT)
1030	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {1001	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
1031		let op = match p.current() {1002		let op = match p.current() {
1044		let m = p.start();1015		let m = p.start();
1045		p.bump();1016		p.bump();
1046		expr(p);1017		expr(p);
1047		assert!(p.at(T![')']));1018		p.expect(T![')']);
1048		p.bump();
1049		m.complete(p, EXPR_PARENED)1019		m.complete(p, EXPR_PARENED)
1050	} else {1020	} else {
1051		p.error_with_recovery_set(TS![]);1021		p.error_with_recovery_set(TS![]);

modifiedxtask/src/sourcegen/kinds.rsdiff before after both

--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
 pub enum TokenKind {
 	/// May exist in token tree, but never in source code
 	Meta { grammar_name: String, name: String },
-	/// Specific parsing errors may be emitted as this type of kind
+	/// Specific parsing/lexing errors may be emitted as this type of kind
 	Error {
 		grammar_name: String,
 		name: String,
+		/// Whether this error is produced at the lexing stage, either by the lexer directly or from lex.rs
+		is_lexer_error: bool,
 		regex: Option<String>,
 		priority: Option<u32>,
 	},
@@ -133,13 +135,18 @@
 		});
 		$(define_kinds!($into = $($rest)*))?
 	}};
-	($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
-		$into.define_token(TokenKind::Error {
-			grammar_name: format!("ERROR_{}!", $name),
-			name: format!("ERROR_{}", $name),
-			regex: None$(.or(Some($regex.to_owned())))?,
-			priority: None$(.or(Some($priority)))?,
-		});
+	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+		{
+			let regex = None$(.or(Some($regex.to_owned())))?;
+			let priority = None$(.or(Some($priority)))?;
+			$into.define_token(TokenKind::Error {
+				grammar_name: format!("ERROR_{}!", $name),
+				name: format!("ERROR_{}", $name),
+				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+				regex,
+				priority,
+			});
+		}
 		$(define_kinds!($into = $($rest)*))?
 	}};
 	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
 		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
 		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
 		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
-		error("STRING_BLOCK_UNEXPECTED_END");
-		error("STRING_BLOCK_MISSING_NEW_LINE");
-		error("STRING_BLOCK_MISSING_TERMINATION");
-		error("STRING_BLOCK_MISSING_INDENT");
+		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+		error("STRING_BLOCK_MISSING_INDENT", lexer = true);
 		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
 		lit("WHITESPACE") => r"[ \t\n\r]+";
 		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";

modifiedxtask/src/sourcegen/mod.rsdiff before after both

--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
 			if let Some((special, name)) = classify_special(token) {
 				match special {
 					SpecialName::Literal => panic!("literal is not defined: {name}"),
-					SpecialName::Meta => kinds.define_token(TokenKind::Meta {
-						grammar_name: token.to_owned(),
-						name: format!("META_{}", name),
-					}),
-					SpecialName::Error => kinds.define_token(TokenKind::Error {
-						grammar_name: token.to_owned(),
-						name: format!("ERROR_{}", name),
-						regex: None,
-						priority: None,
-					}),
+					SpecialName::Meta => {
+						eprintln!("implicit meta: {}", name);
+						kinds.define_token(TokenKind::Meta {
+							grammar_name: token.to_owned(),
+							name: format!("META_{}", name),
+						})
+					}
+					SpecialName::Error => {
+						eprintln!("implicit error: {}", name);
+						kinds.define_token(TokenKind::Error {
+							grammar_name: token.to_owned(),
+							name: format!("ERROR_{}", name),
+							regex: None,
+							priority: None,
+							is_lexer_error: true,
+						})
+					}
 				};
 				continue;
 			};
 			let name = to_upper_snake_case(token);
+			eprintln!("implicit kw: {}", token);
 			kinds.define_token(TokenKind::Keyword {
 				code: token.to_owned(),
 				name: format!("{name}_KW"),

modifiedxtask/src/sourcegen/util.rsdiff before after both

--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
 		}
 	}
 
-	eprintln!(" {} was not up-to-date, updating\n", file.display());
-	if std::env::var("CI").is_ok() {
-		eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
-	}
+	eprintln!("{} was not up-to-date, updating", file.display());
 	if let Some(parent) = file.parent() {
 		let _ = fs::create_dir_all(parent);
 	}