From 5ad3c0601af8f372a9bbb40a30df5deb27fd3b1e Mon Sep 17 00:00:00 2001
From: Yaroslav Bolyukin <iam@lach.pw>
Date: Mon, 20 Jun 2022 13:43:51 +0000
Subject: [PATCH] refactor: use grammar to classify tokens


---
--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
 	nodes::{
 		ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
 		DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
-		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
 		UnaryOperator,
 	},
 	AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
 	}
 }
 
-impl Printable for String {
+impl Printable for Text {
 	fn print(&self) -> PrintItems {
 		p!(new: str(&format!("{}", self)))
 	}
@@ -168,7 +168,7 @@
 			FieldName::FieldNameFixed(f) => {
 				if let Some(id) = f.id() {
 					p!(new: {id})
-				} else if let Some(str) = f.string() {
+				} else if let Some(str) = f.text() {
 					p!(new: {str})
 				} else {
 					p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
 			Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
 			Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
 			Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
-			Expr::ExprString(s) => p!(new: {s.string()}),
+			Expr::ExprString(s) => p!(new: {s.text()}),
 			Expr::ExprNumber(n) => p!(new: {n.number()}),
 			Expr::ExprArray(a) => {
 				let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
 				pi
 			}
 			Expr::ExprImport(v) => {
-				p!(new: {v.import_kind()} str(" ") {v.string()})
+				p!(new: {v.import_kind()} str(" ") {v.text()})
 			}
 			Expr::ExprVar(n) => p!(new: {n.name()}),
 			Expr::ExprLocal(l) => {
--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
     name:Name
     ')'
 ExprString =
-    String
+    Text
 ExprNumber =
     Number
 ExprArray =
@@ -67,7 +67,7 @@
     ']'
 
 ExprImport =
-    ImportKind String
+    ImportKind Text
 
 ImportKind =
     'importstr'
@@ -217,7 +217,7 @@
 
 FieldNameFixed =
     id:Name
-|   String
+|   Text
 FieldNameDynamic =
     '['
     Expr
@@ -239,16 +239,27 @@
 |   '$'
 |   'super'
 
-String =
+Text =
     'LIT_STRING_DOUBLE!'
+|   'ERROR_STRING_DOUBLE_UNTERMINATED!'
 |   'LIT_STRING_SINGLE!'
+|   'ERROR_STRING_SINGLE_UNTERMINATED!'
 |   'LIT_STRING_DOUBLE_VERBATIM!'
+|   'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
 |   'LIT_STRING_SINGLE_VERBATIM!'
+|   'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+|   'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
 |   'LIT_STRING_BLOCK!'
+|   'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+|   'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+|   'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+|   'ERROR_STRING_BLOCK_MISSING_INDENT!'
 
 Number =
     'LIT_FLOAT!'
-|   'META_FORCE_ENUM!'
+|   'ERROR_FLOAT_JUNK_AFTER_POINT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
 
 ForSpec =
     'for'
@@ -347,3 +358,12 @@
 TrueExpr=Expr
 FalseExpr=Expr
 LhsExpr=Expr
+
+// Trivia - tokens which will be implicitly skipped for parser
+Trivia =
+    'LIT_WHITESPACE!'
+|   'LIT_MULTI_LINE_COMMENT!'
+|   'ERROR_COMMENT_TOO_SHORT!'
+|   'ERROR_COMMENT_UNTERMINATED!'
+|   'LIT_SINGLE_LINE_HASH_COMMENT!'
+|   'LIT_SINGLE_LINE_SLASH_COMMENT!'
--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
-	pub fn is_trivia(self) -> bool {
-		matches!(
-			self,
-			Self::WHITESPACE
-				| Self::MULTI_LINE_COMMENT
-				| Self::ERROR_COMMENT_TOO_SHORT
-				| Self::ERROR_COMMENT_UNTERMINATED
-				| Self::SINGLE_LINE_HASH_COMMENT
-				| Self::SINGLE_LINE_SLASH_COMMENT
-		)
-	}
-	pub fn is_string(self) -> bool {
-		matches!(
-			self,
-			Self::STRING_SINGLE
-				| Self::ERROR_STRING_SINGLE_UNTERMINATED
-				| Self::STRING_DOUBLE
-				| Self::ERROR_STRING_DOUBLE_UNTERMINATED
-				| Self::STRING_SINGLE_VERBATIM
-				| Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
-				| Self::STRING_DOUBLE_VERBATIM
-				| Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
-				| Self::STRING_BLOCK
-				| Self::ERROR_STRING_BLOCK_UNEXPECTED_END
-				| Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
-				| Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
-				| Self::ERROR_STRING_BLOCK_MISSING_INDENT
-		)
-	}
-	pub fn is_number(self) -> bool {
-		matches!(
-			self,
-			Self::FLOAT
-				| Self::ERROR_FLOAT_JUNK_AFTER_POINT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
-		)
-	}
-	pub fn is_literal(self) -> bool {
-		matches!(
-			self,
-			Self::NULL_KW
-				| Self::TRUE_KW | Self::FALSE_KW
-				| Self::SELF_KW | Self::DOLLAR
-				| Self::SUPER_KW
-		)
-	}
-}
--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
 
 use crate::{
 	lex::Lexeme,
+	nodes::Trivia,
 	parser::{Parse, SyntaxError},
-	JsonnetLanguage, SyntaxKind,
+	AstToken, JsonnetLanguage, SyntaxKind,
 };
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
 	}
 	fn skip_whitespace(&mut self) {
 		while let Some(lexeme) = self.lexemes.get(self.offset) {
-			if !lexeme.kind.is_trivia() {
+			if !Trivia::can_cast(lexeme.kind) {
 				break;
 			}
 
--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
 	pub(crate) syntax: SyntaxNode,
 }
 impl ExprString {
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -332,7 +332,7 @@
 	pub fn import_kind(&self) -> Option<ImportKind> {
 		support::token_child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -692,7 +692,7 @@
 	pub fn id(&self) -> Option<Name> {
 		support::child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -1038,18 +1038,27 @@
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
 	syntax: SyntaxToken,
-	kind: StringKind,
+	kind: TextKind,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
 	StringDouble,
+	ErrorStringDoubleUnterminated,
 	StringSingle,
+	ErrorStringSingleUnterminated,
 	StringDoubleVerbatim,
+	ErrorStringDoubleVerbatimUnterminated,
 	StringSingleVerbatim,
+	ErrorStringSingleVerbatimUnterminated,
+	ErrorStringVerbatimMissingQuotes,
 	StringBlock,
+	ErrorStringBlockUnexpectedEnd,
+	ErrorStringBlockMissingNewLine,
+	ErrorStringBlockMissingTermination,
+	ErrorStringBlockMissingIndent,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum NumberKind {
 	Float,
-	MetaForceEnum,
+	ErrorFloatJunkAfterPoint,
+	ErrorFloatJunkAfterExponent,
+	ErrorFloatJunkAfterExponentSign,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
 	Coloncolon,
 	Colon,
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+	syntax: SyntaxToken,
+	kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+	Whitespace,
+	MultiLineComment,
+	ErrorCommentTooShort,
+	ErrorCommentUnterminated,
+	SingleLineHashComment,
+	SingleLineSlashComment,
+}
 impl AstNode for SourceFile {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
-impl AstToken for String {
+impl AstToken for Text {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
 			STRING_DOUBLE
+			| ERROR_STRING_DOUBLE_UNTERMINATED
 			| STRING_SINGLE
+			| ERROR_STRING_SINGLE_UNTERMINATED
 			| STRING_DOUBLE_VERBATIM
+			| ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
 			| STRING_SINGLE_VERBATIM
-			| STRING_BLOCK => true,
+			| ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+			| ERROR_STRING_VERBATIM_MISSING_QUOTES
+			| STRING_BLOCK
+			| ERROR_STRING_BLOCK_UNEXPECTED_END
+			| ERROR_STRING_BLOCK_MISSING_NEW_LINE
+			| ERROR_STRING_BLOCK_MISSING_TERMINATION
+			| ERROR_STRING_BLOCK_MISSING_INDENT => true,
 			_ => false,
 		}
 	}
 	fn cast(syntax: SyntaxToken) -> Option<Self> {
 		let res = match syntax.kind() {
-			STRING_DOUBLE => String {
+			STRING_DOUBLE => Text {
 				syntax,
-				kind: StringKind::StringDouble,
+				kind: TextKind::StringDouble,
 			},
-			STRING_SINGLE => String {
+			ERROR_STRING_DOUBLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingle,
+				kind: TextKind::ErrorStringDoubleUnterminated,
 			},
-			STRING_DOUBLE_VERBATIM => String {
+			STRING_SINGLE => Text {
 				syntax,
-				kind: StringKind::StringDoubleVerbatim,
+				kind: TextKind::StringSingle,
 			},
-			STRING_SINGLE_VERBATIM => String {
+			ERROR_STRING_SINGLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingleVerbatim,
+				kind: TextKind::ErrorStringSingleUnterminated,
 			},
-			STRING_BLOCK => String {
+			STRING_DOUBLE_VERBATIM => Text {
 				syntax,
-				kind: StringKind::StringBlock,
+				kind: TextKind::StringDoubleVerbatim,
+			},
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+			},
+			STRING_SINGLE_VERBATIM => Text {
+				syntax,
+				kind: TextKind::StringSingleVerbatim,
 			},
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+			},
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+				syntax,
+				kind: TextKind::ErrorStringVerbatimMissingQuotes,
+			},
+			STRING_BLOCK => Text {
+				syntax,
+				kind: TextKind::StringBlock,
+			},
+			ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockUnexpectedEnd,
+			},
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingNewLine,
+			},
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingTermination,
+			},
+			ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingIndent,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2718,12 +2790,12 @@
 		&self.syntax
 	}
 }
-impl String {
-	pub fn kind(&self) -> StringKind {
+impl Text {
+	pub fn kind(&self) -> TextKind {
 		self.kind
 	}
 }
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
@@ -2731,7 +2803,10 @@
 impl AstToken for Number {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
-			FLOAT | META_FORCE_ENUM => true,
+			FLOAT
+			| ERROR_FLOAT_JUNK_AFTER_POINT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
 			_ => false,
 		}
 	}
@@ -2741,10 +2816,18 @@
 				syntax,
 				kind: NumberKind::Float,
 			},
-			META_FORCE_ENUM => Number {
+			ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterPoint,
+			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
 				syntax,
-				kind: NumberKind::MetaForceEnum,
+				kind: NumberKind::ErrorFloatJunkAfterExponent,
 			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2841,6 +2924,62 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
+impl AstToken for Trivia {
+	fn can_cast(kind: SyntaxKind) -> bool {
+		match kind {
+			WHITESPACE
+			| MULTI_LINE_COMMENT
+			| ERROR_COMMENT_TOO_SHORT
+			| ERROR_COMMENT_UNTERMINATED
+			| SINGLE_LINE_HASH_COMMENT
+			| SINGLE_LINE_SLASH_COMMENT => true,
+			_ => false,
+		}
+	}
+	fn cast(syntax: SyntaxToken) -> Option<Self> {
+		let res = match syntax.kind() {
+			WHITESPACE => Trivia {
+				syntax,
+				kind: TriviaKind::Whitespace,
+			},
+			MULTI_LINE_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::MultiLineComment,
+			},
+			ERROR_COMMENT_TOO_SHORT => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentTooShort,
+			},
+			ERROR_COMMENT_UNTERMINATED => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentUnterminated,
+			},
+			SINGLE_LINE_HASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineHashComment,
+			},
+			SINGLE_LINE_SLASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineSlashComment,
+			},
+			_ => return None,
+		};
+		Some(res)
+	}
+	fn syntax(&self) -> &SyntaxToken {
+		&self.syntax
+	}
+}
+impl Trivia {
+	pub fn kind(&self) -> TriviaKind {
+		self.kind
+	}
+}
+impl std::fmt::Display for Trivia {
+	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		std::fmt::Display::fmt(self.syntax(), f)
+	}
+}
 impl std::fmt::Display for Expr {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
 	SELF_KW,
 	#[token("super")]
 	SUPER_KW,
-	META_FORCE_ENUM,
 	#[token("for")]
 	FOR_KW,
 	#[token("assert")]
@@ -253,10 +252,11 @@
 	BINARY_OPERATOR,
 	UNARY_OPERATOR,
 	LITERAL,
-	STRING,
+	TEXT,
 	NUMBER,
 	IMPORT_KIND,
 	VISIBILITY,
+	TRIVIA,
 	#[doc(hidden)]
 	__LAST,
 }
@@ -277,8 +277,8 @@
 	pub fn is_enum(self) -> bool {
 		match self {
 			EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
-			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
-			| NUMBER | IMPORT_KIND | VISIBILITY => true,
+			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+			| IMPORT_KIND | VISIBILITY | TRIVIA => true,
 			_ => false,
 		}
 	}
--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
 use logos::Logos;
 use rowan::{TextRange, TextSize};
 
-use crate::SyntaxKind;
+use crate::{
+	string_block::{lex_str_block, StringBlockError},
+	SyntaxKind,
+};
 
 pub struct Lexer<'a> {
 	inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
 	type Item = Lexeme<'a>;
 
 	fn next(&mut self) -> Option<Self::Item> {
-		let kind = self.inner.next()?;
+		use SyntaxKind::*;
+
+		let mut kind = self.inner.next()?;
 		let text = self.inner.slice();
 
+		if kind == STRING_BLOCK {
+			// We use custom lexer, which skips enough bytes, but not returns error
+			// Instead we should call lexer again to verify if there is something wrong with string block
+			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+			// In kinds, string blocks is parsed at least as `|||`
+			lexer.bump(3);
+			let res = lex_str_block(&mut lexer);
+			debug_assert!(lexer.next().is_none(), "str_block is lexed");
+			match res {
+				Ok(_) => {}
+				Err(e) => {
+					kind = match e {
+						StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+						StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+						StringBlockError::MissingTermination => {
+							ERROR_STRING_BLOCK_MISSING_TERMINATION
+						}
+						StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+					}
+				}
+			}
+		}
+
 		Some(Self::Item {
 			kind,
 			text,
--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
 
 mod ast;
 mod binary;
-mod classify;
 mod event;
 mod generated;
 mod language;
--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
 			!kind.is_enum(),
 			"{kind:?} is a enum kind, you should use variant kinds instead"
 		);
-		// TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+		// TODO: is_lexer should return true if enum variant has #[regex]/#[token] over it, or it is defined as lexer error explicitly
 		// debug_assert!(
-		// 	!kind.is_parser(),
-		// 	"{kind:?} should be only emitted by parser, not used directly"
+		// 	!kind.is_lexer(),
+		// 	"{kind:?} should be only emitted by lexer, not used directly"
 		// );
 		let event_at_pos = &mut p.events[self.start_event_idx];
 		assert_eq!(*event_at_pos, Event::Pending);
--- a/crates/jrsonnet-rowan-parser/src/parser.rs
+++ b/crates/jrsonnet-rowan-parser/src/parser.rs
@@ -8,10 +8,10 @@
 	event::Event,
 	lex::Lexeme,
 	marker::{AsRange, CompletedMarker, Marker, Ranger},
-	string_block::{lex_str_block, StringBlockError},
+	nodes::{Literal, Number, Text, Trivia},
 	token_set::SyntaxKindSet,
 	unary::UnaryOperator,
-	SyntaxKind,
+	AstToken, SyntaxKind,
 	SyntaxKind::*,
 	SyntaxNode, T, TS,
 };
@@ -36,6 +36,7 @@
 }
 
 pub struct Parser<'i> {
+	// TODO: remove all trivia before feeding to parser?
 	lexemes: &'i [Lexeme<'i>],
 	pub offset: usize,
 	pub events: Vec<Event>,
@@ -191,7 +192,7 @@
 		while self
 			.lexemes
 			.get(previous_token_idx)
-			.map_or(false, |l| l.kind.is_trivia())
+			.map_or(false, |l| Trivia::can_cast(l.kind))
 			&& previous_token_idx != 0
 		{
 			previous_token_idx -= 1;
@@ -200,13 +201,13 @@
 		Some(self.lexemes[previous_token_idx])
 	}
 	pub fn start_of_token(&self, mut idx: usize) -> TextSize {
-		while self.lexemes[idx].kind.is_trivia() {
+		while Trivia::can_cast(self.lexemes[idx].kind) {
 			idx += 1;
 		}
 		self.lexemes[idx].range.start()
 	}
 	pub fn end_of_token(&self, mut idx: usize) -> TextSize {
-		while self.lexemes[idx].kind.is_trivia() {
+		while Trivia::can_cast(self.lexemes[idx].kind) {
 			idx -= 1;
 		}
 		self.lexemes[idx].range.end()
@@ -267,7 +268,11 @@
 		self.bump();
 		Some(m.complete(self, SyntaxKind::ERROR))
 	}
-
+	fn bump_assert(&mut self, kind: SyntaxKind) {
+		self.skip_trivia();
+		assert!(self.at(kind), "expected {:?}", kind);
+		self.bump_remap(self.current());
+	}
 	fn bump(&mut self) {
 		self.skip_trivia();
 		self.bump_remap(self.current());
@@ -314,7 +319,7 @@
 			while self
 				.lexemes
 				.get(offset)
-				.map(|l| l.kind.is_trivia())
+				.map(|l| Trivia::can_cast(l.kind))
 				.unwrap_or(false)
 			{
 				offset += 1;
@@ -324,7 +329,7 @@
 		while self
 			.lexemes
 			.get(offset)
-			.map(|l| l.kind.is_trivia())
+			.map(|l| Trivia::can_cast(l.kind))
 			.unwrap_or(false)
 		{
 			offset += 1;
@@ -335,14 +340,10 @@
 		self.nth(0)
 	}
 	fn skip_trivia(&mut self) {
-		while self.peek_raw().is_trivia() {
+		while Trivia::can_cast(self.peek_raw()) {
 			self.offset += 1;
 		}
 	}
-	fn current_lexeme(&mut self) -> Option<&Lexeme> {
-		self.skip_trivia();
-		self.lexemes.get(self.offset)
-	}
 	fn peek_raw(&mut self) -> SyntaxKind {
 		self.lexemes
 			.get(self.offset)
@@ -516,8 +517,8 @@
 	} else if p.at(IDENT) {
 		name(p);
 		m.complete(p, FIELD_NAME_FIXED);
-	} else if p.current().is_string() {
-		string(p);
+	} else if Text::can_cast(p.current()) {
+		text(p);
 		m.complete(p, FIELD_NAME_FIXED);
 	} else {
 		p.error_with_recovery_set(TS![;]);
@@ -564,9 +565,8 @@
 	};
 }
 fn assertion(p: &mut Parser) {
-	assert!(p.at(T![assert]));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T![assert]);
 	expr(p).map(|c| c.wrap(p, LHS_EXPR));
 	if p.at(T![:]) {
 		p.bump();
@@ -575,10 +575,9 @@
 	m.complete(p, ASSERTION);
 }
 fn object(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['{']));
 	let m_t = p.start();
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T!['{']);
 
 	loop {
 		if p.at(T!['}']) {
@@ -619,9 +618,8 @@
 	m.complete(p, PARAM);
 }
 fn params_desc(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['(']));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T!['(']);
 
 	loop {
 		if p.at(T![')']) {
@@ -640,8 +638,7 @@
 }
 fn args_desc(p: &mut Parser) {
 	let m = p.start();
-	assert!(p.at(T!['(']));
-	p.bump();
+	p.bump_assert(T!['(']);
 
 	let started_named = Cell::new(false);
 
@@ -674,10 +671,9 @@
 }
 
 fn array(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['[']));
 	// Start the list node
 	let m = p.start();
-	p.bump(); // '['
+	p.bump_assert(T!['[']);
 
 	// This vec will have at most one element in case of correct input
 	let mut compspecs = Vec::with_capacity(1);
@@ -795,9 +791,8 @@
 	m.complete(p, NAME);
 }
 fn destruct_rest(p: &mut Parser) {
-	assert!(p.at(T![...]));
-	p.bump();
 	let m = p.start();
+	p.bump_assert(T![...]);
 	if p.at(IDENT) {
 		p.bump()
 	}
@@ -817,9 +812,8 @@
 	m.complete(p, DESTRUCT_OBJECT_FIELD);
 }
 fn obj_local(p: &mut Parser) {
-	assert!(p.at(T![local]));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T![local]);
 	bind(p);
 	m.complete(p, OBJ_LOCAL);
 }
@@ -903,52 +897,29 @@
 		m.complete(p, BIND_DESTRUCT)
 	};
 }
-fn string(p: &mut Parser) {
-	assert!(p.current().is_string());
-	if p.at(STRING_BLOCK) {
-		// We use custom lexer, which skips enough bytes, but not returns error
-		// Instead we should call lexer again to verify if there is something wrong with string block
-		let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
-			&p.current_lexeme().expect("parser is at string block").text
-		));
-		// In kinds, string blocks is parsed at least as `|||`
-		lexer.bump(3);
-		let res = lex_str_block(&mut lexer);
-		debug_assert!(lexer.next().is_none(), "str_block is lexed");
-		match res {
-			Ok(_) => {
-				p.bump();
-			}
-			Err(e) => p.bump_remap(match e {
-				StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
-				StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
-				StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
-				StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
-			}),
-		}
-	} else {
-		p.bump();
-	}
+fn text(p: &mut Parser) {
+	assert!(Text::can_cast(p.current()));
+	p.bump();
 }
 fn number(p: &mut Parser) {
-	assert!(p.current().is_number());
+	assert!(Number::can_cast(p.current()));
 	p.bump();
 }
 fn literal(p: &mut Parser) {
-	assert!(p.current().is_literal());
+	assert!(Literal::can_cast(p.current()));
 	p.bump();
 }
 fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
 	let _e = p.expected_syntax_name("value");
-	Some(if p.current().is_literal() {
+	Some(if Literal::can_cast(p.current()) {
 		let m = p.start();
 		literal(p);
 		m.complete(p, EXPR_LITERAL)
-	} else if p.current().is_string() {
+	} else if Text::can_cast(p.current()) {
 		let m = p.start();
-		string(p);
+		text(p);
 		m.complete(p, EXPR_STRING)
-	} else if p.current().is_number() {
+	} else if Number::can_cast(p.current()) {
 		let m = p.start();
 		number(p);
 		m.complete(p, EXPR_NUMBER)
@@ -1025,7 +996,7 @@
 	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
 		let m = p.start();
 		p.bump();
-		string(p);
+		text(p);
 		m.complete(p, EXPR_IMPORT)
 	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
 		let op = match p.current() {
@@ -1044,8 +1015,7 @@
 		let m = p.start();
 		p.bump();
 		expr(p);
-		assert!(p.at(T![')']));
-		p.bump();
+		p.expect(T![')']);
 		m.complete(p, EXPR_PARENED)
 	} else {
 		p.error_with_recovery_set(TS![]);
--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
 pub enum TokenKind {
 	/// May exist in token tree, but never in source code
 	Meta { grammar_name: String, name: String },
-	/// Specific parsing errors may be emitted as this type of kind
+	/// Specific parsing/lexing errors may be emitted as this type of kind
 	Error {
 		grammar_name: String,
 		name: String,
+		/// Is this error returned by lexer directly, or from lex.rs
+		is_lexer_error: bool,
 		regex: Option<String>,
 		priority: Option<u32>,
 	},
@@ -133,13 +135,18 @@
 		});
 		$(define_kinds!($into = $($rest)*))?
 	}};
-	($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
-		$into.define_token(TokenKind::Error {
-			grammar_name: format!("ERROR_{}!", $name),
-			name: format!("ERROR_{}", $name),
-			regex: None$(.or(Some($regex.to_owned())))?,
-			priority: None$(.or(Some($priority)))?,
-		});
+	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+		{
+			let regex = None$(.or(Some($regex.to_owned())))?;
+			let priority = None$(.or(Some($priority)))?;
+			$into.define_token(TokenKind::Error {
+				grammar_name: format!("ERROR_{}!", $name),
+				name: format!("ERROR_{}", $name),
+				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+				regex,
+				priority,
+			});
+		}
 		$(define_kinds!($into = $($rest)*))?
 	}};
 	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
 		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
 		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
 		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
-		error("STRING_BLOCK_UNEXPECTED_END");
-		error("STRING_BLOCK_MISSING_NEW_LINE");
-		error("STRING_BLOCK_MISSING_TERMINATION");
-		error("STRING_BLOCK_MISSING_INDENT");
+		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+		error("STRING_BLOCK_MISSING_INDENT", lexer = true);
 		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
 		lit("WHITESPACE") => r"[ \t\n\r]+";
 		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
 			if let Some((special, name)) = classify_special(token) {
 				match special {
 					SpecialName::Literal => panic!("literal is not defined: {name}"),
-					SpecialName::Meta => kinds.define_token(TokenKind::Meta {
-						grammar_name: token.to_owned(),
-						name: format!("META_{}", name),
-					}),
-					SpecialName::Error => kinds.define_token(TokenKind::Error {
-						grammar_name: token.to_owned(),
-						name: format!("ERROR_{}", name),
-						regex: None,
-						priority: None,
-					}),
+					SpecialName::Meta => {
+						eprintln!("implicit meta: {}", name);
+						kinds.define_token(TokenKind::Meta {
+							grammar_name: token.to_owned(),
+							name: format!("META_{}", name),
+						})
+					}
+					SpecialName::Error => {
+						eprintln!("implicit error: {}", name);
+						kinds.define_token(TokenKind::Error {
+							grammar_name: token.to_owned(),
+							name: format!("ERROR_{}", name),
+							regex: None,
+							priority: None,
+							is_lexer_error: true,
+						})
+					}
 				};
 				continue;
 			};
 			let name = to_upper_snake_case(token);
+			eprintln!("implicit kw: {}", token);
 			kinds.define_token(TokenKind::Keyword {
 				code: token.to_owned(),
 				name: format!("{name}_KW"),
--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
 		}
 	}
 
-	eprintln!(" {} was not up-to-date, updating\n", file.display());
-	if std::env::var("CI").is_ok() {
-		eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
-	}
+	eprintln!("{} was not up-to-date, updating", file.display());
 	if let Some(parent) = file.parent() {
 		let _ = fs::create_dir_all(parent);
 	}

-- 
gitstuff