git.delta.rocks / jrsonnet / refs/commits / 5ad3c0601af8

difftreelog

refactor use grammar to classify tokens

Yaroslav Bolyukin2022-06-20parent: #dfc47a6.patch.diff
in: master

13 files changed

modifiedcmds/jrsonnet-fmt/src/main.rsdiffbeforeafterboth
--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
 	nodes::{
 		ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
 		DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
-		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
 		UnaryOperator,
 	},
 	AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
 	}
 }
 
-impl Printable for String {
+impl Printable for Text {
 	fn print(&self) -> PrintItems {
 		p!(new: str(&format!("{}", self)))
 	}
@@ -168,7 +168,7 @@
 			FieldName::FieldNameFixed(f) => {
 				if let Some(id) = f.id() {
 					p!(new: {id})
-				} else if let Some(str) = f.string() {
+				} else if let Some(str) = f.text() {
 					p!(new: {str})
 				} else {
 					p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
 			Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
 			Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
 			Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
-			Expr::ExprString(s) => p!(new: {s.string()}),
+			Expr::ExprString(s) => p!(new: {s.text()}),
 			Expr::ExprNumber(n) => p!(new: {n.number()}),
 			Expr::ExprArray(a) => {
 				let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
 				pi
 			}
 			Expr::ExprImport(v) => {
-				p!(new: {v.import_kind()} str(" ") {v.string()})
+				p!(new: {v.import_kind()} str(" ") {v.text()})
 			}
 			Expr::ExprVar(n) => p!(new: {n.name()}),
 			Expr::ExprLocal(l) => {
modifiedcrates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
     name:Name
     ')'
 ExprString =
-    String
+    Text
 ExprNumber =
     Number
 ExprArray =
@@ -67,7 +67,7 @@
     ']'
 
 ExprImport =
-    ImportKind String
+    ImportKind Text
 
 ImportKind =
     'importstr'
@@ -217,7 +217,7 @@
 
 FieldNameFixed =
     id:Name
-|   String
+|   Text
 FieldNameDynamic =
     '['
     Expr
@@ -239,16 +239,27 @@
 |   '$'
 |   'super'
 
-String =
+Text =
     'LIT_STRING_DOUBLE!'
+|   'ERROR_STRING_DOUBLE_UNTERMINATED!'
 |   'LIT_STRING_SINGLE!'
+|   'ERROR_STRING_SINGLE_UNTERMINATED!'
 |   'LIT_STRING_DOUBLE_VERBATIM!'
+|   'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
 |   'LIT_STRING_SINGLE_VERBATIM!'
+|   'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+|   'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
 |   'LIT_STRING_BLOCK!'
+|   'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+|   'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+|   'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+|   'ERROR_STRING_BLOCK_MISSING_INDENT!'
 
 Number =
     'LIT_FLOAT!'
-|   'META_FORCE_ENUM!'
+|   'ERROR_FLOAT_JUNK_AFTER_POINT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
 
 ForSpec =
     'for'
@@ -347,3 +358,12 @@
 TrueExpr=Expr
 FalseExpr=Expr
 LhsExpr=Expr
+
+// Trivia - tokens which are implicitly skipped by the parser
+Trivia =
+    'LIT_WHITESPACE!'
+|   'LIT_MULTI_LINE_COMMENT!'
+|   'ERROR_COMMENT_TOO_SHORT!'
+|   'ERROR_COMMENT_UNTERMINATED!'
+|   'LIT_SINGLE_LINE_HASH_COMMENT!'
+|   'LIT_SINGLE_LINE_SLASH_COMMENT!'
deletedcrates/jrsonnet-rowan-parser/src/classify.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
-	pub fn is_trivia(self) -> bool {
-		matches!(
-			self,
-			Self::WHITESPACE
-				| Self::MULTI_LINE_COMMENT
-				| Self::ERROR_COMMENT_TOO_SHORT
-				| Self::ERROR_COMMENT_UNTERMINATED
-				| Self::SINGLE_LINE_HASH_COMMENT
-				| Self::SINGLE_LINE_SLASH_COMMENT
-		)
-	}
-	pub fn is_string(self) -> bool {
-		matches!(
-			self,
-			Self::STRING_SINGLE
-				| Self::ERROR_STRING_SINGLE_UNTERMINATED
-				| Self::STRING_DOUBLE
-				| Self::ERROR_STRING_DOUBLE_UNTERMINATED
-				| Self::STRING_SINGLE_VERBATIM
-				| Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
-				| Self::STRING_DOUBLE_VERBATIM
-				| Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
-				| Self::STRING_BLOCK
-				| Self::ERROR_STRING_BLOCK_UNEXPECTED_END
-				| Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
-				| Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
-				| Self::ERROR_STRING_BLOCK_MISSING_INDENT
-		)
-	}
-	pub fn is_number(self) -> bool {
-		matches!(
-			self,
-			Self::FLOAT
-				| Self::ERROR_FLOAT_JUNK_AFTER_POINT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
-		)
-	}
-	pub fn is_literal(self) -> bool {
-		matches!(
-			self,
-			Self::NULL_KW
-				| Self::TRUE_KW | Self::FALSE_KW
-				| Self::SELF_KW | Self::DOLLAR
-				| Self::SUPER_KW
-		)
-	}
-}
modifiedcrates/jrsonnet-rowan-parser/src/event.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
 
 use crate::{
 	lex::Lexeme,
+	nodes::Trivia,
 	parser::{Parse, SyntaxError},
-	JsonnetLanguage, SyntaxKind,
+	AstToken, JsonnetLanguage, SyntaxKind,
 };
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
 	}
 	fn skip_whitespace(&mut self) {
 		while let Some(lexeme) = self.lexemes.get(self.offset) {
-			if !lexeme.kind.is_trivia() {
+			if !Trivia::can_cast(lexeme.kind) {
 				break;
 			}
 
modifiedcrates/jrsonnet-rowan-parser/src/generated/nodes.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
 	pub(crate) syntax: SyntaxNode,
 }
 impl ExprString {
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -332,7 +332,7 @@
 	pub fn import_kind(&self) -> Option<ImportKind> {
 		support::token_child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -692,7 +692,7 @@
 	pub fn id(&self) -> Option<Name> {
 		support::child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -1038,18 +1038,27 @@
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
 	syntax: SyntaxToken,
-	kind: StringKind,
+	kind: TextKind,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
 	StringDouble,
+	ErrorStringDoubleUnterminated,
 	StringSingle,
+	ErrorStringSingleUnterminated,
 	StringDoubleVerbatim,
+	ErrorStringDoubleVerbatimUnterminated,
 	StringSingleVerbatim,
+	ErrorStringSingleVerbatimUnterminated,
+	ErrorStringVerbatimMissingQuotes,
 	StringBlock,
+	ErrorStringBlockUnexpectedEnd,
+	ErrorStringBlockMissingNewLine,
+	ErrorStringBlockMissingTermination,
+	ErrorStringBlockMissingIndent,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum NumberKind {
 	Float,
-	MetaForceEnum,
+	ErrorFloatJunkAfterPoint,
+	ErrorFloatJunkAfterExponent,
+	ErrorFloatJunkAfterExponentSign,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
 	Coloncolon,
 	Colon,
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+	syntax: SyntaxToken,
+	kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+	Whitespace,
+	MultiLineComment,
+	ErrorCommentTooShort,
+	ErrorCommentUnterminated,
+	SingleLineHashComment,
+	SingleLineSlashComment,
+}
 impl AstNode for SourceFile {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
-impl AstToken for String {
+impl AstToken for Text {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
 			STRING_DOUBLE
+			| ERROR_STRING_DOUBLE_UNTERMINATED
 			| STRING_SINGLE
+			| ERROR_STRING_SINGLE_UNTERMINATED
 			| STRING_DOUBLE_VERBATIM
+			| ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
 			| STRING_SINGLE_VERBATIM
-			| STRING_BLOCK => true,
+			| ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+			| ERROR_STRING_VERBATIM_MISSING_QUOTES
+			| STRING_BLOCK
+			| ERROR_STRING_BLOCK_UNEXPECTED_END
+			| ERROR_STRING_BLOCK_MISSING_NEW_LINE
+			| ERROR_STRING_BLOCK_MISSING_TERMINATION
+			| ERROR_STRING_BLOCK_MISSING_INDENT => true,
 			_ => false,
 		}
 	}
 	fn cast(syntax: SyntaxToken) -> Option<Self> {
 		let res = match syntax.kind() {
-			STRING_DOUBLE => String {
+			STRING_DOUBLE => Text {
 				syntax,
-				kind: StringKind::StringDouble,
+				kind: TextKind::StringDouble,
 			},
-			STRING_SINGLE => String {
+			ERROR_STRING_DOUBLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingle,
+				kind: TextKind::ErrorStringDoubleUnterminated,
 			},
-			STRING_DOUBLE_VERBATIM => String {
+			STRING_SINGLE => Text {
 				syntax,
-				kind: StringKind::StringDoubleVerbatim,
+				kind: TextKind::StringSingle,
 			},
-			STRING_SINGLE_VERBATIM => String {
+			ERROR_STRING_SINGLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingleVerbatim,
+				kind: TextKind::ErrorStringSingleUnterminated,
 			},
-			STRING_BLOCK => String {
+			STRING_DOUBLE_VERBATIM => Text {
 				syntax,
-				kind: StringKind::StringBlock,
+				kind: TextKind::StringDoubleVerbatim,
+			},
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+			},
+			STRING_SINGLE_VERBATIM => Text {
+				syntax,
+				kind: TextKind::StringSingleVerbatim,
 			},
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+			},
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+				syntax,
+				kind: TextKind::ErrorStringVerbatimMissingQuotes,
+			},
+			STRING_BLOCK => Text {
+				syntax,
+				kind: TextKind::StringBlock,
+			},
+			ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockUnexpectedEnd,
+			},
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingNewLine,
+			},
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingTermination,
+			},
+			ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingIndent,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2718,12 +2790,12 @@
 		&self.syntax
 	}
 }
-impl String {
-	pub fn kind(&self) -> StringKind {
+impl Text {
+	pub fn kind(&self) -> TextKind {
 		self.kind
 	}
 }
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
@@ -2731,7 +2803,10 @@
 impl AstToken for Number {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
-			FLOAT | META_FORCE_ENUM => true,
+			FLOAT
+			| ERROR_FLOAT_JUNK_AFTER_POINT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
 			_ => false,
 		}
 	}
@@ -2741,10 +2816,18 @@
 				syntax,
 				kind: NumberKind::Float,
 			},
-			META_FORCE_ENUM => Number {
+			ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterPoint,
+			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
 				syntax,
-				kind: NumberKind::MetaForceEnum,
+				kind: NumberKind::ErrorFloatJunkAfterExponent,
 			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2841,6 +2924,62 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
+impl AstToken for Trivia {
+	fn can_cast(kind: SyntaxKind) -> bool {
+		match kind {
+			WHITESPACE
+			| MULTI_LINE_COMMENT
+			| ERROR_COMMENT_TOO_SHORT
+			| ERROR_COMMENT_UNTERMINATED
+			| SINGLE_LINE_HASH_COMMENT
+			| SINGLE_LINE_SLASH_COMMENT => true,
+			_ => false,
+		}
+	}
+	fn cast(syntax: SyntaxToken) -> Option<Self> {
+		let res = match syntax.kind() {
+			WHITESPACE => Trivia {
+				syntax,
+				kind: TriviaKind::Whitespace,
+			},
+			MULTI_LINE_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::MultiLineComment,
+			},
+			ERROR_COMMENT_TOO_SHORT => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentTooShort,
+			},
+			ERROR_COMMENT_UNTERMINATED => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentUnterminated,
+			},
+			SINGLE_LINE_HASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineHashComment,
+			},
+			SINGLE_LINE_SLASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineSlashComment,
+			},
+			_ => return None,
+		};
+		Some(res)
+	}
+	fn syntax(&self) -> &SyntaxToken {
+		&self.syntax
+	}
+}
+impl Trivia {
+	pub fn kind(&self) -> TriviaKind {
+		self.kind
+	}
+}
+impl std::fmt::Display for Trivia {
+	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		std::fmt::Display::fmt(self.syntax(), f)
+	}
+}
 impl std::fmt::Display for Expr {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
modifiedcrates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
 	SELF_KW,
 	#[token("super")]
 	SUPER_KW,
-	META_FORCE_ENUM,
 	#[token("for")]
 	FOR_KW,
 	#[token("assert")]
@@ -253,10 +252,11 @@
 	BINARY_OPERATOR,
 	UNARY_OPERATOR,
 	LITERAL,
-	STRING,
+	TEXT,
 	NUMBER,
 	IMPORT_KIND,
 	VISIBILITY,
+	TRIVIA,
 	#[doc(hidden)]
 	__LAST,
 }
@@ -277,8 +277,8 @@
 	pub fn is_enum(self) -> bool {
 		match self {
 			EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
-			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
-			| NUMBER | IMPORT_KIND | VISIBILITY => true,
+			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+			| IMPORT_KIND | VISIBILITY | TRIVIA => true,
 			_ => false,
 		}
 	}
modifiedcrates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
 use logos::Logos;
 use rowan::{TextRange, TextSize};
 
-use crate::SyntaxKind;
+use crate::{
+	string_block::{lex_str_block, StringBlockError},
+	SyntaxKind,
+};
 
 pub struct Lexer<'a> {
 	inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
 	type Item = Lexeme<'a>;
 
 	fn next(&mut self) -> Option<Self::Item> {
-		let kind = self.inner.next()?;
+		use SyntaxKind::*;
+
+		let mut kind = self.inner.next()?;
 		let text = self.inner.slice();
 
+		if kind == STRING_BLOCK {
+			// We use a custom lexer, which skips enough bytes but does not return an error.
+			// Instead, we call the lexer again to verify whether something is wrong with the string block.
+			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+			// In kinds, a string block is matched by at least `|||`, so skip those three bytes
+			lexer.bump(3);
+			let res = lex_str_block(&mut lexer);
+			debug_assert!(lexer.next().is_none(), "str_block is lexed");
+			match res {
+				Ok(_) => {}
+				Err(e) => {
+					kind = match e {
+						StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+						StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+						StringBlockError::MissingTermination => {
+							ERROR_STRING_BLOCK_MISSING_TERMINATION
+						}
+						StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+					}
+				}
+			}
+		}
+
 		Some(Self::Item {
 			kind,
 			text,
modifiedcrates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
 
 mod ast;
 mod binary;
-mod classify;
 mod event;
 mod generated;
 mod language;
modifiedcrates/jrsonnet-rowan-parser/src/marker.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
 			!kind.is_enum(),
 			"{kind:?} is a enum kind, you should use variant kinds instead"
 		);
-		// TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+		// TODO: is_lexer should return true if the enum variant has #[regex]/#[token] over it, or if it is explicitly defined as a lexer error
 		// debug_assert!(
-		// 	!kind.is_parser(),
-		// 	"{kind:?} should be only emitted by parser, not used directly"
+		// 	!kind.is_lexer(),
+		// 	"{kind:?} should be only emitted by lexer, not used directly"
 		// );
 		let event_at_pos = &mut p.events[self.start_event_idx];
 		assert_eq!(*event_at_pos, Event::Pending);
modifiedcrates/jrsonnet-rowan-parser/src/parser.rsdiffbeforeafterboth
8 event::Event,8 event::Event,
9 lex::Lexeme,9 lex::Lexeme,
10 marker::{AsRange, CompletedMarker, Marker, Ranger},10 marker::{AsRange, CompletedMarker, Marker, Ranger},
11 string_block::{lex_str_block, StringBlockError},11 nodes::{Literal, Number, Text, Trivia},
12 token_set::SyntaxKindSet,12 token_set::SyntaxKindSet,
13 unary::UnaryOperator,13 unary::UnaryOperator,
14 SyntaxKind,14 AstToken, SyntaxKind,
15 SyntaxKind::*,15 SyntaxKind::*,
16 SyntaxNode, T, TS,16 SyntaxNode, T, TS,
17};17};
36}36}
3737
38pub struct Parser<'i> {38pub struct Parser<'i> {
39 // TODO: remove all trivia before feeding to parser?
39 lexemes: &'i [Lexeme<'i>],40 lexemes: &'i [Lexeme<'i>],
40 pub offset: usize,41 pub offset: usize,
41 pub events: Vec<Event>,42 pub events: Vec<Event>,
191 while self192 while self
192 .lexemes193 .lexemes
193 .get(previous_token_idx)194 .get(previous_token_idx)
194 .map_or(false, |l| l.kind.is_trivia())195 .map_or(false, |l| Trivia::can_cast(l.kind))
195 && previous_token_idx != 0196 && previous_token_idx != 0
196 {197 {
197 previous_token_idx -= 1;198 previous_token_idx -= 1;
200 Some(self.lexemes[previous_token_idx])201 Some(self.lexemes[previous_token_idx])
201 }202 }
202 pub fn start_of_token(&self, mut idx: usize) -> TextSize {203 pub fn start_of_token(&self, mut idx: usize) -> TextSize {
203 while self.lexemes[idx].kind.is_trivia() {204 while Trivia::can_cast(self.lexemes[idx].kind) {
204 idx += 1;205 idx += 1;
205 }206 }
206 self.lexemes[idx].range.start()207 self.lexemes[idx].range.start()
207 }208 }
208 pub fn end_of_token(&self, mut idx: usize) -> TextSize {209 pub fn end_of_token(&self, mut idx: usize) -> TextSize {
209 while self.lexemes[idx].kind.is_trivia() {210 while Trivia::can_cast(self.lexemes[idx].kind) {
210 idx -= 1;211 idx -= 1;
211 }212 }
212 self.lexemes[idx].range.end()213 self.lexemes[idx].range.end()
267 self.bump();268 self.bump();
268 Some(m.complete(self, SyntaxKind::ERROR))269 Some(m.complete(self, SyntaxKind::ERROR))
269 }270 }
270271 fn bump_assert(&mut self, kind: SyntaxKind) {
272 self.skip_trivia();
273 assert!(self.at(kind), "expected {:?}", kind);
274 self.bump_remap(self.current());
275 }
271 fn bump(&mut self) {276 fn bump(&mut self) {
272 self.skip_trivia();277 self.skip_trivia();
273 self.bump_remap(self.current());278 self.bump_remap(self.current());
314 while self319 while self
315 .lexemes320 .lexemes
316 .get(offset)321 .get(offset)
317 .map(|l| l.kind.is_trivia())322 .map(|l| Trivia::can_cast(l.kind))
318 .unwrap_or(false)323 .unwrap_or(false)
319 {324 {
320 offset += 1;325 offset += 1;
324 while self329 while self
325 .lexemes330 .lexemes
326 .get(offset)331 .get(offset)
327 .map(|l| l.kind.is_trivia())332 .map(|l| Trivia::can_cast(l.kind))
328 .unwrap_or(false)333 .unwrap_or(false)
329 {334 {
330 offset += 1;335 offset += 1;
335 self.nth(0)340 self.nth(0)
336 }341 }
337 fn skip_trivia(&mut self) {342 fn skip_trivia(&mut self) {
338 while self.peek_raw().is_trivia() {343 while Trivia::can_cast(self.peek_raw()) {
339 self.offset += 1;344 self.offset += 1;
340 }345 }
341 }346 }
342 fn current_lexeme(&mut self) -> Option<&Lexeme> {
343 self.skip_trivia();
344 self.lexemes.get(self.offset)
345 }
346 fn peek_raw(&mut self) -> SyntaxKind {347 fn peek_raw(&mut self) -> SyntaxKind {
347 self.lexemes348 self.lexemes
348 .get(self.offset)349 .get(self.offset)
516 } else if p.at(IDENT) {517 } else if p.at(IDENT) {
517 name(p);518 name(p);
518 m.complete(p, FIELD_NAME_FIXED);519 m.complete(p, FIELD_NAME_FIXED);
519 } else if p.current().is_string() {520 } else if Text::can_cast(p.current()) {
520 string(p);521 text(p);
521 m.complete(p, FIELD_NAME_FIXED);522 m.complete(p, FIELD_NAME_FIXED);
522 } else {523 } else {
523 p.error_with_recovery_set(TS![;]);524 p.error_with_recovery_set(TS![;]);
564 };565 };
565}566}
566fn assertion(p: &mut Parser) {567fn assertion(p: &mut Parser) {
567 assert!(p.at(T![assert]));
568 let m = p.start();568 let m = p.start();
569 p.bump();569 p.bump_assert(T![assert]);
570 expr(p).map(|c| c.wrap(p, LHS_EXPR));570 expr(p).map(|c| c.wrap(p, LHS_EXPR));
571 if p.at(T![:]) {571 if p.at(T![:]) {
572 p.bump();572 p.bump();
575 m.complete(p, ASSERTION);575 m.complete(p, ASSERTION);
576}576}
577fn object(p: &mut Parser) -> CompletedMarker {577fn object(p: &mut Parser) -> CompletedMarker {
578 assert!(p.at(T!['{']));
579 let m_t = p.start();578 let m_t = p.start();
580 let m = p.start();579 let m = p.start();
581 p.bump();580 p.bump_assert(T!['{']);
582581
583 loop {582 loop {
584 if p.at(T!['}']) {583 if p.at(T!['}']) {
619 m.complete(p, PARAM);618 m.complete(p, PARAM);
620}619}
621fn params_desc(p: &mut Parser) -> CompletedMarker {620fn params_desc(p: &mut Parser) -> CompletedMarker {
622 assert!(p.at(T!['(']));
623 let m = p.start();621 let m = p.start();
624 p.bump();622 p.bump_assert(T!['(']);
625623
626 loop {624 loop {
627 if p.at(T![')']) {625 if p.at(T![')']) {
640}638}
641fn args_desc(p: &mut Parser) {639fn args_desc(p: &mut Parser) {
642 let m = p.start();640 let m = p.start();
643 assert!(p.at(T!['(']));641 p.bump_assert(T!['(']);
644 p.bump();
645642
646 let started_named = Cell::new(false);643 let started_named = Cell::new(false);
647644
674}671}
675672
676fn array(p: &mut Parser) -> CompletedMarker {673fn array(p: &mut Parser) -> CompletedMarker {
677 assert!(p.at(T!['[']));
678 // Start the list node674 // Start the list node
679 let m = p.start();675 let m = p.start();
680 p.bump(); // '['676 p.bump_assert(T!['[']);
681677
682 // This vec will have at most one element in case of correct input678 // This vec will have at most one element in case of correct input
683 let mut compspecs = Vec::with_capacity(1);679 let mut compspecs = Vec::with_capacity(1);
795 m.complete(p, NAME);791 m.complete(p, NAME);
796}792}
797fn destruct_rest(p: &mut Parser) {793fn destruct_rest(p: &mut Parser) {
798 assert!(p.at(T![...]));
799 p.bump();
800 let m = p.start();794 let m = p.start();
795 p.bump_assert(T![...]);
801 if p.at(IDENT) {796 if p.at(IDENT) {
802 p.bump()797 p.bump()
803 }798 }
817 m.complete(p, DESTRUCT_OBJECT_FIELD);812 m.complete(p, DESTRUCT_OBJECT_FIELD);
818}813}
819fn obj_local(p: &mut Parser) {814fn obj_local(p: &mut Parser) {
820 assert!(p.at(T![local]));
821 let m = p.start();815 let m = p.start();
822 p.bump();816 p.bump_assert(T![local]);
823 bind(p);817 bind(p);
824 m.complete(p, OBJ_LOCAL);818 m.complete(p, OBJ_LOCAL);
825}819}
903 m.complete(p, BIND_DESTRUCT)897 m.complete(p, BIND_DESTRUCT)
904 };898 };
905}899}
906fn string(p: &mut Parser) {900fn text(p: &mut Parser) {
907 assert!(p.current().is_string());901 assert!(Text::can_cast(p.current()));
908 if p.at(STRING_BLOCK) {
909 // We use custom lexer, which skips enough bytes, but not returns error
910 // Instead we should call lexer again to verify if there is something wrong with string block
911 let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
912 &p.current_lexeme().expect("parser is at string block").text
913 ));
914 // In kinds, string blocks is parsed at least as `|||`902 p.bump();
915 lexer.bump(3);
916 let res = lex_str_block(&mut lexer);
917 debug_assert!(lexer.next().is_none(), "str_block is lexed");
918 match res {
919 Ok(_) => {
920 p.bump();
921 }
922 Err(e) => p.bump_remap(match e {
923 StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
924 StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
925 StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
926 StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
927 }),
928 }
929 } else {
930 p.bump();
931 }
932}903}
933fn number(p: &mut Parser) {904fn number(p: &mut Parser) {
934 assert!(p.current().is_number());905 assert!(Number::can_cast(p.current()));
935 p.bump();906 p.bump();
936}907}
937fn literal(p: &mut Parser) {908fn literal(p: &mut Parser) {
938 assert!(p.current().is_literal());909 assert!(Literal::can_cast(p.current()));
939 p.bump();910 p.bump();
940}911}
941fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {912fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
942 let _e = p.expected_syntax_name("value");913 let _e = p.expected_syntax_name("value");
943 Some(if p.current().is_literal() {914 Some(if Literal::can_cast(p.current()) {
944 let m = p.start();915 let m = p.start();
945 literal(p);916 literal(p);
946 m.complete(p, EXPR_LITERAL)917 m.complete(p, EXPR_LITERAL)
947 } else if p.current().is_string() {918 } else if Text::can_cast(p.current()) {
948 let m = p.start();919 let m = p.start();
949 string(p);920 text(p);
950 m.complete(p, EXPR_STRING)921 m.complete(p, EXPR_STRING)
951 } else if p.current().is_number() {922 } else if Number::can_cast(p.current()) {
952 let m = p.start();923 let m = p.start();
953 number(p);924 number(p);
954 m.complete(p, EXPR_NUMBER)925 m.complete(p, EXPR_NUMBER)
1025 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {996 } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
1026 let m = p.start();997 let m = p.start();
1027 p.bump();998 p.bump();
1028 string(p);999 text(p);
1029 m.complete(p, EXPR_IMPORT)1000 m.complete(p, EXPR_IMPORT)
1030 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {1001 } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
1031 let op = match p.current() {1002 let op = match p.current() {
1044 let m = p.start();1015 let m = p.start();
1045 p.bump();1016 p.bump();
1046 expr(p);1017 expr(p);
1047 assert!(p.at(T![')']));1018 p.expect(T![')']);
1048 p.bump();
1049 m.complete(p, EXPR_PARENED)1019 m.complete(p, EXPR_PARENED)
1050 } else {1020 } else {
1051 p.error_with_recovery_set(TS![]);1021 p.error_with_recovery_set(TS![]);
modifiedxtask/src/sourcegen/kinds.rsdiffbeforeafterboth
--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
 pub enum TokenKind {
 	/// May exist in token tree, but never in source code
 	Meta { grammar_name: String, name: String },
-	/// Specific parsing errors may be emitted as this type of kind
+	/// Specific parsing/lexing errors may be emitted as this type of kind
 	Error {
 		grammar_name: String,
 		name: String,
+		/// Whether this error is emitted at the lexing stage: either by the lexer directly, or from lex.rs
+		is_lexer_error: bool,
 		regex: Option<String>,
 		priority: Option<u32>,
 	},
@@ -133,13 +135,18 @@
 		});
 		$(define_kinds!($into = $($rest)*))?
 	}};
-	($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
-		$into.define_token(TokenKind::Error {
-			grammar_name: format!("ERROR_{}!", $name),
-			name: format!("ERROR_{}", $name),
-			regex: None$(.or(Some($regex.to_owned())))?,
-			priority: None$(.or(Some($priority)))?,
-		});
+	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+		{
+			let regex = None$(.or(Some($regex.to_owned())))?;
+			let priority = None$(.or(Some($priority)))?;
+			$into.define_token(TokenKind::Error {
+				grammar_name: format!("ERROR_{}!", $name),
+				name: format!("ERROR_{}", $name),
+				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+				regex,
+				priority,
+			});
+		}
 		$(define_kinds!($into = $($rest)*))?
 	}};
 	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
 		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
 		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
 		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
-		error("STRING_BLOCK_UNEXPECTED_END");
-		error("STRING_BLOCK_MISSING_NEW_LINE");
-		error("STRING_BLOCK_MISSING_TERMINATION");
-		error("STRING_BLOCK_MISSING_INDENT");
+		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+		error("STRING_BLOCK_MISSING_INDENT", lexer = true);
 		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
 		lit("WHITESPACE") => r"[ \t\n\r]+";
 		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
modifiedxtask/src/sourcegen/mod.rsdiffbeforeafterboth
--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
 			if let Some((special, name)) = classify_special(token) {
 				match special {
 					SpecialName::Literal => panic!("literal is not defined: {name}"),
-					SpecialName::Meta => kinds.define_token(TokenKind::Meta {
-						grammar_name: token.to_owned(),
-						name: format!("META_{}", name),
-					}),
-					SpecialName::Error => kinds.define_token(TokenKind::Error {
-						grammar_name: token.to_owned(),
-						name: format!("ERROR_{}", name),
-						regex: None,
-						priority: None,
-					}),
+					SpecialName::Meta => {
+						eprintln!("implicit meta: {}", name);
+						kinds.define_token(TokenKind::Meta {
+							grammar_name: token.to_owned(),
+							name: format!("META_{}", name),
+						})
+					}
+					SpecialName::Error => {
+						eprintln!("implicit error: {}", name);
+						kinds.define_token(TokenKind::Error {
+							grammar_name: token.to_owned(),
+							name: format!("ERROR_{}", name),
+							regex: None,
+							priority: None,
+							is_lexer_error: true,
+						})
+					}
 				};
 				continue;
 			};
 			let name = to_upper_snake_case(token);
+			eprintln!("implicit kw: {}", token);
 			kinds.define_token(TokenKind::Keyword {
 				code: token.to_owned(),
 				name: format!("{name}_KW"),
modifiedxtask/src/sourcegen/util.rsdiffbeforeafterboth
--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
 		}
 	}
 
-	eprintln!(" {} was not up-to-date, updating\n", file.display());
-	if std::env::var("CI").is_ok() {
-		eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
-	}
+	eprintln!("{} was not up-to-date, updating", file.display());
 	if let Some(parent) = file.parent() {
 		let _ = fs::create_dir_all(parent);
 	}