`5ad3c0601af8`

13 files changed

modifiedcmds/jrsonnet-fmt/src/main.rsdiff before after both

--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
 	nodes::{
 		ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
 		DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
-		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+		Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
 		UnaryOperator,
 	},
 	AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
 	}
 }
 
-impl Printable for String {
+impl Printable for Text {
 	fn print(&self) -> PrintItems {
 		p!(new: str(&format!("{}", self)))
 	}
@@ -168,7 +168,7 @@
 			FieldName::FieldNameFixed(f) => {
 				if let Some(id) = f.id() {
 					p!(new: {id})
-				} else if let Some(str) = f.string() {
+				} else if let Some(str) = f.text() {
 					p!(new: {str})
 				} else {
 					p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
 			Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
 			Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
 			Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
-			Expr::ExprString(s) => p!(new: {s.string()}),
+			Expr::ExprString(s) => p!(new: {s.text()}),
 			Expr::ExprNumber(n) => p!(new: {n.number()}),
 			Expr::ExprArray(a) => {
 				let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
 				pi
 			}
 			Expr::ExprImport(v) => {
-				p!(new: {v.import_kind()} str(" ") {v.string()})
+				p!(new: {v.import_kind()} str(" ") {v.text()})
 			}
 			Expr::ExprVar(n) => p!(new: {n.name()}),
 			Expr::ExprLocal(l) => {

modifiedcrates/jrsonnet-rowan-parser/jsonnet.ungramdiff before after both

--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
     name:Name
     ')'
 ExprString =
-    String
+    Text
 ExprNumber =
     Number
 ExprArray =
@@ -67,7 +67,7 @@
     ']'
 
 ExprImport =
-    ImportKind String
+    ImportKind Text
 
 ImportKind =
     'importstr'
@@ -217,7 +217,7 @@
 
 FieldNameFixed =
     id:Name
-|   String
+|   Text
 FieldNameDynamic =
     '['
     Expr
@@ -239,16 +239,27 @@
 |   '$'
 |   'super'
 
-String =
+Text =
     'LIT_STRING_DOUBLE!'
+|   'ERROR_STRING_DOUBLE_UNTERMINATED!'
 |   'LIT_STRING_SINGLE!'
+|   'ERROR_STRING_SINGLE_UNTERMINATED!'
 |   'LIT_STRING_DOUBLE_VERBATIM!'
+|   'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
 |   'LIT_STRING_SINGLE_VERBATIM!'
+|   'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+|   'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
 |   'LIT_STRING_BLOCK!'
+|   'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+|   'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+|   'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+|   'ERROR_STRING_BLOCK_MISSING_INDENT!'
 
 Number =
     'LIT_FLOAT!'
-|   'META_FORCE_ENUM!'
+|   'ERROR_FLOAT_JUNK_AFTER_POINT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+|   'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
 
 ForSpec =
     'for'
@@ -347,3 +358,12 @@
 TrueExpr=Expr
 FalseExpr=Expr
 LhsExpr=Expr
+
+// Trivia - tokens that are implicitly skipped by the parser
+Trivia =
+    'LIT_WHITESPACE!'
+|   'LIT_MULTI_LINE_COMMENT!'
+|   'ERROR_COMMENT_TOO_SHORT!'
+|   'ERROR_COMMENT_UNTERMINATED!'
+|   'LIT_SINGLE_LINE_HASH_COMMENT!'
+|   'LIT_SINGLE_LINE_SLASH_COMMENT!'

deletedcrates/jrsonnet-rowan-parser/src/classify.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
-	pub fn is_trivia(self) -> bool {
-		matches!(
-			self,
-			Self::WHITESPACE
-				| Self::MULTI_LINE_COMMENT
-				| Self::ERROR_COMMENT_TOO_SHORT
-				| Self::ERROR_COMMENT_UNTERMINATED
-				| Self::SINGLE_LINE_HASH_COMMENT
-				| Self::SINGLE_LINE_SLASH_COMMENT
-		)
-	}
-	pub fn is_string(self) -> bool {
-		matches!(
-			self,
-			Self::STRING_SINGLE
-				| Self::ERROR_STRING_SINGLE_UNTERMINATED
-				| Self::STRING_DOUBLE
-				| Self::ERROR_STRING_DOUBLE_UNTERMINATED
-				| Self::STRING_SINGLE_VERBATIM
-				| Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
-				| Self::STRING_DOUBLE_VERBATIM
-				| Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
-				| Self::STRING_BLOCK
-				| Self::ERROR_STRING_BLOCK_UNEXPECTED_END
-				| Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
-				| Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
-				| Self::ERROR_STRING_BLOCK_MISSING_INDENT
-		)
-	}
-	pub fn is_number(self) -> bool {
-		matches!(
-			self,
-			Self::FLOAT
-				| Self::ERROR_FLOAT_JUNK_AFTER_POINT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
-				| Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
-		)
-	}
-	pub fn is_literal(self) -> bool {
-		matches!(
-			self,
-			Self::NULL_KW
-				| Self::TRUE_KW | Self::FALSE_KW
-				| Self::SELF_KW | Self::DOLLAR
-				| Self::SUPER_KW
-		)
-	}
-}

modifiedcrates/jrsonnet-rowan-parser/src/event.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
 
 use crate::{
 	lex::Lexeme,
+	nodes::Trivia,
 	parser::{Parse, SyntaxError},
-	JsonnetLanguage, SyntaxKind,
+	AstToken, JsonnetLanguage, SyntaxKind,
 };
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
 	}
 	fn skip_whitespace(&mut self) {
 		while let Some(lexeme) = self.lexemes.get(self.offset) {
-			if !lexeme.kind.is_trivia() {
+			if !Trivia::can_cast(lexeme.kind) {
 				break;
 			}

modifiedcrates/jrsonnet-rowan-parser/src/generated/nodes.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
 	pub(crate) syntax: SyntaxNode,
 }
 impl ExprString {
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -332,7 +332,7 @@
 	pub fn import_kind(&self) -> Option<ImportKind> {
 		support::token_child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -692,7 +692,7 @@
 	pub fn id(&self) -> Option<Name> {
 		support::child(&self.syntax)
 	}
-	pub fn string(&self) -> Option<String> {
+	pub fn text(&self) -> Option<Text> {
 		support::token_child(&self.syntax)
 	}
 }
@@ -1038,18 +1038,27 @@
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
 	syntax: SyntaxToken,
-	kind: StringKind,
+	kind: TextKind,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
 	StringDouble,
+	ErrorStringDoubleUnterminated,
 	StringSingle,
+	ErrorStringSingleUnterminated,
 	StringDoubleVerbatim,
+	ErrorStringDoubleVerbatimUnterminated,
 	StringSingleVerbatim,
+	ErrorStringSingleVerbatimUnterminated,
+	ErrorStringVerbatimMissingQuotes,
 	StringBlock,
+	ErrorStringBlockUnexpectedEnd,
+	ErrorStringBlockMissingNewLine,
+	ErrorStringBlockMissingTermination,
+	ErrorStringBlockMissingIndent,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum NumberKind {
 	Float,
-	MetaForceEnum,
+	ErrorFloatJunkAfterPoint,
+	ErrorFloatJunkAfterExponent,
+	ErrorFloatJunkAfterExponentSign,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
 	Coloncolon,
 	Colon,
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+	syntax: SyntaxToken,
+	kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+	Whitespace,
+	MultiLineComment,
+	ErrorCommentTooShort,
+	ErrorCommentUnterminated,
+	SingleLineHashComment,
+	SingleLineSlashComment,
+}
 impl AstNode for SourceFile {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
-impl AstToken for String {
+impl AstToken for Text {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
 			STRING_DOUBLE
+			| ERROR_STRING_DOUBLE_UNTERMINATED
 			| STRING_SINGLE
+			| ERROR_STRING_SINGLE_UNTERMINATED
 			| STRING_DOUBLE_VERBATIM
+			| ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
 			| STRING_SINGLE_VERBATIM
-			| STRING_BLOCK => true,
+			| ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+			| ERROR_STRING_VERBATIM_MISSING_QUOTES
+			| STRING_BLOCK
+			| ERROR_STRING_BLOCK_UNEXPECTED_END
+			| ERROR_STRING_BLOCK_MISSING_NEW_LINE
+			| ERROR_STRING_BLOCK_MISSING_TERMINATION
+			| ERROR_STRING_BLOCK_MISSING_INDENT => true,
 			_ => false,
 		}
 	}
 	fn cast(syntax: SyntaxToken) -> Option<Self> {
 		let res = match syntax.kind() {
-			STRING_DOUBLE => String {
+			STRING_DOUBLE => Text {
 				syntax,
-				kind: StringKind::StringDouble,
+				kind: TextKind::StringDouble,
 			},
-			STRING_SINGLE => String {
+			ERROR_STRING_DOUBLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingle,
+				kind: TextKind::ErrorStringDoubleUnterminated,
 			},
-			STRING_DOUBLE_VERBATIM => String {
+			STRING_SINGLE => Text {
 				syntax,
-				kind: StringKind::StringDoubleVerbatim,
+				kind: TextKind::StringSingle,
 			},
-			STRING_SINGLE_VERBATIM => String {
+			ERROR_STRING_SINGLE_UNTERMINATED => Text {
 				syntax,
-				kind: StringKind::StringSingleVerbatim,
+				kind: TextKind::ErrorStringSingleUnterminated,
 			},
-			STRING_BLOCK => String {
+			STRING_DOUBLE_VERBATIM => Text {
 				syntax,
-				kind: StringKind::StringBlock,
+				kind: TextKind::StringDoubleVerbatim,
+			},
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+			},
+			STRING_SINGLE_VERBATIM => Text {
+				syntax,
+				kind: TextKind::StringSingleVerbatim,
 			},
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+				syntax,
+				kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+			},
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+				syntax,
+				kind: TextKind::ErrorStringVerbatimMissingQuotes,
+			},
+			STRING_BLOCK => Text {
+				syntax,
+				kind: TextKind::StringBlock,
+			},
+			ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockUnexpectedEnd,
+			},
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingNewLine,
+			},
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingTermination,
+			},
+			ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+				syntax,
+				kind: TextKind::ErrorStringBlockMissingIndent,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2718,12 +2790,12 @@
 		&self.syntax
 	}
 }
-impl String {
-	pub fn kind(&self) -> StringKind {
+impl Text {
+	pub fn kind(&self) -> TextKind {
 		self.kind
 	}
 }
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
@@ -2731,7 +2803,10 @@
 impl AstToken for Number {
 	fn can_cast(kind: SyntaxKind) -> bool {
 		match kind {
-			FLOAT | META_FORCE_ENUM => true,
+			FLOAT
+			| ERROR_FLOAT_JUNK_AFTER_POINT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT
+			| ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
 			_ => false,
 		}
 	}
@@ -2741,10 +2816,18 @@
 				syntax,
 				kind: NumberKind::Float,
 			},
-			META_FORCE_ENUM => Number {
+			ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterPoint,
+			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
 				syntax,
-				kind: NumberKind::MetaForceEnum,
+				kind: NumberKind::ErrorFloatJunkAfterExponent,
 			},
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+				syntax,
+				kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+			},
 			_ => return None,
 		};
 		Some(res)
@@ -2841,6 +2924,62 @@
 		std::fmt::Display::fmt(self.syntax(), f)
 	}
 }
+impl AstToken for Trivia {
+	fn can_cast(kind: SyntaxKind) -> bool {
+		match kind {
+			WHITESPACE
+			| MULTI_LINE_COMMENT
+			| ERROR_COMMENT_TOO_SHORT
+			| ERROR_COMMENT_UNTERMINATED
+			| SINGLE_LINE_HASH_COMMENT
+			| SINGLE_LINE_SLASH_COMMENT => true,
+			_ => false,
+		}
+	}
+	fn cast(syntax: SyntaxToken) -> Option<Self> {
+		let res = match syntax.kind() {
+			WHITESPACE => Trivia {
+				syntax,
+				kind: TriviaKind::Whitespace,
+			},
+			MULTI_LINE_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::MultiLineComment,
+			},
+			ERROR_COMMENT_TOO_SHORT => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentTooShort,
+			},
+			ERROR_COMMENT_UNTERMINATED => Trivia {
+				syntax,
+				kind: TriviaKind::ErrorCommentUnterminated,
+			},
+			SINGLE_LINE_HASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineHashComment,
+			},
+			SINGLE_LINE_SLASH_COMMENT => Trivia {
+				syntax,
+				kind: TriviaKind::SingleLineSlashComment,
+			},
+			_ => return None,
+		};
+		Some(res)
+	}
+	fn syntax(&self) -> &SyntaxToken {
+		&self.syntax
+	}
+}
+impl Trivia {
+	pub fn kind(&self) -> TriviaKind {
+		self.kind
+	}
+}
+impl std::fmt::Display for Trivia {
+	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		std::fmt::Display::fmt(self.syntax(), f)
+	}
+}
 impl std::fmt::Display for Expr {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		std::fmt::Display::fmt(self.syntax(), f)

modifiedcrates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
 	SELF_KW,
 	#[token("super")]
 	SUPER_KW,
-	META_FORCE_ENUM,
 	#[token("for")]
 	FOR_KW,
 	#[token("assert")]
@@ -253,10 +252,11 @@
 	BINARY_OPERATOR,
 	UNARY_OPERATOR,
 	LITERAL,
-	STRING,
+	TEXT,
 	NUMBER,
 	IMPORT_KIND,
 	VISIBILITY,
+	TRIVIA,
 	#[doc(hidden)]
 	__LAST,
 }
@@ -277,8 +277,8 @@
 	pub fn is_enum(self) -> bool {
 		match self {
 			EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
-			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
-			| NUMBER | IMPORT_KIND | VISIBILITY => true,
+			| DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+			| IMPORT_KIND | VISIBILITY | TRIVIA => true,
 			_ => false,
 		}
 	}

modifiedcrates/jrsonnet-rowan-parser/src/lex.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
 use logos::Logos;
 use rowan::{TextRange, TextSize};
 
-use crate::SyntaxKind;
+use crate::{
+	string_block::{lex_str_block, StringBlockError},
+	SyntaxKind,
+};
 
 pub struct Lexer<'a> {
 	inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
 	type Item = Lexeme<'a>;
 
 	fn next(&mut self) -> Option<Self::Item> {
-		let kind = self.inner.next()?;
+		use SyntaxKind::*;
+
+		let mut kind = self.inner.next()?;
 		let text = self.inner.slice();
 
+		if kind == STRING_BLOCK {
+			// We use a custom lexer, which skips enough bytes but does not return an error.
+			// Instead, we should call the lexer again to verify whether something is wrong with the string block
+			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+			// In kinds, string blocks are parsed at least as `|||`
+			lexer.bump(3);
+			let res = lex_str_block(&mut lexer);
+			debug_assert!(lexer.next().is_none(), "str_block is lexed");
+			match res {
+				Ok(_) => {}
+				Err(e) => {
+					kind = match e {
+						StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+						StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+						StringBlockError::MissingTermination => {
+							ERROR_STRING_BLOCK_MISSING_TERMINATION
+						}
+						StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+					}
+				}
+			}
+		}
+
 		Some(Self::Item {
 			kind,
 			text,

modifiedcrates/jrsonnet-rowan-parser/src/lib.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
 
 mod ast;
 mod binary;
-mod classify;
 mod event;
 mod generated;
 mod language;

modifiedcrates/jrsonnet-rowan-parser/src/marker.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
 			!kind.is_enum(),
 			"{kind:?} is a enum kind, you should use variant kinds instead"
 		);
-		// TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+		// TODO: is_lexer should return true if the enum variant has #[regex]/#[token] over it, or if it is explicitly defined as a lexer error
 		// debug_assert!(
-		// 	!kind.is_parser(),
-		// 	"{kind:?} should be only emitted by parser, not used directly"
+		// 	!kind.is_lexer(),
+		// 	"{kind:?} should be only emitted by lexer, not used directly"
 		// );
 		let event_at_pos = &mut p.events[self.start_event_idx];
 		assert_eq!(*event_at_pos, Event::Pending);

modifiedcrates/jrsonnet-rowan-parser/src/parser.rsdiff before after both

--- a/crates/jrsonnet-rowan-parser/src/parser.rs
+++ b/crates/jrsonnet-rowan-parser/src/parser.rs
@@ -8,10 +8,10 @@
 	event::Event,
 	lex::Lexeme,
 	marker::{AsRange, CompletedMarker, Marker, Ranger},
-	string_block::{lex_str_block, StringBlockError},
+	nodes::{Literal, Number, Text, Trivia},
 	token_set::SyntaxKindSet,
 	unary::UnaryOperator,
-	SyntaxKind,
+	AstToken, SyntaxKind,
 	SyntaxKind::*,
 	SyntaxNode, T, TS,
 };
@@ -36,6 +36,7 @@
 }
 
 pub struct Parser<'i> {
+	// TODO: remove all trivia before feeding to parser?
 	lexemes: &'i [Lexeme<'i>],
 	pub offset: usize,
 	pub events: Vec<Event>,
@@ -191,7 +192,7 @@
 		while self
 			.lexemes
 			.get(previous_token_idx)
-			.map_or(false, |l| l.kind.is_trivia())
+			.map_or(false, |l| Trivia::can_cast(l.kind))
 			&& previous_token_idx != 0
 		{
 			previous_token_idx -= 1;
@@ -200,13 +201,13 @@
 		Some(self.lexemes[previous_token_idx])
 	}
 	pub fn start_of_token(&self, mut idx: usize) -> TextSize {
-		while self.lexemes[idx].kind.is_trivia() {
+		while Trivia::can_cast(self.lexemes[idx].kind) {
 			idx += 1;
 		}
 		self.lexemes[idx].range.start()
 	}
 	pub fn end_of_token(&self, mut idx: usize) -> TextSize {
-		while self.lexemes[idx].kind.is_trivia() {
+		while Trivia::can_cast(self.lexemes[idx].kind) {
 			idx -= 1;
 		}
 		self.lexemes[idx].range.end()
@@ -267,7 +268,11 @@
 		self.bump();
 		Some(m.complete(self, SyntaxKind::ERROR))
 	}
-
+	fn bump_assert(&mut self, kind: SyntaxKind) {
+		self.skip_trivia();
+		assert!(self.at(kind), "expected {:?}", kind);
+		self.bump_remap(self.current());
+	}
 	fn bump(&mut self) {
 		self.skip_trivia();
 		self.bump_remap(self.current());
@@ -314,7 +319,7 @@
 			while self
 				.lexemes
 				.get(offset)
-				.map(|l| l.kind.is_trivia())
+				.map(|l| Trivia::can_cast(l.kind))
 				.unwrap_or(false)
 			{
 				offset += 1;
@@ -324,7 +329,7 @@
 		while self
 			.lexemes
 			.get(offset)
-			.map(|l| l.kind.is_trivia())
+			.map(|l| Trivia::can_cast(l.kind))
 			.unwrap_or(false)
 		{
 			offset += 1;
@@ -335,14 +340,10 @@
 		self.nth(0)
 	}
 	fn skip_trivia(&mut self) {
-		while self.peek_raw().is_trivia() {
+		while Trivia::can_cast(self.peek_raw()) {
 			self.offset += 1;
 		}
 	}
-	fn current_lexeme(&mut self) -> Option<&Lexeme> {
-		self.skip_trivia();
-		self.lexemes.get(self.offset)
-	}
 	fn peek_raw(&mut self) -> SyntaxKind {
 		self.lexemes
 			.get(self.offset)
@@ -516,8 +517,8 @@
 	} else if p.at(IDENT) {
 		name(p);
 		m.complete(p, FIELD_NAME_FIXED);
-	} else if p.current().is_string() {
-		string(p);
+	} else if Text::can_cast(p.current()) {
+		text(p);
 		m.complete(p, FIELD_NAME_FIXED);
 	} else {
 		p.error_with_recovery_set(TS![;]);
@@ -564,9 +565,8 @@
 	};
 }
 fn assertion(p: &mut Parser) {
-	assert!(p.at(T![assert]));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T![assert]);
 	expr(p).map(|c| c.wrap(p, LHS_EXPR));
 	if p.at(T![:]) {
 		p.bump();
@@ -575,10 +575,9 @@
 	m.complete(p, ASSERTION);
 }
 fn object(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['{']));
 	let m_t = p.start();
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T!['{']);
 
 	loop {
 		if p.at(T!['}']) {
@@ -619,9 +618,8 @@
 	m.complete(p, PARAM);
 }
 fn params_desc(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['(']));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T!['(']);
 
 	loop {
 		if p.at(T![')']) {
@@ -640,8 +638,7 @@
 }
 fn args_desc(p: &mut Parser) {
 	let m = p.start();
-	assert!(p.at(T!['(']));
-	p.bump();
+	p.bump_assert(T!['(']);
 
 	let started_named = Cell::new(false);
 
@@ -674,10 +671,9 @@
 }
 
 fn array(p: &mut Parser) -> CompletedMarker {
-	assert!(p.at(T!['[']));
 	// Start the list node
 	let m = p.start();
-	p.bump(); // '['
+	p.bump_assert(T!['[']);
 
 	// This vec will have at most one element in case of correct input
 	let mut compspecs = Vec::with_capacity(1);
@@ -795,9 +791,8 @@
 	m.complete(p, NAME);
 }
 fn destruct_rest(p: &mut Parser) {
-	assert!(p.at(T![...]));
-	p.bump();
 	let m = p.start();
+	p.bump_assert(T![...]);
 	if p.at(IDENT) {
 		p.bump()
 	}
@@ -817,9 +812,8 @@
 	m.complete(p, DESTRUCT_OBJECT_FIELD);
 }
 fn obj_local(p: &mut Parser) {
-	assert!(p.at(T![local]));
 	let m = p.start();
-	p.bump();
+	p.bump_assert(T![local]);
 	bind(p);
 	m.complete(p, OBJ_LOCAL);
 }
@@ -903,52 +897,29 @@
 		m.complete(p, BIND_DESTRUCT)
 	};
 }
-fn string(p: &mut Parser) {
-	assert!(p.current().is_string());
-	if p.at(STRING_BLOCK) {
-		// We use custom lexer, which skips enough bytes, but not returns error
-		// Instead we should call lexer again to verify if there is something wrong with string block
-		let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
-			&p.current_lexeme().expect("parser is at string block").text
-		));
-		// In kinds, string blocks is parsed at least as `|||`
-		lexer.bump(3);
-		let res = lex_str_block(&mut lexer);
-		debug_assert!(lexer.next().is_none(), "str_block is lexed");
-		match res {
-			Ok(_) => {
-				p.bump();
-			}
-			Err(e) => p.bump_remap(match e {
-				StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
-				StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
-				StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
-				StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
-			}),
-		}
-	} else {
-		p.bump();
-	}
+fn text(p: &mut Parser) {
+	assert!(Text::can_cast(p.current()));
+	p.bump();
 }
 fn number(p: &mut Parser) {
-	assert!(p.current().is_number());
+	assert!(Number::can_cast(p.current()));
 	p.bump();
 }
 fn literal(p: &mut Parser) {
-	assert!(p.current().is_literal());
+	assert!(Literal::can_cast(p.current()));
 	p.bump();
 }
 fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
 	let _e = p.expected_syntax_name("value");
-	Some(if p.current().is_literal() {
+	Some(if Literal::can_cast(p.current()) {
 		let m = p.start();
 		literal(p);
 		m.complete(p, EXPR_LITERAL)
-	} else if p.current().is_string() {
+	} else if Text::can_cast(p.current()) {
 		let m = p.start();
-		string(p);
+		text(p);
 		m.complete(p, EXPR_STRING)
-	} else if p.current().is_number() {
+	} else if Number::can_cast(p.current()) {
 		let m = p.start();
 		number(p);
 		m.complete(p, EXPR_NUMBER)
@@ -1025,7 +996,7 @@
 	} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
 		let m = p.start();
 		p.bump();
-		string(p);
+		text(p);
 		m.complete(p, EXPR_IMPORT)
 	} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
 		let op = match p.current() {
@@ -1044,8 +1015,7 @@
 		let m = p.start();
 		p.bump();
 		expr(p);
-		assert!(p.at(T![')']));
-		p.bump();
+		p.expect(T![')']);
 		m.complete(p, EXPR_PARENED)
 	} else {
 		p.error_with_recovery_set(TS![]);

modifiedxtask/src/sourcegen/kinds.rsdiff before after both

--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
 pub enum TokenKind {
 	/// May exist in token tree, but never in source code
 	Meta { grammar_name: String, name: String },
-	/// Specific parsing errors may be emitted as this type of kind
+	/// Specific parsing/lexing errors may be emitted as this type of kind
 	Error {
 		grammar_name: String,
 		name: String,
+		/// Whether this error is emitted during lexing: either by the generated lexer directly, or by lex.rs
+		is_lexer_error: bool,
 		regex: Option<String>,
 		priority: Option<u32>,
 	},
@@ -133,13 +135,18 @@
 		});
 		$(define_kinds!($into = $($rest)*))?
 	}};
-	($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
-		$into.define_token(TokenKind::Error {
-			grammar_name: format!("ERROR_{}!", $name),
-			name: format!("ERROR_{}", $name),
-			regex: None$(.or(Some($regex.to_owned())))?,
-			priority: None$(.or(Some($priority)))?,
-		});
+	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+		{
+			let regex = None$(.or(Some($regex.to_owned())))?;
+			let priority = None$(.or(Some($priority)))?;
+			$into.define_token(TokenKind::Error {
+				grammar_name: format!("ERROR_{}!", $name),
+				name: format!("ERROR_{}", $name),
+				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+				regex,
+				priority,
+			});
+		}
 		$(define_kinds!($into = $($rest)*))?
 	}};
 	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
 		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
 		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
 		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
-		error("STRING_BLOCK_UNEXPECTED_END");
-		error("STRING_BLOCK_MISSING_NEW_LINE");
-		error("STRING_BLOCK_MISSING_TERMINATION");
-		error("STRING_BLOCK_MISSING_INDENT");
+		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+		error("STRING_BLOCK_MISSING_INDENT", lexer = true);
 		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
 		lit("WHITESPACE") => r"[ \t\n\r]+";
 		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";

modifiedxtask/src/sourcegen/mod.rsdiff before after both

1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => kinds.define_token(TokenKind::Meta {52						grammar_name: token.to_owned(),53						name: format!("META_{}", name),54					}),55					SpecialName::Error => kinds.define_token(TokenKind::Error {56						grammar_name: token.to_owned(),57						name: format!("ERROR_{}", name),58						regex: None,59						priority: None,60					}),61				};62				continue;63			};64			let name = to_upper_snake_case(token);65			kinds.define_token(TokenKind::Keyword {66				code: token.to_owned(),67				name: format!("{name}_KW"),68			});69		}70	}71	for node in &ast.nodes {72		let name = to_upper_snake_case(&node.name);73		
kinds.define_node(&name);74	}75	for enum_ in &ast.enums {76		let name = to_upper_snake_case(&enum_.name);77		kinds.define_node(&name);78	}79	for token_enum in &ast.token_enums {80		let name = to_upper_snake_case(&token_enum.name);81		kinds.define_node(&name);82	}8384	let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;8586	let nodes = generate_nodes(&kinds, &ast)?;87	ensure_file_contents(88		&PathBuf::from(concat!(89			env!("CARGO_MANIFEST_DIR"),90			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",91		)),92		&syntax_kinds,93	)?;94	ensure_file_contents(95		&PathBuf::from(concat!(96			env!("CARGO_MANIFEST_DIR"),97			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",98		)),99		&nodes,100	)?;101	Ok(())102}103104fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {105	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);106	let token_kinds = kinds.tokens().map(TokenKind::expand_kind);107108	let keywords = kinds109		.tokens()110		.filter(|k| matches!(k, TokenKind::Keyword { .. }))111		.map(TokenKind::name)112		.map(|n| format_ident!("{n}"));113114	let nodes = kinds115		.nodes116		.iter()117		.map(|name| format_ident!("{}", name))118		.collect::<Vec<_>>();119120	let enums = grammar121		.enums122		.iter()123		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))124		.chain(125			grammar126				.token_enums127				.iter()128				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),129		);130131	let ast = quote! {132		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]133		use logos::Logos;134135		/// The kind of syntax node, e.g. 
`IDENT`, `USE_KW`, or `STRUCT`.136		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]137		#[repr(u16)]138		pub enum SyntaxKind {139			#[doc(hidden)]140			TOMBSTONE,141			#[doc(hidden)]142			EOF,143			#(#token_kinds,)*144			#[error]145			ERROR,146			#(#nodes,)*147			#[doc(hidden)]148			__LAST,149		}150		use self::SyntaxKind::*;151152		impl SyntaxKind {153			pub fn is_keyword(self) -> bool {154				match self {155					#(#keywords)|* => true,156					_ => false,157				}158			}159			pub fn is_enum(self) -> bool {160				match self {161					#(#enums)|* => true,162					_ => false,163				}164			}165166			pub fn from_raw(r: u16) -> Self {167				assert!(r < Self::__LAST as u16);168				unsafe { std::mem::transmute(r) }169			}170			pub fn into_raw(self) -> u16 {171				self as u16172			}173		}174175		#[macro_export]176		macro_rules! T {#(#t_macros);*}177		pub use T;178	};179180	reformat(&ast.to_string())181}182183fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {184	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar185		.nodes186		.iter()187		.map(|node| {188			let name = format_ident!("{}", node.name);189			let kind = format_ident!("{}", to_upper_snake_case(&node.name));190			let traits = node.traits.iter().map(|trait_name| {191				let trait_name = format_ident!("{}", trait_name);192				quote!(impl ast::#trait_name for #name {})193			});194195			let methods = node.fields.iter().map(|field| {196				let method_name = field.method_name(kinds);197				let ty = field.ty();198199				if field.is_many() {200					quote! {201						pub fn #method_name(&self) -> AstChildren<#ty> {202							support::children(&self.syntax)203						}204					}205				} else if let Some(token_kind) = field.token_kind(kinds) {206					quote! {207						pub fn #method_name(&self) -> Option<#ty> {208							support::token(&self.syntax, #token_kind)209						}210					}211				} else if field.is_token_enum(grammar) {212					quote! 
{213						pub fn #method_name(&self) -> Option<#ty> {214							support::token_child(&self.syntax)215						}216					}217				} else {218					quote! {219						pub fn #method_name(&self) -> Option<#ty> {220							support::child(&self.syntax)221						}222					}223				}224			});225			(226				quote! {227					#[pretty_doc_comment_placeholder_workaround]228					#[derive(Debug, Clone, PartialEq, Eq, Hash)]229					pub struct #name {230						pub(crate) syntax: SyntaxNode,231					}232233					#(#traits)*234235					impl #name {236						#(#methods)*237					}238				},239				quote! {240					impl AstNode for #name {241						fn can_cast(kind: SyntaxKind) -> bool {242							kind == #kind243						}244						fn cast(syntax: SyntaxNode) -> Option<Self> {245							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }246						}247						fn syntax(&self) -> &SyntaxNode { &self.syntax }248					}249				},250			)251		})252		.unzip();253254	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar255		.enums256		.iter()257		.map(|en| {258			let variants: Vec<_> = en259				.variants260				.iter()261				.map(|var| format_ident!("{}", var))262				.collect();263			let name = format_ident!("{}", en.name);264			let kinds: Vec<_> = variants265				.iter()266				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))267				.collect();268			let traits = en.traits.iter().map(|trait_name| {269				let trait_name = format_ident!("{}", trait_name);270				quote!(impl ast::#trait_name for #name {})271			});272273			let ast_node = quote! 
{274				impl AstNode for #name {275					fn can_cast(kind: SyntaxKind) -> bool {276						match kind {277							#(#kinds)|* => true,278							_ => false,279						}280					}281					fn cast(syntax: SyntaxNode) -> Option<Self> {282						let res = match syntax.kind() {283							#(284							#kinds => #name::#variants(#variants { syntax }),285							)*286							_ => return None,287						};288						Some(res)289					}290					fn syntax(&self) -> &SyntaxNode {291						match self {292							#(293							#name::#variants(it) => &it.syntax,294							)*295						}296					}297				}298			};299300			(301				quote! {302					#[pretty_doc_comment_placeholder_workaround]303					#[derive(Debug, Clone, PartialEq, Eq, Hash)]304					pub enum #name {305						#(#variants(#variants),)*306					}307308					#(#traits)*309				},310				quote! {311					#(312						impl From<#variants> for #name {313							fn from(node: #variants) -> #name {314								#name::#variants(node)315							}316						}317					)*318					#ast_node319				},320			)321		})322		.unzip();323324	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar325		.token_enums326		.iter()327		.map(|en| {328			let variants: Vec<_> = en329				.variants330				.iter()331				.map(|token| {332					format_ident!(333						"{}",334						to_pascal_case(kinds.token(token).expect("token exists").name())335					)336				})337				.collect();338			let name = format_ident!("{}", en.name);339			let kind_name = format_ident!("{}Kind", en.name);340			let kinds: Vec<_> = variants341				.iter()342				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))343				.collect();344345			let ast_node = quote! 
{346				impl AstToken for #name {347					fn can_cast(kind: SyntaxKind) -> bool {348						match kind {349							#(#kinds)|* => true,350							_ => false,351						}352					}353					fn cast(syntax: SyntaxToken) -> Option<Self> {354						let res = match syntax.kind() {355							#(356							#kinds => #name { syntax, kind: #kind_name::#variants },357							)*358							_ => return None,359						};360						Some(res)361					}362					fn syntax(&self) -> &SyntaxToken {363						&self.syntax364					}365				}366			};367368			(369				quote! {370					#[pretty_doc_comment_placeholder_workaround]371					#[derive(Debug, Clone, PartialEq, Eq, Hash)]372					pub struct #name { syntax: SyntaxToken, kind: #kind_name }373374					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]375					pub enum #kind_name {376						#(#variants,)*377					}378				},379				quote! {380					#ast_node381382					impl #name {383						pub fn kind(&self) -> #kind_name {384							self.kind385						}386					}387388					impl std::fmt::Display for #name {389						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {390							std::fmt::Display::fmt(self.syntax(), f)391						}392					}393				},394			)395		})396		.unzip();397398	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar399		.nodes400		.iter()401		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))402		.into_group_map()403		.into_iter()404		.sorted_by_key(|(k, _)| *k)405		.map(|(trait_name, nodes)| {406			let name = format_ident!("Any{}", trait_name);407			let trait_name = format_ident!("{}", trait_name);408			let kinds: Vec<_> = nodes409				.iter()410				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))411				.collect();412413			(414				quote! {415					#[pretty_doc_comment_placeholder_workaround]416					#[derive(Debug, Clone, PartialEq, Eq, Hash)]417					pub struct #name {418						pub(crate) syntax: SyntaxNode,419					}420					impl ast::#trait_name for #name {}421				},422				quote! 
{423					impl #name {424						#[inline]425						pub fn new<T: ast::#trait_name>(node: T) -> #name {426							#name {427								syntax: node.syntax().clone()428							}429						}430					}431					impl AstNode for #name {432						fn can_cast(kind: SyntaxKind) -> bool {433							match kind {434								#(#kinds)|* => true,435								_ => false,436							}437						}438						fn cast(syntax: SyntaxNode) -> Option<Self> {439							Self::can_cast(syntax.kind()).then(|| #name { syntax })440						}441						fn syntax(&self) -> &SyntaxNode {442							&self.syntax443						}444					}445				},446			)447		})448		.unzip();449450	let enum_names = grammar.enums.iter().map(|it| &it.name);451	let node_names = grammar.nodes.iter().map(|it| &it.name);452453	let display_impls = enum_names454		.chain(node_names.clone())455		.map(|it| format_ident!("{}", it))456		.map(|name| {457			quote! {458				impl std::fmt::Display for #name {459					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {460						std::fmt::Display::fmt(self.syntax(), f)461					}462				}463			}464		});465466	let ast = quote! {467		#![allow(non_snake_case, clippy::match_like_matches_macro)]468469		use crate::{470			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},471			ast::{AstNode, AstToken, AstChildren, support},472			T,473		};474475		#(#node_defs)*476		#(#enum_defs)*477		#(#token_enum_defs)*478		#(#any_node_defs)*479		#(#node_boilerplate_impls)*480		#(#enum_boilerplate_impls)*481		#(#token_enum_boilerplate_impls)*482		#(#any_node_boilerplate_impls)*483		#(#display_impls)*484	};485486	let ast = ast.to_string().replace("T ! 
[", "T![");487488	let mut res = String::with_capacity(ast.len() * 2);489490	let mut docs = grammar491		.nodes492		.iter()493		.map(|it| &it.doc)494		.chain(grammar.enums.iter().map(|it| &it.doc));495496	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {497		res.push_str(chunk);498		if let Some(doc) = docs.next() {499			write_doc_comment(doc, &mut res);500		}501	}502503	let res = reformat(&res)?;504	Ok(res.replace("#[derive", "\n#[derive"))505}506507fn write_doc_comment(contents: &[String], dest: &mut String) {508	use std::fmt::Write;509	for line in contents {510		writeln!(dest, "///{}", line).unwrap();511	}512}513514pub fn escape_token_macro(token: &str) -> TokenStream {515	if "{}[]()$".contains(token) {516		let c = token.chars().next().unwrap();517		quote! { #c }518	} else if token.contains('$') {519		quote! { #token }520	} else {521		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));522		quote! { #(#cs)* }523	}524}

1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {}", name);53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{}", name),56						})57					}58					SpecialName::Error => {59						eprintln!("implicit error: {}", name);60						kinds.define_token(TokenKind::Error {61							grammar_name: token.to_owned(),62							name: format!("ERROR_{}", name),63							regex: None,64							priority: None,65							is_lexer_error: true,66						})67					}68				};69				continue;70			};71			let name = to_upper_snake_case(token);72			eprintln!("implicit kw: {}", 
token);73			kinds.define_token(TokenKind::Keyword {74				code: token.to_owned(),75				name: format!("{name}_KW"),76			});77		}78	}79	for node in &ast.nodes {80		let name = to_upper_snake_case(&node.name);81		kinds.define_node(&name);82	}83	for enum_ in &ast.enums {84		let name = to_upper_snake_case(&enum_.name);85		kinds.define_node(&name);86	}87	for token_enum in &ast.token_enums {88		let name = to_upper_snake_case(&token_enum.name);89		kinds.define_node(&name);90	}9192	let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394	let nodes = generate_nodes(&kinds, &ast)?;95	ensure_file_contents(96		&PathBuf::from(concat!(97			env!("CARGO_MANIFEST_DIR"),98			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99		)),100		&syntax_kinds,101	)?;102	ensure_file_contents(103		&PathBuf::from(concat!(104			env!("CARGO_MANIFEST_DIR"),105			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106		)),107		&nodes,108	)?;109	Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114	let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116	let keywords = kinds117		.tokens()118		.filter(|k| matches!(k, TokenKind::Keyword { .. }))119		.map(TokenKind::name)120		.map(|n| format_ident!("{n}"));121122	let nodes = kinds123		.nodes124		.iter()125		.map(|name| format_ident!("{}", name))126		.collect::<Vec<_>>();127128	let enums = grammar129		.enums130		.iter()131		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132		.chain(133			grammar134				.token_enums135				.iter()136				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137		);138139	let ast = quote! {140		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141		use logos::Logos;142143		/// The kind of syntax node, e.g. 
`IDENT`, `USE_KW`, or `STRUCT`.144		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145		#[repr(u16)]146		pub enum SyntaxKind {147			#[doc(hidden)]148			TOMBSTONE,149			#[doc(hidden)]150			EOF,151			#(#token_kinds,)*152			#[error]153			ERROR,154			#(#nodes,)*155			#[doc(hidden)]156			__LAST,157		}158		use self::SyntaxKind::*;159160		impl SyntaxKind {161			pub fn is_keyword(self) -> bool {162				match self {163					#(#keywords)|* => true,164					_ => false,165				}166			}167			pub fn is_enum(self) -> bool {168				match self {169					#(#enums)|* => true,170					_ => false,171				}172			}173174			pub fn from_raw(r: u16) -> Self {175				assert!(r < Self::__LAST as u16);176				unsafe { std::mem::transmute(r) }177			}178			pub fn into_raw(self) -> u16 {179				self as u16180			}181		}182183		#[macro_export]184		macro_rules! T {#(#t_macros);*}185		pub use T;186	};187188	reformat(&ast.to_string())189}190191fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {192	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar193		.nodes194		.iter()195		.map(|node| {196			let name = format_ident!("{}", node.name);197			let kind = format_ident!("{}", to_upper_snake_case(&node.name));198			let traits = node.traits.iter().map(|trait_name| {199				let trait_name = format_ident!("{}", trait_name);200				quote!(impl ast::#trait_name for #name {})201			});202203			let methods = node.fields.iter().map(|field| {204				let method_name = field.method_name(kinds);205				let ty = field.ty();206207				if field.is_many() {208					quote! {209						pub fn #method_name(&self) -> AstChildren<#ty> {210							support::children(&self.syntax)211						}212					}213				} else if let Some(token_kind) = field.token_kind(kinds) {214					quote! {215						pub fn #method_name(&self) -> Option<#ty> {216							support::token(&self.syntax, #token_kind)217						}218					}219				} else if field.is_token_enum(grammar) {220					quote! 
{221						pub fn #method_name(&self) -> Option<#ty> {222							support::token_child(&self.syntax)223						}224					}225				} else {226					quote! {227						pub fn #method_name(&self) -> Option<#ty> {228							support::child(&self.syntax)229						}230					}231				}232			});233			(234				quote! {235					#[pretty_doc_comment_placeholder_workaround]236					#[derive(Debug, Clone, PartialEq, Eq, Hash)]237					pub struct #name {238						pub(crate) syntax: SyntaxNode,239					}240241					#(#traits)*242243					impl #name {244						#(#methods)*245					}246				},247				quote! {248					impl AstNode for #name {249						fn can_cast(kind: SyntaxKind) -> bool {250							kind == #kind251						}252						fn cast(syntax: SyntaxNode) -> Option<Self> {253							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }254						}255						fn syntax(&self) -> &SyntaxNode { &self.syntax }256					}257				},258			)259		})260		.unzip();261262	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar263		.enums264		.iter()265		.map(|en| {266			let variants: Vec<_> = en267				.variants268				.iter()269				.map(|var| format_ident!("{}", var))270				.collect();271			let name = format_ident!("{}", en.name);272			let kinds: Vec<_> = variants273				.iter()274				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))275				.collect();276			let traits = en.traits.iter().map(|trait_name| {277				let trait_name = format_ident!("{}", trait_name);278				quote!(impl ast::#trait_name for #name {})279			});280281			let ast_node = quote! 
{282				impl AstNode for #name {283					fn can_cast(kind: SyntaxKind) -> bool {284						match kind {285							#(#kinds)|* => true,286							_ => false,287						}288					}289					fn cast(syntax: SyntaxNode) -> Option<Self> {290						let res = match syntax.kind() {291							#(292							#kinds => #name::#variants(#variants { syntax }),293							)*294							_ => return None,295						};296						Some(res)297					}298					fn syntax(&self) -> &SyntaxNode {299						match self {300							#(301							#name::#variants(it) => &it.syntax,302							)*303						}304					}305				}306			};307308			(309				quote! {310					#[pretty_doc_comment_placeholder_workaround]311					#[derive(Debug, Clone, PartialEq, Eq, Hash)]312					pub enum #name {313						#(#variants(#variants),)*314					}315316					#(#traits)*317				},318				quote! {319					#(320						impl From<#variants> for #name {321							fn from(node: #variants) -> #name {322								#name::#variants(node)323							}324						}325					)*326					#ast_node327				},328			)329		})330		.unzip();331332	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar333		.token_enums334		.iter()335		.map(|en| {336			let variants: Vec<_> = en337				.variants338				.iter()339				.map(|token| {340					format_ident!(341						"{}",342						to_pascal_case(kinds.token(token).expect("token exists").name())343					)344				})345				.collect();346			let name = format_ident!("{}", en.name);347			let kind_name = format_ident!("{}Kind", en.name);348			let kinds: Vec<_> = variants349				.iter()350				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))351				.collect();352353			let ast_node = quote! 
{354				impl AstToken for #name {355					fn can_cast(kind: SyntaxKind) -> bool {356						match kind {357							#(#kinds)|* => true,358							_ => false,359						}360					}361					fn cast(syntax: SyntaxToken) -> Option<Self> {362						let res = match syntax.kind() {363							#(364							#kinds => #name { syntax, kind: #kind_name::#variants },365							)*366							_ => return None,367						};368						Some(res)369					}370					fn syntax(&self) -> &SyntaxToken {371						&self.syntax372					}373				}374			};375376			(377				quote! {378					#[pretty_doc_comment_placeholder_workaround]379					#[derive(Debug, Clone, PartialEq, Eq, Hash)]380					pub struct #name { syntax: SyntaxToken, kind: #kind_name }381382					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]383					pub enum #kind_name {384						#(#variants,)*385					}386				},387				quote! {388					#ast_node389390					impl #name {391						pub fn kind(&self) -> #kind_name {392							self.kind393						}394					}395396					impl std::fmt::Display for #name {397						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {398							std::fmt::Display::fmt(self.syntax(), f)399						}400					}401				},402			)403		})404		.unzip();405406	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar407		.nodes408		.iter()409		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))410		.into_group_map()411		.into_iter()412		.sorted_by_key(|(k, _)| *k)413		.map(|(trait_name, nodes)| {414			let name = format_ident!("Any{}", trait_name);415			let trait_name = format_ident!("{}", trait_name);416			let kinds: Vec<_> = nodes417				.iter()418				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))419				.collect();420421			(422				quote! {423					#[pretty_doc_comment_placeholder_workaround]424					#[derive(Debug, Clone, PartialEq, Eq, Hash)]425					pub struct #name {426						pub(crate) syntax: SyntaxNode,427					}428					impl ast::#trait_name for #name {}429				},430				quote! 
{431					impl #name {432						#[inline]433						pub fn new<T: ast::#trait_name>(node: T) -> #name {434							#name {435								syntax: node.syntax().clone()436							}437						}438					}439					impl AstNode for #name {440						fn can_cast(kind: SyntaxKind) -> bool {441							match kind {442								#(#kinds)|* => true,443								_ => false,444							}445						}446						fn cast(syntax: SyntaxNode) -> Option<Self> {447							Self::can_cast(syntax.kind()).then(|| #name { syntax })448						}449						fn syntax(&self) -> &SyntaxNode {450							&self.syntax451						}452					}453				},454			)455		})456		.unzip();457458	let enum_names = grammar.enums.iter().map(|it| &it.name);459	let node_names = grammar.nodes.iter().map(|it| &it.name);460461	let display_impls = enum_names462		.chain(node_names.clone())463		.map(|it| format_ident!("{}", it))464		.map(|name| {465			quote! {466				impl std::fmt::Display for #name {467					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {468						std::fmt::Display::fmt(self.syntax(), f)469					}470				}471			}472		});473474	let ast = quote! {475		#![allow(non_snake_case, clippy::match_like_matches_macro)]476477		use crate::{478			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},479			ast::{AstNode, AstToken, AstChildren, support},480			T,481		};482483		#(#node_defs)*484		#(#enum_defs)*485		#(#token_enum_defs)*486		#(#any_node_defs)*487		#(#node_boilerplate_impls)*488		#(#enum_boilerplate_impls)*489		#(#token_enum_boilerplate_impls)*490		#(#any_node_boilerplate_impls)*491		#(#display_impls)*492	};493494	let ast = ast.to_string().replace("T ! 
[", "T![");495496	let mut res = String::with_capacity(ast.len() * 2);497498	let mut docs = grammar499		.nodes500		.iter()501		.map(|it| &it.doc)502		.chain(grammar.enums.iter().map(|it| &it.doc));503504	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {505		res.push_str(chunk);506		if let Some(doc) = docs.next() {507			write_doc_comment(doc, &mut res);508		}509	}510511	let res = reformat(&res)?;512	Ok(res.replace("#[derive", "\n#[derive"))513}514515fn write_doc_comment(contents: &[String], dest: &mut String) {516	use std::fmt::Write;517	for line in contents {518		writeln!(dest, "///{}", line).unwrap();519	}520}521522pub fn escape_token_macro(token: &str) -> TokenStream {523	if "{}[]()$".contains(token) {524		let c = token.chars().next().unwrap();525		quote! { #c }526	} else if token.contains('$') {527		quote! { #token }528	} else {529		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));530		quote! { #(#cs)* }531	}532}

modifiedxtask/src/sourcegen/util.rsdiff before after both

--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
 		}
 	}
 
-	eprintln!(" {} was not up-to-date, updating\n", file.display());
-	if std::env::var("CI").is_ok() {
-		eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
-	}
+	eprintln!("{} was not up-to-date, updating", file.display());
 	if let Some(parent) = file.parent() {
 		let _ = fs::create_dir_all(parent);
 	}