git.delta.rocks / jrsonnet / refs/commits / 7c03fc40023d

diff | tree | log

feat(lexer) explicit token names

voylxuxy · Yaroslav Bolyukin · 2026-03-23 · parent: #ab84d0c · .patch · .diff
in: master

7 files changed

modified: Cargo.toml (diff | before | after | both)
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@
 jrsonnet-evaluator = { path = "./crates/jrsonnet-evaluator", version = "0.5.0-pre97" }
 jrsonnet-macros = { path = "./crates/jrsonnet-macros", version = "0.5.0-pre97" }
 jrsonnet-ir = { path = "./crates/jrsonnet-ir", version = "0.5.0-pre97" }
-jrsonnet-ir-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
+jrsonnet-ir-parser = { path = "./crates/jrsonnet-ir-parser", version = "0.5.0-pre97" }
 jrsonnet-peg-parser = { path = "./crates/jrsonnet-peg-parser", version = "0.5.0-pre97" }
 jrsonnet-rowan-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
 jrsonnet-interner = { path = "./crates/jrsonnet-interner", version = "0.5.0-pre97" }
modified: crates/jrsonnet-lexer/src/generated/syntax_kinds.rs (diff | before | after | both)
--- a/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
@@ -132,6 +132,10 @@
 	ERROR_COMMENT_TOO_SHORT,
 	#[regex("/\\*([^*/]|\\*[^/])+")]
 	ERROR_COMMENT_UNTERMINATED,
+	ERROR_NO_OPERATOR,
+	ERROR_MISSING_TOKEN,
+	ERROR_UNEXPECTED_TOKEN,
+	ERROR_CUSTOM,
 	#[token("tailstrict")]
 	TAILSTRICT_KW,
 	#[token("local")]
@@ -155,7 +159,6 @@
 	#[token("in")]
 	IN_KW,
 	META_OBJECT_APPLY,
-	ERROR_NO_OPERATOR,
 	#[token("null")]
 	NULL_KW,
 	#[token("true")]
@@ -170,9 +173,6 @@
 	FOR_KW,
 	#[token("assert")]
 	ASSERT_KW,
-	ERROR_MISSING_TOKEN,
-	ERROR_UNEXPECTED_TOKEN,
-	ERROR_CUSTOM,
 	LEXING_ERROR,
 	__LAST_TOKEN,
 	#[doc(hidden)]
modified: crates/jrsonnet-lexer/src/string_block.rs (diff | before | after | both)
--- a/crates/jrsonnet-lexer/src/string_block.rs
+++ b/crates/jrsonnet-lexer/src/string_block.rs
@@ -211,6 +211,7 @@
 
 	// Process leading blank lines before calculating string block indent
 	while ctx.peek() == Some('\n') {
+		lex.mark_line("");
 		ctx.next();
 	}
 
modified: crates/jrsonnet-rowan-parser/jsonnet.ungram (diff | before | after | both)
--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -209,7 +209,7 @@
 |   FieldNameDynamic
 
 Visibility =
-    ':' v1:':'? v2:':'?
+    ':' ':'? ':'?
 
 Literal =
     'null'
modified: crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs (diff | before | after | both)
--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -76,6 +76,10 @@
 	MULTI_LINE_COMMENT,
 	ERROR_COMMENT_TOO_SHORT,
 	ERROR_COMMENT_UNTERMINATED,
+	ERROR_NO_OPERATOR,
+	ERROR_MISSING_TOKEN,
+	ERROR_UNEXPECTED_TOKEN,
+	ERROR_CUSTOM,
 	TAILSTRICT_KW,
 	LOCAL_KW,
 	IMPORTSTR_KW,
@@ -88,7 +92,6 @@
 	ERROR_KW,
 	IN_KW,
 	META_OBJECT_APPLY,
-	ERROR_NO_OPERATOR,
 	NULL_KW,
 	TRUE_KW,
 	FALSE_KW,
@@ -96,9 +99,6 @@
 	SUPER_KW,
 	FOR_KW,
 	ASSERT_KW,
-	ERROR_MISSING_TOKEN,
-	ERROR_UNEXPECTED_TOKEN,
-	ERROR_CUSTOM,
 	LEXING_ERROR,
 	__LAST_TOKEN,
 	SOURCE_FILE,
@@ -199,6 +199,149 @@
 			_ => false,
 		}
 	}
+	pub fn error_description(self) -> Option<&'static str> {
+		match self {
+			ERROR_FLOAT_JUNK_AFTER_POINT => {
+				::core::option::Option::Some("junk after decimal point in number literal")
+			}
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => {
+				::core::option::Option::Some("junk after exponent in number literal")
+			}
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => {
+				::core::option::Option::Some("junk after exponent sign in number literal")
+			}
+			ERROR_STRING_DOUBLE_UNTERMINATED => {
+				::core::option::Option::Some("unterminated double-quoted string")
+			}
+			ERROR_STRING_SINGLE_UNTERMINATED => {
+				::core::option::Option::Some("unterminated single-quoted string")
+			}
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
+				::core::option::Option::Some("unterminated verbatim double-quoted string")
+			}
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
+				::core::option::Option::Some("unterminated verbatim single-quoted string")
+			}
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => {
+				::core::option::Option::Some("verbatim string missing opening quotes")
+			}
+			ERROR_STRING_BLOCK_UNEXPECTED_END => {
+				::core::option::Option::Some("unexpected end of text block")
+			}
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => {
+				::core::option::Option::Some("text block requires new line after |||")
+			}
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => {
+				::core::option::Option::Some("unterminated text block")
+			}
+			ERROR_STRING_BLOCK_MISSING_INDENT => {
+				::core::option::Option::Some("text block first line must be indented")
+			}
+			ERROR_COMMENT_TOO_SHORT => ::core::option::Option::Some("comment too short"),
+			ERROR_COMMENT_UNTERMINATED => {
+				::core::option::Option::Some("unterminated multi-line comment")
+			}
+			ERROR_NO_OPERATOR => ::core::option::Option::Some("expected operator"),
+			ERROR_MISSING_TOKEN => ::core::option::Option::Some("missing token"),
+			ERROR_UNEXPECTED_TOKEN => ::core::option::Option::Some("unexpected token"),
+			ERROR_CUSTOM => ::core::option::Option::Some("error"),
+			LEXING_ERROR => ::core::option::Option::Some("unexpected character"),
+			_ => None,
+		}
+	}
+	pub fn display_name(self) -> &'static str {
+		match self {
+			OR => "'||'",
+			NULL_COAELSE => "'??'",
+			AND => "'&&'",
+			BIT_OR => "'|'",
+			BIT_XOR => "'^'",
+			BIT_AND => "'&'",
+			EQ => "'=='",
+			NE => "'!='",
+			LT => "'<'",
+			GT => "'>'",
+			LE => "'<='",
+			GE => "'>='",
+			LHS => "'<<'",
+			RHS => "'>>'",
+			PLUS => "'+'",
+			MINUS => "'-'",
+			MUL => "'*'",
+			DIV => "'/'",
+			MODULO => "'%'",
+			NOT => "'!'",
+			BIT_NOT => "'~'",
+			L_BRACK => "'['",
+			R_BRACK => "']'",
+			L_PAREN => "'('",
+			R_PAREN => "')'",
+			L_BRACE => "'{'",
+			R_BRACE => "'}'",
+			COLON => "':'",
+			SEMI => "';'",
+			DOT => "'.'",
+			DOTDOTDOT => "'...'",
+			COMMA => "','",
+			DOLLAR => "'$'",
+			ASSIGN => "'='",
+			QUESTION_MARK => "'?'",
+			FLOAT => "number",
+			ERROR_FLOAT_JUNK_AFTER_POINT => "junk after decimal point in number literal",
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT => "junk after exponent in number literal",
+			ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => "junk after exponent sign in number literal",
+			STRING_DOUBLE => "string",
+			ERROR_STRING_DOUBLE_UNTERMINATED => "unterminated double-quoted string",
+			STRING_SINGLE => "string",
+			ERROR_STRING_SINGLE_UNTERMINATED => "unterminated single-quoted string",
+			STRING_DOUBLE_VERBATIM => "string",
+			ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
+				"unterminated verbatim double-quoted string"
+			}
+			STRING_SINGLE_VERBATIM => "string",
+			ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
+				"unterminated verbatim single-quoted string"
+			}
+			ERROR_STRING_VERBATIM_MISSING_QUOTES => "verbatim string missing opening quotes",
+			STRING_BLOCK => "string",
+			ERROR_STRING_BLOCK_UNEXPECTED_END => "unexpected end of text block",
+			ERROR_STRING_BLOCK_MISSING_NEW_LINE => "text block requires new line after |||",
+			ERROR_STRING_BLOCK_MISSING_TERMINATION => "unterminated text block",
+			ERROR_STRING_BLOCK_MISSING_INDENT => "text block first line must be indented",
+			IDENT => "identifier",
+			WHITESPACE => "whitespace",
+			SINGLE_LINE_SLASH_COMMENT => "comment",
+			SINGLE_LINE_HASH_COMMENT => "comment",
+			MULTI_LINE_COMMENT => "comment",
+			ERROR_COMMENT_TOO_SHORT => "comment too short",
+			ERROR_COMMENT_UNTERMINATED => "unterminated multi-line comment",
+			ERROR_NO_OPERATOR => "expected operator",
+			ERROR_MISSING_TOKEN => "missing token",
+			ERROR_UNEXPECTED_TOKEN => "unexpected token",
+			ERROR_CUSTOM => "error",
+			TAILSTRICT_KW => "'tailstrict'",
+			LOCAL_KW => "'local'",
+			IMPORTSTR_KW => "'importstr'",
+			IMPORTBIN_KW => "'importbin'",
+			IMPORT_KW => "'import'",
+			IF_KW => "'if'",
+			THEN_KW => "'then'",
+			ELSE_KW => "'else'",
+			FUNCTION_KW => "'function'",
+			ERROR_KW => "'error'",
+			IN_KW => "'in'",
+			META_OBJECT_APPLY => "meta_object_apply",
+			NULL_KW => "'null'",
+			TRUE_KW => "'true'",
+			FALSE_KW => "'false'",
+			SELF_KW => "'self'",
+			SUPER_KW => "'super'",
+			FOR_KW => "'for'",
+			ASSERT_KW => "'assert'",
+			LEXING_ERROR => "unexpected character",
+			_ => "unknown",
+		}
+	}
 	pub fn from_raw(r: u16) -> Self {
 		assert!(r < Self::__LAST as u16);
 		unsafe { std::mem::transmute(r) }
modified: xtask/src/sourcegen/kinds.rs (diff | before | after | both)
before · xtask/src/sourcegen/kinds.rs
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		#[allow(dead_code)]18		/// Is this error returned by lexer directly, or from lex.rs19		is_lexer_error: bool,20		regex: Option<String>,21		priority: Option<u32>,22	},23	/// Keyword - literal match of token24	Keyword {25		/// How this keyword appears in grammar/code, should be same as Kinds key26		code: String,27		name: String,28	},29	/// Literal - something defined by user, i.e strings, identifiers, smth30	Literal {31		/// How this keyword appears in grammar, should be same as Kinds key32		grammar_name: String,33		name: String,34		/// Regex for Logos lexer35		regex: String,36		/// Path to custom lexer37		lexer: Option<String>,38	},39}4041impl TokenKind {42	pub fn grammar_name(&self) -> &str {43		match self {44			Self::Keyword { code, .. } => code,45			Self::Literal { grammar_name, .. }46			| Self::Meta { grammar_name, .. }47			| Self::Error { grammar_name, .. } => grammar_name,48		}49	}50	/// How this keyword should appear in kinds enum, screaming snake cased51	pub fn name(&self) -> &str {52		match self {53			Self::Keyword { name, .. }54			| Self::Literal { name, .. }55			| Self::Meta { name, .. }56			| Self::Error { name, .. } => name,57		}58	}59	pub fn expand_kind(&self, lexer: bool) -> TokenStream {60		let name = format_ident!("{}", self.name());61		let attr = match self {62			Self::Keyword { code, .. } => quote! {#[token(#code)]},63			Self::Literal { regex, lexer, .. 
} => {64				let lexer = lexer65					.as_deref()66					.map(TokenStream::from_str)67					.map(|r| r.expect("path is correct"));68				quote! {#[regex(#regex, #lexer)]}69			}70			Self::Error {71				regex, priority, ..72			} if regex.is_some() => {73				let priority = priority.map(|p| quote! {, priority = #p});74				quote! {#[regex(#regex #priority)]}75			}76			_ => quote! {},77		};78		let attr = if lexer {79			attr80		} else {81			quote! {}82		};83		quote! {84			#attr85			#name86		}87	}88	pub fn expand_t_macros(&self) -> Option<TokenStream> {89		match self {90			Self::Keyword { code, name } => {91				let code = escape_token_macro(code);92				let name = format_ident!("{name}");93				Some(quote! {94					[#code] => {$crate::SyntaxKind::#name}95				})96			}97			// Meta items should not appear in T![_]98			_ => None,99		}100	}101102	/// How this token should be referenced in code103	/// Keywords are referenced with `T![_]` macro,104	/// and literals are referenced directly by name105	pub fn reference(&self) -> TokenStream {106		if let Self::Keyword { code, .. } = self {107			let code = escape_token_macro(code);108			quote! {T![#code]}109		} else {110			let name = self.name();111			let ident = format_ident!("{name}");112			quote! {#ident}113		}114	}115116	pub fn method_name(&self) -> Ident {117		match self {118			Self::Keyword { name, .. } => {119				format_ident!("{}_token", name.to_lowercase())120			}121			Self::Literal { name, .. } => {122				format_ident!("{}_lit", name.to_lowercase())123			}124			Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),125			Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),126		}127	}128}129130#[macro_export]131macro_rules! define_kinds {132	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) 
=> {{133		$into.define_token(TokenKind::Literal {134			grammar_name: format!("LIT_{}!", $name),135			name: $name.to_owned(),136			regex: $regex.to_owned(),137			lexer: None $(.or_else(|| Some($lexer.to_string())))?,138		});139		$(define_kinds!($into = $($rest)*))?140	}};141	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{142		{143			let regex = None$(.or(Some($regex.to_owned())))?;144			let priority = None$(.or(Some($priority)))?;145			$into.define_token(TokenKind::Error {146				grammar_name: format!("ERROR_{}!", $name),147				name: format!("ERROR_{}", $name),148				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),149				regex,150				priority,151			});152		}153		$(define_kinds!($into = $($rest)*))?154	}};155	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{156		$into.define_token(TokenKind::Keyword {157			code: format!("{}", $tok),158			name: $name.to_owned(),159		});160		$(define_kinds!($into = $($rest)*))?161	}};162	($into:ident =) => {{}}163}164use std::{collections::HashSet, str::FromStr};165166use indexmap::IndexMap;167use proc_macro2::{Ident, TokenStream};168use quote::{format_ident, quote};169170use super::escape_token_macro;171172impl KindsSrc {173	pub fn new() -> Self {174		Self {175			defined_tokens: IndexMap::new(),176			defined_node_names: HashSet::new(),177			nodes: Vec::new(),178		}179	}180	pub fn define_token(&mut self, token: TokenKind) {181		assert!(182			self.defined_node_names.insert(token.name().to_owned()),183			"node name already defined: {}",184			token.name()185		);186		assert!(187			self.defined_tokens188				.insert(token.grammar_name().to_owned(), token.clone())189				.is_none(),190			"token already defined: {}",191			token.grammar_name()192		);193	}194	pub fn define_node(&mut self, node: &str) {195		assert!(196			self.defined_node_names.insert(node.to_owned()),197			"node name already defined: 
{node}"198		);199		self.nodes.push(node.to_string());200	}201	pub fn token(&self, tok: &str) -> Option<&TokenKind> {202		self.defined_tokens.get(tok)203	}204	pub fn is_token(&self, tok: &str) -> bool {205		self.defined_tokens.contains_key(tok)206	}207	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {208		self.defined_tokens.iter().map(|(_, v)| v)209	}210}211212pub fn jsonnet_kinds() -> KindsSrc {213	let mut kinds = KindsSrc::new();214	define_kinds![kinds =215		"||" => "OR";216		"??" => "NULL_COAELSE";217		"&&" => "AND";218		"|" => "BIT_OR";219		"^" => "BIT_XOR";220		"&" => "BIT_AND";221		"==" => "EQ";222		"!=" => "NE";223		"<" => "LT";224		">" => "GT";225		"<=" => "LE";226		">=" => "GE";227		"<<" => "LHS";228		">>" => "RHS";229		"+" => "PLUS";230		"-" => "MINUS";231		"*" => "MUL";232		"/" => "DIV";233		"%" => "MODULO";234		"!" => "NOT";235		"~" => "BIT_NOT";236		"[" => "L_BRACK";237		"]" => "R_BRACK";238		"(" => "L_PAREN";239		")" => "R_PAREN";240		"{" => "L_BRACE";241		"}" => "R_BRACE";242		":" => "COLON";243		";" => "SEMI";244		"." => "DOT";245		"..." => "DOTDOTDOT";246		"," => "COMMA";247		"$" => "DOLLAR";248		"=" => "ASSIGN";249		"?" 
=> "QUESTION_MARK";250		// Literals251		lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";252		error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";253		error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";254		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";255		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";256		error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";257		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";258		error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";259		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";260		error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";261		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";262		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";263		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";264		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";265		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);266		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);267		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);268		error("STRING_BLOCK_MISSING_INDENT", lexer = true);269		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";270		lit("WHITESPACE") => r"[ \t\n\r]+";271		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";272		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";273		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";274		error("COMMENT_TOO_SHORT") => r"/\*/";275		error("COMMENT_UNTERMINATED") =>  r"/\*([^*/]|\*[^/])+";276	];277	kinds278}
after · xtask/src/sourcegen/kinds.rs
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		#[allow(dead_code)]18		/// Is this error returned by lexer directly, or from lex.rs19		is_lexer_error: bool,20		regex: Option<String>,21		priority: Option<u32>,22		description: String,23	},24	/// Keyword - literal match of token25	Keyword {26		/// How this keyword appears in grammar/code, should be same as Kinds key27		code: String,28		name: String,29	},30	/// Literal - something defined by user, i.e strings, identifiers, smth31	Literal {32		/// How this keyword appears in grammar, should be same as Kinds key33		grammar_name: String,34		name: String,35		/// Regex for Logos lexer36		regex: String,37		/// Path to custom lexer38		lexer: Option<String>,39	},40}4142impl TokenKind {43	pub fn grammar_name(&self) -> &str {44		match self {45			Self::Keyword { code, .. } => code,46			Self::Literal { grammar_name, .. }47			| Self::Meta { grammar_name, .. }48			| Self::Error { grammar_name, .. } => grammar_name,49		}50	}51	/// How this keyword should appear in kinds enum, screaming snake cased52	pub fn name(&self) -> &str {53		match self {54			Self::Keyword { name, .. }55			| Self::Literal { name, .. }56			| Self::Meta { name, .. }57			| Self::Error { name, .. } => name,58		}59	}60	pub fn expand_kind(&self, lexer: bool) -> TokenStream {61		let name = format_ident!("{}", self.name());62		let attr = match self {63			Self::Keyword { code, .. } => quote! {#[token(#code)]},64			Self::Literal { regex, lexer, .. 
} => {65				let lexer = lexer66					.as_deref()67					.map(TokenStream::from_str)68					.map(|r| r.expect("path is correct"));69				quote! {#[regex(#regex, #lexer)]}70			}71			Self::Error {72				regex, priority, ..73			} if regex.is_some() => {74				let priority = priority.map(|p| quote! {, priority = #p});75				quote! {#[regex(#regex #priority)]}76			}77			_ => quote! {},78		};79		let attr = if lexer {80			attr81		} else {82			quote! {}83		};84		quote! {85			#attr86			#name87		}88	}89	pub fn expand_t_macros(&self) -> Option<TokenStream> {90		match self {91			Self::Keyword { code, name } => {92				let code = escape_token_macro(code);93				let name = format_ident!("{name}");94				Some(quote! {95					[#code] => {$crate::SyntaxKind::#name}96				})97			}98			// Meta items should not appear in T![_]99			_ => None,100		}101	}102103	/// How this token should be referenced in code104	/// Keywords are referenced with `T![_]` macro,105	/// and literals are referenced directly by name106	pub fn reference(&self) -> TokenStream {107		if let Self::Keyword { code, .. } = self {108			let code = escape_token_macro(code);109			quote! {T![#code]}110		} else {111			let name = self.name();112			let ident = format_ident!("{name}");113			quote! {#ident}114		}115	}116117	pub fn display_name(&self) -> String {118		match self {119			Self::Keyword { code, .. } => format!("'{code}'"),120			Self::Literal { name, .. } => match name.as_str() {121				"FLOAT" => "number".to_owned(),122				"IDENT" => "identifier".to_owned(),123				"STRING_DOUBLE" | "STRING_SINGLE" | "STRING_DOUBLE_VERBATIM"124				| "STRING_SINGLE_VERBATIM" | "STRING_BLOCK" => "string".to_owned(),125				"WHITESPACE" => "whitespace".to_owned(),126				"SINGLE_LINE_SLASH_COMMENT" | "SINGLE_LINE_HASH_COMMENT"127				| "MULTI_LINE_COMMENT" => "comment".to_owned(),128				_ => name.to_lowercase(),129			},130			Self::Meta { name, .. } => name.to_lowercase(),131			Self::Error { description, .. 
} => description.clone(),132		}133	}134135	pub fn method_name(&self) -> Ident {136		match self {137			Self::Keyword { name, .. } => {138				format_ident!("{}_token", name.to_lowercase())139			}140			Self::Literal { name, .. } => {141				format_ident!("{}_lit", name.to_lowercase())142			}143			Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),144			Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),145		}146	}147}148149#[macro_export]150macro_rules! define_kinds {151	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{152		$into.define_token(TokenKind::Literal {153			grammar_name: format!("LIT_{}!", $name),154			name: $name.to_owned(),155			regex: $regex.to_owned(),156			lexer: None $(.or_else(|| Some($lexer.to_string())))?,157		});158		$(define_kinds!($into = $($rest)*))?159	}};160	($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{161		{162			let regex = None$(.or(Some($regex.to_owned())))?;163			let priority = None$(.or(Some($priority)))?;164			$into.define_token(TokenKind::Error {165				grammar_name: format!("ERROR_{}!", $name),166				name: format!("ERROR_{}", $name),167				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),168				regex,169				priority,170				description: $desc.to_owned(),171			});172		}173		$(define_kinds!($into = $($rest)*))?174	}};175	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) 
=> {{176		$into.define_token(TokenKind::Keyword {177			code: format!("{}", $tok),178			name: $name.to_owned(),179		});180		$(define_kinds!($into = $($rest)*))?181	}};182	($into:ident =) => {{}}183}184use std::{collections::HashSet, str::FromStr};185186use indexmap::IndexMap;187use proc_macro2::{Ident, TokenStream};188use quote::{format_ident, quote};189190use super::escape_token_macro;191192impl KindsSrc {193	pub fn new() -> Self {194		Self {195			defined_tokens: IndexMap::new(),196			defined_node_names: HashSet::new(),197			nodes: Vec::new(),198		}199	}200	pub fn define_token(&mut self, token: TokenKind) {201		assert!(202			self.defined_node_names.insert(token.name().to_owned()),203			"node name already defined: {}",204			token.name()205		);206		assert!(207			self.defined_tokens208				.insert(token.grammar_name().to_owned(), token.clone())209				.is_none(),210			"token already defined: {}",211			token.grammar_name()212		);213	}214	pub fn define_node(&mut self, node: &str) {215		assert!(216			self.defined_node_names.insert(node.to_owned()),217			"node name already defined: {node}"218		);219		self.nodes.push(node.to_string());220	}221	pub fn token(&self, tok: &str) -> Option<&TokenKind> {222		self.defined_tokens.get(tok)223	}224	pub fn is_token(&self, tok: &str) -> bool {225		self.defined_tokens.contains_key(tok)226	}227	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {228		self.defined_tokens.iter().map(|(_, v)| v)229	}230}231232pub fn jsonnet_kinds() -> KindsSrc {233	let mut kinds = KindsSrc::new();234	define_kinds![kinds =235		"||" => "OR";236		"??" => "NULL_COAELSE";237		"&&" => "AND";238		"|" => "BIT_OR";239		"^" => "BIT_XOR";240		"&" => "BIT_AND";241		"==" => "EQ";242		"!=" => "NE";243		"<" => "LT";244		">" => "GT";245		"<=" => "LE";246		">=" => "GE";247		"<<" => "LHS";248		">>" => "RHS";249		"+" => "PLUS";250		"-" => "MINUS";251		"*" => "MUL";252		"/" => "DIV";253		"%" => "MODULO";254		"!" 
=> "NOT";255		"~" => "BIT_NOT";256		"[" => "L_BRACK";257		"]" => "R_BRACK";258		"(" => "L_PAREN";259		")" => "R_PAREN";260		"{" => "L_BRACE";261		"}" => "R_BRACE";262		":" => "COLON";263		";" => "SEMI";264		"." => "DOT";265		"..." => "DOTDOTDOT";266		"," => "COMMA";267		"$" => "DOLLAR";268		"=" => "ASSIGN";269		"?" => "QUESTION_MARK";270		// Literals271		lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";272		error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";273		error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";274		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";275		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";276		error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";277		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";278		error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";279		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";280		error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";281		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";282		error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";283		error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";284		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";285		error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);286		error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);287		
error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);288		error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);289		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";290		lit("WHITESPACE") => r"[ \t\n\r]+";291		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";292		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";293		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";294		error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";295		error("COMMENT_UNTERMINATED", "unterminated multi-line comment") =>  r"/\*([^*/]|\*[^/])+";296		error("NO_OPERATOR", "expected operator");297		error("MISSING_TOKEN", "missing token");298		error("UNEXPECTED_TOKEN", "unexpected token");299		error("CUSTOM", "error");300	];301	kinds302}
modified: xtask/src/sourcegen/mod.rs (diff | before | after | both)
--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -56,14 +56,7 @@
 						});
 					}
 					SpecialName::Error => {
-						eprintln!("implicit error: {name}");
-						kinds.define_token(TokenKind::Error {
-							grammar_name: token.to_owned(),
-							name: format!("ERROR_{name}"),
-							regex: None,
-							priority: None,
-							is_lexer_error: true,
-						});
+						panic!("error token ERROR_{name} must be explicitly defined in jsonnet_kinds()");
 					}
 				}
 				continue;
@@ -170,6 +163,24 @@
 		quote! {}
 	};
 
+	let error_desc_arms = kinds.tokens().filter_map(|t| {
+		if let TokenKind::Error {
+			name, description, ..
+		} = t
+		{
+			let ident = format_ident!("{name}");
+			Some(quote! { #ident => ::core::option::Option::Some(#description) })
+		} else {
+			None
+		}
+	});
+
+	let display_name_arms = kinds.tokens().map(|t| {
+		let ident = format_ident!("{}", t.name());
+		let display = t.display_name();
+		quote! { #ident => #display }
+	});
+
 	let ast = quote! {
 		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
 
@@ -200,6 +211,22 @@
 
 			#is_enum
 
+			pub fn error_description(self) -> Option<&'static str> {
+				match self {
+					#(#error_desc_arms,)*
+					LEXING_ERROR => ::core::option::Option::Some("unexpected character"),
+					_ => None,
+				}
+			}
+
+			pub fn display_name(self) -> &'static str {
+				match self {
+					#(#display_name_arms,)*
+					LEXING_ERROR => "unexpected character",
+					_ => "unknown",
+				}
+			}
+
 			pub fn from_raw(r: u16) -> Self {
 				assert!(r < Self::__LAST as u16);
 				unsafe { std::mem::transmute(r) }