git.delta.rocks / jrsonnet / refs/commits / 7c03fc40023d

diff | tree | log

feat(lexer) explicit token names

voylxuxy · Yaroslav Bolyukin · 2026-03-23 · parent: #ab84d0c · .patch · .diff
in: master

7 files changed

modified: Cargo.toml (diff | before | after | both)
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@
 jrsonnet-evaluator = { path = "./crates/jrsonnet-evaluator", version = "0.5.0-pre97" }
 jrsonnet-macros = { path = "./crates/jrsonnet-macros", version = "0.5.0-pre97" }
 jrsonnet-ir = { path = "./crates/jrsonnet-ir", version = "0.5.0-pre97" }
-jrsonnet-ir-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
+jrsonnet-ir-parser = { path = "./crates/jrsonnet-ir-parser", version = "0.5.0-pre97" }
 jrsonnet-peg-parser = { path = "./crates/jrsonnet-peg-parser", version = "0.5.0-pre97" }
 jrsonnet-rowan-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
 jrsonnet-interner = { path = "./crates/jrsonnet-interner", version = "0.5.0-pre97" }
modified: crates/jrsonnet-lexer/src/generated/syntax_kinds.rs (diff | before | after | both)
--- a/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
@@ -132,6 +132,10 @@
 	ERROR_COMMENT_TOO_SHORT,
 	#[regex("/\\*([^*/]|\\*[^/])+")]
 	ERROR_COMMENT_UNTERMINATED,
+	ERROR_NO_OPERATOR,
+	ERROR_MISSING_TOKEN,
+	ERROR_UNEXPECTED_TOKEN,
+	ERROR_CUSTOM,
 	#[token("tailstrict")]
 	TAILSTRICT_KW,
 	#[token("local")]
@@ -155,7 +159,6 @@
 	#[token("in")]
 	IN_KW,
 	META_OBJECT_APPLY,
-	ERROR_NO_OPERATOR,
 	#[token("null")]
 	NULL_KW,
 	#[token("true")]
@@ -170,9 +173,6 @@
 	FOR_KW,
 	#[token("assert")]
 	ASSERT_KW,
-	ERROR_MISSING_TOKEN,
-	ERROR_UNEXPECTED_TOKEN,
-	ERROR_CUSTOM,
 	LEXING_ERROR,
 	__LAST_TOKEN,
 	#[doc(hidden)]
modified: crates/jrsonnet-lexer/src/string_block.rs (diff | before | after | both)
--- a/crates/jrsonnet-lexer/src/string_block.rs
+++ b/crates/jrsonnet-lexer/src/string_block.rs
@@ -211,6 +211,7 @@
 
 	// Process leading blank lines before calculating string block indent
 	while ctx.peek() == Some('\n') {
+		lex.mark_line("");
 		ctx.next();
 	}
 
modified: crates/jrsonnet-rowan-parser/jsonnet.ungram (diff | before | after | both)
--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -209,7 +209,7 @@
 |   FieldNameDynamic
 
 Visibility =
-    ':' v1:':'? v2:':'?
+    ':' ':'? ':'?
 
 Literal =
     'null'
modified: crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs (diff | before | after | both)
76 MULTI_LINE_COMMENT,76 MULTI_LINE_COMMENT,
77 ERROR_COMMENT_TOO_SHORT,77 ERROR_COMMENT_TOO_SHORT,
78 ERROR_COMMENT_UNTERMINATED,78 ERROR_COMMENT_UNTERMINATED,
79 ERROR_NO_OPERATOR,
80 ERROR_MISSING_TOKEN,
81 ERROR_UNEXPECTED_TOKEN,
82 ERROR_CUSTOM,
79 TAILSTRICT_KW,83 TAILSTRICT_KW,
80 LOCAL_KW,84 LOCAL_KW,
81 IMPORTSTR_KW,85 IMPORTSTR_KW,
88 ERROR_KW,92 ERROR_KW,
89 IN_KW,93 IN_KW,
90 META_OBJECT_APPLY,94 META_OBJECT_APPLY,
91 ERROR_NO_OPERATOR,
92 NULL_KW,95 NULL_KW,
93 TRUE_KW,96 TRUE_KW,
94 FALSE_KW,97 FALSE_KW,
95 SELF_KW,98 SELF_KW,
96 SUPER_KW,99 SUPER_KW,
97 FOR_KW,100 FOR_KW,
98 ASSERT_KW,101 ASSERT_KW,
99 ERROR_MISSING_TOKEN,
100 ERROR_UNEXPECTED_TOKEN,
101 ERROR_CUSTOM,
102 LEXING_ERROR,102 LEXING_ERROR,
103 __LAST_TOKEN,103 __LAST_TOKEN,
104 SOURCE_FILE,104 SOURCE_FILE,
197 | FIELD_NAME | DESTRUCT | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR197 | FIELD_NAME | DESTRUCT | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR
198 | LITERAL | TEXT | NUMBER | IMPORT_KIND | TRIVIA | CUSTOM_ERROR => true,198 | LITERAL | TEXT | NUMBER | IMPORT_KIND | TRIVIA | CUSTOM_ERROR => true,
199 _ => false,199 _ => false,
200 }
201 }
202 pub fn error_description(self) -> Option<&'static str> {
203 match self {
204 ERROR_FLOAT_JUNK_AFTER_POINT => {
205 ::core::option::Option::Some("junk after decimal point in number literal")
206 }
207 ERROR_FLOAT_JUNK_AFTER_EXPONENT => {
208 ::core::option::Option::Some("junk after exponent in number literal")
209 }
210 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => {
211 ::core::option::Option::Some("junk after exponent sign in number literal")
212 }
213 ERROR_STRING_DOUBLE_UNTERMINATED => {
214 ::core::option::Option::Some("unterminated double-quoted string")
215 }
216 ERROR_STRING_SINGLE_UNTERMINATED => {
217 ::core::option::Option::Some("unterminated single-quoted string")
218 }
219 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
220 ::core::option::Option::Some("unterminated verbatim double-quoted string")
221 }
222 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
223 ::core::option::Option::Some("unterminated verbatim single-quoted string")
224 }
225 ERROR_STRING_VERBATIM_MISSING_QUOTES => {
226 ::core::option::Option::Some("verbatim string missing opening quotes")
227 }
228 ERROR_STRING_BLOCK_UNEXPECTED_END => {
229 ::core::option::Option::Some("unexpected end of text block")
230 }
231 ERROR_STRING_BLOCK_MISSING_NEW_LINE => {
232 ::core::option::Option::Some("text block requires new line after |||")
233 }
234 ERROR_STRING_BLOCK_MISSING_TERMINATION => {
235 ::core::option::Option::Some("unterminated text block")
236 }
237 ERROR_STRING_BLOCK_MISSING_INDENT => {
238 ::core::option::Option::Some("text block first line must be indented")
239 }
240 ERROR_COMMENT_TOO_SHORT => ::core::option::Option::Some("comment too short"),
241 ERROR_COMMENT_UNTERMINATED => {
242 ::core::option::Option::Some("unterminated multi-line comment")
243 }
244 ERROR_NO_OPERATOR => ::core::option::Option::Some("expected operator"),
245 ERROR_MISSING_TOKEN => ::core::option::Option::Some("missing token"),
246 ERROR_UNEXPECTED_TOKEN => ::core::option::Option::Some("unexpected token"),
247 ERROR_CUSTOM => ::core::option::Option::Some("error"),
248 LEXING_ERROR => ::core::option::Option::Some("unexpected character"),
249 _ => None,
250 }
251 }
252 pub fn display_name(self) -> &'static str {
253 match self {
254 OR => "'||'",
255 NULL_COAELSE => "'??'",
256 AND => "'&&'",
257 BIT_OR => "'|'",
258 BIT_XOR => "'^'",
259 BIT_AND => "'&'",
260 EQ => "'=='",
261 NE => "'!='",
262 LT => "'<'",
263 GT => "'>'",
264 LE => "'<='",
265 GE => "'>='",
266 LHS => "'<<'",
267 RHS => "'>>'",
268 PLUS => "'+'",
269 MINUS => "'-'",
270 MUL => "'*'",
271 DIV => "'/'",
272 MODULO => "'%'",
273 NOT => "'!'",
274 BIT_NOT => "'~'",
275 L_BRACK => "'['",
276 R_BRACK => "']'",
277 L_PAREN => "'('",
278 R_PAREN => "')'",
279 L_BRACE => "'{'",
280 R_BRACE => "'}'",
281 COLON => "':'",
282 SEMI => "';'",
283 DOT => "'.'",
284 DOTDOTDOT => "'...'",
285 COMMA => "','",
286 DOLLAR => "'$'",
287 ASSIGN => "'='",
288 QUESTION_MARK => "'?'",
289 FLOAT => "number",
290 ERROR_FLOAT_JUNK_AFTER_POINT => "junk after decimal point in number literal",
291 ERROR_FLOAT_JUNK_AFTER_EXPONENT => "junk after exponent in number literal",
292 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => "junk after exponent sign in number literal",
293 STRING_DOUBLE => "string",
294 ERROR_STRING_DOUBLE_UNTERMINATED => "unterminated double-quoted string",
295 STRING_SINGLE => "string",
296 ERROR_STRING_SINGLE_UNTERMINATED => "unterminated single-quoted string",
297 STRING_DOUBLE_VERBATIM => "string",
298 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
299 "unterminated verbatim double-quoted string"
300 }
301 STRING_SINGLE_VERBATIM => "string",
302 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
303 "unterminated verbatim single-quoted string"
304 }
305 ERROR_STRING_VERBATIM_MISSING_QUOTES => "verbatim string missing opening quotes",
306 STRING_BLOCK => "string",
307 ERROR_STRING_BLOCK_UNEXPECTED_END => "unexpected end of text block",
308 ERROR_STRING_BLOCK_MISSING_NEW_LINE => "text block requires new line after |||",
309 ERROR_STRING_BLOCK_MISSING_TERMINATION => "unterminated text block",
310 ERROR_STRING_BLOCK_MISSING_INDENT => "text block first line must be indented",
311 IDENT => "identifier",
312 WHITESPACE => "whitespace",
313 SINGLE_LINE_SLASH_COMMENT => "comment",
314 SINGLE_LINE_HASH_COMMENT => "comment",
315 MULTI_LINE_COMMENT => "comment",
316 ERROR_COMMENT_TOO_SHORT => "comment too short",
317 ERROR_COMMENT_UNTERMINATED => "unterminated multi-line comment",
318 ERROR_NO_OPERATOR => "expected operator",
319 ERROR_MISSING_TOKEN => "missing token",
320 ERROR_UNEXPECTED_TOKEN => "unexpected token",
321 ERROR_CUSTOM => "error",
322 TAILSTRICT_KW => "'tailstrict'",
323 LOCAL_KW => "'local'",
324 IMPORTSTR_KW => "'importstr'",
325 IMPORTBIN_KW => "'importbin'",
326 IMPORT_KW => "'import'",
327 IF_KW => "'if'",
328 THEN_KW => "'then'",
329 ELSE_KW => "'else'",
330 FUNCTION_KW => "'function'",
331 ERROR_KW => "'error'",
332 IN_KW => "'in'",
333 META_OBJECT_APPLY => "meta_object_apply",
334 NULL_KW => "'null'",
335 TRUE_KW => "'true'",
336 FALSE_KW => "'false'",
337 SELF_KW => "'self'",
338 SUPER_KW => "'super'",
339 FOR_KW => "'for'",
340 ASSERT_KW => "'assert'",
341 LEXING_ERROR => "unexpected character",
342 _ => "unknown",
200 }343 }
201 }344 }
202 pub fn from_raw(r: u16) -> Self {345 pub fn from_raw(r: u16) -> Self {
modified: xtask/src/sourcegen/kinds.rs (diff | before | after | both)
--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -19,6 +19,7 @@
 		is_lexer_error: bool,
 		regex: Option<String>,
 		priority: Option<u32>,
+		description: String,
 	},
 	/// Keyword - literal match of token
 	Keyword {
@@ -113,6 +114,24 @@
 		}
 	}
 
+	pub fn display_name(&self) -> String {
+		match self {
+			Self::Keyword { code, .. } => format!("'{code}'"),
+			Self::Literal { name, .. } => match name.as_str() {
+				"FLOAT" => "number".to_owned(),
+				"IDENT" => "identifier".to_owned(),
+				"STRING_DOUBLE" | "STRING_SINGLE" | "STRING_DOUBLE_VERBATIM"
+				| "STRING_SINGLE_VERBATIM" | "STRING_BLOCK" => "string".to_owned(),
+				"WHITESPACE" => "whitespace".to_owned(),
+				"SINGLE_LINE_SLASH_COMMENT" | "SINGLE_LINE_HASH_COMMENT"
+				| "MULTI_LINE_COMMENT" => "comment".to_owned(),
+				_ => name.to_lowercase(),
+			},
+			Self::Meta { name, .. } => name.to_lowercase(),
+			Self::Error { description, .. } => description.clone(),
+		}
+	}
+
 	pub fn method_name(&self) -> Ident {
 		match self {
 			Self::Keyword { name, .. } => {
@@ -138,7 +157,7 @@
 		});
 		$(define_kinds!($into = $($rest)*))?
 	}};
-	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+	($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
 		{
 			let regex = None$(.or(Some($regex.to_owned())))?;
 			let priority = None$(.or(Some($priority)))?;
@@ -148,6 +167,7 @@
 				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
 				regex,
 				priority,
+				description: $desc.to_owned(),
 			});
 		}
 		$(define_kinds!($into = $($rest)*))?
@@ -248,31 +268,35 @@
 		"=" => "ASSIGN";
 		"?" => "QUESTION_MARK";
 		// Literals
-		lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";
-		error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";
-		error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";
-		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";
+		lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";
+		error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";
+		error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";
+		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";
 		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";
-		error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";
+		error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";
 		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";
-		error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";
+		error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";
 		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";
-		error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";
+		error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";
 		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";
-		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
-		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
+		error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";
+		error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";
 		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
-		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
-		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
-		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
-		error("STRING_BLOCK_MISSING_INDENT", lexer = true);
+		error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);
+		error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);
+		error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);
+		error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);
 		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
 		lit("WHITESPACE") => r"[ \t\n\r]+";
 		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";
 		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";
 		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";
-		error("COMMENT_TOO_SHORT") => r"/\*/";
-		error("COMMENT_UNTERMINATED") =>  r"/\*([^*/]|\*[^/])+";
+		error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";
+		error("COMMENT_UNTERMINATED", "unterminated multi-line comment") =>  r"/\*([^*/]|\*[^/])+";
+		error("NO_OPERATOR", "expected operator");
+		error("MISSING_TOKEN", "missing token");
+		error("UNEXPECTED_TOKEN", "unexpected token");
+		error("CUSTOM", "error");
 	];
 	kinds
 }
modified: xtask/src/sourcegen/mod.rs (diff | before | after | both)
--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -56,14 +56,7 @@
 						});
 					}
 					SpecialName::Error => {
-						eprintln!("implicit error: {name}");
-						kinds.define_token(TokenKind::Error {
-							grammar_name: token.to_owned(),
-							name: format!("ERROR_{name}"),
-							regex: None,
-							priority: None,
-							is_lexer_error: true,
-						});
+						panic!("error token ERROR_{name} must be explicitly defined in jsonnet_kinds()");
 					}
 				}
 				continue;
@@ -170,6 +163,24 @@
 		quote! {}
 	};
 
+	let error_desc_arms = kinds.tokens().filter_map(|t| {
+		if let TokenKind::Error {
+			name, description, ..
+		} = t
+		{
+			let ident = format_ident!("{name}");
+			Some(quote! { #ident => ::core::option::Option::Some(#description) })
+		} else {
+			None
+		}
+	});
+
+	let display_name_arms = kinds.tokens().map(|t| {
+		let ident = format_ident!("{}", t.name());
+		let display = t.display_name();
+		quote! { #ident => #display }
+	});
+
 	let ast = quote! {
 		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
 
@@ -200,6 +211,22 @@
 
 			#is_enum
 
+			pub fn error_description(self) -> Option<&'static str> {
+				match self {
+					#(#error_desc_arms,)*
+					LEXING_ERROR => ::core::option::Option::Some("unexpected character"),
+					_ => None,
+				}
+			}
+
+			pub fn display_name(self) -> &'static str {
+				match self {
+					#(#display_name_arms,)*
+					LEXING_ERROR => "unexpected character",
+					_ => "unknown",
+				}
+			}
+
 			pub fn from_raw(r: u16) -> Self {
 				assert!(r < Self::__LAST as u16);
 				unsafe { std::mem::transmute(r) }