difftreelog
feat(lexer) explicit token names
in: master
7 files changed
Cargo.tomldiffbeforeafterboth--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@
jrsonnet-evaluator = { path = "./crates/jrsonnet-evaluator", version = "0.5.0-pre97" }
jrsonnet-macros = { path = "./crates/jrsonnet-macros", version = "0.5.0-pre97" }
jrsonnet-ir = { path = "./crates/jrsonnet-ir", version = "0.5.0-pre97" }
-jrsonnet-ir-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
+jrsonnet-ir-parser = { path = "./crates/jrsonnet-ir-parser", version = "0.5.0-pre97" }
jrsonnet-peg-parser = { path = "./crates/jrsonnet-peg-parser", version = "0.5.0-pre97" }
jrsonnet-rowan-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
jrsonnet-interner = { path = "./crates/jrsonnet-interner", version = "0.5.0-pre97" }
crates/jrsonnet-lexer/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
@@ -132,6 +132,10 @@
ERROR_COMMENT_TOO_SHORT,
#[regex("/\\*([^*/]|\\*[^/])+")]
ERROR_COMMENT_UNTERMINATED,
+ ERROR_NO_OPERATOR,
+ ERROR_MISSING_TOKEN,
+ ERROR_UNEXPECTED_TOKEN,
+ ERROR_CUSTOM,
#[token("tailstrict")]
TAILSTRICT_KW,
#[token("local")]
@@ -155,7 +159,6 @@
#[token("in")]
IN_KW,
META_OBJECT_APPLY,
- ERROR_NO_OPERATOR,
#[token("null")]
NULL_KW,
#[token("true")]
@@ -170,9 +173,6 @@
FOR_KW,
#[token("assert")]
ASSERT_KW,
- ERROR_MISSING_TOKEN,
- ERROR_UNEXPECTED_TOKEN,
- ERROR_CUSTOM,
LEXING_ERROR,
__LAST_TOKEN,
#[doc(hidden)]
crates/jrsonnet-lexer/src/string_block.rsdiffbeforeafterboth--- a/crates/jrsonnet-lexer/src/string_block.rs
+++ b/crates/jrsonnet-lexer/src/string_block.rs
@@ -211,6 +211,7 @@
// Process leading blank lines before calculating string block indent
while ctx.peek() == Some('\n') {
+ lex.mark_line("");
ctx.next();
}
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -209,7 +209,7 @@
| FieldNameDynamic
Visibility =
- ':' v1:':'? v2:':'?
+ ':' ':'? ':'?
Literal =
'null'
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth1//! This is a generated file, please do not edit manually. Changes can be2//! made in codegeneration that lives in `xtask` top-level dir.34#![allow(5 bad_style,6 missing_docs,7 unreachable_pub,8 clippy::manual_non_exhaustive,9 clippy::match_like_matches_macro10)]11#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]12#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]13#[repr(u16)]14pub enum SyntaxKind {15 #[doc(hidden)]16 TOMBSTONE,17 #[doc(hidden)]18 EOF,19 OR,20 NULL_COAELSE,21 AND,22 BIT_OR,23 BIT_XOR,24 BIT_AND,25 EQ,26 NE,27 LT,28 GT,29 LE,30 GE,31 LHS,32 RHS,33 PLUS,34 MINUS,35 MUL,36 DIV,37 MODULO,38 NOT,39 BIT_NOT,40 L_BRACK,41 R_BRACK,42 L_PAREN,43 R_PAREN,44 L_BRACE,45 R_BRACE,46 COLON,47 SEMI,48 DOT,49 DOTDOTDOT,50 COMMA,51 DOLLAR,52 ASSIGN,53 QUESTION_MARK,54 FLOAT,55 ERROR_FLOAT_JUNK_AFTER_POINT,56 ERROR_FLOAT_JUNK_AFTER_EXPONENT,57 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,58 STRING_DOUBLE,59 ERROR_STRING_DOUBLE_UNTERMINATED,60 STRING_SINGLE,61 ERROR_STRING_SINGLE_UNTERMINATED,62 STRING_DOUBLE_VERBATIM,63 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,64 STRING_SINGLE_VERBATIM,65 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,66 ERROR_STRING_VERBATIM_MISSING_QUOTES,67 STRING_BLOCK,68 ERROR_STRING_BLOCK_UNEXPECTED_END,69 ERROR_STRING_BLOCK_MISSING_NEW_LINE,70 ERROR_STRING_BLOCK_MISSING_TERMINATION,71 ERROR_STRING_BLOCK_MISSING_INDENT,72 IDENT,73 WHITESPACE,74 SINGLE_LINE_SLASH_COMMENT,75 SINGLE_LINE_HASH_COMMENT,76 MULTI_LINE_COMMENT,77 ERROR_COMMENT_TOO_SHORT,78 ERROR_COMMENT_UNTERMINATED,79 TAILSTRICT_KW,80 LOCAL_KW,81 IMPORTSTR_KW,82 IMPORTBIN_KW,83 IMPORT_KW,84 IF_KW,85 THEN_KW,86 ELSE_KW,87 FUNCTION_KW,88 ERROR_KW,89 IN_KW,90 META_OBJECT_APPLY,91 ERROR_NO_OPERATOR,92 NULL_KW,93 TRUE_KW,94 FALSE_KW,95 SELF_KW,96 SUPER_KW,97 FOR_KW,98 ASSERT_KW,99 ERROR_MISSING_TOKEN,100 ERROR_UNEXPECTED_TOKEN,101 ERROR_CUSTOM,102 LEXING_ERROR,103 __LAST_TOKEN,104 SOURCE_FILE,105 EXPR,106 SUFFIX_INDEX,107 NAME,108 SUFFIX_INDEX_EXPR,109 SUFFIX_SLICE,110 SLICE_DESC,111 SUFFIX_APPLY,112 ARGS_DESC,113 STMT_LOCAL,114 STMT_ASSERT,115 ASSERTION,116 EXPR_BINARY,117 EXPR_UNARY,118 EXPR_OBJ_EXTEND,119 EXPR_PARENED,120 EXPR_LITERAL,121 EXPR_STRING,122 EXPR_NUMBER,123 EXPR_ARRAY,124 EXPR_OBJECT,125 EXPR_ARRAY_COMP,126 EXPR_IMPORT,127 EXPR_VAR,128 EXPR_IF_THEN_ELSE,129 TRUE_EXPR,130 FALSE_EXPR,131 EXPR_FUNCTION,132 PARAMS_DESC,133 EXPR_ERROR,134 SLICE_DESC_END,135 SLICE_DESC_STEP,136 ARG,137 OBJ_BODY_COMP,138 OBJ_BODY_MEMBER_LIST,139 MEMBER_BIND_STMT,140 OBJ_LOCAL,141 MEMBER_ASSERT_STMT,142 MEMBER_FIELD_NORMAL,143 VISIBILITY,144 MEMBER_FIELD_METHOD,145 FIELD_NAME_FIXED,146 FIELD_NAME_DYNAMIC,147 FOR_SPEC,148 IF_SPEC,149 BIND_DESTRUCT,150 BIND_FUNCTION,151 PARAM,152 DESTRUCT_FULL,153 DESTRUCT_SKIP,154 DESTRUCT_ARRAY,155 DESTRUCT_OBJECT,156 DESTRUCT_OBJECT_FIELD,157 DESTRUCT_REST,158 DESTRUCT_ARRAY_ELEMENT,159 SUFFIX,160 BIND,161 STMT,162 OBJ_BODY,163 COMP_SPEC,164 EXPR_BASE,165 MEMBER_COMP,166 MEMBER,167 FIELD_NAME,168 DESTRUCT,169 DESTRUCT_ARRAY_PART,170 BINARY_OPERATOR,171 UNARY_OPERATOR,172 LITERAL,173 TEXT,174 NUMBER,175 IMPORT_KIND,176 TRIVIA,177 CUSTOM_ERROR,178 #[doc(hidden)]179 __LAST,180}181use self::SyntaxKind::*;182impl SyntaxKind {183 pub fn is_keyword(self) -> bool {184 match self {185 OR | NULL_COAELSE | AND | BIT_OR | BIT_XOR | BIT_AND | EQ | NE | LT | GT | LE | GE186 | LHS | RHS | PLUS | MINUS | MUL | DIV | MODULO | NOT | BIT_NOT | L_BRACK | R_BRACK187 | L_PAREN | R_PAREN | L_BRACE | R_BRACE | COLON | SEMI | DOT | DOTDOTDOT | COMMA188 | DOLLAR | ASSIGN | QUESTION_MARK | TAILSTRICT_KW | LOCAL_KW | IMPORTSTR_KW189 | IMPORTBIN_KW | IMPORT_KW | IF_KW | THEN_KW | ELSE_KW | FUNCTION_KW | ERROR_KW190 | IN_KW | NULL_KW | TRUE_KW | FALSE_KW | SELF_KW | SUPER_KW | FOR_KW | ASSERT_KW => true,191 _ => false,192 }193 }194 pub fn is_enum(self) -> bool {195 match self {196 SUFFIX | BIND | STMT | OBJ_BODY | COMP_SPEC | EXPR_BASE | MEMBER_COMP | MEMBER197 | FIELD_NAME | DESTRUCT | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR198 | LITERAL | TEXT | NUMBER | IMPORT_KIND | TRIVIA | CUSTOM_ERROR => true,199 _ => false,200 }201 }202 pub fn from_raw(r: u16) -> Self {203 assert!(r < Self::__LAST as u16);204 unsafe { std::mem::transmute(r) }205 }206 pub fn into_raw(self) -> u16 {207 self as u16208 }209}210#[macro_export]211macro_rules ! T { [||] => { $ crate :: SyntaxKind :: OR } ; [??] => { $ crate :: SyntaxKind :: NULL_COAELSE } ; [&&] => { $ crate :: SyntaxKind :: AND } ; [|] => { $ crate :: SyntaxKind :: BIT_OR } ; [^] => { $ crate :: SyntaxKind :: BIT_XOR } ; [&] => { $ crate :: SyntaxKind :: BIT_AND } ; [==] => { $ crate :: SyntaxKind :: EQ } ; [!=] => { $ crate :: SyntaxKind :: NE } ; [<] => { $ crate :: SyntaxKind :: LT } ; [>] => { $ crate :: SyntaxKind :: GT } ; [<=] => { $ crate :: SyntaxKind :: LE } ; [>=] => { $ crate :: SyntaxKind :: GE } ; [<<] => { $ crate :: SyntaxKind :: LHS } ; [>>] => { $ crate :: SyntaxKind :: RHS } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [*] => { $ crate :: SyntaxKind :: MUL } ; [/] => { $ crate :: SyntaxKind :: DIV } ; [%] => { $ crate :: SyntaxKind :: MODULO } ; [!] => { $ crate :: SyntaxKind :: NOT } ; [~] => { $ crate :: SyntaxKind :: BIT_NOT } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [;] => { $ crate :: SyntaxKind :: SEMI } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [...] => { $ crate :: SyntaxKind :: DOTDOTDOT } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['$'] => { $ crate :: SyntaxKind :: DOLLAR } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [?] => { $ crate :: SyntaxKind :: QUESTION_MARK } ; [tailstrict] => { $ crate :: SyntaxKind :: TAILSTRICT_KW } ; [local] => { $ crate :: SyntaxKind :: LOCAL_KW } ; [importstr] => { $ crate :: SyntaxKind :: IMPORTSTR_KW } ; [importbin] => { $ crate :: SyntaxKind :: IMPORTBIN_KW } ; [import] => { $ crate :: SyntaxKind :: IMPORT_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [then] => { $ crate :: SyntaxKind :: THEN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [function] => { $ crate :: SyntaxKind :: FUNCTION_KW } ; [error] => { $ crate :: SyntaxKind :: ERROR_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [null] => { $ crate :: SyntaxKind :: NULL_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [assert] => { $ crate :: SyntaxKind :: ASSERT_KW } }212#[allow(unused_imports)]213pub use T;xtask/src/sourcegen/kinds.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -19,6 +19,7 @@
is_lexer_error: bool,
regex: Option<String>,
priority: Option<u32>,
+ description: String,
},
/// Keyword - literal match of token
Keyword {
@@ -113,6 +114,24 @@
}
}
+ pub fn display_name(&self) -> String {
+ match self {
+ Self::Keyword { code, .. } => format!("'{code}'"),
+ Self::Literal { name, .. } => match name.as_str() {
+ "FLOAT" => "number".to_owned(),
+ "IDENT" => "identifier".to_owned(),
+ "STRING_DOUBLE" | "STRING_SINGLE" | "STRING_DOUBLE_VERBATIM"
+ | "STRING_SINGLE_VERBATIM" | "STRING_BLOCK" => "string".to_owned(),
+ "WHITESPACE" => "whitespace".to_owned(),
+ "SINGLE_LINE_SLASH_COMMENT" | "SINGLE_LINE_HASH_COMMENT"
+ | "MULTI_LINE_COMMENT" => "comment".to_owned(),
+ _ => name.to_lowercase(),
+ },
+ Self::Meta { name, .. } => name.to_lowercase(),
+ Self::Error { description, .. } => description.clone(),
+ }
+ }
+
pub fn method_name(&self) -> Ident {
match self {
Self::Keyword { name, .. } => {
@@ -138,7 +157,7 @@
});
$(define_kinds!($into = $($rest)*))?
}};
- ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+ ($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
{
let regex = None$(.or(Some($regex.to_owned())))?;
let priority = None$(.or(Some($priority)))?;
@@ -148,6 +167,7 @@
is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
regex,
priority,
+ description: $desc.to_owned(),
});
}
$(define_kinds!($into = $($rest)*))?
@@ -248,31 +268,35 @@
"=" => "ASSIGN";
"?" => "QUESTION_MARK";
// Literals
- lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";
- error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";
- error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";
- error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";
+ lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";
+ error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";
+ error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";
+ error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";
lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";
- error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";
+ error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";
lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";
- error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";
+ error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";
lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";
- error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";
+ error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";
lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";
- error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
- error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
+ error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";
+ error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";
lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
- error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
- error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
- error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
- error("STRING_BLOCK_MISSING_INDENT", lexer = true);
+ error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);
+ error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);
+ error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);
+ error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);
lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
lit("WHITESPACE") => r"[ \t\n\r]+";
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";
lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";
lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";
- error("COMMENT_TOO_SHORT") => r"/\*/";
- error("COMMENT_UNTERMINATED") => r"/\*([^*/]|\*[^/])+";
+ error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";
+ error("COMMENT_UNTERMINATED", "unterminated multi-line comment") => r"/\*([^*/]|\*[^/])+";
+ error("NO_OPERATOR", "expected operator");
+ error("MISSING_TOKEN", "missing token");
+ error("UNEXPECTED_TOKEN", "unexpected token");
+ error("CUSTOM", "error");
];
kinds
}
xtask/src/sourcegen/mod.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -56,14 +56,7 @@
});
}
SpecialName::Error => {
- eprintln!("implicit error: {name}");
- kinds.define_token(TokenKind::Error {
- grammar_name: token.to_owned(),
- name: format!("ERROR_{name}"),
- regex: None,
- priority: None,
- is_lexer_error: true,
- });
+ panic!("error token ERROR_{name} must be explicitly defined in jsonnet_kinds()");
}
}
continue;
@@ -170,6 +163,24 @@
quote! {}
};
+ let error_desc_arms = kinds.tokens().filter_map(|t| {
+ if let TokenKind::Error {
+ name, description, ..
+ } = t
+ {
+ let ident = format_ident!("{name}");
+ Some(quote! { #ident => ::core::option::Option::Some(#description) })
+ } else {
+ None
+ }
+ });
+
+ let display_name_arms = kinds.tokens().map(|t| {
+ let ident = format_ident!("{}", t.name());
+ let display = t.display_name();
+ quote! { #ident => #display }
+ });
+
let ast = quote! {
#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
@@ -200,6 +211,22 @@
#is_enum
+ pub fn error_description(self) -> Option<&'static str> {
+ match self {
+ #(#error_desc_arms,)*
+ LEXING_ERROR => ::core::option::Option::Some("unexpected character"),
+ _ => None,
+ }
+ }
+
+ pub fn display_name(self) -> &'static str {
+ match self {
+ #(#display_name_arms,)*
+ LEXING_ERROR => "unexpected character",
+ _ => "unknown",
+ }
+ }
+
pub fn from_raw(r: u16) -> Self {
assert!(r < Self::__LAST as u16);
unsafe { std::mem::transmute(r) }