difftreelog
feat(lexer) explicit token names
in: master
7 files changed
Cargo.tomldiffbeforeafterboth--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@
jrsonnet-evaluator = { path = "./crates/jrsonnet-evaluator", version = "0.5.0-pre97" }
jrsonnet-macros = { path = "./crates/jrsonnet-macros", version = "0.5.0-pre97" }
jrsonnet-ir = { path = "./crates/jrsonnet-ir", version = "0.5.0-pre97" }
-jrsonnet-ir-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
+jrsonnet-ir-parser = { path = "./crates/jrsonnet-ir-parser", version = "0.5.0-pre97" }
jrsonnet-peg-parser = { path = "./crates/jrsonnet-peg-parser", version = "0.5.0-pre97" }
jrsonnet-rowan-parser = { path = "./crates/jrsonnet-rowan-parser", version = "0.5.0-pre97" }
jrsonnet-interner = { path = "./crates/jrsonnet-interner", version = "0.5.0-pre97" }
crates/jrsonnet-lexer/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
@@ -132,6 +132,10 @@
ERROR_COMMENT_TOO_SHORT,
#[regex("/\\*([^*/]|\\*[^/])+")]
ERROR_COMMENT_UNTERMINATED,
+ ERROR_NO_OPERATOR,
+ ERROR_MISSING_TOKEN,
+ ERROR_UNEXPECTED_TOKEN,
+ ERROR_CUSTOM,
#[token("tailstrict")]
TAILSTRICT_KW,
#[token("local")]
@@ -155,7 +159,6 @@
#[token("in")]
IN_KW,
META_OBJECT_APPLY,
- ERROR_NO_OPERATOR,
#[token("null")]
NULL_KW,
#[token("true")]
@@ -170,9 +173,6 @@
FOR_KW,
#[token("assert")]
ASSERT_KW,
- ERROR_MISSING_TOKEN,
- ERROR_UNEXPECTED_TOKEN,
- ERROR_CUSTOM,
LEXING_ERROR,
__LAST_TOKEN,
#[doc(hidden)]
crates/jrsonnet-lexer/src/string_block.rsdiffbeforeafterboth--- a/crates/jrsonnet-lexer/src/string_block.rs
+++ b/crates/jrsonnet-lexer/src/string_block.rs
@@ -211,6 +211,7 @@
// Process leading blank lines before calculating string block indent
while ctx.peek() == Some('\n') {
+ lex.mark_line("");
ctx.next();
}
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -209,7 +209,7 @@
| FieldNameDynamic
Visibility =
- ':' v1:':'? v2:':'?
+ ':' ':'? ':'?
Literal =
'null'
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -76,6 +76,10 @@
MULTI_LINE_COMMENT,
ERROR_COMMENT_TOO_SHORT,
ERROR_COMMENT_UNTERMINATED,
+ ERROR_NO_OPERATOR,
+ ERROR_MISSING_TOKEN,
+ ERROR_UNEXPECTED_TOKEN,
+ ERROR_CUSTOM,
TAILSTRICT_KW,
LOCAL_KW,
IMPORTSTR_KW,
@@ -88,7 +92,6 @@
ERROR_KW,
IN_KW,
META_OBJECT_APPLY,
- ERROR_NO_OPERATOR,
NULL_KW,
TRUE_KW,
FALSE_KW,
@@ -96,9 +99,6 @@
SUPER_KW,
FOR_KW,
ASSERT_KW,
- ERROR_MISSING_TOKEN,
- ERROR_UNEXPECTED_TOKEN,
- ERROR_CUSTOM,
LEXING_ERROR,
__LAST_TOKEN,
SOURCE_FILE,
@@ -199,6 +199,149 @@
_ => false,
}
}
+ pub fn error_description(self) -> Option<&'static str> { // Fixed message for error kinds; `None` for every other kind.
+ match self {
+ ERROR_FLOAT_JUNK_AFTER_POINT => {
+ ::core::option::Option::Some("junk after decimal point in number literal")
+ }
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => {
+ ::core::option::Option::Some("junk after exponent in number literal")
+ }
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => {
+ ::core::option::Option::Some("junk after exponent sign in number literal")
+ }
+ ERROR_STRING_DOUBLE_UNTERMINATED => {
+ ::core::option::Option::Some("unterminated double-quoted string")
+ }
+ ERROR_STRING_SINGLE_UNTERMINATED => {
+ ::core::option::Option::Some("unterminated single-quoted string")
+ }
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
+ ::core::option::Option::Some("unterminated verbatim double-quoted string")
+ }
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
+ ::core::option::Option::Some("unterminated verbatim single-quoted string")
+ }
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => {
+ ::core::option::Option::Some("verbatim string missing opening quotes")
+ }
+ ERROR_STRING_BLOCK_UNEXPECTED_END => {
+ ::core::option::Option::Some("unexpected end of text block")
+ }
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => {
+ ::core::option::Option::Some("text block requires new line after |||")
+ }
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => {
+ ::core::option::Option::Some("unterminated text block")
+ }
+ ERROR_STRING_BLOCK_MISSING_INDENT => {
+ ::core::option::Option::Some("text block first line must be indented")
+ }
+ ERROR_COMMENT_TOO_SHORT => ::core::option::Option::Some("comment too short"),
+ ERROR_COMMENT_UNTERMINATED => {
+ ::core::option::Option::Some("unterminated multi-line comment")
+ }
+ ERROR_NO_OPERATOR => ::core::option::Option::Some("expected operator"),
+ ERROR_MISSING_TOKEN => ::core::option::Option::Some("missing token"),
+ ERROR_UNEXPECTED_TOKEN => ::core::option::Option::Some("unexpected token"),
+ ERROR_CUSTOM => ::core::option::Option::Some("error"),
+ LEXING_ERROR => ::core::option::Option::Some("unexpected character"), // this arm is written literally in the xtask template, not derived from a token definition
+ _ => None, // kinds without an arm above carry no description
+ }
+ }
+ pub fn display_name(self) -> &'static str { // Short human-readable name for this kind; never empty, falls back to a fixed placeholder.
+ match self {
+ OR => "'||'", // punctuation and operators: the source text wrapped in single quotes
+ NULL_COAELSE => "'??'", // NOTE(review): kind name looks like a misspelling of COALESCE; it comes from the token definition, so renaming it here alone would break the match
+ AND => "'&&'",
+ BIT_OR => "'|'",
+ BIT_XOR => "'^'",
+ BIT_AND => "'&'",
+ EQ => "'=='",
+ NE => "'!='",
+ LT => "'<'",
+ GT => "'>'",
+ LE => "'<='",
+ GE => "'>='",
+ LHS => "'<<'",
+ RHS => "'>>'",
+ PLUS => "'+'",
+ MINUS => "'-'",
+ MUL => "'*'",
+ DIV => "'/'",
+ MODULO => "'%'",
+ NOT => "'!'",
+ BIT_NOT => "'~'",
+ L_BRACK => "'['",
+ R_BRACK => "']'",
+ L_PAREN => "'('",
+ R_PAREN => "')'",
+ L_BRACE => "'{'",
+ R_BRACE => "'}'",
+ COLON => "':'",
+ SEMI => "';'",
+ DOT => "'.'",
+ DOTDOTDOT => "'...'",
+ COMMA => "','",
+ DOLLAR => "'$'",
+ ASSIGN => "'='",
+ QUESTION_MARK => "'?'",
+ FLOAT => "number", // literal kinds use a generic noun instead of source text
+ ERROR_FLOAT_JUNK_AFTER_POINT => "junk after decimal point in number literal", // error kinds repeat the text returned by error_description
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => "junk after exponent in number literal",
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => "junk after exponent sign in number literal",
+ STRING_DOUBLE => "string", // all five string literal kinds share one display name
+ ERROR_STRING_DOUBLE_UNTERMINATED => "unterminated double-quoted string",
+ STRING_SINGLE => "string",
+ ERROR_STRING_SINGLE_UNTERMINATED => "unterminated single-quoted string",
+ STRING_DOUBLE_VERBATIM => "string",
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => {
+ "unterminated verbatim double-quoted string"
+ }
+ STRING_SINGLE_VERBATIM => "string",
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => {
+ "unterminated verbatim single-quoted string"
+ }
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => "verbatim string missing opening quotes",
+ STRING_BLOCK => "string",
+ ERROR_STRING_BLOCK_UNEXPECTED_END => "unexpected end of text block",
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => "text block requires new line after |||",
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => "unterminated text block",
+ ERROR_STRING_BLOCK_MISSING_INDENT => "text block first line must be indented",
+ IDENT => "identifier",
+ WHITESPACE => "whitespace",
+ SINGLE_LINE_SLASH_COMMENT => "comment", // all three comment kinds share one display name
+ SINGLE_LINE_HASH_COMMENT => "comment",
+ MULTI_LINE_COMMENT => "comment",
+ ERROR_COMMENT_TOO_SHORT => "comment too short",
+ ERROR_COMMENT_UNTERMINATED => "unterminated multi-line comment",
+ ERROR_NO_OPERATOR => "expected operator",
+ ERROR_MISSING_TOKEN => "missing token",
+ ERROR_UNEXPECTED_TOKEN => "unexpected token",
+ ERROR_CUSTOM => "error",
+ TAILSTRICT_KW => "'tailstrict'", // keywords: the keyword text wrapped in single quotes
+ LOCAL_KW => "'local'",
+ IMPORTSTR_KW => "'importstr'",
+ IMPORTBIN_KW => "'importbin'",
+ IMPORT_KW => "'import'",
+ IF_KW => "'if'",
+ THEN_KW => "'then'",
+ ELSE_KW => "'else'",
+ FUNCTION_KW => "'function'",
+ ERROR_KW => "'error'",
+ IN_KW => "'in'",
+ META_OBJECT_APPLY => "meta_object_apply", // presumably a meta kind with no source text; shown as its lower-cased kind name, unquoted
+ NULL_KW => "'null'",
+ TRUE_KW => "'true'",
+ FALSE_KW => "'false'",
+ SELF_KW => "'self'",
+ SUPER_KW => "'super'",
+ FOR_KW => "'for'",
+ ASSERT_KW => "'assert'",
+ LEXING_ERROR => "unexpected character", // same text as the LEXING_ERROR arm of error_description
+ _ => "unknown", // any kind without an arm above, e.g. syntax node kinds such as SOURCE_FILE
+ }
+ }
pub fn from_raw(r: u16) -> Self {
assert!(r < Self::__LAST as u16);
unsafe { std::mem::transmute(r) }
xtask/src/sourcegen/kinds.rsdiffbeforeafterboth1#[derive(Debug)]2pub struct KindsSrc {3 /// Key - how this token appears in ungrammar4 defined_tokens: IndexMap<String, TokenKind>,5 defined_node_names: HashSet<String>,6 pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11 /// May exist in token tree, but never in source code12 Meta { grammar_name: String, name: String },13 /// Specific parsing/lexing errors may be emitted as this type of kind14 Error {15 grammar_name: String,16 name: String,17 #[allow(dead_code)]18 /// Is this error returned by lexer directly, or from lex.rs19 is_lexer_error: bool,20 regex: Option<String>,21 priority: Option<u32>,22 },23 /// Keyword - literal match of token24 Keyword {25 /// How this keyword appears in grammar/code, should be same as Kinds key26 code: String,27 name: String,28 },29 /// Literal - something defined by user, i.e strings, identifiers, smth30 Literal {31 /// How this keyword appears in grammar, should be same as Kinds key32 grammar_name: String,33 name: String,34 /// Regex for Logos lexer35 regex: String,36 /// Path to custom lexer37 lexer: Option<String>,38 },39}4041impl TokenKind {42 pub fn grammar_name(&self) -> &str {43 match self {44 Self::Keyword { code, .. } => code,45 Self::Literal { grammar_name, .. }46 | Self::Meta { grammar_name, .. }47 | Self::Error { grammar_name, .. } => grammar_name,48 }49 }50 /// How this keyword should appear in kinds enum, screaming snake cased51 pub fn name(&self) -> &str {52 match self {53 Self::Keyword { name, .. }54 | Self::Literal { name, .. }55 | Self::Meta { name, .. }56 | Self::Error { name, .. } => name,57 }58 }59 pub fn expand_kind(&self, lexer: bool) -> TokenStream {60 let name = format_ident!("{}", self.name());61 let attr = match self {62 Self::Keyword { code, .. } => quote! {#[token(#code)]},63 Self::Literal { regex, lexer, .. } => {64 let lexer = lexer65 .as_deref()66 .map(TokenStream::from_str)67 .map(|r| r.expect("path is correct"));68 quote! 
{#[regex(#regex, #lexer)]}69 }70 Self::Error {71 regex, priority, ..72 } if regex.is_some() => {73 let priority = priority.map(|p| quote! {, priority = #p});74 quote! {#[regex(#regex #priority)]}75 }76 _ => quote! {},77 };78 let attr = if lexer {79 attr80 } else {81 quote! {}82 };83 quote! {84 #attr85 #name86 }87 }88 pub fn expand_t_macros(&self) -> Option<TokenStream> {89 match self {90 Self::Keyword { code, name } => {91 let code = escape_token_macro(code);92 let name = format_ident!("{name}");93 Some(quote! {94 [#code] => {$crate::SyntaxKind::#name}95 })96 }97 // Meta items should not appear in T![_]98 _ => None,99 }100 }101102 /// How this token should be referenced in code103 /// Keywords are referenced with `T![_]` macro,104 /// and literals are referenced directly by name105 pub fn reference(&self) -> TokenStream {106 if let Self::Keyword { code, .. } = self {107 let code = escape_token_macro(code);108 quote! {T![#code]}109 } else {110 let name = self.name();111 let ident = format_ident!("{name}");112 quote! {#ident}113 }114 }115116 pub fn method_name(&self) -> Ident {117 match self {118 Self::Keyword { name, .. } => {119 format_ident!("{}_token", name.to_lowercase())120 }121 Self::Literal { name, .. } => {122 format_ident!("{}_lit", name.to_lowercase())123 }124 Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),125 Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),126 }127 }128}129130#[macro_export]131macro_rules! define_kinds {132 ($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{133 $into.define_token(TokenKind::Literal {134 grammar_name: format!("LIT_{}!", $name),135 name: $name.to_owned(),136 regex: $regex.to_owned(),137 lexer: None $(.or_else(|| Some($lexer.to_string())))?,138 });139 $(define_kinds!($into = $($rest)*))?140 }};141 ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? 
$(; $($rest:tt)*)?) => {{142 {143 let regex = None$(.or(Some($regex.to_owned())))?;144 let priority = None$(.or(Some($priority)))?;145 $into.define_token(TokenKind::Error {146 grammar_name: format!("ERROR_{}!", $name),147 name: format!("ERROR_{}", $name),148 is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),149 regex,150 priority,151 });152 }153 $(define_kinds!($into = $($rest)*))?154 }};155 ($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{156 $into.define_token(TokenKind::Keyword {157 code: format!("{}", $tok),158 name: $name.to_owned(),159 });160 $(define_kinds!($into = $($rest)*))?161 }};162 ($into:ident =) => {{}}163}164use std::{collections::HashSet, str::FromStr};165166use indexmap::IndexMap;167use proc_macro2::{Ident, TokenStream};168use quote::{format_ident, quote};169170use super::escape_token_macro;171172impl KindsSrc {173 pub fn new() -> Self {174 Self {175 defined_tokens: IndexMap::new(),176 defined_node_names: HashSet::new(),177 nodes: Vec::new(),178 }179 }180 pub fn define_token(&mut self, token: TokenKind) {181 assert!(182 self.defined_node_names.insert(token.name().to_owned()),183 "node name already defined: {}",184 token.name()185 );186 assert!(187 self.defined_tokens188 .insert(token.grammar_name().to_owned(), token.clone())189 .is_none(),190 "token already defined: {}",191 token.grammar_name()192 );193 }194 pub fn define_node(&mut self, node: &str) {195 assert!(196 self.defined_node_names.insert(node.to_owned()),197 "node name already defined: {node}"198 );199 self.nodes.push(node.to_string());200 }201 pub fn token(&self, tok: &str) -> Option<&TokenKind> {202 self.defined_tokens.get(tok)203 }204 pub fn is_token(&self, tok: &str) -> bool {205 self.defined_tokens.contains_key(tok)206 }207 pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {208 self.defined_tokens.iter().map(|(_, v)| v)209 }210}211212pub fn jsonnet_kinds() -> KindsSrc {213 let mut kinds = KindsSrc::new();214 define_kinds![kinds 
=215 "||" => "OR";216 "??" => "NULL_COAELSE";217 "&&" => "AND";218 "|" => "BIT_OR";219 "^" => "BIT_XOR";220 "&" => "BIT_AND";221 "==" => "EQ";222 "!=" => "NE";223 "<" => "LT";224 ">" => "GT";225 "<=" => "LE";226 ">=" => "GE";227 "<<" => "LHS";228 ">>" => "RHS";229 "+" => "PLUS";230 "-" => "MINUS";231 "*" => "MUL";232 "/" => "DIV";233 "%" => "MODULO";234 "!" => "NOT";235 "~" => "BIT_NOT";236 "[" => "L_BRACK";237 "]" => "R_BRACK";238 "(" => "L_PAREN";239 ")" => "R_PAREN";240 "{" => "L_BRACE";241 "}" => "R_BRACE";242 ":" => "COLON";243 ";" => "SEMI";244 "." => "DOT";245 "..." => "DOTDOTDOT";246 "," => "COMMA";247 "$" => "DOLLAR";248 "=" => "ASSIGN";249 "?" => "QUESTION_MARK";250 // Literals251 lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";252 error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";253 error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";254 error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";255 lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";256 error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";257 lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";258 error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";259 lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";260 error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";261 lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";262 error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";263 error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";264 lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";265 error("STRING_BLOCK_UNEXPECTED_END", lexer = true);266 error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);267 error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);268 error("STRING_BLOCK_MISSING_INDENT", lexer = true);269 lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";270 lit("WHITESPACE") => r"[ \t\n\r]+";271 
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";272 lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";273 lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";274 error("COMMENT_TOO_SHORT") => r"/\*/";275 error("COMMENT_UNTERMINATED") => r"/\*([^*/]|\*[^/])+";276 ];277 kinds278}xtask/src/sourcegen/mod.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -56,14 +56,7 @@
});
}
SpecialName::Error => {
- eprintln!("implicit error: {name}");
- kinds.define_token(TokenKind::Error {
- grammar_name: token.to_owned(),
- name: format!("ERROR_{name}"),
- regex: None,
- priority: None,
- is_lexer_error: true,
- });
+ panic!("error token ERROR_{name} must be explicitly defined in jsonnet_kinds()");
}
}
continue;
@@ -170,6 +163,24 @@
quote! {}
};
+ let error_desc_arms = kinds.tokens().filter_map(|t| { // lazily yields one `KIND => Some(description)` match arm per error token
+ if let TokenKind::Error {
+ name, description, ..
+ } = t
+ {
+ let ident = format_ident!("{name}"); // the kind name becomes the match pattern identifier
+ Some(quote! { #ident => ::core::option::Option::Some(#description) })
+ } else {
+ None // non-error tokens contribute no arm
+ }
+ });
+
+ let display_name_arms = kinds.tokens().map(|t| { // lazily yields one `KIND => display name` match arm per defined token, of any token kind
+ let ident = format_ident!("{}", t.name());
+ let display = t.display_name();
+ quote! { #ident => #display }
+ });
+
let ast = quote! {
#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
@@ -200,6 +211,22 @@
#is_enum
+ pub fn error_description(self) -> Option<&'static str> { // template for the generated method: message for error kinds, `None` otherwise
+ match self {
+ #(#error_desc_arms,)* // splices in every arm from `error_desc_arms`, each followed by a comma
+ LEXING_ERROR => ::core::option::Option::Some("unexpected character"), // arm written literally here rather than coming from `error_desc_arms`
+ _ => None, // every kind that is not an error kind
+ }
+ }
+
+ pub fn display_name(self) -> &'static str { // template for the generated method: short human-readable name of a kind
+ match self {
+ #(#display_name_arms,)* // splices in every arm from `display_name_arms`, each followed by a comma
+ LEXING_ERROR => "unexpected character", // arm written literally here rather than coming from `display_name_arms`
+ _ => "unknown", // fallback for kinds that got no arm from the token list
+ }
+ }
+
pub fn from_raw(r: u16) -> Self {
assert!(r < Self::__LAST as u16);
unsafe { std::mem::transmute(r) }