1#[derive(Debug)]2pub struct KindsSrc {3 4 defined_tokens: IndexMap<String, TokenKind>,5 defined_node_names: HashSet<String>,6 pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11 12 Meta { grammar_name: String, name: String },13 14 Error {15 grammar_name: String,16 name: String,17 #[allow(dead_code)]18 19 is_lexer_error: bool,20 regex: Option<String>,21 priority: Option<u32>,22 description: String,23 },24 25 Keyword {26 27 code: String,28 name: String,29 },30 31 Literal {32 33 grammar_name: String,34 name: String,35 36 regex: String,37 38 lexer: Option<String>,39 },40}4142impl TokenKind {43 pub fn grammar_name(&self) -> &str {44 match self {45 Self::Keyword { code, .. } => code,46 Self::Literal { grammar_name, .. }47 | Self::Meta { grammar_name, .. }48 | Self::Error { grammar_name, .. } => grammar_name,49 }50 }51 52 pub fn name(&self) -> &str {53 match self {54 Self::Keyword { name, .. }55 | Self::Literal { name, .. }56 | Self::Meta { name, .. }57 | Self::Error { name, .. } => name,58 }59 }60 pub fn expand_kind(&self, lexer: bool) -> TokenStream {61 let name = format_ident!("{}", self.name());62 let attr = match self {63 Self::Keyword { code, .. } => quote! {#[token(#code)]},64 Self::Literal { regex, lexer, .. } => {65 let lexer = lexer66 .as_deref()67 .map(TokenStream::from_str)68 .map(|r| r.expect("path is correct"));69 quote! {#[regex(#regex, #lexer)]}70 }71 Self::Error {72 regex, priority, ..73 } if regex.is_some() => {74 let priority = priority.map(|p| quote! {, priority = #p});75 quote! {#[regex(#regex #priority)]}76 }77 _ => quote! {},78 };79 let attr = if lexer {80 attr81 } else {82 quote! {}83 };84 quote! {85 #attr86 #name87 }88 }89 pub fn expand_t_macros(&self) -> Option<TokenStream> {90 match self {91 Self::Keyword { code, name } => {92 let code = escape_token_macro(code);93 let name = format_ident!("{name}");94 Some(quote! {95 [#code] => {$crate::SyntaxKind::#name}96 })97 }98 99 _ => None,100 }101 }102103 104 105 106 pub fn reference(&self) -> TokenStream {107 if let Self::Keyword { code, .. } = self {108 let code = escape_token_macro(code);109 quote! {T![#code]}110 } else {111 let name = self.name();112 let ident = format_ident!("{name}");113 quote! {#ident}114 }115 }116117 pub fn display_name(&self) -> String {118 match self {119 Self::Keyword { code, .. } => format!("'{code}'"),120 Self::Literal { name, .. } => match name.as_str() {121 "FLOAT" => "number".to_owned(),122 "IDENT" => "identifier".to_owned(),123 "STRING_DOUBLE" => "\"string\"".to_owned(),124 "STRING_SINGLE" => "'string'".to_owned(),125 "STRING_DOUBLE_VERBATIM" => "@\"string\"".to_owned(),126 "STRING_SINGLE_VERBATIM" => "@'string'".to_owned(),127 "STRING_BLOCK" => "|||string|||".to_owned(),128 "WHITESPACE" => "whitespace".to_owned(),129 "SINGLE_LINE_SLASH_COMMENT" => "//comment".to_owned(),130 "SINGLE_LINE_HASH_COMMENT" => "#comment".to_owned(),131 "MULTI_LINE_COMMENT" => "/*comment*/".to_owned(),132 _ => name.to_lowercase(),133 },134 Self::Meta { name, .. } => name.to_lowercase(),135 Self::Error { description, .. } => description.clone(),136 }137 }138139 pub fn method_name(&self) -> Ident {140 match self {141 Self::Keyword { name, .. } => {142 format_ident!("{}_token", name.to_lowercase())143 }144 Self::Literal { name, .. } => {145 format_ident!("{}_lit", name.to_lowercase())146 }147 Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),148 Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),149 }150 }151}152153#[macro_export]154macro_rules! define_kinds {155 ($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{156 $into.define_token(TokenKind::Literal {157 grammar_name: format!("LIT_{}!", $name),158 name: $name.to_owned(),159 regex: $regex.to_owned(),160 lexer: None $(.or_else(|| Some($lexer.to_string())))?,161 });162 $(define_kinds!($into = $($rest)*))?163 }};164 ($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{165 {166 let regex = None$(.or(Some($regex.to_owned())))?;167 let priority = None$(.or(Some($priority)))?;168 $into.define_token(TokenKind::Error {169 grammar_name: format!("ERROR_{}!", $name),170 name: format!("ERROR_{}", $name),171 is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),172 regex,173 priority,174 description: $desc.to_owned(),175 });176 }177 $(define_kinds!($into = $($rest)*))?178 }};179 ($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{180 $into.define_token(TokenKind::Keyword {181 code: format!("{}", $tok),182 name: $name.to_owned(),183 });184 $(define_kinds!($into = $($rest)*))?185 }};186 ($into:ident =) => {{}}187}188use std::{collections::HashSet, str::FromStr};189190use indexmap::IndexMap;191use proc_macro2::{Ident, TokenStream};192use quote::{format_ident, quote};193194use super::escape_token_macro;195196impl KindsSrc {197 pub fn new() -> Self {198 Self {199 defined_tokens: IndexMap::new(),200 defined_node_names: HashSet::new(),201 nodes: Vec::new(),202 }203 }204 pub fn define_token(&mut self, token: TokenKind) {205 assert!(206 self.defined_node_names.insert(token.name().to_owned()),207 "node name already defined: {}",208 token.name()209 );210 assert!(211 self.defined_tokens212 .insert(token.grammar_name().to_owned(), token.clone())213 .is_none(),214 "token already defined: {}",215 token.grammar_name()216 );217 }218 pub fn define_node(&mut self, node: &str) {219 assert!(220 self.defined_node_names.insert(node.to_owned()),221 "node name already defined: {node}"222 );223 self.nodes.push(node.to_string());224 }225 pub fn token(&self, tok: &str) -> Option<&TokenKind> {226 self.defined_tokens.get(tok)227 }228 pub fn is_token(&self, tok: &str) -> bool {229 self.defined_tokens.contains_key(tok)230 }231 pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {232 self.defined_tokens.iter().map(|(_, v)| v)233 }234}235236pub fn jsonnet_kinds() -> KindsSrc {237 let mut kinds = KindsSrc::new();238 define_kinds![kinds =239 "||" => "OR";240 "??" => "NULL_COAELSE";241 "&&" => "AND";242 "|" => "BIT_OR";243 "^" => "BIT_XOR";244 "&" => "BIT_AND";245 "==" => "EQ";246 "!=" => "NE";247 "<" => "LT";248 ">" => "GT";249 "<=" => "LE";250 ">=" => "GE";251 "<<" => "LHS";252 ">>" => "RHS";253 "+" => "PLUS";254 "-" => "MINUS";255 "*" => "MUL";256 "/" => "DIV";257 "%" => "MODULO";258 "!" => "NOT";259 "~" => "BIT_NOT";260 "[" => "L_BRACK";261 "]" => "R_BRACK";262 "(" => "L_PAREN";263 ")" => "R_PAREN";264 "{" => "L_BRACE";265 "}" => "R_BRACE";266 ":" => "COLON";267 ";" => "SEMI";268 "." => "DOT";269 "..." => "DOTDOTDOT";270 "," => "COMMA";271 "$" => "DOLLAR";272 "=" => "ASSIGN";273 "?" => "QUESTION_MARK";274 275 lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";276 error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";277 error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";278 error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";279 lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";280 error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";281 lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";282 error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";283 lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";284 error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";285 lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";286 error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";287 error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";288 lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";289 error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);290 error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);291 error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);292 error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);293 lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";294 lit("WHITESPACE") => r"[ \t\n\r]+";295 lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";296 lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";297 lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*+[^*/])*\*+/";298 error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";299 error("COMMENT_UNTERMINATED", "unterminated multi-line comment") => r"/\*([^*/]|\*[^/])+";300 error("NO_OPERATOR", "expected operator");301 error("MISSING_TOKEN", "missing token");302 error("UNEXPECTED_TOKEN", "unexpected token");303 error("CUSTOM", "error");304 ];305 kinds306}