1#[derive(Debug)]2pub struct KindsSrc {3 4 defined_tokens: IndexMap<String, TokenKind>,5 defined_node_names: HashSet<String>,6 pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11 12 Meta { grammar_name: String, name: String },13 14 Error {15 grammar_name: String,16 name: String,17 #[allow(dead_code)]18 19 is_lexer_error: bool,20 regex: Option<String>,21 priority: Option<u32>,22 description: String,23 },24 25 Keyword {26 27 code: String,28 name: String,29 },30 31 Literal {32 33 grammar_name: String,34 name: String,35 36 regex: String,37 38 lexer: Option<String>,39 },40}4142impl TokenKind {43 pub fn grammar_name(&self) -> &str {44 match self {45 Self::Keyword { code, .. } => code,46 Self::Literal { grammar_name, .. }47 | Self::Meta { grammar_name, .. }48 | Self::Error { grammar_name, .. } => grammar_name,49 }50 }51 52 pub fn name(&self) -> &str {53 match self {54 Self::Keyword { name, .. }55 | Self::Literal { name, .. }56 | Self::Meta { name, .. }57 | Self::Error { name, .. } => name,58 }59 }60 pub fn expand_kind(&self, lexer: bool) -> TokenStream {61 let name = format_ident!("{}", self.name());62 let attr = match self {63 Self::Keyword { code, .. } => quote! {#[token(#code)]},64 Self::Literal { regex, lexer, .. } => {65 let lexer = lexer66 .as_deref()67 .map(TokenStream::from_str)68 .map(|r| r.expect("path is correct"));69 quote! {#[regex(#regex, #lexer)]}70 }71 Self::Error {72 regex, priority, ..73 } if regex.is_some() => {74 let priority = priority.map(|p| quote! {, priority = #p});75 quote! {#[regex(#regex #priority)]}76 }77 _ => quote! {},78 };79 let attr = if lexer {80 attr81 } else {82 quote! {}83 };84 quote! {85 #attr86 #name87 }88 }89 pub fn expand_t_macros(&self) -> Option<TokenStream> {90 match self {91 Self::Keyword { code, name } => {92 let code = escape_token_macro(code);93 let name = format_ident!("{name}");94 Some(quote! {95 [#code] => {$crate::SyntaxKind::#name}96 })97 }98 99 _ => None,100 }101 }102103 104 105 106 pub fn reference(&self) -> TokenStream {107 if let Self::Keyword { code, .. } = self {108 let code = escape_token_macro(code);109 quote! {T![#code]}110 } else {111 let name = self.name();112 let ident = format_ident!("{name}");113 quote! {#ident}114 }115 }116117 pub fn display_name(&self) -> String {118 match self {119 Self::Keyword { code, .. } => format!("'{code}'"),120 Self::Literal { name, .. } => match name.as_str() {121 "FLOAT" => "number".to_owned(),122 "IDENT" => "identifier".to_owned(),123 "STRING_DOUBLE" | "STRING_SINGLE" | "STRING_DOUBLE_VERBATIM"124 | "STRING_SINGLE_VERBATIM" | "STRING_BLOCK" => "string".to_owned(),125 "WHITESPACE" => "whitespace".to_owned(),126 "SINGLE_LINE_SLASH_COMMENT" | "SINGLE_LINE_HASH_COMMENT"127 | "MULTI_LINE_COMMENT" => "comment".to_owned(),128 _ => name.to_lowercase(),129 },130 Self::Meta { name, .. } => name.to_lowercase(),131 Self::Error { description, .. } => description.clone(),132 }133 }134135 pub fn method_name(&self) -> Ident {136 match self {137 Self::Keyword { name, .. } => {138 format_ident!("{}_token", name.to_lowercase())139 }140 Self::Literal { name, .. } => {141 format_ident!("{}_lit", name.to_lowercase())142 }143 Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),144 Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),145 }146 }147}148149#[macro_export]150macro_rules! define_kinds {151 ($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{152 $into.define_token(TokenKind::Literal {153 grammar_name: format!("LIT_{}!", $name),154 name: $name.to_owned(),155 regex: $regex.to_owned(),156 lexer: None $(.or_else(|| Some($lexer.to_string())))?,157 });158 $(define_kinds!($into = $($rest)*))?159 }};160 ($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{161 {162 let regex = None$(.or(Some($regex.to_owned())))?;163 let priority = None$(.or(Some($priority)))?;164 $into.define_token(TokenKind::Error {165 grammar_name: format!("ERROR_{}!", $name),166 name: format!("ERROR_{}", $name),167 is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),168 regex,169 priority,170 description: $desc.to_owned(),171 });172 }173 $(define_kinds!($into = $($rest)*))?174 }};175 ($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{176 $into.define_token(TokenKind::Keyword {177 code: format!("{}", $tok),178 name: $name.to_owned(),179 });180 $(define_kinds!($into = $($rest)*))?181 }};182 ($into:ident =) => {{}}183}184use std::{collections::HashSet, str::FromStr};185186use indexmap::IndexMap;187use proc_macro2::{Ident, TokenStream};188use quote::{format_ident, quote};189190use super::escape_token_macro;191192impl KindsSrc {193 pub fn new() -> Self {194 Self {195 defined_tokens: IndexMap::new(),196 defined_node_names: HashSet::new(),197 nodes: Vec::new(),198 }199 }200 pub fn define_token(&mut self, token: TokenKind) {201 assert!(202 self.defined_node_names.insert(token.name().to_owned()),203 "node name already defined: {}",204 token.name()205 );206 assert!(207 self.defined_tokens208 .insert(token.grammar_name().to_owned(), token.clone())209 .is_none(),210 "token already defined: {}",211 token.grammar_name()212 );213 }214 pub fn define_node(&mut self, node: &str) {215 assert!(216 self.defined_node_names.insert(node.to_owned()),217 "node name already defined: {node}"218 );219 self.nodes.push(node.to_string());220 }221 pub fn token(&self, tok: &str) -> Option<&TokenKind> {222 self.defined_tokens.get(tok)223 }224 pub fn is_token(&self, tok: &str) -> bool {225 self.defined_tokens.contains_key(tok)226 }227 pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {228 self.defined_tokens.iter().map(|(_, v)| v)229 }230}231232pub fn jsonnet_kinds() -> KindsSrc {233 let mut kinds = KindsSrc::new();234 define_kinds![kinds =235 "||" => "OR";236 "??" => "NULL_COAELSE";237 "&&" => "AND";238 "|" => "BIT_OR";239 "^" => "BIT_XOR";240 "&" => "BIT_AND";241 "==" => "EQ";242 "!=" => "NE";243 "<" => "LT";244 ">" => "GT";245 "<=" => "LE";246 ">=" => "GE";247 "<<" => "LHS";248 ">>" => "RHS";249 "+" => "PLUS";250 "-" => "MINUS";251 "*" => "MUL";252 "/" => "DIV";253 "%" => "MODULO";254 "!" => "NOT";255 "~" => "BIT_NOT";256 "[" => "L_BRACK";257 "]" => "R_BRACK";258 "(" => "L_PAREN";259 ")" => "R_PAREN";260 "{" => "L_BRACE";261 "}" => "R_BRACE";262 ":" => "COLON";263 ";" => "SEMI";264 "." => "DOT";265 "..." => "DOTDOTDOT";266 "," => "COMMA";267 "$" => "DOLLAR";268 "=" => "ASSIGN";269 "?" => "QUESTION_MARK";270 271 lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";272 error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";273 error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";274 error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";275 lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";276 error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";277 lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";278 error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";279 lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";280 error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";281 lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";282 error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";283 error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";284 lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";285 error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);286 error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);287 error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);288 error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);289 lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";290 lit("WHITESPACE") => r"[ \t\n\r]+";291 lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";292 lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";293 lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";294 error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";295 error("COMMENT_UNTERMINATED", "unterminated multi-line comment") => r"/\*([^*/]|\*[^/])+";296 error("NO_OPERATOR", "expected operator");297 error("MISSING_TOKEN", "missing token");298 error("UNEXPECTED_TOKEN", "unexpected token");299 error("CUSTOM", "error");300 ];301 kinds302}