1#[derive(Debug)]2pub struct KindsSrc {3 4 defined_tokens: IndexMap<String, TokenKind>,5 defined_node_names: HashSet<String>,6 pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11 12 Meta { grammar_name: String, name: String },13 14 Error {15 grammar_name: String,16 name: String,17 #[allow(dead_code)]18 19 is_lexer_error: bool,20 regex: Option<String>,21 priority: Option<u32>,22 },23 24 Keyword {25 26 code: String,27 name: String,28 },29 30 Literal {31 32 grammar_name: String,33 name: String,34 35 regex: String,36 37 lexer: Option<String>,38 },39}4041impl TokenKind {42 pub fn grammar_name(&self) -> &str {43 match self {44 Self::Keyword { code, .. } => code,45 Self::Literal { grammar_name, .. }46 | Self::Meta { grammar_name, .. }47 | Self::Error { grammar_name, .. } => grammar_name,48 }49 }50 51 pub fn name(&self) -> &str {52 match self {53 Self::Keyword { name, .. }54 | Self::Literal { name, .. }55 | Self::Meta { name, .. }56 | Self::Error { name, .. } => name,57 }58 }59 pub fn expand_kind(&self, lexer: bool) -> TokenStream {60 let name = format_ident!("{}", self.name());61 let attr = match self {62 Self::Keyword { code, .. } => quote! {#[token(#code)]},63 Self::Literal { regex, lexer, .. } => {64 let lexer = lexer65 .as_deref()66 .map(TokenStream::from_str)67 .map(|r| r.expect("path is correct"));68 quote! {#[regex(#regex, #lexer)]}69 }70 Self::Error {71 regex, priority, ..72 } if regex.is_some() => {73 let priority = priority.map(|p| quote! {, priority = #p});74 quote! {#[regex(#regex #priority)]}75 }76 _ => quote! {},77 };78 let attr = if lexer {79 attr80 } else {81 quote! {}82 };83 quote! {84 #attr85 #name86 }87 }88 pub fn expand_t_macros(&self) -> Option<TokenStream> {89 match self {90 Self::Keyword { code, name } => {91 let code = escape_token_macro(code);92 let name = format_ident!("{name}");93 Some(quote! {94 [#code] => {$crate::SyntaxKind::#name}95 })96 }97 98 _ => None,99 }100 }101102 103 104 105 pub fn reference(&self) -> TokenStream {106 if let Self::Keyword { code, .. } = self {107 let code = escape_token_macro(code);108 quote! {T![#code]}109 } else {110 let name = self.name();111 let ident = format_ident!("{name}");112 quote! {#ident}113 }114 }115116 pub fn method_name(&self) -> Ident {117 match self {118 Self::Keyword { name, .. } => {119 format_ident!("{}_token", name.to_lowercase())120 }121 Self::Literal { name, .. } => {122 format_ident!("{}_lit", name.to_lowercase())123 }124 Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),125 Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),126 }127 }128}129130#[macro_export]131macro_rules! define_kinds {132 ($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{133 $into.define_token(TokenKind::Literal {134 grammar_name: format!("LIT_{}!", $name),135 name: $name.to_owned(),136 regex: $regex.to_owned(),137 lexer: None $(.or_else(|| Some($lexer.to_string())))?,138 });139 $(define_kinds!($into = $($rest)*))?140 }};141 ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{142 {143 let regex = None$(.or(Some($regex.to_owned())))?;144 let priority = None$(.or(Some($priority)))?;145 $into.define_token(TokenKind::Error {146 grammar_name: format!("ERROR_{}!", $name),147 name: format!("ERROR_{}", $name),148 is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),149 regex,150 priority,151 });152 }153 $(define_kinds!($into = $($rest)*))?154 }};155 ($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{156 $into.define_token(TokenKind::Keyword {157 code: format!("{}", $tok),158 name: $name.to_owned(),159 });160 $(define_kinds!($into = $($rest)*))?161 }};162 ($into:ident =) => {{}}163}164use std::{collections::HashSet, str::FromStr};165166use indexmap::IndexMap;167use proc_macro2::{Ident, TokenStream};168use quote::{format_ident, quote};169170use super::escape_token_macro;171172impl KindsSrc {173 pub fn new() -> Self {174 Self {175 defined_tokens: IndexMap::new(),176 defined_node_names: HashSet::new(),177 nodes: Vec::new(),178 }179 }180 pub fn define_token(&mut self, token: TokenKind) {181 assert!(182 self.defined_node_names.insert(token.name().to_owned()),183 "node name already defined: {}",184 token.name()185 );186 assert!(187 self.defined_tokens188 .insert(token.grammar_name().to_owned(), token.clone())189 .is_none(),190 "token already defined: {}",191 token.grammar_name()192 );193 }194 pub fn define_node(&mut self, node: &str) {195 assert!(196 self.defined_node_names.insert(node.to_owned()),197 "node name already defined: {node}"198 );199 self.nodes.push(node.to_string());200 }201 pub fn token(&self, tok: &str) -> Option<&TokenKind> {202 self.defined_tokens.get(tok)203 }204 pub fn is_token(&self, tok: &str) -> bool {205 self.defined_tokens.contains_key(tok)206 }207 pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {208 self.defined_tokens.iter().map(|(_, v)| v)209 }210}211212pub fn jsonnet_kinds() -> KindsSrc {213 let mut kinds = KindsSrc::new();214 define_kinds![kinds =215 "||" => "OR";216 "??" => "NULL_COAELSE";217 "&&" => "AND";218 "|" => "BIT_OR";219 "^" => "BIT_XOR";220 "&" => "BIT_AND";221 "==" => "EQ";222 "!=" => "NE";223 "<" => "LT";224 ">" => "GT";225 "<=" => "LE";226 ">=" => "GE";227 "<<" => "LHS";228 ">>" => "RHS";229 "+" => "PLUS";230 "-" => "MINUS";231 "*" => "MUL";232 "/" => "DIV";233 "%" => "MODULO";234 "!" => "NOT";235 "~" => "BIT_NOT";236 "[" => "L_BRACK";237 "]" => "R_BRACK";238 "(" => "L_PAREN";239 ")" => "R_PAREN";240 "{" => "L_BRACE";241 "}" => "R_BRACE";242 ":" => "COLON";243 "::" => "COLONCOLON";244 ":::" => "COLONCOLONCOLON";245 ";" => "SEMI";246 "." => "DOT";247 "..." => "DOTDOTDOT";248 "," => "COMMA";249 "$" => "DOLLAR";250 "=" => "ASSIGN";251 "?" => "QUESTION_MARK";252 253 lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";254 error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";255 error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";256 error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";257 lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";258 error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";259 lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";260 error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";261 lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";262 error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";263 lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";264 error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";265 error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";266 lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";267 error("STRING_BLOCK_UNEXPECTED_END", lexer = true);268 error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);269 error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);270 error("STRING_BLOCK_MISSING_INDENT", lexer = true);271 lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";272 lit("WHITESPACE") => r"[ \t\n\r]+";273 lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";274 lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";275 lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";276 error("COMMENT_TOO_SHORT") => r"/\*/";277 error("COMMENT_UNTERMINATED") => r"/\*([^*/]|\*[^/])+";278 ];279 kinds280}