git.delta.rocks / jrsonnet / refs/commits / f2ff4e6fd929

difftreelog

source

xtask/src/sourcegen/kinds.rs8.0 KiBsourcehistory
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		/// Is this error returned by lexer directly, or from lex.rs18		is_lexer_error: bool,19		regex: Option<String>,20		priority: Option<u32>,21	},22	/// Keyword - literal match of token23	Keyword {24		/// How this keyword appears in grammar/code, should be same as Kinds key25		code: String,26		name: String,27	},28	/// Literal - something defined by user, i.e strings, identifiers, smth29	Literal {30		/// How this keyword appears in grammar, should be same as Kinds key31		grammar_name: String,32		name: String,33		/// Regex for Logos lexer34		regex: String,35		/// Path to custom lexer36		lexer: Option<String>,37	},38}3940impl TokenKind {41	pub fn grammar_name(&self) -> &str {42		match self {43			TokenKind::Keyword { code, .. } => code,44			TokenKind::Literal { grammar_name, .. } => grammar_name,45			TokenKind::Meta { grammar_name, .. } => grammar_name,46			TokenKind::Error { grammar_name, .. } => grammar_name,47		}48	}49	/// How this keyword should appear in kinds enum, screaming snake cased50	pub fn name(&self) -> &str {51		match self {52			TokenKind::Keyword { name, .. } => name,53			TokenKind::Literal { name, .. } => name,54			TokenKind::Meta { name, .. } => name,55			TokenKind::Error { name, .. } => name,56		}57	}58	pub fn expand_kind(&self) -> TokenStream {59		let name = format_ident!("{}", self.name());60		let attr = match self {61			TokenKind::Keyword { code, .. } => quote! {#[token(#code)]},62			TokenKind::Literal { regex, lexer, .. } => {63				let lexer = lexer64					.as_deref()65					.map(TokenStream::from_str)66					.map(|r| r.expect("path is correct"));67				quote! {#[regex(#regex, #lexer)]}68			}69			TokenKind::Error {70				regex, priority, ..71			} if regex.is_some() => {72				let priority = priority.map(|p| quote! {, priority = #p});73				quote! {#[regex(#regex #priority)]}74			}75			_ => quote! {},76		};77		quote! {78			#attr79			#name80		}81	}82	pub fn expand_t_macros(&self) -> Option<TokenStream> {83		match self {84			TokenKind::Keyword { code, name } => {85				let code = escape_token_macro(code);86				let name = format_ident!("{name}");87				Some(quote! {88					[#code] => {$crate::SyntaxKind::#name}89				})90			}91			// Meta items should not appear in T![_]92			_ => None,93		}94	}9596	/// How this token should be referenced in code97	/// Keywords are referenced with `T![_]` macro,98	/// and literals are referenced directly by name99	pub fn reference(&self) -> TokenStream {100		match self {101			TokenKind::Keyword { code, .. } => {102				let code = escape_token_macro(code);103				quote! {T![#code]}104			}105			_ => {106				let name = self.name();107				let ident = format_ident!("{name}");108				quote! {#ident}109			}110		}111	}112113	pub fn method_name(&self) -> Ident {114		match self {115			TokenKind::Keyword { name, .. } => {116				format_ident!("{}_token", name.to_lowercase())117			}118			TokenKind::Literal { name, .. } => {119				format_ident!("{}_lit", name.to_lowercase())120			}121			TokenKind::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),122			TokenKind::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),123		}124	}125}126127#[macro_export]128macro_rules! define_kinds {129	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{130		$into.define_token(TokenKind::Literal {131			grammar_name: format!("LIT_{}!", $name),132			name: $name.to_owned(),133			regex: $regex.to_owned(),134			lexer: None $(.or_else(|| Some($lexer.to_string())))?,135		});136		$(define_kinds!($into = $($rest)*))?137	}};138	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{139		{140			let regex = None$(.or(Some($regex.to_owned())))?;141			let priority = None$(.or(Some($priority)))?;142			$into.define_token(TokenKind::Error {143				grammar_name: format!("ERROR_{}!", $name),144				name: format!("ERROR_{}", $name),145				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),146				regex,147				priority,148			});149		}150		$(define_kinds!($into = $($rest)*))?151	}};152	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{153		$into.define_token(TokenKind::Keyword {154			code: format!("{}", $tok),155			name: $name.to_owned(),156		});157		$(define_kinds!($into = $($rest)*))?158	}};159	($into:ident =) => {{}}160}161use std::{collections::HashSet, str::FromStr};162163pub use define_kinds;164use indexmap::IndexMap;165use proc_macro2::{Ident, TokenStream};166use quote::{format_ident, quote};167168use super::escape_token_macro;169170impl KindsSrc {171	pub fn new() -> Self {172		Self {173			defined_tokens: IndexMap::new(),174			defined_node_names: HashSet::new(),175			nodes: Vec::new(),176		}177	}178	pub fn define_token(&mut self, token: TokenKind) {179		assert!(180			self.defined_node_names.insert(token.name().to_owned()),181			"node name already defined: {}",182			token.name()183		);184		assert!(185			self.defined_tokens186				.insert(token.grammar_name().to_owned(), token.clone())187				.is_none(),188			"token already defined: {}",189			token.grammar_name()190		)191	}192	pub fn define_node(&mut self, node: &str) {193		assert!(194			self.defined_node_names.insert(node.to_owned()),195			"node name already defined: {}",196			node197		);198		self.nodes.push(node.to_string())199	}200	pub fn token(&self, tok: &str) -> Option<&TokenKind> {201		self.defined_tokens.get(tok)202	}203	pub fn is_token(&self, tok: &str) -> bool {204		self.defined_tokens.contains_key(tok)205	}206	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {207		self.defined_tokens.iter().map(|(_, v)| v)208	}209}210211pub fn jsonnet_kinds() -> KindsSrc {212	let mut kinds = KindsSrc::new();213	define_kinds![kinds =214		"||" => "OR";215		"??" => "NULL_COAELSE";216		"&&" => "AND";217		"|" => "BIT_OR";218		"^" => "BIT_XOR";219		"&" => "BIT_AND";220		"==" => "EQ";221		"!=" => "NE";222		"<" => "LT";223		">" => "GT";224		"<=" => "LE";225		">=" => "GE";226		"<<" => "LHS";227		">>" => "RHS";228		"+" => "PLUS";229		"-" => "MINUS";230		"*" => "MUL";231		"/" => "DIV";232		"%" => "MODULO";233		"!" => "NOT";234		"~" => "BIT_NOT";235		"[" => "L_BRACK";236		"]" => "R_BRACK";237		"(" => "L_PAREN";238		")" => "R_PAREN";239		"{" => "L_BRACE";240		"}" => "R_BRACE";241		":" => "COLON";242		"::" => "COLONCOLON";243		":::" => "COLONCOLONCOLON";244		";" => "SEMI";245		"." => "DOT";246		"..." => "DOTDOTDOT";247		"," => "COMMA";248		"$" => "DOLLAR";249		"=" => "ASSIGN";250		"?" => "QUESTION_MARK";251		// Literals252		lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";253		error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";254		error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";255		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";256		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";257		error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";258		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";259		error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";260		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";261		error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";262		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";263		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";264		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";265		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";266		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);267		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);268		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);269		error("STRING_BLOCK_MISSING_INDENT", lexer = true);270		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";271		lit("WHITESPACE") => r"[ \t\n\r]+";272		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";273		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*(\r\n|\n)?";274		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";275		error("COMMENT_TOO_SHORT") => r"/\*/";276		error("COMMENT_UNTERMINATED") =>  r"/\*([^*]|\*[^/])+";277	];278	kinds279}