git.delta.rocks / jrsonnet / refs/heads / master

difftreelog

source

xtask/src/sourcegen/kinds.rs9.6 KiBsourcehistory
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		#[allow(dead_code)]18		/// Is this error returned by lexer directly, or from lex.rs19		is_lexer_error: bool,20		regex: Option<String>,21		priority: Option<u32>,22		description: String,23	},24	/// Keyword - literal match of token25	Keyword {26		/// How this keyword appears in grammar/code, should be same as Kinds key27		code: String,28		name: String,29	},30	/// Literal - something defined by user, i.e strings, identifiers, smth31	Literal {32		/// How this keyword appears in grammar, should be same as Kinds key33		grammar_name: String,34		name: String,35		/// Regex for Logos lexer36		regex: String,37		/// Path to custom lexer38		lexer: Option<String>,39	},40}4142impl TokenKind {43	pub fn grammar_name(&self) -> &str {44		match self {45			Self::Keyword { code, .. } => code,46			Self::Literal { grammar_name, .. }47			| Self::Meta { grammar_name, .. }48			| Self::Error { grammar_name, .. } => grammar_name,49		}50	}51	/// How this keyword should appear in kinds enum, screaming snake cased52	pub fn name(&self) -> &str {53		match self {54			Self::Keyword { name, .. }55			| Self::Literal { name, .. }56			| Self::Meta { name, .. }57			| Self::Error { name, .. } => name,58		}59	}60	pub fn expand_kind(&self, lexer: bool) -> TokenStream {61		let name = format_ident!("{}", self.name());62		let attr = match self {63			Self::Keyword { code, .. } => quote! {#[token(#code)]},64			Self::Literal { regex, lexer, .. } => {65				let lexer = lexer66					.as_deref()67					.map(TokenStream::from_str)68					.map(|r| r.expect("path is correct"));69				quote! {#[regex(#regex, #lexer)]}70			}71			Self::Error {72				regex, priority, ..73			} if regex.is_some() => {74				let priority = priority.map(|p| quote! {, priority = #p});75				quote! {#[regex(#regex #priority)]}76			}77			_ => quote! {},78		};79		let attr = if lexer {80			attr81		} else {82			quote! {}83		};84		quote! {85			#attr86			#name87		}88	}89	pub fn expand_t_macros(&self) -> Option<TokenStream> {90		match self {91			Self::Keyword { code, name } => {92				let code = escape_token_macro(code);93				let name = format_ident!("{name}");94				Some(quote! {95					[#code] => {$crate::SyntaxKind::#name}96				})97			}98			// Meta items should not appear in T![_]99			_ => None,100		}101	}102103	/// How this token should be referenced in code104	/// Keywords are referenced with `T![_]` macro,105	/// and literals are referenced directly by name106	pub fn reference(&self) -> TokenStream {107		if let Self::Keyword { code, .. } = self {108			let code = escape_token_macro(code);109			quote! {T![#code]}110		} else {111			let name = self.name();112			let ident = format_ident!("{name}");113			quote! {#ident}114		}115	}116117	pub fn display_name(&self) -> String {118		match self {119			Self::Keyword { code, .. } => format!("'{code}'"),120			Self::Literal { name, .. } => match name.as_str() {121				"FLOAT" => "number".to_owned(),122				"IDENT" => "identifier".to_owned(),123				"STRING_DOUBLE" => "\"string\"".to_owned(),124				"STRING_SINGLE" => "'string'".to_owned(),125				"STRING_DOUBLE_VERBATIM" => "@\"string\"".to_owned(),126				"STRING_SINGLE_VERBATIM" => "@'string'".to_owned(),127				"STRING_BLOCK" => "|||string|||".to_owned(),128				"WHITESPACE" => "whitespace".to_owned(),129				"SINGLE_LINE_SLASH_COMMENT" => "//comment".to_owned(),130				"SINGLE_LINE_HASH_COMMENT" => "#comment".to_owned(),131				"MULTI_LINE_COMMENT" => "/*comment*/".to_owned(),132				_ => name.to_lowercase(),133			},134			Self::Meta { name, .. } => name.to_lowercase(),135			Self::Error { description, .. } => description.clone(),136		}137	}138139	pub fn method_name(&self) -> Ident {140		match self {141			Self::Keyword { name, .. } => {142				format_ident!("{}_token", name.to_lowercase())143			}144			Self::Literal { name, .. } => {145				format_ident!("{}_lit", name.to_lowercase())146			}147			Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),148			Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),149		}150	}151}152153#[macro_export]154macro_rules! define_kinds {155	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{156		$into.define_token(TokenKind::Literal {157			grammar_name: format!("LIT_{}!", $name),158			name: $name.to_owned(),159			regex: $regex.to_owned(),160			lexer: None $(.or_else(|| Some($lexer.to_string())))?,161		});162		$(define_kinds!($into = $($rest)*))?163	}};164	($into:ident = error($name:literal, $desc:literal $(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{165		{166			let regex = None$(.or(Some($regex.to_owned())))?;167			let priority = None$(.or(Some($priority)))?;168			$into.define_token(TokenKind::Error {169				grammar_name: format!("ERROR_{}!", $name),170				name: format!("ERROR_{}", $name),171				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),172				regex,173				priority,174				description: $desc.to_owned(),175			});176		}177		$(define_kinds!($into = $($rest)*))?178	}};179	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{180		$into.define_token(TokenKind::Keyword {181			code: format!("{}", $tok),182			name: $name.to_owned(),183		});184		$(define_kinds!($into = $($rest)*))?185	}};186	($into:ident =) => {{}}187}188use std::{collections::HashSet, str::FromStr};189190use indexmap::IndexMap;191use proc_macro2::{Ident, TokenStream};192use quote::{format_ident, quote};193194use super::escape_token_macro;195196impl KindsSrc {197	pub fn new() -> Self {198		Self {199			defined_tokens: IndexMap::new(),200			defined_node_names: HashSet::new(),201			nodes: Vec::new(),202		}203	}204	pub fn define_token(&mut self, token: TokenKind) {205		assert!(206			self.defined_node_names.insert(token.name().to_owned()),207			"node name already defined: {}",208			token.name()209		);210		assert!(211			self.defined_tokens212				.insert(token.grammar_name().to_owned(), token.clone())213				.is_none(),214			"token already defined: {}",215			token.grammar_name()216		);217	}218	pub fn define_node(&mut self, node: &str) {219		assert!(220			self.defined_node_names.insert(node.to_owned()),221			"node name already defined: {node}"222		);223		self.nodes.push(node.to_string());224	}225	pub fn token(&self, tok: &str) -> Option<&TokenKind> {226		self.defined_tokens.get(tok)227	}228	pub fn is_token(&self, tok: &str) -> bool {229		self.defined_tokens.contains_key(tok)230	}231	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {232		self.defined_tokens.iter().map(|(_, v)| v)233	}234}235236pub fn jsonnet_kinds() -> KindsSrc {237	let mut kinds = KindsSrc::new();238	define_kinds![kinds =239		"||" => "OR";240		"??" => "NULL_COAELSE";241		"&&" => "AND";242		"|" => "BIT_OR";243		"^" => "BIT_XOR";244		"&" => "BIT_AND";245		"==" => "EQ";246		"!=" => "NE";247		"<" => "LT";248		">" => "GT";249		"<=" => "LE";250		">=" => "GE";251		"<<" => "LHS";252		">>" => "RHS";253		"+" => "PLUS";254		"-" => "MINUS";255		"*" => "MUL";256		"/" => "DIV";257		"%" => "MODULO";258		"!" => "NOT";259		"~" => "BIT_NOT";260		"[" => "L_BRACK";261		"]" => "R_BRACK";262		"(" => "L_PAREN";263		")" => "R_PAREN";264		"{" => "L_BRACE";265		"}" => "R_BRACE";266		":" => "COLON";267		";" => "SEMI";268		"." => "DOT";269		"..." => "DOTDOTDOT";270		"," => "COMMA";271		"$" => "DOLLAR";272		"=" => "ASSIGN";273		"?" => "QUESTION_MARK";274		// Literals275		lit("FLOAT") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?(?:[eE][+-]?[0-9]+(?:_[0-9]+)*)?";276		error("FLOAT_JUNK_AFTER_POINT", "junk after decimal point in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)\.[^0-9]";277		error("FLOAT_JUNK_AFTER_EXPONENT", "junk after exponent in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][^+\-0-9]";278		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN", "junk after exponent sign in number literal") => r"(?:0|[1-9][0-9]*(?:_[0-9]+)*)(?:\.[0-9]+(?:_[0-9]+)*)?[eE][+-][^0-9]";279		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";280		error("STRING_DOUBLE_UNTERMINATED", "unterminated double-quoted string") => "\"(?s:[^\"\\\\]|\\\\.)*";281		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";282		error("STRING_SINGLE_UNTERMINATED", "unterminated single-quoted string") => "'(?s:[^'\\\\]|\\\\.)*";283		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";284		error("STRING_DOUBLE_VERBATIM_UNTERMINATED", "unterminated verbatim double-quoted string") => "@\"(?:[^\"]|\"\")*";285		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";286		error("STRING_SINGLE_VERBATIM_UNTERMINATED", "unterminated verbatim single-quoted string") => "@'(?:[^']|'')*";287		error("STRING_VERBATIM_MISSING_QUOTES", "verbatim string missing opening quotes") => "@[^\"'\\s]\\S+";288		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";289		error("STRING_BLOCK_UNEXPECTED_END", "unexpected end of text block", lexer = true);290		error("STRING_BLOCK_MISSING_NEW_LINE", "text block requires new line after |||", lexer = true);291		error("STRING_BLOCK_MISSING_TERMINATION", "unterminated text block", lexer = true);292		error("STRING_BLOCK_MISSING_INDENT", "text block first line must be indented", lexer = true);293		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";294		lit("WHITESPACE") => r"[ \t\n\r]+";295		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";296		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";297		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*+[^*/])*\*+/";298		error("COMMENT_TOO_SHORT", "comment too short") => r"/\*/";299		error("COMMENT_UNTERMINATED", "unterminated multi-line comment") =>  r"/\*([^*/]|\*[^/])+";300		error("NO_OPERATOR", "expected operator");301		error("MISSING_TOKEN", "missing token");302		error("UNEXPECTED_TOKEN", "unexpected token");303		error("CUSTOM", "error");304	];305	kinds306}