git.delta.rocks / jrsonnet / refs/commits / 4824700b357f

diff · tree · log

source

xtask/src/sourcegen/mod.rs · 14.5 KiB · source · history
1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {name}");53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{name}"),56						});57					}58					SpecialName::Error => {59						panic!("error token ERROR_{name} must be explicitly defined in jsonnet_kinds()");60					}61				}62				continue;63			}64			let name = to_upper_snake_case(token);65			eprintln!("implicit kw: {token}");66			kinds.define_token(TokenKind::Keyword {67				code: token.to_owned(),68				name: format!("{name}_KW"),69			});70		}71	}72	for node in 
&ast.nodes {73		let name = to_upper_snake_case(&node.name);74		kinds.define_node(&name);75	}76	for enum_ in &ast.enums {77		let name = to_upper_snake_case(&enum_.name);78		kinds.define_node(&name);79	}80	for token_enum in &ast.token_enums {81		let name = to_upper_snake_case(&token_enum.name);82		kinds.define_node(&name);83	}8485	let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;8687	let nodes = generate_nodes(&kinds, &ast)?;88	ensure_file_contents(89		&PathBuf::from(concat!(90			env!("CARGO_MANIFEST_DIR"),91			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",92		)),93		&syntax_kinds,94	);95	ensure_file_contents(96		&PathBuf::from(concat!(97			env!("CARGO_MANIFEST_DIR"),98			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",99		)),100		&nodes,101	);102103	let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;104	ensure_file_contents(105		&PathBuf::from(concat!(106			env!("CARGO_MANIFEST_DIR"),107			"/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",108		)),109		&lexer_syntax_kinds,110	);111	Ok(())112}113114fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {115	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);116	let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));117118	let keywords = kinds119		.tokens()120		.filter(|k| matches!(k, TokenKind::Keyword { .. }))121		.map(TokenKind::name)122		.map(|n| format_ident!("{n}"));123124	let mut nodes = kinds125		.nodes126		.iter()127		.map(|name| format_ident!("{}", name))128		.collect::<Vec<_>>();129130	if lexer {131		nodes.clear();132	}133134	let enums = grammar135		.enums136		.iter()137		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))138		.chain(139			grammar140				.token_enums141				.iter()142				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),143		)144		.collect::<Vec<_>>();145	let is_enum = if lexer {146		quote! {}147	} else {148		quote! 
{149			pub fn is_enum(self) -> bool {150				match self {151					#(#enums)|* => true,152					_ => false,153				}154			}155		}156	};157158	let derive_logos = if lexer {159		quote! {160			, logos::Logos161		}162	} else {163		quote! {}164	};165166	let error_desc_arms = kinds.tokens().filter_map(|t| {167		if let TokenKind::Error {168			name, description, ..169		} = t170		{171			let ident = format_ident!("{name}");172			Some(quote! { #ident => ::core::option::Option::Some(#description) })173		} else {174			None175		}176	});177178	let display_name_arms = kinds.tokens().map(|t| {179		let ident = format_ident!("{}", t.name());180		let display = t.display_name();181		quote! { #ident => #display }182	});183184	let ast = quote! {185		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]186187		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.188		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]189		#[repr(u16)]190		pub enum SyntaxKind {191			#[doc(hidden)]192			TOMBSTONE,193			#[doc(hidden)]194			EOF,195			#(#token_kinds,)*196			LEXING_ERROR,197			__LAST_TOKEN,198			#(#nodes,)*199			#[doc(hidden)]200			__LAST,201		}202		use self::SyntaxKind::*;203204		impl SyntaxKind {205			pub fn is_keyword(self) -> bool {206				match self {207					#(#keywords)|* => true,208					_ => false,209				}210			}211212			#is_enum213214			pub fn error_description(self) -> Option<&'static str> {215				match self {216					#(#error_desc_arms,)*217					LEXING_ERROR => ::core::option::Option::Some("unexpected character"),218					_ => None,219				}220			}221222			pub fn display_name(self) -> &'static str {223				match self {224					#(#display_name_arms,)*225					LEXING_ERROR => "unexpected character",226					_ => "unknown",227				}228			}229230			pub fn from_raw(r: u16) -> Self {231				assert!(r < Self::__LAST as u16);232				unsafe { std::mem::transmute(r) }233			}234			pub fn into_raw(self) -> 
u16 {235				self as u16236			}237		}238239		#[macro_export]240		macro_rules! T {#(#t_macros);*}241		#[allow(unused_imports)]242		pub use T;243	};244245	reformat(&ast.to_string())246}247248#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]249fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {250	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar251		.nodes252		.iter()253		.map(|node| {254			let name = format_ident!("{}", node.name);255			let kind = format_ident!("{}", to_upper_snake_case(&node.name));256			let traits = node.traits.iter().map(|trait_name| {257				let trait_name = format_ident!("{}", trait_name);258				quote!(impl ast::#trait_name for #name {})259			});260261			let mut type_positions: HashMap<String, usize> = HashMap::new();262			let field_positions: Vec<_> = node263				.fields264				.iter()265				.map(|field| {266					let ty_str = field.ty().to_string();267					let pos = *type_positions.get(&ty_str).unwrap_or(&0);268					type_positions.insert(ty_str, pos + 1);269					pos270				})271				.collect();272273			let methods = node274				.fields275				.iter()276				.zip(field_positions.iter())277				.map(|(field, &pos)| {278					let method_name = field.method_name(kinds);279					let ty = field.ty();280281					if field.is_many() {282						quote! {283							pub fn #method_name(&self) -> AstChildren<#ty> {284								support::children(&self.syntax)285							}286						}287					} else if let Some(token_kind) = field.token_kind(kinds) {288						quote! {289							pub fn #method_name(&self) -> Option<#ty> {290								support::token(&self.syntax, #token_kind)291							}292						}293					} else if field.is_token_enum(grammar) {294						quote! {295							pub fn #method_name(&self) -> Option<#ty> {296								support::token_child(&self.syntax)297							}298						}299					} else if pos == 0 {300						quote! 
{301							pub fn #method_name(&self) -> Option<#ty> {302								support::children(&self.syntax).next()303							}304						}305					} else {306						quote! {307							pub fn #method_name(&self) -> Option<#ty> {308								support::children(&self.syntax).nth(#pos)309							}310						}311					}312				});313			(314				quote! {315					#[pretty_doc_comment_placeholder_workaround]316					#[derive(Debug, Clone, PartialEq, Eq, Hash)]317					pub struct #name {318						pub(crate) syntax: SyntaxNode,319					}320321					#(#traits)*322323					impl #name {324						#(#methods)*325					}326				},327				quote! {328					impl AstNode for #name {329						fn can_cast(kind: SyntaxKind) -> bool {330							kind == #kind331						}332						fn cast(syntax: SyntaxNode) -> Option<Self> {333							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }334						}335						fn syntax(&self) -> &SyntaxNode { &self.syntax }336					}337				},338			)339		})340		.unzip();341342	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar343		.enums344		.iter()345		.map(|en| {346			let variants: Vec<_> = en347				.variants348				.iter()349				.map(|var| format_ident!("{}", var))350				.collect();351			let name = format_ident!("{}", en.name);352			let kinds: Vec<_> = variants353				.iter()354				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))355				.collect();356			let traits = en.traits.iter().map(|trait_name| {357				let trait_name = format_ident!("{}", trait_name);358				quote!(impl ast::#trait_name for #name {})359			});360361			let ast_node = quote! 
{362				impl AstNode for #name {363					fn can_cast(kind: SyntaxKind) -> bool {364						match kind {365							#(#kinds)|* => true,366							_ => false,367						}368					}369					fn cast(syntax: SyntaxNode) -> Option<Self> {370						let res = match syntax.kind() {371							#(372							#kinds => #name::#variants(#variants { syntax }),373							)*374							_ => return None,375						};376						Some(res)377					}378					fn syntax(&self) -> &SyntaxNode {379						match self {380							#(381							#name::#variants(it) => &it.syntax,382							)*383						}384					}385				}386			};387388			(389				quote! {390					#[pretty_doc_comment_placeholder_workaround]391					#[derive(Debug, Clone, PartialEq, Eq, Hash)]392					pub enum #name {393						#(#variants(#variants),)*394					}395396					#(#traits)*397				},398				quote! {399					#(400						impl From<#variants> for #name {401							fn from(node: #variants) -> #name {402								#name::#variants(node)403							}404						}405					)*406					#ast_node407				},408			)409		})410		.unzip();411412	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar413		.token_enums414		.iter()415		.map(|en| {416			let variants: Vec<_> = en417				.variants418				.iter()419				.map(|token| {420					format_ident!(421						"{}",422						to_pascal_case(kinds.token(token).expect("token exists").name())423					)424				})425				.collect();426			let name = format_ident!("{}", en.name);427			let kind_name = format_ident!("{}Kind", en.name);428			let kinds: Vec<_> = variants429				.iter()430				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))431				.collect();432433			let ast_node = quote! 
{434				impl AstToken for #name {435					fn can_cast(kind: SyntaxKind) -> bool {436						#kind_name::can_cast(kind)437					}438					fn cast(syntax: SyntaxToken) -> Option<Self> {439						let kind = #kind_name::cast(syntax.kind())?;440						Some(#name { syntax, kind })441					}442					fn syntax(&self) -> &SyntaxToken {443						&self.syntax444					}445				}446447				impl #kind_name {448					fn can_cast(kind: SyntaxKind) -> bool {449						match kind {450							#(#kinds)|* => true,451							_ => false,452						}453					}454					pub fn cast(kind: SyntaxKind) -> Option<Self> {455						let res = match kind {456							#(#kinds => Self::#variants,)*457							_ => return None,458						};459						Some(res)460					}461				}462			};463464			(465				quote! {466					#[pretty_doc_comment_placeholder_workaround]467					#[derive(Debug, Clone, PartialEq, Eq, Hash)]468					pub struct #name { syntax: SyntaxToken, kind: #kind_name }469470					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]471					pub enum #kind_name {472						#(#variants,)*473					}474				},475				quote! {476					#ast_node477478					impl #name {479						pub fn kind(&self) -> #kind_name {480							self.kind481						}482					}483484					impl std::fmt::Display for #name {485						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {486							std::fmt::Display::fmt(self.syntax(), f)487						}488					}489				},490			)491		})492		.unzip();493494	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar495		.nodes496		.iter()497		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))498		.into_group_map()499		.into_iter()500		.sorted_by_key(|(k, _)| *k)501		.map(|(trait_name, nodes)| {502			let name = format_ident!("Any{}", trait_name);503			let trait_name = format_ident!("{}", trait_name);504			let kinds: Vec<_> = nodes505				.iter()506				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))507				.collect();508509			(510				quote! 
{511					#[pretty_doc_comment_placeholder_workaround]512					#[derive(Debug, Clone, PartialEq, Eq, Hash)]513					pub struct #name {514						pub(crate) syntax: SyntaxNode,515					}516					impl ast::#trait_name for #name {}517				},518				quote! {519					impl #name {520						#[inline]521						pub fn new<T: ast::#trait_name>(node: T) -> #name {522							#name {523								syntax: node.syntax().clone()524							}525						}526					}527					impl AstNode for #name {528						fn can_cast(kind: SyntaxKind) -> bool {529							match kind {530								#(#kinds)|* => true,531								_ => false,532							}533						}534						fn cast(syntax: SyntaxNode) -> Option<Self> {535							Self::can_cast(syntax.kind()).then(|| #name { syntax })536						}537						fn syntax(&self) -> &SyntaxNode {538							&self.syntax539						}540					}541				},542			)543		})544		.unzip();545546	let enum_names = grammar.enums.iter().map(|it| &it.name);547	let node_names = grammar.nodes.iter().map(|it| &it.name);548549	let display_impls = enum_names550		.chain(node_names.clone())551		.map(|it| format_ident!("{}", it))552		.map(|name| {553			quote! {554				impl std::fmt::Display for #name {555					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {556						std::fmt::Display::fmt(self.syntax(), f)557					}558				}559			}560		});561562	let ast = quote! {563		#![allow(non_snake_case, clippy::match_like_matches_macro)]564565		use crate::{566			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},567			ast::{AstNode, AstToken, AstChildren, support},568			T,569		};570571		#(#node_defs)*572		#(#enum_defs)*573		#(#token_enum_defs)*574		#(#any_node_defs)*575		#(#node_boilerplate_impls)*576		#(#enum_boilerplate_impls)*577		#(#token_enum_boilerplate_impls)*578		#(#any_node_boilerplate_impls)*579		#(#display_impls)*580	};581582	let ast = ast.to_string().replace("T ! 
[", "T![");583584	let mut res = String::with_capacity(ast.len() * 2);585586	let mut docs = grammar587		.nodes588		.iter()589		.map(|it| &it.doc)590		.chain(grammar.enums.iter().map(|it| &it.doc));591592	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {593		res.push_str(chunk);594		if let Some(doc) = docs.next() {595			write_doc_comment(doc, &mut res);596		}597	}598599	let res = reformat(&res)?;600	Ok(res.replace("#[derive", "\n#[derive"))601}602603fn write_doc_comment(contents: &[String], dest: &mut String) {604	use std::fmt::Write;605	for line in contents {606		writeln!(dest, "///{line}").unwrap();607	}608}609610pub fn escape_token_macro(token: &str) -> TokenStream {611	if "{}[]()$".contains(token) {612		let c = token.chars().next().unwrap();613		quote! { #c }614	} else if token.contains('$') {615		quote! { #token }616	} else if token.chars().all(|v: char| v.is_ascii_lowercase()) {617		let i = Ident::new(token, Span::call_site());618		quote! { #i }619	} else {620		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));621		quote! { #(#cs)* }622	}623}