git.delta.rocks / jrsonnet / refs/commits / b8427047424c

difftreelog

source

xtask/src/sourcegen/mod.rs14.6 KiBsourcehistory
1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{AstSrc, lower};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {name}");53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{name}"),56						});57					}58					SpecialName::Error => {59						panic!(60							"error token ERROR_{name} must be explicitly defined in jsonnet_kinds()"61						);62					}63				}64				continue;65			}66			let name = to_upper_snake_case(token);67			eprintln!("implicit kw: {token}");68			kinds.define_token(TokenKind::Keyword {69				code: token.to_owned(),70				name: format!("{name}_KW"),71			});72		}73	}74	for node in &ast.nodes {75		let name = to_upper_snake_case(&node.name);76		kinds.define_node(&name);77	}78	for enum_ in &ast.enums {79		let name = to_upper_snake_case(&enum_.name);80		kinds.define_node(&name);81	}82	for token_enum in &ast.token_enums {83		let name = to_upper_snake_case(&token_enum.name);84		kinds.define_node(&name);85	}8687	let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;8889	let nodes = generate_nodes(&kinds, &ast)?;90	ensure_file_contents(91		&PathBuf::from(concat!(92			env!("CARGO_MANIFEST_DIR"),93			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",94		)),95		&syntax_kinds,96	);97	ensure_file_contents(98		&PathBuf::from(concat!(99			env!("CARGO_MANIFEST_DIR"),100			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",101		)),102		&nodes,103	);104105	let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;106	ensure_file_contents(107		&PathBuf::from(concat!(108			env!("CARGO_MANIFEST_DIR"),109			"/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",110		)),111		&lexer_syntax_kinds,112	);113	Ok(())114}115116#[allow(clippy::too_many_lines)]117fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {118	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);119	let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));120121	let keywords = kinds122		.tokens()123		.filter(|k| matches!(k, TokenKind::Keyword { .. }))124		.map(TokenKind::name)125		.map(|n| format_ident!("{n}"));126127	let mut nodes = kinds128		.nodes129		.iter()130		.map(|name| format_ident!("{}", name))131		.collect::<Vec<_>>();132133	if lexer {134		nodes.clear();135	}136137	let enums = grammar138		.enums139		.iter()140		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))141		.chain(142			grammar143				.token_enums144				.iter()145				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),146		)147		.collect::<Vec<_>>();148	let is_enum = if lexer {149		quote! {}150	} else {151		quote! {152			pub fn is_enum(self) -> bool {153				match self {154					#(#enums)|* => true,155					_ => false,156				}157			}158		}159	};160161	let derive_logos = if lexer {162		quote! {163			, logos::Logos164		}165	} else {166		quote! {}167	};168169	let error_desc_arms = kinds.tokens().filter_map(|t| {170		if let TokenKind::Error {171			name, description, ..172		} = t173		{174			let ident = format_ident!("{name}");175			Some(quote! { #ident => ::core::option::Option::Some(#description) })176		} else {177			None178		}179	});180181	let display_name_arms = kinds.tokens().map(|t| {182		let ident = format_ident!("{}", t.name());183		let display = t.display_name();184		quote! { #ident => #display }185	});186187	let ast = quote! {188		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]189190		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.191		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]192		#[repr(u16)]193		pub enum SyntaxKind {194			#[doc(hidden)]195			TOMBSTONE,196			#[doc(hidden)]197			EOF,198			#(#token_kinds,)*199			LEXING_ERROR,200			__LAST_TOKEN,201			#(#nodes,)*202			#[doc(hidden)]203			__LAST,204		}205		use self::SyntaxKind::*;206207		impl SyntaxKind {208			pub fn is_keyword(self) -> bool {209				match self {210					#(#keywords)|* => true,211					_ => false,212				}213			}214215			#is_enum216217			pub fn error_description(self) -> Option<&'static str> {218				match self {219					#(#error_desc_arms,)*220					LEXING_ERROR => ::core::option::Option::Some("unexpected character"),221					_ => None,222				}223			}224225			pub fn display_name(self) -> &'static str {226				match self {227					#(#display_name_arms,)*228					LEXING_ERROR => "unexpected character",229					_ => "unknown",230				}231			}232233			pub fn from_raw(r: u16) -> Self {234				assert!(r < Self::__LAST as u16);235				unsafe { std::mem::transmute(r) }236			}237			pub fn into_raw(self) -> u16 {238				self as u16239			}240		}241242		#[macro_export]243		macro_rules! T {#(#t_macros);*}244		#[allow(unused_imports)]245		pub use T;246	};247248	reformat(&ast.to_string())249}250251#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]252fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {253	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar254		.nodes255		.iter()256		.map(|node| {257			let name = format_ident!("{}", node.name);258			let kind = format_ident!("{}", to_upper_snake_case(&node.name));259			let traits = node.traits.iter().map(|trait_name| {260				let trait_name = format_ident!("{}", trait_name);261				quote!(impl ast::#trait_name for #name {})262			});263264			let mut type_positions: HashMap<String, usize> = HashMap::new();265			let field_positions: Vec<_> = node266				.fields267				.iter()268				.map(|field| {269					let ty_str = field.ty().to_string();270					let pos = *type_positions.get(&ty_str).unwrap_or(&0);271					type_positions.insert(ty_str, pos + 1);272					pos273				})274				.collect();275276			let methods = node277				.fields278				.iter()279				.zip(field_positions.iter())280				.map(|(field, &pos)| {281					let method_name = field.method_name(kinds);282					let ty = field.ty();283284					if field.is_many() {285						quote! {286							pub fn #method_name(&self) -> AstChildren<#ty> {287								support::children(&self.syntax)288							}289						}290					} else if let Some(token_kind) = field.token_kind(kinds) {291						quote! {292							pub fn #method_name(&self) -> Option<#ty> {293								support::token(&self.syntax, #token_kind)294							}295						}296					} else if field.is_token_enum(grammar) {297						quote! {298							pub fn #method_name(&self) -> Option<#ty> {299								support::token_child(&self.syntax)300							}301						}302					} else if pos == 0 {303						quote! {304							pub fn #method_name(&self) -> Option<#ty> {305								support::children(&self.syntax).next()306							}307						}308					} else {309						quote! {310							pub fn #method_name(&self) -> Option<#ty> {311								support::children(&self.syntax).nth(#pos)312							}313						}314					}315				});316			(317				quote! {318					#[pretty_doc_comment_placeholder_workaround]319					#[derive(Debug, Clone, PartialEq, Eq, Hash)]320					pub struct #name {321						pub(crate) syntax: SyntaxNode,322					}323324					#(#traits)*325326					impl #name {327						#(#methods)*328					}329				},330				quote! {331					impl AstNode for #name {332						fn can_cast(kind: SyntaxKind) -> bool {333							kind == #kind334						}335						fn cast(syntax: SyntaxNode) -> Option<Self> {336							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }337						}338						fn syntax(&self) -> &SyntaxNode { &self.syntax }339					}340				},341			)342		})343		.unzip();344345	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar346		.enums347		.iter()348		.map(|en| {349			let variants: Vec<_> = en350				.variants351				.iter()352				.map(|var| format_ident!("{}", var))353				.collect();354			let name = format_ident!("{}", en.name);355			let kinds: Vec<_> = variants356				.iter()357				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))358				.collect();359			let traits = en.traits.iter().map(|trait_name| {360				let trait_name = format_ident!("{}", trait_name);361				quote!(impl ast::#trait_name for #name {})362			});363364			let ast_node = quote! {365				impl AstNode for #name {366					fn can_cast(kind: SyntaxKind) -> bool {367						match kind {368							#(#kinds)|* => true,369							_ => false,370						}371					}372					fn cast(syntax: SyntaxNode) -> Option<Self> {373						let res = match syntax.kind() {374							#(375							#kinds => #name::#variants(#variants { syntax }),376							)*377							_ => return None,378						};379						Some(res)380					}381					fn syntax(&self) -> &SyntaxNode {382						match self {383							#(384							#name::#variants(it) => &it.syntax,385							)*386						}387					}388				}389			};390391			(392				quote! {393					#[pretty_doc_comment_placeholder_workaround]394					#[derive(Debug, Clone, PartialEq, Eq, Hash)]395					pub enum #name {396						#(#variants(#variants),)*397					}398399					#(#traits)*400				},401				quote! {402					#(403						impl From<#variants> for #name {404							fn from(node: #variants) -> #name {405								#name::#variants(node)406							}407						}408					)*409					#ast_node410				},411			)412		})413		.unzip();414415	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar416		.token_enums417		.iter()418		.map(|en| {419			let variants: Vec<_> = en420				.variants421				.iter()422				.map(|token| {423					format_ident!(424						"{}",425						to_pascal_case(kinds.token(token).expect("token exists").name())426					)427				})428				.collect();429			let name = format_ident!("{}", en.name);430			let kind_name = format_ident!("{}Kind", en.name);431			let kinds: Vec<_> = variants432				.iter()433				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))434				.collect();435436			let ast_node = quote! {437				impl AstToken for #name {438					fn can_cast(kind: SyntaxKind) -> bool {439						#kind_name::can_cast(kind)440					}441					fn cast(syntax: SyntaxToken) -> Option<Self> {442						let kind = #kind_name::cast(syntax.kind())?;443						Some(#name { syntax, kind })444					}445					fn syntax(&self) -> &SyntaxToken {446						&self.syntax447					}448				}449450				impl #kind_name {451					fn can_cast(kind: SyntaxKind) -> bool {452						match kind {453							#(#kinds)|* => true,454							_ => false,455						}456					}457					pub fn cast(kind: SyntaxKind) -> Option<Self> {458						let res = match kind {459							#(#kinds => Self::#variants,)*460							_ => return None,461						};462						Some(res)463					}464				}465			};466467			(468				quote! {469					#[pretty_doc_comment_placeholder_workaround]470					#[derive(Debug, Clone, PartialEq, Eq, Hash)]471					pub struct #name { syntax: SyntaxToken, kind: #kind_name }472473					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]474					pub enum #kind_name {475						#(#variants,)*476					}477				},478				quote! {479					#ast_node480481					impl #name {482						pub fn kind(&self) -> #kind_name {483							self.kind484						}485					}486487					impl std::fmt::Display for #name {488						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {489							std::fmt::Display::fmt(self.syntax(), f)490						}491					}492				},493			)494		})495		.unzip();496497	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar498		.nodes499		.iter()500		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))501		.into_group_map()502		.into_iter()503		.sorted_by_key(|(k, _)| *k)504		.map(|(trait_name, nodes)| {505			let name = format_ident!("Any{}", trait_name);506			let trait_name = format_ident!("{}", trait_name);507			let kinds: Vec<_> = nodes508				.iter()509				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))510				.collect();511512			(513				quote! {514					#[pretty_doc_comment_placeholder_workaround]515					#[derive(Debug, Clone, PartialEq, Eq, Hash)]516					pub struct #name {517						pub(crate) syntax: SyntaxNode,518					}519					impl ast::#trait_name for #name {}520				},521				quote! {522					impl #name {523						#[inline]524						pub fn new<T: ast::#trait_name>(node: T) -> #name {525							#name {526								syntax: node.syntax().clone()527							}528						}529					}530					impl AstNode for #name {531						fn can_cast(kind: SyntaxKind) -> bool {532							match kind {533								#(#kinds)|* => true,534								_ => false,535							}536						}537						fn cast(syntax: SyntaxNode) -> Option<Self> {538							Self::can_cast(syntax.kind()).then(|| #name { syntax })539						}540						fn syntax(&self) -> &SyntaxNode {541							&self.syntax542						}543					}544				},545			)546		})547		.unzip();548549	let enum_names = grammar.enums.iter().map(|it| &it.name);550	let node_names = grammar.nodes.iter().map(|it| &it.name);551552	let display_impls = enum_names553		.chain(node_names.clone())554		.map(|it| format_ident!("{}", it))555		.map(|name| {556			quote! {557				impl std::fmt::Display for #name {558					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {559						std::fmt::Display::fmt(self.syntax(), f)560					}561				}562			}563		});564565	let ast = quote! {566		#![allow(non_snake_case, clippy::match_like_matches_macro)]567568		use crate::{569			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},570			ast::{AstNode, AstToken, AstChildren, support},571			T,572		};573574		#(#node_defs)*575		#(#enum_defs)*576		#(#token_enum_defs)*577		#(#any_node_defs)*578		#(#node_boilerplate_impls)*579		#(#enum_boilerplate_impls)*580		#(#token_enum_boilerplate_impls)*581		#(#any_node_boilerplate_impls)*582		#(#display_impls)*583	};584585	let ast = ast.to_string().replace("T ! [", "T![");586587	let mut res = String::with_capacity(ast.len() * 2);588589	let mut docs = grammar590		.nodes591		.iter()592		.map(|it| &it.doc)593		.chain(grammar.enums.iter().map(|it| &it.doc));594595	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {596		res.push_str(chunk);597		if let Some(doc) = docs.next() {598			write_doc_comment(doc, &mut res);599		}600	}601602	let res = reformat(&res)?;603	Ok(res.replace("#[derive", "\n#[derive"))604}605606fn write_doc_comment(contents: &[String], dest: &mut String) {607	use std::fmt::Write;608	for line in contents {609		writeln!(dest, "///{line}").unwrap();610	}611}612613pub fn escape_token_macro(token: &str) -> TokenStream {614	if "{}[]()$".contains(token) {615		let c = token.chars().next().unwrap();616		quote! { #c }617	} else if token.contains('$') {618		quote! { #token }619	} else if token.chars().all(|v: char| v.is_ascii_lowercase()) {620		let i = Ident::new(token, Span::call_site());621		quote! { #i }622	} else {623		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));624		quote! { #(#cs)* }625	}626}