git.delta.rocks / jrsonnet / refs/commits / cf33b6edf3e1

difftreelog

source

xtask/src/sourcegen/mod.rs14.5 KiBsourcehistory
1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{AstSrc, lower};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {name}");53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{name}"),56						});57					}58					SpecialName::Error => {59						panic!(60							"error token ERROR_{name} must be explicitly defined in jsonnet_kinds()"61						);62					}63				}64				continue;65			}66			let name = to_upper_snake_case(token);67			eprintln!("implicit kw: {token}");68			kinds.define_token(TokenKind::Keyword {69				code: token.to_owned(),70				name: format!("{name}_KW"),71			});72		}73	}74	for node in &ast.nodes {75		let name = to_upper_snake_case(&node.name);76		kinds.define_node(&name);77	}78	for enum_ in &ast.enums {79		let name = to_upper_snake_case(&enum_.name);80		kinds.define_node(&name);81	}82	for token_enum in &ast.token_enums {83		let name = to_upper_snake_case(&token_enum.name);84		kinds.define_node(&name);85	}8687	let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;8889	let nodes = generate_nodes(&kinds, &ast)?;90	ensure_file_contents(91		&PathBuf::from(concat!(92			env!("CARGO_MANIFEST_DIR"),93			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",94		)),95		&syntax_kinds,96	);97	ensure_file_contents(98		&PathBuf::from(concat!(99			env!("CARGO_MANIFEST_DIR"),100			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",101		)),102		&nodes,103	);104105	let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;106	ensure_file_contents(107		&PathBuf::from(concat!(108			env!("CARGO_MANIFEST_DIR"),109			"/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",110		)),111		&lexer_syntax_kinds,112	);113	Ok(())114}115116fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {117	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);118	let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));119120	let keywords = kinds121		.tokens()122		.filter(|k| matches!(k, TokenKind::Keyword { .. }))123		.map(TokenKind::name)124		.map(|n| format_ident!("{n}"));125126	let mut nodes = kinds127		.nodes128		.iter()129		.map(|name| format_ident!("{}", name))130		.collect::<Vec<_>>();131132	if lexer {133		nodes.clear();134	}135136	let enums = grammar137		.enums138		.iter()139		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))140		.chain(141			grammar142				.token_enums143				.iter()144				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),145		)146		.collect::<Vec<_>>();147	let is_enum = if lexer {148		quote! {}149	} else {150		quote! {151			pub fn is_enum(self) -> bool {152				match self {153					#(#enums)|* => true,154					_ => false,155				}156			}157		}158	};159160	let derive_logos = if lexer {161		quote! {162			, logos::Logos163		}164	} else {165		quote! {}166	};167168	let error_desc_arms = kinds.tokens().filter_map(|t| {169		if let TokenKind::Error {170			name, description, ..171		} = t172		{173			let ident = format_ident!("{name}");174			Some(quote! { #ident => ::core::option::Option::Some(#description) })175		} else {176			None177		}178	});179180	let display_name_arms = kinds.tokens().map(|t| {181		let ident = format_ident!("{}", t.name());182		let display = t.display_name();183		quote! { #ident => #display }184	});185186	let ast = quote! {187		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]188189		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.190		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]191		#[repr(u16)]192		pub enum SyntaxKind {193			#[doc(hidden)]194			TOMBSTONE,195			#[doc(hidden)]196			EOF,197			#(#token_kinds,)*198			LEXING_ERROR,199			__LAST_TOKEN,200			#(#nodes,)*201			#[doc(hidden)]202			__LAST,203		}204		use self::SyntaxKind::*;205206		impl SyntaxKind {207			pub fn is_keyword(self) -> bool {208				match self {209					#(#keywords)|* => true,210					_ => false,211				}212			}213214			#is_enum215216			pub fn error_description(self) -> Option<&'static str> {217				match self {218					#(#error_desc_arms,)*219					LEXING_ERROR => ::core::option::Option::Some("unexpected character"),220					_ => None,221				}222			}223224			pub fn display_name(self) -> &'static str {225				match self {226					#(#display_name_arms,)*227					LEXING_ERROR => "unexpected character",228					_ => "unknown",229				}230			}231232			pub fn from_raw(r: u16) -> Self {233				assert!(r < Self::__LAST as u16);234				unsafe { std::mem::transmute(r) }235			}236			pub fn into_raw(self) -> u16 {237				self as u16238			}239		}240241		#[macro_export]242		macro_rules! T {#(#t_macros);*}243		#[allow(unused_imports)]244		pub use T;245	};246247	reformat(&ast.to_string())248}249250#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]251fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {252	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar253		.nodes254		.iter()255		.map(|node| {256			let name = format_ident!("{}", node.name);257			let kind = format_ident!("{}", to_upper_snake_case(&node.name));258			let traits = node.traits.iter().map(|trait_name| {259				let trait_name = format_ident!("{}", trait_name);260				quote!(impl ast::#trait_name for #name {})261			});262263			let mut type_positions: HashMap<String, usize> = HashMap::new();264			let field_positions: Vec<_> = node265				.fields266				.iter()267				.map(|field| {268					let ty_str = field.ty().to_string();269					let pos = *type_positions.get(&ty_str).unwrap_or(&0);270					type_positions.insert(ty_str, pos + 1);271					pos272				})273				.collect();274275			let methods = node276				.fields277				.iter()278				.zip(field_positions.iter())279				.map(|(field, &pos)| {280					let method_name = field.method_name(kinds);281					let ty = field.ty();282283					if field.is_many() {284						quote! {285							pub fn #method_name(&self) -> AstChildren<#ty> {286								support::children(&self.syntax)287							}288						}289					} else if let Some(token_kind) = field.token_kind(kinds) {290						quote! {291							pub fn #method_name(&self) -> Option<#ty> {292								support::token(&self.syntax, #token_kind)293							}294						}295					} else if field.is_token_enum(grammar) {296						quote! {297							pub fn #method_name(&self) -> Option<#ty> {298								support::token_child(&self.syntax)299							}300						}301					} else if pos == 0 {302						quote! {303							pub fn #method_name(&self) -> Option<#ty> {304								support::children(&self.syntax).next()305							}306						}307					} else {308						quote! {309							pub fn #method_name(&self) -> Option<#ty> {310								support::children(&self.syntax).nth(#pos)311							}312						}313					}314				});315			(316				quote! {317					#[pretty_doc_comment_placeholder_workaround]318					#[derive(Debug, Clone, PartialEq, Eq, Hash)]319					pub struct #name {320						pub(crate) syntax: SyntaxNode,321					}322323					#(#traits)*324325					impl #name {326						#(#methods)*327					}328				},329				quote! {330					impl AstNode for #name {331						fn can_cast(kind: SyntaxKind) -> bool {332							kind == #kind333						}334						fn cast(syntax: SyntaxNode) -> Option<Self> {335							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }336						}337						fn syntax(&self) -> &SyntaxNode { &self.syntax }338					}339				},340			)341		})342		.unzip();343344	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar345		.enums346		.iter()347		.map(|en| {348			let variants: Vec<_> = en349				.variants350				.iter()351				.map(|var| format_ident!("{}", var))352				.collect();353			let name = format_ident!("{}", en.name);354			let kinds: Vec<_> = variants355				.iter()356				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))357				.collect();358			let traits = en.traits.iter().map(|trait_name| {359				let trait_name = format_ident!("{}", trait_name);360				quote!(impl ast::#trait_name for #name {})361			});362363			let ast_node = quote! {364				impl AstNode for #name {365					fn can_cast(kind: SyntaxKind) -> bool {366						match kind {367							#(#kinds)|* => true,368							_ => false,369						}370					}371					fn cast(syntax: SyntaxNode) -> Option<Self> {372						let res = match syntax.kind() {373							#(374							#kinds => #name::#variants(#variants { syntax }),375							)*376							_ => return None,377						};378						Some(res)379					}380					fn syntax(&self) -> &SyntaxNode {381						match self {382							#(383							#name::#variants(it) => &it.syntax,384							)*385						}386					}387				}388			};389390			(391				quote! {392					#[pretty_doc_comment_placeholder_workaround]393					#[derive(Debug, Clone, PartialEq, Eq, Hash)]394					pub enum #name {395						#(#variants(#variants),)*396					}397398					#(#traits)*399				},400				quote! {401					#(402						impl From<#variants> for #name {403							fn from(node: #variants) -> #name {404								#name::#variants(node)405							}406						}407					)*408					#ast_node409				},410			)411		})412		.unzip();413414	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar415		.token_enums416		.iter()417		.map(|en| {418			let variants: Vec<_> = en419				.variants420				.iter()421				.map(|token| {422					format_ident!(423						"{}",424						to_pascal_case(kinds.token(token).expect("token exists").name())425					)426				})427				.collect();428			let name = format_ident!("{}", en.name);429			let kind_name = format_ident!("{}Kind", en.name);430			let kinds: Vec<_> = variants431				.iter()432				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))433				.collect();434435			let ast_node = quote! {436				impl AstToken for #name {437					fn can_cast(kind: SyntaxKind) -> bool {438						#kind_name::can_cast(kind)439					}440					fn cast(syntax: SyntaxToken) -> Option<Self> {441						let kind = #kind_name::cast(syntax.kind())?;442						Some(#name { syntax, kind })443					}444					fn syntax(&self) -> &SyntaxToken {445						&self.syntax446					}447				}448449				impl #kind_name {450					fn can_cast(kind: SyntaxKind) -> bool {451						match kind {452							#(#kinds)|* => true,453							_ => false,454						}455					}456					pub fn cast(kind: SyntaxKind) -> Option<Self> {457						let res = match kind {458							#(#kinds => Self::#variants,)*459							_ => return None,460						};461						Some(res)462					}463				}464			};465466			(467				quote! {468					#[pretty_doc_comment_placeholder_workaround]469					#[derive(Debug, Clone, PartialEq, Eq, Hash)]470					pub struct #name { syntax: SyntaxToken, kind: #kind_name }471472					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]473					pub enum #kind_name {474						#(#variants,)*475					}476				},477				quote! {478					#ast_node479480					impl #name {481						pub fn kind(&self) -> #kind_name {482							self.kind483						}484					}485486					impl std::fmt::Display for #name {487						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {488							std::fmt::Display::fmt(self.syntax(), f)489						}490					}491				},492			)493		})494		.unzip();495496	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar497		.nodes498		.iter()499		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))500		.into_group_map()501		.into_iter()502		.sorted_by_key(|(k, _)| *k)503		.map(|(trait_name, nodes)| {504			let name = format_ident!("Any{}", trait_name);505			let trait_name = format_ident!("{}", trait_name);506			let kinds: Vec<_> = nodes507				.iter()508				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))509				.collect();510511			(512				quote! {513					#[pretty_doc_comment_placeholder_workaround]514					#[derive(Debug, Clone, PartialEq, Eq, Hash)]515					pub struct #name {516						pub(crate) syntax: SyntaxNode,517					}518					impl ast::#trait_name for #name {}519				},520				quote! {521					impl #name {522						#[inline]523						pub fn new<T: ast::#trait_name>(node: T) -> #name {524							#name {525								syntax: node.syntax().clone()526							}527						}528					}529					impl AstNode for #name {530						fn can_cast(kind: SyntaxKind) -> bool {531							match kind {532								#(#kinds)|* => true,533								_ => false,534							}535						}536						fn cast(syntax: SyntaxNode) -> Option<Self> {537							Self::can_cast(syntax.kind()).then(|| #name { syntax })538						}539						fn syntax(&self) -> &SyntaxNode {540							&self.syntax541						}542					}543				},544			)545		})546		.unzip();547548	let enum_names = grammar.enums.iter().map(|it| &it.name);549	let node_names = grammar.nodes.iter().map(|it| &it.name);550551	let display_impls = enum_names552		.chain(node_names.clone())553		.map(|it| format_ident!("{}", it))554		.map(|name| {555			quote! {556				impl std::fmt::Display for #name {557					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {558						std::fmt::Display::fmt(self.syntax(), f)559					}560				}561			}562		});563564	let ast = quote! {565		#![allow(non_snake_case, clippy::match_like_matches_macro)]566567		use crate::{568			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},569			ast::{AstNode, AstToken, AstChildren, support},570			T,571		};572573		#(#node_defs)*574		#(#enum_defs)*575		#(#token_enum_defs)*576		#(#any_node_defs)*577		#(#node_boilerplate_impls)*578		#(#enum_boilerplate_impls)*579		#(#token_enum_boilerplate_impls)*580		#(#any_node_boilerplate_impls)*581		#(#display_impls)*582	};583584	let ast = ast.to_string().replace("T ! [", "T![");585586	let mut res = String::with_capacity(ast.len() * 2);587588	let mut docs = grammar589		.nodes590		.iter()591		.map(|it| &it.doc)592		.chain(grammar.enums.iter().map(|it| &it.doc));593594	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {595		res.push_str(chunk);596		if let Some(doc) = docs.next() {597			write_doc_comment(doc, &mut res);598		}599	}600601	let res = reformat(&res)?;602	Ok(res.replace("#[derive", "\n#[derive"))603}604605fn write_doc_comment(contents: &[String], dest: &mut String) {606	use std::fmt::Write;607	for line in contents {608		writeln!(dest, "///{line}").unwrap();609	}610}611612pub fn escape_token_macro(token: &str) -> TokenStream {613	if "{}[]()$".contains(token) {614		let c = token.chars().next().unwrap();615		quote! { #c }616	} else if token.contains('$') {617		quote! { #token }618	} else if token.chars().all(|v: char| v.is_ascii_lowercase()) {619		let i = Ident::new(token, Span::call_site());620		quote! { #i }621	} else {622		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));623		quote! { #(#cs)* }624	}625}