git.delta.rocks / jrsonnet / refs/commits / 5ad3c0601af8

difftreelog

source

xtask/src/sourcegen/mod.rs12.4 KiBsourcehistory
1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {}", name);53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{}", name),56						})57					}58					SpecialName::Error => {59						eprintln!("implicit error: {}", name);60						kinds.define_token(TokenKind::Error {61							grammar_name: token.to_owned(),62							name: format!("ERROR_{}", name),63							regex: None,64							priority: None,65							is_lexer_error: true,66						})67					}68				};69				continue;70			};71			let name = to_upper_snake_case(token);72			eprintln!("implicit kw: {}", token);73			kinds.define_token(TokenKind::Keyword {74				code: token.to_owned(),75				name: format!("{name}_KW"),76			});77		}78	}79	for node in &ast.nodes {80		let name = to_upper_snake_case(&node.name);81		kinds.define_node(&name);82	}83	for enum_ in &ast.enums {84		let name = to_upper_snake_case(&enum_.name);85		kinds.define_node(&name);86	}87	for token_enum in &ast.token_enums {88		let name = to_upper_snake_case(&token_enum.name);89		kinds.define_node(&name);90	}9192	let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394	let nodes = generate_nodes(&kinds, &ast)?;95	ensure_file_contents(96		&PathBuf::from(concat!(97			env!("CARGO_MANIFEST_DIR"),98			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99		)),100		&syntax_kinds,101	)?;102	ensure_file_contents(103		&PathBuf::from(concat!(104			env!("CARGO_MANIFEST_DIR"),105			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106		)),107		&nodes,108	)?;109	Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114	let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116	let keywords = kinds117		.tokens()118		.filter(|k| matches!(k, TokenKind::Keyword { .. }))119		.map(TokenKind::name)120		.map(|n| format_ident!("{n}"));121122	let nodes = kinds123		.nodes124		.iter()125		.map(|name| format_ident!("{}", name))126		.collect::<Vec<_>>();127128	let enums = grammar129		.enums130		.iter()131		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132		.chain(133			grammar134				.token_enums135				.iter()136				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137		);138139	let ast = quote! {140		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141		use logos::Logos;142143		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.144		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145		#[repr(u16)]146		pub enum SyntaxKind {147			#[doc(hidden)]148			TOMBSTONE,149			#[doc(hidden)]150			EOF,151			#(#token_kinds,)*152			#[error]153			ERROR,154			#(#nodes,)*155			#[doc(hidden)]156			__LAST,157		}158		use self::SyntaxKind::*;159160		impl SyntaxKind {161			pub fn is_keyword(self) -> bool {162				match self {163					#(#keywords)|* => true,164					_ => false,165				}166			}167			pub fn is_enum(self) -> bool {168				match self {169					#(#enums)|* => true,170					_ => false,171				}172			}173174			pub fn from_raw(r: u16) -> Self {175				assert!(r < Self::__LAST as u16);176				unsafe { std::mem::transmute(r) }177			}178			pub fn into_raw(self) -> u16 {179				self as u16180			}181		}182183		#[macro_export]184		macro_rules! T {#(#t_macros);*}185		pub use T;186	};187188	reformat(&ast.to_string())189}190191fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {192	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar193		.nodes194		.iter()195		.map(|node| {196			let name = format_ident!("{}", node.name);197			let kind = format_ident!("{}", to_upper_snake_case(&node.name));198			let traits = node.traits.iter().map(|trait_name| {199				let trait_name = format_ident!("{}", trait_name);200				quote!(impl ast::#trait_name for #name {})201			});202203			let methods = node.fields.iter().map(|field| {204				let method_name = field.method_name(kinds);205				let ty = field.ty();206207				if field.is_many() {208					quote! {209						pub fn #method_name(&self) -> AstChildren<#ty> {210							support::children(&self.syntax)211						}212					}213				} else if let Some(token_kind) = field.token_kind(kinds) {214					quote! {215						pub fn #method_name(&self) -> Option<#ty> {216							support::token(&self.syntax, #token_kind)217						}218					}219				} else if field.is_token_enum(grammar) {220					quote! {221						pub fn #method_name(&self) -> Option<#ty> {222							support::token_child(&self.syntax)223						}224					}225				} else {226					quote! {227						pub fn #method_name(&self) -> Option<#ty> {228							support::child(&self.syntax)229						}230					}231				}232			});233			(234				quote! {235					#[pretty_doc_comment_placeholder_workaround]236					#[derive(Debug, Clone, PartialEq, Eq, Hash)]237					pub struct #name {238						pub(crate) syntax: SyntaxNode,239					}240241					#(#traits)*242243					impl #name {244						#(#methods)*245					}246				},247				quote! {248					impl AstNode for #name {249						fn can_cast(kind: SyntaxKind) -> bool {250							kind == #kind251						}252						fn cast(syntax: SyntaxNode) -> Option<Self> {253							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }254						}255						fn syntax(&self) -> &SyntaxNode { &self.syntax }256					}257				},258			)259		})260		.unzip();261262	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar263		.enums264		.iter()265		.map(|en| {266			let variants: Vec<_> = en267				.variants268				.iter()269				.map(|var| format_ident!("{}", var))270				.collect();271			let name = format_ident!("{}", en.name);272			let kinds: Vec<_> = variants273				.iter()274				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))275				.collect();276			let traits = en.traits.iter().map(|trait_name| {277				let trait_name = format_ident!("{}", trait_name);278				quote!(impl ast::#trait_name for #name {})279			});280281			let ast_node = quote! {282				impl AstNode for #name {283					fn can_cast(kind: SyntaxKind) -> bool {284						match kind {285							#(#kinds)|* => true,286							_ => false,287						}288					}289					fn cast(syntax: SyntaxNode) -> Option<Self> {290						let res = match syntax.kind() {291							#(292							#kinds => #name::#variants(#variants { syntax }),293							)*294							_ => return None,295						};296						Some(res)297					}298					fn syntax(&self) -> &SyntaxNode {299						match self {300							#(301							#name::#variants(it) => &it.syntax,302							)*303						}304					}305				}306			};307308			(309				quote! {310					#[pretty_doc_comment_placeholder_workaround]311					#[derive(Debug, Clone, PartialEq, Eq, Hash)]312					pub enum #name {313						#(#variants(#variants),)*314					}315316					#(#traits)*317				},318				quote! {319					#(320						impl From<#variants> for #name {321							fn from(node: #variants) -> #name {322								#name::#variants(node)323							}324						}325					)*326					#ast_node327				},328			)329		})330		.unzip();331332	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar333		.token_enums334		.iter()335		.map(|en| {336			let variants: Vec<_> = en337				.variants338				.iter()339				.map(|token| {340					format_ident!(341						"{}",342						to_pascal_case(kinds.token(token).expect("token exists").name())343					)344				})345				.collect();346			let name = format_ident!("{}", en.name);347			let kind_name = format_ident!("{}Kind", en.name);348			let kinds: Vec<_> = variants349				.iter()350				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))351				.collect();352353			let ast_node = quote! {354				impl AstToken for #name {355					fn can_cast(kind: SyntaxKind) -> bool {356						match kind {357							#(#kinds)|* => true,358							_ => false,359						}360					}361					fn cast(syntax: SyntaxToken) -> Option<Self> {362						let res = match syntax.kind() {363							#(364							#kinds => #name { syntax, kind: #kind_name::#variants },365							)*366							_ => return None,367						};368						Some(res)369					}370					fn syntax(&self) -> &SyntaxToken {371						&self.syntax372					}373				}374			};375376			(377				quote! {378					#[pretty_doc_comment_placeholder_workaround]379					#[derive(Debug, Clone, PartialEq, Eq, Hash)]380					pub struct #name { syntax: SyntaxToken, kind: #kind_name }381382					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]383					pub enum #kind_name {384						#(#variants,)*385					}386				},387				quote! {388					#ast_node389390					impl #name {391						pub fn kind(&self) -> #kind_name {392							self.kind393						}394					}395396					impl std::fmt::Display for #name {397						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {398							std::fmt::Display::fmt(self.syntax(), f)399						}400					}401				},402			)403		})404		.unzip();405406	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar407		.nodes408		.iter()409		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))410		.into_group_map()411		.into_iter()412		.sorted_by_key(|(k, _)| *k)413		.map(|(trait_name, nodes)| {414			let name = format_ident!("Any{}", trait_name);415			let trait_name = format_ident!("{}", trait_name);416			let kinds: Vec<_> = nodes417				.iter()418				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))419				.collect();420421			(422				quote! {423					#[pretty_doc_comment_placeholder_workaround]424					#[derive(Debug, Clone, PartialEq, Eq, Hash)]425					pub struct #name {426						pub(crate) syntax: SyntaxNode,427					}428					impl ast::#trait_name for #name {}429				},430				quote! {431					impl #name {432						#[inline]433						pub fn new<T: ast::#trait_name>(node: T) -> #name {434							#name {435								syntax: node.syntax().clone()436							}437						}438					}439					impl AstNode for #name {440						fn can_cast(kind: SyntaxKind) -> bool {441							match kind {442								#(#kinds)|* => true,443								_ => false,444							}445						}446						fn cast(syntax: SyntaxNode) -> Option<Self> {447							Self::can_cast(syntax.kind()).then(|| #name { syntax })448						}449						fn syntax(&self) -> &SyntaxNode {450							&self.syntax451						}452					}453				},454			)455		})456		.unzip();457458	let enum_names = grammar.enums.iter().map(|it| &it.name);459	let node_names = grammar.nodes.iter().map(|it| &it.name);460461	let display_impls = enum_names462		.chain(node_names.clone())463		.map(|it| format_ident!("{}", it))464		.map(|name| {465			quote! {466				impl std::fmt::Display for #name {467					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {468						std::fmt::Display::fmt(self.syntax(), f)469					}470				}471			}472		});473474	let ast = quote! {475		#![allow(non_snake_case, clippy::match_like_matches_macro)]476477		use crate::{478			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},479			ast::{AstNode, AstToken, AstChildren, support},480			T,481		};482483		#(#node_defs)*484		#(#enum_defs)*485		#(#token_enum_defs)*486		#(#any_node_defs)*487		#(#node_boilerplate_impls)*488		#(#enum_boilerplate_impls)*489		#(#token_enum_boilerplate_impls)*490		#(#any_node_boilerplate_impls)*491		#(#display_impls)*492	};493494	let ast = ast.to_string().replace("T ! [", "T![");495496	let mut res = String::with_capacity(ast.len() * 2);497498	let mut docs = grammar499		.nodes500		.iter()501		.map(|it| &it.doc)502		.chain(grammar.enums.iter().map(|it| &it.doc));503504	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {505		res.push_str(chunk);506		if let Some(doc) = docs.next() {507			write_doc_comment(doc, &mut res);508		}509	}510511	let res = reformat(&res)?;512	Ok(res.replace("#[derive", "\n#[derive"))513}514515fn write_doc_comment(contents: &[String], dest: &mut String) {516	use std::fmt::Write;517	for line in contents {518		writeln!(dest, "///{}", line).unwrap();519	}520}521522pub fn escape_token_macro(token: &str) -> TokenStream {523	if "{}[]()$".contains(token) {524		let c = token.chars().next().unwrap();525		quote! { #c }526	} else if token.contains('$') {527		quote! { #token }528	} else {529		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));530		quote! { #(#cs)* }531	}532}