git.delta.rocks / jrsonnet / refs/commits / c6d0d24a32d3

difftreelog

source

xtask/src/sourcegen/mod.rs13.4 KiBsourcehistory
1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17	Literal,18	Meta,19	Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22	let name = name.strip_suffix('!')?;23	Some(if let Some(name) = name.strip_prefix("LIT_") {24		(SpecialName::Literal, name)25	} else if let Some(name) = name.strip_prefix("META_") {26		(SpecialName::Meta, name)27	} else if let Some(name) = name.strip_prefix("ERROR_") {28		(SpecialName::Error, name)29	} else {30		return None;31	})32}3334pub fn generate_ungrammar() -> Result<()> {35	let grammar: Grammar = include_str!(concat!(36		env!("CARGO_MANIFEST_DIR"),37		"/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38	))39	.parse()?;4041	let mut kinds = kinds::jsonnet_kinds();42	let ast = lower(&kinds, &grammar);4344	for token in grammar.tokens() {45		let token = &grammar[token];46		let token = &token.name.clone();47		if !kinds.is_token(token) {48			if let Some((special, name)) = classify_special(token) {49				match special {50					SpecialName::Literal => panic!("literal is not defined: {name}"),51					SpecialName::Meta => {52						eprintln!("implicit meta: {name}");53						kinds.define_token(TokenKind::Meta {54							grammar_name: token.to_owned(),55							name: format!("META_{name}"),56						});57					}58					SpecialName::Error => {59						eprintln!("implicit error: {name}");60						kinds.define_token(TokenKind::Error {61							grammar_name: token.to_owned(),62							name: format!("ERROR_{name}"),63							regex: None,64							priority: None,65							is_lexer_error: true,66						});67					}68				}69				continue;70			}71			let name = to_upper_snake_case(token);72			eprintln!("implicit kw: {token}");73			kinds.define_token(TokenKind::Keyword {74				code: token.to_owned(),75				name: format!("{name}_KW"),76			});77		}78	}79	for node in &ast.nodes {80		let name = to_upper_snake_case(&node.name);81		kinds.define_node(&name);82	}83	for enum_ in &ast.enums {84		let name = to_upper_snake_case(&enum_.name);85		kinds.define_node(&name);86	}87	for token_enum in &ast.token_enums {88		let name = to_upper_snake_case(&token_enum.name);89		kinds.define_node(&name);90	}9192	let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394	let nodes = generate_nodes(&kinds, &ast)?;95	ensure_file_contents(96		&PathBuf::from(concat!(97			env!("CARGO_MANIFEST_DIR"),98			"/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99		)),100		&syntax_kinds,101	);102	ensure_file_contents(103		&PathBuf::from(concat!(104			env!("CARGO_MANIFEST_DIR"),105			"/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106		)),107		&nodes,108	);109	Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114	let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116	let keywords = kinds117		.tokens()118		.filter(|k| matches!(k, TokenKind::Keyword { .. }))119		.map(TokenKind::name)120		.map(|n| format_ident!("{n}"));121122	let nodes = kinds123		.nodes124		.iter()125		.map(|name| format_ident!("{}", name))126		.collect::<Vec<_>>();127128	let enums = grammar129		.enums130		.iter()131		.map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132		.chain(133			grammar134				.token_enums135				.iter()136				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137		);138139	let ast = quote! {140		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141		use logos::Logos;142143		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.144		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145		#[repr(u16)]146		pub enum SyntaxKind {147			#[doc(hidden)]148			TOMBSTONE,149			#[doc(hidden)]150			EOF,151			#(#token_kinds,)*152			LEXING_ERROR,153			__LAST_TOKEN,154			#(#nodes,)*155			#[doc(hidden)]156			__LAST,157		}158		use self::SyntaxKind::*;159160		impl SyntaxKind {161			pub fn is_keyword(self) -> bool {162				match self {163					#(#keywords)|* => true,164					_ => false,165				}166			}167			pub fn is_enum(self) -> bool {168				match self {169					#(#enums)|* => true,170					_ => false,171				}172			}173174			pub fn from_raw(r: u16) -> Self {175				assert!(r < Self::__LAST as u16);176				unsafe { std::mem::transmute(r) }177			}178			pub fn into_raw(self) -> u16 {179				self as u16180			}181		}182183		#[macro_export]184		macro_rules! T {#(#t_macros);*}185		#[allow(unused_imports)]186		pub use T;187	};188189	reformat(&ast.to_string())190}191192#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]193fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {194	let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar195		.nodes196		.iter()197		.map(|node| {198			let name = format_ident!("{}", node.name);199			let kind = format_ident!("{}", to_upper_snake_case(&node.name));200			let traits = node.traits.iter().map(|trait_name| {201				let trait_name = format_ident!("{}", trait_name);202				quote!(impl ast::#trait_name for #name {})203			});204205			let mut type_positions: HashMap<String, usize> = HashMap::new();206			let field_positions: Vec<_> = node207				.fields208				.iter()209				.map(|field| {210					let ty_str = field.ty().to_string();211					let pos = *type_positions.get(&ty_str).unwrap_or(&0);212					type_positions.insert(ty_str, pos + 1);213					pos214				})215				.collect();216217			let methods = node218				.fields219				.iter()220				.zip(field_positions.iter())221				.map(|(field, &pos)| {222					let method_name = field.method_name(kinds);223					let ty = field.ty();224225					if field.is_many() {226						quote! {227							pub fn #method_name(&self) -> AstChildren<#ty> {228								support::children(&self.syntax)229							}230						}231					} else if let Some(token_kind) = field.token_kind(kinds) {232						quote! {233							pub fn #method_name(&self) -> Option<#ty> {234								support::token(&self.syntax, #token_kind)235							}236						}237					} else if field.is_token_enum(grammar) {238						quote! {239							pub fn #method_name(&self) -> Option<#ty> {240								support::token_child(&self.syntax)241							}242						}243					} else if pos == 0 {244						quote! {245							pub fn #method_name(&self) -> Option<#ty> {246								support::children(&self.syntax).next()247							}248						}249					} else {250						quote! {251							pub fn #method_name(&self) -> Option<#ty> {252								support::children(&self.syntax).nth(#pos)253							}254						}255					}256				});257			(258				quote! {259					#[pretty_doc_comment_placeholder_workaround]260					#[derive(Debug, Clone, PartialEq, Eq, Hash)]261					pub struct #name {262						pub(crate) syntax: SyntaxNode,263					}264265					#(#traits)*266267					impl #name {268						#(#methods)*269					}270				},271				quote! {272					impl AstNode for #name {273						fn can_cast(kind: SyntaxKind) -> bool {274							kind == #kind275						}276						fn cast(syntax: SyntaxNode) -> Option<Self> {277							if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }278						}279						fn syntax(&self) -> &SyntaxNode { &self.syntax }280					}281				},282			)283		})284		.unzip();285286	let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar287		.enums288		.iter()289		.map(|en| {290			let variants: Vec<_> = en291				.variants292				.iter()293				.map(|var| format_ident!("{}", var))294				.collect();295			let name = format_ident!("{}", en.name);296			let kinds: Vec<_> = variants297				.iter()298				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))299				.collect();300			let traits = en.traits.iter().map(|trait_name| {301				let trait_name = format_ident!("{}", trait_name);302				quote!(impl ast::#trait_name for #name {})303			});304305			let ast_node = quote! {306				impl AstNode for #name {307					fn can_cast(kind: SyntaxKind) -> bool {308						match kind {309							#(#kinds)|* => true,310							_ => false,311						}312					}313					fn cast(syntax: SyntaxNode) -> Option<Self> {314						let res = match syntax.kind() {315							#(316							#kinds => #name::#variants(#variants { syntax }),317							)*318							_ => return None,319						};320						Some(res)321					}322					fn syntax(&self) -> &SyntaxNode {323						match self {324							#(325							#name::#variants(it) => &it.syntax,326							)*327						}328					}329				}330			};331332			(333				quote! {334					#[pretty_doc_comment_placeholder_workaround]335					#[derive(Debug, Clone, PartialEq, Eq, Hash)]336					pub enum #name {337						#(#variants(#variants),)*338					}339340					#(#traits)*341				},342				quote! {343					#(344						impl From<#variants> for #name {345							fn from(node: #variants) -> #name {346								#name::#variants(node)347							}348						}349					)*350					#ast_node351				},352			)353		})354		.unzip();355356	let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar357		.token_enums358		.iter()359		.map(|en| {360			let variants: Vec<_> = en361				.variants362				.iter()363				.map(|token| {364					format_ident!(365						"{}",366						to_pascal_case(kinds.token(token).expect("token exists").name())367					)368				})369				.collect();370			let name = format_ident!("{}", en.name);371			let kind_name = format_ident!("{}Kind", en.name);372			let kinds: Vec<_> = variants373				.iter()374				.map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))375				.collect();376377			let ast_node = quote! {378				impl AstToken for #name {379					fn can_cast(kind: SyntaxKind) -> bool {380						#kind_name::can_cast(kind)381					}382					fn cast(syntax: SyntaxToken) -> Option<Self> {383						let kind = #kind_name::cast(syntax.kind())?;384						Some(#name { syntax, kind })385					}386					fn syntax(&self) -> &SyntaxToken {387						&self.syntax388					}389				}390391				impl #kind_name {392					fn can_cast(kind: SyntaxKind) -> bool {393						match kind {394							#(#kinds)|* => true,395							_ => false,396						}397					}398					pub fn cast(kind: SyntaxKind) -> Option<Self> {399						let res = match kind {400							#(#kinds => Self::#variants,)*401							_ => return None,402						};403						Some(res)404					}405				}406			};407408			(409				quote! {410					#[pretty_doc_comment_placeholder_workaround]411					#[derive(Debug, Clone, PartialEq, Eq, Hash)]412					pub struct #name { syntax: SyntaxToken, kind: #kind_name }413414					#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]415					pub enum #kind_name {416						#(#variants,)*417					}418				},419				quote! {420					#ast_node421422					impl #name {423						pub fn kind(&self) -> #kind_name {424							self.kind425						}426					}427428					impl std::fmt::Display for #name {429						fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {430							std::fmt::Display::fmt(self.syntax(), f)431						}432					}433				},434			)435		})436		.unzip();437438	let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar439		.nodes440		.iter()441		.flat_map(|node| node.traits.iter().map(move |t| (t, node)))442		.into_group_map()443		.into_iter()444		.sorted_by_key(|(k, _)| *k)445		.map(|(trait_name, nodes)| {446			let name = format_ident!("Any{}", trait_name);447			let trait_name = format_ident!("{}", trait_name);448			let kinds: Vec<_> = nodes449				.iter()450				.map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))451				.collect();452453			(454				quote! {455					#[pretty_doc_comment_placeholder_workaround]456					#[derive(Debug, Clone, PartialEq, Eq, Hash)]457					pub struct #name {458						pub(crate) syntax: SyntaxNode,459					}460					impl ast::#trait_name for #name {}461				},462				quote! {463					impl #name {464						#[inline]465						pub fn new<T: ast::#trait_name>(node: T) -> #name {466							#name {467								syntax: node.syntax().clone()468							}469						}470					}471					impl AstNode for #name {472						fn can_cast(kind: SyntaxKind) -> bool {473							match kind {474								#(#kinds)|* => true,475								_ => false,476							}477						}478						fn cast(syntax: SyntaxNode) -> Option<Self> {479							Self::can_cast(syntax.kind()).then(|| #name { syntax })480						}481						fn syntax(&self) -> &SyntaxNode {482							&self.syntax483						}484					}485				},486			)487		})488		.unzip();489490	let enum_names = grammar.enums.iter().map(|it| &it.name);491	let node_names = grammar.nodes.iter().map(|it| &it.name);492493	let display_impls = enum_names494		.chain(node_names.clone())495		.map(|it| format_ident!("{}", it))496		.map(|name| {497			quote! {498				impl std::fmt::Display for #name {499					fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {500						std::fmt::Display::fmt(self.syntax(), f)501					}502				}503			}504		});505506	let ast = quote! {507		#![allow(non_snake_case, clippy::match_like_matches_macro)]508509		use crate::{510			SyntaxNode, SyntaxToken, SyntaxKind::{self, *},511			ast::{AstNode, AstToken, AstChildren, support},512			T,513		};514515		#(#node_defs)*516		#(#enum_defs)*517		#(#token_enum_defs)*518		#(#any_node_defs)*519		#(#node_boilerplate_impls)*520		#(#enum_boilerplate_impls)*521		#(#token_enum_boilerplate_impls)*522		#(#any_node_boilerplate_impls)*523		#(#display_impls)*524	};525526	let ast = ast.to_string().replace("T ! [", "T![");527528	let mut res = String::with_capacity(ast.len() * 2);529530	let mut docs = grammar531		.nodes532		.iter()533		.map(|it| &it.doc)534		.chain(grammar.enums.iter().map(|it| &it.doc));535536	for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {537		res.push_str(chunk);538		if let Some(doc) = docs.next() {539			write_doc_comment(doc, &mut res);540		}541	}542543	let res = reformat(&res)?;544	Ok(res.replace("#[derive", "\n#[derive"))545}546547fn write_doc_comment(contents: &[String], dest: &mut String) {548	use std::fmt::Write;549	for line in contents {550		writeln!(dest, "///{line}").unwrap();551	}552}553554pub fn escape_token_macro(token: &str) -> TokenStream {555	if "{}[]()$".contains(token) {556		let c = token.chars().next().unwrap();557		quote! { #c }558	} else if token.contains('$') {559		quote! { #token }560	} else if token.chars().all(|v: char| v.is_ascii_lowercase()) {561		let i = Ident::new(token, Span::call_site());562		quote! { #i }563	} else {564		let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));565		quote! { #(#cs)* }566	}567}