1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{AstSrc, lower};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => {52 eprintln!("implicit meta: {name}");53 kinds.define_token(TokenKind::Meta {54 grammar_name: token.to_owned(),55 name: format!("META_{name}"),56 });57 }58 SpecialName::Error => {59 panic!(60 "error token ERROR_{name} must be explicitly defined in jsonnet_kinds()"61 );62 }63 }64 continue;65 }66 let name = to_upper_snake_case(token);67 eprintln!("implicit kw: {token}");68 kinds.define_token(TokenKind::Keyword {69 code: token.to_owned(),70 name: format!("{name}_KW"),71 });72 }73 }74 for node in &ast.nodes {75 let name = to_upper_snake_case(&node.name);76 kinds.define_node(&name);77 }78 for enum_ in &ast.enums {79 let name = to_upper_snake_case(&enum_.name);80 kinds.define_node(&name);81 }82 for token_enum in &ast.token_enums {83 let name = to_upper_snake_case(&token_enum.name);84 kinds.define_node(&name);85 }8687 let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;8889 let nodes = generate_nodes(&kinds, &ast)?;90 ensure_file_contents(91 &PathBuf::from(concat!(92 env!("CARGO_MANIFEST_DIR"),93 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",94 )),95 &syntax_kinds,96 );97 ensure_file_contents(98 &PathBuf::from(concat!(99 env!("CARGO_MANIFEST_DIR"),100 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",101 )),102 &nodes,103 );104105 let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;106 ensure_file_contents(107 &PathBuf::from(concat!(108 env!("CARGO_MANIFEST_DIR"),109 "/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",110 )),111 &lexer_syntax_kinds,112 );113 Ok(())114}115116fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {117 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);118 let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));119120 let keywords = kinds121 .tokens()122 .filter(|k| matches!(k, TokenKind::Keyword { .. }))123 .map(TokenKind::name)124 .map(|n| format_ident!("{n}"));125126 let mut nodes = kinds127 .nodes128 .iter()129 .map(|name| format_ident!("{}", name))130 .collect::<Vec<_>>();131132 if lexer {133 nodes.clear();134 }135136 let enums = grammar137 .enums138 .iter()139 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))140 .chain(141 grammar142 .token_enums143 .iter()144 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),145 )146 .collect::<Vec<_>>();147 let is_enum = if lexer {148 quote! {}149 } else {150 quote! {151 pub fn is_enum(self) -> bool {152 match self {153 #(#enums)|* => true,154 _ => false,155 }156 }157 }158 };159160 let derive_logos = if lexer {161 quote! {162 , logos::Logos163 }164 } else {165 quote! {}166 };167168 let error_desc_arms = kinds.tokens().filter_map(|t| {169 if let TokenKind::Error {170 name, description, ..171 } = t172 {173 let ident = format_ident!("{name}");174 Some(quote! { #ident => ::core::option::Option::Some(#description) })175 } else {176 None177 }178 });179180 let display_name_arms = kinds.tokens().map(|t| {181 let ident = format_ident!("{}", t.name());182 let display = t.display_name();183 quote! { #ident => #display }184 });185186 let ast = quote! {187 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]188189 190 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]191 #[repr(u16)]192 pub enum SyntaxKind {193 #[doc(hidden)]194 TOMBSTONE,195 #[doc(hidden)]196 EOF,197 #(#token_kinds,)*198 LEXING_ERROR,199 __LAST_TOKEN,200 #(#nodes,)*201 #[doc(hidden)]202 __LAST,203 }204 use self::SyntaxKind::*;205206 impl SyntaxKind {207 pub fn is_keyword(self) -> bool {208 match self {209 #(#keywords)|* => true,210 _ => false,211 }212 }213214 #is_enum215216 pub fn error_description(self) -> Option<&'static str> {217 match self {218 #(#error_desc_arms,)*219 LEXING_ERROR => ::core::option::Option::Some("unexpected character"),220 _ => None,221 }222 }223224 pub fn display_name(self) -> &'static str {225 match self {226 #(#display_name_arms,)*227 LEXING_ERROR => "unexpected character",228 _ => "unknown",229 }230 }231232 pub fn from_raw(r: u16) -> Self {233 assert!(r < Self::__LAST as u16);234 unsafe { std::mem::transmute(r) }235 }236 pub fn into_raw(self) -> u16 {237 self as u16238 }239 }240241 #[macro_export]242 macro_rules! T {#(#t_macros);*}243 #[allow(unused_imports)]244 pub use T;245 };246247 reformat(&ast.to_string())248}249250#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]251fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {252 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar253 .nodes254 .iter()255 .map(|node| {256 let name = format_ident!("{}", node.name);257 let kind = format_ident!("{}", to_upper_snake_case(&node.name));258 let traits = node.traits.iter().map(|trait_name| {259 let trait_name = format_ident!("{}", trait_name);260 quote!(impl ast::#trait_name for #name {})261 });262263 let mut type_positions: HashMap<String, usize> = HashMap::new();264 let field_positions: Vec<_> = node265 .fields266 .iter()267 .map(|field| {268 let ty_str = field.ty().to_string();269 let pos = *type_positions.get(&ty_str).unwrap_or(&0);270 type_positions.insert(ty_str, pos + 1);271 pos272 })273 .collect();274275 let methods = node276 .fields277 .iter()278 .zip(field_positions.iter())279 .map(|(field, &pos)| {280 let method_name = field.method_name(kinds);281 let ty = field.ty();282283 if field.is_many() {284 quote! {285 pub fn #method_name(&self) -> AstChildren<#ty> {286 support::children(&self.syntax)287 }288 }289 } else if let Some(token_kind) = field.token_kind(kinds) {290 quote! {291 pub fn #method_name(&self) -> Option<#ty> {292 support::token(&self.syntax, #token_kind)293 }294 }295 } else if field.is_token_enum(grammar) {296 quote! {297 pub fn #method_name(&self) -> Option<#ty> {298 support::token_child(&self.syntax)299 }300 }301 } else if pos == 0 {302 quote! {303 pub fn #method_name(&self) -> Option<#ty> {304 support::children(&self.syntax).next()305 }306 }307 } else {308 quote! {309 pub fn #method_name(&self) -> Option<#ty> {310 support::children(&self.syntax).nth(#pos)311 }312 }313 }314 });315 (316 quote! {317 #[pretty_doc_comment_placeholder_workaround]318 #[derive(Debug, Clone, PartialEq, Eq, Hash)]319 pub struct #name {320 pub(crate) syntax: SyntaxNode,321 }322323 #(#traits)*324325 impl #name {326 #(#methods)*327 }328 },329 quote! {330 impl AstNode for #name {331 fn can_cast(kind: SyntaxKind) -> bool {332 kind == #kind333 }334 fn cast(syntax: SyntaxNode) -> Option<Self> {335 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }336 }337 fn syntax(&self) -> &SyntaxNode { &self.syntax }338 }339 },340 )341 })342 .unzip();343344 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar345 .enums346 .iter()347 .map(|en| {348 let variants: Vec<_> = en349 .variants350 .iter()351 .map(|var| format_ident!("{}", var))352 .collect();353 let name = format_ident!("{}", en.name);354 let kinds: Vec<_> = variants355 .iter()356 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))357 .collect();358 let traits = en.traits.iter().map(|trait_name| {359 let trait_name = format_ident!("{}", trait_name);360 quote!(impl ast::#trait_name for #name {})361 });362363 let ast_node = quote! {364 impl AstNode for #name {365 fn can_cast(kind: SyntaxKind) -> bool {366 match kind {367 #(#kinds)|* => true,368 _ => false,369 }370 }371 fn cast(syntax: SyntaxNode) -> Option<Self> {372 let res = match syntax.kind() {373 #(374 #kinds => #name::#variants(#variants { syntax }),375 )*376 _ => return None,377 };378 Some(res)379 }380 fn syntax(&self) -> &SyntaxNode {381 match self {382 #(383 #name::#variants(it) => &it.syntax,384 )*385 }386 }387 }388 };389390 (391 quote! {392 #[pretty_doc_comment_placeholder_workaround]393 #[derive(Debug, Clone, PartialEq, Eq, Hash)]394 pub enum #name {395 #(#variants(#variants),)*396 }397398 #(#traits)*399 },400 quote! {401 #(402 impl From<#variants> for #name {403 fn from(node: #variants) -> #name {404 #name::#variants(node)405 }406 }407 )*408 #ast_node409 },410 )411 })412 .unzip();413414 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar415 .token_enums416 .iter()417 .map(|en| {418 let variants: Vec<_> = en419 .variants420 .iter()421 .map(|token| {422 format_ident!(423 "{}",424 to_pascal_case(kinds.token(token).expect("token exists").name())425 )426 })427 .collect();428 let name = format_ident!("{}", en.name);429 let kind_name = format_ident!("{}Kind", en.name);430 let kinds: Vec<_> = variants431 .iter()432 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))433 .collect();434435 let ast_node = quote! {436 impl AstToken for #name {437 fn can_cast(kind: SyntaxKind) -> bool {438 #kind_name::can_cast(kind)439 }440 fn cast(syntax: SyntaxToken) -> Option<Self> {441 let kind = #kind_name::cast(syntax.kind())?;442 Some(#name { syntax, kind })443 }444 fn syntax(&self) -> &SyntaxToken {445 &self.syntax446 }447 }448449 impl #kind_name {450 fn can_cast(kind: SyntaxKind) -> bool {451 match kind {452 #(#kinds)|* => true,453 _ => false,454 }455 }456 pub fn cast(kind: SyntaxKind) -> Option<Self> {457 let res = match kind {458 #(#kinds => Self::#variants,)*459 _ => return None,460 };461 Some(res)462 }463 }464 };465466 (467 quote! {468 #[pretty_doc_comment_placeholder_workaround]469 #[derive(Debug, Clone, PartialEq, Eq, Hash)]470 pub struct #name { syntax: SyntaxToken, kind: #kind_name }471472 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]473 pub enum #kind_name {474 #(#variants,)*475 }476 },477 quote! {478 #ast_node479480 impl #name {481 pub fn kind(&self) -> #kind_name {482 self.kind483 }484 }485486 impl std::fmt::Display for #name {487 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {488 std::fmt::Display::fmt(self.syntax(), f)489 }490 }491 },492 )493 })494 .unzip();495496 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar497 .nodes498 .iter()499 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))500 .into_group_map()501 .into_iter()502 .sorted_by_key(|(k, _)| *k)503 .map(|(trait_name, nodes)| {504 let name = format_ident!("Any{}", trait_name);505 let trait_name = format_ident!("{}", trait_name);506 let kinds: Vec<_> = nodes507 .iter()508 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))509 .collect();510511 (512 quote! {513 #[pretty_doc_comment_placeholder_workaround]514 #[derive(Debug, Clone, PartialEq, Eq, Hash)]515 pub struct #name {516 pub(crate) syntax: SyntaxNode,517 }518 impl ast::#trait_name for #name {}519 },520 quote! {521 impl #name {522 #[inline]523 pub fn new<T: ast::#trait_name>(node: T) -> #name {524 #name {525 syntax: node.syntax().clone()526 }527 }528 }529 impl AstNode for #name {530 fn can_cast(kind: SyntaxKind) -> bool {531 match kind {532 #(#kinds)|* => true,533 _ => false,534 }535 }536 fn cast(syntax: SyntaxNode) -> Option<Self> {537 Self::can_cast(syntax.kind()).then(|| #name { syntax })538 }539 fn syntax(&self) -> &SyntaxNode {540 &self.syntax541 }542 }543 },544 )545 })546 .unzip();547548 let enum_names = grammar.enums.iter().map(|it| &it.name);549 let node_names = grammar.nodes.iter().map(|it| &it.name);550551 let display_impls = enum_names552 .chain(node_names.clone())553 .map(|it| format_ident!("{}", it))554 .map(|name| {555 quote! {556 impl std::fmt::Display for #name {557 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {558 std::fmt::Display::fmt(self.syntax(), f)559 }560 }561 }562 });563564 let ast = quote! {565 #![allow(non_snake_case, clippy::match_like_matches_macro)]566567 use crate::{568 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},569 ast::{AstNode, AstToken, AstChildren, support},570 T,571 };572573 #(#node_defs)*574 #(#enum_defs)*575 #(#token_enum_defs)*576 #(#any_node_defs)*577 #(#node_boilerplate_impls)*578 #(#enum_boilerplate_impls)*579 #(#token_enum_boilerplate_impls)*580 #(#any_node_boilerplate_impls)*581 #(#display_impls)*582 };583584 let ast = ast.to_string().replace("T ! [", "T![");585586 let mut res = String::with_capacity(ast.len() * 2);587588 let mut docs = grammar589 .nodes590 .iter()591 .map(|it| &it.doc)592 .chain(grammar.enums.iter().map(|it| &it.doc));593594 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {595 res.push_str(chunk);596 if let Some(doc) = docs.next() {597 write_doc_comment(doc, &mut res);598 }599 }600601 let res = reformat(&res)?;602 Ok(res.replace("#[derive", "\n#[derive"))603}604605fn write_doc_comment(contents: &[String], dest: &mut String) {606 use std::fmt::Write;607 for line in contents {608 writeln!(dest, "///{line}").unwrap();609 }610}611612pub fn escape_token_macro(token: &str) -> TokenStream {613 if "{}[]()$".contains(token) {614 let c = token.chars().next().unwrap();615 quote! { #c }616 } else if token.contains('$') {617 quote! { #token }618 } else if token.chars().all(|v: char| v.is_ascii_lowercase()) {619 let i = Ident::new(token, Span::call_site());620 quote! { #i }621 } else {622 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));623 quote! { #(#cs)* }624 }625}