1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{AstSrc, lower};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => {52 eprintln!("implicit meta: {name}");53 kinds.define_token(TokenKind::Meta {54 grammar_name: token.to_owned(),55 name: format!("META_{name}"),56 });57 }58 SpecialName::Error => {59 panic!(60 "error token ERROR_{name} must be explicitly defined in jsonnet_kinds()"61 );62 }63 }64 continue;65 }66 let name = to_upper_snake_case(token);67 eprintln!("implicit kw: {token}");68 kinds.define_token(TokenKind::Keyword {69 code: token.to_owned(),70 name: format!("{name}_KW"),71 });72 }73 }74 for node in &ast.nodes {75 let name = to_upper_snake_case(&node.name);76 kinds.define_node(&name);77 }78 for enum_ in &ast.enums {79 let name = to_upper_snake_case(&enum_.name);80 kinds.define_node(&name);81 }82 for token_enum in &ast.token_enums {83 let name = to_upper_snake_case(&token_enum.name);84 kinds.define_node(&name);85 }8687 let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;8889 let nodes = generate_nodes(&kinds, &ast)?;90 ensure_file_contents(91 &PathBuf::from(concat!(92 env!("CARGO_MANIFEST_DIR"),93 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",94 )),95 &syntax_kinds,96 );97 ensure_file_contents(98 &PathBuf::from(concat!(99 env!("CARGO_MANIFEST_DIR"),100 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",101 )),102 &nodes,103 );104105 let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;106 ensure_file_contents(107 &PathBuf::from(concat!(108 env!("CARGO_MANIFEST_DIR"),109 "/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",110 )),111 &lexer_syntax_kinds,112 );113 Ok(())114}115116#[allow(clippy::too_many_lines)]117fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {118 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);119 let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));120121 let keywords = kinds122 .tokens()123 .filter(|k| matches!(k, TokenKind::Keyword { .. }))124 .map(TokenKind::name)125 .map(|n| format_ident!("{n}"));126127 let mut nodes = kinds128 .nodes129 .iter()130 .map(|name| format_ident!("{}", name))131 .collect::<Vec<_>>();132133 if lexer {134 nodes.clear();135 }136137 let enums = grammar138 .enums139 .iter()140 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))141 .chain(142 grammar143 .token_enums144 .iter()145 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),146 )147 .collect::<Vec<_>>();148 let is_enum = if lexer {149 quote! {}150 } else {151 quote! {152 pub fn is_enum(self) -> bool {153 match self {154 #(#enums)|* => true,155 _ => false,156 }157 }158 }159 };160161 let derive_logos = if lexer {162 quote! {163 , logos::Logos164 }165 } else {166 quote! {}167 };168169 let error_desc_arms = kinds.tokens().filter_map(|t| {170 if let TokenKind::Error {171 name, description, ..172 } = t173 {174 let ident = format_ident!("{name}");175 Some(quote! { #ident => ::core::option::Option::Some(#description) })176 } else {177 None178 }179 });180181 let display_name_arms = kinds.tokens().map(|t| {182 let ident = format_ident!("{}", t.name());183 let display = t.display_name();184 quote! { #ident => #display }185 });186187 let ast = quote! {188 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]189190 191 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]192 #[repr(u16)]193 pub enum SyntaxKind {194 #[doc(hidden)]195 TOMBSTONE,196 #[doc(hidden)]197 EOF,198 #(#token_kinds,)*199 LEXING_ERROR,200 __LAST_TOKEN,201 #(#nodes,)*202 #[doc(hidden)]203 __LAST,204 }205 use self::SyntaxKind::*;206207 impl SyntaxKind {208 pub fn is_keyword(self) -> bool {209 match self {210 #(#keywords)|* => true,211 _ => false,212 }213 }214215 #is_enum216217 pub fn error_description(self) -> Option<&'static str> {218 match self {219 #(#error_desc_arms,)*220 LEXING_ERROR => ::core::option::Option::Some("unexpected character"),221 _ => None,222 }223 }224225 pub fn display_name(self) -> &'static str {226 match self {227 #(#display_name_arms,)*228 LEXING_ERROR => "unexpected character",229 _ => "unknown",230 }231 }232233 pub fn from_raw(r: u16) -> Self {234 assert!(r < Self::__LAST as u16);235 unsafe { std::mem::transmute(r) }236 }237 pub fn into_raw(self) -> u16 {238 self as u16239 }240 }241242 #[macro_export]243 macro_rules! T {#(#t_macros);*}244 #[allow(unused_imports)]245 pub use T;246 };247248 reformat(&ast.to_string())249}250251#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]252fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {253 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar254 .nodes255 .iter()256 .map(|node| {257 let name = format_ident!("{}", node.name);258 let kind = format_ident!("{}", to_upper_snake_case(&node.name));259 let traits = node.traits.iter().map(|trait_name| {260 let trait_name = format_ident!("{}", trait_name);261 quote!(impl ast::#trait_name for #name {})262 });263264 let mut type_positions: HashMap<String, usize> = HashMap::new();265 let field_positions: Vec<_> = node266 .fields267 .iter()268 .map(|field| {269 let ty_str = field.ty().to_string();270 let pos = *type_positions.get(&ty_str).unwrap_or(&0);271 type_positions.insert(ty_str, pos + 1);272 pos273 })274 .collect();275276 let methods = node277 .fields278 .iter()279 .zip(field_positions.iter())280 .map(|(field, &pos)| {281 let method_name = field.method_name(kinds);282 let ty = field.ty();283284 if field.is_many() {285 quote! {286 pub fn #method_name(&self) -> AstChildren<#ty> {287 support::children(&self.syntax)288 }289 }290 } else if let Some(token_kind) = field.token_kind(kinds) {291 quote! {292 pub fn #method_name(&self) -> Option<#ty> {293 support::token(&self.syntax, #token_kind)294 }295 }296 } else if field.is_token_enum(grammar) {297 quote! {298 pub fn #method_name(&self) -> Option<#ty> {299 support::token_child(&self.syntax)300 }301 }302 } else if pos == 0 {303 quote! {304 pub fn #method_name(&self) -> Option<#ty> {305 support::children(&self.syntax).next()306 }307 }308 } else {309 quote! {310 pub fn #method_name(&self) -> Option<#ty> {311 support::children(&self.syntax).nth(#pos)312 }313 }314 }315 });316 (317 quote! {318 #[pretty_doc_comment_placeholder_workaround]319 #[derive(Debug, Clone, PartialEq, Eq, Hash)]320 pub struct #name {321 pub(crate) syntax: SyntaxNode,322 }323324 #(#traits)*325326 impl #name {327 #(#methods)*328 }329 },330 quote! {331 impl AstNode for #name {332 fn can_cast(kind: SyntaxKind) -> bool {333 kind == #kind334 }335 fn cast(syntax: SyntaxNode) -> Option<Self> {336 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }337 }338 fn syntax(&self) -> &SyntaxNode { &self.syntax }339 }340 },341 )342 })343 .unzip();344345 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar346 .enums347 .iter()348 .map(|en| {349 let variants: Vec<_> = en350 .variants351 .iter()352 .map(|var| format_ident!("{}", var))353 .collect();354 let name = format_ident!("{}", en.name);355 let kinds: Vec<_> = variants356 .iter()357 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))358 .collect();359 let traits = en.traits.iter().map(|trait_name| {360 let trait_name = format_ident!("{}", trait_name);361 quote!(impl ast::#trait_name for #name {})362 });363364 let ast_node = quote! {365 impl AstNode for #name {366 fn can_cast(kind: SyntaxKind) -> bool {367 match kind {368 #(#kinds)|* => true,369 _ => false,370 }371 }372 fn cast(syntax: SyntaxNode) -> Option<Self> {373 let res = match syntax.kind() {374 #(375 #kinds => #name::#variants(#variants { syntax }),376 )*377 _ => return None,378 };379 Some(res)380 }381 fn syntax(&self) -> &SyntaxNode {382 match self {383 #(384 #name::#variants(it) => &it.syntax,385 )*386 }387 }388 }389 };390391 (392 quote! {393 #[pretty_doc_comment_placeholder_workaround]394 #[derive(Debug, Clone, PartialEq, Eq, Hash)]395 pub enum #name {396 #(#variants(#variants),)*397 }398399 #(#traits)*400 },401 quote! {402 #(403 impl From<#variants> for #name {404 fn from(node: #variants) -> #name {405 #name::#variants(node)406 }407 }408 )*409 #ast_node410 },411 )412 })413 .unzip();414415 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar416 .token_enums417 .iter()418 .map(|en| {419 let variants: Vec<_> = en420 .variants421 .iter()422 .map(|token| {423 format_ident!(424 "{}",425 to_pascal_case(kinds.token(token).expect("token exists").name())426 )427 })428 .collect();429 let name = format_ident!("{}", en.name);430 let kind_name = format_ident!("{}Kind", en.name);431 let kinds: Vec<_> = variants432 .iter()433 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))434 .collect();435436 let ast_node = quote! {437 impl AstToken for #name {438 fn can_cast(kind: SyntaxKind) -> bool {439 #kind_name::can_cast(kind)440 }441 fn cast(syntax: SyntaxToken) -> Option<Self> {442 let kind = #kind_name::cast(syntax.kind())?;443 Some(#name { syntax, kind })444 }445 fn syntax(&self) -> &SyntaxToken {446 &self.syntax447 }448 }449450 impl #kind_name {451 fn can_cast(kind: SyntaxKind) -> bool {452 match kind {453 #(#kinds)|* => true,454 _ => false,455 }456 }457 pub fn cast(kind: SyntaxKind) -> Option<Self> {458 let res = match kind {459 #(#kinds => Self::#variants,)*460 _ => return None,461 };462 Some(res)463 }464 }465 };466467 (468 quote! {469 #[pretty_doc_comment_placeholder_workaround]470 #[derive(Debug, Clone, PartialEq, Eq, Hash)]471 pub struct #name { syntax: SyntaxToken, kind: #kind_name }472473 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]474 pub enum #kind_name {475 #(#variants,)*476 }477 },478 quote! {479 #ast_node480481 impl #name {482 pub fn kind(&self) -> #kind_name {483 self.kind484 }485 }486487 impl std::fmt::Display for #name {488 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {489 std::fmt::Display::fmt(self.syntax(), f)490 }491 }492 },493 )494 })495 .unzip();496497 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar498 .nodes499 .iter()500 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))501 .into_group_map()502 .into_iter()503 .sorted_by_key(|(k, _)| *k)504 .map(|(trait_name, nodes)| {505 let name = format_ident!("Any{}", trait_name);506 let trait_name = format_ident!("{}", trait_name);507 let kinds: Vec<_> = nodes508 .iter()509 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))510 .collect();511512 (513 quote! {514 #[pretty_doc_comment_placeholder_workaround]515 #[derive(Debug, Clone, PartialEq, Eq, Hash)]516 pub struct #name {517 pub(crate) syntax: SyntaxNode,518 }519 impl ast::#trait_name for #name {}520 },521 quote! {522 impl #name {523 #[inline]524 pub fn new<T: ast::#trait_name>(node: T) -> #name {525 #name {526 syntax: node.syntax().clone()527 }528 }529 }530 impl AstNode for #name {531 fn can_cast(kind: SyntaxKind) -> bool {532 match kind {533 #(#kinds)|* => true,534 _ => false,535 }536 }537 fn cast(syntax: SyntaxNode) -> Option<Self> {538 Self::can_cast(syntax.kind()).then(|| #name { syntax })539 }540 fn syntax(&self) -> &SyntaxNode {541 &self.syntax542 }543 }544 },545 )546 })547 .unzip();548549 let enum_names = grammar.enums.iter().map(|it| &it.name);550 let node_names = grammar.nodes.iter().map(|it| &it.name);551552 let display_impls = enum_names553 .chain(node_names.clone())554 .map(|it| format_ident!("{}", it))555 .map(|name| {556 quote! {557 impl std::fmt::Display for #name {558 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {559 std::fmt::Display::fmt(self.syntax(), f)560 }561 }562 }563 });564565 let ast = quote! {566 #![allow(non_snake_case, clippy::match_like_matches_macro)]567568 use crate::{569 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},570 ast::{AstNode, AstToken, AstChildren, support},571 T,572 };573574 #(#node_defs)*575 #(#enum_defs)*576 #(#token_enum_defs)*577 #(#any_node_defs)*578 #(#node_boilerplate_impls)*579 #(#enum_boilerplate_impls)*580 #(#token_enum_boilerplate_impls)*581 #(#any_node_boilerplate_impls)*582 #(#display_impls)*583 };584585 let ast = ast.to_string().replace("T ! [", "T![");586587 let mut res = String::with_capacity(ast.len() * 2);588589 let mut docs = grammar590 .nodes591 .iter()592 .map(|it| &it.doc)593 .chain(grammar.enums.iter().map(|it| &it.doc));594595 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {596 res.push_str(chunk);597 if let Some(doc) = docs.next() {598 write_doc_comment(doc, &mut res);599 }600 }601602 let res = reformat(&res)?;603 Ok(res.replace("#[derive", "\n#[derive"))604}605606fn write_doc_comment(contents: &[String], dest: &mut String) {607 use std::fmt::Write;608 for line in contents {609 writeln!(dest, "///{line}").unwrap();610 }611}612613pub fn escape_token_macro(token: &str) -> TokenStream {614 if "{}[]()$".contains(token) {615 let c = token.chars().next().unwrap();616 quote! { #c }617 } else if token.contains('$') {618 quote! { #token }619 } else if token.chars().all(|v: char| v.is_ascii_lowercase()) {620 let i = Ident::new(token, Span::call_site());621 quote! { #i }622 } else {623 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));624 quote! { #(#cs)* }625 }626}