1use std::{collections::HashMap, path::PathBuf};23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Ident, Punct, Spacing, Span, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => {52 eprintln!("implicit meta: {name}");53 kinds.define_token(TokenKind::Meta {54 grammar_name: token.to_owned(),55 name: format!("META_{name}"),56 });57 }58 SpecialName::Error => {59 eprintln!("implicit error: {name}");60 kinds.define_token(TokenKind::Error {61 grammar_name: token.to_owned(),62 name: format!("ERROR_{name}"),63 regex: None,64 priority: None,65 is_lexer_error: true,66 });67 }68 }69 continue;70 }71 let name = to_upper_snake_case(token);72 eprintln!("implicit kw: {token}");73 kinds.define_token(TokenKind::Keyword {74 code: token.to_owned(),75 name: format!("{name}_KW"),76 });77 }78 }79 for node in &ast.nodes {80 let name = to_upper_snake_case(&node.name);81 kinds.define_node(&name);82 }83 for enum_ in &ast.enums {84 let name = to_upper_snake_case(&enum_.name);85 kinds.define_node(&name);86 }87 for token_enum in &ast.token_enums {88 let name = to_upper_snake_case(&token_enum.name);89 kinds.define_node(&name);90 }9192 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394 let nodes = generate_nodes(&kinds, &ast)?;95 ensure_file_contents(96 &PathBuf::from(concat!(97 env!("CARGO_MANIFEST_DIR"),98 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99 )),100 &syntax_kinds,101 );102 ensure_file_contents(103 &PathBuf::from(concat!(104 env!("CARGO_MANIFEST_DIR"),105 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106 )),107 &nodes,108 );109 Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116 let keywords = kinds117 .tokens()118 .filter(|k| matches!(k, TokenKind::Keyword { .. }))119 .map(TokenKind::name)120 .map(|n| format_ident!("{n}"));121122 let nodes = kinds123 .nodes124 .iter()125 .map(|name| format_ident!("{}", name))126 .collect::<Vec<_>>();127128 let enums = grammar129 .enums130 .iter()131 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132 .chain(133 grammar134 .token_enums135 .iter()136 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137 );138139 let ast = quote! {140 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141 use logos::Logos;142143 144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145 #[repr(u16)]146 pub enum SyntaxKind {147 #[doc(hidden)]148 TOMBSTONE,149 #[doc(hidden)]150 EOF,151 #(#token_kinds,)*152 LEXING_ERROR,153 __LAST_TOKEN,154 #(#nodes,)*155 #[doc(hidden)]156 __LAST,157 }158 use self::SyntaxKind::*;159160 impl SyntaxKind {161 pub fn is_keyword(self) -> bool {162 match self {163 #(#keywords)|* => true,164 _ => false,165 }166 }167 pub fn is_enum(self) -> bool {168 match self {169 #(#enums)|* => true,170 _ => false,171 }172 }173174 pub fn from_raw(r: u16) -> Self {175 assert!(r < Self::__LAST as u16);176 unsafe { std::mem::transmute(r) }177 }178 pub fn into_raw(self) -> u16 {179 self as u16180 }181 }182183 #[macro_export]184 macro_rules! T {#(#t_macros);*}185 #[allow(unused_imports)]186 pub use T;187 };188189 reformat(&ast.to_string())190}191192#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]193fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {194 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar195 .nodes196 .iter()197 .map(|node| {198 let name = format_ident!("{}", node.name);199 let kind = format_ident!("{}", to_upper_snake_case(&node.name));200 let traits = node.traits.iter().map(|trait_name| {201 let trait_name = format_ident!("{}", trait_name);202 quote!(impl ast::#trait_name for #name {})203 });204205 let mut type_positions: HashMap<String, usize> = HashMap::new();206 let field_positions: Vec<_> = node207 .fields208 .iter()209 .map(|field| {210 let ty_str = field.ty().to_string();211 let pos = *type_positions.get(&ty_str).unwrap_or(&0);212 type_positions.insert(ty_str, pos + 1);213 pos214 })215 .collect();216217 let methods = node218 .fields219 .iter()220 .zip(field_positions.iter())221 .map(|(field, &pos)| {222 let method_name = field.method_name(kinds);223 let ty = field.ty();224225 if field.is_many() {226 quote! {227 pub fn #method_name(&self) -> AstChildren<#ty> {228 support::children(&self.syntax)229 }230 }231 } else if let Some(token_kind) = field.token_kind(kinds) {232 quote! {233 pub fn #method_name(&self) -> Option<#ty> {234 support::token(&self.syntax, #token_kind)235 }236 }237 } else if field.is_token_enum(grammar) {238 quote! {239 pub fn #method_name(&self) -> Option<#ty> {240 support::token_child(&self.syntax)241 }242 }243 } else if pos == 0 {244 quote! {245 pub fn #method_name(&self) -> Option<#ty> {246 support::children(&self.syntax).next()247 }248 }249 } else {250 quote! {251 pub fn #method_name(&self) -> Option<#ty> {252 support::children(&self.syntax).nth(#pos)253 }254 }255 }256 });257 (258 quote! {259 #[pretty_doc_comment_placeholder_workaround]260 #[derive(Debug, Clone, PartialEq, Eq, Hash)]261 pub struct #name {262 pub(crate) syntax: SyntaxNode,263 }264265 #(#traits)*266267 impl #name {268 #(#methods)*269 }270 },271 quote! {272 impl AstNode for #name {273 fn can_cast(kind: SyntaxKind) -> bool {274 kind == #kind275 }276 fn cast(syntax: SyntaxNode) -> Option<Self> {277 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }278 }279 fn syntax(&self) -> &SyntaxNode { &self.syntax }280 }281 },282 )283 })284 .unzip();285286 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar287 .enums288 .iter()289 .map(|en| {290 let variants: Vec<_> = en291 .variants292 .iter()293 .map(|var| format_ident!("{}", var))294 .collect();295 let name = format_ident!("{}", en.name);296 let kinds: Vec<_> = variants297 .iter()298 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))299 .collect();300 let traits = en.traits.iter().map(|trait_name| {301 let trait_name = format_ident!("{}", trait_name);302 quote!(impl ast::#trait_name for #name {})303 });304305 let ast_node = quote! {306 impl AstNode for #name {307 fn can_cast(kind: SyntaxKind) -> bool {308 match kind {309 #(#kinds)|* => true,310 _ => false,311 }312 }313 fn cast(syntax: SyntaxNode) -> Option<Self> {314 let res = match syntax.kind() {315 #(316 #kinds => #name::#variants(#variants { syntax }),317 )*318 _ => return None,319 };320 Some(res)321 }322 fn syntax(&self) -> &SyntaxNode {323 match self {324 #(325 #name::#variants(it) => &it.syntax,326 )*327 }328 }329 }330 };331332 (333 quote! {334 #[pretty_doc_comment_placeholder_workaround]335 #[derive(Debug, Clone, PartialEq, Eq, Hash)]336 pub enum #name {337 #(#variants(#variants),)*338 }339340 #(#traits)*341 },342 quote! {343 #(344 impl From<#variants> for #name {345 fn from(node: #variants) -> #name {346 #name::#variants(node)347 }348 }349 )*350 #ast_node351 },352 )353 })354 .unzip();355356 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar357 .token_enums358 .iter()359 .map(|en| {360 let variants: Vec<_> = en361 .variants362 .iter()363 .map(|token| {364 format_ident!(365 "{}",366 to_pascal_case(kinds.token(token).expect("token exists").name())367 )368 })369 .collect();370 let name = format_ident!("{}", en.name);371 let kind_name = format_ident!("{}Kind", en.name);372 let kinds: Vec<_> = variants373 .iter()374 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))375 .collect();376377 let ast_node = quote! {378 impl AstToken for #name {379 fn can_cast(kind: SyntaxKind) -> bool {380 #kind_name::can_cast(kind)381 }382 fn cast(syntax: SyntaxToken) -> Option<Self> {383 let kind = #kind_name::cast(syntax.kind())?;384 Some(#name { syntax, kind })385 }386 fn syntax(&self) -> &SyntaxToken {387 &self.syntax388 }389 }390391 impl #kind_name {392 fn can_cast(kind: SyntaxKind) -> bool {393 match kind {394 #(#kinds)|* => true,395 _ => false,396 }397 }398 pub fn cast(kind: SyntaxKind) -> Option<Self> {399 let res = match kind {400 #(#kinds => Self::#variants,)*401 _ => return None,402 };403 Some(res)404 }405 }406 };407408 (409 quote! {410 #[pretty_doc_comment_placeholder_workaround]411 #[derive(Debug, Clone, PartialEq, Eq, Hash)]412 pub struct #name { syntax: SyntaxToken, kind: #kind_name }413414 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]415 pub enum #kind_name {416 #(#variants,)*417 }418 },419 quote! {420 #ast_node421422 impl #name {423 pub fn kind(&self) -> #kind_name {424 self.kind425 }426 }427428 impl std::fmt::Display for #name {429 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {430 std::fmt::Display::fmt(self.syntax(), f)431 }432 }433 },434 )435 })436 .unzip();437438 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar439 .nodes440 .iter()441 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))442 .into_group_map()443 .into_iter()444 .sorted_by_key(|(k, _)| *k)445 .map(|(trait_name, nodes)| {446 let name = format_ident!("Any{}", trait_name);447 let trait_name = format_ident!("{}", trait_name);448 let kinds: Vec<_> = nodes449 .iter()450 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name)))451 .collect();452453 (454 quote! {455 #[pretty_doc_comment_placeholder_workaround]456 #[derive(Debug, Clone, PartialEq, Eq, Hash)]457 pub struct #name {458 pub(crate) syntax: SyntaxNode,459 }460 impl ast::#trait_name for #name {}461 },462 quote! {463 impl #name {464 #[inline]465 pub fn new<T: ast::#trait_name>(node: T) -> #name {466 #name {467 syntax: node.syntax().clone()468 }469 }470 }471 impl AstNode for #name {472 fn can_cast(kind: SyntaxKind) -> bool {473 match kind {474 #(#kinds)|* => true,475 _ => false,476 }477 }478 fn cast(syntax: SyntaxNode) -> Option<Self> {479 Self::can_cast(syntax.kind()).then(|| #name { syntax })480 }481 fn syntax(&self) -> &SyntaxNode {482 &self.syntax483 }484 }485 },486 )487 })488 .unzip();489490 let enum_names = grammar.enums.iter().map(|it| &it.name);491 let node_names = grammar.nodes.iter().map(|it| &it.name);492493 let display_impls = enum_names494 .chain(node_names.clone())495 .map(|it| format_ident!("{}", it))496 .map(|name| {497 quote! {498 impl std::fmt::Display for #name {499 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {500 std::fmt::Display::fmt(self.syntax(), f)501 }502 }503 }504 });505506 let ast = quote! {507 #![allow(non_snake_case, clippy::match_like_matches_macro)]508509 use crate::{510 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},511 ast::{AstNode, AstToken, AstChildren, support},512 T,513 };514515 #(#node_defs)*516 #(#enum_defs)*517 #(#token_enum_defs)*518 #(#any_node_defs)*519 #(#node_boilerplate_impls)*520 #(#enum_boilerplate_impls)*521 #(#token_enum_boilerplate_impls)*522 #(#any_node_boilerplate_impls)*523 #(#display_impls)*524 };525526 let ast = ast.to_string().replace("T ! [", "T![");527528 let mut res = String::with_capacity(ast.len() * 2);529530 let mut docs = grammar531 .nodes532 .iter()533 .map(|it| &it.doc)534 .chain(grammar.enums.iter().map(|it| &it.doc));535536 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {537 res.push_str(chunk);538 if let Some(doc) = docs.next() {539 write_doc_comment(doc, &mut res);540 }541 }542543 let res = reformat(&res)?;544 Ok(res.replace("#[derive", "\n#[derive"))545}546547fn write_doc_comment(contents: &[String], dest: &mut String) {548 use std::fmt::Write;549 for line in contents {550 writeln!(dest, "///{line}").unwrap();551 }552}553554pub fn escape_token_macro(token: &str) -> TokenStream {555 if "{}[]()$".contains(token) {556 let c = token.chars().next().unwrap();557 quote! { #c }558 } else if token.contains('$') {559 quote! { #token }560 } else if token.chars().all(|v: char| v.is_ascii_lowercase()) {561 let i = Ident::new(token, Span::call_site());562 quote! { #i }563 } else {564 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));565 quote! { #(#cs)* }566 }567}