1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => {52 eprintln!("implicit meta: {}", name);53 kinds.define_token(TokenKind::Meta {54 grammar_name: token.to_owned(),55 name: format!("META_{}", name),56 })57 }58 SpecialName::Error => {59 eprintln!("implicit error: {}", name);60 kinds.define_token(TokenKind::Error {61 grammar_name: token.to_owned(),62 name: format!("ERROR_{}", name),63 regex: None,64 priority: None,65 is_lexer_error: true,66 })67 }68 };69 continue;70 };71 let name = to_upper_snake_case(token);72 eprintln!("implicit kw: {}", token);73 kinds.define_token(TokenKind::Keyword {74 code: token.to_owned(),75 name: format!("{name}_KW"),76 });77 }78 }79 for node in &ast.nodes {80 let name = to_upper_snake_case(&node.name);81 kinds.define_node(&name);82 }83 for enum_ in &ast.enums {84 let name = to_upper_snake_case(&enum_.name);85 kinds.define_node(&name);86 }87 for token_enum in &ast.token_enums {88 let name = to_upper_snake_case(&token_enum.name);89 kinds.define_node(&name);90 }9192 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394 let nodes = generate_nodes(&kinds, &ast)?;95 ensure_file_contents(96 &PathBuf::from(concat!(97 env!("CARGO_MANIFEST_DIR"),98 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99 )),100 &syntax_kinds,101 )?;102 ensure_file_contents(103 &PathBuf::from(concat!(104 env!("CARGO_MANIFEST_DIR"),105 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106 )),107 &nodes,108 )?;109 Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116 let keywords = kinds117 .tokens()118 .filter(|k| matches!(k, TokenKind::Keyword { .. }))119 .map(TokenKind::name)120 .map(|n| format_ident!("{n}"));121122 let nodes = kinds123 .nodes124 .iter()125 .map(|name| format_ident!("{}", name))126 .collect::<Vec<_>>();127128 let enums = grammar129 .enums130 .iter()131 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132 .chain(133 grammar134 .token_enums135 .iter()136 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137 );138139 let ast = quote! {140 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141 use logos::Logos;142143 144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145 #[repr(u16)]146 pub enum SyntaxKind {147 #[doc(hidden)]148 TOMBSTONE,149 #[doc(hidden)]150 EOF,151 #(#token_kinds,)*152 153 #[error]154 LEXING_ERROR,155 #(#nodes,)*156 #[doc(hidden)]157 __LAST,158 }159 use self::SyntaxKind::*;160161 impl SyntaxKind {162 pub fn is_keyword(self) -> bool {163 match self {164 #(#keywords)|* => true,165 _ => false,166 }167 }168 pub fn is_enum(self) -> bool {169 match self {170 #(#enums)|* => true,171 _ => false,172 }173 }174175 pub fn from_raw(r: u16) -> Self {176 assert!(r < Self::__LAST as u16);177 unsafe { std::mem::transmute(r) }178 }179 pub fn into_raw(self) -> u16 {180 self as u16181 }182 }183184 #[macro_export]185 macro_rules! T {#(#t_macros);*}186 pub use T;187 };188189 reformat(&ast.to_string())190}191192fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {193 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar194 .nodes195 .iter()196 .map(|node| {197 let name = format_ident!("{}", node.name);198 let kind = format_ident!("{}", to_upper_snake_case(&node.name));199 let traits = node.traits.iter().map(|trait_name| {200 let trait_name = format_ident!("{}", trait_name);201 quote!(impl ast::#trait_name for #name {})202 });203204 let methods = node.fields.iter().map(|field| {205 let method_name = field.method_name(kinds);206 let ty = field.ty();207208 if field.is_many() {209 quote! {210 pub fn #method_name(&self) -> AstChildren<#ty> {211 support::children(&self.syntax)212 }213 }214 } else if let Some(token_kind) = field.token_kind(kinds) {215 quote! {216 pub fn #method_name(&self) -> Option<#ty> {217 support::token(&self.syntax, #token_kind)218 }219 }220 } else if field.is_token_enum(grammar) {221 quote! {222 pub fn #method_name(&self) -> Option<#ty> {223 support::token_child(&self.syntax)224 }225 }226 } else {227 quote! {228 pub fn #method_name(&self) -> Option<#ty> {229 support::child(&self.syntax)230 }231 }232 }233 });234 (235 quote! {236 #[pretty_doc_comment_placeholder_workaround]237 #[derive(Debug, Clone, PartialEq, Eq, Hash)]238 pub struct #name {239 pub(crate) syntax: SyntaxNode,240 }241242 #(#traits)*243244 impl #name {245 #(#methods)*246 }247 },248 quote! {249 impl AstNode for #name {250 fn can_cast(kind: SyntaxKind) -> bool {251 kind == #kind252 }253 fn cast(syntax: SyntaxNode) -> Option<Self> {254 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }255 }256 fn syntax(&self) -> &SyntaxNode { &self.syntax }257 }258 },259 )260 })261 .unzip();262263 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar264 .enums265 .iter()266 .map(|en| {267 let variants: Vec<_> = en268 .variants269 .iter()270 .map(|var| format_ident!("{}", var))271 .collect();272 let name = format_ident!("{}", en.name);273 let kinds: Vec<_> = variants274 .iter()275 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))276 .collect();277 let traits = en.traits.iter().map(|trait_name| {278 let trait_name = format_ident!("{}", trait_name);279 quote!(impl ast::#trait_name for #name {})280 });281282 let ast_node = quote! {283 impl AstNode for #name {284 fn can_cast(kind: SyntaxKind) -> bool {285 match kind {286 #(#kinds)|* => true,287 _ => false,288 }289 }290 fn cast(syntax: SyntaxNode) -> Option<Self> {291 let res = match syntax.kind() {292 #(293 #kinds => #name::#variants(#variants { syntax }),294 )*295 _ => return None,296 };297 Some(res)298 }299 fn syntax(&self) -> &SyntaxNode {300 match self {301 #(302 #name::#variants(it) => &it.syntax,303 )*304 }305 }306 }307 };308309 (310 quote! {311 #[pretty_doc_comment_placeholder_workaround]312 #[derive(Debug, Clone, PartialEq, Eq, Hash)]313 pub enum #name {314 #(#variants(#variants),)*315 }316317 #(#traits)*318 },319 quote! {320 #(321 impl From<#variants> for #name {322 fn from(node: #variants) -> #name {323 #name::#variants(node)324 }325 }326 )*327 #ast_node328 },329 )330 })331 .unzip();332333 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar334 .token_enums335 .iter()336 .map(|en| {337 let variants: Vec<_> = en338 .variants339 .iter()340 .map(|token| {341 format_ident!(342 "{}",343 to_pascal_case(kinds.token(token).expect("token exists").name())344 )345 })346 .collect();347 let name = format_ident!("{}", en.name);348 let kind_name = format_ident!("{}Kind", en.name);349 let kinds: Vec<_> = variants350 .iter()351 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))352 .collect();353354 let ast_node = quote! {355 impl AstToken for #name {356 fn can_cast(kind: SyntaxKind) -> bool {357 #kind_name::can_cast(kind)358 }359 fn cast(syntax: SyntaxToken) -> Option<Self> {360 let kind = #kind_name::cast(syntax.kind())?;361 Some(#name { syntax, kind })362 }363 fn syntax(&self) -> &SyntaxToken {364 &self.syntax365 }366 }367368 impl #kind_name {369 fn can_cast(kind: SyntaxKind) -> bool {370 match kind {371 #(#kinds)|* => true,372 _ => false,373 }374 }375 pub fn cast(kind: SyntaxKind) -> Option<Self> {376 let res = match kind {377 #(#kinds => Self::#variants,)*378 _ => return None,379 };380 Some(res)381 }382 }383 };384385 (386 quote! {387 #[pretty_doc_comment_placeholder_workaround]388 #[derive(Debug, Clone, PartialEq, Eq, Hash)]389 pub struct #name { syntax: SyntaxToken, kind: #kind_name }390391 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]392 pub enum #kind_name {393 #(#variants,)*394 }395 },396 quote! {397 #ast_node398399 impl #name {400 pub fn kind(&self) -> #kind_name {401 self.kind402 }403 }404405 impl std::fmt::Display for #name {406 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {407 std::fmt::Display::fmt(self.syntax(), f)408 }409 }410 },411 )412 })413 .unzip();414415 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar416 .nodes417 .iter()418 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))419 .into_group_map()420 .into_iter()421 .sorted_by_key(|(k, _)| *k)422 .map(|(trait_name, nodes)| {423 let name = format_ident!("Any{}", trait_name);424 let trait_name = format_ident!("{}", trait_name);425 let kinds: Vec<_> = nodes426 .iter()427 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))428 .collect();429430 (431 quote! {432 #[pretty_doc_comment_placeholder_workaround]433 #[derive(Debug, Clone, PartialEq, Eq, Hash)]434 pub struct #name {435 pub(crate) syntax: SyntaxNode,436 }437 impl ast::#trait_name for #name {}438 },439 quote! {440 impl #name {441 #[inline]442 pub fn new<T: ast::#trait_name>(node: T) -> #name {443 #name {444 syntax: node.syntax().clone()445 }446 }447 }448 impl AstNode for #name {449 fn can_cast(kind: SyntaxKind) -> bool {450 match kind {451 #(#kinds)|* => true,452 _ => false,453 }454 }455 fn cast(syntax: SyntaxNode) -> Option<Self> {456 Self::can_cast(syntax.kind()).then(|| #name { syntax })457 }458 fn syntax(&self) -> &SyntaxNode {459 &self.syntax460 }461 }462 },463 )464 })465 .unzip();466467 let enum_names = grammar.enums.iter().map(|it| &it.name);468 let node_names = grammar.nodes.iter().map(|it| &it.name);469470 let display_impls = enum_names471 .chain(node_names.clone())472 .map(|it| format_ident!("{}", it))473 .map(|name| {474 quote! {475 impl std::fmt::Display for #name {476 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {477 std::fmt::Display::fmt(self.syntax(), f)478 }479 }480 }481 });482483 let ast = quote! {484 #![allow(non_snake_case, clippy::match_like_matches_macro)]485486 use crate::{487 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},488 ast::{AstNode, AstToken, AstChildren, support},489 T,490 };491492 #(#node_defs)*493 #(#enum_defs)*494 #(#token_enum_defs)*495 #(#any_node_defs)*496 #(#node_boilerplate_impls)*497 #(#enum_boilerplate_impls)*498 #(#token_enum_boilerplate_impls)*499 #(#any_node_boilerplate_impls)*500 #(#display_impls)*501 };502503 let ast = ast.to_string().replace("T ! [", "T![");504505 let mut res = String::with_capacity(ast.len() * 2);506507 let mut docs = grammar508 .nodes509 .iter()510 .map(|it| &it.doc)511 .chain(grammar.enums.iter().map(|it| &it.doc));512513 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {514 res.push_str(chunk);515 if let Some(doc) = docs.next() {516 write_doc_comment(doc, &mut res);517 }518 }519520 let res = reformat(&res)?;521 Ok(res.replace("#[derive", "\n#[derive"))522}523524fn write_doc_comment(contents: &[String], dest: &mut String) {525 use std::fmt::Write;526 for line in contents {527 writeln!(dest, "///{}", line).unwrap();528 }529}530531pub fn escape_token_macro(token: &str) -> TokenStream {532 if "{}[]()$".contains(token) {533 let c = token.chars().next().unwrap();534 quote! { #c }535 } else if token.contains('$') {536 quote! { #token }537 } else {538 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));539 quote! { #(#cs)* }540 }541}