git.delta.rocks / jrsonnet / refs/commits / 93f08ca017e4

diff | tree | log

refactor split lexer from rowan parser

tzxnlqzs · Yaroslav Bolyukin · 2026-03-22 · parent: #c6d0d24 · .patch · .diff
in: master

15 files changed

modified Cargo.lock — diff | before | after | both
715 "static_assertions",715 "static_assertions",
716]716]
717
718[[package]]
719name = "jrsonnet-lexer"
720version = "0.5.0-pre97"
721dependencies = [
722 "logos",
723]
717724
718[[package]]725[[package]]
719name = "jrsonnet-macros"726name = "jrsonnet-macros"
744 "hi-doc",751 "hi-doc",
745 "indoc",752 "indoc",
746 "insta",753 "insta",
747 "logos",754 "jrsonnet-lexer",
748 "rowan",755 "rowan",
749 "strip-ansi-escapes",756 "strip-ansi-escapes",
750 "thiserror",757 "thiserror",
modified Cargo.toml — diff | before | after | both
81itertools = "0.14.0"81itertools = "0.14.0"
82xshell = "0.2.7"82xshell = "0.2.7"
83
84lsp-server = "0.7.9"
85lsp-types = "0.97.0"
8683
87regex = "1.12"84regex = "1.12"
88lru = "0.16.3"85lru = "0.16.3"
added crates/jrsonnet-lexer/Cargo.toml — diff | before | after | both

no changes

added crates/jrsonnet-lexer/src/generated/mod.rs — diff | before | after | both

no changes

added crates/jrsonnet-lexer/src/generated/syntax_kinds.rs — diff | before | after | both

no changes

added crates/jrsonnet-lexer/src/lex.rs — diff | before | after | both

no changes

added crates/jrsonnet-lexer/src/lib.rs — diff | before | after | both

no changes

added crates/jrsonnet-lexer/src/string_block.rs — diff | before | after | both

no changes

modified crates/jrsonnet-rowan-parser/Cargo.toml — diff | before | after | both
14drop_bomb.workspace = true14drop_bomb.workspace = true
15hi-doc.workspace = true15hi-doc.workspace = true
16indoc.workspace = true16indoc.workspace = true
17logos.workspace = true17jrsonnet-lexer = { version = "0.5.0-pre97", path = "../jrsonnet-lexer" }
18rowan.workspace = true18rowan.workspace = true
19thiserror.workspace = true19thiserror.workspace = true
2020
modified crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs — diff | before | after | both
8 clippy::manual_non_exhaustive,8 clippy::manual_non_exhaustive,
9 clippy::match_like_matches_macro9 clippy::match_like_matches_macro
10)]10)]
11use logos::Logos;
12#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]11#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]
13#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]12#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
14#[repr(u16)]13#[repr(u16)]
15pub enum SyntaxKind {14pub enum SyntaxKind {
16 #[doc(hidden)]15 #[doc(hidden)]
17 TOMBSTONE,16 TOMBSTONE,
18 #[doc(hidden)]17 #[doc(hidden)]
19 EOF,18 EOF,
20 #[token("||")]
21 OR,19 OR,
22 #[token("??")]
23 NULL_COAELSE,20 NULL_COAELSE,
24 #[token("&&")]
25 AND,21 AND,
26 #[token("|")]
27 BIT_OR,22 BIT_OR,
28 #[token("^")]
29 BIT_XOR,23 BIT_XOR,
30 #[token("&")]
31 BIT_AND,24 BIT_AND,
32 #[token("==")]
33 EQ,25 EQ,
34 #[token("!=")]
35 NE,26 NE,
36 #[token("<")]
37 LT,27 LT,
38 #[token(">")]
39 GT,28 GT,
40 #[token("<=")]
41 LE,29 LE,
42 #[token(">=")]
43 GE,30 GE,
44 #[token("<<")]
45 LHS,31 LHS,
46 #[token(">>")]
47 RHS,32 RHS,
48 #[token("+")]
49 PLUS,33 PLUS,
50 #[token("-")]
51 MINUS,34 MINUS,
52 #[token("*")]
53 MUL,35 MUL,
54 #[token("/")]
55 DIV,36 DIV,
56 #[token("%")]
57 MODULO,37 MODULO,
58 #[token("!")]
59 NOT,38 NOT,
60 #[token("~")]
61 BIT_NOT,39 BIT_NOT,
62 #[token("[")]
63 L_BRACK,40 L_BRACK,
64 #[token("]")]
65 R_BRACK,41 R_BRACK,
66 #[token("(")]
67 L_PAREN,42 L_PAREN,
68 #[token(")")]
69 R_PAREN,43 R_PAREN,
70 #[token("{")]
71 L_BRACE,44 L_BRACE,
72 #[token("}")]
73 R_BRACE,45 R_BRACE,
74 #[token(":")]
75 COLON,46 COLON,
76 #[token("::")]
77 COLONCOLON,47 COLONCOLON,
78 #[token(":::")]
79 COLONCOLONCOLON,48 COLONCOLONCOLON,
80 #[token(";")]
81 SEMI,49 SEMI,
82 #[token(".")]
83 DOT,50 DOT,
84 #[token("...")]
85 DOTDOTDOT,51 DOTDOTDOT,
86 #[token(",")]
87 COMMA,52 COMMA,
88 #[token("$")]
89 DOLLAR,53 DOLLAR,
90 #[token("=")]
91 ASSIGN,54 ASSIGN,
92 #[token("?")]
93 QUESTION_MARK,55 QUESTION_MARK,
94 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?")]
95 FLOAT,56 FLOAT,
96 #[regex("(?:0|[1-9][0-9]*)\\.[^0-9]")]
97 ERROR_FLOAT_JUNK_AFTER_POINT,57 ERROR_FLOAT_JUNK_AFTER_POINT,
98 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][^+\\-0-9]")]
99 ERROR_FLOAT_JUNK_AFTER_EXPONENT,58 ERROR_FLOAT_JUNK_AFTER_EXPONENT,
100 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][+-][^0-9]")]
101 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,59 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,
102 #[regex("\"(?s:[^\"\\\\]|\\\\.)*\"")]
103 STRING_DOUBLE,60 STRING_DOUBLE,
104 #[regex("\"(?s:[^\"\\\\]|\\\\.)*")]
105 ERROR_STRING_DOUBLE_UNTERMINATED,61 ERROR_STRING_DOUBLE_UNTERMINATED,
106 #[regex("'(?s:[^'\\\\]|\\\\.)*'")]
107 STRING_SINGLE,62 STRING_SINGLE,
108 #[regex("'(?s:[^'\\\\]|\\\\.)*")]
109 ERROR_STRING_SINGLE_UNTERMINATED,63 ERROR_STRING_SINGLE_UNTERMINATED,
110 #[regex("@\"(?:[^\"]|\"\")*\"")]
111 STRING_DOUBLE_VERBATIM,64 STRING_DOUBLE_VERBATIM,
112 #[regex("@\"(?:[^\"]|\"\")*")]
113 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,65 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,
114 #[regex("@'(?:[^']|'')*'")]
115 STRING_SINGLE_VERBATIM,66 STRING_SINGLE_VERBATIM,
116 #[regex("@'(?:[^']|'')*")]
117 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,67 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,
118 #[regex("@[^\"'\\s]\\S+")]
119 ERROR_STRING_VERBATIM_MISSING_QUOTES,68 ERROR_STRING_VERBATIM_MISSING_QUOTES,
120 #[regex("\\|\\|\\|", crate::string_block::lex_str_block_test)]
121 STRING_BLOCK,69 STRING_BLOCK,
122 ERROR_STRING_BLOCK_UNEXPECTED_END,70 ERROR_STRING_BLOCK_UNEXPECTED_END,
123 ERROR_STRING_BLOCK_MISSING_NEW_LINE,71 ERROR_STRING_BLOCK_MISSING_NEW_LINE,
124 ERROR_STRING_BLOCK_MISSING_TERMINATION,72 ERROR_STRING_BLOCK_MISSING_TERMINATION,
125 ERROR_STRING_BLOCK_MISSING_INDENT,73 ERROR_STRING_BLOCK_MISSING_INDENT,
126 #[regex("[_a-zA-Z][_a-zA-Z0-9]*")]
127 IDENT,74 IDENT,
128 #[regex("[ \\t\\n\\r]+")]
129 WHITESPACE,75 WHITESPACE,
130 #[regex("//[^\\r\\n]*?(\\r\\n|\\n)?")]
131 SINGLE_LINE_SLASH_COMMENT,76 SINGLE_LINE_SLASH_COMMENT,
132 #[regex("#[^\\r\\n]*?(\\r\\n|\\n)?")]
133 SINGLE_LINE_HASH_COMMENT,77 SINGLE_LINE_HASH_COMMENT,
134 #[regex("/\\*([^*]|\\*[^/])*\\*/")]
135 MULTI_LINE_COMMENT,78 MULTI_LINE_COMMENT,
136 #[regex("/\\*/")]
137 ERROR_COMMENT_TOO_SHORT,79 ERROR_COMMENT_TOO_SHORT,
138 #[regex("/\\*([^*/]|\\*[^/])+")]
139 ERROR_COMMENT_UNTERMINATED,80 ERROR_COMMENT_UNTERMINATED,
140 #[token("tailstrict")]
141 TAILSTRICT_KW,81 TAILSTRICT_KW,
142 #[token("local")]
143 LOCAL_KW,82 LOCAL_KW,
144 #[token("importstr")]
145 IMPORTSTR_KW,83 IMPORTSTR_KW,
146 #[token("importbin")]
147 IMPORTBIN_KW,84 IMPORTBIN_KW,
148 #[token("import")]
149 IMPORT_KW,85 IMPORT_KW,
150 #[token("if")]
151 IF_KW,86 IF_KW,
152 #[token("then")]
153 THEN_KW,87 THEN_KW,
154 #[token("else")]
155 ELSE_KW,88 ELSE_KW,
156 #[token("function")]
157 FUNCTION_KW,89 FUNCTION_KW,
158 #[token("error")]
159 ERROR_KW,90 ERROR_KW,
160 #[token("in")]
161 IN_KW,91 IN_KW,
162 META_OBJECT_APPLY,92 META_OBJECT_APPLY,
163 ERROR_NO_OPERATOR,93 ERROR_NO_OPERATOR,
164 #[token("null")]
165 NULL_KW,94 NULL_KW,
166 #[token("true")]
167 TRUE_KW,95 TRUE_KW,
168 #[token("false")]
169 FALSE_KW,96 FALSE_KW,
170 #[token("self")]
171 SELF_KW,97 SELF_KW,
172 #[token("super")]
173 SUPER_KW,98 SUPER_KW,
174 #[token("for")]
175 FOR_KW,99 FOR_KW,
176 #[token("assert")]
177 ASSERT_KW,100 ASSERT_KW,
178 ERROR_MISSING_TOKEN,101 ERROR_MISSING_TOKEN,
179 ERROR_UNEXPECTED_TOKEN,102 ERROR_UNEXPECTED_TOKEN,
modified crates/jrsonnet-rowan-parser/src/lex.rs — diff | before | after | both
1use core::ops::Range;1use jrsonnet_lexer::Lexer;
2use std::convert::TryFrom;
3
4use logos::Logos;
5use rowan::{TextRange, TextSize};2use rowan::{TextRange, TextSize};
63
7use crate::{4use crate::SyntaxKind;
8 string_block::{lex_str_block, StringBlockError},
9 SyntaxKind,
10};
11
12pub struct Lexer<'a> {
13 inner: logos::Lexer<'a, SyntaxKind>,
14}
15
16impl<'a> Lexer<'a> {
17 pub fn new(input: &'a str) -> Self {
18 Self {
19 inner: SyntaxKind::lexer(input),
20 }
21 }
22}
23
24impl<'a> Iterator for Lexer<'a> {
25 type Item = Lexeme<'a>;
26
27 fn next(&mut self) -> Option<Self::Item> {
28 use SyntaxKind::*;
29
30 let mut kind = self.inner.next()?;
31 let text = self.inner.slice();
32
33 if kind == Ok(STRING_BLOCK) {
34 // We use custom lexer, which skips enough bytes, but not returns error
35 // Instead we should call lexer again to verify if there is something wrong with string block
36 let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
37 // In kinds, string blocks is parsed at least as `|||`
38 lexer.bump(3);
39 let res = lex_str_block(&mut lexer);
40 let next = lexer.next();
41 assert!(next.is_none(), "str_block is lexed");
42 match res {
43 Ok(()) => {}
44 Err(e) => {
45 kind = Ok(match e {
46 StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
47 StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
48 StringBlockError::MissingTermination => {
49 ERROR_STRING_BLOCK_MISSING_TERMINATION
50 }
51 StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
52 });
53 }
54 }
55 }
56
57 Some(Self::Item {
58 kind: kind.unwrap_or(SyntaxKind::LEXING_ERROR),
59 text,
60 range: {
61 let Range { start, end } = self.inner.span();
62
63 TextRange::new(
64 TextSize::try_from(start).unwrap(),
65 TextSize::try_from(end).unwrap(),
66 )
67 },
68 })
69 }
70}
715
72#[derive(Clone, Copy, Debug)]6#[derive(Clone, Copy, Debug)]
73pub struct Lexeme<'i> {7pub struct Lexeme<'s> {
74 pub kind: SyntaxKind,8 pub kind: SyntaxKind,
75 pub text: &'i str,9 pub text: &'s str,
76 pub range: TextRange,10 pub range: TextRange,
77}11}
7812
79pub fn lex(input: &str) -> Vec<Lexeme<'_>> {13pub fn lex(input: &str) -> Vec<Lexeme<'_>> {
80 Lexer::new(input).collect()14 Lexer::new(input).map(|l| Lexeme {
15 kind: SyntaxKind::from_raw(l.kind.into_raw()),
16 text: l.text,
17 range: TextRange::new(TextSize::from(l.range.0), TextSize::from(l.range.1)),
18 }).collect()
81}19}
8220
modified crates/jrsonnet-rowan-parser/src/lib.rs — diff | before | after | both
22
3use event::Sink;3use event::Sink;
4use generated::nodes::{SourceFile, Trivia};4use generated::nodes::{SourceFile, Trivia};
5use lex::lex;
6use parser::{LocatedSyntaxError, Parser};5use parser::{LocatedSyntaxError, Parser};
7pub use rowan;6pub use rowan;
87
14mod marker;13mod marker;
15mod parser;14mod parser;
16mod precedence;15mod precedence;
17mod string_block;
18mod tests;16mod tests;
19mod token_set;17mod token_set;
2018
21pub use ast::{AstChildren, AstNode, AstToken};19pub use ast::{AstChildren, AstNode, AstToken};
22pub use generated::{nodes, syntax_kinds::SyntaxKind};20pub use generated::{nodes, syntax_kinds::SyntaxKind};
23pub use language::*;21pub use language::*;
24pub use string_block::{collect_lexed_str_block, CollectStrBlock};
25pub use token_set::SyntaxKindSet;22pub use token_set::SyntaxKindSet;
2623
27use self::{24use self::{
30};27};
3128
32pub fn parse(input: &str) -> (SourceFile, Vec<LocatedSyntaxError>) {29pub fn parse(input: &str) -> (SourceFile, Vec<LocatedSyntaxError>) {
33 let lexemes = lex(input);30 let lexemes = lex::lex(input);
34 let kinds = lexemes31 let kinds = lexemes
35 .iter()32 .iter()
36 .map(|l| l.kind)33 .map(|l| l.kind)
deleted crates/jrsonnet-rowan-parser/src/string_block.rs — diff | before | after | both

no changes

modified xtask/src/sourcegen/kinds.rs — diff | before | after | both
56 | Self::Error { name, .. } => name,56 | Self::Error { name, .. } => name,
57 }57 }
58 }58 }
59 pub fn expand_kind(&self) -> TokenStream {59 pub fn expand_kind(&self, lexer: bool) -> TokenStream {
60 let name = format_ident!("{}", self.name());60 let name = format_ident!("{}", self.name());
61 let attr = match self {61 let attr = match self {
62 Self::Keyword { code, .. } => quote! {#[token(#code)]},62 Self::Keyword { code, .. } => quote! {#[token(#code)]},
75 }75 }
76 _ => quote! {},76 _ => quote! {},
77 };77 };
78 let attr = if lexer {
79 attr
80 } else {
81 quote! {}
82 };
78 quote! {83 quote! {
79 #attr84 #attr
80 #name85 #name
modified xtask/src/sourcegen/mod.rs — diff | before | after | both
89 kinds.define_node(&name);89 kinds.define_node(&name);
90 }90 }
9191
92 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;92 let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;
9393
94 let nodes = generate_nodes(&kinds, &ast)?;94 let nodes = generate_nodes(&kinds, &ast)?;
95 ensure_file_contents(95 ensure_file_contents(
107 &nodes,107 &nodes,
108 );108 );
109
110 let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;
111 ensure_file_contents(
112 &PathBuf::from(concat!(
113 env!("CARGO_MANIFEST_DIR"),
114 "/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",
115 )),
116 &lexer_syntax_kinds,
117 );
109 Ok(())118 Ok(())
110}119}
111120
112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {121fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {
113 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);122 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);
114 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);123 let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));
115124
116 let keywords = kinds125 let keywords = kinds
117 .tokens()126 .tokens()
118 .filter(|k| matches!(k, TokenKind::Keyword { .. }))127 .filter(|k| matches!(k, TokenKind::Keyword { .. }))
119 .map(TokenKind::name)128 .map(TokenKind::name)
120 .map(|n| format_ident!("{n}"));129 .map(|n| format_ident!("{n}"));
121130
122 let nodes = kinds131 let mut nodes = kinds
123 .nodes132 .nodes
124 .iter()133 .iter()
125 .map(|name| format_ident!("{}", name))134 .map(|name| format_ident!("{}", name))
126 .collect::<Vec<_>>();135 .collect::<Vec<_>>();
136
137 if lexer {
138 nodes.clear();
139 }
127140
128 let enums = grammar141 let enums = grammar
129 .enums142 .enums
135 .iter()148 .iter()
136 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),149 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),
137 );150 )
151 .collect::<Vec<_>>();
152 let is_enum = if lexer {
153 quote! {}
154 } else {
155 quote! {
156 pub fn is_enum(self) -> bool {
157 match self {
158 #(#enums)|* => true,
159 _ => false,
160 }
161 }
162 }
163 };
164
165 let derive_logos = if lexer {
166 quote! {
167 , logos::Logos
168 }
169 } else {
170 quote! {}
171 };
138172
139 let ast = quote! {173 let ast = quote! {
140 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]174 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
141 use logos::Logos;
142175
143 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.176 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]177 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]
145 #[repr(u16)]178 #[repr(u16)]
146 pub enum SyntaxKind {179 pub enum SyntaxKind {
147 #[doc(hidden)]180 #[doc(hidden)]
165 }198 }
166 }199 }
200
167 pub fn is_enum(self) -> bool {201 #is_enum
168 match self {
169 #(#enums)|* => true,
170 _ => false,
171 }
172 }
173202
174 pub fn from_raw(r: u16) -> Self {203 pub fn from_raw(r: u16) -> Self {
175 assert!(r < Self::__LAST as u16);204 assert!(r < Self::__LAST as u16);