difftreelog
refactor split lexer from rowan parser
in: master
15 files changed
Cargo.lockdiffbeforeafterboth715 "static_assertions",715 "static_assertions",716]716]717718[[package]]719name = "jrsonnet-lexer"720version = "0.5.0-pre97"721dependencies = [722 "logos",723]717724718[[package]]725[[package]]719name = "jrsonnet-macros"726name = "jrsonnet-macros"744 "hi-doc",751 "hi-doc",745 "indoc",752 "indoc",746 "insta",753 "insta",747 "logos",754 "jrsonnet-lexer",748 "rowan",755 "rowan",749 "strip-ansi-escapes",756 "strip-ansi-escapes",750 "thiserror",757 "thiserror",Cargo.tomldiffbeforeafterboth81itertools = "0.14.0"81itertools = "0.14.0"82xshell = "0.2.7"82xshell = "0.2.7"8384lsp-server = "0.7.9"85lsp-types = "0.97.0"868387regex = "1.12"84regex = "1.12"88lru = "0.16.3"85lru = "0.16.3"crates/jrsonnet-lexer/Cargo.tomldiffbeforeafterbothno changes
crates/jrsonnet-lexer/src/generated/mod.rsdiffbeforeafterbothno changes
crates/jrsonnet-lexer/src/generated/syntax_kinds.rsdiffbeforeafterbothno changes
crates/jrsonnet-lexer/src/lex.rsdiffbeforeafterbothno changes
crates/jrsonnet-lexer/src/lib.rsdiffbeforeafterbothno changes
crates/jrsonnet-lexer/src/string_block.rsdiffbeforeafterbothno changes
crates/jrsonnet-rowan-parser/Cargo.tomldiffbeforeafterboth14drop_bomb.workspace = true14drop_bomb.workspace = true15hi-doc.workspace = true15hi-doc.workspace = true16indoc.workspace = true16indoc.workspace = true17logos.workspace = true17jrsonnet-lexer = { version = "0.5.0-pre97", path = "../jrsonnet-lexer" }18rowan.workspace = true18rowan.workspace = true19thiserror.workspace = true19thiserror.workspace = true2020crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth8 clippy::manual_non_exhaustive,8 clippy::manual_non_exhaustive,9 clippy::match_like_matches_macro9 clippy::match_like_matches_macro10)]10)]11use logos::Logos;12#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]11#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]13#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]12#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]14#[repr(u16)]13#[repr(u16)]15pub enum SyntaxKind {14pub enum SyntaxKind {16 #[doc(hidden)]15 #[doc(hidden)]17 TOMBSTONE,16 TOMBSTONE,18 #[doc(hidden)]17 #[doc(hidden)]19 EOF,18 EOF,20 #[token("||")]21 OR,19 OR,22 #[token("??")]23 NULL_COAELSE,20 NULL_COAELSE,24 #[token("&&")]25 AND,21 AND,26 #[token("|")]27 BIT_OR,22 BIT_OR,28 #[token("^")]29 BIT_XOR,23 BIT_XOR,30 #[token("&")]31 BIT_AND,24 BIT_AND,32 #[token("==")]33 EQ,25 EQ,34 #[token("!=")]35 NE,26 NE,36 #[token("<")]37 LT,27 LT,38 #[token(">")]39 GT,28 GT,40 #[token("<=")]41 LE,29 LE,42 #[token(">=")]43 GE,30 GE,44 #[token("<<")]45 LHS,31 LHS,46 #[token(">>")]47 RHS,32 RHS,48 #[token("+")]49 PLUS,33 PLUS,50 #[token("-")]51 MINUS,34 MINUS,52 #[token("*")]53 MUL,35 MUL,54 #[token("/")]55 DIV,36 DIV,56 #[token("%")]57 MODULO,37 MODULO,58 #[token("!")]59 NOT,38 NOT,60 #[token("~")]61 BIT_NOT,39 BIT_NOT,62 #[token("[")]63 L_BRACK,40 L_BRACK,64 #[token("]")]65 R_BRACK,41 R_BRACK,66 #[token("(")]67 L_PAREN,42 L_PAREN,68 #[token(")")]69 R_PAREN,43 R_PAREN,70 #[token("{")]71 L_BRACE,44 L_BRACE,72 #[token("}")]73 R_BRACE,45 R_BRACE,74 #[token(":")]75 COLON,46 COLON,76 #[token("::")]77 COLONCOLON,47 COLONCOLON,78 #[token(":::")]79 COLONCOLONCOLON,48 COLONCOLONCOLON,80 #[token(";")]81 SEMI,49 SEMI,82 #[token(".")]83 DOT,50 DOT,84 #[token("...")]85 DOTDOTDOT,51 DOTDOTDOT,86 #[token(",")]87 COMMA,52 COMMA,88 #[token("$")]89 DOLLAR,53 DOLLAR,90 #[token("=")]91 ASSIGN,54 ASSIGN,92 #[token("?")]93 QUESTION_MARK,55 QUESTION_MARK,94 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?")]95 FLOAT,56 FLOAT,96 #[regex("(?:0|[1-9][0-9]*)\\.[^0-9]")]97 ERROR_FLOAT_JUNK_AFTER_POINT,57 ERROR_FLOAT_JUNK_AFTER_POINT,98 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][^+\\-0-9]")]99 ERROR_FLOAT_JUNK_AFTER_EXPONENT,58 ERROR_FLOAT_JUNK_AFTER_EXPONENT,100 #[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][+-][^0-9]")]101 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,59 ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,102 #[regex("\"(?s:[^\"\\\\]|\\\\.)*\"")]103 STRING_DOUBLE,60 STRING_DOUBLE,104 #[regex("\"(?s:[^\"\\\\]|\\\\.)*")]105 ERROR_STRING_DOUBLE_UNTERMINATED,61 ERROR_STRING_DOUBLE_UNTERMINATED,106 #[regex("'(?s:[^'\\\\]|\\\\.)*'")]107 STRING_SINGLE,62 STRING_SINGLE,108 #[regex("'(?s:[^'\\\\]|\\\\.)*")]109 ERROR_STRING_SINGLE_UNTERMINATED,63 ERROR_STRING_SINGLE_UNTERMINATED,110 #[regex("@\"(?:[^\"]|\"\")*\"")]111 STRING_DOUBLE_VERBATIM,64 STRING_DOUBLE_VERBATIM,112 #[regex("@\"(?:[^\"]|\"\")*")]113 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,65 ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,114 #[regex("@'(?:[^']|'')*'")]115 STRING_SINGLE_VERBATIM,66 STRING_SINGLE_VERBATIM,116 #[regex("@'(?:[^']|'')*")]117 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,67 ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,118 #[regex("@[^\"'\\s]\\S+")]119 ERROR_STRING_VERBATIM_MISSING_QUOTES,68 ERROR_STRING_VERBATIM_MISSING_QUOTES,120 #[regex("\\|\\|\\|", crate::string_block::lex_str_block_test)]121 STRING_BLOCK,69 STRING_BLOCK,122 ERROR_STRING_BLOCK_UNEXPECTED_END,70 ERROR_STRING_BLOCK_UNEXPECTED_END,123 ERROR_STRING_BLOCK_MISSING_NEW_LINE,71 ERROR_STRING_BLOCK_MISSING_NEW_LINE,124 ERROR_STRING_BLOCK_MISSING_TERMINATION,72 ERROR_STRING_BLOCK_MISSING_TERMINATION,125 ERROR_STRING_BLOCK_MISSING_INDENT,73 ERROR_STRING_BLOCK_MISSING_INDENT,126 #[regex("[_a-zA-Z][_a-zA-Z0-9]*")]127 IDENT,74 IDENT,128 #[regex("[ \\t\\n\\r]+")]129 WHITESPACE,75 WHITESPACE,130 #[regex("//[^\\r\\n]*?(\\r\\n|\\n)?")]131 SINGLE_LINE_SLASH_COMMENT,76 SINGLE_LINE_SLASH_COMMENT,132 #[regex("#[^\\r\\n]*?(\\r\\n|\\n)?")]133 SINGLE_LINE_HASH_COMMENT,77 SINGLE_LINE_HASH_COMMENT,134 #[regex("/\\*([^*]|\\*[^/])*\\*/")]135 MULTI_LINE_COMMENT,78 MULTI_LINE_COMMENT,136 #[regex("/\\*/")]137 ERROR_COMMENT_TOO_SHORT,79 ERROR_COMMENT_TOO_SHORT,138 #[regex("/\\*([^*/]|\\*[^/])+")]139 ERROR_COMMENT_UNTERMINATED,80 ERROR_COMMENT_UNTERMINATED,140 #[token("tailstrict")]141 TAILSTRICT_KW,81 TAILSTRICT_KW,142 #[token("local")]143 LOCAL_KW,82 LOCAL_KW,144 #[token("importstr")]145 IMPORTSTR_KW,83 IMPORTSTR_KW,146 #[token("importbin")]147 IMPORTBIN_KW,84 IMPORTBIN_KW,148 #[token("import")]149 IMPORT_KW,85 IMPORT_KW,150 #[token("if")]151 IF_KW,86 IF_KW,152 #[token("then")]153 THEN_KW,87 THEN_KW,154 #[token("else")]155 ELSE_KW,88 ELSE_KW,156 #[token("function")]157 FUNCTION_KW,89 FUNCTION_KW,158 #[token("error")]159 ERROR_KW,90 ERROR_KW,160 #[token("in")]161 IN_KW,91 IN_KW,162 META_OBJECT_APPLY,92 META_OBJECT_APPLY,163 ERROR_NO_OPERATOR,93 ERROR_NO_OPERATOR,164 #[token("null")]165 NULL_KW,94 NULL_KW,166 #[token("true")]167 TRUE_KW,95 TRUE_KW,168 #[token("false")]169 FALSE_KW,96 FALSE_KW,170 #[token("self")]171 SELF_KW,97 SELF_KW,172 #[token("super")]173 SUPER_KW,98 SUPER_KW,174 #[token("for")]175 FOR_KW,99 FOR_KW,176 #[token("assert")]177 ASSERT_KW,100 ASSERT_KW,178 ERROR_MISSING_TOKEN,101 ERROR_MISSING_TOKEN,179 ERROR_UNEXPECTED_TOKEN,102 ERROR_UNEXPECTED_TOKEN,crates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth1use core::ops::Range;1use jrsonnet_lexer::Lexer;2use std::convert::TryFrom;34use logos::Logos;5use rowan::{TextRange, TextSize};2use rowan::{TextRange, TextSize};637use crate::{4use crate::SyntaxKind;8 string_block::{lex_str_block, StringBlockError},9 SyntaxKind,10};1112pub struct Lexer<'a> {13 inner: logos::Lexer<'a, SyntaxKind>,14}1516impl<'a> Lexer<'a> {17 pub fn new(input: &'a str) -> Self {18 Self {19 inner: SyntaxKind::lexer(input),20 }21 }22}2324impl<'a> Iterator for Lexer<'a> {25 type Item = Lexeme<'a>;2627 fn next(&mut self) -> Option<Self::Item> {28 use SyntaxKind::*;2930 let mut kind = self.inner.next()?;31 let text = self.inner.slice();3233 if kind == Ok(STRING_BLOCK) {34 // We use custom lexer, which skips enough bytes, but not returns error35 // Instead we should call lexer again to verify if there is something wrong with string block36 let mut lexer = logos::Lexer::<SyntaxKind>::new(text);37 // In kinds, string blocks is parsed at least as `|||`38 lexer.bump(3);39 let res = lex_str_block(&mut lexer);40 let next = lexer.next();41 assert!(next.is_none(), "str_block is lexed");42 match res {43 Ok(()) => {}44 Err(e) => {45 kind = Ok(match e {46 StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,47 StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,48 StringBlockError::MissingTermination => {49 ERROR_STRING_BLOCK_MISSING_TERMINATION50 }51 StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,52 });53 }54 }55 }5657 Some(Self::Item {58 kind: kind.unwrap_or(SyntaxKind::LEXING_ERROR),59 text,60 range: {61 let Range { start, end } = self.inner.span();6263 TextRange::new(64 TextSize::try_from(start).unwrap(),65 TextSize::try_from(end).unwrap(),66 )67 },68 })69 }70}71572#[derive(Clone, Copy, Debug)]6#[derive(Clone, Copy, Debug)]73pub struct Lexeme<'i> {7pub struct Lexeme<'s> {74 pub kind: SyntaxKind,8 pub kind: SyntaxKind,75 pub text: &'i str,9 pub text: &'s str,76 pub range: TextRange,10 pub range: TextRange,77}11}781279pub fn lex(input: &str) -> Vec<Lexeme<'_>> {13pub fn lex(input: &str) -> Vec<Lexeme<'_>> {80 Lexer::new(input).collect()14 Lexer::new(input).map(|l| Lexeme {15 kind: SyntaxKind::from_raw(l.kind.into_raw()),16 text: l.text,17 range: TextRange::new(TextSize::from(l.range.0), TextSize::from(l.range.1)),18 }).collect()81}19}8220crates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth223use event::Sink;3use event::Sink;4use generated::nodes::{SourceFile, Trivia};4use generated::nodes::{SourceFile, Trivia};5use lex::lex;6use parser::{LocatedSyntaxError, Parser};5use parser::{LocatedSyntaxError, Parser};7pub use rowan;6pub use rowan;8714mod marker;13mod marker;15mod parser;14mod parser;16mod precedence;15mod precedence;17mod string_block;18mod tests;16mod tests;19mod token_set;17mod token_set;201821pub use ast::{AstChildren, AstNode, AstToken};19pub use ast::{AstChildren, AstNode, AstToken};22pub use generated::{nodes, syntax_kinds::SyntaxKind};20pub use generated::{nodes, syntax_kinds::SyntaxKind};23pub use language::*;21pub use language::*;24pub use string_block::{collect_lexed_str_block, CollectStrBlock};25pub use token_set::SyntaxKindSet;22pub use token_set::SyntaxKindSet;262327use self::{24use self::{30};27};312832pub fn parse(input: &str) -> (SourceFile, Vec<LocatedSyntaxError>) {29pub fn parse(input: &str) -> (SourceFile, Vec<LocatedSyntaxError>) {33 let lexemes = lex(input);30 let lexemes = lex::lex(input);34 let kinds = lexemes31 let kinds = lexemes35 .iter()32 .iter()36 .map(|l| l.kind)33 .map(|l| l.kind)crates/jrsonnet-rowan-parser/src/string_block.rsdiffbeforeafterbothno changes
xtask/src/sourcegen/kinds.rsdiffbeforeafterboth56 | Self::Error { name, .. } => name,56 | Self::Error { name, .. } => name,57 }57 }58 }58 }59 pub fn expand_kind(&self) -> TokenStream {59 pub fn expand_kind(&self, lexer: bool) -> TokenStream {60 let name = format_ident!("{}", self.name());60 let name = format_ident!("{}", self.name());61 let attr = match self {61 let attr = match self {62 Self::Keyword { code, .. } => quote! {#[token(#code)]},62 Self::Keyword { code, .. } => quote! {#[token(#code)]},75 }75 }76 _ => quote! {},76 _ => quote! {},77 };77 };78 let attr = if lexer {79 attr80 } else {81 quote! {}82 };78 quote! {83 quote! {79 #attr84 #attr80 #name85 #namextask/src/sourcegen/mod.rsdiffbeforeafterboth89 kinds.define_node(&name);89 kinds.define_node(&name);90 }90 }919192 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;92 let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;939394 let nodes = generate_nodes(&kinds, &ast)?;94 let nodes = generate_nodes(&kinds, &ast)?;95 ensure_file_contents(95 ensure_file_contents(107 &nodes,107 &nodes,108 );108 );109110 let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;111 ensure_file_contents(112 &PathBuf::from(concat!(113 env!("CARGO_MANIFEST_DIR"),114 "/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",115 )),116 &lexer_syntax_kinds,117 );109 Ok(())118 Ok(())110}119}111120112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {121fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {113 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);122 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);123 let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));115124116 let keywords = kinds125 let keywords = kinds117 .tokens()126 .tokens()118 .filter(|k| matches!(k, TokenKind::Keyword { .. }))127 .filter(|k| matches!(k, TokenKind::Keyword { .. }))119 .map(TokenKind::name)128 .map(TokenKind::name)120 .map(|n| format_ident!("{n}"));129 .map(|n| format_ident!("{n}"));121130122 let nodes = kinds131 let mut nodes = kinds123 .nodes132 .nodes124 .iter()133 .iter()125 .map(|name| format_ident!("{}", name))134 .map(|name| format_ident!("{}", name))126 .collect::<Vec<_>>();135 .collect::<Vec<_>>();136137 if lexer {138 nodes.clear();139 }127140128 let enums = grammar141 let enums = grammar129 .enums142 .enums135 .iter()148 .iter()136 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),149 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137 );150 )151 .collect::<Vec<_>>();152 let is_enum = if lexer {153 quote! {}154 } else {155 quote! {156 pub fn is_enum(self) -> bool {157 match self {158 #(#enums)|* => true,159 _ => false,160 }161 }162 }163 };164165 let derive_logos = if lexer {166 quote! {167 , logos::Logos168 }169 } else {170 quote! {}171 };138172139 let ast = quote! {173 let ast = quote! {140 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]174 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141 use logos::Logos;142175143 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.176 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]177 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]145 #[repr(u16)]178 #[repr(u16)]146 pub enum SyntaxKind {179 pub enum SyntaxKind {147 #[doc(hidden)]180 #[doc(hidden)]165 }198 }166 }199 }200167 pub fn is_enum(self) -> bool {201 #is_enum168 match self {169 #(#enums)|* => true,170 _ => false,171 }172 }173202174 pub fn from_raw(r: u16) -> Self {203 pub fn from_raw(r: u16) -> Self {175 assert!(r < Self::__LAST as u16);204 assert!(r < Self::__LAST as u16);