difftreelog
refactor use grammar to classify tokens
in: master
13 files changed
cmds/jrsonnet-fmt/src/main.rsdiffbeforeafterboth--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
nodes::{
ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
- Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+ Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
UnaryOperator,
},
AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
}
}
-impl Printable for String {
+impl Printable for Text {
fn print(&self) -> PrintItems {
p!(new: str(&format!("{}", self)))
}
@@ -168,7 +168,7 @@
FieldName::FieldNameFixed(f) => {
if let Some(id) = f.id() {
p!(new: {id})
- } else if let Some(str) = f.string() {
+ } else if let Some(str) = f.text() {
p!(new: {str})
} else {
p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
- Expr::ExprString(s) => p!(new: {s.string()}),
+ Expr::ExprString(s) => p!(new: {s.text()}),
Expr::ExprNumber(n) => p!(new: {n.number()}),
Expr::ExprArray(a) => {
let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
pi
}
Expr::ExprImport(v) => {
- p!(new: {v.import_kind()} str(" ") {v.string()})
+ p!(new: {v.import_kind()} str(" ") {v.text()})
}
Expr::ExprVar(n) => p!(new: {n.name()}),
Expr::ExprLocal(l) => {
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
name:Name
')'
ExprString =
- String
+ Text
ExprNumber =
Number
ExprArray =
@@ -67,7 +67,7 @@
']'
ExprImport =
- ImportKind String
+ ImportKind Text
ImportKind =
'importstr'
@@ -217,7 +217,7 @@
FieldNameFixed =
id:Name
-| String
+| Text
FieldNameDynamic =
'['
Expr
@@ -239,16 +239,27 @@
| '$'
| 'super'
-String =
+Text =
'LIT_STRING_DOUBLE!'
+| 'ERROR_STRING_DOUBLE_UNTERMINATED!'
| 'LIT_STRING_SINGLE!'
+| 'ERROR_STRING_SINGLE_UNTERMINATED!'
| 'LIT_STRING_DOUBLE_VERBATIM!'
+| 'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
| 'LIT_STRING_SINGLE_VERBATIM!'
+| 'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+| 'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
| 'LIT_STRING_BLOCK!'
+| 'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+| 'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+| 'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+| 'ERROR_STRING_BLOCK_MISSING_INDENT!'
Number =
'LIT_FLOAT!'
-| 'META_FORCE_ENUM!'
+| 'ERROR_FLOAT_JUNK_AFTER_POINT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
ForSpec =
'for'
@@ -347,3 +358,12 @@
TrueExpr=Expr
FalseExpr=Expr
LhsExpr=Expr
+
+// Trivia - tokens which are implicitly skipped by the parser
+Trivia =
+ 'LIT_WHITESPACE!'
+| 'LIT_MULTI_LINE_COMMENT!'
+| 'ERROR_COMMENT_TOO_SHORT!'
+| 'ERROR_COMMENT_UNTERMINATED!'
+| 'LIT_SINGLE_LINE_HASH_COMMENT!'
+| 'LIT_SINGLE_LINE_SLASH_COMMENT!'
crates/jrsonnet-rowan-parser/src/classify.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
- pub fn is_trivia(self) -> bool {
- matches!(
- self,
- Self::WHITESPACE
- | Self::MULTI_LINE_COMMENT
- | Self::ERROR_COMMENT_TOO_SHORT
- | Self::ERROR_COMMENT_UNTERMINATED
- | Self::SINGLE_LINE_HASH_COMMENT
- | Self::SINGLE_LINE_SLASH_COMMENT
- )
- }
- pub fn is_string(self) -> bool {
- matches!(
- self,
- Self::STRING_SINGLE
- | Self::ERROR_STRING_SINGLE_UNTERMINATED
- | Self::STRING_DOUBLE
- | Self::ERROR_STRING_DOUBLE_UNTERMINATED
- | Self::STRING_SINGLE_VERBATIM
- | Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
- | Self::STRING_DOUBLE_VERBATIM
- | Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
- | Self::STRING_BLOCK
- | Self::ERROR_STRING_BLOCK_UNEXPECTED_END
- | Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
- | Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
- | Self::ERROR_STRING_BLOCK_MISSING_INDENT
- )
- }
- pub fn is_number(self) -> bool {
- matches!(
- self,
- Self::FLOAT
- | Self::ERROR_FLOAT_JUNK_AFTER_POINT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
- )
- }
- pub fn is_literal(self) -> bool {
- matches!(
- self,
- Self::NULL_KW
- | Self::TRUE_KW | Self::FALSE_KW
- | Self::SELF_KW | Self::DOLLAR
- | Self::SUPER_KW
- )
- }
-}
crates/jrsonnet-rowan-parser/src/event.rsdiffbeforeafterboth
use std::mem;

use rowan::{GreenNodeBuilder, Language};

use crate::{
    lex::Lexeme,
    parser::{Parse, SyntaxError},
    JsonnetLanguage, SyntaxKind,
};

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Event {
    /// Used for unfinished markers
    Pending,
    /// After marker is completed, Pending event is replaced with Start
    Start {
        kind: SyntaxKind,
        /// If marker is preceded or wrapped - instead of reordering events, we
        /// insert start event in the end of events Vec instead, and store relative offset to this event here
        forward_parent: Option<usize>,
    },
    /// Eat token
    Token {
        kind: SyntaxKind,
    },
    /// Position of finished node
    Finish {
        /// Same as forward_parent of Start, but for wrapping
        wrapper: Option<usize>,
    },
    Error(SyntaxError),
    /// Used for dropped markers and other things
    Noop,
}

pub(super) struct Sink<'i> {
    pub builder: GreenNodeBuilder<'static>,
    lexemes: &'i [Lexeme<'i>],
    offset: usize,
    events: Vec<Event>,
    pub errors: Vec<SyntaxError>,
}

impl<'i> Sink<'i> {
    pub(super) fn new(events: Vec<Event>, lexemes: &'i [Lexeme<'i>]) -> Self {
        Self {
            builder: GreenNodeBuilder::new(),
            lexemes,
            offset: 0,
            events,
            errors: vec![],
        }
    }

    pub(super) fn finish(mut self) -> Parse {
        let mut eat_start_whitespace = false;
        let mut depth = 0;
        for idx in 0..self.events.len() {
            match mem::replace(&mut self.events[idx], Event::Noop) {
                Event::Start {
                    kind,
                    forward_parent,
                } => {
                    if depth != 0 {
                        self.skip_whitespace();
                    }
                    let mut kinds = vec![kind];

                    let mut idx = idx;
                    let mut forward_parent = forward_parent;

                    // Walk through the forward parent of the forward parent, and the forward parent
                    // of that, and of that, etc. until we reach a StartNode event without a forward
                    // parent.
                    while let Some(fp) = forward_parent {
                        idx += fp;

                        forward_parent = if let Event::Start {
                            kind,
                            forward_parent,
                        } = mem::replace(&mut self.events[idx], Event::Noop)
                        {
                            kinds.push(kind);
                            forward_parent
                        } else {
                            unreachable!()
                        };
                    }

                    for kind in kinds.into_iter().rev() {
                        self.builder.start_node(JsonnetLanguage::kind_to_raw(kind));
                        depth += 1;
                        if depth == 1 {
                            self.skip_whitespace();
                        }
                    }

                    eat_start_whitespace = false;
                }
                Event::Token { kind } => {
                    if eat_start_whitespace {
                        self.skip_whitespace();
                    }
                    self.token(kind);
                    eat_start_whitespace = true;
                }
                Event::Finish { wrapper } => {
                    self.builder.finish_node();
                    depth -= 1;
                    let mut idx = idx;
                    let mut wrapper = wrapper;
                    while let Some(w) = wrapper {
                        idx += w;
                        wrapper = if let Event::Finish { wrapper } =
                            mem::replace(&mut self.events[idx], Event::Noop)
                        {
                            self.builder.finish_node();
                            depth -= 1;
                            wrapper
                        } else {
                            unreachable!()
                        }
                    }
                    eat_start_whitespace = true;
                }
                Event::Pending => panic!("placeholder should not end in events"),
                Event::Noop => {}
                Event::Error(e) => {
                    self.errors.push(e);
                }
            }
        }

        Parse {
            green_node: self.builder.finish(),
            errors: self.errors,
        }
    }
    fn token(&mut self, kind: SyntaxKind) {
        let lexeme = self.lexemes[self.offset];
        self.builder
            .token(JsonnetLanguage::kind_to_raw(kind), lexeme.text);
        self.offset += 1;
    }
    fn skip_whitespace(&mut self) {
        while let Some(lexeme) = self.lexemes.get(self.offset) {
            if !lexeme.kind.is_trivia() {
                break;
            }

            self.token(lexeme.kind);
        }
    }
}
crates/jrsonnet-rowan-parser/src/generated/nodes.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
pub(crate) syntax: SyntaxNode,
}
impl ExprString {
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -332,7 +332,7 @@
pub fn import_kind(&self) -> Option<ImportKind> {
support::token_child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -692,7 +692,7 @@
pub fn id(&self) -> Option<Name> {
support::child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -1038,18 +1038,27 @@
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
syntax: SyntaxToken,
- kind: StringKind,
+ kind: TextKind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
StringDouble,
+ ErrorStringDoubleUnterminated,
StringSingle,
+ ErrorStringSingleUnterminated,
StringDoubleVerbatim,
+ ErrorStringDoubleVerbatimUnterminated,
StringSingleVerbatim,
+ ErrorStringSingleVerbatimUnterminated,
+ ErrorStringVerbatimMissingQuotes,
StringBlock,
+ ErrorStringBlockUnexpectedEnd,
+ ErrorStringBlockMissingNewLine,
+ ErrorStringBlockMissingTermination,
+ ErrorStringBlockMissingIndent,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberKind {
Float,
- MetaForceEnum,
+ ErrorFloatJunkAfterPoint,
+ ErrorFloatJunkAfterExponent,
+ ErrorFloatJunkAfterExponentSign,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
Coloncolon,
Colon,
}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+ syntax: SyntaxToken,
+ kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+ Whitespace,
+ MultiLineComment,
+ ErrorCommentTooShort,
+ ErrorCommentUnterminated,
+ SingleLineHashComment,
+ SingleLineSlashComment,
+}
impl AstNode for SourceFile {
fn can_cast(kind: SyntaxKind) -> bool {
kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
-impl AstToken for String {
+impl AstToken for Text {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
STRING_DOUBLE
+ | ERROR_STRING_DOUBLE_UNTERMINATED
| STRING_SINGLE
+ | ERROR_STRING_SINGLE_UNTERMINATED
| STRING_DOUBLE_VERBATIM
+ | ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
| STRING_SINGLE_VERBATIM
- | STRING_BLOCK => true,
+ | ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+ | ERROR_STRING_VERBATIM_MISSING_QUOTES
+ | STRING_BLOCK
+ | ERROR_STRING_BLOCK_UNEXPECTED_END
+ | ERROR_STRING_BLOCK_MISSING_NEW_LINE
+ | ERROR_STRING_BLOCK_MISSING_TERMINATION
+ | ERROR_STRING_BLOCK_MISSING_INDENT => true,
_ => false,
}
}
fn cast(syntax: SyntaxToken) -> Option<Self> {
let res = match syntax.kind() {
- STRING_DOUBLE => String {
+ STRING_DOUBLE => Text {
syntax,
- kind: StringKind::StringDouble,
+ kind: TextKind::StringDouble,
},
- STRING_SINGLE => String {
+ ERROR_STRING_DOUBLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingle,
+ kind: TextKind::ErrorStringDoubleUnterminated,
},
- STRING_DOUBLE_VERBATIM => String {
+ STRING_SINGLE => Text {
syntax,
- kind: StringKind::StringDoubleVerbatim,
+ kind: TextKind::StringSingle,
},
- STRING_SINGLE_VERBATIM => String {
+ ERROR_STRING_SINGLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingleVerbatim,
+ kind: TextKind::ErrorStringSingleUnterminated,
},
- STRING_BLOCK => String {
+ STRING_DOUBLE_VERBATIM => Text {
syntax,
- kind: StringKind::StringBlock,
+ kind: TextKind::StringDoubleVerbatim,
+ },
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+ },
+ STRING_SINGLE_VERBATIM => Text {
+ syntax,
+ kind: TextKind::StringSingleVerbatim,
},
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+ },
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+ syntax,
+ kind: TextKind::ErrorStringVerbatimMissingQuotes,
+ },
+ STRING_BLOCK => Text {
+ syntax,
+ kind: TextKind::StringBlock,
+ },
+ ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockUnexpectedEnd,
+ },
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingNewLine,
+ },
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingTermination,
+ },
+ ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingIndent,
+ },
_ => return None,
};
Some(res)
@@ -2718,12 +2790,12 @@
&self.syntax
}
}
-impl String {
- pub fn kind(&self) -> StringKind {
+impl Text {
+ pub fn kind(&self) -> TextKind {
self.kind
}
}
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
@@ -2731,7 +2803,10 @@
impl AstToken for Number {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
- FLOAT | META_FORCE_ENUM => true,
+ FLOAT
+ | ERROR_FLOAT_JUNK_AFTER_POINT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
_ => false,
}
}
@@ -2741,10 +2816,18 @@
syntax,
kind: NumberKind::Float,
},
- META_FORCE_ENUM => Number {
+ ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterPoint,
+ },
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
syntax,
- kind: NumberKind::MetaForceEnum,
+ kind: NumberKind::ErrorFloatJunkAfterExponent,
},
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+ },
_ => return None,
};
Some(res)
@@ -2841,6 +2924,62 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
+impl AstToken for Trivia {
+ fn can_cast(kind: SyntaxKind) -> bool {
+ match kind {
+ WHITESPACE
+ | MULTI_LINE_COMMENT
+ | ERROR_COMMENT_TOO_SHORT
+ | ERROR_COMMENT_UNTERMINATED
+ | SINGLE_LINE_HASH_COMMENT
+ | SINGLE_LINE_SLASH_COMMENT => true,
+ _ => false,
+ }
+ }
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ let res = match syntax.kind() {
+ WHITESPACE => Trivia {
+ syntax,
+ kind: TriviaKind::Whitespace,
+ },
+ MULTI_LINE_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::MultiLineComment,
+ },
+ ERROR_COMMENT_TOO_SHORT => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentTooShort,
+ },
+ ERROR_COMMENT_UNTERMINATED => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentUnterminated,
+ },
+ SINGLE_LINE_HASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineHashComment,
+ },
+ SINGLE_LINE_SLASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineSlashComment,
+ },
+ _ => return None,
+ };
+ Some(res)
+ }
+ fn syntax(&self) -> &SyntaxToken {
+ &self.syntax
+ }
+}
+impl Trivia {
+ pub fn kind(&self) -> TriviaKind {
+ self.kind
+ }
+}
+impl std::fmt::Display for Trivia {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ std::fmt::Display::fmt(self.syntax(), f)
+ }
+}
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
SELF_KW,
#[token("super")]
SUPER_KW,
- META_FORCE_ENUM,
#[token("for")]
FOR_KW,
#[token("assert")]
@@ -253,10 +252,11 @@
BINARY_OPERATOR,
UNARY_OPERATOR,
LITERAL,
- STRING,
+ TEXT,
NUMBER,
IMPORT_KIND,
VISIBILITY,
+ TRIVIA,
#[doc(hidden)]
__LAST,
}
@@ -277,8 +277,8 @@
pub fn is_enum(self) -> bool {
match self {
EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
- | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
- | NUMBER | IMPORT_KIND | VISIBILITY => true,
+ | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+ | IMPORT_KIND | VISIBILITY | TRIVIA => true,
_ => false,
}
}
crates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
use logos::Logos;
use rowan::{TextRange, TextSize};
-use crate::SyntaxKind;
+use crate::{
+ string_block::{lex_str_block, StringBlockError},
+ SyntaxKind,
+};
pub struct Lexer<'a> {
inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
type Item = Lexeme<'a>;
fn next(&mut self) -> Option<Self::Item> {
- let kind = self.inner.next()?;
+ use SyntaxKind::*;
+
+ let mut kind = self.inner.next()?;
let text = self.inner.slice();
+ if kind == STRING_BLOCK {
+ // We use a custom lexer, which skips enough bytes but does not return an error.
+ // Instead, we call the lexer again to check whether something is wrong with the string block
+ let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+ // In kinds, a string block is parsed at least as `|||`
+ lexer.bump(3);
+ let res = lex_str_block(&mut lexer);
+ debug_assert!(lexer.next().is_none(), "str_block is lexed");
+ match res {
+ Ok(_) => {}
+ Err(e) => {
+ kind = match e {
+ StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+ StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+ StringBlockError::MissingTermination => {
+ ERROR_STRING_BLOCK_MISSING_TERMINATION
+ }
+ StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+ }
+ }
+ }
+ }
+
Some(Self::Item {
kind,
text,
crates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
mod ast;
mod binary;
-mod classify;
mod event;
mod generated;
mod language;
crates/jrsonnet-rowan-parser/src/marker.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
!kind.is_enum(),
"{kind:?} is a enum kind, you should use variant kinds instead"
);
- // TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+ // TODO: is_lexer should return true if enum variant has #[regex]/#[token] over it, or it is defined as lexer error explicitly
// debug_assert!(
- // !kind.is_parser(),
- // "{kind:?} should be only emitted by parser, not used directly"
+ // !kind.is_lexer(),
+ // "{kind:?} should be only emitted by lexer, not used directly"
// );
let event_at_pos = &mut p.events[self.start_event_idx];
assert_eq!(*event_at_pos, Event::Pending);
crates/jrsonnet-rowan-parser/src/parser.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/parser.rs
+++ b/crates/jrsonnet-rowan-parser/src/parser.rs
@@ -8,10 +8,10 @@
event::Event,
lex::Lexeme,
marker::{AsRange, CompletedMarker, Marker, Ranger},
- string_block::{lex_str_block, StringBlockError},
+ nodes::{Literal, Number, Text, Trivia},
token_set::SyntaxKindSet,
unary::UnaryOperator,
- SyntaxKind,
+ AstToken, SyntaxKind,
SyntaxKind::*,
SyntaxNode, T, TS,
};
@@ -36,6 +36,7 @@
}
pub struct Parser<'i> {
+ // TODO: remove all trivia before feeding to parser?
lexemes: &'i [Lexeme<'i>],
pub offset: usize,
pub events: Vec<Event>,
@@ -191,7 +192,7 @@
while self
.lexemes
.get(previous_token_idx)
- .map_or(false, |l| l.kind.is_trivia())
+ .map_or(false, |l| Trivia::can_cast(l.kind))
&& previous_token_idx != 0
{
previous_token_idx -= 1;
@@ -200,13 +201,13 @@
Some(self.lexemes[previous_token_idx])
}
pub fn start_of_token(&self, mut idx: usize) -> TextSize {
- while self.lexemes[idx].kind.is_trivia() {
+ while Trivia::can_cast(self.lexemes[idx].kind) {
idx += 1;
}
self.lexemes[idx].range.start()
}
pub fn end_of_token(&self, mut idx: usize) -> TextSize {
- while self.lexemes[idx].kind.is_trivia() {
+ while Trivia::can_cast(self.lexemes[idx].kind) {
idx -= 1;
}
self.lexemes[idx].range.end()
@@ -267,7 +268,11 @@
self.bump();
Some(m.complete(self, SyntaxKind::ERROR))
}
-
+ fn bump_assert(&mut self, kind: SyntaxKind) {
+ self.skip_trivia();
+ assert!(self.at(kind), "expected {:?}", kind);
+ self.bump_remap(self.current());
+ }
fn bump(&mut self) {
self.skip_trivia();
self.bump_remap(self.current());
@@ -314,7 +319,7 @@
while self
.lexemes
.get(offset)
- .map(|l| l.kind.is_trivia())
+ .map(|l| Trivia::can_cast(l.kind))
.unwrap_or(false)
{
offset += 1;
@@ -324,7 +329,7 @@
while self
.lexemes
.get(offset)
- .map(|l| l.kind.is_trivia())
+ .map(|l| Trivia::can_cast(l.kind))
.unwrap_or(false)
{
offset += 1;
@@ -335,14 +340,10 @@
self.nth(0)
}
fn skip_trivia(&mut self) {
- while self.peek_raw().is_trivia() {
+ while Trivia::can_cast(self.peek_raw()) {
self.offset += 1;
}
}
- fn current_lexeme(&mut self) -> Option<&Lexeme> {
- self.skip_trivia();
- self.lexemes.get(self.offset)
- }
fn peek_raw(&mut self) -> SyntaxKind {
self.lexemes
.get(self.offset)
@@ -516,8 +517,8 @@
} else if p.at(IDENT) {
name(p);
m.complete(p, FIELD_NAME_FIXED);
- } else if p.current().is_string() {
- string(p);
+ } else if Text::can_cast(p.current()) {
+ text(p);
m.complete(p, FIELD_NAME_FIXED);
} else {
p.error_with_recovery_set(TS![;]);
@@ -564,9 +565,8 @@
};
}
fn assertion(p: &mut Parser) {
- assert!(p.at(T![assert]));
let m = p.start();
- p.bump();
+ p.bump_assert(T![assert]);
expr(p).map(|c| c.wrap(p, LHS_EXPR));
if p.at(T![:]) {
p.bump();
@@ -575,10 +575,9 @@
m.complete(p, ASSERTION);
}
fn object(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['{']));
let m_t = p.start();
let m = p.start();
- p.bump();
+ p.bump_assert(T!['{']);
loop {
if p.at(T!['}']) {
@@ -619,9 +618,8 @@
m.complete(p, PARAM);
}
fn params_desc(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['(']));
let m = p.start();
- p.bump();
+ p.bump_assert(T!['(']);
loop {
if p.at(T![')']) {
@@ -640,8 +638,7 @@
}
fn args_desc(p: &mut Parser) {
let m = p.start();
- assert!(p.at(T!['(']));
- p.bump();
+ p.bump_assert(T!['(']);
let started_named = Cell::new(false);
@@ -674,10 +671,9 @@
}
fn array(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['[']));
// Start the list node
let m = p.start();
- p.bump(); // '['
+ p.bump_assert(T!['[']);
// This vec will have at most one element in case of correct input
let mut compspecs = Vec::with_capacity(1);
@@ -795,9 +791,8 @@
m.complete(p, NAME);
}
fn destruct_rest(p: &mut Parser) {
- assert!(p.at(T![...]));
- p.bump();
let m = p.start();
+ p.bump_assert(T![...]);
if p.at(IDENT) {
p.bump()
}
@@ -817,9 +812,8 @@
m.complete(p, DESTRUCT_OBJECT_FIELD);
}
fn obj_local(p: &mut Parser) {
- assert!(p.at(T![local]));
let m = p.start();
- p.bump();
+ p.bump_assert(T![local]);
bind(p);
m.complete(p, OBJ_LOCAL);
}
@@ -903,52 +897,29 @@
m.complete(p, BIND_DESTRUCT)
};
}
-fn string(p: &mut Parser) {
- assert!(p.current().is_string());
- if p.at(STRING_BLOCK) {
- // We use custom lexer, which skips enough bytes, but not returns error
- // Instead we should call lexer again to verify if there is something wrong with string block
- let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
- &p.current_lexeme().expect("parser is at string block").text
- ));
- // In kinds, string blocks is parsed at least as `|||`
- lexer.bump(3);
- let res = lex_str_block(&mut lexer);
- debug_assert!(lexer.next().is_none(), "str_block is lexed");
- match res {
- Ok(_) => {
- p.bump();
- }
- Err(e) => p.bump_remap(match e {
- StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
- StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
- StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
- StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
- }),
- }
- } else {
- p.bump();
- }
+fn text(p: &mut Parser) {
+ assert!(Text::can_cast(p.current()));
+ p.bump();
}
fn number(p: &mut Parser) {
- assert!(p.current().is_number());
+ assert!(Number::can_cast(p.current()));
p.bump();
}
fn literal(p: &mut Parser) {
- assert!(p.current().is_literal());
+ assert!(Literal::can_cast(p.current()));
p.bump();
}
fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
let _e = p.expected_syntax_name("value");
- Some(if p.current().is_literal() {
+ Some(if Literal::can_cast(p.current()) {
let m = p.start();
literal(p);
m.complete(p, EXPR_LITERAL)
- } else if p.current().is_string() {
+ } else if Text::can_cast(p.current()) {
let m = p.start();
- string(p);
+ text(p);
m.complete(p, EXPR_STRING)
- } else if p.current().is_number() {
+ } else if Number::can_cast(p.current()) {
let m = p.start();
number(p);
m.complete(p, EXPR_NUMBER)
@@ -1025,7 +996,7 @@
} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
let m = p.start();
p.bump();
- string(p);
+ text(p);
m.complete(p, EXPR_IMPORT)
} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
let op = match p.current() {
@@ -1044,8 +1015,7 @@
let m = p.start();
p.bump();
expr(p);
- assert!(p.at(T![')']));
- p.bump();
+ p.expect(T![')']);
m.complete(p, EXPR_PARENED)
} else {
p.error_with_recovery_set(TS![]);
xtask/src/sourcegen/kinds.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
pub enum TokenKind {
/// May exist in token tree, but never in source code
Meta { grammar_name: String, name: String },
- /// Specific parsing errors may be emitted as this type of kind
+ /// Specific parsing/lexing errors may be emitted as this type of kind
Error {
grammar_name: String,
name: String,
+ /// Is this error returned by lexer directly, or from lex.rs
+ is_lexer_error: bool,
regex: Option<String>,
priority: Option<u32>,
},
@@ -133,13 +135,18 @@
});
$(define_kinds!($into = $($rest)*))?
}};
- ($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
- $into.define_token(TokenKind::Error {
- grammar_name: format!("ERROR_{}!", $name),
- name: format!("ERROR_{}", $name),
- regex: None$(.or(Some($regex.to_owned())))?,
- priority: None$(.or(Some($priority)))?,
- });
+ ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+ {
+ let regex = None$(.or(Some($regex.to_owned())))?;
+ let priority = None$(.or(Some($priority)))?;
+ $into.define_token(TokenKind::Error {
+ grammar_name: format!("ERROR_{}!", $name),
+ name: format!("ERROR_{}", $name),
+ is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+ regex,
+ priority,
+ });
+ }
$(define_kinds!($into = $($rest)*))?
}};
($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
- error("STRING_BLOCK_UNEXPECTED_END");
- error("STRING_BLOCK_MISSING_NEW_LINE");
- error("STRING_BLOCK_MISSING_TERMINATION");
- error("STRING_BLOCK_MISSING_INDENT");
+ error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+ error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+ error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+ error("STRING_BLOCK_MISSING_INDENT", lexer = true);
lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
lit("WHITESPACE") => r"[ \t\n\r]+";
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
xtask/src/sourcegen/mod.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -48,20 +48,28 @@
if let Some((special, name)) = classify_special(token) {
match special {
SpecialName::Literal => panic!("literal is not defined: {name}"),
- SpecialName::Meta => kinds.define_token(TokenKind::Meta {
- grammar_name: token.to_owned(),
- name: format!("META_{}", name),
- }),
- SpecialName::Error => kinds.define_token(TokenKind::Error {
- grammar_name: token.to_owned(),
- name: format!("ERROR_{}", name),
- regex: None,
- priority: None,
- }),
+ SpecialName::Meta => {
+ eprintln!("implicit meta: {}", name);
+ kinds.define_token(TokenKind::Meta {
+ grammar_name: token.to_owned(),
+ name: format!("META_{}", name),
+ })
+ }
+ SpecialName::Error => {
+ eprintln!("implicit error: {}", name);
+ kinds.define_token(TokenKind::Error {
+ grammar_name: token.to_owned(),
+ name: format!("ERROR_{}", name),
+ regex: None,
+ priority: None,
+ is_lexer_error: true,
+ })
+ }
};
continue;
};
let name = to_upper_snake_case(token);
+ eprintln!("implicit kw: {}", token);
kinds.define_token(TokenKind::Keyword {
code: token.to_owned(),
name: format!("{name}_KW"),
xtask/src/sourcegen/util.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
}
}
- eprintln!(" {} was not up-to-date, updating\n", file.display());
- if std::env::var("CI").is_ok() {
- eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
- }
+ eprintln!("{} was not up-to-date, updating", file.display());
if let Some(parent) = file.parent() {
let _ = fs::create_dir_all(parent);
}