difftreelog
refactor use grammar to classify tokens
in: master
13 files changed
cmds/jrsonnet-fmt/src/main.rsdiffbeforeafterboth--- a/cmds/jrsonnet-fmt/src/main.rs
+++ b/cmds/jrsonnet-fmt/src/main.rs
@@ -5,7 +5,7 @@
nodes::{
ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal,
- Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String,
+ Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text,
UnaryOperator,
},
AstToken, SyntaxToken,
@@ -91,7 +91,7 @@
}
}
-impl Printable for String {
+impl Printable for Text {
fn print(&self) -> PrintItems {
p!(new: str(&format!("{}", self)))
}
@@ -168,7 +168,7 @@
FieldName::FieldNameFixed(f) => {
if let Some(id) = f.id() {
p!(new: {id})
- } else if let Some(str) = f.string() {
+ } else if let Some(str) = f.text() {
p!(new: {str})
} else {
p!(new: str("/*missing FieldName*/"))
@@ -371,7 +371,7 @@
Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")),
Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")),
Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")),
- Expr::ExprString(s) => p!(new: {s.string()}),
+ Expr::ExprString(s) => p!(new: {s.text()}),
Expr::ExprNumber(n) => p!(new: {n.number()}),
Expr::ExprArray(a) => {
let mut pi = p!(new: str("[") >i nl);
@@ -393,7 +393,7 @@
pi
}
Expr::ExprImport(v) => {
- p!(new: {v.import_kind()} str(" ") {v.string()})
+ p!(new: {v.import_kind()} str(" ") {v.text()})
}
Expr::ExprVar(n) => p!(new: {n.name()}),
Expr::ExprLocal(l) => {
crates/jrsonnet-rowan-parser/jsonnet.ungramdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/jsonnet.ungram
+++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram
@@ -48,7 +48,7 @@
name:Name
')'
ExprString =
- String
+ Text
ExprNumber =
Number
ExprArray =
@@ -67,7 +67,7 @@
']'
ExprImport =
- ImportKind String
+ ImportKind Text
ImportKind =
'importstr'
@@ -217,7 +217,7 @@
FieldNameFixed =
id:Name
-| String
+| Text
FieldNameDynamic =
'['
Expr
@@ -239,16 +239,27 @@
| '$'
| 'super'
-String =
+Text =
'LIT_STRING_DOUBLE!'
+| 'ERROR_STRING_DOUBLE_UNTERMINATED!'
| 'LIT_STRING_SINGLE!'
+| 'ERROR_STRING_SINGLE_UNTERMINATED!'
| 'LIT_STRING_DOUBLE_VERBATIM!'
+| 'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!'
| 'LIT_STRING_SINGLE_VERBATIM!'
+| 'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!'
+| 'ERROR_STRING_VERBATIM_MISSING_QUOTES!'
| 'LIT_STRING_BLOCK!'
+| 'ERROR_STRING_BLOCK_UNEXPECTED_END!'
+| 'ERROR_STRING_BLOCK_MISSING_NEW_LINE!'
+| 'ERROR_STRING_BLOCK_MISSING_TERMINATION!'
+| 'ERROR_STRING_BLOCK_MISSING_INDENT!'
Number =
'LIT_FLOAT!'
-| 'META_FORCE_ENUM!'
+| 'ERROR_FLOAT_JUNK_AFTER_POINT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT!'
+| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!'
ForSpec =
'for'
@@ -347,3 +358,12 @@
TrueExpr=Expr
FalseExpr=Expr
LhsExpr=Expr
+
+// Trivia - tokens which are implicitly skipped by the parser
+Trivia =
+ 'LIT_WHITESPACE!'
+| 'LIT_MULTI_LINE_COMMENT!'
+| 'ERROR_COMMENT_TOO_SHORT!'
+| 'ERROR_COMMENT_UNTERMINATED!'
+| 'LIT_SINGLE_LINE_HASH_COMMENT!'
+| 'LIT_SINGLE_LINE_SLASH_COMMENT!'
crates/jrsonnet-rowan-parser/src/classify.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/classify.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::SyntaxKind;
-
-impl SyntaxKind {
- pub fn is_trivia(self) -> bool {
- matches!(
- self,
- Self::WHITESPACE
- | Self::MULTI_LINE_COMMENT
- | Self::ERROR_COMMENT_TOO_SHORT
- | Self::ERROR_COMMENT_UNTERMINATED
- | Self::SINGLE_LINE_HASH_COMMENT
- | Self::SINGLE_LINE_SLASH_COMMENT
- )
- }
- pub fn is_string(self) -> bool {
- matches!(
- self,
- Self::STRING_SINGLE
- | Self::ERROR_STRING_SINGLE_UNTERMINATED
- | Self::STRING_DOUBLE
- | Self::ERROR_STRING_DOUBLE_UNTERMINATED
- | Self::STRING_SINGLE_VERBATIM
- | Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
- | Self::STRING_DOUBLE_VERBATIM
- | Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
- | Self::STRING_BLOCK
- | Self::ERROR_STRING_BLOCK_UNEXPECTED_END
- | Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE
- | Self::ERROR_STRING_BLOCK_MISSING_TERMINATION
- | Self::ERROR_STRING_BLOCK_MISSING_INDENT
- )
- }
- pub fn is_number(self) -> bool {
- matches!(
- self,
- Self::FLOAT
- | Self::ERROR_FLOAT_JUNK_AFTER_POINT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT
- | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN
- )
- }
- pub fn is_literal(self) -> bool {
- matches!(
- self,
- Self::NULL_KW
- | Self::TRUE_KW | Self::FALSE_KW
- | Self::SELF_KW | Self::DOLLAR
- | Self::SUPER_KW
- )
- }
-}
crates/jrsonnet-rowan-parser/src/event.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/event.rs
+++ b/crates/jrsonnet-rowan-parser/src/event.rs
@@ -4,8 +4,9 @@
use crate::{
lex::Lexeme,
+ nodes::Trivia,
parser::{Parse, SyntaxError},
- JsonnetLanguage, SyntaxKind,
+ AstToken, JsonnetLanguage, SyntaxKind,
};
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -144,7 +145,7 @@
}
fn skip_whitespace(&mut self) {
while let Some(lexeme) = self.lexemes.get(self.offset) {
- if !lexeme.kind.is_trivia() {
+ if !Trivia::can_cast(lexeme.kind) {
break;
}
crates/jrsonnet-rowan-parser/src/generated/nodes.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs
@@ -255,7 +255,7 @@
pub(crate) syntax: SyntaxNode,
}
impl ExprString {
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -332,7 +332,7 @@
pub fn import_kind(&self) -> Option<ImportKind> {
support::token_child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -692,7 +692,7 @@
pub fn id(&self) -> Option<Name> {
support::child(&self.syntax)
}
- pub fn string(&self) -> Option<String> {
+ pub fn text(&self) -> Option<Text> {
support::token_child(&self.syntax)
}
}
@@ -1038,18 +1038,27 @@
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct String {
+pub struct Text {
syntax: SyntaxToken,
- kind: StringKind,
+ kind: TextKind,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum StringKind {
+pub enum TextKind {
StringDouble,
+ ErrorStringDoubleUnterminated,
StringSingle,
+ ErrorStringSingleUnterminated,
StringDoubleVerbatim,
+ ErrorStringDoubleVerbatimUnterminated,
StringSingleVerbatim,
+ ErrorStringSingleVerbatimUnterminated,
+ ErrorStringVerbatimMissingQuotes,
StringBlock,
+ ErrorStringBlockUnexpectedEnd,
+ ErrorStringBlockMissingNewLine,
+ ErrorStringBlockMissingTermination,
+ ErrorStringBlockMissingIndent,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1061,7 +1070,9 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberKind {
Float,
- MetaForceEnum,
+ ErrorFloatJunkAfterPoint,
+ ErrorFloatJunkAfterExponent,
+ ErrorFloatJunkAfterExponentSign,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -1089,6 +1100,22 @@
Coloncolon,
Colon,
}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Trivia {
+ syntax: SyntaxToken,
+ kind: TriviaKind,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TriviaKind {
+ Whitespace,
+ MultiLineComment,
+ ErrorCommentTooShort,
+ ErrorCommentUnterminated,
+ SingleLineHashComment,
+ SingleLineSlashComment,
+}
impl AstNode for SourceFile {
fn can_cast(kind: SyntaxKind) -> bool {
kind == SOURCE_FILE
@@ -2677,39 +2704,84 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
-impl AstToken for String {
+impl AstToken for Text {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
STRING_DOUBLE
+ | ERROR_STRING_DOUBLE_UNTERMINATED
| STRING_SINGLE
+ | ERROR_STRING_SINGLE_UNTERMINATED
| STRING_DOUBLE_VERBATIM
+ | ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED
| STRING_SINGLE_VERBATIM
- | STRING_BLOCK => true,
+ | ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED
+ | ERROR_STRING_VERBATIM_MISSING_QUOTES
+ | STRING_BLOCK
+ | ERROR_STRING_BLOCK_UNEXPECTED_END
+ | ERROR_STRING_BLOCK_MISSING_NEW_LINE
+ | ERROR_STRING_BLOCK_MISSING_TERMINATION
+ | ERROR_STRING_BLOCK_MISSING_INDENT => true,
_ => false,
}
}
fn cast(syntax: SyntaxToken) -> Option<Self> {
let res = match syntax.kind() {
- STRING_DOUBLE => String {
+ STRING_DOUBLE => Text {
syntax,
- kind: StringKind::StringDouble,
+ kind: TextKind::StringDouble,
},
- STRING_SINGLE => String {
+ ERROR_STRING_DOUBLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingle,
+ kind: TextKind::ErrorStringDoubleUnterminated,
},
- STRING_DOUBLE_VERBATIM => String {
+ STRING_SINGLE => Text {
syntax,
- kind: StringKind::StringDoubleVerbatim,
+ kind: TextKind::StringSingle,
},
- STRING_SINGLE_VERBATIM => String {
+ ERROR_STRING_SINGLE_UNTERMINATED => Text {
syntax,
- kind: StringKind::StringSingleVerbatim,
+ kind: TextKind::ErrorStringSingleUnterminated,
},
- STRING_BLOCK => String {
+ STRING_DOUBLE_VERBATIM => Text {
syntax,
- kind: StringKind::StringBlock,
+ kind: TextKind::StringDoubleVerbatim,
+ },
+ ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringDoubleVerbatimUnterminated,
+ },
+ STRING_SINGLE_VERBATIM => Text {
+ syntax,
+ kind: TextKind::StringSingleVerbatim,
},
+ ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text {
+ syntax,
+ kind: TextKind::ErrorStringSingleVerbatimUnterminated,
+ },
+ ERROR_STRING_VERBATIM_MISSING_QUOTES => Text {
+ syntax,
+ kind: TextKind::ErrorStringVerbatimMissingQuotes,
+ },
+ STRING_BLOCK => Text {
+ syntax,
+ kind: TextKind::StringBlock,
+ },
+ ERROR_STRING_BLOCK_UNEXPECTED_END => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockUnexpectedEnd,
+ },
+ ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingNewLine,
+ },
+ ERROR_STRING_BLOCK_MISSING_TERMINATION => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingTermination,
+ },
+ ERROR_STRING_BLOCK_MISSING_INDENT => Text {
+ syntax,
+ kind: TextKind::ErrorStringBlockMissingIndent,
+ },
_ => return None,
};
Some(res)
@@ -2718,12 +2790,12 @@
&self.syntax
}
}
-impl String {
- pub fn kind(&self) -> StringKind {
+impl Text {
+ pub fn kind(&self) -> TextKind {
self.kind
}
}
-impl std::fmt::Display for String {
+impl std::fmt::Display for Text {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
}
@@ -2731,7 +2803,10 @@
impl AstToken for Number {
fn can_cast(kind: SyntaxKind) -> bool {
match kind {
- FLOAT | META_FORCE_ENUM => true,
+ FLOAT
+ | ERROR_FLOAT_JUNK_AFTER_POINT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT
+ | ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true,
_ => false,
}
}
@@ -2741,10 +2816,18 @@
syntax,
kind: NumberKind::Float,
},
- META_FORCE_ENUM => Number {
+ ERROR_FLOAT_JUNK_AFTER_POINT => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterPoint,
+ },
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number {
syntax,
- kind: NumberKind::MetaForceEnum,
+ kind: NumberKind::ErrorFloatJunkAfterExponent,
},
+ ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number {
+ syntax,
+ kind: NumberKind::ErrorFloatJunkAfterExponentSign,
+ },
_ => return None,
};
Some(res)
@@ -2841,6 +2924,62 @@
std::fmt::Display::fmt(self.syntax(), f)
}
}
+impl AstToken for Trivia {
+ fn can_cast(kind: SyntaxKind) -> bool {
+ match kind {
+ WHITESPACE
+ | MULTI_LINE_COMMENT
+ | ERROR_COMMENT_TOO_SHORT
+ | ERROR_COMMENT_UNTERMINATED
+ | SINGLE_LINE_HASH_COMMENT
+ | SINGLE_LINE_SLASH_COMMENT => true,
+ _ => false,
+ }
+ }
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ let res = match syntax.kind() {
+ WHITESPACE => Trivia {
+ syntax,
+ kind: TriviaKind::Whitespace,
+ },
+ MULTI_LINE_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::MultiLineComment,
+ },
+ ERROR_COMMENT_TOO_SHORT => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentTooShort,
+ },
+ ERROR_COMMENT_UNTERMINATED => Trivia {
+ syntax,
+ kind: TriviaKind::ErrorCommentUnterminated,
+ },
+ SINGLE_LINE_HASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineHashComment,
+ },
+ SINGLE_LINE_SLASH_COMMENT => Trivia {
+ syntax,
+ kind: TriviaKind::SingleLineSlashComment,
+ },
+ _ => return None,
+ };
+ Some(res)
+ }
+ fn syntax(&self) -> &SyntaxToken {
+ &self.syntax
+ }
+}
+impl Trivia {
+ pub fn kind(&self) -> TriviaKind {
+ self.kind
+ }
+}
+impl std::fmt::Display for Trivia {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ std::fmt::Display::fmt(self.syntax(), f)
+ }
+}
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self.syntax(), f)
crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -174,7 +174,6 @@
SELF_KW,
#[token("super")]
SUPER_KW,
- META_FORCE_ENUM,
#[token("for")]
FOR_KW,
#[token("assert")]
@@ -253,10 +252,11 @@
BINARY_OPERATOR,
UNARY_OPERATOR,
LITERAL,
- STRING,
+ TEXT,
NUMBER,
IMPORT_KIND,
VISIBILITY,
+ TRIVIA,
#[doc(hidden)]
__LAST,
}
@@ -277,8 +277,8 @@
pub fn is_enum(self) -> bool {
match self {
EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT
- | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING
- | NUMBER | IMPORT_KIND | VISIBILITY => true,
+ | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER
+ | IMPORT_KIND | VISIBILITY | TRIVIA => true,
_ => false,
}
}
crates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -4,7 +4,10 @@
use logos::Logos;
use rowan::{TextRange, TextSize};
-use crate::SyntaxKind;
+use crate::{
+ string_block::{lex_str_block, StringBlockError},
+ SyntaxKind,
+};
pub struct Lexer<'a> {
inner: logos::Lexer<'a, SyntaxKind>,
@@ -22,9 +25,34 @@
type Item = Lexeme<'a>;
fn next(&mut self) -> Option<Self::Item> {
- let kind = self.inner.next()?;
+ use SyntaxKind::*;
+
+ let mut kind = self.inner.next()?;
let text = self.inner.slice();
+ if kind == STRING_BLOCK {
+ // We use a custom lexer, which skips enough bytes but does not return an error.
+ // Instead, we call the lexer again here to verify whether something is wrong with the string block.
+ let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+ // In kinds, a string block is lexed at least as `|||`
+ lexer.bump(3);
+ let res = lex_str_block(&mut lexer);
+ debug_assert!(lexer.next().is_none(), "str_block is lexed");
+ match res {
+ Ok(_) => {}
+ Err(e) => {
+ kind = match e {
+ StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+ StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+ StringBlockError::MissingTermination => {
+ ERROR_STRING_BLOCK_MISSING_TERMINATION
+ }
+ StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+ }
+ }
+ }
+ }
+
Some(Self::Item {
kind,
text,
crates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
mod ast;
mod binary;
-mod classify;
mod event;
mod generated;
mod language;
crates/jrsonnet-rowan-parser/src/marker.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/marker.rs
+++ b/crates/jrsonnet-rowan-parser/src/marker.rs
@@ -44,10 +44,10 @@
!kind.is_enum(),
"{kind:?} is a enum kind, you should use variant kinds instead"
);
- // TODO: is_parser should return true if enum variant has #[regex]/#[token] over it
+ // TODO: is_lexer should return true if the enum variant has #[regex]/#[token] over it, or if it is explicitly defined as a lexer error
// debug_assert!(
- // !kind.is_parser(),
- // "{kind:?} should be only emitted by parser, not used directly"
+ // !kind.is_lexer(),
+ // "{kind:?} should be only emitted by lexer, not used directly"
// );
let event_at_pos = &mut p.events[self.start_event_idx];
assert_eq!(*event_at_pos, Event::Pending);
crates/jrsonnet-rowan-parser/src/parser.rsdiffbeforeafterboth--- a/crates/jrsonnet-rowan-parser/src/parser.rs
+++ b/crates/jrsonnet-rowan-parser/src/parser.rs
@@ -8,10 +8,10 @@
event::Event,
lex::Lexeme,
marker::{AsRange, CompletedMarker, Marker, Ranger},
- string_block::{lex_str_block, StringBlockError},
+ nodes::{Literal, Number, Text, Trivia},
token_set::SyntaxKindSet,
unary::UnaryOperator,
- SyntaxKind,
+ AstToken, SyntaxKind,
SyntaxKind::*,
SyntaxNode, T, TS,
};
@@ -36,6 +36,7 @@
}
pub struct Parser<'i> {
+ // TODO: remove all trivia before feeding to parser?
lexemes: &'i [Lexeme<'i>],
pub offset: usize,
pub events: Vec<Event>,
@@ -191,7 +192,7 @@
while self
.lexemes
.get(previous_token_idx)
- .map_or(false, |l| l.kind.is_trivia())
+ .map_or(false, |l| Trivia::can_cast(l.kind))
&& previous_token_idx != 0
{
previous_token_idx -= 1;
@@ -200,13 +201,13 @@
Some(self.lexemes[previous_token_idx])
}
pub fn start_of_token(&self, mut idx: usize) -> TextSize {
- while self.lexemes[idx].kind.is_trivia() {
+ while Trivia::can_cast(self.lexemes[idx].kind) {
idx += 1;
}
self.lexemes[idx].range.start()
}
pub fn end_of_token(&self, mut idx: usize) -> TextSize {
- while self.lexemes[idx].kind.is_trivia() {
+ while Trivia::can_cast(self.lexemes[idx].kind) {
idx -= 1;
}
self.lexemes[idx].range.end()
@@ -267,7 +268,11 @@
self.bump();
Some(m.complete(self, SyntaxKind::ERROR))
}
-
+ fn bump_assert(&mut self, kind: SyntaxKind) {
+ self.skip_trivia();
+ assert!(self.at(kind), "expected {:?}", kind);
+ self.bump_remap(self.current());
+ }
fn bump(&mut self) {
self.skip_trivia();
self.bump_remap(self.current());
@@ -314,7 +319,7 @@
while self
.lexemes
.get(offset)
- .map(|l| l.kind.is_trivia())
+ .map(|l| Trivia::can_cast(l.kind))
.unwrap_or(false)
{
offset += 1;
@@ -324,7 +329,7 @@
while self
.lexemes
.get(offset)
- .map(|l| l.kind.is_trivia())
+ .map(|l| Trivia::can_cast(l.kind))
.unwrap_or(false)
{
offset += 1;
@@ -335,14 +340,10 @@
self.nth(0)
}
fn skip_trivia(&mut self) {
- while self.peek_raw().is_trivia() {
+ while Trivia::can_cast(self.peek_raw()) {
self.offset += 1;
}
}
- fn current_lexeme(&mut self) -> Option<&Lexeme> {
- self.skip_trivia();
- self.lexemes.get(self.offset)
- }
fn peek_raw(&mut self) -> SyntaxKind {
self.lexemes
.get(self.offset)
@@ -516,8 +517,8 @@
} else if p.at(IDENT) {
name(p);
m.complete(p, FIELD_NAME_FIXED);
- } else if p.current().is_string() {
- string(p);
+ } else if Text::can_cast(p.current()) {
+ text(p);
m.complete(p, FIELD_NAME_FIXED);
} else {
p.error_with_recovery_set(TS![;]);
@@ -564,9 +565,8 @@
};
}
fn assertion(p: &mut Parser) {
- assert!(p.at(T![assert]));
let m = p.start();
- p.bump();
+ p.bump_assert(T![assert]);
expr(p).map(|c| c.wrap(p, LHS_EXPR));
if p.at(T![:]) {
p.bump();
@@ -575,10 +575,9 @@
m.complete(p, ASSERTION);
}
fn object(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['{']));
let m_t = p.start();
let m = p.start();
- p.bump();
+ p.bump_assert(T!['{']);
loop {
if p.at(T!['}']) {
@@ -619,9 +618,8 @@
m.complete(p, PARAM);
}
fn params_desc(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['(']));
let m = p.start();
- p.bump();
+ p.bump_assert(T!['(']);
loop {
if p.at(T![')']) {
@@ -640,8 +638,7 @@
}
fn args_desc(p: &mut Parser) {
let m = p.start();
- assert!(p.at(T!['(']));
- p.bump();
+ p.bump_assert(T!['(']);
let started_named = Cell::new(false);
@@ -674,10 +671,9 @@
}
fn array(p: &mut Parser) -> CompletedMarker {
- assert!(p.at(T!['[']));
// Start the list node
let m = p.start();
- p.bump(); // '['
+ p.bump_assert(T!['[']);
// This vec will have at most one element in case of correct input
let mut compspecs = Vec::with_capacity(1);
@@ -795,9 +791,8 @@
m.complete(p, NAME);
}
fn destruct_rest(p: &mut Parser) {
- assert!(p.at(T![...]));
- p.bump();
let m = p.start();
+ p.bump_assert(T![...]);
if p.at(IDENT) {
p.bump()
}
@@ -817,9 +812,8 @@
m.complete(p, DESTRUCT_OBJECT_FIELD);
}
fn obj_local(p: &mut Parser) {
- assert!(p.at(T![local]));
let m = p.start();
- p.bump();
+ p.bump_assert(T![local]);
bind(p);
m.complete(p, OBJ_LOCAL);
}
@@ -903,52 +897,29 @@
m.complete(p, BIND_DESTRUCT)
};
}
-fn string(p: &mut Parser) {
- assert!(p.current().is_string());
- if p.at(STRING_BLOCK) {
- // We use custom lexer, which skips enough bytes, but not returns error
- // Instead we should call lexer again to verify if there is something wrong with string block
- let mut lexer = logos::Lexer::<SyntaxKind>::new(dbg!(
- &p.current_lexeme().expect("parser is at string block").text
- ));
- // In kinds, string blocks is parsed at least as `|||`
- lexer.bump(3);
- let res = lex_str_block(&mut lexer);
- debug_assert!(lexer.next().is_none(), "str_block is lexed");
- match res {
- Ok(_) => {
- p.bump();
- }
- Err(e) => p.bump_remap(match e {
- StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
- StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
- StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
- StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
- }),
- }
- } else {
- p.bump();
- }
+fn text(p: &mut Parser) {
+ assert!(Text::can_cast(p.current()));
+ p.bump();
}
fn number(p: &mut Parser) {
- assert!(p.current().is_number());
+ assert!(Number::can_cast(p.current()));
p.bump();
}
fn literal(p: &mut Parser) {
- assert!(p.current().is_literal());
+ assert!(Literal::can_cast(p.current()));
p.bump();
}
fn lhs_basic(p: &mut Parser) -> Option<CompletedMarker> {
let _e = p.expected_syntax_name("value");
- Some(if p.current().is_literal() {
+ Some(if Literal::can_cast(p.current()) {
let m = p.start();
literal(p);
m.complete(p, EXPR_LITERAL)
- } else if p.current().is_string() {
+ } else if Text::can_cast(p.current()) {
let m = p.start();
- string(p);
+ text(p);
m.complete(p, EXPR_STRING)
- } else if p.current().is_number() {
+ } else if Number::can_cast(p.current()) {
let m = p.start();
number(p);
m.complete(p, EXPR_NUMBER)
@@ -1025,7 +996,7 @@
} else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) {
let m = p.start();
p.bump();
- string(p);
+ text(p);
m.complete(p, EXPR_IMPORT)
} else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) {
let op = match p.current() {
@@ -1044,8 +1015,7 @@
let m = p.start();
p.bump();
expr(p);
- assert!(p.at(T![')']));
- p.bump();
+ p.expect(T![')']);
m.complete(p, EXPR_PARENED)
} else {
p.error_with_recovery_set(TS![]);
xtask/src/sourcegen/kinds.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/kinds.rs
+++ b/xtask/src/sourcegen/kinds.rs
@@ -10,10 +10,12 @@
pub enum TokenKind {
/// May exist in token tree, but never in source code
Meta { grammar_name: String, name: String },
- /// Specific parsing errors may be emitted as this type of kind
+ /// Specific parsing/lexing errors may be emitted as this type of kind
Error {
grammar_name: String,
name: String,
+ /// Whether this error is produced at the lexing stage: either by the lexer directly (a regex or priority is set), or from lex.rs (`lexer = true`)
+ is_lexer_error: bool,
regex: Option<String>,
priority: Option<u32>,
},
@@ -133,13 +135,18 @@
});
$(define_kinds!($into = $($rest)*))?
}};
- ($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
- $into.define_token(TokenKind::Error {
- grammar_name: format!("ERROR_{}!", $name),
- name: format!("ERROR_{}", $name),
- regex: None$(.or(Some($regex.to_owned())))?,
- priority: None$(.or(Some($priority)))?,
- });
+ ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{
+ {
+ let regex = None$(.or(Some($regex.to_owned())))?;
+ let priority = None$(.or(Some($priority)))?;
+ $into.define_token(TokenKind::Error {
+ grammar_name: format!("ERROR_{}!", $name),
+ name: format!("ERROR_{}", $name),
+ is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),
+ regex,
+ priority,
+ });
+ }
$(define_kinds!($into = $($rest)*))?
}};
($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{
@@ -258,10 +265,10 @@
error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";
error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";
lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";
- error("STRING_BLOCK_UNEXPECTED_END");
- error("STRING_BLOCK_MISSING_NEW_LINE");
- error("STRING_BLOCK_MISSING_TERMINATION");
- error("STRING_BLOCK_MISSING_INDENT");
+ error("STRING_BLOCK_UNEXPECTED_END", lexer = true);
+ error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);
+ error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);
+ error("STRING_BLOCK_MISSING_INDENT", lexer = true);
lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";
lit("WHITESPACE") => r"[ \t\n\r]+";
lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?";
xtask/src/sourcegen/mod.rsdiffbeforeafterboth1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => kinds.define_token(TokenKind::Meta {52 grammar_name: token.to_owned(),53 name: format!("META_{}", name),54 }),55 SpecialName::Error => kinds.define_token(TokenKind::Error {56 grammar_name: token.to_owned(),57 name: format!("ERROR_{}", name),58 regex: None,59 priority: None,60 }),61 };62 continue;63 };64 let name = to_upper_snake_case(token);65 kinds.define_token(TokenKind::Keyword {66 code: token.to_owned(),67 name: format!("{name}_KW"),68 });69 }70 }71 for node in &ast.nodes {72 let name = to_upper_snake_case(&node.name);73 kinds.define_node(&name);74 }75 for enum_ 
in &ast.enums {76 let name = to_upper_snake_case(&enum_.name);77 kinds.define_node(&name);78 }79 for token_enum in &ast.token_enums {80 let name = to_upper_snake_case(&token_enum.name);81 kinds.define_node(&name);82 }8384 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;8586 let nodes = generate_nodes(&kinds, &ast)?;87 ensure_file_contents(88 &PathBuf::from(concat!(89 env!("CARGO_MANIFEST_DIR"),90 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",91 )),92 &syntax_kinds,93 )?;94 ensure_file_contents(95 &PathBuf::from(concat!(96 env!("CARGO_MANIFEST_DIR"),97 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",98 )),99 &nodes,100 )?;101 Ok(())102}103104fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {105 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);106 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);107108 let keywords = kinds109 .tokens()110 .filter(|k| matches!(k, TokenKind::Keyword { .. }))111 .map(TokenKind::name)112 .map(|n| format_ident!("{n}"));113114 let nodes = kinds115 .nodes116 .iter()117 .map(|name| format_ident!("{}", name))118 .collect::<Vec<_>>();119120 let enums = grammar121 .enums122 .iter()123 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))124 .chain(125 grammar126 .token_enums127 .iter()128 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),129 );130131 let ast = quote! {132 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]133 use logos::Logos;134135 /// The kind of syntax node, e.g. 
`IDENT`, `USE_KW`, or `STRUCT`.136 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]137 #[repr(u16)]138 pub enum SyntaxKind {139 #[doc(hidden)]140 TOMBSTONE,141 #[doc(hidden)]142 EOF,143 #(#token_kinds,)*144 #[error]145 ERROR,146 #(#nodes,)*147 #[doc(hidden)]148 __LAST,149 }150 use self::SyntaxKind::*;151152 impl SyntaxKind {153 pub fn is_keyword(self) -> bool {154 match self {155 #(#keywords)|* => true,156 _ => false,157 }158 }159 pub fn is_enum(self) -> bool {160 match self {161 #(#enums)|* => true,162 _ => false,163 }164 }165166 pub fn from_raw(r: u16) -> Self {167 assert!(r < Self::__LAST as u16);168 unsafe { std::mem::transmute(r) }169 }170 pub fn into_raw(self) -> u16 {171 self as u16172 }173 }174175 #[macro_export]176 macro_rules! T {#(#t_macros);*}177 pub use T;178 };179180 reformat(&ast.to_string())181}182183fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {184 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar185 .nodes186 .iter()187 .map(|node| {188 let name = format_ident!("{}", node.name);189 let kind = format_ident!("{}", to_upper_snake_case(&node.name));190 let traits = node.traits.iter().map(|trait_name| {191 let trait_name = format_ident!("{}", trait_name);192 quote!(impl ast::#trait_name for #name {})193 });194195 let methods = node.fields.iter().map(|field| {196 let method_name = field.method_name(kinds);197 let ty = field.ty();198199 if field.is_many() {200 quote! {201 pub fn #method_name(&self) -> AstChildren<#ty> {202 support::children(&self.syntax)203 }204 }205 } else if let Some(token_kind) = field.token_kind(kinds) {206 quote! {207 pub fn #method_name(&self) -> Option<#ty> {208 support::token(&self.syntax, #token_kind)209 }210 }211 } else if field.is_token_enum(grammar) {212 quote! {213 pub fn #method_name(&self) -> Option<#ty> {214 support::token_child(&self.syntax)215 }216 }217 } else {218 quote! 
{219 pub fn #method_name(&self) -> Option<#ty> {220 support::child(&self.syntax)221 }222 }223 }224 });225 (226 quote! {227 #[pretty_doc_comment_placeholder_workaround]228 #[derive(Debug, Clone, PartialEq, Eq, Hash)]229 pub struct #name {230 pub(crate) syntax: SyntaxNode,231 }232233 #(#traits)*234235 impl #name {236 #(#methods)*237 }238 },239 quote! {240 impl AstNode for #name {241 fn can_cast(kind: SyntaxKind) -> bool {242 kind == #kind243 }244 fn cast(syntax: SyntaxNode) -> Option<Self> {245 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }246 }247 fn syntax(&self) -> &SyntaxNode { &self.syntax }248 }249 },250 )251 })252 .unzip();253254 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar255 .enums256 .iter()257 .map(|en| {258 let variants: Vec<_> = en259 .variants260 .iter()261 .map(|var| format_ident!("{}", var))262 .collect();263 let name = format_ident!("{}", en.name);264 let kinds: Vec<_> = variants265 .iter()266 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))267 .collect();268 let traits = en.traits.iter().map(|trait_name| {269 let trait_name = format_ident!("{}", trait_name);270 quote!(impl ast::#trait_name for #name {})271 });272273 let ast_node = quote! {274 impl AstNode for #name {275 fn can_cast(kind: SyntaxKind) -> bool {276 match kind {277 #(#kinds)|* => true,278 _ => false,279 }280 }281 fn cast(syntax: SyntaxNode) -> Option<Self> {282 let res = match syntax.kind() {283 #(284 #kinds => #name::#variants(#variants { syntax }),285 )*286 _ => return None,287 };288 Some(res)289 }290 fn syntax(&self) -> &SyntaxNode {291 match self {292 #(293 #name::#variants(it) => &it.syntax,294 )*295 }296 }297 }298 };299300 (301 quote! {302 #[pretty_doc_comment_placeholder_workaround]303 #[derive(Debug, Clone, PartialEq, Eq, Hash)]304 pub enum #name {305 #(#variants(#variants),)*306 }307308 #(#traits)*309 },310 quote! 
{311 #(312 impl From<#variants> for #name {313 fn from(node: #variants) -> #name {314 #name::#variants(node)315 }316 }317 )*318 #ast_node319 },320 )321 })322 .unzip();323324 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar325 .token_enums326 .iter()327 .map(|en| {328 let variants: Vec<_> = en329 .variants330 .iter()331 .map(|token| {332 format_ident!(333 "{}",334 to_pascal_case(kinds.token(token).expect("token exists").name())335 )336 })337 .collect();338 let name = format_ident!("{}", en.name);339 let kind_name = format_ident!("{}Kind", en.name);340 let kinds: Vec<_> = variants341 .iter()342 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))343 .collect();344345 let ast_node = quote! {346 impl AstToken for #name {347 fn can_cast(kind: SyntaxKind) -> bool {348 match kind {349 #(#kinds)|* => true,350 _ => false,351 }352 }353 fn cast(syntax: SyntaxToken) -> Option<Self> {354 let res = match syntax.kind() {355 #(356 #kinds => #name { syntax, kind: #kind_name::#variants },357 )*358 _ => return None,359 };360 Some(res)361 }362 fn syntax(&self) -> &SyntaxToken {363 &self.syntax364 }365 }366 };367368 (369 quote! {370 #[pretty_doc_comment_placeholder_workaround]371 #[derive(Debug, Clone, PartialEq, Eq, Hash)]372 pub struct #name { syntax: SyntaxToken, kind: #kind_name }373374 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]375 pub enum #kind_name {376 #(#variants,)*377 }378 },379 quote! 
{380 #ast_node381382 impl #name {383 pub fn kind(&self) -> #kind_name {384 self.kind385 }386 }387388 impl std::fmt::Display for #name {389 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {390 std::fmt::Display::fmt(self.syntax(), f)391 }392 }393 },394 )395 })396 .unzip();397398 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar399 .nodes400 .iter()401 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))402 .into_group_map()403 .into_iter()404 .sorted_by_key(|(k, _)| *k)405 .map(|(trait_name, nodes)| {406 let name = format_ident!("Any{}", trait_name);407 let trait_name = format_ident!("{}", trait_name);408 let kinds: Vec<_> = nodes409 .iter()410 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))411 .collect();412413 (414 quote! {415 #[pretty_doc_comment_placeholder_workaround]416 #[derive(Debug, Clone, PartialEq, Eq, Hash)]417 pub struct #name {418 pub(crate) syntax: SyntaxNode,419 }420 impl ast::#trait_name for #name {}421 },422 quote! {423 impl #name {424 #[inline]425 pub fn new<T: ast::#trait_name>(node: T) -> #name {426 #name {427 syntax: node.syntax().clone()428 }429 }430 }431 impl AstNode for #name {432 fn can_cast(kind: SyntaxKind) -> bool {433 match kind {434 #(#kinds)|* => true,435 _ => false,436 }437 }438 fn cast(syntax: SyntaxNode) -> Option<Self> {439 Self::can_cast(syntax.kind()).then(|| #name { syntax })440 }441 fn syntax(&self) -> &SyntaxNode {442 &self.syntax443 }444 }445 },446 )447 })448 .unzip();449450 let enum_names = grammar.enums.iter().map(|it| &it.name);451 let node_names = grammar.nodes.iter().map(|it| &it.name);452453 let display_impls = enum_names454 .chain(node_names.clone())455 .map(|it| format_ident!("{}", it))456 .map(|name| {457 quote! {458 impl std::fmt::Display for #name {459 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {460 std::fmt::Display::fmt(self.syntax(), f)461 }462 }463 }464 });465466 let ast = quote! 
{467 #![allow(non_snake_case, clippy::match_like_matches_macro)]468469 use crate::{470 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},471 ast::{AstNode, AstToken, AstChildren, support},472 T,473 };474475 #(#node_defs)*476 #(#enum_defs)*477 #(#token_enum_defs)*478 #(#any_node_defs)*479 #(#node_boilerplate_impls)*480 #(#enum_boilerplate_impls)*481 #(#token_enum_boilerplate_impls)*482 #(#any_node_boilerplate_impls)*483 #(#display_impls)*484 };485486 let ast = ast.to_string().replace("T ! [", "T![");487488 let mut res = String::with_capacity(ast.len() * 2);489490 let mut docs = grammar491 .nodes492 .iter()493 .map(|it| &it.doc)494 .chain(grammar.enums.iter().map(|it| &it.doc));495496 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {497 res.push_str(chunk);498 if let Some(doc) = docs.next() {499 write_doc_comment(doc, &mut res);500 }501 }502503 let res = reformat(&res)?;504 Ok(res.replace("#[derive", "\n#[derive"))505}506507fn write_doc_comment(contents: &[String], dest: &mut String) {508 use std::fmt::Write;509 for line in contents {510 writeln!(dest, "///{}", line).unwrap();511 }512}513514pub fn escape_token_macro(token: &str) -> TokenStream {515 if "{}[]()$".contains(token) {516 let c = token.chars().next().unwrap();517 quote! { #c }518 } else if token.contains('$') {519 quote! { #token }520 } else {521 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));522 quote! 
{ #(#cs)* }523 }524}1use std::path::PathBuf;23use anyhow::Result;4use ast::{lower, AstSrc};5use itertools::Itertools;6use kinds::{KindsSrc, TokenKind};7use proc_macro2::{Punct, Spacing, TokenStream};8use quote::{format_ident, quote};9use ungrammar::Grammar;10use util::{ensure_file_contents, reformat, to_pascal_case, to_upper_snake_case};1112mod ast;13mod kinds;14mod util;1516enum SpecialName {17 Literal,18 Meta,19 Error,20}21fn classify_special(name: &str) -> Option<(SpecialName, &str)> {22 let name = name.strip_suffix('!')?;23 Some(if let Some(name) = name.strip_prefix("LIT_") {24 (SpecialName::Literal, name)25 } else if let Some(name) = name.strip_prefix("META_") {26 (SpecialName::Meta, name)27 } else if let Some(name) = name.strip_prefix("ERROR_") {28 (SpecialName::Error, name)29 } else {30 return None;31 })32}3334pub fn generate_ungrammar() -> Result<()> {35 let grammar: Grammar = include_str!(concat!(36 env!("CARGO_MANIFEST_DIR"),37 "/../crates/jrsonnet-rowan-parser/jsonnet.ungram"38 ))39 .parse()?;4041 let mut kinds = kinds::jsonnet_kinds();42 let ast = lower(&kinds, &grammar);4344 for token in grammar.tokens() {45 let token = &grammar[token];46 let token = &token.name.clone();47 if !kinds.is_token(token) {48 if let Some((special, name)) = classify_special(token) {49 match special {50 SpecialName::Literal => panic!("literal is not defined: {name}"),51 SpecialName::Meta => {52 eprintln!("implicit meta: {}", name);53 kinds.define_token(TokenKind::Meta {54 grammar_name: token.to_owned(),55 name: format!("META_{}", name),56 })57 }58 SpecialName::Error => {59 eprintln!("implicit error: {}", name);60 kinds.define_token(TokenKind::Error {61 grammar_name: token.to_owned(),62 name: format!("ERROR_{}", name),63 regex: None,64 priority: None,65 is_lexer_error: true,66 })67 }68 };69 continue;70 };71 let name = to_upper_snake_case(token);72 eprintln!("implicit kw: {}", token);73 kinds.define_token(TokenKind::Keyword {74 code: token.to_owned(),75 name: 
format!("{name}_KW"),76 });77 }78 }79 for node in &ast.nodes {80 let name = to_upper_snake_case(&node.name);81 kinds.define_node(&name);82 }83 for enum_ in &ast.enums {84 let name = to_upper_snake_case(&enum_.name);85 kinds.define_node(&name);86 }87 for token_enum in &ast.token_enums {88 let name = to_upper_snake_case(&token_enum.name);89 kinds.define_node(&name);90 }9192 let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;9394 let nodes = generate_nodes(&kinds, &ast)?;95 ensure_file_contents(96 &PathBuf::from(concat!(97 env!("CARGO_MANIFEST_DIR"),98 "/../crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs",99 )),100 &syntax_kinds,101 )?;102 ensure_file_contents(103 &PathBuf::from(concat!(104 env!("CARGO_MANIFEST_DIR"),105 "/../crates/jrsonnet-rowan-parser/src/generated/nodes.rs",106 )),107 &nodes,108 )?;109 Ok(())110}111112fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {113 let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);114 let token_kinds = kinds.tokens().map(TokenKind::expand_kind);115116 let keywords = kinds117 .tokens()118 .filter(|k| matches!(k, TokenKind::Keyword { .. }))119 .map(TokenKind::name)120 .map(|n| format_ident!("{n}"));121122 let nodes = kinds123 .nodes124 .iter()125 .map(|name| format_ident!("{}", name))126 .collect::<Vec<_>>();127128 let enums = grammar129 .enums130 .iter()131 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name)))132 .chain(133 grammar134 .token_enums135 .iter()136 .map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),137 );138139 let ast = quote! {140 #![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]141 use logos::Logos;142143 /// The kind of syntax node, e.g. 
`IDENT`, `USE_KW`, or `STRUCT`.144 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]145 #[repr(u16)]146 pub enum SyntaxKind {147 #[doc(hidden)]148 TOMBSTONE,149 #[doc(hidden)]150 EOF,151 #(#token_kinds,)*152 #[error]153 ERROR,154 #(#nodes,)*155 #[doc(hidden)]156 __LAST,157 }158 use self::SyntaxKind::*;159160 impl SyntaxKind {161 pub fn is_keyword(self) -> bool {162 match self {163 #(#keywords)|* => true,164 _ => false,165 }166 }167 pub fn is_enum(self) -> bool {168 match self {169 #(#enums)|* => true,170 _ => false,171 }172 }173174 pub fn from_raw(r: u16) -> Self {175 assert!(r < Self::__LAST as u16);176 unsafe { std::mem::transmute(r) }177 }178 pub fn into_raw(self) -> u16 {179 self as u16180 }181 }182183 #[macro_export]184 macro_rules! T {#(#t_macros);*}185 pub use T;186 };187188 reformat(&ast.to_string())189}190191fn generate_nodes(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {192 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar193 .nodes194 .iter()195 .map(|node| {196 let name = format_ident!("{}", node.name);197 let kind = format_ident!("{}", to_upper_snake_case(&node.name));198 let traits = node.traits.iter().map(|trait_name| {199 let trait_name = format_ident!("{}", trait_name);200 quote!(impl ast::#trait_name for #name {})201 });202203 let methods = node.fields.iter().map(|field| {204 let method_name = field.method_name(kinds);205 let ty = field.ty();206207 if field.is_many() {208 quote! {209 pub fn #method_name(&self) -> AstChildren<#ty> {210 support::children(&self.syntax)211 }212 }213 } else if let Some(token_kind) = field.token_kind(kinds) {214 quote! {215 pub fn #method_name(&self) -> Option<#ty> {216 support::token(&self.syntax, #token_kind)217 }218 }219 } else if field.is_token_enum(grammar) {220 quote! {221 pub fn #method_name(&self) -> Option<#ty> {222 support::token_child(&self.syntax)223 }224 }225 } else {226 quote! 
{227 pub fn #method_name(&self) -> Option<#ty> {228 support::child(&self.syntax)229 }230 }231 }232 });233 (234 quote! {235 #[pretty_doc_comment_placeholder_workaround]236 #[derive(Debug, Clone, PartialEq, Eq, Hash)]237 pub struct #name {238 pub(crate) syntax: SyntaxNode,239 }240241 #(#traits)*242243 impl #name {244 #(#methods)*245 }246 },247 quote! {248 impl AstNode for #name {249 fn can_cast(kind: SyntaxKind) -> bool {250 kind == #kind251 }252 fn cast(syntax: SyntaxNode) -> Option<Self> {253 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }254 }255 fn syntax(&self) -> &SyntaxNode { &self.syntax }256 }257 },258 )259 })260 .unzip();261262 let (enum_defs, enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar263 .enums264 .iter()265 .map(|en| {266 let variants: Vec<_> = en267 .variants268 .iter()269 .map(|var| format_ident!("{}", var))270 .collect();271 let name = format_ident!("{}", en.name);272 let kinds: Vec<_> = variants273 .iter()274 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))275 .collect();276 let traits = en.traits.iter().map(|trait_name| {277 let trait_name = format_ident!("{}", trait_name);278 quote!(impl ast::#trait_name for #name {})279 });280281 let ast_node = quote! {282 impl AstNode for #name {283 fn can_cast(kind: SyntaxKind) -> bool {284 match kind {285 #(#kinds)|* => true,286 _ => false,287 }288 }289 fn cast(syntax: SyntaxNode) -> Option<Self> {290 let res = match syntax.kind() {291 #(292 #kinds => #name::#variants(#variants { syntax }),293 )*294 _ => return None,295 };296 Some(res)297 }298 fn syntax(&self) -> &SyntaxNode {299 match self {300 #(301 #name::#variants(it) => &it.syntax,302 )*303 }304 }305 }306 };307308 (309 quote! {310 #[pretty_doc_comment_placeholder_workaround]311 #[derive(Debug, Clone, PartialEq, Eq, Hash)]312 pub enum #name {313 #(#variants(#variants),)*314 }315316 #(#traits)*317 },318 quote! 
{319 #(320 impl From<#variants> for #name {321 fn from(node: #variants) -> #name {322 #name::#variants(node)323 }324 }325 )*326 #ast_node327 },328 )329 })330 .unzip();331332 let (token_enum_defs, token_enum_boilerplate_impls): (Vec<_>, Vec<_>) = grammar333 .token_enums334 .iter()335 .map(|en| {336 let variants: Vec<_> = en337 .variants338 .iter()339 .map(|token| {340 format_ident!(341 "{}",342 to_pascal_case(kinds.token(token).expect("token exists").name())343 )344 })345 .collect();346 let name = format_ident!("{}", en.name);347 let kind_name = format_ident!("{}Kind", en.name);348 let kinds: Vec<_> = variants349 .iter()350 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))351 .collect();352353 let ast_node = quote! {354 impl AstToken for #name {355 fn can_cast(kind: SyntaxKind) -> bool {356 match kind {357 #(#kinds)|* => true,358 _ => false,359 }360 }361 fn cast(syntax: SyntaxToken) -> Option<Self> {362 let res = match syntax.kind() {363 #(364 #kinds => #name { syntax, kind: #kind_name::#variants },365 )*366 _ => return None,367 };368 Some(res)369 }370 fn syntax(&self) -> &SyntaxToken {371 &self.syntax372 }373 }374 };375376 (377 quote! {378 #[pretty_doc_comment_placeholder_workaround]379 #[derive(Debug, Clone, PartialEq, Eq, Hash)]380 pub struct #name { syntax: SyntaxToken, kind: #kind_name }381382 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]383 pub enum #kind_name {384 #(#variants,)*385 }386 },387 quote! 
{388 #ast_node389390 impl #name {391 pub fn kind(&self) -> #kind_name {392 self.kind393 }394 }395396 impl std::fmt::Display for #name {397 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {398 std::fmt::Display::fmt(self.syntax(), f)399 }400 }401 },402 )403 })404 .unzip();405406 let (any_node_defs, any_node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar407 .nodes408 .iter()409 .flat_map(|node| node.traits.iter().map(move |t| (t, node)))410 .into_group_map()411 .into_iter()412 .sorted_by_key(|(k, _)| *k)413 .map(|(trait_name, nodes)| {414 let name = format_ident!("Any{}", trait_name);415 let trait_name = format_ident!("{}", trait_name);416 let kinds: Vec<_> = nodes417 .iter()418 .map(|name| format_ident!("{}", to_upper_snake_case(&name.name.to_string())))419 .collect();420421 (422 quote! {423 #[pretty_doc_comment_placeholder_workaround]424 #[derive(Debug, Clone, PartialEq, Eq, Hash)]425 pub struct #name {426 pub(crate) syntax: SyntaxNode,427 }428 impl ast::#trait_name for #name {}429 },430 quote! {431 impl #name {432 #[inline]433 pub fn new<T: ast::#trait_name>(node: T) -> #name {434 #name {435 syntax: node.syntax().clone()436 }437 }438 }439 impl AstNode for #name {440 fn can_cast(kind: SyntaxKind) -> bool {441 match kind {442 #(#kinds)|* => true,443 _ => false,444 }445 }446 fn cast(syntax: SyntaxNode) -> Option<Self> {447 Self::can_cast(syntax.kind()).then(|| #name { syntax })448 }449 fn syntax(&self) -> &SyntaxNode {450 &self.syntax451 }452 }453 },454 )455 })456 .unzip();457458 let enum_names = grammar.enums.iter().map(|it| &it.name);459 let node_names = grammar.nodes.iter().map(|it| &it.name);460461 let display_impls = enum_names462 .chain(node_names.clone())463 .map(|it| format_ident!("{}", it))464 .map(|name| {465 quote! {466 impl std::fmt::Display for #name {467 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {468 std::fmt::Display::fmt(self.syntax(), f)469 }470 }471 }472 });473474 let ast = quote! 
{475 #![allow(non_snake_case, clippy::match_like_matches_macro)]476477 use crate::{478 SyntaxNode, SyntaxToken, SyntaxKind::{self, *},479 ast::{AstNode, AstToken, AstChildren, support},480 T,481 };482483 #(#node_defs)*484 #(#enum_defs)*485 #(#token_enum_defs)*486 #(#any_node_defs)*487 #(#node_boilerplate_impls)*488 #(#enum_boilerplate_impls)*489 #(#token_enum_boilerplate_impls)*490 #(#any_node_boilerplate_impls)*491 #(#display_impls)*492 };493494 let ast = ast.to_string().replace("T ! [", "T![");495496 let mut res = String::with_capacity(ast.len() * 2);497498 let mut docs = grammar499 .nodes500 .iter()501 .map(|it| &it.doc)502 .chain(grammar.enums.iter().map(|it| &it.doc));503504 for chunk in ast.split("# [pretty_doc_comment_placeholder_workaround] ") {505 res.push_str(chunk);506 if let Some(doc) = docs.next() {507 write_doc_comment(doc, &mut res);508 }509 }510511 let res = reformat(&res)?;512 Ok(res.replace("#[derive", "\n#[derive"))513}514515fn write_doc_comment(contents: &[String], dest: &mut String) {516 use std::fmt::Write;517 for line in contents {518 writeln!(dest, "///{}", line).unwrap();519 }520}521522pub fn escape_token_macro(token: &str) -> TokenStream {523 if "{}[]()$".contains(token) {524 let c = token.chars().next().unwrap();525 quote! { #c }526 } else if token.contains('$') {527 quote! { #token }528 } else {529 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));530 quote! { #(#cs)* }531 }532}xtask/src/sourcegen/util.rsdiffbeforeafterboth--- a/xtask/src/sourcegen/util.rs
+++ b/xtask/src/sourcegen/util.rs
@@ -13,10 +13,7 @@
}
}
- eprintln!(" {} was not up-to-date, updating\n", file.display());
- if std::env::var("CI").is_ok() {
- eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n");
- }
+ eprintln!("{} was not up-to-date, updating", file.display());
if let Some(parent) = file.parent() {
let _ = fs::create_dir_all(parent);
}