From 5ad3c0601af8f372a9bbb40a30df5deb27fd3b1e Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Mon, 20 Jun 2022 13:43:51 +0000 Subject: [PATCH] refactor: use grammar to classify tokens --- --- a/cmds/jrsonnet-fmt/src/main.rs +++ b/cmds/jrsonnet-fmt/src/main.rs @@ -5,7 +5,7 @@ nodes::{ ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart, DestructRest, Expr, Field, FieldName, ForSpec, IfSpec, ImportKind, LhsExpr, Literal, - Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, String, + Member, Name, Number, ObjBody, ObjLocal, ParamsDesc, SliceDesc, SourceFile, Text, UnaryOperator, }, AstToken, SyntaxToken, @@ -91,7 +91,7 @@ } } -impl Printable for String { +impl Printable for Text { fn print(&self) -> PrintItems { p!(new: str(&format!("{}", self))) } @@ -168,7 +168,7 @@ FieldName::FieldNameFixed(f) => { if let Some(id) = f.id() { p!(new: {id}) - } else if let Some(str) = f.string() { + } else if let Some(str) = f.text() { p!(new: {str}) } else { p!(new: str("/*missing FieldName*/")) @@ -371,7 +371,7 @@ Expr::ExprIntrinsicThisFile(_) => p!(new: str("$intrinsicThisFile")), Expr::ExprIntrinsicId(_) => p!(new: str("$intrinsicId")), Expr::ExprIntrinsic(i) => p!(new: str("$intrinsic(") {i.name()} str(")")), - Expr::ExprString(s) => p!(new: {s.string()}), + Expr::ExprString(s) => p!(new: {s.text()}), Expr::ExprNumber(n) => p!(new: {n.number()}), Expr::ExprArray(a) => { let mut pi = p!(new: str("[") >i nl); @@ -393,7 +393,7 @@ pi } Expr::ExprImport(v) => { - p!(new: {v.import_kind()} str(" ") {v.string()}) + p!(new: {v.import_kind()} str(" ") {v.text()}) } Expr::ExprVar(n) => p!(new: {n.name()}), Expr::ExprLocal(l) => { --- a/crates/jrsonnet-rowan-parser/jsonnet.ungram +++ b/crates/jrsonnet-rowan-parser/jsonnet.ungram @@ -48,7 +48,7 @@ name:Name ')' ExprString = - String + Text ExprNumber = Number ExprArray = @@ -67,7 +67,7 @@ ']' ExprImport = - ImportKind String + ImportKind Text ImportKind = 'importstr' @@ -217,7 +217,7 @@ FieldNameFixed = id:Name -| String +| Text FieldNameDynamic = '[' Expr @@ -239,16 +239,27 @@ | '$' | 'super' -String = +Text = 'LIT_STRING_DOUBLE!' +| 'ERROR_STRING_DOUBLE_UNTERMINATED!' | 'LIT_STRING_SINGLE!' +| 'ERROR_STRING_SINGLE_UNTERMINATED!' | 'LIT_STRING_DOUBLE_VERBATIM!' +| 'ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED!' | 'LIT_STRING_SINGLE_VERBATIM!' +| 'ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED!' +| 'ERROR_STRING_VERBATIM_MISSING_QUOTES!' | 'LIT_STRING_BLOCK!' +| 'ERROR_STRING_BLOCK_UNEXPECTED_END!' +| 'ERROR_STRING_BLOCK_MISSING_NEW_LINE!' +| 'ERROR_STRING_BLOCK_MISSING_TERMINATION!' +| 'ERROR_STRING_BLOCK_MISSING_INDENT!' Number = 'LIT_FLOAT!' -| 'META_FORCE_ENUM!' +| 'ERROR_FLOAT_JUNK_AFTER_POINT!' +| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT!' +| 'ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN!' ForSpec = 'for' @@ -347,3 +358,12 @@ TrueExpr=Expr FalseExpr=Expr LhsExpr=Expr + +// Trivia - tokens which will be implicitly skipped for parser +Trivia = + 'LIT_WHITESPACE!' +| 'LIT_MULTI_LINE_COMMENT!' +| 'ERROR_COMMENT_TOO_SHORT!' +| 'ERROR_COMMENT_UNTERMINATED!' +| 'LIT_SINGLE_LINE_HASH_COMMENT!' +| 'LIT_SINGLE_LINE_SLASH_COMMENT!' --- a/crates/jrsonnet-rowan-parser/src/classify.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::SyntaxKind; - -impl SyntaxKind { - pub fn is_trivia(self) -> bool { - matches!( - self, - Self::WHITESPACE - | Self::MULTI_LINE_COMMENT - | Self::ERROR_COMMENT_TOO_SHORT - | Self::ERROR_COMMENT_UNTERMINATED - | Self::SINGLE_LINE_HASH_COMMENT - | Self::SINGLE_LINE_SLASH_COMMENT - ) - } - pub fn is_string(self) -> bool { - matches!( - self, - Self::STRING_SINGLE - | Self::ERROR_STRING_SINGLE_UNTERMINATED - | Self::STRING_DOUBLE - | Self::ERROR_STRING_DOUBLE_UNTERMINATED - | Self::STRING_SINGLE_VERBATIM - | Self::ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED - | Self::STRING_DOUBLE_VERBATIM - | Self::ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED - | Self::STRING_BLOCK - | Self::ERROR_STRING_BLOCK_UNEXPECTED_END - | Self::ERROR_STRING_BLOCK_MISSING_NEW_LINE - | Self::ERROR_STRING_BLOCK_MISSING_TERMINATION - | Self::ERROR_STRING_BLOCK_MISSING_INDENT - ) - } - pub fn is_number(self) -> bool { - matches!( - self, - Self::FLOAT - | Self::ERROR_FLOAT_JUNK_AFTER_POINT - | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT - | Self::ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN - ) - } - pub fn is_literal(self) -> bool { - matches!( - self, - Self::NULL_KW - | Self::TRUE_KW | Self::FALSE_KW - | Self::SELF_KW | Self::DOLLAR - | Self::SUPER_KW - ) - } -} --- a/crates/jrsonnet-rowan-parser/src/event.rs +++ b/crates/jrsonnet-rowan-parser/src/event.rs @@ -4,8 +4,9 @@ use crate::{ lex::Lexeme, + nodes::Trivia, parser::{Parse, SyntaxError}, - JsonnetLanguage, SyntaxKind, + AstToken, JsonnetLanguage, SyntaxKind, }; #[derive(Clone, Debug, PartialEq, Eq)] @@ -144,7 +145,7 @@ } fn skip_whitespace(&mut self) { while let Some(lexeme) = self.lexemes.get(self.offset) { - if !lexeme.kind.is_trivia() { + if !Trivia::can_cast(lexeme.kind) { break; } --- a/crates/jrsonnet-rowan-parser/src/generated/nodes.rs +++ b/crates/jrsonnet-rowan-parser/src/generated/nodes.rs @@ -255,7 +255,7 @@ pub(crate) syntax: SyntaxNode, } impl ExprString { - pub fn string(&self) -> Option { + pub fn text(&self) -> Option { support::token_child(&self.syntax) } } @@ -332,7 +332,7 @@ pub fn import_kind(&self) -> Option { support::token_child(&self.syntax) } - pub fn string(&self) -> Option { + pub fn text(&self) -> Option { support::token_child(&self.syntax) } } @@ -692,7 +692,7 @@ pub fn id(&self) -> Option { support::child(&self.syntax) } - pub fn string(&self) -> Option { + pub fn text(&self) -> Option { support::token_child(&self.syntax) } } @@ -1038,18 +1038,27 @@ } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct String { +pub struct Text { syntax: SyntaxToken, - kind: StringKind, + kind: TextKind, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum StringKind { +pub enum TextKind { StringDouble, + ErrorStringDoubleUnterminated, StringSingle, + ErrorStringSingleUnterminated, StringDoubleVerbatim, + ErrorStringDoubleVerbatimUnterminated, StringSingleVerbatim, + ErrorStringSingleVerbatimUnterminated, + ErrorStringVerbatimMissingQuotes, StringBlock, + ErrorStringBlockUnexpectedEnd, + ErrorStringBlockMissingNewLine, + ErrorStringBlockMissingTermination, + ErrorStringBlockMissingIndent, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -1061,7 +1070,9 @@ #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NumberKind { Float, - MetaForceEnum, + ErrorFloatJunkAfterPoint, + ErrorFloatJunkAfterExponent, + ErrorFloatJunkAfterExponentSign, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -1089,6 +1100,22 @@ Coloncolon, Colon, } + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Trivia { + syntax: SyntaxToken, + kind: TriviaKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum TriviaKind { + Whitespace, + MultiLineComment, + ErrorCommentTooShort, + ErrorCommentUnterminated, + SingleLineHashComment, + SingleLineSlashComment, +} impl AstNode for SourceFile { fn can_cast(kind: SyntaxKind) -> bool { kind == SOURCE_FILE @@ -2677,39 +2704,84 @@ std::fmt::Display::fmt(self.syntax(), f) } } -impl AstToken for String { +impl AstToken for Text { fn can_cast(kind: SyntaxKind) -> bool { match kind { STRING_DOUBLE + | ERROR_STRING_DOUBLE_UNTERMINATED | STRING_SINGLE + | ERROR_STRING_SINGLE_UNTERMINATED | STRING_DOUBLE_VERBATIM + | ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED | STRING_SINGLE_VERBATIM - | STRING_BLOCK => true, + | ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED + | ERROR_STRING_VERBATIM_MISSING_QUOTES + | STRING_BLOCK + | ERROR_STRING_BLOCK_UNEXPECTED_END + | ERROR_STRING_BLOCK_MISSING_NEW_LINE + | ERROR_STRING_BLOCK_MISSING_TERMINATION + | ERROR_STRING_BLOCK_MISSING_INDENT => true, _ => false, } } fn cast(syntax: SyntaxToken) -> Option { let res = match syntax.kind() { - STRING_DOUBLE => String { + STRING_DOUBLE => Text { syntax, - kind: StringKind::StringDouble, + kind: TextKind::StringDouble, }, - STRING_SINGLE => String { + ERROR_STRING_DOUBLE_UNTERMINATED => Text { syntax, - kind: StringKind::StringSingle, + kind: TextKind::ErrorStringDoubleUnterminated, }, - STRING_DOUBLE_VERBATIM => String { + STRING_SINGLE => Text { syntax, - kind: StringKind::StringDoubleVerbatim, + kind: TextKind::StringSingle, }, - STRING_SINGLE_VERBATIM => String { + ERROR_STRING_SINGLE_UNTERMINATED => Text { syntax, - kind: StringKind::StringSingleVerbatim, + kind: TextKind::ErrorStringSingleUnterminated, }, - STRING_BLOCK => String { + STRING_DOUBLE_VERBATIM => Text { syntax, - kind: StringKind::StringBlock, + kind: TextKind::StringDoubleVerbatim, + }, + ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED => Text { + syntax, + kind: TextKind::ErrorStringDoubleVerbatimUnterminated, + }, + STRING_SINGLE_VERBATIM => Text { + syntax, + kind: TextKind::StringSingleVerbatim, }, + ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED => Text { + syntax, + kind: TextKind::ErrorStringSingleVerbatimUnterminated, + }, + ERROR_STRING_VERBATIM_MISSING_QUOTES => Text { + syntax, + kind: TextKind::ErrorStringVerbatimMissingQuotes, + }, + STRING_BLOCK => Text { + syntax, + kind: TextKind::StringBlock, + }, + ERROR_STRING_BLOCK_UNEXPECTED_END => Text { + syntax, + kind: TextKind::ErrorStringBlockUnexpectedEnd, + }, + ERROR_STRING_BLOCK_MISSING_NEW_LINE => Text { + syntax, + kind: TextKind::ErrorStringBlockMissingNewLine, + }, + ERROR_STRING_BLOCK_MISSING_TERMINATION => Text { + syntax, + kind: TextKind::ErrorStringBlockMissingTermination, + }, + ERROR_STRING_BLOCK_MISSING_INDENT => Text { + syntax, + kind: TextKind::ErrorStringBlockMissingIndent, + }, _ => return None, }; Some(res) @@ -2718,12 +2790,12 @@ &self.syntax } } -impl String { - pub fn kind(&self) -> StringKind { +impl Text { + pub fn kind(&self) -> TextKind { self.kind } } -impl std::fmt::Display for String { +impl std::fmt::Display for Text { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } @@ -2731,7 +2803,10 @@ impl AstToken for Number { fn can_cast(kind: SyntaxKind) -> bool { match kind { - FLOAT | META_FORCE_ENUM => true, + FLOAT + | ERROR_FLOAT_JUNK_AFTER_POINT + | ERROR_FLOAT_JUNK_AFTER_EXPONENT + | ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => true, _ => false, } } @@ -2741,10 +2816,18 @@ syntax, kind: NumberKind::Float, }, - META_FORCE_ENUM => Number { + ERROR_FLOAT_JUNK_AFTER_POINT => Number { + syntax, + kind: NumberKind::ErrorFloatJunkAfterPoint, + }, + ERROR_FLOAT_JUNK_AFTER_EXPONENT => Number { syntax, - kind: NumberKind::MetaForceEnum, + kind: NumberKind::ErrorFloatJunkAfterExponent, }, + ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN => Number { + syntax, + kind: NumberKind::ErrorFloatJunkAfterExponentSign, + }, _ => return None, }; Some(res) @@ -2841,6 +2924,62 @@ std::fmt::Display::fmt(self.syntax(), f) } } +impl AstToken for Trivia { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + WHITESPACE + | MULTI_LINE_COMMENT + | ERROR_COMMENT_TOO_SHORT + | ERROR_COMMENT_UNTERMINATED + | SINGLE_LINE_HASH_COMMENT + | SINGLE_LINE_SLASH_COMMENT => true, + _ => false, + } + } + fn cast(syntax: SyntaxToken) -> Option { + let res = match syntax.kind() { + WHITESPACE => Trivia { + syntax, + kind: TriviaKind::Whitespace, + }, + MULTI_LINE_COMMENT => Trivia { + syntax, + kind: TriviaKind::MultiLineComment, + }, + ERROR_COMMENT_TOO_SHORT => Trivia { + syntax, + kind: TriviaKind::ErrorCommentTooShort, + }, + ERROR_COMMENT_UNTERMINATED => Trivia { + syntax, + kind: TriviaKind::ErrorCommentUnterminated, + }, + SINGLE_LINE_HASH_COMMENT => Trivia { + syntax, + kind: TriviaKind::SingleLineHashComment, + }, + SINGLE_LINE_SLASH_COMMENT => Trivia { + syntax, + kind: TriviaKind::SingleLineSlashComment, + }, + _ => return None, + }; + Some(res) + } + fn syntax(&self) -> &SyntaxToken { + &self.syntax + } +} +impl Trivia { + pub fn kind(&self) -> TriviaKind { + self.kind + } +} +impl std::fmt::Display for Trivia { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) --- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs +++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs @@ -174,7 +174,6 @@ SELF_KW, #[token("super")] SUPER_KW, - META_FORCE_ENUM, #[token("for")] FOR_KW, #[token("assert")] @@ -253,10 +252,11 @@ BINARY_OPERATOR, UNARY_OPERATOR, LITERAL, - STRING, + TEXT, NUMBER, IMPORT_KIND, VISIBILITY, + TRIVIA, #[doc(hidden)] __LAST, } @@ -277,8 +277,8 @@ pub fn is_enum(self) -> bool { match self { EXPR | OBJ_BODY | COMP_SPEC | BIND | MEMBER | FIELD | FIELD_NAME | DESTRUCT - | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | STRING - | NUMBER | IMPORT_KIND | VISIBILITY => true, + | DESTRUCT_ARRAY_PART | BINARY_OPERATOR | UNARY_OPERATOR | LITERAL | TEXT | NUMBER + | IMPORT_KIND | VISIBILITY | TRIVIA => true, _ => false, } } --- a/crates/jrsonnet-rowan-parser/src/lex.rs +++ b/crates/jrsonnet-rowan-parser/src/lex.rs @@ -4,7 +4,10 @@ use logos::Logos; use rowan::{TextRange, TextSize}; -use crate::SyntaxKind; +use crate::{ + string_block::{lex_str_block, StringBlockError}, + SyntaxKind, +}; pub struct Lexer<'a> { inner: logos::Lexer<'a, SyntaxKind>, @@ -22,9 +25,34 @@ type Item = Lexeme<'a>; fn next(&mut self) -> Option { - let kind = self.inner.next()?; + use SyntaxKind::*; + + let mut kind = self.inner.next()?; let text = self.inner.slice(); + if kind == STRING_BLOCK { + // We use custom lexer, which skips enough bytes, but not returns error + // Instead we should call lexer again to verify if there is something wrong with string block + let mut lexer = logos::Lexer::::new(text); + // In kinds, string blocks is parsed at least as `|||` + lexer.bump(3); + let res = lex_str_block(&mut lexer); + debug_assert!(lexer.next().is_none(), "str_block is lexed"); + match res { + Ok(_) => {} + Err(e) => { + kind = match e { + StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END, + StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE, + StringBlockError::MissingTermination => { + ERROR_STRING_BLOCK_MISSING_TERMINATION + } + StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT, + } + } + } + } + Some(Self::Item { kind, text, --- a/crates/jrsonnet-rowan-parser/src/lib.rs +++ b/crates/jrsonnet-rowan-parser/src/lib.rs @@ -2,7 +2,6 @@ mod ast; mod binary; -mod classify; mod event; mod generated; mod language; --- a/crates/jrsonnet-rowan-parser/src/marker.rs +++ b/crates/jrsonnet-rowan-parser/src/marker.rs @@ -44,10 +44,10 @@ !kind.is_enum(), "{kind:?} is a enum kind, you should use variant kinds instead" ); - // TODO: is_parser should return true if enum variant has #[regex]/#[token] over it + // TODO: is_lexer should return true if enum variant has #[regex]/#[token] over it, or it is defined as lexer error explicitly // debug_assert!( - // !kind.is_parser(), - // "{kind:?} should be only emitted by parser, not used directly" + // !kind.is_lexer(), + // "{kind:?} should be only emitted by lexer, not used directly" // ); let event_at_pos = &mut p.events[self.start_event_idx]; assert_eq!(*event_at_pos, Event::Pending); --- a/crates/jrsonnet-rowan-parser/src/parser.rs +++ b/crates/jrsonnet-rowan-parser/src/parser.rs @@ -8,10 +8,10 @@ event::Event, lex::Lexeme, marker::{AsRange, CompletedMarker, Marker, Ranger}, - string_block::{lex_str_block, StringBlockError}, + nodes::{Literal, Number, Text, Trivia}, token_set::SyntaxKindSet, unary::UnaryOperator, - SyntaxKind, + AstToken, SyntaxKind, SyntaxKind::*, SyntaxNode, T, TS, }; @@ -36,6 +36,7 @@ } pub struct Parser<'i> { + // TODO: remove all trivia before feeding to parser? lexemes: &'i [Lexeme<'i>], pub offset: usize, pub events: Vec, @@ -191,7 +192,7 @@ while self .lexemes .get(previous_token_idx) - .map_or(false, |l| l.kind.is_trivia()) + .map_or(false, |l| Trivia::can_cast(l.kind)) && previous_token_idx != 0 { previous_token_idx -= 1; @@ -200,13 +201,13 @@ Some(self.lexemes[previous_token_idx]) } pub fn start_of_token(&self, mut idx: usize) -> TextSize { - while self.lexemes[idx].kind.is_trivia() { + while Trivia::can_cast(self.lexemes[idx].kind) { idx += 1; } self.lexemes[idx].range.start() } pub fn end_of_token(&self, mut idx: usize) -> TextSize { - while self.lexemes[idx].kind.is_trivia() { + while Trivia::can_cast(self.lexemes[idx].kind) { idx -= 1; } self.lexemes[idx].range.end() @@ -267,7 +268,11 @@ self.bump(); Some(m.complete(self, SyntaxKind::ERROR)) } - + fn bump_assert(&mut self, kind: SyntaxKind) { + self.skip_trivia(); + assert!(self.at(kind), "expected {:?}", kind); + self.bump_remap(self.current()); + } fn bump(&mut self) { self.skip_trivia(); self.bump_remap(self.current()); @@ -314,7 +319,7 @@ while self .lexemes .get(offset) - .map(|l| l.kind.is_trivia()) + .map(|l| Trivia::can_cast(l.kind)) .unwrap_or(false) { offset += 1; @@ -324,7 +329,7 @@ while self .lexemes .get(offset) - .map(|l| l.kind.is_trivia()) + .map(|l| Trivia::can_cast(l.kind)) .unwrap_or(false) { offset += 1; @@ -335,14 +340,10 @@ self.nth(0) } fn skip_trivia(&mut self) { - while self.peek_raw().is_trivia() { + while Trivia::can_cast(self.peek_raw()) { self.offset += 1; } } - fn current_lexeme(&mut self) -> Option<&Lexeme> { - self.skip_trivia(); - self.lexemes.get(self.offset) - } fn peek_raw(&mut self) -> SyntaxKind { self.lexemes .get(self.offset) @@ -516,8 +517,8 @@ } else if p.at(IDENT) { name(p); m.complete(p, FIELD_NAME_FIXED); - } else if p.current().is_string() { - string(p); + } else if Text::can_cast(p.current()) { + text(p); m.complete(p, FIELD_NAME_FIXED); } else { p.error_with_recovery_set(TS![;]); @@ -564,9 +565,8 @@ }; } fn assertion(p: &mut Parser) { - assert!(p.at(T![assert])); let m = p.start(); - p.bump(); + p.bump_assert(T![assert]); expr(p).map(|c| c.wrap(p, LHS_EXPR)); if p.at(T![:]) { p.bump(); @@ -575,10 +575,9 @@ m.complete(p, ASSERTION); } fn object(p: &mut Parser) -> CompletedMarker { - assert!(p.at(T!['{'])); let m_t = p.start(); let m = p.start(); - p.bump(); + p.bump_assert(T!['{']); loop { if p.at(T!['}']) { @@ -619,9 +618,8 @@ m.complete(p, PARAM); } fn params_desc(p: &mut Parser) -> CompletedMarker { - assert!(p.at(T!['('])); let m = p.start(); - p.bump(); + p.bump_assert(T!['(']); loop { if p.at(T![')']) { @@ -640,8 +638,7 @@ } fn args_desc(p: &mut Parser) { let m = p.start(); - assert!(p.at(T!['('])); - p.bump(); + p.bump_assert(T!['(']); let started_named = Cell::new(false); @@ -674,10 +671,9 @@ } fn array(p: &mut Parser) -> CompletedMarker { - assert!(p.at(T!['['])); // Start the list node let m = p.start(); - p.bump(); // '[' + p.bump_assert(T!['[']); // This vec will have at most one element in case of correct input let mut compspecs = Vec::with_capacity(1); @@ -795,9 +791,8 @@ m.complete(p, NAME); } fn destruct_rest(p: &mut Parser) { - assert!(p.at(T![...])); - p.bump(); let m = p.start(); + p.bump_assert(T![...]); if p.at(IDENT) { p.bump() } @@ -817,9 +812,8 @@ m.complete(p, DESTRUCT_OBJECT_FIELD); } fn obj_local(p: &mut Parser) { - assert!(p.at(T![local])); let m = p.start(); - p.bump(); + p.bump_assert(T![local]); bind(p); m.complete(p, OBJ_LOCAL); } @@ -903,52 +897,29 @@ m.complete(p, BIND_DESTRUCT) }; } -fn string(p: &mut Parser) { - assert!(p.current().is_string()); - if p.at(STRING_BLOCK) { - // We use custom lexer, which skips enough bytes, but not returns error - // Instead we should call lexer again to verify if there is something wrong with string block - let mut lexer = logos::Lexer::::new(dbg!( - &p.current_lexeme().expect("parser is at string block").text - )); - // In kinds, string blocks is parsed at least as `|||` - lexer.bump(3); - let res = lex_str_block(&mut lexer); - debug_assert!(lexer.next().is_none(), "str_block is lexed"); - match res { - Ok(_) => { - p.bump(); - } - Err(e) => p.bump_remap(match e { - StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END, - StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE, - StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION, - StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT, - }), - } - } else { - p.bump(); - } +fn text(p: &mut Parser) { + assert!(Text::can_cast(p.current())); + p.bump(); } fn number(p: &mut Parser) { - assert!(p.current().is_number()); + assert!(Number::can_cast(p.current())); p.bump(); } fn literal(p: &mut Parser) { - assert!(p.current().is_literal()); + assert!(Literal::can_cast(p.current())); p.bump(); } fn lhs_basic(p: &mut Parser) -> Option { let _e = p.expected_syntax_name("value"); - Some(if p.current().is_literal() { + Some(if Literal::can_cast(p.current()) { let m = p.start(); literal(p); m.complete(p, EXPR_LITERAL) - } else if p.current().is_string() { + } else if Text::can_cast(p.current()) { let m = p.start(); - string(p); + text(p); m.complete(p, EXPR_STRING) - } else if p.current().is_number() { + } else if Number::can_cast(p.current()) { let m = p.start(); number(p); m.complete(p, EXPR_NUMBER) @@ -1025,7 +996,7 @@ } else if p.at(T![import]) || p.at(T![importstr]) || p.at(T![importbin]) { let m = p.start(); p.bump(); - string(p); + text(p); m.complete(p, EXPR_IMPORT) } else if p.at(T![-]) || p.at(T![!]) || p.at(T![~]) { let op = match p.current() { @@ -1044,8 +1015,7 @@ let m = p.start(); p.bump(); expr(p); - assert!(p.at(T![')'])); - p.bump(); + p.expect(T![')']); m.complete(p, EXPR_PARENED) } else { p.error_with_recovery_set(TS![]); --- a/xtask/src/sourcegen/kinds.rs +++ b/xtask/src/sourcegen/kinds.rs @@ -10,10 +10,12 @@ pub enum TokenKind { /// May exist in token tree, but never in source code Meta { grammar_name: String, name: String }, - /// Specific parsing errors may be emitted as this type of kind + /// Specific parsing/lexing errors may be emitted as this type of kind Error { grammar_name: String, name: String, + /// Is this error returned by lexer directly, or from lex.rs + is_lexer_error: bool, regex: Option, priority: Option, }, @@ -133,13 +135,18 @@ }); $(define_kinds!($into = $($rest)*))? }}; - ($into:ident = error($name:literal$(, priority = $priority:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{ - $into.define_token(TokenKind::Error { - grammar_name: format!("ERROR_{}!", $name), - name: format!("ERROR_{}", $name), - regex: None$(.or(Some($regex.to_owned())))?, - priority: None$(.or(Some($priority)))?, - }); + ($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{ + { + let regex = None$(.or(Some($regex.to_owned())))?; + let priority = None$(.or(Some($priority)))?; + $into.define_token(TokenKind::Error { + grammar_name: format!("ERROR_{}!", $name), + name: format!("ERROR_{}", $name), + is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(), + regex, + priority, + }); + } $(define_kinds!($into = $($rest)*))? }}; ($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{ @@ -258,10 +265,10 @@ error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*"; error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+"; lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test"; - error("STRING_BLOCK_UNEXPECTED_END"); - error("STRING_BLOCK_MISSING_NEW_LINE"); - error("STRING_BLOCK_MISSING_TERMINATION"); - error("STRING_BLOCK_MISSING_INDENT"); + error("STRING_BLOCK_UNEXPECTED_END", lexer = true); + error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true); + error("STRING_BLOCK_MISSING_TERMINATION", lexer = true); + error("STRING_BLOCK_MISSING_INDENT", lexer = true); lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*"; lit("WHITESPACE") => r"[ \t\n\r]+"; lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*(\r\n|\n)?"; --- a/xtask/src/sourcegen/mod.rs +++ b/xtask/src/sourcegen/mod.rs @@ -48,20 +48,28 @@ if let Some((special, name)) = classify_special(token) { match special { SpecialName::Literal => panic!("literal is not defined: {name}"), - SpecialName::Meta => kinds.define_token(TokenKind::Meta { - grammar_name: token.to_owned(), - name: format!("META_{}", name), - }), - SpecialName::Error => kinds.define_token(TokenKind::Error { - grammar_name: token.to_owned(), - name: format!("ERROR_{}", name), - regex: None, - priority: None, - }), + SpecialName::Meta => { + eprintln!("implicit meta: {}", name); + kinds.define_token(TokenKind::Meta { + grammar_name: token.to_owned(), + name: format!("META_{}", name), + }) + } + SpecialName::Error => { + eprintln!("implicit error: {}", name); + kinds.define_token(TokenKind::Error { + grammar_name: token.to_owned(), + name: format!("ERROR_{}", name), + regex: None, + priority: None, + is_lexer_error: true, + }) + } }; continue; }; let name = to_upper_snake_case(token); + eprintln!("implicit kw: {}", token); kinds.define_token(TokenKind::Keyword { code: token.to_owned(), name: format!("{name}_KW"), --- a/xtask/src/sourcegen/util.rs +++ b/xtask/src/sourcegen/util.rs @@ -13,10 +13,7 @@ } } - eprintln!(" {} was not up-to-date, updating\n", file.display()); - if std::env::var("CI").is_ok() { - eprintln!("NOTE: run `cargo xtask` locally and commit the updated files\n"); - } + eprintln!("{} was not up-to-date, updating", file.display()); if let Some(parent) = file.parent() { let _ = fs::create_dir_all(parent); } -- gitstuff