git.delta.rocks / jrsonnet / refs/commits / 93f08ca017e4

difftreelog

refactor split lexer from rowan parser

tzxnlqzsYaroslav Bolyukin2026-03-22parent: #c6d0d24.patch.diff
in: master

15 files changed

modifiedCargo.lockdiffbeforeafterboth
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -716,6 +716,13 @@
 ]
 
 [[package]]
+name = "jrsonnet-lexer"
+version = "0.5.0-pre97"
+dependencies = [
+ "logos",
+]
+
+[[package]]
 name = "jrsonnet-macros"
 version = "0.5.0-pre97"
 dependencies = [
@@ -744,7 +751,7 @@
  "hi-doc",
  "indoc",
  "insta",
- "logos",
+ "jrsonnet-lexer",
  "rowan",
  "strip-ansi-escapes",
  "thiserror",
modifiedCargo.tomldiffbeforeafterboth
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,9 +81,6 @@
 itertools = "0.14.0"
 xshell = "0.2.7"
 
-lsp-server = "0.7.9"
-lsp-types = "0.97.0"
-
 regex = "1.12"
 lru = "0.16.3"
 
addedcrates/jrsonnet-lexer/Cargo.tomldiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "jrsonnet-lexer"
+authors.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+version.workspace = true
+
+[dependencies]
+logos.workspace = true
+
+[lints]
+workspace = true
addedcrates/jrsonnet-lexer/src/generated/mod.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/src/generated/mod.rs
@@ -0,0 +1 @@
+pub mod syntax_kinds;
addedcrates/jrsonnet-lexer/src/generated/syntax_kinds.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/src/generated/syntax_kinds.rs
@@ -0,0 +1,210 @@
+//! This is a generated file, please do not edit manually. Changes can be
+//! made in codegeneration that lives in `xtask` top-level dir.
+
+#![allow(
+	bad_style,
+	missing_docs,
+	unreachable_pub,
+	clippy::manual_non_exhaustive,
+	clippy::match_like_matches_macro
+)]
+#[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, logos :: Logos)]
+#[repr(u16)]
+pub enum SyntaxKind {
+	#[doc(hidden)]
+	TOMBSTONE,
+	#[doc(hidden)]
+	EOF,
+	#[token("||")]
+	OR,
+	#[token("??")]
+	NULL_COAELSE,
+	#[token("&&")]
+	AND,
+	#[token("|")]
+	BIT_OR,
+	#[token("^")]
+	BIT_XOR,
+	#[token("&")]
+	BIT_AND,
+	#[token("==")]
+	EQ,
+	#[token("!=")]
+	NE,
+	#[token("<")]
+	LT,
+	#[token(">")]
+	GT,
+	#[token("<=")]
+	LE,
+	#[token(">=")]
+	GE,
+	#[token("<<")]
+	LHS,
+	#[token(">>")]
+	RHS,
+	#[token("+")]
+	PLUS,
+	#[token("-")]
+	MINUS,
+	#[token("*")]
+	MUL,
+	#[token("/")]
+	DIV,
+	#[token("%")]
+	MODULO,
+	#[token("!")]
+	NOT,
+	#[token("~")]
+	BIT_NOT,
+	#[token("[")]
+	L_BRACK,
+	#[token("]")]
+	R_BRACK,
+	#[token("(")]
+	L_PAREN,
+	#[token(")")]
+	R_PAREN,
+	#[token("{")]
+	L_BRACE,
+	#[token("}")]
+	R_BRACE,
+	#[token(":")]
+	COLON,
+	#[token("::")]
+	COLONCOLON,
+	#[token(":::")]
+	COLONCOLONCOLON,
+	#[token(";")]
+	SEMI,
+	#[token(".")]
+	DOT,
+	#[token("...")]
+	DOTDOTDOT,
+	#[token(",")]
+	COMMA,
+	#[token("$")]
+	DOLLAR,
+	#[token("=")]
+	ASSIGN,
+	#[token("?")]
+	QUESTION_MARK,
+	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?")]
+	FLOAT,
+	#[regex("(?:0|[1-9][0-9]*)\\.[^0-9]")]
+	ERROR_FLOAT_JUNK_AFTER_POINT,
+	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][^+\\-0-9]")]
+	ERROR_FLOAT_JUNK_AFTER_EXPONENT,
+	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][+-][^0-9]")]
+	ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,
+	#[regex("\"(?s:[^\"\\\\]|\\\\.)*\"")]
+	STRING_DOUBLE,
+	#[regex("\"(?s:[^\"\\\\]|\\\\.)*")]
+	ERROR_STRING_DOUBLE_UNTERMINATED,
+	#[regex("'(?s:[^'\\\\]|\\\\.)*'")]
+	STRING_SINGLE,
+	#[regex("'(?s:[^'\\\\]|\\\\.)*")]
+	ERROR_STRING_SINGLE_UNTERMINATED,
+	#[regex("@\"(?:[^\"]|\"\")*\"")]
+	STRING_DOUBLE_VERBATIM,
+	#[regex("@\"(?:[^\"]|\"\")*")]
+	ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,
+	#[regex("@'(?:[^']|'')*'")]
+	STRING_SINGLE_VERBATIM,
+	#[regex("@'(?:[^']|'')*")]
+	ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,
+	#[regex("@[^\"'\\s]\\S+")]
+	ERROR_STRING_VERBATIM_MISSING_QUOTES,
+	#[regex("\\|\\|\\|", crate::string_block::lex_str_block_test)]
+	STRING_BLOCK,
+	ERROR_STRING_BLOCK_UNEXPECTED_END,
+	ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+	ERROR_STRING_BLOCK_MISSING_TERMINATION,
+	ERROR_STRING_BLOCK_MISSING_INDENT,
+	#[regex("[_a-zA-Z][_a-zA-Z0-9]*")]
+	IDENT,
+	#[regex("[ \\t\\n\\r]+")]
+	WHITESPACE,
+	#[regex("//[^\\r\\n]*?(\\r\\n|\\n)?")]
+	SINGLE_LINE_SLASH_COMMENT,
+	#[regex("#[^\\r\\n]*?(\\r\\n|\\n)?")]
+	SINGLE_LINE_HASH_COMMENT,
+	#[regex("/\\*([^*]|\\*[^/])*\\*/")]
+	MULTI_LINE_COMMENT,
+	#[regex("/\\*/")]
+	ERROR_COMMENT_TOO_SHORT,
+	#[regex("/\\*([^*/]|\\*[^/])+")]
+	ERROR_COMMENT_UNTERMINATED,
+	#[token("tailstrict")]
+	TAILSTRICT_KW,
+	#[token("local")]
+	LOCAL_KW,
+	#[token("importstr")]
+	IMPORTSTR_KW,
+	#[token("importbin")]
+	IMPORTBIN_KW,
+	#[token("import")]
+	IMPORT_KW,
+	#[token("if")]
+	IF_KW,
+	#[token("then")]
+	THEN_KW,
+	#[token("else")]
+	ELSE_KW,
+	#[token("function")]
+	FUNCTION_KW,
+	#[token("error")]
+	ERROR_KW,
+	#[token("in")]
+	IN_KW,
+	META_OBJECT_APPLY,
+	ERROR_NO_OPERATOR,
+	#[token("null")]
+	NULL_KW,
+	#[token("true")]
+	TRUE_KW,
+	#[token("false")]
+	FALSE_KW,
+	#[token("self")]
+	SELF_KW,
+	#[token("super")]
+	SUPER_KW,
+	#[token("for")]
+	FOR_KW,
+	#[token("assert")]
+	ASSERT_KW,
+	ERROR_MISSING_TOKEN,
+	ERROR_UNEXPECTED_TOKEN,
+	ERROR_CUSTOM,
+	LEXING_ERROR,
+	__LAST_TOKEN,
+	#[doc(hidden)]
+	__LAST,
+}
+use self::SyntaxKind::*;
+impl SyntaxKind {
+	pub fn is_keyword(self) -> bool {
+		match self {
+			OR | NULL_COAELSE | AND | BIT_OR | BIT_XOR | BIT_AND | EQ | NE | LT | GT | LE | GE
+			| LHS | RHS | PLUS | MINUS | MUL | DIV | MODULO | NOT | BIT_NOT | L_BRACK | R_BRACK
+			| L_PAREN | R_PAREN | L_BRACE | R_BRACE | COLON | COLONCOLON | COLONCOLONCOLON
+			| SEMI | DOT | DOTDOTDOT | COMMA | DOLLAR | ASSIGN | QUESTION_MARK | TAILSTRICT_KW
+			| LOCAL_KW | IMPORTSTR_KW | IMPORTBIN_KW | IMPORT_KW | IF_KW | THEN_KW | ELSE_KW
+			| FUNCTION_KW | ERROR_KW | IN_KW | NULL_KW | TRUE_KW | FALSE_KW | SELF_KW
+			| SUPER_KW | FOR_KW | ASSERT_KW => true,
+			_ => false,
+		}
+	}
+	pub fn from_raw(r: u16) -> Self {
+		assert!(r < Self::__LAST as u16);
+		unsafe { std::mem::transmute(r) }
+	}
+	pub fn into_raw(self) -> u16 {
+		self as u16
+	}
+}
+#[macro_export]
+macro_rules ! T { [||] => { $ crate :: SyntaxKind :: OR } ; [??] => { $ crate :: SyntaxKind :: NULL_COAELSE } ; [&&] => { $ crate :: SyntaxKind :: AND } ; [|] => { $ crate :: SyntaxKind :: BIT_OR } ; [^] => { $ crate :: SyntaxKind :: BIT_XOR } ; [&] => { $ crate :: SyntaxKind :: BIT_AND } ; [==] => { $ crate :: SyntaxKind :: EQ } ; [!=] => { $ crate :: SyntaxKind :: NE } ; [<] => { $ crate :: SyntaxKind :: LT } ; [>] => { $ crate :: SyntaxKind :: GT } ; [<=] => { $ crate :: SyntaxKind :: LE } ; [>=] => { $ crate :: SyntaxKind :: GE } ; [<<] => { $ crate :: SyntaxKind :: LHS } ; [>>] => { $ crate :: SyntaxKind :: RHS } ; [+] => { $ crate :: SyntaxKind :: PLUS } ; [-] => { $ crate :: SyntaxKind :: MINUS } ; [*] => { $ crate :: SyntaxKind :: MUL } ; [/] => { $ crate :: SyntaxKind :: DIV } ; [%] => { $ crate :: SyntaxKind :: MODULO } ; [!] => { $ crate :: SyntaxKind :: NOT } ; [~] => { $ crate :: SyntaxKind :: BIT_NOT } ; ['['] => { $ crate :: SyntaxKind :: L_BRACK } ; [']'] => { $ crate :: SyntaxKind :: R_BRACK } ; ['('] => { $ crate :: SyntaxKind :: L_PAREN } ; [')'] => { $ crate :: SyntaxKind :: R_PAREN } ; ['{'] => { $ crate :: SyntaxKind :: L_BRACE } ; ['}'] => { $ crate :: SyntaxKind :: R_BRACE } ; [:] => { $ crate :: SyntaxKind :: COLON } ; [::] => { $ crate :: SyntaxKind :: COLONCOLON } ; [:::] => { $ crate :: SyntaxKind :: COLONCOLONCOLON } ; [;] => { $ crate :: SyntaxKind :: SEMI } ; [.] => { $ crate :: SyntaxKind :: DOT } ; [...] => { $ crate :: SyntaxKind :: DOTDOTDOT } ; [,] => { $ crate :: SyntaxKind :: COMMA } ; ['$'] => { $ crate :: SyntaxKind :: DOLLAR } ; [=] => { $ crate :: SyntaxKind :: ASSIGN } ; [?] 
=> { $ crate :: SyntaxKind :: QUESTION_MARK } ; [tailstrict] => { $ crate :: SyntaxKind :: TAILSTRICT_KW } ; [local] => { $ crate :: SyntaxKind :: LOCAL_KW } ; [importstr] => { $ crate :: SyntaxKind :: IMPORTSTR_KW } ; [importbin] => { $ crate :: SyntaxKind :: IMPORTBIN_KW } ; [import] => { $ crate :: SyntaxKind :: IMPORT_KW } ; [if] => { $ crate :: SyntaxKind :: IF_KW } ; [then] => { $ crate :: SyntaxKind :: THEN_KW } ; [else] => { $ crate :: SyntaxKind :: ELSE_KW } ; [function] => { $ crate :: SyntaxKind :: FUNCTION_KW } ; [error] => { $ crate :: SyntaxKind :: ERROR_KW } ; [in] => { $ crate :: SyntaxKind :: IN_KW } ; [null] => { $ crate :: SyntaxKind :: NULL_KW } ; [true] => { $ crate :: SyntaxKind :: TRUE_KW } ; [false] => { $ crate :: SyntaxKind :: FALSE_KW } ; [self] => { $ crate :: SyntaxKind :: SELF_KW } ; [super] => { $ crate :: SyntaxKind :: SUPER_KW } ; [for] => { $ crate :: SyntaxKind :: FOR_KW } ; [assert] => { $ crate :: SyntaxKind :: ASSERT_KW } }
+#[allow(unused_imports)]
+pub use T;
addedcrates/jrsonnet-lexer/src/lex.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/src/lex.rs
@@ -0,0 +1,78 @@
+use core::ops::Range;
+
+use logos::Logos;
+// use rowan::{TextRange, TextSize};
+
+use crate::{
+	generated::syntax_kinds::SyntaxKind,
+	string_block::{lex_str_block, StringBlockError},
+	Span,
+};
+
+/// Token iterator for a source string, backed by the `logos` lexer derived for
+/// `SyntaxKind`.
+pub struct Lexer<'a> {
+	// The derived lexer that performs the actual tokenization.
+	inner: logos::Lexer<'a, SyntaxKind>,
+}
+
+impl<'a> Lexer<'a> {
+	/// Creates a lexer that tokenizes `input`.
+	pub fn new(input: &'a str) -> Self {
+		Self {
+			inner: SyntaxKind::lexer(input),
+		}
+	}
+}
+
+impl<'a> Iterator for Lexer<'a> {
+	type Item = Lexeme<'a>;
+
+	/// Produces the next lexeme, or `None` once the input is exhausted.
+	///
+	/// Input the underlying lexer cannot tokenize is reported as `LEXING_ERROR`;
+	/// malformed string blocks are reported with the matching
+	/// `ERROR_STRING_BLOCK_*` kind.
+	///
+	/// # Panics
+	///
+	/// Panics if a lexeme offset does not fit into `u32`.
+	fn next(&mut self) -> Option<Self::Item> {
+		use SyntaxKind::*;
+
+		let mut kind = self.inner.next()?;
+		let text = self.inner.slice();
+
+		if kind == Ok(STRING_BLOCK) {
+			// The logos callback only consumes the right number of bytes; it cannot report
+			// an error. Lex the block text a second time to find out whether it is malformed.
+			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
+			// A string block token always starts with `|||`.
+			lexer.bump(3);
+			let res = lex_str_block(&mut lexer);
+			let next = lexer.next();
+			assert!(next.is_none(), "str_block is lexed");
+			if let Err(e) = res {
+				kind = Ok(match e {
+					StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
+					StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
+					StringBlockError::MissingTermination => ERROR_STRING_BLOCK_MISSING_TERMINATION,
+					StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
+				});
+			}
+		}
+
+		let Range { start, end } = self.inner.span();
+		// A plain `as u32` would silently wrap offsets past `u32::MAX`; fail loudly instead.
+		let offset = |pos: usize| {
+			<u32 as core::convert::TryFrom<usize>>::try_from(pos)
+				.expect("lexeme offset does not fit into u32")
+		};
+
+		Some(Lexeme {
+			kind: kind.unwrap_or(LEXING_ERROR),
+			text,
+			range: Span(offset(start), offset(end)),
+		})
+	}
+}
+
+/// A single token produced by [`Lexer`].
+#[derive(Clone, Copy, Debug)]
+pub struct Lexeme<'s> {
+	/// Token kind; `LEXING_ERROR` when the input could not be tokenized.
+	pub kind: SyntaxKind,
+	/// The slice of the source this token covers.
+	pub text: &'s str,
+	/// Start and end offsets of `text` within the source.
+	pub range: Span,
+}
+
+/// Tokenizes the whole of `input` and returns every lexeme in source order.
+pub fn lex(input: &str) -> Vec<Lexeme<'_>> {
+	Lexer::new(input).collect()
+}
addedcrates/jrsonnet-lexer/src/lib.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/src/lib.rs
@@ -0,0 +1,8 @@
+//! Standalone lexer for Jsonnet sources, split out of the rowan-based parser.
+
+mod generated;
+mod lex;
+mod string_block;
+
+/// Start and end offsets (`start..end`) of a lexeme within the lexed source.
+#[derive(Clone, Copy, Debug)]
+pub struct Span(pub u32, pub u32);
+
+// The exported `T!` macro expands to `$crate::SyntaxKind::…`, so the enum must be
+// nameable from the crate root.
+pub use generated::syntax_kinds::SyntaxKind;
+pub use lex::{lex, Lexeme, Lexer};
+// String block helpers that the parser crate used to re-export.
+pub use string_block::{collect_lexed_str_block, CollectStrBlock, StringBlockError};
addedcrates/jrsonnet-lexer/src/string_block.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-lexer/src/string_block.rs
@@ -0,0 +1,282 @@
+/// Reasons a `|||` text block can fail to lex.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum StringBlockError {
+	/// The input ended before the block was complete.
+	UnexpectedEnd,
+	/// Something other than whitespace and a line break followed the opening `|||`.
+	MissingNewLine,
+	/// The line that ends the block does not start with `|||` (after optional indentation).
+	MissingTermination,
+	/// The first non-blank line of the block is not indented.
+	MissingIndent,
+}
+
+use logos::Lexer;
+use StringBlockError::*;
+
+use crate::generated::syntax_kinds::SyntaxKind;
+
+/// Callback attached to the `STRING_BLOCK` token: advances the lexer over the block body.
+///
+/// The outcome is deliberately discarded here; the caller lexes the token text again to
+/// learn whether the block was malformed.
+pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {
+	let _ = lex_str_block(lex);
+}
+
+/// Byte-indexed cursor over the text being scanned as a string block.
+pub(crate) struct Context<'a> {
+	/// Text under inspection.
+	source: &'a str,
+	/// Byte offset of the next unread character in `source`.
+	index: usize,
+}
+
+impl<'a> Context<'a> {
+	/// Returns the not-yet-consumed tail of the source.
+	fn rest(&self) -> &'a str {
+		&self.source[self.index..]
+	}
+
+	/// Consumes and returns the next character, or `None` at the end of input.
+	fn next(&mut self) -> Option<char> {
+		let c = self.peek()?;
+		self.index += c.len_utf8();
+		Some(c)
+	}
+
+	/// Returns the next character without consuming it, or `None` at the end of input.
+	fn peek(&self) -> Option<char> {
+		self.rest().chars().next()
+	}
+
+	/// Consumes the next character if `f` accepts it.
+	///
+	/// Returns the number of bytes consumed (`0` when nothing matched), the same unit
+	/// `eat_while` reports.
+	fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
+		match self.peek() {
+			Some(c) if f(c) => {
+				// Advance by the encoded width, not by one byte: stopping inside a
+				// multi-byte character would make the next `rest()` slice panic.
+				let width = c.len_utf8();
+				self.index += width;
+				width
+			}
+			_ => 0,
+		}
+	}
+
+	/// Consumes characters for as long as `f` accepts them and returns the number of
+	/// bytes consumed.
+	fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
+		let rest = self.rest();
+		// Offset of the first rejected character, or the whole tail if none is rejected.
+		let eaten = rest
+			.char_indices()
+			.find(|&(_, c)| !f(c))
+			.map_or(rest.len(), |(idx, _)| idx);
+		self.index += eaten;
+		eaten
+	}
+
+	/// Advances by `len` bytes, stopping at the end of the source.
+	fn skip(&mut self, len: usize) {
+		self.index = (self.index + len).min(self.source.len());
+	}
+}
+
+// Measures the leading run of spaces/tabs in `a` and checks that `b` begins with
+// exactly the same bytes. Returns the length of that run when `b` matches, and 0
+// when `b` differs, is too short, or `a` has no leading whitespace at all.
+fn check_whitespace(a: &str, b: &str) -> usize {
+	let expected = a.as_bytes();
+	let actual = b.as_bytes();
+
+	// Length of the whitespace prefix of `a`.
+	let indent_len = expected
+		.iter()
+		.take_while(|&&byte| byte == b' ' || byte == b'\t')
+		.count();
+
+	// `b` has to reproduce that prefix byte for byte (a tab does not match a space).
+	if actual.starts_with(&expected[..indent_len]) {
+		indent_len
+	} else {
+		0
+	}
+}
+
+/// Callbacks through which `lex_str_block` reads its input and reports what it found.
+pub(crate) trait StrBlockLexCtx<'d> {
+	/// Text to scan; expected to start right after the opening `|||`.
+	fn remainder(&self) -> &'d str;
+	/// Called just before most error returns, with the cursor at the point of failure.
+	fn eat_error(&mut self, ctx: &Context<'d>);
+	/// Called with the number of bytes that were scanned.
+	fn bump_pos(&mut self, s: usize);
+	/// Called when the opening `|||` is directly followed by `-`.
+	fn mark_truncating(&mut self);
+	/// Called once per block line, without its indentation or line break; blank lines
+	/// inside the block are reported as `""`.
+	fn mark_line(&mut self, line: &'d str);
+}
+
+impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
+	/// Input the underlying `logos` lexer has not consumed yet.
+	fn remainder(&self) -> &'d str {
+		// Inherent methods take precedence over trait methods, so this resolves to
+		// `logos::Lexer::remainder` and does not recurse.
+		self.remainder()
+	}
+
+	/// Swallows the malformed block: everything scanned so far plus the text up to and
+	/// including the next `|||`, or all remaining input when there is no `|||`.
+	fn eat_error(&mut self, ctx: &Context<'d>) {
+		let unread = ctx.rest();
+		let recovery_len = match unread.find("|||") {
+			Some(pos) => pos + 3,
+			None => unread.len(),
+		};
+		self.bump(ctx.index + recovery_len);
+	}
+
+	/// Extends the current token by `s` bytes.
+	fn bump_pos(&mut self, s: usize) {
+		self.bump(s);
+	}
+
+	/// Ignored: this implementation only measures the block, it does not collect it.
+	fn mark_truncating(&mut self) {}
+
+	/// Ignored: this implementation only measures the block, it does not collect it.
+	fn mark_line(&mut self, _line: &'d str) {}
+}
+
+/// Lexes a string block and returns its lines together with the truncation flag.
+///
+/// `input` is scanned from its first byte, so it should be the text that follows the
+/// opening `|||`.
+///
+/// # Errors
+///
+/// Returns the `StringBlockError` describing why the block is malformed.
+pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {
+	let mut collected = CollectStrBlock {
+		input,
+		offset: 0,
+		lines: Vec::new(),
+		truncate: false,
+	};
+	lex_str_block(&mut collected).map(|()| collected)
+}
+
+/// Contents of a string block gathered by `collect_lexed_str_block`.
+pub struct CollectStrBlock<'s> {
+	/// Set when the opening `|||` was directly followed by `-`.
+	pub truncate: bool,
+	/// Block lines in source order, without indentation and line breaks.
+	pub lines: Vec<&'s str>,
+	// Text that is scanned.
+	input: &'s str,
+	// Number of bytes reported as scanned through `bump_pos`.
+	offset: usize,
+}
+
+// Collecting implementation: records the lines and the truncation marker instead of
+// only measuring the block.
+impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
+	// Always hands out the full input; `offset` is not applied here.
+	fn remainder(&self) -> &'d str {
+		self.input
+	}
+
+	fn eat_error(&mut self, _ctx: &Context<'d>) {
+		// Error will be returned, no need to record it here
+	}
+
+	// Accumulates the number of scanned bytes.
+	fn bump_pos(&mut self, s: usize) {
+		self.offset += s;
+	}
+
+	fn mark_truncating(&mut self) {
+		self.truncate = true;
+	}
+
+	fn mark_line(&mut self, line: &'d str) {
+		self.lines.push(line);
+	}
+}
+
+/// Scans the body of a `|||` text block taken from `lex.remainder()`.
+///
+/// The scan expects, in order: an optional `-`, optional spaces/tabs/`\r`, a line break,
+/// one or more indented lines, and a closing `|||` on a line that does not carry the
+/// block indentation. Each line is reported through `mark_line`, and on success
+/// `bump_pos` receives the number of bytes scanned, closing `|||` included.
+///
+/// # Errors
+///
+/// - `UnexpectedEnd`: the input ends before the block is complete.
+/// - `MissingNewLine`: the header line contains something other than whitespace.
+/// - `MissingIndent`: the first non-blank line does not start with a space or tab.
+/// - `MissingTermination`: the first line without the block indentation is not `|||`.
+pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
+	// The opening `|||` is already consumed; scanning starts right after it.
+	let mut ctx = Context::<'a> {
+		source: lex.remainder(),
+		index: 0,
+	};
+
+	// A `-` directly after the opening `|||` marks the block as truncating.
+	if ctx.eat_if(|v| v == '-') != 0 {
+		lex.mark_truncating();
+	}
+
+	// Skip whitespaces
+	ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');
+
+	// Skip \n
+	match ctx.next() {
+		Some('\n') => (),
+		None => {
+			lex.eat_error(&ctx);
+			return Err(UnexpectedEnd);
+		}
+		// Text block requires new line after |||.
+		Some(_) => {
+			lex.eat_error(&ctx);
+			return Err(MissingNewLine);
+		}
+	}
+
+	// Process leading blank lines before calculating string block indent
+	// (these lines are not reported through `mark_line`).
+	while ctx.peek() == Some('\n') {
+		ctx.next();
+	}
+
+	// Comparing the text with itself yields the length of its own whitespace prefix,
+	// which becomes the indentation every following line has to repeat.
+	let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());
+	let str_block_indent = &ctx.rest()[..num_whitespace];
+
+	if num_whitespace == 0 {
+		// Text block's first line must start with whitespace
+		lex.eat_error(&ctx);
+		return Err(MissingIndent);
+	}
+
+	loop {
+		debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");
+		// Drop the indentation of the current line.
+		ctx.skip(num_whitespace);
+
+		// Read up to the line break; the reported line excludes the `\n` itself.
+		let line_start = ctx.index;
+		let mut line_size = 0;
+		loop {
+			match ctx.next() {
+				None => {
+					lex.eat_error(&ctx);
+					return Err(UnexpectedEnd);
+				}
+				Some('\n') => {
+					lex.mark_line(&ctx.source[line_start..line_start + line_size]);
+					break;
+				}
+				Some(c) => {
+					line_size += c.len_utf8();
+				}
+			}
+		}
+
+		// Blank lines inside the block need no indentation and are reported as empty lines
+		while ctx.peek() == Some('\n') {
+			lex.mark_line("");
+			ctx.next();
+		}
+
+		// Look at the next line
+		num_whitespace = check_whitespace(str_block_indent, ctx.rest());
+		if num_whitespace == 0 {
+			// End of the text block
+			// The terminator may carry its own indentation, which is consumed and discarded.
+			while let Some(' ' | '\t') = ctx.peek() {
+				// `peek` just returned a character, so `next` cannot fail here.
+				ctx.next().unwrap();
+			}
+
+			if !ctx.rest().starts_with("|||") {
+				if ctx.rest().is_empty() {
+					// Nothing is left to skip over, so only report what was scanned.
+					lex.bump_pos(ctx.index);
+					return Err(UnexpectedEnd);
+				}
+				lex.eat_error(&ctx);
+				return Err(MissingTermination);
+			}
+
+			// Skip '|||'
+			ctx.skip(3);
+			break;
+		}
+	}
+
+	lex.bump_pos(ctx.index);
+	Ok(())
+}
modifiedcrates/jrsonnet-rowan-parser/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/Cargo.toml
+++ b/crates/jrsonnet-rowan-parser/Cargo.toml
@@ -14,7 +14,7 @@
 drop_bomb.workspace = true
 hi-doc.workspace = true
 indoc.workspace = true
-logos.workspace = true
+jrsonnet-lexer = { version = "0.5.0-pre97", path = "../jrsonnet-lexer" }
 rowan.workspace = true
 thiserror.workspace = true
 
modifiedcrates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
+++ b/crates/jrsonnet-rowan-parser/src/generated/syntax_kinds.rs
@@ -8,172 +8,95 @@
 	clippy::manual_non_exhaustive,
 	clippy::match_like_matches_macro
 )]
-use logos::Logos;
 #[doc = r" The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`."]
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 #[repr(u16)]
 pub enum SyntaxKind {
 	#[doc(hidden)]
 	TOMBSTONE,
 	#[doc(hidden)]
 	EOF,
-	#[token("||")]
 	OR,
-	#[token("??")]
 	NULL_COAELSE,
-	#[token("&&")]
 	AND,
-	#[token("|")]
 	BIT_OR,
-	#[token("^")]
 	BIT_XOR,
-	#[token("&")]
 	BIT_AND,
-	#[token("==")]
 	EQ,
-	#[token("!=")]
 	NE,
-	#[token("<")]
 	LT,
-	#[token(">")]
 	GT,
-	#[token("<=")]
 	LE,
-	#[token(">=")]
 	GE,
-	#[token("<<")]
 	LHS,
-	#[token(">>")]
 	RHS,
-	#[token("+")]
 	PLUS,
-	#[token("-")]
 	MINUS,
-	#[token("*")]
 	MUL,
-	#[token("/")]
 	DIV,
-	#[token("%")]
 	MODULO,
-	#[token("!")]
 	NOT,
-	#[token("~")]
 	BIT_NOT,
-	#[token("[")]
 	L_BRACK,
-	#[token("]")]
 	R_BRACK,
-	#[token("(")]
 	L_PAREN,
-	#[token(")")]
 	R_PAREN,
-	#[token("{")]
 	L_BRACE,
-	#[token("}")]
 	R_BRACE,
-	#[token(":")]
 	COLON,
-	#[token("::")]
 	COLONCOLON,
-	#[token(":::")]
 	COLONCOLONCOLON,
-	#[token(";")]
 	SEMI,
-	#[token(".")]
 	DOT,
-	#[token("...")]
 	DOTDOTDOT,
-	#[token(",")]
 	COMMA,
-	#[token("$")]
 	DOLLAR,
-	#[token("=")]
 	ASSIGN,
-	#[token("?")]
 	QUESTION_MARK,
-	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?")]
 	FLOAT,
-	#[regex("(?:0|[1-9][0-9]*)\\.[^0-9]")]
 	ERROR_FLOAT_JUNK_AFTER_POINT,
-	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][^+\\-0-9]")]
 	ERROR_FLOAT_JUNK_AFTER_EXPONENT,
-	#[regex("(?:0|[1-9][0-9]*)(?:\\.[0-9]+)?[eE][+-][^0-9]")]
 	ERROR_FLOAT_JUNK_AFTER_EXPONENT_SIGN,
-	#[regex("\"(?s:[^\"\\\\]|\\\\.)*\"")]
 	STRING_DOUBLE,
-	#[regex("\"(?s:[^\"\\\\]|\\\\.)*")]
 	ERROR_STRING_DOUBLE_UNTERMINATED,
-	#[regex("'(?s:[^'\\\\]|\\\\.)*'")]
 	STRING_SINGLE,
-	#[regex("'(?s:[^'\\\\]|\\\\.)*")]
 	ERROR_STRING_SINGLE_UNTERMINATED,
-	#[regex("@\"(?:[^\"]|\"\")*\"")]
 	STRING_DOUBLE_VERBATIM,
-	#[regex("@\"(?:[^\"]|\"\")*")]
 	ERROR_STRING_DOUBLE_VERBATIM_UNTERMINATED,
-	#[regex("@'(?:[^']|'')*'")]
 	STRING_SINGLE_VERBATIM,
-	#[regex("@'(?:[^']|'')*")]
 	ERROR_STRING_SINGLE_VERBATIM_UNTERMINATED,
-	#[regex("@[^\"'\\s]\\S+")]
 	ERROR_STRING_VERBATIM_MISSING_QUOTES,
-	#[regex("\\|\\|\\|", crate::string_block::lex_str_block_test)]
 	STRING_BLOCK,
 	ERROR_STRING_BLOCK_UNEXPECTED_END,
 	ERROR_STRING_BLOCK_MISSING_NEW_LINE,
 	ERROR_STRING_BLOCK_MISSING_TERMINATION,
 	ERROR_STRING_BLOCK_MISSING_INDENT,
-	#[regex("[_a-zA-Z][_a-zA-Z0-9]*")]
 	IDENT,
-	#[regex("[ \\t\\n\\r]+")]
 	WHITESPACE,
-	#[regex("//[^\\r\\n]*?(\\r\\n|\\n)?")]
 	SINGLE_LINE_SLASH_COMMENT,
-	#[regex("#[^\\r\\n]*?(\\r\\n|\\n)?")]
 	SINGLE_LINE_HASH_COMMENT,
-	#[regex("/\\*([^*]|\\*[^/])*\\*/")]
 	MULTI_LINE_COMMENT,
-	#[regex("/\\*/")]
 	ERROR_COMMENT_TOO_SHORT,
-	#[regex("/\\*([^*/]|\\*[^/])+")]
 	ERROR_COMMENT_UNTERMINATED,
-	#[token("tailstrict")]
 	TAILSTRICT_KW,
-	#[token("local")]
 	LOCAL_KW,
-	#[token("importstr")]
 	IMPORTSTR_KW,
-	#[token("importbin")]
 	IMPORTBIN_KW,
-	#[token("import")]
 	IMPORT_KW,
-	#[token("if")]
 	IF_KW,
-	#[token("then")]
 	THEN_KW,
-	#[token("else")]
 	ELSE_KW,
-	#[token("function")]
 	FUNCTION_KW,
-	#[token("error")]
 	ERROR_KW,
-	#[token("in")]
 	IN_KW,
 	META_OBJECT_APPLY,
 	ERROR_NO_OPERATOR,
-	#[token("null")]
 	NULL_KW,
-	#[token("true")]
 	TRUE_KW,
-	#[token("false")]
 	FALSE_KW,
-	#[token("self")]
 	SELF_KW,
-	#[token("super")]
 	SUPER_KW,
-	#[token("for")]
 	FOR_KW,
-	#[token("assert")]
 	ASSERT_KW,
 	ERROR_MISSING_TOKEN,
 	ERROR_UNEXPECTED_TOKEN,
modifiedcrates/jrsonnet-rowan-parser/src/lex.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/lex.rs
+++ b/crates/jrsonnet-rowan-parser/src/lex.rs
@@ -1,81 +1,19 @@
-use core::ops::Range;
-use std::convert::TryFrom;
-
-use logos::Logos;
+use jrsonnet_lexer::Lexer;
 use rowan::{TextRange, TextSize};
 
-use crate::{
-	string_block::{lex_str_block, StringBlockError},
-	SyntaxKind,
-};
-
-pub struct Lexer<'a> {
-	inner: logos::Lexer<'a, SyntaxKind>,
-}
-
-impl<'a> Lexer<'a> {
-	pub fn new(input: &'a str) -> Self {
-		Self {
-			inner: SyntaxKind::lexer(input),
-		}
-	}
-}
-
-impl<'a> Iterator for Lexer<'a> {
-	type Item = Lexeme<'a>;
-
-	fn next(&mut self) -> Option<Self::Item> {
-		use SyntaxKind::*;
-
-		let mut kind = self.inner.next()?;
-		let text = self.inner.slice();
-
-		if kind == Ok(STRING_BLOCK) {
-			// We use custom lexer, which skips enough bytes, but not returns error
-			// Instead we should call lexer again to verify if there is something wrong with string block
-			let mut lexer = logos::Lexer::<SyntaxKind>::new(text);
-			// In kinds, string blocks is parsed at least as `|||`
-			lexer.bump(3);
-			let res = lex_str_block(&mut lexer);
-			let next = lexer.next();
-			assert!(next.is_none(), "str_block is lexed");
-			match res {
-				Ok(()) => {}
-				Err(e) => {
-					kind = Ok(match e {
-						StringBlockError::UnexpectedEnd => ERROR_STRING_BLOCK_UNEXPECTED_END,
-						StringBlockError::MissingNewLine => ERROR_STRING_BLOCK_MISSING_NEW_LINE,
-						StringBlockError::MissingTermination => {
-							ERROR_STRING_BLOCK_MISSING_TERMINATION
-						}
-						StringBlockError::MissingIndent => ERROR_STRING_BLOCK_MISSING_INDENT,
-					});
-				}
-			}
-		}
-
-		Some(Self::Item {
-			kind: kind.unwrap_or(SyntaxKind::LEXING_ERROR),
-			text,
-			range: {
-				let Range { start, end } = self.inner.span();
-
-				TextRange::new(
-					TextSize::try_from(start).unwrap(),
-					TextSize::try_from(end).unwrap(),
-				)
-			},
-		})
-	}
-}
+use crate::SyntaxKind;
 
 #[derive(Clone, Copy, Debug)]
-pub struct Lexeme<'i> {
+pub struct Lexeme<'s> {
 	pub kind: SyntaxKind,
-	pub text: &'i str,
+	pub text: &'s str,
 	pub range: TextRange,
 }
 
 pub fn lex(input: &str) -> Vec<Lexeme<'_>> {
-	Lexer::new(input).collect()
+	Lexer::new(input).map(|l| Lexeme {
+		kind: SyntaxKind::from_raw(l.kind.into_raw()),
+		text: l.text,
+		range: TextRange::new(TextSize::from(l.range.0), TextSize::from(l.range.1)),
+	}).collect()
 }
modifiedcrates/jrsonnet-rowan-parser/src/lib.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -2,7 +2,6 @@
 
 use event::Sink;
 use generated::nodes::{SourceFile, Trivia};
-use lex::lex;
 use parser::{LocatedSyntaxError, Parser};
 pub use rowan;
 
@@ -14,14 +13,12 @@
 mod marker;
 mod parser;
 mod precedence;
-mod string_block;
 mod tests;
 mod token_set;
 
 pub use ast::{AstChildren, AstNode, AstToken};
 pub use generated::{nodes, syntax_kinds::SyntaxKind};
 pub use language::*;
-pub use string_block::{collect_lexed_str_block, CollectStrBlock};
 pub use token_set::SyntaxKindSet;
 
 use self::{
@@ -30,7 +27,7 @@
 };
 
 pub fn parse(input: &str) -> (SourceFile, Vec<LocatedSyntaxError>) {
-	let lexemes = lex(input);
+	let lexemes = lex::lex(input);
 	let kinds = lexemes
 		.iter()
 		.map(|l| l.kind)
deletedcrates/jrsonnet-rowan-parser/src/string_block.rsdiffbeforeafterboth
--- a/crates/jrsonnet-rowan-parser/src/string_block.rs
+++ /dev/null
@@ -1,282 +0,0 @@
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum StringBlockError {
-	UnexpectedEnd,
-	MissingNewLine,
-	MissingTermination,
-	MissingIndent,
-}
-
-use logos::Lexer;
-use StringBlockError::*;
-
-use crate::SyntaxKind;
-
-pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {
-	let _ = lex_str_block(lex);
-}
-
-pub(crate) struct Context<'a> {
-	source: &'a str,
-	index: usize,
-}
-
-impl<'a> Context<'a> {
-	fn rest(&self) -> &'a str {
-		&self.source[self.index..]
-	}
-
-	fn next(&mut self) -> Option<char> {
-		if self.index == self.source.len() {
-			return None;
-		}
-
-		match self.rest().chars().next() {
-			None => None,
-			Some(c) => {
-				self.index += c.len_utf8();
-				Some(c)
-			}
-		}
-	}
-
-	fn peek(&self) -> Option<char> {
-		if self.index == self.source.len() {
-			return None;
-		}
-
-		self.rest().chars().next()
-	}
-
-	fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
-		if self.peek().is_some_and(f) {
-			self.index += 1;
-			return 1;
-		}
-		0
-	}
-
-	fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
-		if self.index == self.source.len() {
-			return 0;
-		}
-
-		let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));
-
-		match next_char {
-			None => {
-				let diff = self.source.len() - self.index;
-				self.index = self.source.len();
-				diff
-			}
-			Some((idx, _)) => {
-				self.index += idx;
-				idx
-			}
-		}
-	}
-
-	fn skip(&mut self, len: usize) {
-		self.index = match self.index + len {
-			n if n > self.source.len() => self.source.len(),
-			n => n,
-		};
-	}
-}
-
-// Check that b has at least the same whitespace prefix as a and returns the
-// amount of this whitespace, otherwise returns 0.  If a has no whitespace
-// prefix than return 0.
-fn check_whitespace(a: &str, b: &str) -> usize {
-	let a = a.as_bytes();
-	let b = b.as_bytes();
-
-	for i in 0..a.len() {
-		if a[i] != b' ' && a[i] != b'\t' {
-			// a has run out of whitespace and b matched up to this point. Return result.
-			return i;
-		}
-
-		if i >= b.len() {
-			// We ran off the edge of b while a still has whitespace. Return 0 as failure.
-			return 0;
-		}
-
-		if a[i] != b[i] {
-			// a has whitespace but b does not. Return 0 as failure.
-			return 0;
-		}
-	}
-
-	// We ran off the end of a and b kept up
-	a.len()
-}
-
-pub(crate) trait StrBlockLexCtx<'d> {
-	fn remainder(&self) -> &'d str;
-	fn eat_error(&mut self, ctx: &Context<'d>);
-	fn bump_pos(&mut self, s: usize);
-	fn mark_truncating(&mut self);
-	fn mark_line(&mut self, line: &'d str);
-}
-
-impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
-	fn remainder(&self) -> &'d str {
-		self.remainder()
-	}
-	fn eat_error(&mut self, ctx: &Context<'d>) {
-		let end_index = ctx
-			.rest()
-			.find("|||")
-			.map_or_else(|| ctx.rest().len(), |v| v + 3);
-		self.bump(ctx.index + end_index);
-	}
-	fn bump_pos(&mut self, s: usize) {
-		self.bump(s);
-	}
-	fn mark_truncating(&mut self) {
-		// Lexer test doesn't collect anything
-	}
-	fn mark_line(&mut self, _line: &'d str) {
-		// Lexer test doesn't collect anything
-	}
-}
-
-pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {
-	let mut collect = CollectStrBlock {
-		truncate: false,
-		lines: vec![],
-		input,
-		offset: 0,
-	};
-	lex_str_block(&mut collect)?;
-	Ok(collect)
-}
-
-pub struct CollectStrBlock<'s> {
-	pub truncate: bool,
-	pub lines: Vec<&'s str>,
-	input: &'s str,
-	offset: usize,
-}
-
-impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
-	fn remainder(&self) -> &'d str {
-		self.input
-	}
-
-	fn eat_error(&mut self, _ctx: &Context<'d>) {
-		// Error will be returned, no need to record it here
-	}
-
-	fn bump_pos(&mut self, s: usize) {
-		self.offset += s;
-	}
-
-	fn mark_truncating(&mut self) {
-		self.truncate = true;
-	}
-
-	fn mark_line(&mut self, line: &'d str) {
-		self.lines.push(line);
-	}
-}
-
-pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
-	// debug_assert_eq!(lex.slice(), "|||");
-	let mut ctx = Context::<'a> {
-		source: lex.remainder(),
-		index: 0,
-	};
-
-	if ctx.eat_if(|v| v == '-') != 0 {
-		lex.mark_truncating();
-	}
-
-	// Skip whitespaces
-	ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');
-
-	// Skip \n
-	match ctx.next() {
-		Some('\n') => (),
-		None => {
-			lex.eat_error(&ctx);
-			return Err(UnexpectedEnd);
-		}
-		// Text block requires new line after |||.
-		Some(_) => {
-			lex.eat_error(&ctx);
-			return Err(MissingNewLine);
-		}
-	}
-
-	// Process leading blank lines before calculating string block indent
-	while ctx.peek() == Some('\n') {
-		ctx.next();
-	}
-
-	let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());
-	let str_block_indent = &ctx.rest()[..num_whitespace];
-
-	if num_whitespace == 0 {
-		// Text block's first line must start with whitespace
-		lex.eat_error(&ctx);
-		return Err(MissingIndent);
-	}
-
-	loop {
-		debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");
-		ctx.skip(num_whitespace);
-
-		let line_start = ctx.index;
-		let mut line_size = 0;
-		loop {
-			match ctx.next() {
-				None => {
-					lex.eat_error(&ctx);
-					return Err(UnexpectedEnd);
-				}
-				Some('\n') => {
-					lex.mark_line(&ctx.source[line_start..line_start + line_size]);
-					break;
-				}
-				Some(c) => {
-					line_size += c.len_utf8();
-				}
-			}
-		}
-
-		// Skip any blank lines
-		while ctx.peek() == Some('\n') {
-			lex.mark_line("");
-			ctx.next();
-		}
-
-		// Look at the next line
-		num_whitespace = check_whitespace(str_block_indent, ctx.rest());
-		if num_whitespace == 0 {
-			// End of the text block
-			// let mut term_indent = String::with_capacity(num_whitespace);
-			while let Some(' ' | '\t') = ctx.peek() {
-				// term_indent.push(
-				ctx.next().unwrap();
-				// );
-			}
-
-			if !ctx.rest().starts_with("|||") {
-				if ctx.rest().is_empty() {
-					lex.bump_pos(ctx.index);
-					return Err(UnexpectedEnd);
-				}
-				lex.eat_error(&ctx);
-				return Err(MissingTermination);
-			}
-
-			// Skip '|||'
-			ctx.skip(3);
-			break;
-		}
-	}
-
-	lex.bump_pos(ctx.index);
-	Ok(())
-}
modifiedxtask/src/sourcegen/kinds.rsdiffbeforeafterboth
before · xtask/src/sourcegen/kinds.rs
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		#[allow(dead_code)]18		/// Is this error returned by lexer directly, or from lex.rs19		is_lexer_error: bool,20		regex: Option<String>,21		priority: Option<u32>,22	},23	/// Keyword - literal match of token24	Keyword {25		/// How this keyword appears in grammar/code, should be same as Kinds key26		code: String,27		name: String,28	},29	/// Literal - something defined by user, i.e strings, identifiers, smth30	Literal {31		/// How this keyword appears in grammar, should be same as Kinds key32		grammar_name: String,33		name: String,34		/// Regex for Logos lexer35		regex: String,36		/// Path to custom lexer37		lexer: Option<String>,38	},39}4041impl TokenKind {42	pub fn grammar_name(&self) -> &str {43		match self {44			Self::Keyword { code, .. } => code,45			Self::Literal { grammar_name, .. }46			| Self::Meta { grammar_name, .. }47			| Self::Error { grammar_name, .. } => grammar_name,48		}49	}50	/// How this keyword should appear in kinds enum, screaming snake cased51	pub fn name(&self) -> &str {52		match self {53			Self::Keyword { name, .. }54			| Self::Literal { name, .. }55			| Self::Meta { name, .. }56			| Self::Error { name, .. } => name,57		}58	}59	pub fn expand_kind(&self) -> TokenStream {60		let name = format_ident!("{}", self.name());61		let attr = match self {62			Self::Keyword { code, .. } => quote! {#[token(#code)]},63			Self::Literal { regex, lexer, .. } => {64				let lexer = lexer65					.as_deref()66					.map(TokenStream::from_str)67					.map(|r| r.expect("path is correct"));68				quote! 
{#[regex(#regex, #lexer)]}69			}70			Self::Error {71				regex, priority, ..72			} if regex.is_some() => {73				let priority = priority.map(|p| quote! {, priority = #p});74				quote! {#[regex(#regex #priority)]}75			}76			_ => quote! {},77		};78		quote! {79			#attr80			#name81		}82	}83	pub fn expand_t_macros(&self) -> Option<TokenStream> {84		match self {85			Self::Keyword { code, name } => {86				let code = escape_token_macro(code);87				let name = format_ident!("{name}");88				Some(quote! {89					[#code] => {$crate::SyntaxKind::#name}90				})91			}92			// Meta items should not appear in T![_]93			_ => None,94		}95	}9697	/// How this token should be referenced in code98	/// Keywords are referenced with `T![_]` macro,99	/// and literals are referenced directly by name100	pub fn reference(&self) -> TokenStream {101		if let Self::Keyword { code, .. } = self {102			let code = escape_token_macro(code);103			quote! {T![#code]}104		} else {105			let name = self.name();106			let ident = format_ident!("{name}");107			quote! {#ident}108		}109	}110111	pub fn method_name(&self) -> Ident {112		match self {113			Self::Keyword { name, .. } => {114				format_ident!("{}_token", name.to_lowercase())115			}116			Self::Literal { name, .. } => {117				format_ident!("{}_lit", name.to_lowercase())118			}119			Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),120			Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),121		}122	}123}124125#[macro_export]126macro_rules! define_kinds {127	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) => {{128		$into.define_token(TokenKind::Literal {129			grammar_name: format!("LIT_{}!", $name),130			name: $name.to_owned(),131			regex: $regex.to_owned(),132			lexer: None $(.or_else(|| Some($lexer.to_string())))?,133		});134		$(define_kinds!($into = $($rest)*))?135	}};136	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) 
$(=> $regex:literal)? $(; $($rest:tt)*)?) => {{137		{138			let regex = None$(.or(Some($regex.to_owned())))?;139			let priority = None$(.or(Some($priority)))?;140			$into.define_token(TokenKind::Error {141				grammar_name: format!("ERROR_{}!", $name),142				name: format!("ERROR_{}", $name),143				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),144				regex,145				priority,146			});147		}148		$(define_kinds!($into = $($rest)*))?149	}};150	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{151		$into.define_token(TokenKind::Keyword {152			code: format!("{}", $tok),153			name: $name.to_owned(),154		});155		$(define_kinds!($into = $($rest)*))?156	}};157	($into:ident =) => {{}}158}159use std::{collections::HashSet, str::FromStr};160161use indexmap::IndexMap;162use proc_macro2::{Ident, TokenStream};163use quote::{format_ident, quote};164165use super::escape_token_macro;166167impl KindsSrc {168	pub fn new() -> Self {169		Self {170			defined_tokens: IndexMap::new(),171			defined_node_names: HashSet::new(),172			nodes: Vec::new(),173		}174	}175	pub fn define_token(&mut self, token: TokenKind) {176		assert!(177			self.defined_node_names.insert(token.name().to_owned()),178			"node name already defined: {}",179			token.name()180		);181		assert!(182			self.defined_tokens183				.insert(token.grammar_name().to_owned(), token.clone())184				.is_none(),185			"token already defined: {}",186			token.grammar_name()187		);188	}189	pub fn define_node(&mut self, node: &str) {190		assert!(191			self.defined_node_names.insert(node.to_owned()),192			"node name already defined: {node}"193		);194		self.nodes.push(node.to_string());195	}196	pub fn token(&self, tok: &str) -> Option<&TokenKind> {197		self.defined_tokens.get(tok)198	}199	pub fn is_token(&self, tok: &str) -> bool {200		self.defined_tokens.contains_key(tok)201	}202	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {203		self.defined_tokens.iter().map(|(_, v)| v)204	
}205}206207pub fn jsonnet_kinds() -> KindsSrc {208	let mut kinds = KindsSrc::new();209	define_kinds![kinds =210		"||" => "OR";211		"??" => "NULL_COAELSE";212		"&&" => "AND";213		"|" => "BIT_OR";214		"^" => "BIT_XOR";215		"&" => "BIT_AND";216		"==" => "EQ";217		"!=" => "NE";218		"<" => "LT";219		">" => "GT";220		"<=" => "LE";221		">=" => "GE";222		"<<" => "LHS";223		">>" => "RHS";224		"+" => "PLUS";225		"-" => "MINUS";226		"*" => "MUL";227		"/" => "DIV";228		"%" => "MODULO";229		"!" => "NOT";230		"~" => "BIT_NOT";231		"[" => "L_BRACK";232		"]" => "R_BRACK";233		"(" => "L_PAREN";234		")" => "R_PAREN";235		"{" => "L_BRACE";236		"}" => "R_BRACE";237		":" => "COLON";238		"::" => "COLONCOLON";239		":::" => "COLONCOLONCOLON";240		";" => "SEMI";241		"." => "DOT";242		"..." => "DOTDOTDOT";243		"," => "COMMA";244		"$" => "DOLLAR";245		"=" => "ASSIGN";246		"?" => "QUESTION_MARK";247		// Literals248		lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";249		error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";250		error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";251		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";252		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";253		error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";254		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";255		error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";256		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";257		error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";258		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";259		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";260		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";261		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";262		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);263		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = 
true);264		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);265		error("STRING_BLOCK_MISSING_INDENT", lexer = true);266		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";267		lit("WHITESPACE") => r"[ \t\n\r]+";268		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";269		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";270		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";271		error("COMMENT_TOO_SHORT") => r"/\*/";272		error("COMMENT_UNTERMINATED") =>  r"/\*([^*/]|\*[^/])+";273	];274	kinds275}
after · xtask/src/sourcegen/kinds.rs
1#[derive(Debug)]2pub struct KindsSrc {3	/// Key - how this token appears in ungrammar4	defined_tokens: IndexMap<String, TokenKind>,5	defined_node_names: HashSet<String>,6	pub nodes: Vec<String>,7}89#[derive(Debug, Clone)]10pub enum TokenKind {11	/// May exist in token tree, but never in source code12	Meta { grammar_name: String, name: String },13	/// Specific parsing/lexing errors may be emitted as this type of kind14	Error {15		grammar_name: String,16		name: String,17		#[allow(dead_code)]18		/// Is this error returned by lexer directly, or from lex.rs19		is_lexer_error: bool,20		regex: Option<String>,21		priority: Option<u32>,22	},23	/// Keyword - literal match of token24	Keyword {25		/// How this keyword appears in grammar/code, should be same as Kinds key26		code: String,27		name: String,28	},29	/// Literal - something defined by user, i.e strings, identifiers, smth30	Literal {31		/// How this keyword appears in grammar, should be same as Kinds key32		grammar_name: String,33		name: String,34		/// Regex for Logos lexer35		regex: String,36		/// Path to custom lexer37		lexer: Option<String>,38	},39}4041impl TokenKind {42	pub fn grammar_name(&self) -> &str {43		match self {44			Self::Keyword { code, .. } => code,45			Self::Literal { grammar_name, .. }46			| Self::Meta { grammar_name, .. }47			| Self::Error { grammar_name, .. } => grammar_name,48		}49	}50	/// How this keyword should appear in kinds enum, screaming snake cased51	pub fn name(&self) -> &str {52		match self {53			Self::Keyword { name, .. }54			| Self::Literal { name, .. }55			| Self::Meta { name, .. }56			| Self::Error { name, .. } => name,57		}58	}59	pub fn expand_kind(&self, lexer: bool) -> TokenStream {60		let name = format_ident!("{}", self.name());61		let attr = match self {62			Self::Keyword { code, .. } => quote! {#[token(#code)]},63			Self::Literal { regex, lexer, .. 
} => {64				let lexer = lexer65					.as_deref()66					.map(TokenStream::from_str)67					.map(|r| r.expect("path is correct"));68				quote! {#[regex(#regex, #lexer)]}69			}70			Self::Error {71				regex, priority, ..72			} if regex.is_some() => {73				let priority = priority.map(|p| quote! {, priority = #p});74				quote! {#[regex(#regex #priority)]}75			}76			_ => quote! {},77		};78		let attr = if lexer {79			attr80		} else {81			quote! {}82		};83		quote! {84			#attr85			#name86		}87	}88	pub fn expand_t_macros(&self) -> Option<TokenStream> {89		match self {90			Self::Keyword { code, name } => {91				let code = escape_token_macro(code);92				let name = format_ident!("{name}");93				Some(quote! {94					[#code] => {$crate::SyntaxKind::#name}95				})96			}97			// Meta items should not appear in T![_]98			_ => None,99		}100	}101102	/// How this token should be referenced in code103	/// Keywords are referenced with `T![_]` macro,104	/// and literals are referenced directly by name105	pub fn reference(&self) -> TokenStream {106		if let Self::Keyword { code, .. } = self {107			let code = escape_token_macro(code);108			quote! {T![#code]}109		} else {110			let name = self.name();111			let ident = format_ident!("{name}");112			quote! {#ident}113		}114	}115116	pub fn method_name(&self) -> Ident {117		match self {118			Self::Keyword { name, .. } => {119				format_ident!("{}_token", name.to_lowercase())120			}121			Self::Literal { name, .. } => {122				format_ident!("{}_lit", name.to_lowercase())123			}124			Self::Meta { name, .. } => format_ident!("{}_meta", name.to_lowercase()),125			Self::Error { name, .. } => format_ident!("{}_error", name.to_lowercase()),126		}127	}128}129130#[macro_export]131macro_rules! define_kinds {132	($into:ident = lit($name:literal) => $regex:literal $(, $lexer:literal)? $(; $($rest:tt)*)?) 
=> {{133		$into.define_token(TokenKind::Literal {134			grammar_name: format!("LIT_{}!", $name),135			name: $name.to_owned(),136			regex: $regex.to_owned(),137			lexer: None $(.or_else(|| Some($lexer.to_string())))?,138		});139		$(define_kinds!($into = $($rest)*))?140	}};141	($into:ident = error($name:literal$(, priority = $priority:literal)? $(, lexer = $lexer:literal)?) $(=> $regex:literal)? $(; $($rest:tt)*)?) => {{142		{143			let regex = None$(.or(Some($regex.to_owned())))?;144			let priority = None$(.or(Some($priority)))?;145			$into.define_token(TokenKind::Error {146				grammar_name: format!("ERROR_{}!", $name),147				name: format!("ERROR_{}", $name),148				is_lexer_error: false $(|| $lexer)? || regex.is_some() || priority.is_some(),149				regex,150				priority,151			});152		}153		$(define_kinds!($into = $($rest)*))?154	}};155	($into:ident = $tok:literal => $name:literal $(; $($rest:tt)*)?) => {{156		$into.define_token(TokenKind::Keyword {157			code: format!("{}", $tok),158			name: $name.to_owned(),159		});160		$(define_kinds!($into = $($rest)*))?161	}};162	($into:ident =) => {{}}163}164use std::{collections::HashSet, str::FromStr};165166use indexmap::IndexMap;167use proc_macro2::{Ident, TokenStream};168use quote::{format_ident, quote};169170use super::escape_token_macro;171172impl KindsSrc {173	pub fn new() -> Self {174		Self {175			defined_tokens: IndexMap::new(),176			defined_node_names: HashSet::new(),177			nodes: Vec::new(),178		}179	}180	pub fn define_token(&mut self, token: TokenKind) {181		assert!(182			self.defined_node_names.insert(token.name().to_owned()),183			"node name already defined: {}",184			token.name()185		);186		assert!(187			self.defined_tokens188				.insert(token.grammar_name().to_owned(), token.clone())189				.is_none(),190			"token already defined: {}",191			token.grammar_name()192		);193	}194	pub fn define_node(&mut self, node: &str) {195		assert!(196			self.defined_node_names.insert(node.to_owned()),197			"node name already defined: 
{node}"198		);199		self.nodes.push(node.to_string());200	}201	pub fn token(&self, tok: &str) -> Option<&TokenKind> {202		self.defined_tokens.get(tok)203	}204	pub fn is_token(&self, tok: &str) -> bool {205		self.defined_tokens.contains_key(tok)206	}207	pub fn tokens(&self) -> impl Iterator<Item = &TokenKind> {208		self.defined_tokens.iter().map(|(_, v)| v)209	}210}211212pub fn jsonnet_kinds() -> KindsSrc {213	let mut kinds = KindsSrc::new();214	define_kinds![kinds =215		"||" => "OR";216		"??" => "NULL_COAELSE";217		"&&" => "AND";218		"|" => "BIT_OR";219		"^" => "BIT_XOR";220		"&" => "BIT_AND";221		"==" => "EQ";222		"!=" => "NE";223		"<" => "LT";224		">" => "GT";225		"<=" => "LE";226		">=" => "GE";227		"<<" => "LHS";228		">>" => "RHS";229		"+" => "PLUS";230		"-" => "MINUS";231		"*" => "MUL";232		"/" => "DIV";233		"%" => "MODULO";234		"!" => "NOT";235		"~" => "BIT_NOT";236		"[" => "L_BRACK";237		"]" => "R_BRACK";238		"(" => "L_PAREN";239		")" => "R_PAREN";240		"{" => "L_BRACE";241		"}" => "R_BRACE";242		":" => "COLON";243		"::" => "COLONCOLON";244		":::" => "COLONCOLONCOLON";245		";" => "SEMI";246		"." => "DOT";247		"..." => "DOTDOTDOT";248		"," => "COMMA";249		"$" => "DOLLAR";250		"=" => "ASSIGN";251		"?" 
=> "QUESTION_MARK";252		// Literals253		lit("FLOAT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?";254		error("FLOAT_JUNK_AFTER_POINT") => r"(?:0|[1-9][0-9]*)\.[^0-9]";255		error("FLOAT_JUNK_AFTER_EXPONENT") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][^+\-0-9]";256		error("FLOAT_JUNK_AFTER_EXPONENT_SIGN") => r"(?:0|[1-9][0-9]*)(?:\.[0-9]+)?[eE][+-][^0-9]";257		lit("STRING_DOUBLE") => "\"(?s:[^\"\\\\]|\\\\.)*\"";258		error("STRING_DOUBLE_UNTERMINATED") => "\"(?s:[^\"\\\\]|\\\\.)*";259		lit("STRING_SINGLE") => "'(?s:[^'\\\\]|\\\\.)*'";260		error("STRING_SINGLE_UNTERMINATED") => "'(?s:[^'\\\\]|\\\\.)*";261		lit("STRING_DOUBLE_VERBATIM") => "@\"(?:[^\"]|\"\")*\"";262		error("STRING_DOUBLE_VERBATIM_UNTERMINATED") => "@\"(?:[^\"]|\"\")*";263		lit("STRING_SINGLE_VERBATIM") => "@'(?:[^']|'')*'";264		error("STRING_SINGLE_VERBATIM_UNTERMINATED") => "@'(?:[^']|'')*";265		error("STRING_VERBATIM_MISSING_QUOTES") => "@[^\"'\\s]\\S+";266		lit("STRING_BLOCK") => r"\|\|\|", "crate::string_block::lex_str_block_test";267		error("STRING_BLOCK_UNEXPECTED_END", lexer = true);268		error("STRING_BLOCK_MISSING_NEW_LINE", lexer = true);269		error("STRING_BLOCK_MISSING_TERMINATION", lexer = true);270		error("STRING_BLOCK_MISSING_INDENT", lexer = true);271		lit("IDENT") => r"[_a-zA-Z][_a-zA-Z0-9]*";272		lit("WHITESPACE") => r"[ \t\n\r]+";273		lit("SINGLE_LINE_SLASH_COMMENT") => r"//[^\r\n]*?(\r\n|\n)?";274		lit("SINGLE_LINE_HASH_COMMENT") => r"#[^\r\n]*?(\r\n|\n)?";275		lit("MULTI_LINE_COMMENT") => r"/\*([^*]|\*[^/])*\*/";276		error("COMMENT_TOO_SHORT") => r"/\*/";277		error("COMMENT_UNTERMINATED") =>  r"/\*([^*/]|\*[^/])+";278	];279	kinds280}
modifiedxtask/src/sourcegen/mod.rsdiffbeforeafterboth
--- a/xtask/src/sourcegen/mod.rs
+++ b/xtask/src/sourcegen/mod.rs
@@ -89,7 +89,7 @@
 		kinds.define_node(&name);
 	}
 
-	let syntax_kinds = generate_syntax_kinds(&kinds, &ast)?;
+	let syntax_kinds = generate_syntax_kinds(&kinds, &ast, false)?;
 
 	let nodes = generate_nodes(&kinds, &ast)?;
 	ensure_file_contents(
@@ -106,12 +106,21 @@
 		)),
 		&nodes,
 	);
+
+	let lexer_syntax_kinds = generate_syntax_kinds(&kinds, &ast, true)?;
+	ensure_file_contents(
+		&PathBuf::from(concat!(
+			env!("CARGO_MANIFEST_DIR"),
+			"/../crates/jrsonnet-lexer/src/generated/syntax_kinds.rs",
+		)),
+		&lexer_syntax_kinds,
+	);
 	Ok(())
 }
 
-fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc) -> Result<String> {
+fn generate_syntax_kinds(kinds: &KindsSrc, grammar: &AstSrc, lexer: bool) -> Result<String> {
 	let t_macros = kinds.tokens().filter_map(TokenKind::expand_t_macros);
-	let token_kinds = kinds.tokens().map(TokenKind::expand_kind);
+	let token_kinds = kinds.tokens().map(|t| t.expand_kind(lexer));
 
 	let keywords = kinds
 		.tokens()
@@ -119,12 +128,16 @@
 		.map(TokenKind::name)
 		.map(|n| format_ident!("{n}"));
 
-	let nodes = kinds
+	let mut nodes = kinds
 		.nodes
 		.iter()
 		.map(|name| format_ident!("{}", name))
 		.collect::<Vec<_>>();
 
+	if lexer {
+		nodes.clear();
+	}
+
 	let enums = grammar
 		.enums
 		.iter()
@@ -134,14 +147,34 @@
 				.token_enums
 				.iter()
 				.map(|e| format_ident!("{}", to_upper_snake_case(&e.name))),
-		);
+		)
+		.collect::<Vec<_>>();
+	let is_enum = if lexer {
+		quote! {}
+	} else {
+		quote! {
+			pub fn is_enum(self) -> bool {
+				match self {
+					#(#enums)|* => true,
+					_ => false,
+				}
+			}
+		}
+	};
 
+	let derive_logos = if lexer {
+		quote! {
+			, logos::Logos
+		}
+	} else {
+		quote! {}
+	};
+
 	let ast = quote! {
 		#![allow(bad_style, missing_docs, unreachable_pub, clippy::manual_non_exhaustive, clippy::match_like_matches_macro)]
-		use logos::Logos;
 
 		/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
-		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Logos)]
+		#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug #derive_logos)]
 		#[repr(u16)]
 		pub enum SyntaxKind {
 			#[doc(hidden)]
@@ -164,13 +197,9 @@
 					_ => false,
 				}
 			}
-			pub fn is_enum(self) -> bool {
-				match self {
-					#(#enums)|* => true,
-					_ => false,
-				}
-			}
 
+			#is_enum
+
 			pub fn from_raw(r: u16) -> Self {
 				assert!(r < Self::__LAST as u16);
 				unsafe { std::mem::transmute(r) }