git.delta.rocks / jrsonnet / refs/commits / 50afc8afc49d

difftreelog

source

crates/jrsonnet-lexer/src/string_block.rs5.7 KiBsourcehistory
1#[derive(Clone, Copy, Debug, PartialEq, Eq)]2pub enum StringBlockError {3	UnexpectedEnd,4	MissingNewLine,5	MissingTermination,6	MissingIndent,7}89use StringBlockError::*;10use logos::Lexer;1112use crate::generated::syntax_kinds::SyntaxKind;1314pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {15	let _ = lex_str_block(lex);16}1718pub(crate) struct Context<'a> {19	source: &'a str,20	index: usize,21}2223impl<'a> Context<'a> {24	fn rest(&self) -> &'a str {25		&self.source[self.index..]26	}2728	fn next(&mut self) -> Option<char> {29		if self.index == self.source.len() {30			return None;31		}3233		match self.rest().chars().next() {34			None => None,35			Some(c) => {36				self.index += c.len_utf8();37				Some(c)38			}39		}40	}4142	fn peek(&self) -> Option<char> {43		if self.index == self.source.len() {44			return None;45		}4647		self.rest().chars().next()48	}4950	fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {51		if self.peek().is_some_and(f) {52			self.index += 1;53			return 1;54		}55		056	}5758	fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {59		if self.index == self.source.len() {60			return 0;61		}6263		let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));6465		match next_char {66			None => {67				let diff = self.source.len() - self.index;68				self.index = self.source.len();69				diff70			}71			Some((idx, _)) => {72				self.index += idx;73				idx74			}75		}76	}7778	fn skip(&mut self, len: usize) {79		self.index = match self.index + len {80			n if n > self.source.len() => self.source.len(),81			n => n,82		};83	}84}8586// Check that b has at least the same whitespace prefix as a and returns the87// amount of this whitespace, otherwise returns 0.  If a has no whitespace88// prefix than return 0.89fn check_whitespace(a: &str, b: &str) -> usize {90	let a = a.as_bytes();91	let b = b.as_bytes();9293	for i in 0..a.len() {94		if a[i] != b' ' && a[i] != b'\t' {95			// a has run out of whitespace and b matched up to this point. Return result.96			return i;97		}9899		if i >= b.len() {100			// We ran off the edge of b while a still has whitespace. Return 0 as failure.101			return 0;102		}103104		if a[i] != b[i] {105			// a has whitespace but b does not. Return 0 as failure.106			return 0;107		}108	}109110	// We ran off the end of a and b kept up111	a.len()112}113114pub(crate) trait StrBlockLexCtx<'d> {115	fn remainder(&self) -> &'d str;116	fn eat_error(&mut self, ctx: &Context<'d>);117	fn bump_pos(&mut self, s: usize);118	fn mark_truncating(&mut self);119	fn mark_line(&mut self, line: &'d str);120}121122impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {123	fn remainder(&self) -> &'d str {124		self.remainder()125	}126	fn eat_error(&mut self, ctx: &Context<'d>) {127		let end_index = ctx128			.rest()129			.find("|||")130			.map_or_else(|| ctx.rest().len(), |v| v + 3);131		self.bump(ctx.index + end_index);132	}133	fn bump_pos(&mut self, s: usize) {134		self.bump(s);135	}136	fn mark_truncating(&mut self) {137		// Lexer test doesn't collect anything138	}139	fn mark_line(&mut self, _line: &'d str) {140		// Lexer test doesn't collect anything141	}142}143144pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {145	let mut collect = CollectStrBlock {146		truncate: false,147		lines: vec![],148		input,149		offset: 0,150	};151	lex_str_block(&mut collect)?;152	Ok(collect)153}154155pub struct CollectStrBlock<'s> {156	pub truncate: bool,157	pub lines: Vec<&'s str>,158	input: &'s str,159	offset: usize,160}161162impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {163	fn remainder(&self) -> &'d str {164		self.input165	}166167	fn eat_error(&mut self, _ctx: &Context<'d>) {168		// Error will be returned, no need to record it here169	}170171	fn bump_pos(&mut self, s: usize) {172		self.offset += s;173	}174175	fn mark_truncating(&mut self) {176		self.truncate = true;177	}178179	fn mark_line(&mut self, line: &'d str) {180		self.lines.push(line);181	}182}183184pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {185	// debug_assert_eq!(lex.slice(), "|||");186	let mut ctx = Context::<'a> {187		source: lex.remainder(),188		index: 0,189	};190191	if ctx.eat_if(|v| v == '-') != 0 {192		lex.mark_truncating();193	}194195	// Skip whitespaces196	ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');197198	// Skip \n199	match ctx.next() {200		Some('\n') => (),201		None => {202			lex.eat_error(&ctx);203			return Err(UnexpectedEnd);204		}205		// Text block requires new line after |||.206		Some(_) => {207			lex.eat_error(&ctx);208			return Err(MissingNewLine);209		}210	}211212	// Process leading blank lines before calculating string block indent213	while ctx.peek() == Some('\n') {214		lex.mark_line("");215		ctx.next();216	}217218	let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());219	let str_block_indent = &ctx.rest()[..num_whitespace];220221	if num_whitespace == 0 {222		// Text block's first line must start with whitespace223		lex.eat_error(&ctx);224		return Err(MissingIndent);225	}226227	loop {228		debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");229		ctx.skip(num_whitespace);230231		let rest = ctx.rest();232		match rest.find('\n') {233			None => {234				ctx.index = ctx.source.len();235				lex.eat_error(&ctx);236				return Err(UnexpectedEnd);237			}238			Some(nl_pos) => {239				lex.mark_line(&rest[..nl_pos]);240				ctx.index += nl_pos + 1;241			}242		}243244		// Skip any blank lines245		while ctx.peek() == Some('\n') {246			lex.mark_line("");247			ctx.next();248		}249250		// Look at the next line251		num_whitespace = check_whitespace(str_block_indent, ctx.rest());252		if num_whitespace == 0 {253			// End of the text block254			// let mut term_indent = String::with_capacity(num_whitespace);255			while let Some(' ' | '\t') = ctx.peek() {256				// term_indent.push(257				ctx.next().unwrap();258				// );259			}260261			if !ctx.rest().starts_with("|||") {262				if ctx.rest().is_empty() {263					lex.bump_pos(ctx.index);264					return Err(UnexpectedEnd);265				}266				lex.eat_error(&ctx);267				return Err(MissingTermination);268			}269270			// Skip '|||'271			ctx.skip(3);272			break;273		}274	}275276	lex.bump_pos(ctx.index);277	Ok(())278}