1#[derive(Clone, Copy, Debug, PartialEq, Eq)]2pub enum StringBlockError {3 UnexpectedEnd,4 MissingNewLine,5 MissingTermination,6 MissingIndent,7}89use StringBlockError::*;10use logos::Lexer;1112use crate::generated::syntax_kinds::SyntaxKind;1314pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {15 let _ = lex_str_block(lex);16}1718pub(crate) struct Context<'a> {19 source: &'a str,20 index: usize,21}2223impl<'a> Context<'a> {24 fn rest(&self) -> &'a str {25 &self.source[self.index..]26 }2728 fn next(&mut self) -> Option<char> {29 if self.index == self.source.len() {30 return None;31 }3233 match self.rest().chars().next() {34 None => None,35 Some(c) => {36 self.index += c.len_utf8();37 Some(c)38 }39 }40 }4142 fn peek(&self) -> Option<char> {43 if self.index == self.source.len() {44 return None;45 }4647 self.rest().chars().next()48 }4950 fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {51 if self.peek().is_some_and(f) {52 self.index += 1;53 return 1;54 }55 056 }5758 fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {59 if self.index == self.source.len() {60 return 0;61 }6263 let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));6465 match next_char {66 None => {67 let diff = self.source.len() - self.index;68 self.index = self.source.len();69 diff70 }71 Some((idx, _)) => {72 self.index += idx;73 idx74 }75 }76 }7778 fn skip(&mut self, len: usize) {79 self.index = match self.index + len {80 n if n > self.source.len() => self.source.len(),81 n => n,82 };83 }84}8586878889fn check_whitespace(a: &str, b: &str) -> usize {90 let a = a.as_bytes();91 let b = b.as_bytes();9293 for i in 0..a.len() {94 if a[i] != b' ' && a[i] != b'\t' {95 96 return i;97 }9899 if i >= b.len() {100 101 return 0;102 }103104 if a[i] != b[i] {105 106 return 0;107 }108 }109110 111 a.len()112}113114pub(crate) trait StrBlockLexCtx<'d> {115 fn remainder(&self) -> &'d str;116 fn eat_error(&mut self, ctx: &Context<'d>);117 fn bump_pos(&mut self, s: usize);118 fn mark_truncating(&mut self);119 fn mark_line(&mut self, line: &'d str);120}121122impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {123 fn remainder(&self) -> &'d str {124 self.remainder()125 }126 fn eat_error(&mut self, ctx: &Context<'d>) {127 let end_index = ctx128 .rest()129 .find("|||")130 .map_or_else(|| ctx.rest().len(), |v| v + 3);131 self.bump(ctx.index + end_index);132 }133 fn bump_pos(&mut self, s: usize) {134 self.bump(s);135 }136 fn mark_truncating(&mut self) {137 138 }139 fn mark_line(&mut self, _line: &'d str) {140 141 }142}143144pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {145 let mut collect = CollectStrBlock {146 truncate: false,147 lines: vec![],148 input,149 offset: 0,150 };151 lex_str_block(&mut collect)?;152 Ok(collect)153}154155pub struct CollectStrBlock<'s> {156 pub truncate: bool,157 pub lines: Vec<&'s str>,158 input: &'s str,159 offset: usize,160}161162impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {163 fn remainder(&self) -> &'d str {164 self.input165 }166167 fn eat_error(&mut self, _ctx: &Context<'d>) {168 169 }170171 fn bump_pos(&mut self, s: usize) {172 self.offset += s;173 }174175 fn mark_truncating(&mut self) {176 self.truncate = true;177 }178179 fn mark_line(&mut self, line: &'d str) {180 self.lines.push(line);181 }182}183184pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {185 186 let mut ctx = Context::<'a> {187 source: lex.remainder(),188 index: 0,189 };190191 if ctx.eat_if(|v| v == '-') != 0 {192 lex.mark_truncating();193 }194195 196 ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');197198 199 match ctx.next() {200 Some('\n') => (),201 None => {202 lex.eat_error(&ctx);203 return Err(UnexpectedEnd);204 }205 206 Some(_) => {207 lex.eat_error(&ctx);208 return Err(MissingNewLine);209 }210 }211212 213 while ctx.peek() == Some('\n') {214 lex.mark_line("");215 ctx.next();216 }217218 let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());219 let str_block_indent = &ctx.rest()[..num_whitespace];220221 if num_whitespace == 0 {222 223 lex.eat_error(&ctx);224 return Err(MissingIndent);225 }226227 loop {228 debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");229 ctx.skip(num_whitespace);230231 let rest = ctx.rest();232 match rest.find('\n') {233 None => {234 ctx.index = ctx.source.len();235 lex.eat_error(&ctx);236 return Err(UnexpectedEnd);237 }238 Some(nl_pos) => {239 lex.mark_line(&rest[..nl_pos]);240 ctx.index += nl_pos + 1;241 }242 }243244 245 while ctx.peek() == Some('\n') {246 lex.mark_line("");247 ctx.next();248 }249250 251 num_whitespace = check_whitespace(str_block_indent, ctx.rest());252 if num_whitespace == 0 {253 254 255 while let Some(' ' | '\t') = ctx.peek() {256 257 ctx.next().unwrap();258 259 }260261 if !ctx.rest().starts_with("|||") {262 if ctx.rest().is_empty() {263 lex.bump_pos(ctx.index);264 return Err(UnexpectedEnd);265 }266 lex.eat_error(&ctx);267 return Err(MissingTermination);268 }269270 271 ctx.skip(3);272 break;273 }274 }275276 lex.bump_pos(ctx.index);277 Ok(())278}