1#[derive(Clone, Copy, Debug, PartialEq, Eq)]2pub enum StringBlockError {3 UnexpectedEnd,4 MissingNewLine,5 MissingTermination,6 MissingIndent,7}89use logos::Lexer;10use StringBlockError::*;1112use crate::SyntaxKind;1314pub(crate) fn lex_str_block_test<'d>(lex: &mut Lexer<'d, SyntaxKind>) {15 let _ = lex_str_block(lex);16}1718pub(crate) struct Context<'a> {19 source: &'a str,20 index: usize,21}2223impl<'a> Context<'a> {24 fn rest(&self) -> &'a str {25 &self.source[self.index..]26 }2728 fn next(&mut self) -> Option<char> {29 if self.index == self.source.len() {30 return None;31 }3233 match self.rest().chars().next() {34 None => None,35 Some(c) => {36 self.index += c.len_utf8();37 Some(c)38 }39 }40 }4142 fn peek(&self) -> Option<char> {43 if self.index == self.source.len() {44 return None;45 }4647 self.rest().chars().next()48 }4950 fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {51 if self.peek().map(f).unwrap_or(false) {52 self.index += 1;53 return 1;54 }55 056 }5758 fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {59 if self.index == self.source.len() {60 return 0;61 }6263 let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));6465 match next_char {66 None => {67 let diff = self.source.len() - self.index;68 self.index = self.source.len();69 diff70 }71 Some((idx, _)) => {72 self.index += idx;73 idx74 }75 }76 }7778 fn skip(&mut self, len: usize) {79 self.index = match self.index + len {80 n if n > self.source.len() => self.source.len(),81 n => n,82 };83 }84}8586878889fn check_whitespace(a: &str, b: &str) -> usize {90 let a = a.as_bytes();91 let b = b.as_bytes();9293 for i in 0..a.len() {94 if a[i] != b' ' && a[i] != b'\t' {95 96 return i;97 }9899 if i >= b.len() {100 101 return 0;102 }103104 if a[i] != b[i] {105 106 return 0;107 }108 }109110 111 a.len()112}113114pub(crate) trait StrBlockLexCtx<'d> {115 fn remainder(&self) -> &'d str;116 fn eat_error(&mut self, ctx: &Context<'d>);117 fn bump_pos(&mut self, s: usize);118 fn mark_truncating(&mut self);119 fn mark_line(&mut self, line: &'d str);120}121122impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {123 fn remainder(&self) -> &'d str {124 self.remainder()125 }126 fn eat_error(&mut self, ctx: &Context<'d>) {127 let end_index = ctx128 .rest()129 .find("|||")130 .map_or_else(|| ctx.rest().len(), |v| v + 3);131 self.bump(ctx.index + end_index);132 }133 fn bump_pos(&mut self, s: usize) {134 self.bump(s);135 }136 fn mark_truncating(&mut self) {137 138 }139 fn mark_line(&mut self, _line: &'d str) {140 141 }142}143144pub fn collect_lexed_str_block<'s>(145 input: &'s str,146) -> Result<CollectStrBlock<'s>, StringBlockError> {147 let mut collect = CollectStrBlock {148 truncate: false,149 lines: vec![],150 input,151 offset: 0,152 };153 lex_str_block(&mut collect)?;154 Ok(collect)155}156157pub struct CollectStrBlock<'s> {158 pub truncate: bool,159 pub lines: Vec<&'s str>,160 input: &'s str,161 offset: usize,162}163164impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {165 fn remainder(&self) -> &'d str {166 self.input167 }168169 fn eat_error(&mut self, _ctx: &Context<'d>) {170 171 }172173 fn bump_pos(&mut self, s: usize) {174 self.offset += s;175 }176177 fn mark_truncating(&mut self) {178 self.truncate = true;179 }180181 fn mark_line(&mut self, line: &'d str) {182 self.lines.push(line)183 }184}185186pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {187 188 let mut ctx = Context::<'a> {189 source: lex.remainder(),190 index: 0,191 };192193 if ctx.eat_if(|v| v == '-') != 0 {194 lex.mark_truncating();195 }196197 198 ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');199200 201 match ctx.next() {202 Some('\n') => (),203 None => {204 lex.eat_error(&ctx);205 return Err(UnexpectedEnd);206 }207 208 Some(_) => {209 lex.eat_error(&ctx);210 return Err(MissingNewLine);211 }212 }213214 215 while ctx.peek() == Some('\n') {216 ctx.next();217 }218219 let mut num_whitespace = check_whitespace(ctx.rest(), ctx.rest());220 let str_block_indent = &ctx.rest()[..num_whitespace];221222 if num_whitespace == 0 {223 224 lex.eat_error(&ctx);225 return Err(MissingIndent);226 }227228 loop {229 debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");230 ctx.skip(num_whitespace);231232 let line_start = ctx.index;233 let mut line_size = 0;234 loop {235 match ctx.next() {236 None => {237 lex.eat_error(&ctx);238 return Err(UnexpectedEnd);239 }240 Some('\n') => {241 lex.mark_line(&ctx.source[line_start..line_start + line_size]);242 break;243 }244 Some(c) => {245 line_size += c.len_utf8();246 }247 }248 }249250 251 while ctx.peek() == Some('\n') {252 lex.mark_line("");253 ctx.next();254 }255256 257 num_whitespace = check_whitespace(str_block_indent, ctx.rest());258 if num_whitespace == 0 {259 260 261 while let Some(' ' | '\t') = ctx.peek() {262 263 ctx.next().unwrap();264 265 }266267 if !ctx.rest().starts_with("|||") {268 if ctx.rest().is_empty() {269 lex.bump_pos(ctx.index);270 return Err(UnexpectedEnd);271 }272 lex.eat_error(&ctx);273 return Err(MissingTermination);274 }275276 277 ctx.skip(3);278 break;279 }280 }281282 lex.bump_pos(ctx.index);283 Ok(())284}