From d32a788bb470b1d9a1514c0a604b58ab5f1b883b Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Thu, 12 Feb 2026 15:26:25 +0000 Subject: [PATCH] feat(fmt): reformat text block --- --- a/crates/jrsonnet-formatter/src/lib.rs +++ b/crates/jrsonnet-formatter/src/lib.rs @@ -9,6 +9,7 @@ }; use hi_doc::{Formatting, SnippetBuilder}; use jrsonnet_rowan_parser::{ + collect_lexed_str_block, nodes::{ Arg, ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart, DestructRest, Expr, ExprBase, FieldName, ForSpec, IfSpec, ImportKind, Literal, Member, @@ -83,6 +84,14 @@ $o.push_signal(dprint_core::formatting::Signal::FinishIndent); pi!(@s; $o: $($t)*); }}; + (@s; $o:ident: >ii $($t:tt)*) => {{ + $o.push_signal(dprint_core::formatting::Signal::StartIgnoringIndent); + pi!(@s; $o: $($t)*); + }}; + (@s; $o:ident: {{ + $o.push_signal(dprint_core::formatting::Signal::FinishIgnoringIndent); + pi!(@s; $o: $($t)*); + }}; (@s; $o:ident: info($v:expr) $($t:tt)*) => {{ $o.push_info($v); pi!(@s; $o: $($t)*); @@ -201,14 +210,28 @@ fn print(&self, out: &mut PrintItems) { if matches!(self.kind(), TextKind::StringBlock) { let text = self.text(); + let mut text = collect_lexed_str_block(&text[3..]) + .expect("formatting is not performed on code with parsing errors"); - for (i, ele) in text.split("\n").enumerate() { - if i != 0 { - p!(out, nl); + if text.truncate && text.lines.ends_with(&[""]) { + text.truncate = false; + text.lines.pop(); + } + + p!(out, str("|||")); + if text.truncate { + p!(out, str("-")); + } + p!(out, nl > i); + for ele in text.lines { + if ele.is_empty() { + p!(out, >ii nl ) { +pub(crate) fn lex_str_block_test<'d>(lex: &mut Lexer<'d, SyntaxKind>) { let _ = lex_str_block(lex); } -#[allow(clippy::too_many_lines)] -pub fn lex_str_block(lex: &mut Lexer) -> Result<(), StringBlockError> { - struct Context<'a> { - source: &'a str, - index: usize, - offset: usize, +pub(crate) struct Context<'a> { + source: &'a str, + index: usize, +} + +impl<'a> Context<'a> { + fn rest(&self) -> &'a str { + &self.source[self.index..] } - impl<'a> Context<'a> { - fn rest(&self) -> &'a str { - &self.source[self.index..] + fn next(&mut self) -> Option { + if self.index == self.source.len() { + return None; } - fn next(&mut self) -> Option { - if self.index == self.source.len() { - return None; + match self.rest().chars().next() { + None => None, + Some(c) => { + self.index += c.len_utf8(); + Some(c) } + } + } - match self.rest().chars().next() { - None => None, - Some(c) => { - self.index += c.len_utf8(); - Some(c) - } - } + fn peek(&self) -> Option { + if self.index == self.source.len() { + return None; } - fn peek(&self) -> Option { - if self.index == self.source.len() { - return None; - } + self.rest().chars().next() + } - self.rest().chars().next() + fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize { + if self.peek().map(f).unwrap_or(false) { + self.index += 1; + return 1; } + 0 + } - fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize { - if self.peek().map(f).unwrap_or(false) { - self.index += 1; - return 1; - } - 0 + fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize { + if self.index == self.source.len() { + return 0; } - fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize { - if self.index == self.source.len() { - return 0; - } + let next_char = self.rest().char_indices().find(|(_, c)| !f(*c)); - let next_char = self.rest().char_indices().find(|(_, c)| !f(*c)); - - match next_char { - None => { - let diff = self.source.len() - self.index; - self.index = self.source.len(); - diff - } - Some((idx, _)) => { - self.index += idx; - idx - } + match next_char { + None => { + let diff = self.source.len() - self.index; + self.index = self.source.len(); + diff } - } - - fn skip(&mut self, len: usize) { - self.index = match self.index + len { - n if n > self.source.len() => self.source.len(), - n => n, - }; - } - - #[allow(clippy::range_plus_one)] - fn pos(&self) -> Range { - if self.index == self.source.len() { - self.offset + self.index..self.offset + self.index - } else { - // TODO: char size - self.offset + self.index..self.offset + self.index + 1 + Some((idx, _)) => { + self.index += idx; + idx } } } - // Check that b has at least the same whitespace prefix as a and returns the - // amount of this whitespace, otherwise returns 0. If a has no whitespace - // prefix than return 0. - fn check_whitespace(a: &str, b: &str) -> usize { - let a = a.as_bytes(); - let b = b.as_bytes(); + fn skip(&mut self, len: usize) { + self.index = match self.index + len { + n if n > self.source.len() => self.source.len(), + n => n, + }; + } +} - for i in 0..a.len() { - if a[i] != b' ' && a[i] != b'\t' { - // a has run out of whitespace and b matched up to this point. Return result. - return i; - } +// Check that b has at least the same whitespace prefix as a and returns the +// amount of this whitespace, otherwise returns 0. If a has no whitespace +// prefix than return 0. +fn check_whitespace(a: &str, b: &str) -> usize { + let a = a.as_bytes(); + let b = b.as_bytes(); - if i >= b.len() { - // We ran off the edge of b while a still has whitespace. Return 0 as failure. - return 0; - } + for i in 0..a.len() { + if a[i] != b' ' && a[i] != b'\t' { + // a has run out of whitespace and b matched up to this point. Return result. + return i; + } - if a[i] != b[i] { - // a has whitespace but b does not. Return 0 as failure. - return 0; - } + if i >= b.len() { + // We ran off the edge of b while a still has whitespace. Return 0 as failure. + return 0; } - // We ran off the end of a and b kept up - a.len() + if a[i] != b[i] { + // a has whitespace but b does not. Return 0 as failure. + return 0; + } } - fn guess_token_end_and_bump<'a>(lex: &mut Lexer<'a, SyntaxKind>, ctx: &Context<'a>) { + // We ran off the end of a and b kept up + a.len() +} + +pub(crate) trait StrBlockLexCtx<'d> { + fn remainder(&self) -> &'d str; + fn eat_error(&mut self, ctx: &Context<'d>); + fn bump_pos(&mut self, s: usize); + fn mark_truncating(&mut self); + fn mark_line(&mut self, line: &'d str); +} + +impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> { + fn remainder(&self) -> &'d str { + self.remainder() + } + fn eat_error(&mut self, ctx: &Context<'d>) { let end_index = ctx .rest() .find("|||") .map_or_else(|| ctx.rest().len(), |v| v + 3); - lex.bump(ctx.index + end_index); + self.bump(ctx.index + end_index); } + fn bump_pos(&mut self, s: usize) { + self.bump(s); + } + fn mark_truncating(&mut self) { + // Lexer test doesn't collect anything + } + fn mark_line(&mut self, _line: &'d str) { + // Lexer test doesn't collect anything + } +} - debug_assert_eq!(lex.slice(), "|||"); - let mut ctx = Context { +pub fn collect_lexed_str_block<'s>( + input: &'s str, +) -> Result, StringBlockError> { + let mut collect = CollectStrBlock { + truncate: false, + lines: vec![], + input, + offset: 0, + }; + lex_str_block(&mut collect)?; + Ok(collect) +} + +pub struct CollectStrBlock<'s> { + pub truncate: bool, + pub lines: Vec<&'s str>, + input: &'s str, + offset: usize, +} + +impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> { + fn remainder(&self) -> &'d str { + self.input + } + + fn eat_error(&mut self, _ctx: &Context<'d>) { + // Error will be returned, no need to record it here + } + + fn bump_pos(&mut self, s: usize) { + self.offset += s; + } + + fn mark_truncating(&mut self) { + self.truncate = true; + } + + fn mark_line(&mut self, line: &'d str) { + self.lines.push(line) + } +} + +pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> { + // debug_assert_eq!(lex.slice(), "|||"); + let mut ctx = Context::<'a> { source: lex.remainder(), index: 0, - offset: lex.span().end, }; - ctx.eat_if(|v| v == '-'); + if ctx.eat_if(|v| v == '-') != 0 { + lex.mark_truncating(); + } // Skip whitespaces ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r'); @@ -150,12 +201,12 @@ match ctx.next() { Some('\n') => (), None => { - guess_token_end_and_bump(lex, &ctx); + lex.eat_error(&ctx); return Err(UnexpectedEnd); } // Text block requires new line after |||. Some(_) => { - guess_token_end_and_bump(lex, &ctx); + lex.eat_error(&ctx); return Err(MissingNewLine); } } @@ -170,7 +221,7 @@ if num_whitespace == 0 { // Text block's first line must start with whitespace - guess_token_end_and_bump(lex, &ctx); + lex.eat_error(&ctx); return Err(MissingIndent); } @@ -178,19 +229,27 @@ debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace"); ctx.skip(num_whitespace); + let line_start = ctx.index; + let mut line_size = 0; loop { match ctx.next() { None => { - guess_token_end_and_bump(lex, &ctx); + lex.eat_error(&ctx); return Err(UnexpectedEnd); } - Some('\n') => break, - Some(_) => (), + Some('\n') => { + lex.mark_line(&ctx.source[line_start..line_start + line_size]); + break; + } + Some(c) => { + line_size += c.len_utf8(); + } } } // Skip any blank lines while ctx.peek() == Some('\n') { + lex.mark_line(""); ctx.next(); } @@ -206,15 +265,11 @@ } if !ctx.rest().starts_with("|||") { - // Text block not terminated with ||| - let pos = ctx.pos(); - if pos.is_empty() { - // eof - lex.bump(ctx.index); + if ctx.rest().is_empty() { + lex.bump_pos(ctx.index); return Err(UnexpectedEnd); } - - guess_token_end_and_bump(lex, &ctx); + lex.eat_error(&ctx); return Err(MissingTermination); } @@ -224,6 +279,6 @@ } } - lex.bump(ctx.index); + lex.bump_pos(ctx.index); Ok(()) } -- gitstuff