`d32a788bb470`

6 files changed

modified crates/jrsonnet-formatter/src/lib.rs diff before after both

--- a/crates/jrsonnet-formatter/src/lib.rs
+++ b/crates/jrsonnet-formatter/src/lib.rs
@@ -9,6 +9,7 @@
 };
 use hi_doc::{Formatting, SnippetBuilder};
 use jrsonnet_rowan_parser::{
+	collect_lexed_str_block,
 	nodes::{
 		Arg, ArgsDesc, Assertion, BinaryOperator, Bind, CompSpec, Destruct, DestructArrayPart,
 		DestructRest, Expr, ExprBase, FieldName, ForSpec, IfSpec, ImportKind, Literal, Member,
@@ -83,6 +84,14 @@
 		$o.push_signal(dprint_core::formatting::Signal::FinishIndent);
 		pi!(@s; $o: $($t)*);
 	}};
+	(@s; $o:ident: >ii $($t:tt)*) => {{
+		$o.push_signal(dprint_core::formatting::Signal::StartIgnoringIndent);
+		pi!(@s; $o: $($t)*);
+	}};
+	(@s; $o:ident: <ii $($t:tt)*) => {{
+		$o.push_signal(dprint_core::formatting::Signal::FinishIgnoringIndent);
+		pi!(@s; $o: $($t)*);
+	}};
 	(@s; $o:ident: info($v:expr) $($t:tt)*) => {{
 		$o.push_info($v);
 		pi!(@s; $o: $($t)*);
@@ -201,14 +210,28 @@
 	fn print(&self, out: &mut PrintItems) {
 		if matches!(self.kind(), TextKind::StringBlock) {
 			let text = self.text();
+			let mut text = collect_lexed_str_block(&text[3..])
+				.expect("formatting is not performed on code with parsing errors");
 
-			for (i, ele) in text.split("\n").enumerate() {
-				if i != 0 {
-					p!(out, nl);
+			if text.truncate && text.lines.ends_with(&[""]) {
+				text.truncate = false;
+				text.lines.pop();
+			}
+
+			p!(out, str("|||"));
+			if text.truncate {
+				p!(out, str("-"));
+			}
+			p!(out, nl > i);
+			for ele in text.lines {
+				if ele.is_empty() {
+					p!(out, >ii nl <ii);
+				} else {
+					p!(out, string(ele.to_string()) nl);
 				}
-				// TODO: Trim and recreate whitespace
-				p!(out, string(ele.to_string()));
 			}
+			p!(out, <i str("|||"));
+
 			return;
 		}
 		p!(out, string(format!("{}", self)));

modified crates/jrsonnet-formatter/src/snapshots/jrsonnet_formatter__tests__snapshots@string_styles.jsonnet.snap diff before after both

--- a/crates/jrsonnet-formatter/src/snapshots/jrsonnet_formatter__tests__snapshots@string_styles.jsonnet.snap
+++ b/crates/jrsonnet-formatter/src/snapshots/jrsonnet_formatter__tests__snapshots@string_styles.jsonnet.snap
@@ -8,7 +8,18 @@
    single_quote: 'hello world',
    escaped: 'line1\nline2',
    multiline: |||
-       This is a
-       multiline string
-     |||,
+      This is a
+
+      multiline string
+   |||,
+   multiline_truncated: |||-
+      This is a
+
+      multiline string with truncated newline
+   |||,
+   multiline_to_truncated: |||
+      This is a
+
+      multiline string with to-be truncated newline
+   |||,
 }

modified crates/jrsonnet-formatter/src/tests.rs diff before after both

--- a/crates/jrsonnet-formatter/src/tests.rs
+++ b/crates/jrsonnet-formatter/src/tests.rs
@@ -3,7 +3,6 @@
 use std::fs;
 
 use dprint_core::formatting::{PrintItems, PrintOptions};
-use indoc::indoc;
 use insta::{assert_snapshot, glob};
 
 use crate::Printable;

modified crates/jrsonnet-formatter/src/tests/string_styles.jsonnet diff before after both

4  escaped: 'line1\nline2',4  escaped: 'line1\nline2',
5  multiline: |||5  multiline: |||
6    This is a6    This is a
7
7    multiline string8    multiline string
8  |||,9  |||,
10  multiline_truncated: |||-
11    This is a
12
13    multiline string with truncated newline
14  |||,
15  multiline_to_truncated: |||-
16    This is a
17
18    multiline string with to-be truncated newline
19
20  |||,
9}21}
1022

modified crates/jrsonnet-rowan-parser/src/lib.rs diff before after both

--- a/crates/jrsonnet-rowan-parser/src/lib.rs
+++ b/crates/jrsonnet-rowan-parser/src/lib.rs
@@ -22,6 +22,7 @@
 pub use generated::{nodes, syntax_kinds::SyntaxKind};
 pub use language::*;
 pub use token_set::SyntaxKindSet;
+pub use string_block::{collect_lexed_str_block, CollectStrBlock};
 
 use self::{
 	ast::support,

modified crates/jrsonnet-rowan-parser/src/string_block.rs diff before after both

--- a/crates/jrsonnet-rowan-parser/src/string_block.rs
+++ b/crates/jrsonnet-rowan-parser/src/string_block.rs
@@ -6,142 +6,193 @@
 	MissingIndent,
 }
 
-use std::ops::Range;
-
 use logos::Lexer;
 use StringBlockError::*;
 
 use crate::SyntaxKind;
 
-pub fn lex_str_block_test(lex: &mut Lexer<SyntaxKind>) {
+pub(crate) fn lex_str_block_test<'d>(lex: &mut Lexer<'d, SyntaxKind>) {
 	let _ = lex_str_block(lex);
 }
 
-#[allow(clippy::too_many_lines)]
-pub fn lex_str_block(lex: &mut Lexer<SyntaxKind>) -> Result<(), StringBlockError> {
-	struct Context<'a> {
-		source: &'a str,
-		index: usize,
-		offset: usize,
+pub(crate) struct Context<'a> {
+	source: &'a str,
+	index: usize,
+}
+
+impl<'a> Context<'a> {
+	fn rest(&self) -> &'a str {
+		&self.source[self.index..]
 	}
 
-	impl<'a> Context<'a> {
-		fn rest(&self) -> &'a str {
-			&self.source[self.index..]
+	fn next(&mut self) -> Option<char> {
+		if self.index == self.source.len() {
+			return None;
 		}
 
-		fn next(&mut self) -> Option<char> {
-			if self.index == self.source.len() {
-				return None;
+		match self.rest().chars().next() {
+			None => None,
+			Some(c) => {
+				self.index += c.len_utf8();
+				Some(c)
 			}
+		}
+	}
 
-			match self.rest().chars().next() {
-				None => None,
-				Some(c) => {
-					self.index += c.len_utf8();
-					Some(c)
-				}
-			}
+	fn peek(&self) -> Option<char> {
+		if self.index == self.source.len() {
+			return None;
 		}
 
-		fn peek(&self) -> Option<char> {
-			if self.index == self.source.len() {
-				return None;
-			}
+		self.rest().chars().next()
+	}
 
-			self.rest().chars().next()
+	fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
+		if self.peek().map(f).unwrap_or(false) {
+			self.index += 1;
+			return 1;
 		}
+		0
+	}
 
-		fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
-			if self.peek().map(f).unwrap_or(false) {
-				self.index += 1;
-				return 1;
-			}
-			0
+	fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
+		if self.index == self.source.len() {
+			return 0;
 		}
 
-		fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
-			if self.index == self.source.len() {
-				return 0;
-			}
+		let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));
 
-			let next_char = self.rest().char_indices().find(|(_, c)| !f(*c));
-
-			match next_char {
-				None => {
-					let diff = self.source.len() - self.index;
-					self.index = self.source.len();
-					diff
-				}
-				Some((idx, _)) => {
-					self.index += idx;
-					idx
-				}
+		match next_char {
+			None => {
+				let diff = self.source.len() - self.index;
+				self.index = self.source.len();
+				diff
 			}
-		}
-
-		fn skip(&mut self, len: usize) {
-			self.index = match self.index + len {
-				n if n > self.source.len() => self.source.len(),
-				n => n,
-			};
-		}
-
-		#[allow(clippy::range_plus_one)]
-		fn pos(&self) -> Range<usize> {
-			if self.index == self.source.len() {
-				self.offset + self.index..self.offset + self.index
-			} else {
-				// TODO: char size
-				self.offset + self.index..self.offset + self.index + 1
+			Some((idx, _)) => {
+				self.index += idx;
+				idx
 			}
 		}
 	}
 
-	// Check that b has at least the same whitespace prefix as a and returns the
-	// amount of this whitespace, otherwise returns 0.  If a has no whitespace
-	// prefix than return 0.
-	fn check_whitespace(a: &str, b: &str) -> usize {
-		let a = a.as_bytes();
-		let b = b.as_bytes();
+	fn skip(&mut self, len: usize) {
+		self.index = match self.index + len {
+			n if n > self.source.len() => self.source.len(),
+			n => n,
+		};
+	}
+}
 
-		for i in 0..a.len() {
-			if a[i] != b' ' && a[i] != b'\t' {
-				// a has run out of whitespace and b matched up to this point. Return result.
-				return i;
-			}
+// Check that b has at least the same whitespace prefix as a and return the
+// amount of this whitespace, otherwise return 0.  If a has no whitespace
+// prefix then return 0.
+fn check_whitespace(a: &str, b: &str) -> usize {
+	let a = a.as_bytes();
+	let b = b.as_bytes();
 
-			if i >= b.len() {
-				// We ran off the edge of b while a still has whitespace. Return 0 as failure.
-				return 0;
-			}
+	for i in 0..a.len() {
+		if a[i] != b' ' && a[i] != b'\t' {
+			// a has run out of whitespace and b matched up to this point. Return result.
+			return i;
+		}
 
-			if a[i] != b[i] {
-				// a has whitespace but b does not. Return 0 as failure.
-				return 0;
-			}
+		if i >= b.len() {
+			// We ran off the edge of b while a still has whitespace. Return 0 as failure.
+			return 0;
 		}
 
-		// We ran off the end of a and b kept up
-		a.len()
+		if a[i] != b[i] {
+			// a has whitespace but b does not. Return 0 as failure.
+			return 0;
+		}
 	}
 
-	fn guess_token_end_and_bump<'a>(lex: &mut Lexer<'a, SyntaxKind>, ctx: &Context<'a>) {
+	// We ran off the end of a and b kept up
+	a.len()
+}
+
+pub(crate) trait StrBlockLexCtx<'d> {
+	fn remainder(&self) -> &'d str;
+	fn eat_error(&mut self, ctx: &Context<'d>);
+	fn bump_pos(&mut self, s: usize);
+	fn mark_truncating(&mut self);
+	fn mark_line(&mut self, line: &'d str);
+}
+
+impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
+	fn remainder(&self) -> &'d str {
+		self.remainder()
+	}
+	fn eat_error(&mut self, ctx: &Context<'d>) {
 		let end_index = ctx
 			.rest()
 			.find("|||")
 			.map_or_else(|| ctx.rest().len(), |v| v + 3);
-		lex.bump(ctx.index + end_index);
+		self.bump(ctx.index + end_index);
 	}
+	fn bump_pos(&mut self, s: usize) {
+		self.bump(s);
+	}
+	fn mark_truncating(&mut self) {
+		// Lexer test doesn't collect anything
+	}
+	fn mark_line(&mut self, _line: &'d str) {
+		// Lexer test doesn't collect anything
+	}
+}
 
-	debug_assert_eq!(lex.slice(), "|||");
-	let mut ctx = Context {
+pub fn collect_lexed_str_block<'s>(
+	input: &'s str,
+) -> Result<CollectStrBlock<'s>, StringBlockError> {
+	let mut collect = CollectStrBlock {
+		truncate: false,
+		lines: vec![],
+		input,
+		offset: 0,
+	};
+	lex_str_block(&mut collect)?;
+	Ok(collect)
+}
+
+pub struct CollectStrBlock<'s> {
+	pub truncate: bool,
+	pub lines: Vec<&'s str>,
+	input: &'s str,
+	offset: usize,
+}
+
+impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
+	fn remainder(&self) -> &'d str {
+		self.input
+	}
+
+	fn eat_error(&mut self, _ctx: &Context<'d>) {
+		// Error will be returned, no need to record it here
+	}
+
+	fn bump_pos(&mut self, s: usize) {
+		self.offset += s;
+	}
+
+	fn mark_truncating(&mut self) {
+		self.truncate = true;
+	}
+
+	fn mark_line(&mut self, line: &'d str) {
+		self.lines.push(line)
+	}
+}
+
+pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
+	// debug_assert_eq!(lex.slice(), "|||");
+	let mut ctx = Context::<'a> {
 		source: lex.remainder(),
 		index: 0,
-		offset: lex.span().end,
 	};
 
-	ctx.eat_if(|v| v == '-');
+	if ctx.eat_if(|v| v == '-') != 0 {
+		lex.mark_truncating();
+	}
 
 	// Skip whitespaces
 	ctx.eat_while(|r| r == ' ' || r == '\t' || r == '\r');
@@ -150,12 +201,12 @@
 	match ctx.next() {
 		Some('\n') => (),
 		None => {
-			guess_token_end_and_bump(lex, &ctx);
+			lex.eat_error(&ctx);
 			return Err(UnexpectedEnd);
 		}
 		// Text block requires new line after |||.
 		Some(_) => {
-			guess_token_end_and_bump(lex, &ctx);
+			lex.eat_error(&ctx);
 			return Err(MissingNewLine);
 		}
 	}
@@ -170,7 +221,7 @@
 
 	if num_whitespace == 0 {
 		// Text block's first line must start with whitespace
-		guess_token_end_and_bump(lex, &ctx);
+		lex.eat_error(&ctx);
 		return Err(MissingIndent);
 	}
 
@@ -178,19 +229,27 @@
 		debug_assert_ne!(num_whitespace, 0, "Unexpected value for num_whitespace");
 		ctx.skip(num_whitespace);
 
+		let line_start = ctx.index;
+		let mut line_size = 0;
 		loop {
 			match ctx.next() {
 				None => {
-					guess_token_end_and_bump(lex, &ctx);
+					lex.eat_error(&ctx);
 					return Err(UnexpectedEnd);
 				}
-				Some('\n') => break,
-				Some(_) => (),
+				Some('\n') => {
+					lex.mark_line(&ctx.source[line_start..line_start + line_size]);
+					break;
+				}
+				Some(c) => {
+					line_size += c.len_utf8();
+				}
 			}
 		}
 
 		// Skip any blank lines
 		while ctx.peek() == Some('\n') {
+			lex.mark_line("");
 			ctx.next();
 		}
 
@@ -206,15 +265,11 @@
 			}
 
 			if !ctx.rest().starts_with("|||") {
-				// Text block not terminated with |||
-				let pos = ctx.pos();
-				if pos.is_empty() {
-					// eof
-					lex.bump(ctx.index);
+				if ctx.rest().is_empty() {
+					lex.bump_pos(ctx.index);
 					return Err(UnexpectedEnd);
 				}
-
-				guess_token_end_and_bump(lex, &ctx);
+				lex.eat_error(&ctx);
 				return Err(MissingTermination);
 			}
 
@@ -224,6 +279,6 @@
 		}
 	}
 
-	lex.bump(ctx.index);
+	lex.bump_pos(ctx.index);
 	Ok(())
 }