git.delta.rocks / jrsonnet / refs/commits / 103c2a57d922

difftreelog

feat add std regex builtins

Yaroslav Bolyukin2023-12-10parent: #11193ce.patch.diff
in: master
Upstream issue: https://github.com/google/jsonnet/pull/1039

8 files changed

modifiedCargo.lockdiffbeforeafterboth
before · Cargo.lock
200 packageslockfile v3
modifiedCargo.tomldiffbeforeafterboth
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,6 +64,9 @@
 lsp-server = "0.7.4"
 lsp-types = "0.94.1"
 
+regex = "1.8.4"
+lru = "0.10.0"
+
 #[profile.test]
 #opt-level = 1
 
modifiedcmds/jrsonnet/Cargo.tomldiffbeforeafterboth
--- a/cmds/jrsonnet/Cargo.toml
+++ b/cmds/jrsonnet/Cargo.toml
@@ -29,6 +29,10 @@
 exp-object-iteration = ["jrsonnet-evaluator/exp-object-iteration"]
 # Bigint type
 exp-bigint = ["jrsonnet-evaluator/exp-bigint", "jrsonnet-cli/exp-bigint"]
+# std.regex* builtins (regexFullMatch, regexPartialMatch, regexReplace, ...)
+exp-regex = [
+    "jrsonnet-stdlib/exp-regex",
+]
 # obj?.field, obj?.['field']
 exp-null-coaelse = [
     "jrsonnet-evaluator/exp-null-coaelse",
modifiedcrates/jrsonnet-cli/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-cli/Cargo.toml
+++ b/crates/jrsonnet-cli/Cargo.toml
@@ -20,6 +20,9 @@
     "jrsonnet-evaluator/exp-null-coaelse",
     "jrsonnet-stdlib/exp-null-coaelse",
 ]
+exp-regex = [
+    "jrsonnet-stdlib/exp-regex",
+]
 legacy-this-file = ["jrsonnet-stdlib/legacy-this-file"]
 
 [dependencies]
modifiedcrates/jrsonnet-evaluator/src/typed/conversions.rsdiffbeforeafterboth
--- a/crates/jrsonnet-evaluator/src/typed/conversions.rs
+++ b/crates/jrsonnet-evaluator/src/typed/conversions.rs
@@ -304,6 +304,22 @@
 	}
 }
 
+/// Conversion between [`StrValue`] and the untyped `Val::Str` variant.
+impl Typed for StrValue {
+	const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);
+
+	/// Wraps the string in `Val::Str`; this direction always succeeds.
+	fn into_untyped(value: Self) -> Result<Val> {
+		Ok(Val::Str(value))
+	}
+
+	/// Validates `value` against `Self::TYPE`, then unwraps the contained string.
+	fn from_untyped(value: Val) -> Result<Self> {
+		<Self as Typed>::TYPE.check(&value)?;
+		// `check` is expected to have rejected anything that is not a string,
+		// so no other variant should be able to reach this point.
+		let Val::Str(inner) = value else {
+			unreachable!()
+		};
+		Ok(inner)
+	}
+}
+
 impl Typed for char {
 	const TYPE: &'static ComplexValType = &ComplexValType::Char;
 
modifiedcrates/jrsonnet-stdlib/Cargo.tomldiffbeforeafterboth
--- a/crates/jrsonnet-stdlib/Cargo.toml
+++ b/crates/jrsonnet-stdlib/Cargo.toml
@@ -20,6 +20,8 @@
 exp-bigint = ["num-bigint", "jrsonnet-evaluator/exp-bigint"]
 
 exp-null-coaelse = ["jrsonnet-parser/exp-null-coaelse", "jrsonnet-evaluator/exp-null-coaelse"]
+# std.regexFullMatch, std.regexPartialMatch and the other regex helpers
+exp-regex = ["regex", "lru", "rustc-hash"]
 
 [dependencies]
 jrsonnet-evaluator.workspace = true
@@ -49,6 +51,11 @@
 
 num-bigint = { workspace = true, optional = true }
 
+# regex
+regex = { workspace = true, optional = true }
+lru = { workspace = true, optional = true }
+rustc-hash = { workspace = true, optional = true }
+
 [build-dependencies]
 jrsonnet-parser.workspace = true
 structdump = { workspace = true, features = ["derive"] }
modifiedcrates/jrsonnet-stdlib/src/lib.rsdiffbeforeafterboth
--- a/crates/jrsonnet-stdlib/src/lib.rs
+++ b/crates/jrsonnet-stdlib/src/lib.rs
@@ -43,6 +43,10 @@
 pub use sets::*;
 mod compat;
 pub use compat::*;
+#[cfg(feature = "exp-regex")]
+mod regex;
+#[cfg(feature = "exp-regex")]
+pub use crate::regex::*;
 
 pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
 	let mut builder = ObjValueBuilder::new();
@@ -185,6 +189,9 @@
 		("setInter", builtin_set_inter::INST),
 		("setDiff", builtin_set_diff::INST),
 		("setUnion", builtin_set_union::INST),
+		// Regex
+		#[cfg(feature = "exp-regex")]
+		("regexQuoteMeta", builtin_regex_quote_meta::INST),
 		// Compat
 		("__compare", builtin___compare::INST),
 	]
@@ -207,8 +214,37 @@
 		},
 	);
 	builder.method("trace", builtin_trace { settings });
+	builder.method("id", FuncVal::Id);
 
-	builder.method("id", FuncVal::Id);
+	#[cfg(feature = "exp-regex")]
+	{
+		// Regex
+		let regex_cache = RegexCache::default();
+		builder.method(
+			"regexFullMatch",
+			builtin_regex_full_match {
+				cache: regex_cache.clone(),
+			},
+		);
+		builder.method(
+			"regexPartialMatch",
+			builtin_regex_partial_match {
+				cache: regex_cache.clone(),
+			},
+		);
+		builder.method(
+			"regexReplace",
+			builtin_regex_replace {
+				cache: regex_cache.clone(),
+			},
+		);
+		builder.method(
+			"regexGlobalReplace",
+			builtin_regex_global_replace {
+				cache: regex_cache.clone(),
+			},
+		);
+	};
 
 	builder.build()
 }
addedcrates/jrsonnet-stdlib/src/regex.rsdiffbeforeafterboth
--- /dev/null
+++ b/crates/jrsonnet-stdlib/src/regex.rs
@@ -0,0 +1,134 @@
+use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};
+
+use ::regex::Regex;
+use jrsonnet_evaluator::{
+	error::{ErrorKind::*, Result},
+	val::StrValue,
+	IStr, ObjValueBuilder, Val,
+};
+use jrsonnet_macros::builtin;
+use lru::LruCache;
+use rustc_hash::FxHasher;
+
+/// Store of compiled regular expressions, keyed by their source pattern.
+///
+/// The map sits behind a `RefCell` so that lookups, which mutate the LRU
+/// state, can be performed through a shared reference.
+pub struct RegexCacheInner {
+	cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
+}
+impl Default for RegexCacheInner {
+	/// Creates an empty cache with a capacity of 20 compiled patterns.
+	fn default() -> Self {
+		let capacity = NonZeroUsize::new(20).unwrap();
+		let hasher = BuildHasherDefault::default();
+		Self {
+			cache: RefCell::new(LruCache::with_hasher(capacity, hasher)),
+		}
+	}
+}
+/// Reference-counted handle to a [`RegexCacheInner`]; cloning it shares the same cache.
+pub type RegexCache = Rc<RegexCacheInner>;
+impl RegexCacheInner {
+	/// Returns the compiled form of `pattern`, compiling and caching it on a miss.
+	///
+	/// # Errors
+	///
+	/// Returns a `RuntimeError` when `pattern` is not a valid regular
+	/// expression; nothing is stored in the cache in that case.
+	fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
+		let mut entries = self.cache.borrow_mut();
+		match entries.get(&pattern) {
+			Some(hit) => Ok(Rc::clone(hit)),
+			None => {
+				let compiled = Regex::new(&pattern)
+					.map(Rc::new)
+					.map_err(|e| RuntimeError(format!("regex parse failed: {e}").into()))?;
+				entries.push(pattern, Rc::clone(&compiled));
+				Ok(compiled)
+			}
+		}
+	}
+}
+
+/// Runs `regex` against `str` and converts the match into a jsonnet value.
+///
+/// Returns `null` when `regex.captures` finds no match. Otherwise returns an
+/// object with three fields:
+/// - `string`: the text of the whole match (capture group 0),
+/// - `captures`: an array with the text of every further group, in order;
+///   a group that did not participate in the match is reported as `""`,
+/// - `namedCaptures`: an object mapping each named group to the same value
+///   it has in `captures`.
+///
+/// # Errors
+///
+/// Propagates any error raised while adding a member to `namedCaptures`.
+pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
+	// Decide match/no-match first so a failed match allocates nothing.
+	let Some(captured) = regex.captures(&str) else {
+		return Ok(Val::Null);
+	};
+
+	// Group 0 is the whole match; it is reported separately as `string`.
+	let captures: Vec<Val> = captured
+		.iter()
+		.skip(1)
+		.map(|group| match group {
+			Some(found) => Val::Str(StrValue::Flat(found.as_str().into())),
+			None => Val::Str(StrValue::Flat(IStr::empty())),
+		})
+		.collect();
+
+	// `capture_names()` and `captures` are both skipped past group 0, so
+	// zipping them pairs every group name with its value without indexing.
+	let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());
+	for (name, value) in regex.capture_names().skip(1).zip(&captures) {
+		if let Some(name) = name {
+			named_captures.member(name.into()).value(value.clone())?;
+		}
+	}
+
+	let whole = captured
+		.get(0)
+		.expect("capture group 0 is always present on a successful match");
+
+	let mut out = ObjValueBuilder::with_capacity(3);
+	out.member("string".into())
+		.value_unchecked(Val::Str(whole.as_str().into()));
+	out.member("captures".into())
+		.value_unchecked(Val::Arr(captures.into()));
+	out.member("namedCaptures".into())
+		.value_unchecked(Val::Obj(named_captures.build()));
+
+	Ok(Val::Obj(out.build()))
+}
+
+/// `std.regexPartialMatch(pattern, str)`: compiles `pattern` through the shared
+/// cache and reports the match found in `str`, or `null` if there is none.
+///
+/// # Errors
+///
+/// Fails when `pattern` cannot be compiled.
+#[builtin(fields(
+    cache: RegexCache,
+))]
+pub fn builtin_regex_partial_match(
+	this: &builtin_regex_partial_match,
+	pattern: IStr,
+	str: String,
+) -> Result<Val> {
+	this.cache
+		.parse(pattern)
+		.and_then(|compiled| regex_match_inner(&compiled, str))
+}
+
+/// `std.regexFullMatch(pattern, str)`: like the partial match, but the pattern
+/// must cover the whole of `str`.
+///
+/// The pattern is anchored as `^(?:pattern)$`. The non-capturing group is
+/// required: without it a top-level alternation such as `a|b` would become
+/// `^a|b$`, which anchors each branch on one side only and accepts inputs
+/// like `"ax"` or `"xb"`. Being non-capturing, the group leaves the numbering
+/// of the caller's own capture groups unchanged.
+///
+/// # Errors
+///
+/// Fails when the anchored pattern cannot be compiled.
+#[builtin(fields(
+    cache: RegexCache,
+))]
+pub fn builtin_regex_full_match(
+	this: &builtin_regex_full_match,
+	pattern: StrValue,
+	str: String,
+) -> Result<Val> {
+	let pattern = format!("^(?:{pattern})$").into();
+	let regex = this.cache.parse(pattern)?;
+	regex_match_inner(&regex, str)
+}
+
+/// `std.regexQuoteMeta(pattern)`: returns `pattern` passed through the regex
+/// crate's `escape` function.
+#[builtin]
+pub fn builtin_regex_quote_meta(pattern: String) -> String {
+	// The leading `::` names the external crate explicitly, matching the
+	// `use ::regex::Regex;` import at the top of this module.
+	::regex::escape(pattern.as_str())
+}
+
+/// `std.regexReplace(str, pattern, to)`: compiles `pattern` through the shared
+/// cache and returns the result of `Regex::replace` on `str`.
+///
+/// `to` is handed to the regex crate unchanged, so that crate's replacement
+/// string syntax applies to it.
+///
+/// # Errors
+///
+/// Fails when `pattern` cannot be compiled.
+#[builtin(fields(
+    cache: RegexCache,
+))]
+pub fn builtin_regex_replace(
+	this: &builtin_regex_replace,
+	str: String,
+	pattern: IStr,
+	to: String,
+) -> Result<String> {
+	let compiled = this.cache.parse(pattern)?;
+	// `replace` returns a `Cow`; `into_owned` turns it into the `String` result.
+	Ok(compiled.replace(&str, to).into_owned())
+}
+
+/// `std.regexGlobalReplace(str, pattern, to)`: compiles `pattern` through the
+/// shared cache and returns the result of `Regex::replace_all` on `str`.
+///
+/// `to` is handed to the regex crate unchanged, so that crate's replacement
+/// string syntax applies to it.
+///
+/// # Errors
+///
+/// Fails when `pattern` cannot be compiled.
+#[builtin(fields(
+    cache: RegexCache,
+))]
+pub fn builtin_regex_global_replace(
+	this: &builtin_regex_global_replace,
+	str: String,
+	pattern: IStr,
+	to: String,
+) -> Result<String> {
+	let compiled = this.cache.parse(pattern)?;
+	// `replace_all` returns a `Cow`; `into_owned` turns it into the `String` result.
+	Ok(compiled.replace_all(&str, to).into_owned())
+}