git.delta.rocks / jrsonnet / refs/commits / d5225b820ddc

diff · tree · log

source

crates/jrsonnet-interner/src/lib.rs · 6.8 KiB · source · history
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::{HashMap, hash_map::RawEntryMut};18use jrsonnet_gcmodule::{Acyclic, Trace};19use rustc_hash::FxBuildHasher;2021mod inner;22use inner::Inner;2324mod names;2526/// Interned string27///28/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]29#[derive(Clone, PartialOrd, Ord, Eq)]30pub struct IStr(Inner);31impl Trace for IStr {32	fn is_type_tracked() -> bool {33		false34	}35}3637/// SAFETY:38///39/// `IStr` is acyclic40unsafe impl Acyclic for IStr {}4142impl IStr {43	#[must_use]44	pub fn empty() -> Self {45		"".into()46	}47	#[must_use]48	pub fn as_str(&self) -> &str {49		self as &str50	}5152	#[must_use]53	pub fn cast_bytes(self) -> IBytes {54		IBytes(self.0.clone())55	}56}5758impl Deref for IStr {59	type Target = str;6061	fn deref(&self) -> &Self::Target {62		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-863		unsafe { self.0.as_str_unchecked() }64	}65}6667impl PartialEq for IStr {68	fn eq(&self, other: &Self) -> bool {69		// all IStr should be inlined into same pool70		Inner::ptr_eq(&self.0, &other.0)71	}72}7374impl PartialEq<str> for IStr {75	fn eq(&self, other: &str) -> bool {76		self as &str == other77	}78}7980impl Hash for IStr {81	fn hash<H: Hasher>(&self, state: &mut H) {82		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address83		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);84	}85}8687impl Drop for IStr {88	fn drop(&mut self) {89		maybe_unpool(&self.0);90	}91}9293impl fmt::Debug for IStr {94	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {95		fmt::Debug::fmt(self as &str, f)96	}97}9899impl Display 
for IStr {100	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {101		fmt::Display::fmt(self as &str, f)102	}103}104105/// Interned byte array106#[derive(Clone, PartialOrd, Ord, Eq)]107pub struct IBytes(Inner);108impl Trace for IBytes {109	fn is_type_tracked() -> bool {110		false111	}112}113114impl IBytes {115	#[must_use]116	pub fn cast_str(self) -> Option<IStr> {117		if Inner::check_utf8(&self.0) {118			Some(IStr(self.0.clone()))119		} else {120			None121		}122	}123	/// # Safety124	/// data should be valid utf8125	unsafe fn cast_str_unchecked(self) -> IStr {126		// SAFETY: data is utf8127		unsafe { Inner::assume_utf8(&self.0) };128		IStr(self.0.clone())129	}130131	#[must_use]132	pub fn as_slice(&self) -> &[u8] {133		self.0.as_slice()134	}135}136137impl Deref for IBytes {138	type Target = [u8];139140	fn deref(&self) -> &Self::Target {141		self.0.as_slice()142	}143}144145impl PartialEq for IBytes {146	fn eq(&self, other: &Self) -> bool {147		// all IStr should be inlined into same pool148		Inner::ptr_eq(&self.0, &other.0)149	}150}151152impl Hash for IBytes {153	fn hash<H: Hasher>(&self, state: &mut H) {154		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address155		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);156	}157}158159impl Drop for IBytes {160	fn drop(&mut self) {161		maybe_unpool(&self.0);162	}163}164165fn maybe_unpool(inner: &Inner) {166	#[cold]167	#[inline(never)]168	fn unpool(inner: &Inner) {169		// May fail on program termination170		let _ = POOL.try_with(|pool| {171			let mut pool = pool.borrow_mut();172173			if pool.remove(inner).is_none() {174				// DOC(string-pooling)175				// On some platforms (i.e i686-windows), try_with will not fail after TLS176				// destructor is called, but instead re-initialize the TLS with the empty pool.177				// Allow non-pooled Drop in this case.178				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016179				// 
Another cause might be that you have improperly used jrsonnet in multi-threaded environment:180				// https://github.com/CertainLach/jrsonnet/issues/113181				debug_assert!(pool.is_empty(), "if you have landed here - you most likely did something naughty with multi-threading. jrsonnet string pooling uses thread_local pool");182			}183		});184	}185	// First reference - current object, second - POOL186	if Inner::strong_count(inner) <= 2 {187		unpool(inner);188	}189}190191impl fmt::Debug for IBytes {192	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {193		fmt::Debug::fmt(self as &[u8], f)194	}195}196197impl<'c> From<Cow<'c, str>> for IStr {198	fn from(v: Cow<'c, str>) -> Self {199		intern_str(&v)200	}201}202impl From<&str> for IStr {203	fn from(v: &str) -> Self {204		intern_str(v)205	}206}207impl From<String> for IStr {208	fn from(s: String) -> Self {209		s.as_str().into()210	}211}212impl From<&String> for IStr {213	fn from(s: &String) -> Self {214		s.as_str().into()215	}216}217impl From<char> for IStr {218	fn from(value: char) -> Self {219		let mut buf = [0; 5];220		Self::from(&*value.encode_utf8(&mut buf))221	}222}223impl From<&[u8]> for IBytes {224	fn from(v: &[u8]) -> Self {225		intern_bytes(v)226	}227}228229type PoolMap = HashMap<Inner, (), FxBuildHasher>;230231thread_local! {232	static POOL: RefCell<PoolMap> = RefCell::new(HashMap::with_capacity_and_hasher(200, FxBuildHasher));233}234235/// Utils for embedding jrsonnet in non-rust.236///237/// Jrsonnet golang bindings require that it is possible to move jsonnet238/// VM between OS threads, and this is not possible due to usage of239/// `thread_local`. 
Instead, there is two methods added, one should be240/// called at the end of current thread work, and one that should be241/// used when using other thread.242pub mod interop {243	use std::mem;244245	use crate::{POOL, PoolMap};246247	/// Type-erased interned string pool248	pub enum PoolState {}249250	/// Dump current interned string pool, to be restored by251	/// `reenter_thread`252	pub fn exit_thread() -> *mut PoolState {253		Box::into_raw(Box::new(POOL.with_borrow_mut(mem::take))).cast()254	}255256	/// Reenter thread, using state dumped by `exit_thread`.257	///258	/// # Safety259	///260	/// `state` should be acquired from `exit_thread`, it is not allowed261	/// to reuse state to reenter multiple threads.262	pub unsafe fn reenter_thread(state: *mut PoolState) {263		let ptr: *mut PoolMap = state.cast();264		// SAFETY: ptr is an unique state per method safety requirements.265		let ptr: Box<PoolMap> = unsafe { Box::from_raw(ptr) };266		let ptr: PoolMap = *ptr;267		POOL.with_borrow_mut(|pool| {268			let _ = mem::replace(pool, ptr);269		});270	}271}272273#[must_use]274pub fn intern_bytes(bytes: &[u8]) -> IBytes {275	POOL.with(|pool| {276		let mut pool = pool.borrow_mut();277		let entry = pool.raw_entry_mut().from_key(bytes);278		match entry {279			RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),280			RawEntryMut::Vacant(e) => {281				let (k, ()) = e.insert(Inner::new_bytes(bytes), ());282				IBytes(k.clone())283			}284		}285	})286}287288#[must_use]289pub fn intern_str(str: &str) -> IStr {290	// SAFETY: Rust strings always utf8291	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }292}293294#[cfg(test)]295mod tests {296	use crate::IStr;297298	#[test]299	fn simple() {300		let a = IStr::from("a");301		let b = IStr::from("a");302303		assert_eq!(a.as_ptr(), b.as_ptr());304	}305}