git.delta.rocks / jrsonnet / refs/commits / a8b25fbfa77b

difftreelog

source

crates/jrsonnet-interner/src/lib.rs6.7 KiBsourcehistory
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::{hash_map::RawEntryMut, HashMap};18use jrsonnet_gcmodule::{Acyclic, Trace};19use rustc_hash::FxBuildHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30	fn is_type_tracked() -> bool {31		false32	}33}34unsafe impl Acyclic for IStr {}3536impl IStr {37	#[must_use]38	pub fn empty() -> Self {39		"".into()40	}41	#[must_use]42	pub fn as_str(&self) -> &str {43		self as &str44	}4546	#[must_use]47	pub fn cast_bytes(self) -> IBytes {48		IBytes(self.0.clone())49	}50}5152impl Deref for IStr {53	type Target = str;5455	fn deref(&self) -> &Self::Target {56		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-857		unsafe { self.0.as_str_unchecked() }58	}59}6061impl PartialEq for IStr {62	fn eq(&self, other: &Self) -> bool {63		// all IStr should be inlined into same pool64		Inner::ptr_eq(&self.0, &other.0)65	}66}6768impl PartialEq<str> for IStr {69	fn eq(&self, other: &str) -> bool {70		self as &str == other71	}72}7374impl Hash for IStr {75	fn hash<H: Hasher>(&self, state: &mut H) {76		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address77		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);78	}79}8081impl Drop for IStr {82	fn drop(&mut self) {83		maybe_unpool(&self.0);84	}85}8687impl fmt::Debug for IStr {88	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {89		fmt::Debug::fmt(self as &str, f)90	}91}9293impl Display for IStr {94	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {95		fmt::Display::fmt(self as &str, f)96	}97}9899/// Interned byte array100#[derive(Clone, PartialOrd, Ord, Eq)]101pub struct IBytes(Inner);102impl Trace for IBytes {103	fn is_type_tracked() -> bool {104		false105	}106}107108impl IBytes {109	#[must_use]110	pub fn cast_str(self) -> Option<IStr> {111		if Inner::check_utf8(&self.0) {112			Some(IStr(self.0.clone()))113		} else {114			None115		}116	}117	/// # Safety118	/// data should be valid utf8119	unsafe fn cast_str_unchecked(self) -> IStr {120		// SAFETY: data is utf8121		unsafe { Inner::assume_utf8(&self.0) };122		IStr(self.0.clone())123	}124125	#[must_use]126	pub fn as_slice(&self) -> &[u8] {127		self.0.as_slice()128	}129}130131impl Deref for IBytes {132	type Target = [u8];133134	fn deref(&self) -> &Self::Target {135		self.0.as_slice()136	}137}138139impl PartialEq for IBytes {140	fn eq(&self, other: &Self) -> bool {141		// all IStr should be inlined into same pool142		Inner::ptr_eq(&self.0, &other.0)143	}144}145146impl Hash for IBytes {147	fn hash<H: Hasher>(&self, state: &mut H) {148		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address149		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);150	}151}152153impl Drop for IBytes {154	fn drop(&mut self) {155		maybe_unpool(&self.0);156	}157}158159fn maybe_unpool(inner: &Inner) {160	#[cold]161	#[inline(never)]162	fn unpool(inner: &Inner) {163		// May fail on program termination164		let _ = POOL.try_with(|pool| {165			let mut pool = pool.borrow_mut();166167			if pool.remove(inner).is_none() {168				// On some platforms (i.e i686-windows), try_with will not fail after TLS169				// destructor is called, but instead re-initialize the TLS with the empty pool.170				// Allow non-pooled Drop in this case.171				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016172				//173				// However, if pool is not empty, most likely this is issue #113, and then I don't174				// have any explainations for now.175				assert!(pool.is_empty(), "received an unpooled string not during the program termination, please write any info regarding this crash to https://github.com/CertainLach/jrsonnet/issues/113, thanks!");176			}177		});178	}179	// First reference - current object, second - POOL180	if Inner::strong_count(inner) <= 2 {181		unpool(inner);182	}183}184185impl fmt::Debug for IBytes {186	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {187		fmt::Debug::fmt(self as &[u8], f)188	}189}190191impl<'c> From<Cow<'c, str>> for IStr {192	fn from(v: Cow<'c, str>) -> Self {193		intern_str(&v)194	}195}196impl From<&str> for IStr {197	fn from(v: &str) -> Self {198		intern_str(v)199	}200}201impl From<String> for IStr {202	fn from(s: String) -> Self {203		s.as_str().into()204	}205}206impl From<&String> for IStr {207	fn from(s: &String) -> Self {208		s.as_str().into()209	}210}211impl From<char> for IStr {212	fn from(value: char) -> Self {213		let mut buf = [0; 5];214		Self::from(&*value.encode_utf8(&mut buf))215	}216}217impl From<&[u8]> for IBytes {218	fn from(v: &[u8]) -> Self {219		intern_bytes(v)220	}221}222223type PoolMap = HashMap<Inner, (), FxBuildHasher>;224225thread_local! {226	static POOL: RefCell<PoolMap> = RefCell::new(HashMap::with_capacity_and_hasher(200, FxBuildHasher::default()));227}228229/// Jrsonnet golang bindings require that it is possible to move jsonnet230/// VM between OS threads, and this is not possible due to usage of231/// `thread_local`. Instead, there is two methods added, one should be232/// called at the end of current thread work, and one that should be233/// used when using other thread.234pub mod interop {235	use std::mem;236237	use crate::{PoolMap, POOL};238239	/// Type-erased interned string pool240	pub enum PoolState {}241242	/// Dump current interned string pool, to be restored by243	/// `reenter_thread`244	pub fn exit_thread() -> *mut PoolState {245		Box::into_raw(Box::new(POOL.with_borrow_mut(mem::take))).cast()246	}247248	/// Reenter thread, using state dumped by `exit_thread`.249	///250	/// # Safety251	///252	/// `state` should be acquired from `exit_thread`, it is not allowed253	/// to reuse state to reenter multiple threads.254	pub unsafe fn reenter_thread(state: *mut PoolState) {255		let ptr: *mut PoolMap = state.cast();256		// SAFETY: ptr is an unique state per method safety requirements.257		let ptr: Box<PoolMap> = unsafe { Box::from_raw(ptr) };258		let ptr: PoolMap = *ptr;259		POOL.with_borrow_mut(|pool| {260			let _ = mem::replace(pool, ptr);261		});262	}263}264265#[must_use]266pub fn intern_bytes(bytes: &[u8]) -> IBytes {267	POOL.with(|pool| {268		let mut pool = pool.borrow_mut();269		let entry = pool.raw_entry_mut().from_key(bytes);270		match entry {271			RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),272			RawEntryMut::Vacant(e) => {273				let (k, ()) = e.insert(Inner::new_bytes(bytes), ());274				IBytes(k.clone())275			}276		}277	})278}279280#[must_use]281pub fn intern_str(str: &str) -> IStr {282	// SAFETY: Rust strings always utf8283	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }284}285286#[cfg(test)]287mod tests {288	use crate::IStr;289290	#[test]291	fn simple() {292		let a = IStr::from("a");293		let b = IStr::from("a");294295		assert_eq!(a.as_ptr(), b.as_ptr());296	}297}