git.delta.rocks / jrsonnet / refs/commits / b3f009bb7b2e

diff · tree · log

source

crates/jrsonnet-interner/src/lib.rs · 6.7 KiB · source · history
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::{hash_map::RawEntryMut, HashMap};18use jrsonnet_gcmodule::{Acyclic, Trace};19use rustc_hash::FxBuildHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30	fn is_type_tracked() -> bool {31		false32	}33}3435/// SAFETY:36///37/// `IStr` is acyclic38unsafe impl Acyclic for IStr {}3940impl IStr {41	#[must_use]42	pub fn empty() -> Self {43		"".into()44	}45	#[must_use]46	pub fn as_str(&self) -> &str {47		self as &str48	}4950	#[must_use]51	pub fn cast_bytes(self) -> IBytes {52		IBytes(self.0.clone())53	}54}5556impl Deref for IStr {57	type Target = str;5859	fn deref(&self) -> &Self::Target {60		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-861		unsafe { self.0.as_str_unchecked() }62	}63}6465impl PartialEq for IStr {66	fn eq(&self, other: &Self) -> bool {67		// all IStr should be inlined into same pool68		Inner::ptr_eq(&self.0, &other.0)69	}70}7172impl PartialEq<str> for IStr {73	fn eq(&self, other: &str) -> bool {74		self as &str == other75	}76}7778impl Hash for IStr {79	fn hash<H: Hasher>(&self, state: &mut H) {80		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address81		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);82	}83}8485impl Drop for IStr {86	fn drop(&mut self) {87		maybe_unpool(&self.0);88	}89}9091impl fmt::Debug for IStr {92	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {93		fmt::Debug::fmt(self as &str, f)94	}95}9697impl Display for IStr {98	fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {99		fmt::Display::fmt(self as &str, f)100	}101}102103/// Interned byte array104#[derive(Clone, PartialOrd, Ord, Eq)]105pub struct IBytes(Inner);106impl Trace for IBytes {107	fn is_type_tracked() -> bool {108		false109	}110}111112impl IBytes {113	#[must_use]114	pub fn cast_str(self) -> Option<IStr> {115		if Inner::check_utf8(&self.0) {116			Some(IStr(self.0.clone()))117		} else {118			None119		}120	}121	/// # Safety122	/// data should be valid utf8123	unsafe fn cast_str_unchecked(self) -> IStr {124		// SAFETY: data is utf8125		unsafe { Inner::assume_utf8(&self.0) };126		IStr(self.0.clone())127	}128129	#[must_use]130	pub fn as_slice(&self) -> &[u8] {131		self.0.as_slice()132	}133}134135impl Deref for IBytes {136	type Target = [u8];137138	fn deref(&self) -> &Self::Target {139		self.0.as_slice()140	}141}142143impl PartialEq for IBytes {144	fn eq(&self, other: &Self) -> bool {145		// all IStr should be inlined into same pool146		Inner::ptr_eq(&self.0, &other.0)147	}148}149150impl Hash for IBytes {151	fn hash<H: Hasher>(&self, state: &mut H) {152		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address153		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);154	}155}156157impl Drop for IBytes {158	fn drop(&mut self) {159		maybe_unpool(&self.0);160	}161}162163fn maybe_unpool(inner: &Inner) {164	#[cold]165	#[inline(never)]166	fn unpool(inner: &Inner) {167		// May fail on program termination168		let _ = POOL.try_with(|pool| {169			let mut pool = pool.borrow_mut();170171			if pool.remove(inner).is_none() {172				// On some platforms (i.e i686-windows), try_with will not fail after TLS173				// destructor is called, but instead re-initialize the TLS with the empty pool.174				// Allow non-pooled Drop in this case.175				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016176				//177				// However, if pool is not empty, most 
likely this is issue #113, and then I don't178				// have any explainations for now.179				assert!(pool.is_empty(), "received an unpooled string not during the program termination, please write any info regarding this crash to https://github.com/CertainLach/jrsonnet/issues/113, thanks!");180			}181		});182	}183	// First reference - current object, second - POOL184	if Inner::strong_count(inner) <= 2 {185		unpool(inner);186	}187}188189impl fmt::Debug for IBytes {190	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {191		fmt::Debug::fmt(self as &[u8], f)192	}193}194195impl<'c> From<Cow<'c, str>> for IStr {196	fn from(v: Cow<'c, str>) -> Self {197		intern_str(&v)198	}199}200impl From<&str> for IStr {201	fn from(v: &str) -> Self {202		intern_str(v)203	}204}205impl From<String> for IStr {206	fn from(s: String) -> Self {207		s.as_str().into()208	}209}210impl From<&String> for IStr {211	fn from(s: &String) -> Self {212		s.as_str().into()213	}214}215impl From<char> for IStr {216	fn from(value: char) -> Self {217		let mut buf = [0; 5];218		Self::from(&*value.encode_utf8(&mut buf))219	}220}221impl From<&[u8]> for IBytes {222	fn from(v: &[u8]) -> Self {223		intern_bytes(v)224	}225}226227type PoolMap = HashMap<Inner, (), FxBuildHasher>;228229thread_local! {230	static POOL: RefCell<PoolMap> = RefCell::new(HashMap::with_capacity_and_hasher(200, FxBuildHasher::default()));231}232233/// Jrsonnet golang bindings require that it is possible to move jsonnet234/// VM between OS threads, and this is not possible due to usage of235/// `thread_local`. 
Instead, there is two methods added, one should be236/// called at the end of current thread work, and one that should be237/// used when using other thread.238pub mod interop {239	use std::mem;240241	use crate::{PoolMap, POOL};242243	/// Type-erased interned string pool244	pub enum PoolState {}245246	/// Dump current interned string pool, to be restored by247	/// `reenter_thread`248	pub fn exit_thread() -> *mut PoolState {249		Box::into_raw(Box::new(POOL.with_borrow_mut(mem::take))).cast()250	}251252	/// Reenter thread, using state dumped by `exit_thread`.253	///254	/// # Safety255	///256	/// `state` should be acquired from `exit_thread`, it is not allowed257	/// to reuse state to reenter multiple threads.258	pub unsafe fn reenter_thread(state: *mut PoolState) {259		let ptr: *mut PoolMap = state.cast();260		// SAFETY: ptr is an unique state per method safety requirements.261		let ptr: Box<PoolMap> = unsafe { Box::from_raw(ptr) };262		let ptr: PoolMap = *ptr;263		POOL.with_borrow_mut(|pool| {264			let _ = mem::replace(pool, ptr);265		});266	}267}268269#[must_use]270pub fn intern_bytes(bytes: &[u8]) -> IBytes {271	POOL.with(|pool| {272		let mut pool = pool.borrow_mut();273		let entry = pool.raw_entry_mut().from_key(bytes);274		match entry {275			RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),276			RawEntryMut::Vacant(e) => {277				let (k, ()) = e.insert(Inner::new_bytes(bytes), ());278				IBytes(k.clone())279			}280		}281	})282}283284#[must_use]285pub fn intern_str(str: &str) -> IStr {286	// SAFETY: Rust strings always utf8287	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }288}289290#[cfg(test)]291mod tests {292	use crate::IStr;293294	#[test]295	fn simple() {296		let a = IStr::from("a");297		let b = IStr::from("a");298299		assert_eq!(a.as_ptr(), b.as_ptr());300	}301}