git.delta.rocks / jrsonnet / refs/commits / d543e940d74b

difftreelog

source

crates/jrsonnet-interner/src/lib.rs6.8 KiBsourcehistory
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::{HashMap, hash_map::RawEntryMut};18use jrsonnet_gcmodule::{Acyclic, Trace};19use rustc_hash::FxBuildHasher;2021mod inner;22use inner::Inner;2324mod names;2526/// Interned string27///28/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]29#[derive(Clone, PartialOrd, Ord, Eq)]30pub struct IStr(Inner);31impl Trace for IStr {32	fn is_type_tracked() -> bool {33		false34	}35}3637/// SAFETY:38///39/// `IStr` is acyclic40unsafe impl Acyclic for IStr {}4142impl IStr {43	#[must_use]44	pub fn empty() -> Self {45		"".into()46	}47	#[must_use]48	pub fn as_str(&self) -> &str {49		self as &str50	}5152	#[must_use]53	pub fn cast_bytes(self) -> IBytes {54		IBytes(self.0.clone())55	}5657	pub fn len32(&self) -> u32 {58		self.0.len32()59	}60}6162impl Deref for IStr {63	type Target = str;6465	fn deref(&self) -> &Self::Target {66		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-867		unsafe { self.0.as_str_unchecked() }68	}69}7071impl PartialEq for IStr {72	fn eq(&self, other: &Self) -> bool {73		// all IStr should be inlined into same pool74		Inner::ptr_eq(&self.0, &other.0)75	}76}7778impl PartialEq<str> for IStr {79	fn eq(&self, other: &str) -> bool {80		self as &str == other81	}82}8384impl Hash for IStr {85	fn hash<H: Hasher>(&self, state: &mut H) {86		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address87		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);88	}89}9091impl Drop for IStr {92	fn drop(&mut self) {93		maybe_unpool(&self.0);94	}95}9697impl fmt::Debug for IStr {98	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {99		fmt::Debug::fmt(self as &str, f)100	}101}102103impl Display for IStr {104	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {105		fmt::Display::fmt(self as &str, f)106	}107}108109/// Interned byte array110#[derive(Clone, PartialOrd, Ord, Eq)]111pub struct IBytes(Inner);112impl Trace for IBytes {113	fn is_type_tracked() -> bool {114		false115	}116}117118impl IBytes {119	#[must_use]120	pub fn cast_str(self) -> Option<IStr> {121		if Inner::check_utf8(&self.0) {122			Some(IStr(self.0.clone()))123		} else {124			None125		}126	}127	/// # Safety128	/// data should be valid utf8129	unsafe fn cast_str_unchecked(self) -> IStr {130		// SAFETY: data is utf8131		unsafe { Inner::assume_utf8(&self.0) };132		IStr(self.0.clone())133	}134135	#[must_use]136	pub fn as_slice(&self) -> &[u8] {137		self.0.as_slice()138	}139}140141impl Deref for IBytes {142	type Target = [u8];143144	fn deref(&self) -> &Self::Target {145		self.0.as_slice()146	}147}148149impl PartialEq for IBytes {150	fn eq(&self, other: &Self) -> bool {151		// all IStr should be inlined into same pool152		Inner::ptr_eq(&self.0, &other.0)153	}154}155156impl Hash for IBytes {157	fn hash<H: Hasher>(&self, state: &mut H) {158		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address159		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);160	}161}162163impl Drop for IBytes {164	fn drop(&mut self) {165		maybe_unpool(&self.0);166	}167}168169fn maybe_unpool(inner: &Inner) {170	#[cold]171	#[inline(never)]172	fn unpool(inner: &Inner) {173		// May fail on program termination174		let _ = POOL.try_with(|pool| {175			let mut pool = pool.borrow_mut();176177			if pool.remove(inner).is_none() {178				// DOC(string-pooling)179				// On some platforms (i.e i686-windows), try_with will not fail after TLS180				// destructor is called, but instead re-initialize the TLS with the empty pool.181				// Allow non-pooled Drop in this case.182				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016183				// Another cause might be that you have improperly used jrsonnet in multi-threaded environment:184				// https://github.com/CertainLach/jrsonnet/issues/113185				debug_assert!(pool.is_empty(), "if you have landed here - you most likely did something naughty with multi-threading. jrsonnet string pooling uses thread_local pool");186			}187		});188	}189	// First reference - current object, second - POOL190	if Inner::strong_count(inner) <= 2 {191		unpool(inner);192	}193}194195impl fmt::Debug for IBytes {196	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {197		fmt::Debug::fmt(self as &[u8], f)198	}199}200201impl<'c> From<Cow<'c, str>> for IStr {202	fn from(v: Cow<'c, str>) -> Self {203		intern_str(&v)204	}205}206impl From<&str> for IStr {207	fn from(v: &str) -> Self {208		intern_str(v)209	}210}211impl From<String> for IStr {212	fn from(s: String) -> Self {213		s.as_str().into()214	}215}216impl From<&String> for IStr {217	fn from(s: &String) -> Self {218		s.as_str().into()219	}220}221impl From<char> for IStr {222	fn from(value: char) -> Self {223		let mut buf = [0; 5];224		Self::from(&*value.encode_utf8(&mut buf))225	}226}227impl From<&[u8]> for IBytes {228	fn from(v: &[u8]) -> Self {229		intern_bytes(v)230	}231}232233type PoolMap = HashMap<Inner, (), FxBuildHasher>;234235thread_local! {236	static POOL: RefCell<PoolMap> = RefCell::new(HashMap::with_capacity_and_hasher(200, FxBuildHasher));237}238239/// Utils for embedding jrsonnet in non-rust.240///241/// Jrsonnet golang bindings require that it is possible to move jsonnet242/// VM between OS threads, and this is not possible due to usage of243/// `thread_local`. Instead, there is two methods added, one should be244/// called at the end of current thread work, and one that should be245/// used when using other thread.246pub mod interop {247	use std::mem;248249	use crate::{POOL, PoolMap};250251	/// Type-erased interned string pool252	pub enum PoolState {}253254	/// Dump current interned string pool, to be restored by255	/// `reenter_thread`256	pub fn exit_thread() -> *mut PoolState {257		Box::into_raw(Box::new(POOL.with_borrow_mut(mem::take))).cast()258	}259260	/// Reenter thread, using state dumped by `exit_thread`.261	///262	/// # Safety263	///264	/// `state` should be acquired from `exit_thread`, it is not allowed265	/// to reuse state to reenter multiple threads.266	pub unsafe fn reenter_thread(state: *mut PoolState) {267		let ptr: *mut PoolMap = state.cast();268		// SAFETY: ptr is an unique state per method safety requirements.269		let ptr: Box<PoolMap> = unsafe { Box::from_raw(ptr) };270		let ptr: PoolMap = *ptr;271		POOL.with_borrow_mut(|pool| {272			let _ = mem::replace(pool, ptr);273		});274	}275}276277#[must_use]278pub fn intern_bytes(bytes: &[u8]) -> IBytes {279	POOL.with(|pool| {280		let mut pool = pool.borrow_mut();281		let entry = pool.raw_entry_mut().from_key(bytes);282		match entry {283			RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),284			RawEntryMut::Vacant(e) => {285				let (k, ()) = e.insert(Inner::new_bytes(bytes), ());286				IBytes(k.clone())287			}288		}289	})290}291292#[must_use]293pub fn intern_str(str: &str) -> IStr {294	// SAFETY: Rust strings always utf8295	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }296}297298#[cfg(test)]299mod tests {300	use crate::IStr;301302	#[test]303	fn simple() {304		let a = IStr::from("a");305		let b = IStr::from("a");306307		assert_eq!(a.as_ptr(), b.as_ptr());308	}309}