git.delta.rocks / jrsonnet / refs/commits / dbbcbc4becf1

difftreelog

source

crates/jrsonnet-interner/src/lib.rs6.3 KiBsourcehistory
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{BuildHasherDefault, Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::{hash_map::RawEntryMut, HashMap};18use jrsonnet_gcmodule::Trace;19use rustc_hash::FxHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30	fn is_type_tracked() -> bool {31		false32	}33}3435impl IStr {36	#[must_use]37	pub fn empty() -> Self {38		"".into()39	}40	#[must_use]41	pub fn as_str(&self) -> &str {42		self as &str43	}4445	#[must_use]46	pub fn cast_bytes(self) -> IBytes {47		IBytes(self.0.clone())48	}49}5051impl Deref for IStr {52	type Target = str;5354	fn deref(&self) -> &Self::Target {55		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-856		unsafe { self.0.as_str_unchecked() }57	}58}5960impl PartialEq for IStr {61	fn eq(&self, other: &Self) -> bool {62		// all IStr should be inlined into same pool63		Inner::ptr_eq(&self.0, &other.0)64	}65}6667impl PartialEq<str> for IStr {68	fn eq(&self, other: &str) -> bool {69		self as &str == other70	}71}7273impl Hash for IStr {74	fn hash<H: Hasher>(&self, state: &mut H) {75		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address76		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);77	}78}7980impl Drop for IStr {81	fn drop(&mut self) {82		maybe_unpool(&self.0);83	}84}8586impl fmt::Debug for IStr {87	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {88		fmt::Debug::fmt(self as &str, f)89	}90}9192impl Display for IStr {93	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {94		fmt::Display::fmt(self as &str, f)95	}96}9798/// Interned byte array99#[derive(Clone, PartialOrd, Ord, Eq)]100pub struct IBytes(Inner);101impl Trace for IBytes {102	fn is_type_tracked() -> bool {103		false104	}105}106107impl IBytes {108	#[must_use]109	pub fn cast_str(self) -> Option<IStr> {110		if Inner::check_utf8(&self.0) {111			Some(IStr(self.0.clone()))112		} else {113			None114		}115	}116	/// # Safety117	/// data should be valid utf8118	unsafe fn cast_str_unchecked(self) -> IStr {119		// SAFETY: data is utf8120		unsafe { Inner::assume_utf8(&self.0) };121		IStr(self.0.clone())122	}123124	#[must_use]125	pub fn as_slice(&self) -> &[u8] {126		self.0.as_slice()127	}128}129130impl Deref for IBytes {131	type Target = [u8];132133	fn deref(&self) -> &Self::Target {134		self.0.as_slice()135	}136}137138impl PartialEq for IBytes {139	fn eq(&self, other: &Self) -> bool {140		// all IStr should be inlined into same pool141		Inner::ptr_eq(&self.0, &other.0)142	}143}144145impl Hash for IBytes {146	fn hash<H: Hasher>(&self, state: &mut H) {147		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address148		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);149	}150}151152impl Drop for IBytes {153	fn drop(&mut self) {154		maybe_unpool(&self.0);155	}156}157158fn maybe_unpool(inner: &Inner) {159	#[cold]160	#[inline(never)]161	fn unpool(inner: &Inner) {162		// May fail on program termination163		let _ = POOL.try_with(|pool| {164			let mut pool = pool.borrow_mut();165166			if pool.remove(inner).is_none() {167				// On some platforms (i.e i686-windows), try_with will not fail after TLS168				// destructor is called, but instead re-initialize the TLS with the empty pool.169				// Allow non-pooled Drop in this case.170				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016171				//172				// However, if pool is not empty, most likely this is issue #113, and then I don't173				// have any explainations for now.174				assert!(pool.is_empty(), "received an unpooled string not during the program termination, please write any info regarding this crash to https://github.com/CertainLach/jrsonnet/issues/113, thanks!");175			}176		});177	}178	// First reference - current object, second - POOL179	if Inner::strong_count(inner) <= 2 {180		unpool(inner);181	}182}183184impl fmt::Debug for IBytes {185	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {186		fmt::Debug::fmt(self as &[u8], f)187	}188}189190impl<'c> From<Cow<'c, str>> for IStr {191	fn from(v: Cow<'c, str>) -> Self {192		intern_str(&v)193	}194}195impl From<&str> for IStr {196	fn from(v: &str) -> Self {197		intern_str(v)198	}199}200impl From<String> for IStr {201	fn from(s: String) -> Self {202		s.as_str().into()203	}204}205impl From<&String> for IStr {206	fn from(s: &String) -> Self {207		s.as_str().into()208	}209}210impl From<char> for IStr {211	fn from(value: char) -> Self {212		let mut buf = [0; 5];213		Self::from(&*value.encode_utf8(&mut buf))214	}215}216impl From<&[u8]> for IBytes {217	fn from(v: &[u8]) -> Self {218		intern_bytes(v)219	}220}221222#[cfg(feature = "serde")]223impl serde::Serialize for IStr {224	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>225	where226		S: serde::Serializer,227	{228		self.as_str().serialize(serializer)229	}230}231232#[cfg(feature = "serde")]233impl<'de> serde::Deserialize<'de> for IStr {234	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>235	where236		D: serde::Deserializer<'de>,237	{238		let str = <&str>::deserialize(deserializer)?;239		Ok(intern_str(str))240	}241}242243#[cfg(feature = "structdump")]244impl structdump::Codegen for IStr {245	fn gen_code(246		&self,247		res: &mut structdump::CodegenResult,248		_unique: bool,249	) -> structdump::TokenStream {250		let s: &str = self;251		res.add_code(252			structdump::quote! {253				structdump_import::IStr::from(#s)254			},255			Some(structdump::quote![structdump_import::IStr]),256			false,257		)258	}259}260261thread_local! {262	static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));263}264265#[must_use]266pub fn intern_bytes(bytes: &[u8]) -> IBytes {267	POOL.with(|pool| {268		let mut pool = pool.borrow_mut();269		let entry = pool.raw_entry_mut().from_key(bytes);270		match entry {271			RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),272			RawEntryMut::Vacant(e) => {273				let (k, ()) = e.insert(Inner::new_bytes(bytes), ());274				IBytes(k.clone())275			}276		}277	})278}279280#[must_use]281pub fn intern_str(str: &str) -> IStr {282	// SAFETY: Rust strings always utf8283	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }284}285286#[cfg(test)]287mod tests {288	use crate::IStr;289290	#[test]291	fn simple() {292		let a = IStr::from("a");293		let b = IStr::from("a");294295		assert_eq!(a.as_ptr(), b.as_ptr());296	}297}