git.delta.rocks / jrsonnet / refs/commits / 1e2e6e2fa219

difftreelog

source

crates/jrsonnet-interner/src/lib.rs6.0 KiBsourcehistory
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::Cow,10	cell::RefCell,11	fmt::{self, Display},12	hash::{BuildHasherDefault, Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::HashMap;18use jrsonnet_gcmodule::Trace;19use rustc_hash::FxHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30	fn is_type_tracked() -> bool {31		false32	}33}3435impl IStr {36	#[must_use]37	pub fn empty() -> Self {38		"".into()39	}40	#[must_use]41	pub fn as_str(&self) -> &str {42		self as &str43	}4445	#[must_use]46	pub fn cast_bytes(self) -> IBytes {47		IBytes(self.0.clone())48	}49}5051impl Deref for IStr {52	type Target = str;5354	fn deref(&self) -> &Self::Target {55		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-856		unsafe { self.0.as_str_unchecked() }57	}58}5960impl PartialEq for IStr {61	fn eq(&self, other: &Self) -> bool {62		// all IStr should be inlined into same pool63		Inner::ptr_eq(&self.0, &other.0)64	}65}6667impl PartialEq<str> for IStr {68	fn eq(&self, other: &str) -> bool {69		self as &str == other70	}71}7273impl Hash for IStr {74	fn hash<H: Hasher>(&self, state: &mut H) {75		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address76		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);77	}78}7980impl Drop for IStr {81	fn drop(&mut self) {82		#[cold]83		#[inline(never)]84		fn unpool(inner: &Inner) {85			// May fail on program termination86			let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));87			if res.is_ok() {88				debug_assert_eq!(Inner::strong_count(inner), 1);89			}90		}91		// First reference - current object, second - POOL92		if Inner::strong_count(&self.0) <= 2 {93			unpool(&self.0);94		}95	}96}9798impl fmt::Debug for IStr {99	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {100		fmt::Debug::fmt(self as &str, f)101	}102}103104impl Display for IStr {105	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {106		fmt::Display::fmt(self as &str, f)107	}108}109110/// Interned byte array111#[derive(Clone, PartialOrd, Ord, Eq)]112pub struct IBytes(Inner);113impl Trace for IBytes {114	fn is_type_tracked() -> bool {115		false116	}117}118119impl IBytes {120	#[must_use]121	pub fn cast_str(self) -> Option<IStr> {122		if Inner::check_utf8(&self.0) {123			Some(IStr(self.0.clone()))124		} else {125			None126		}127	}128	/// # Safety129	/// data should be valid utf8130	unsafe fn cast_str_unchecked(self) -> IStr {131		// SAFETY: data is utf8132		unsafe { Inner::assume_utf8(&self.0) };133		IStr(self.0.clone())134	}135136	#[must_use]137	pub fn as_slice(&self) -> &[u8] {138		self.0.as_slice()139	}140}141142impl Deref for IBytes {143	type Target = [u8];144145	fn deref(&self) -> &Self::Target {146		self.0.as_slice()147	}148}149150impl PartialEq for IBytes {151	fn eq(&self, other: &Self) -> bool {152		// all IStr should be inlined into same pool153		Inner::ptr_eq(&self.0, &other.0)154	}155}156157impl Hash for IBytes {158	fn hash<H: Hasher>(&self, state: &mut H) {159		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address160		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);161	}162}163164impl Drop for IBytes {165	fn drop(&mut self) {166		#[cold]167		#[inline(never)]168		fn unpool(inner: &Inner) {169			// May fail on program termination170			let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));171			if res.is_ok() {172				debug_assert_eq!(Inner::strong_count(inner), 1);173			}174		}175		// First reference - current object, second - POOL176		if Inner::strong_count(&self.0) <= 2 {177			unpool(&self.0);178		}179	}180}181182impl fmt::Debug for IBytes {183	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {184		fmt::Debug::fmt(self as &[u8], f)185	}186}187188impl<'c> From<Cow<'c, str>> for IStr {189	fn from(v: Cow<'c, str>) -> Self {190		intern_str(&v)191	}192}193impl From<&str> for IStr {194	fn from(v: &str) -> Self {195		intern_str(v)196	}197}198impl From<String> for IStr {199	fn from(s: String) -> Self {200		s.as_str().into()201	}202}203impl From<&String> for IStr {204	fn from(s: &String) -> Self {205		s.as_str().into()206	}207}208impl From<char> for IStr {209	fn from(value: char) -> Self {210		let mut buf = [0; 5];211		Self::from(&*value.encode_utf8(&mut buf))212	}213}214impl From<&[u8]> for IBytes {215	fn from(v: &[u8]) -> Self {216		intern_bytes(v)217	}218}219220#[cfg(feature = "serde")]221impl serde::Serialize for IStr {222	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>223	where224		S: serde::Serializer,225	{226		self.as_str().serialize(serializer)227	}228}229230#[cfg(feature = "serde")]231impl<'de> serde::Deserialize<'de> for IStr {232	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>233	where234		D: serde::Deserializer<'de>,235	{236		let str = <&str>::deserialize(deserializer)?;237		Ok(intern_str(str))238	}239}240241#[cfg(feature = "structdump")]242impl structdump::Codegen for IStr {243	fn gen_code(244		&self,245		res: &mut structdump::CodegenResult,246		_unique: bool,247	) -> structdump::TokenStream {248		let s: &str = self;249		res.add_code(250			structdump::quote! {251				structdump_import::IStr::from(#s)252			},253			Some(structdump::quote![structdump_import::IStr]),254			false,255		)256	}257}258259thread_local! {260	static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));261}262263#[must_use]264pub fn intern_bytes(bytes: &[u8]) -> IBytes {265	POOL.with(|pool| {266		let mut pool = pool.borrow_mut();267		let entry = pool.raw_entry_mut().from_key(bytes);268		match entry {269			hashbrown::hash_map::RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),270			hashbrown::hash_map::RawEntryMut::Vacant(e) => {271				let (k, _) = e.insert(Inner::new_bytes(bytes), ());272				IBytes(k.clone())273			}274		}275	})276}277278#[must_use]279pub fn intern_str(str: &str) -> IStr {280	// SAFETY: Rust strings always utf8281	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }282}283284#[cfg(test)]285mod tests {286	use crate::IStr;287288	#[test]289	fn simple() {290		let a = IStr::from("a");291		let b = IStr::from("a");292293		assert_eq!(a.as_ptr(), b.as_ptr());294	}295}