git.delta.rocks / jrsonnet / refs/commits / be1ca0e13d7d

difftreelog

source

crates/jrsonnet-interner/src/lib.rs6.6 KiBsourcehistory
1#![deny(2	unsafe_op_in_unsafe_fn,3	clippy::missing_safety_doc,4	clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9	borrow::{Borrow, Cow},10	cell::RefCell,11	fmt::{self, Display},12	hash::{BuildHasherDefault, Hash, Hasher},13	ops::Deref,14	str,15};1617use hashbrown::HashMap;18use jrsonnet_gcmodule::Trace;19use rustc_hash::FxHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30	fn is_type_tracked() -> bool {31		false32	}33}3435impl IStr {36	#[must_use]37	pub fn empty() -> Self {38		"".into()39	}40	#[must_use]41	pub fn as_str(&self) -> &str {42		self as &str43	}4445	#[must_use]46	pub fn cast_bytes(self) -> IBytes {47		IBytes(self.0.clone())48	}49}5051impl Deref for IStr {52	type Target = str;5354	fn deref(&self) -> &Self::Target {55		// SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-856		unsafe { self.0.as_str_unchecked() }57	}58}5960impl Borrow<str> for IStr {61	fn borrow(&self) -> &str {62		self.as_str()63	}64}65impl Borrow<[u8]> for IStr {66	fn borrow(&self) -> &[u8] {67		self.as_bytes()68	}69}7071impl PartialEq for IStr {72	fn eq(&self, other: &Self) -> bool {73		// all IStr should be inlined into same pool74		Inner::ptr_eq(&self.0, &other.0)75	}76}7778impl PartialEq<str> for IStr {79	fn eq(&self, other: &str) -> bool {80		self as &str == other81	}82}8384impl Hash for IStr {85	fn hash<H: Hasher>(&self, state: &mut H) {86		// IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address87		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);88	}89}9091impl Drop for IStr {92	fn drop(&mut self) {93		maybe_unpool(&self.0);94	}95}9697impl fmt::Debug for IStr {98	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {99		fmt::Debug::fmt(self as &str, f)100	}101}102103impl Display for IStr {104	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {105		fmt::Display::fmt(self as &str, f)106	}107}108109/// Interned byte array110#[derive(Clone, PartialOrd, Ord, Eq)]111pub struct IBytes(Inner);112impl Trace for IBytes {113	fn is_type_tracked() -> bool {114		false115	}116}117118impl IBytes {119	#[must_use]120	pub fn cast_str(self) -> Option<IStr> {121		if Inner::check_utf8(&self.0) {122			Some(IStr(self.0.clone()))123		} else {124			None125		}126	}127	/// # Safety128	/// data should be valid utf8129	unsafe fn cast_str_unchecked(self) -> IStr {130		// SAFETY: data is utf8131		unsafe { Inner::assume_utf8(&self.0) };132		IStr(self.0.clone())133	}134135	#[must_use]136	pub fn as_slice(&self) -> &[u8] {137		self.0.as_slice()138	}139}140141impl Deref for IBytes {142	type Target = [u8];143144	fn deref(&self) -> &Self::Target {145		self.0.as_slice()146	}147}148149impl Borrow<[u8]> for IBytes {150	fn borrow(&self) -> &[u8] {151		self.0.as_slice()152	}153}154155impl PartialEq for IBytes {156	fn eq(&self, other: &Self) -> bool {157		// all IStr should be inlined into same pool158		Inner::ptr_eq(&self.0, &other.0)159	}160}161162impl Hash for IBytes {163	fn hash<H: Hasher>(&self, state: &mut H) {164		// IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address165		state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);166	}167}168169impl Drop for IBytes {170	fn drop(&mut self) {171		maybe_unpool(&self.0);172	}173}174175fn maybe_unpool(inner: &Inner) {176	#[cold]177	#[inline(never)]178	fn unpool(inner: &Inner) {179		// May fail on program termination180		let _ = POOL.try_with(|pool| {181			let mut pool = pool.borrow_mut();182183			if pool.remove(inner).is_none() {184				// On some platforms (i.e i686-windows), try_with will not fail after TLS185				// destructor is called, but instead re-initialize the TLS with the empty pool.186				// Allow non-pooled Drop in this case.187				// https://github.com/CertainLach/jrsonnet/issues/98#issuecomment-1591624016188				//189				// However, if pool is not empty, most likely this is issue #113, and then I don't190				// have any explainations for now.191				assert!(pool.is_empty(), "received an unpooled string not during the program termination, please write any info regarding this crash to https://github.com/CertainLach/jrsonnet/issues/113, thanks!");192			}193		});194	}195	// First reference - current object, second - POOL196	if Inner::strong_count(inner) <= 2 {197		unpool(inner);198	}199}200201impl fmt::Debug for IBytes {202	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {203		fmt::Debug::fmt(self as &[u8], f)204	}205}206207impl<'c> From<Cow<'c, str>> for IStr {208	fn from(v: Cow<'c, str>) -> Self {209		intern_str(&v)210	}211}212impl From<&str> for IStr {213	fn from(v: &str) -> Self {214		intern_str(v)215	}216}217impl From<String> for IStr {218	fn from(s: String) -> Self {219		s.as_str().into()220	}221}222impl From<&String> for IStr {223	fn from(s: &String) -> Self {224		s.as_str().into()225	}226}227impl From<char> for IStr {228	fn from(value: char) -> Self {229		let mut buf = [0; 5];230		Self::from(&*value.encode_utf8(&mut buf))231	}232}233impl From<&[u8]> for IBytes {234	fn from(v: &[u8]) -> Self {235		intern_bytes(v)236	}237}238239#[cfg(feature = "serde")]240impl serde::Serialize for IStr {241	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>242	where243		S: serde::Serializer,244	{245		self.as_str().serialize(serializer)246	}247}248249#[cfg(feature = "serde")]250impl<'de> serde::Deserialize<'de> for IStr {251	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>252	where253		D: serde::Deserializer<'de>,254	{255		let str = <&str>::deserialize(deserializer)?;256		Ok(intern_str(str))257	}258}259260#[cfg(feature = "structdump")]261impl structdump::Codegen for IStr {262	fn gen_code(263		&self,264		res: &mut structdump::CodegenResult,265		_unique: bool,266	) -> structdump::TokenStream {267		let s: &str = self;268		res.add_code(269			structdump::quote! {270				structdump_import::IStr::from(#s)271			},272			Some(structdump::quote![structdump_import::IStr]),273			false,274		)275	}276}277278thread_local! {279	static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));280}281282#[must_use]283pub fn intern_bytes(bytes: &[u8]) -> IBytes {284	POOL.with(|pool| {285		let mut pool = pool.borrow_mut();286		let entry = pool.raw_entry_mut().from_key(bytes);287		match entry {288			hashbrown::hash_map::RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),289			hashbrown::hash_map::RawEntryMut::Vacant(e) => {290				let (k, _) = e.insert(Inner::new_bytes(bytes), ());291				IBytes(k.clone())292			}293		}294	})295}296297#[must_use]298pub fn intern_str(str: &str) -> IStr {299	// SAFETY: Rust strings always utf8300	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }301}302303#[cfg(test)]304mod tests {305	use crate::IStr;306307	#[test]308	fn simple() {309		let a = IStr::from("a");310		let b = IStr::from("a");311312		assert_eq!(a.as_ptr(), b.as_ptr());313	}314}