git.delta.rocks / jrsonnet / refs/commits / 1e2e6e2fa219

difftreelog

source

crates/jrsonnet-interner/src/inner.rs5.7 KiBsourcehistory
use std::{
	alloc::{self, Layout},
	borrow::Borrow,
	cell::UnsafeCell,
	cmp,
	hash::{Hash, Hasher},
	mem,
	ptr::{self, NonNull},
	slice, str,
};

/// Most significant bit of `InnerHeader::utf8_refcnt`: the "checked to be utf-8" flag.
const UTF8_MASK: u32 = 1 << 31;
/// Remaining 31 bits of `InnerHeader::utf8_refcnt`: the reference count.
const REFCNT_MASK: u32 = !UTF8_MASK;

/// Header stored at the start of every allocation, immediately followed by `size` payload bytes.
#[repr(C)]
struct InnerHeader {
	/// Payload length in bytes.
	size: u32,
	// MSB is checked utf8 flag, rest - refcnt
	utf8_refcnt: u32,
}
impl InnerHeader {
	/// Creates a header for a fresh allocation: refcount is 1, utf-8 flag as requested.
	const fn new(size: u32, is_utf8: bool) -> Self {
		Self {
			size,
			utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),
		}
	}

	/// Current reference count (flag bit masked out).
	const fn refcnt(&self) -> u32 {
		self.utf8_refcnt & REFCNT_MASK
	}
	/// Whether the payload has been marked as valid utf-8.
	const fn is_utf8(&self) -> bool {
		self.utf8_refcnt & UTF8_MASK != 0
	}

	/// Stores a new reference count, keeping the utf-8 flag intact.
	///
	/// # Panics
	/// If `cnt` does not fit in 31 bits (i.e. the refcount overflowed into the flag bit).
	fn set_refcnt(&mut self, cnt: u32) {
		assert_eq!(cnt & UTF8_MASK, 0);
		// Clear every bit except the utf-8 flag (MSB)
		self.utf8_refcnt &= UTF8_MASK;
		// Store refcnt
		self.utf8_refcnt |= cnt;
	}
	/// Sets the utf-8 flag; the reference count is untouched.
	fn set_is_utf8(&mut self) {
		self.utf8_refcnt |= UTF8_MASK;
	}
}

/// Similar to `Rc<[u8]>`, but stores all data (refcnt, size) inline, instead of being DST
///
/// The reference count is updated with plain (non-atomic) reads and writes.
pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);
impl Inner {
	/// Layout of an allocation holding the header followed by `len` payload bytes.
	///
	/// Shared by allocation and deallocation so both always agree.
	///
	/// # Panics
	/// If the total size is not representable as a valid [`Layout`].
	fn layout(len: usize) -> Layout {
		let total = mem::size_of::<InnerHeader>()
			.checked_add(len)
			.expect("allocation size overflow");
		Layout::from_size_align(total, mem::align_of::<InnerHeader>())
			.expect("allocation size exceeds isize::MAX")
	}

	/// Allocates a new buffer holding a copy of `bytes`, with refcount 1.
	///
	/// # Safety
	/// `is_utf8` should only be set if data is really checked to be utf8
	/// # Panics
	/// If data is larger than 4GB
	// we allocate with correct alignment
	#[allow(clippy::cast_ptr_alignment)]
	unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {
		// Validate the size and build the layout *before* allocating, so that an
		// oversized input panics without leaking a fresh allocation.
		let size: u32 = std::convert::TryFrom::try_from(bytes.len()).expect("bytes > 4GB");
		let layout = Self::layout(bytes.len());
		// SAFETY:
		// - layout has non-zero size (it always includes the header), and correct align
		// - allocation failure is handled before the pointer is used
		// - header and payload are written right after allocation
		// - new allocation can't overlap with passed slice
		unsafe {
			let data: *mut InnerHeader = alloc::alloc(layout).cast();
			if data.is_null() {
				alloc::handle_alloc_error(layout);
			}
			// The memory is uninitialized: use `write` so no old value is read or dropped.
			data.write(InnerHeader::new(size, is_utf8));
			ptr::copy_nonoverlapping(bytes.as_ptr(), data.add(1).cast::<u8>(), bytes.len());
			Self(UnsafeCell::new(NonNull::new_unchecked(data)))
		}
	}
	/// Creates a new buffer from arbitrary bytes; the utf-8 flag starts unset.
	///
	/// # Panics
	/// If data is larger than 4GB
	pub fn new_bytes(bytes: &[u8]) -> Self {
		// SAFETY: is_utf8 is not set
		unsafe { Self::new_raw(bytes, false) }
	}
	/// Creates a new buffer from a string; the utf-8 flag starts set.
	///
	/// # Panics
	/// If data is larger than 4GB
	#[allow(dead_code)]
	pub fn new_str(str: &str) -> Self {
		// SAFETY: strings always utf8
		unsafe { Self::new_raw(str.as_bytes(), true) }
	}

	/// Returns the stored payload bytes.
	// `slice::from_raw_parts` is not yet stabilized
	#[allow(clippy::missing_const_for_fn)]
	pub fn as_slice(&self) -> &[u8] {
		let header = Self::header(self);
		// SAFETY: data is not null, and it is correctly initialized
		let size = unsafe { (*header).size };
		// SAFETY: bytes after data is allocated to be exactly data.size in length
		unsafe { slice::from_raw_parts(header.add(1).cast::<u8>(), size as usize) }
	}

	/// Returns the payload as a string without validating it.
	///
	/// # Safety
	/// Data should be checked to be utf8 via [`check_utf8`](Self::check_utf8) first
	pub unsafe fn as_str_unchecked(&self) -> &str {
		// SAFETY: data is checked
		unsafe { str::from_utf8_unchecked(self.as_slice()) }
	}

	/// Check data to be utf-8
	///
	/// Positive results are cached
	pub fn check_utf8(this: &Self) -> bool {
		let header = Self::header_mut(this);
		// SAFETY: header is initialized
		if unsafe { (*header).is_utf8() } {
			return true;
		}

		let valid = str::from_utf8(this.as_slice()).is_ok();
		if valid {
			// SAFETY: header is initialized
			unsafe { (*header).set_is_utf8() };
		}
		valid
	}

	/// Marks data as utf-8
	///
	/// # Safety
	/// data should be really utf-8
	pub unsafe fn assume_utf8(this: &Self) {
		let header = Self::header_mut(this);
		// SAFETY: header is correct
		unsafe { (*header).set_is_utf8() }
	}

	/// Read-only pointer to the allocation header.
	fn header(this: &Self) -> *const InnerHeader {
		// SAFETY: the cell always holds the pointer produced by `new_raw`
		unsafe { (*this.0.get()).as_ptr() }
	}
	/// Mutable pointer to the allocation header.
	fn header_mut(this: &Self) -> *mut InnerHeader {
		// SAFETY: the cell always holds the pointer produced by `new_raw`
		unsafe { (*this.0.get()).as_ptr() }
	}

	/// Creates another handle to the same allocation, incrementing the refcount.
	///
	/// # Panics
	/// If the refcount no longer fits in 31 bits.
	fn clone(this: &Self) -> Self {
		let header = Self::header_mut(this);
		// SAFETY: header is initialized
		unsafe {
			let refcnt = (*header).refcnt() + 1;
			(*header).set_refcnt(refcnt);
			Self(UnsafeCell::new(*this.0.get()))
		}
	}

	/// Whether both handles point to the same allocation.
	pub fn ptr_eq(a: &Self, b: &Self) -> bool {
		Self::as_ptr(a) == Self::as_ptr(b)
	}
	/// Pointer to the first payload byte (right after the header).
	pub fn as_ptr(this: &Self) -> *const u8 {
		// SAFETY: the payload starts exactly one header past the allocation start
		unsafe { Self::header(this).add(1).cast::<u8>() }
	}

	/// Number of live handles sharing this allocation.
	pub fn strong_count(this: &Self) -> u32 {
		let header = Self::header(this);
		// SAFETY: header is initialized
		unsafe { (*header).refcnt() }
	}
}

impl Clone for Inner {
	fn clone(&self) -> Self {
		// Resolves to the inherent associated function above, not to this trait method.
		Self::clone(self)
	}
}

impl Drop for Inner {
	fn drop(&mut self) {
		/// Frees the allocation; kept out of line because most drops only decrement.
		#[cold]
		#[inline(never)]
		fn dealloc(val: &Inner) {
			let header = Inner::header_mut(val);
			// SAFETY: Data is valid yet
			let size = unsafe { (*header).size as usize };
			// SAFETY: layout is the same one used for allocation, data will not be used
			// after this, as refcnt == 0
			unsafe {
				alloc::dealloc(header.cast(), Inner::layout(size));
			}
		}
		let header = Self::header_mut(self);
		// SAFETY: header is initialized
		let refcnt = unsafe {
			let refcnt = (*header).refcnt() - 1;
			(*header).set_refcnt(refcnt);
			refcnt
		};
		if refcnt == 0 {
			dealloc(self);
		}
	}
}

impl PartialEq for Inner {
	fn eq(&self, other: &Self) -> bool {
		// Same allocation implies equal contents; otherwise compare the bytes.
		Self::ptr_eq(self, other) || self.as_slice() == other.as_slice()
	}
}
impl Hash for Inner {
	fn hash<H: Hasher>(&self, state: &mut H) {
		// Hash exactly like `[u8]`, as required by the `Borrow<[u8]>` impl.
		self.as_slice().hash(state);
	}
}
impl Eq for Inner {}
impl PartialOrd for Inner {
	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
		Some(self.cmp(other))
	}
}
impl Ord for Inner {
	fn cmp(&self, other: &Self) -> cmp::Ordering {
		self.as_slice().cmp(other.as_slice())
	}
}

impl Borrow<[u8]> for Inner {
	fn borrow(&self) -> &[u8] {
		self.as_slice()
	}
}