git.delta.rocks / jrsonnet / refs/heads / master

difftreelog

source

crates/jrsonnet-interner/src/inner.rs5.8 KiBsourcehistory
use std::{
	alloc::{self, Layout},
	borrow::Borrow,
	cell::UnsafeCell,
	cmp,
	hash::{Hash, Hasher},
	mem,
	ptr::{self, NonNull},
	slice, str,
};

/// Most significant bit of `InnerHeader::utf8_refcnt`: set once the payload is known to be utf-8.
const UTF8_MASK: u32 = 1 << 31;
/// Remaining 31 bits of `InnerHeader::utf8_refcnt`: the reference count.
const REFCNT_MASK: u32 = !UTF8_MASK;

/// Header stored at the start of every allocation; the payload bytes follow it directly.
#[repr(C)]
struct InnerHeader {
	/// Payload length in bytes
	size: u32,
	// MSB is checked utf8 flag, rest - refcnt
	utf8_refcnt: u32,
}
impl InnerHeader {
	/// Creates a header for a payload of `size` bytes with a reference count of one.
	const fn new(size: u32, is_utf8: bool) -> Self {
		Self {
			size,
			utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),
		}
	}

	/// Current reference count, with the utf-8 flag masked out.
	const fn refcnt(&self) -> u32 {
		self.utf8_refcnt & REFCNT_MASK
	}
	/// Whether the utf-8 flag is set.
	const fn is_utf8(&self) -> bool {
		self.utf8_refcnt & UTF8_MASK != 0
	}

	/// Stores a new reference count, preserving the utf-8 flag.
	///
	/// # Panics
	/// If `cnt` does not fit in the 31 bits reserved for the count
	fn set_refcnt(&mut self, cnt: u32) {
		assert_eq!(cnt & UTF8_MASK, 0);
		// Reset all bits except the flag bit
		self.utf8_refcnt &= UTF8_MASK;
		// Store refcnt
		self.utf8_refcnt |= cnt;
	}
	/// Sets the utf-8 flag, leaving the reference count untouched.
	fn set_is_utf8(&mut self) {
		self.utf8_refcnt |= UTF8_MASK;
	}
}

/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST
///
/// The reference count is updated with plain (non-atomic) reads and writes.
pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);
impl Inner {
	/// Layout of an allocation holding the header followed by `len` payload bytes.
	///
	/// Shared by allocation and deallocation, so both always agree on the layout.
	///
	/// # Panics
	/// If the total size is not representable as a valid [`Layout`]
	fn layout(len: usize) -> Layout {
		let total = mem::size_of::<InnerHeader>()
			.checked_add(len)
			.expect("allocation size overflows usize");
		Layout::from_size_align(total, mem::align_of::<InnerHeader>())
			.expect("allocation size exceeds isize::MAX")
	}

	/// # Safety
	/// `is_utf8` should only be set if data is really checked to be utf8
	/// # Panics
	/// If data is larger than 4GB
	// we allocate with correct alignment
	#[allow(clippy::cast_ptr_alignment)]
	unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {
		// Validate the length *before* allocating: panicking afterwards would leak the allocation.
		// Fully qualified call, so it does not depend on `TryFrom` being in the prelude.
		let size = <u32 as std::convert::TryFrom<usize>>::try_from(bytes.len())
			.expect("bytes > 4GB");
		let layout = Self::layout(bytes.len());
		// SAFETY:
		// - layout has non-zero size (it always includes the header), and correct align
		// - header and payload are written right after allocation, before anything reads them
		// - new allocation can't overlap with passed slice
		unsafe {
			let header: *mut InnerHeader = alloc::alloc(layout).cast();
			if header.is_null() {
				// Standard way to report allocation failure
				alloc::handle_alloc_error(layout);
			}
			header.write(InnerHeader::new(size, is_utf8));
			ptr::copy_nonoverlapping(bytes.as_ptr(), header.add(1).cast::<u8>(), bytes.len());
			Self(UnsafeCell::new(NonNull::new_unchecked(header)))
		}
	}

	/// Creates a new value holding a copy of `bytes`, not marked as utf-8.
	///
	/// # Panics
	/// If data is larger than 4GB
	pub fn new_bytes(bytes: &[u8]) -> Self {
		// SAFETY: is_utf8 is not set
		unsafe { Self::new_raw(bytes, false) }
	}

	/// Creates a new value holding a copy of `str`, already marked as utf-8.
	///
	/// # Panics
	/// If data is larger than 4GB
	#[allow(dead_code)]
	pub fn new_str(str: &str) -> Self {
		// SAFETY: strings always utf8
		unsafe { Self::new_raw(str.as_bytes(), true) }
	}

	/// Returns the stored payload bytes.
	// Kept non-const (original note: `slice::from_raw_parts` was not yet const-stable)
	#[allow(clippy::missing_const_for_fn)]
	pub fn as_slice(&self) -> &[u8] {
		let header = Self::header(self);
		// SAFETY: data is not null, and it is correctly initialized
		let size = unsafe { (*header).size };
		// SAFETY: bytes after header are allocated to be exactly header.size in length
		unsafe { slice::from_raw_parts(Self::as_ptr(self), size as usize) }
	}

	/// Returns the payload as `&str` without validating it.
	///
	/// # Safety
	/// Data should be checked to be utf8 via [`Self::check_utf8`] first
	pub unsafe fn as_str_unchecked(&self) -> &str {
		// SAFETY: data is checked
		unsafe { str::from_utf8_unchecked(self.as_slice()) }
	}

	/// Check data to be utf-8
	///
	/// Positive results are cached
	pub fn check_utf8(this: &Self) -> bool {
		let header = Self::header_mut(this);
		// SAFETY: header is initialized
		if unsafe { (*header).is_utf8() } {
			return true;
		}

		let valid = str::from_utf8(this.as_slice()).is_ok();
		if valid {
			// SAFETY: header is initialized
			unsafe { (*header).set_is_utf8() };
		}
		valid
	}

	/// Marks data as utf-8
	///
	/// # Safety
	/// data should be really utf-8
	pub unsafe fn assume_utf8(this: &Self) {
		let header = Self::header_mut(this);
		// SAFETY: header is correct
		unsafe { (*header).set_is_utf8() }
	}

	/// Read-only pointer to the header at the start of the allocation.
	fn header(this: &Self) -> *const InnerHeader {
		// SAFETY: the cell always holds the pointer stored in `new_raw`/`clone`
		unsafe { (*this.0.get()).as_ptr() }
	}
	/// Mutable pointer to the header at the start of the allocation.
	fn header_mut(this: &Self) -> *mut InnerHeader {
		// SAFETY: the cell always holds the pointer stored in `new_raw`/`clone`
		unsafe { (*this.0.get()).as_ptr() }
	}

	/// Creates another handle to the same allocation, incrementing the reference count.
	///
	/// # Panics
	/// If the incremented count no longer fits in 31 bits (see `InnerHeader::set_refcnt`)
	fn clone(this: &Self) -> Self {
		let header = Self::header_mut(this);
		// SAFETY: header is initialized
		unsafe {
			let refcnt = (*header).refcnt() + 1;
			(*header).set_refcnt(refcnt);
			Self(UnsafeCell::new(*this.0.get()))
		}
	}

	/// Whether both handles point to the same allocation.
	pub fn ptr_eq(a: &Self, b: &Self) -> bool {
		Self::as_ptr(a) == Self::as_ptr(b)
	}
	/// Pointer to the first payload byte (right after the header).
	pub fn as_ptr(this: &Self) -> *const u8 {
		// SAFETY: data is initialized, payload starts one header past the allocation start
		unsafe { (*this.0.get()).as_ptr().add(1).cast() }
	}

	/// Number of handles currently sharing this allocation.
	pub fn strong_count(this: &Self) -> u32 {
		let header = Self::header(this);
		// SAFETY: header is initialized
		unsafe { (*header).refcnt() }
	}

	/// Payload length in bytes, as stored in the header.
	pub fn len32(&self) -> u32 {
		let header = Self::header(self);
		// SAFETY: header is initialized
		unsafe { (*header).size }
	}
}

impl Clone for Inner {
	fn clone(&self) -> Self {
		// Resolves to the inherent `Inner::clone`, not recursively to this trait method
		Self::clone(self)
	}
}

impl Drop for Inner {
	fn drop(&mut self) {
		// Releasing the allocation happens once per allocation, so it is kept out of line
		#[cold]
		#[inline(never)]
		fn dealloc(val: &Inner) {
			let header = Inner::header_mut(val);
			// SAFETY: header is still valid, the allocation is not released yet
			let size = unsafe { (*header).size } as usize;
			// Same computation that succeeded at allocation time, so it yields the same layout
			let layout = Inner::layout(size);
			// SAFETY: layout matches the one used for allocation, data will not be used
			// after this, as refcnt == 0
			unsafe { alloc::dealloc(header.cast(), layout) };
		}
		let header = Self::header_mut(self);
		// SAFETY: header is initialized
		let refcnt = unsafe {
			let refcnt = (*header).refcnt() - 1;
			(*header).set_refcnt(refcnt);
			refcnt
		};
		if refcnt == 0 {
			dealloc(self);
		}
	}
}

impl PartialEq for Inner {
	fn eq(&self, other: &Self) -> bool {
		// Pointer comparison first: handles to the same allocation are trivially equal
		Self::as_ptr(self) == Self::as_ptr(other) || self.as_slice().eq(other.as_slice())
	}
}
impl Hash for Inner {
	fn hash<H: Hasher>(&self, state: &mut H) {
		// Hashes exactly like `[u8]`, as required by the `Borrow<[u8]>` impl
		self.as_slice().hash(state);
	}
}
impl Eq for Inner {}
impl PartialOrd for Inner {
	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
		Some(self.cmp(other))
	}
}
impl Ord for Inner {
	fn cmp(&self, other: &Self) -> cmp::Ordering {
		self.as_slice().cmp(other.as_slice())
	}
}

impl Borrow<[u8]> for Inner {
	fn borrow(&self) -> &[u8] {
		self.as_slice()
	}
}