git.delta.rocks / jrsonnet / refs/commits / 7b01ecbd8fbc

difftreelog

source

crates/jrsonnet-interner/src/inner.rs5.6 KiBsourcehistory
1use std::{2	alloc::{self, Layout},3	borrow::Borrow,4	cmp,5	hash::{Hash, Hasher},6	mem,7	ptr::{self, NonNull},8	slice, str,9};1011const UTF8_MASK: u32 = 1 << 31;12const REFCNT_MASK: u32 = !UTF8_MASK;1314#[repr(C)]15struct InnerHeader {16	size: u32,17	// MSB is checked utf8 flag, rest - refcnt18	utf8_refcnt: u32,19}20impl InnerHeader {21	const fn new(size: u32, is_utf8: bool) -> Self {22		Self {23			size,24			utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),25		}26	}2728	const fn refcnt(&self) -> u32 {29		self.utf8_refcnt & REFCNT_MASK30	}31	const fn is_utf8(&self) -> bool {32		self.utf8_refcnt & UTF8_MASK != 033	}3435	fn set_refcnt(&mut self, cnt: u32) {36		assert_eq!(cnt & UTF8_MASK, 0);37		// Reset all bits expect last38		self.utf8_refcnt &= UTF8_MASK;39		// Store refcnt40		self.utf8_refcnt |= cnt;41	}42	fn set_is_utf8(&mut self) {43		self.utf8_refcnt |= UTF8_MASK;44	}45}4647/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST48pub struct Inner(NonNull<u8>);49impl Inner {50	/// # Safety51	/// `is_utf8` should only be set if data is really checked to be utf852	/// # Panics53	/// If data is larger than 4GB54	// we allocate with correct alignment55	#[allow(clippy::cast_ptr_alignment)]56	unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {57		// SAFETY:58		// - layout has non-zero size, and correct align59		// - data is written right after allocation60		// - new allocation can't overlap with passed slice61		unsafe {62			let data = alloc::alloc(Layout::from_size_align_unchecked(63				mem::size_of::<InnerHeader>() + bytes.len(),64				mem::align_of::<InnerHeader>(),65			));66			assert!(!data.is_null());67			*data.cast::<InnerHeader>() =68				InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);69			ptr::copy_nonoverlapping(70				bytes.as_ptr(),71				data.add(mem::size_of::<InnerHeader>()),72				bytes.len(),73			);74			Self(NonNull::new_unchecked(data))75		}76	}77	pub fn new_bytes(bytes: &[u8]) -> Self {78		// SAFETY: is_utf8 is not set79		unsafe { Self::new_raw(bytes, false) }80	}81	#[allow(dead_code)]82	pub fn new_str(str: &str) -> Self {83		// SAFETY: strings always utf884		unsafe { Self::new_raw(str.as_bytes(), true) }85	}8687	// `slice::from_raw_parts` is not yet stabilized88	#[allow(clippy::missing_const_for_fn)]89	pub fn as_slice(&self) -> &[u8] {90		let header = Self::header(self);91		// SAFETY: data is not null, and it is correctly initialized92		let size = unsafe { (*header).size };93		// SAFETY: bytes after data is allocated to be exactly data.size in length94		unsafe {95			slice::from_raw_parts(96				self.0.as_ptr().add(mem::size_of::<InnerHeader>()),97				size as usize,98			)99		}100	}101102	/// # Safety103	/// Data should be checked to be utf8 via [`check_utf8`] first104	pub unsafe fn as_str_unchecked(&self) -> &str {105		// SAFETY: data is checked106		unsafe { str::from_utf8_unchecked(self.as_slice()) }107	}108109	/// Check data to be utf-8110	///111	/// Positive results are cached112	pub fn check_utf8(this: &Self) -> bool {113		let header = Self::header_mut(this);114		// SAFETY: header is initialized115		if unsafe { (*header).is_utf8() } {116			return true;117		}118119		if str::from_utf8(this.as_slice()).is_ok() {120			// SAFETY: header is initialized121			unsafe { (*header).set_is_utf8() };122			true123		} else {124			false125		}126	}127128	/// Marks data as utf-8129	///130	/// # Safety131	/// data should be really utf-8132	pub unsafe fn assume_utf8(this: &Self) {133		let header = Self::header_mut(this);134		// SAFETY: header is correct135		unsafe { (*header).set_is_utf8() }136	}137138	const fn header(this: &Self) -> *const InnerHeader {139		// in `new`, we allocate with correct alignment140		#![allow(clippy::cast_ptr_alignment)]141		this.0.as_ptr() as *const InnerHeader142	}143	const fn header_mut(this: &Self) -> *mut InnerHeader {144		// in `new`, we allocate with correct alignment145		#![allow(clippy::cast_ptr_alignment)]146		this.0.as_ptr().cast::<InnerHeader>()147	}148149	fn clone(this: &Self) -> Self {150		let header = Self::header_mut(this);151		// SAFETY: header is initialized152		unsafe {153			let refcnt = (*header).refcnt() + 1;154			(*header).set_refcnt(refcnt);155		}156		Self(this.0)157	}158159	pub fn ptr_eq(a: &Self, b: &Self) -> bool {160		a.0 == b.0161	}162	pub const fn as_ptr(this: &Self) -> *const u8 {163		// SAFETY: data is initialized164		unsafe { this.0.as_ptr().add(mem::size_of::<InnerHeader>()) }165	}166167	pub const fn strong_count(this: &Self) -> u32 {168		let header = Self::header(this);169		// SAFETY: header is initialized170		unsafe { (*header).refcnt() }171	}172}173174impl Clone for Inner {175	fn clone(&self) -> Self {176		Self::clone(self)177	}178}179180impl Drop for Inner {181	fn drop(&mut self) {182		#[cold]183		#[inline(never)]184		fn dealloc(val: &Inner) {185			let header = Inner::header_mut(val);186			// SAFETY: size is correct, layout is valid187			unsafe {188				alloc::dealloc(189					val.0.as_ptr(),190					Layout::from_size_align_unchecked(191						mem::size_of::<InnerHeader>() + (*header).size as usize,192						mem::align_of::<InnerHeader>(),193					),194				);195			}196		}197		let header = Self::header_mut(self);198		// SAFETY: header is initialized199		let refcnt = unsafe {200			let refcnt = (*header).refcnt() - 1;201			(*header).set_refcnt(refcnt);202			refcnt203		};204		if refcnt == 0 {205			dealloc(self);206		}207	}208}209210impl PartialEq for Inner {211	fn eq(&self, other: &Self) -> bool {212		self.0 == other.0 || self.as_slice().eq(other.as_slice())213	}214}215impl Hash for Inner {216	fn hash<H: Hasher>(&self, state: &mut H) {217		self.as_slice().hash(state);218	}219}220impl Eq for Inner {}221impl PartialOrd for Inner {222	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {223		self.as_slice().partial_cmp(other.as_slice())224	}225}226impl Ord for Inner {227	fn cmp(&self, other: &Self) -> cmp::Ordering {228		self.as_slice().cmp(other.as_slice())229	}230}231232impl Borrow<[u8]> for Inner {233	fn borrow(&self) -> &[u8] {234		self.as_slice()235	}236}