git.delta.rocks / jrsonnet / refs/commits / 45d6bc3e2a43

difftreelog

fix(interner) data should be in UnsafeCell

Yaroslav Bolyukin2023-01-23parent: #153383c.patch.diff
in: master

2 files changed

modifiedcrates/jrsonnet-interner/src/inner.rsdiffbeforeafterboth
before · crates/jrsonnet-interner/src/inner.rs
1use std::{2	alloc::{self, Layout},3	borrow::Borrow,4	cmp,5	hash::{Hash, Hasher},6	mem,7	ptr::{self, NonNull},8	slice, str,9};1011const UTF8_MASK: u32 = 1 << 31;12const REFCNT_MASK: u32 = !UTF8_MASK;1314#[repr(C)]15struct InnerHeader {16	size: u32,17	// MSB is checked utf8 flag, rest - refcnt18	utf8_refcnt: u32,19}20impl InnerHeader {21	const fn new(size: u32, is_utf8: bool) -> Self {22		Self {23			size,24			utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),25		}26	}2728	const fn refcnt(&self) -> u32 {29		self.utf8_refcnt & REFCNT_MASK30	}31	const fn is_utf8(&self) -> bool {32		self.utf8_refcnt & UTF8_MASK != 033	}3435	fn set_refcnt(&mut self, cnt: u32) {36		assert_eq!(cnt & UTF8_MASK, 0);37		// Reset all bits expect last38		self.utf8_refcnt &= UTF8_MASK;39		// Store refcnt40		self.utf8_refcnt |= cnt;41	}42	fn set_is_utf8(&mut self) {43		self.utf8_refcnt |= UTF8_MASK;44	}45}4647/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST48pub struct Inner(NonNull<u8>);49impl Inner {50	/// # Safety51	/// `is_utf8` should only be set if data is really checked to be utf852	/// # Panics53	/// If data is larger than 4GB54	// we allocate with correct alignment55	#[allow(clippy::cast_ptr_alignment)]56	unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {57		// SAFETY:58		// - layout has non-zero size, and correct align59		// - data is written right after allocation60		// - new allocation can't overlap with passed slice61		unsafe {62			let data = alloc::alloc(Layout::from_size_align_unchecked(63				mem::size_of::<InnerHeader>() + bytes.len(),64				mem::align_of::<InnerHeader>(),65			));66			assert!(!data.is_null());67			*data.cast::<InnerHeader>() =68				InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);69			ptr::copy_nonoverlapping(70				bytes.as_ptr(),71				data.add(mem::size_of::<InnerHeader>()),72				bytes.len(),73			);74			Self(NonNull::new_unchecked(data))75		}76	}77	pub fn new_bytes(bytes: &[u8]) -> Self {78		// SAFETY: is_utf8 is not set79		unsafe { Self::new_raw(bytes, false) }80	}81	#[allow(dead_code)]82	pub fn new_str(str: &str) -> Self {83		// SAFETY: strings always utf884		unsafe { Self::new_raw(str.as_bytes(), true) }85	}8687	// `slice::from_raw_parts` is not yet stabilized88	#[allow(clippy::missing_const_for_fn)]89	pub fn as_slice(&self) -> &[u8] {90		let header = Self::header(self);91		// SAFETY: data is not null, and it is correctly initialized92		let size = unsafe { (*header).size };93		// SAFETY: bytes after data is allocated to be exactly data.size in length94		unsafe {95			slice::from_raw_parts(96				self.0.as_ptr().add(mem::size_of::<InnerHeader>()),97				size as usize,98			)99		}100	}101102	/// # Safety103	/// Data should be checked to be utf8 via [`check_utf8`] first104	pub unsafe fn as_str_unchecked(&self) -> &str {105		// SAFETY: data is checked106		unsafe { str::from_utf8_unchecked(self.as_slice()) }107	}108109	/// Check data to be utf-8110	///111	/// Positive results are cached112	pub fn check_utf8(this: &Self) -> bool {113		let header = Self::header_mut(this);114		// SAFETY: header is initialized115		if unsafe { (*header).is_utf8() } {116			return true;117		}118119		if str::from_utf8(this.as_slice()).is_ok() {120			// SAFETY: header is initialized121			unsafe { (*header).set_is_utf8() };122			true123		} else {124			false125		}126	}127128	/// Marks data as utf-8129	///130	/// # Safety131	/// data should be really utf-8132	pub unsafe fn assume_utf8(this: &Self) {133		let header = Self::header_mut(this);134		// SAFETY: header is correct135		unsafe { (*header).set_is_utf8() }136	}137138	const fn header(this: &Self) -> *const InnerHeader {139		// in `new`, we allocate with correct alignment140		#![allow(clippy::cast_ptr_alignment)]141		this.0.as_ptr() as *const InnerHeader142	}143	const fn header_mut(this: &Self) -> *mut InnerHeader {144		// in `new`, we allocate with correct alignment145		#![allow(clippy::cast_ptr_alignment)]146		this.0.as_ptr().cast::<InnerHeader>()147	}148149	fn clone(this: &Self) -> Self {150		let header = Self::header_mut(this);151		// SAFETY: header is initialized152		unsafe {153			let refcnt = (*header).refcnt() + 1;154			(*header).set_refcnt(refcnt);155		}156		Self(this.0)157	}158159	pub fn ptr_eq(a: &Self, b: &Self) -> bool {160		a.0 == b.0161	}162	pub const fn as_ptr(this: &Self) -> *const u8 {163		// SAFETY: data is initialized164		unsafe { this.0.as_ptr().add(mem::size_of::<InnerHeader>()) }165	}166167	pub const fn strong_count(this: &Self) -> u32 {168		let header = Self::header(this);169		// SAFETY: header is initialized170		unsafe { (*header).refcnt() }171	}172}173174impl Clone for Inner {175	fn clone(&self) -> Self {176		Self::clone(self)177	}178}179180impl Drop for Inner {181	fn drop(&mut self) {182		#[cold]183		#[inline(never)]184		fn dealloc(val: &Inner) {185			let header = Inner::header_mut(val);186			// SAFETY: size is correct, layout is valid187			unsafe {188				alloc::dealloc(189					val.0.as_ptr(),190					Layout::from_size_align_unchecked(191						mem::size_of::<InnerHeader>() + (*header).size as usize,192						mem::align_of::<InnerHeader>(),193					),194				);195			}196		}197		let header = Self::header_mut(self);198		// SAFETY: header is initialized199		let refcnt = unsafe {200			let refcnt = (*header).refcnt() - 1;201			(*header).set_refcnt(refcnt);202			refcnt203		};204		if refcnt == 0 {205			dealloc(self);206		}207	}208}209210impl PartialEq for Inner {211	fn eq(&self, other: &Self) -> bool {212		self.0 == other.0 || self.as_slice().eq(other.as_slice())213	}214}215impl Hash for Inner {216	fn hash<H: Hasher>(&self, state: &mut H) {217		self.as_slice().hash(state);218	}219}220impl Eq for Inner {}221impl PartialOrd for Inner {222	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {223		self.as_slice().partial_cmp(other.as_slice())224	}225}226impl Ord for Inner {227	fn cmp(&self, other: &Self) -> cmp::Ordering {228		self.as_slice().cmp(other.as_slice())229	}230}231232impl Borrow<[u8]> for Inner {233	fn borrow(&self) -> &[u8] {234		self.as_slice()235	}236}
after · crates/jrsonnet-interner/src/inner.rs
1use std::{2	alloc::{self, Layout},3	borrow::Borrow,4	cell::UnsafeCell,5	cmp,6	hash::{Hash, Hasher},7	mem,8	ptr::{self, NonNull},9	slice, str,10};1112const UTF8_MASK: u32 = 1 << 31;13const REFCNT_MASK: u32 = !UTF8_MASK;1415#[repr(C)]16struct InnerHeader {17	size: u32,18	// MSB is checked utf8 flag, rest - refcnt19	utf8_refcnt: u32,20}21impl InnerHeader {22	const fn new(size: u32, is_utf8: bool) -> Self {23		Self {24			size,25			utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),26		}27	}2829	const fn refcnt(&self) -> u32 {30		self.utf8_refcnt & REFCNT_MASK31	}32	const fn is_utf8(&self) -> bool {33		self.utf8_refcnt & UTF8_MASK != 034	}3536	fn set_refcnt(&mut self, cnt: u32) {37		assert_eq!(cnt & UTF8_MASK, 0);38		// Reset all bits expect last39		self.utf8_refcnt &= UTF8_MASK;40		// Store refcnt41		self.utf8_refcnt |= cnt;42	}43	fn set_is_utf8(&mut self) {44		self.utf8_refcnt |= UTF8_MASK;45	}46}4748/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST49pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);50impl Inner {51	/// # Safety52	/// `is_utf8` should only be set if data is really checked to be utf853	/// # Panics54	/// If data is larger than 4GB55	// we allocate with correct alignment56	#[allow(clippy::cast_ptr_alignment)]57	unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {58		// SAFETY:59		// - layout has non-zero size, and correct align60		// - data is written right after allocation61		// - new allocation can't overlap with passed slice62		unsafe {63			let data: *mut InnerHeader = alloc::alloc(Layout::from_size_align_unchecked(64				mem::size_of::<InnerHeader>() + bytes.len(),65				mem::align_of::<InnerHeader>(),66			))67			.cast();68			assert!(!data.is_null());69			*data = InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);70			ptr::copy_nonoverlapping(bytes.as_ptr(), data.offset(1).cast::<u8>(), bytes.len());71			Self(UnsafeCell::new(NonNull::new_unchecked(data)))72		}73	}74	pub fn new_bytes(bytes: &[u8]) -> Self {75		// SAFETY: is_utf8 is not set76		unsafe { Self::new_raw(bytes, false) }77	}78	#[allow(dead_code)]79	pub fn new_str(str: &str) -> Self {80		// SAFETY: strings always utf881		unsafe { Self::new_raw(str.as_bytes(), true) }82	}8384	// `slice::from_raw_parts` is not yet stabilized85	#[allow(clippy::missing_const_for_fn)]86	pub fn as_slice(&self) -> &[u8] {87		let header = Self::header(self);88		// SAFETY: data is not null, and it is correctly initialized89		let size = unsafe { (*header).size };90		// SAFETY: bytes after data is allocated to be exactly data.size in length91		unsafe {92			slice::from_raw_parts(93				(*self.0.get()).as_ptr().offset(1).cast::<u8>(),94				size as usize,95			)96		}97	}9899	/// # Safety100	/// Data should be checked to be utf8 via [`check_utf8`] first101	pub unsafe fn as_str_unchecked(&self) -> &str {102		// SAFETY: data is checked103		unsafe { str::from_utf8_unchecked(self.as_slice()) }104	}105106	/// Check data to be utf-8107	///108	/// Positive results are cached109	pub fn check_utf8(this: &Self) -> bool {110		let header = Self::header_mut(this);111		// SAFETY: header is initialized112		if unsafe { (*header).is_utf8() } {113			return true;114		}115116		if str::from_utf8(this.as_slice()).is_ok() {117			// SAFETY: header is initialized118			unsafe { (*header).set_is_utf8() };119			true120		} else {121			false122		}123	}124125	/// Marks data as utf-8126	///127	/// # Safety128	/// data should be really utf-8129	pub unsafe fn assume_utf8(this: &Self) {130		let header = Self::header_mut(this);131		// SAFETY: header is correct132		unsafe { (*header).set_is_utf8() }133	}134135	fn header(this: &Self) -> *const InnerHeader {136		// Safety: in `new`, we allocate with correct alignment137		unsafe { (*this.0.get()).as_ptr() }138	}139	fn header_mut(this: &Self) -> *mut InnerHeader {140		// Safety: in `new`, we allocate with correct alignment141		unsafe { (*this.0.get()).as_mut() }142	}143144	fn clone(this: &Self) -> Self {145		let header = Self::header_mut(this);146		// SAFETY: header is initialized147		unsafe {148			let refcnt = (*header).refcnt() + 1;149			(*header).set_refcnt(refcnt);150			Self(UnsafeCell::new(*this.0.get()))151		}152	}153154	pub fn ptr_eq(a: &Self, b: &Self) -> bool {155		Self::as_ptr(a) == Self::as_ptr(b)156	}157	pub fn as_ptr(this: &Self) -> *const u8 {158		// SAFETY: data is initialized159		unsafe { (*this.0.get()).as_ptr().offset(1).cast() }160	}161162	pub fn strong_count(this: &Self) -> u32 {163		let header = Self::header(this);164		// SAFETY: header is initialized165		unsafe { (*header).refcnt() }166	}167}168169impl Clone for Inner {170	fn clone(&self) -> Self {171		Self::clone(self)172	}173}174175impl Drop for Inner {176	fn drop(&mut self) {177		#[cold]178		#[inline(never)]179		fn dealloc(val: &Inner) {180			let header = Inner::header_mut(val);181			// Safety: Data is valid yet182			let size = unsafe { (*header).size as usize };183			// SAFETY: size is correct, layout is valid, data will not be used after this, as refcn == 0184			unsafe {185				alloc::dealloc(186					header.cast(),187					Layout::from_size_align_unchecked(188						mem::size_of::<InnerHeader>() + size,189						mem::align_of::<InnerHeader>(),190					),191				);192			}193		}194		let header = Self::header_mut(self);195		// SAFETY: header is initialized196		let refcnt = unsafe {197			let refcnt = (*header).refcnt() - 1;198			(*header).set_refcnt(refcnt);199			refcnt200		};201		if refcnt == 0 {202			dealloc(self);203		}204	}205}206207impl PartialEq for Inner {208	fn eq(&self, other: &Self) -> bool {209		Self::as_ptr(self) == Self::as_ptr(other) || self.as_slice().eq(other.as_slice())210	}211}212impl Hash for Inner {213	fn hash<H: Hasher>(&self, state: &mut H) {214		self.as_slice().hash(state);215	}216}217impl Eq for Inner {}218impl PartialOrd for Inner {219	fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {220		self.as_slice().partial_cmp(other.as_slice())221	}222}223impl Ord for Inner {224	fn cmp(&self, other: &Self) -> cmp::Ordering {225		self.as_slice().cmp(other.as_slice())226	}227}228229impl Borrow<[u8]> for Inner {230	fn borrow(&self) -> &[u8] {231		self.as_slice()232	}233}
modifiedcrates/jrsonnet-interner/src/lib.rsdiffbeforeafterboth
--- a/crates/jrsonnet-interner/src/lib.rs
+++ b/crates/jrsonnet-interner/src/lib.rs
@@ -174,6 +174,7 @@
 		}
 		// First reference - current object, second - POOL
 		if Inner::strong_count(&self.0) <= 2 {
+			eprintln!("unpool");
 			unpool(&self.0);
 		}
 	}
@@ -274,3 +275,16 @@
 	// SAFETY: Rust strings always utf8
 	unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }
 }
+
+#[cfg(test)]
+mod tests {
+	use crate::IStr;
+
+	#[test]
+	fn simple() {
+		let a = IStr::from("a");
+		let b = IStr::from("a");
+
+		assert_eq!(a.as_ptr(), b.as_ptr());
+	}
+}