difftreelog
fix(interner) data should be in UnsafeCell
in: master
2 files changed
// crates/jrsonnet-interner/src/inner.rs
use std::{
    alloc::{self, Layout},
    borrow::Borrow,
    cell::UnsafeCell,
    cmp,
    convert::TryFrom,
    hash::{Hash, Hasher},
    mem,
    ptr::{self, NonNull},
    slice, str,
};

/// Bit of `utf8_refcnt` that records "payload was verified to be UTF-8".
const UTF8_MASK: u32 = 1 << 31;
/// Remaining 31 bits of `utf8_refcnt`, holding the reference count.
const REFCNT_MASK: u32 = !UTF8_MASK;

/// Header placed at the start of every allocation; the payload bytes follow it directly.
#[repr(C)]
struct InnerHeader {
    /// Payload length in bytes.
    size: u32,
    // MSB is checked utf8 flag, rest - refcnt
    utf8_refcnt: u32,
}
impl InnerHeader {
    /// Creates a header for `size` payload bytes with a reference count of one.
    const fn new(size: u32, is_utf8: bool) -> Self {
        Self {
            size,
            utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),
        }
    }

    /// Current reference count (UTF-8 flag masked out).
    const fn refcnt(&self) -> u32 {
        self.utf8_refcnt & REFCNT_MASK
    }
    /// Whether the payload has been marked as valid UTF-8.
    const fn is_utf8(&self) -> bool {
        self.utf8_refcnt & UTF8_MASK != 0
    }

    /// Stores a new reference count, leaving the UTF-8 flag untouched.
    ///
    /// # Panics
    /// If `cnt` does not fit in 31 bits.
    fn set_refcnt(&mut self, cnt: u32) {
        assert_eq!(cnt & UTF8_MASK, 0);
        // Reset all bits except the flag bit, then store the new refcnt
        self.utf8_refcnt = (self.utf8_refcnt & UTF8_MASK) | cnt;
    }
    /// Marks the payload as valid UTF-8.
    fn set_is_utf8(&mut self) {
        self.utf8_refcnt |= UTF8_MASK;
    }
}

/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST
pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);
impl Inner {
    /// Layout of an allocation holding the header followed by `len` payload bytes.
    ///
    /// # Panics
    /// If the total size overflows or is not a valid layout size.
    fn layout(len: usize) -> Layout {
        let total = mem::size_of::<InnerHeader>()
            .checked_add(len)
            .expect("allocation size overflow");
        Layout::from_size_align(total, mem::align_of::<InnerHeader>())
            .expect("allocation size overflow")
    }

    /// Allocates a new buffer containing a copy of `bytes`, with a reference count of one.
    ///
    /// # Safety
    /// `is_utf8` should only be set if data is really checked to be utf8
    /// # Panics
    /// If data is larger than 4GB
    // we allocate with correct alignment
    #[allow(clippy::cast_ptr_alignment)]
    unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {
        // Validate the length BEFORE allocating, so that a panic here can not leak memory.
        let size = u32::try_from(bytes.len()).expect("bytes > 4GB");
        let layout = Self::layout(bytes.len());
        // SAFETY:
        // - layout has non-zero size (it always includes the header), and correct align
        // - header and payload are written right after allocation
        // - new allocation can't overlap with passed slice
        unsafe {
            let data: *mut InnerHeader = alloc::alloc(layout).cast();
            if data.is_null() {
                alloc::handle_alloc_error(layout);
            }
            // The memory is uninitialized: use `write`, which never reads the old value.
            data.write(InnerHeader::new(size, is_utf8));
            ptr::copy_nonoverlapping(bytes.as_ptr(), data.add(1).cast::<u8>(), bytes.len());
            Self(UnsafeCell::new(NonNull::new_unchecked(data)))
        }
    }

    /// Creates a buffer from arbitrary bytes; the UTF-8 flag starts unset.
    pub fn new_bytes(bytes: &[u8]) -> Self {
        // SAFETY: is_utf8 is not set
        unsafe { Self::new_raw(bytes, false) }
    }
    /// Creates a buffer from a string; the UTF-8 flag starts set.
    #[allow(dead_code)]
    pub fn new_str(str: &str) -> Self {
        // SAFETY: strings always utf8
        unsafe { Self::new_raw(str.as_bytes(), true) }
    }

    /// Returns the payload bytes.
    // const `slice::from_raw_parts` is not yet stabilized
    #[allow(clippy::missing_const_for_fn)]
    pub fn as_slice(&self) -> &[u8] {
        let header = Self::header(self);
        // SAFETY: header is not null, and it is correctly initialized
        let size = unsafe { (*header).size };
        // SAFETY: bytes after the header are allocated to be exactly `size` in length
        unsafe { slice::from_raw_parts(Self::as_ptr(self), size as usize) }
    }

    /// Returns the payload as a string without validating it.
    ///
    /// # Safety
    /// Data should be checked to be utf8 via [`Inner::check_utf8`] first
    pub unsafe fn as_str_unchecked(&self) -> &str {
        // SAFETY: data is checked
        unsafe { str::from_utf8_unchecked(self.as_slice()) }
    }

    /// Check data to be utf-8
    ///
    /// Positive results are cached
    pub fn check_utf8(this: &Self) -> bool {
        let header = Self::header_mut(this);
        // SAFETY: header is initialized, no other reference to it is alive
        if unsafe { (&*header).is_utf8() } {
            return true;
        }

        if str::from_utf8(this.as_slice()).is_ok() {
            // SAFETY: header is initialized, no other reference to it is alive
            unsafe { (&mut *header).set_is_utf8() };
            true
        } else {
            false
        }
    }

    /// Marks data as utf-8
    ///
    /// # Safety
    /// data should be really utf-8
    pub unsafe fn assume_utf8(this: &Self) {
        let header = Self::header_mut(this);
        // SAFETY: header is initialized, no other reference to it is alive
        unsafe { (&mut *header).set_is_utf8() }
    }

    /// Raw read-only pointer to the header.
    fn header(this: &Self) -> *const InnerHeader {
        Self::header_mut(this)
    }
    /// Raw mutable pointer to the header.
    ///
    /// Deliberately built with `NonNull::as_ptr` instead of `NonNull::as_mut`: going through
    /// a `&mut InnerHeader` would shrink the pointer to the header bytes only (it is later
    /// used to free the whole allocation) and would assert uniqueness over a header that is
    /// shared between all clones.
    fn header_mut(this: &Self) -> *mut InnerHeader {
        // SAFETY: the cell always holds the pointer stored in `new_raw`/`clone`
        unsafe { (*this.0.get()).as_ptr() }
    }

    /// Creates another handle to the same allocation, incrementing the reference count.
    ///
    /// # Panics
    /// If the reference count no longer fits in 31 bits.
    fn clone(this: &Self) -> Self {
        let header = Self::header_mut(this);
        // SAFETY: header is initialized, no other reference to it is alive
        unsafe {
            let header = &mut *header;
            header.set_refcnt(header.refcnt() + 1);
            Self(UnsafeCell::new(*this.0.get()))
        }
    }

    /// Whether both handles point to the same allocation.
    pub fn ptr_eq(a: &Self, b: &Self) -> bool {
        Self::as_ptr(a) == Self::as_ptr(b)
    }
    /// Pointer to the first payload byte.
    pub fn as_ptr(this: &Self) -> *const u8 {
        // SAFETY: the payload starts right after the header, inside the same allocation
        unsafe { Self::header_mut(this).add(1).cast() }
    }

    /// Number of handles currently sharing this allocation.
    pub fn strong_count(this: &Self) -> u32 {
        let header = Self::header(this);
        // SAFETY: header is initialized
        unsafe { (&*header).refcnt() }
    }
}

impl Clone for Inner {
    fn clone(&self) -> Self {
        // Resolves to the inherent `Inner::clone`, which bumps the refcount.
        Self::clone(self)
    }
}

impl Drop for Inner {
    fn drop(&mut self) {
        /// Frees the allocation; only called by the last remaining handle.
        #[cold]
        #[inline(never)]
        fn dealloc(val: &Inner) {
            let header = Inner::header_mut(val);
            // SAFETY: data is still valid
            let size = unsafe { (*header).size as usize };
            // Same layout as computed in `new_raw` for this payload size.
            let layout = Inner::layout(size);
            // SAFETY: pointer and layout match the original allocation,
            // data will not be used after this, as refcnt == 0
            unsafe { alloc::dealloc(header.cast(), layout) };
        }

        let header = Self::header_mut(self);
        // SAFETY: header is initialized, no other reference to it is alive
        let remaining = unsafe {
            let header = &mut *header;
            let remaining = header.refcnt() - 1;
            header.set_refcnt(remaining);
            remaining
        };
        if remaining == 0 {
            dealloc(self);
        }
    }
}

impl PartialEq for Inner {
    fn eq(&self, other: &Self) -> bool {
        // Pointer comparison is a cheap fast path for handles sharing one allocation
        Self::as_ptr(self) == Self::as_ptr(other) || self.as_slice().eq(other.as_slice())
    }
}
impl Hash for Inner {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Must agree with `[u8]` hashing because of the `Borrow<[u8]>` impl
        self.as_slice().hash(state);
    }
}
impl Eq for Inner {}
impl PartialOrd for Inner {
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Inner {
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.as_slice().cmp(other.as_slice())
    }
}

impl Borrow<[u8]> for Inner {
    fn borrow(&self) -> &[u8] {
        self.as_slice()
    }
}
// crates/jrsonnet-interner/src/lib.rs
// --- a/crates/jrsonnet-interner/src/lib.rs
+++ b/crates/jrsonnet-interner/src/lib.rs
@@ -174,6 +174,7 @@
}
// First reference - current object, second - POOL
if Inner::strong_count(&self.0) <= 2 {
+ eprintln!("unpool");
unpool(&self.0);
}
}
@@ -274,3 +275,16 @@
// SAFETY: Rust strings always utf8
unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }
}
+
+#[cfg(test)]
+mod tests {
+ use crate::IStr;
+
+ #[test]
+ fn simple() {
+ let a = IStr::from("a");
+ let b = IStr::from("a");
+
+ assert_eq!(a.as_ptr(), b.as_ptr());
+ }
+}