difftreelog
fix(interner) data should be in UnsafeCell
in: master
2 files changed
crates/jrsonnet-interner/src/inner.rsdiffbeforeafterboth1use std::{2 alloc::{self, Layout},3 borrow::Borrow,4 cmp,5 hash::{Hash, Hasher},6 mem,7 ptr::{self, NonNull},8 slice, str,9};1011const UTF8_MASK: u32 = 1 << 31;12const REFCNT_MASK: u32 = !UTF8_MASK;1314#[repr(C)]15struct InnerHeader {16 size: u32,17 // MSB is checked utf8 flag, rest - refcnt18 utf8_refcnt: u32,19}20impl InnerHeader {21 const fn new(size: u32, is_utf8: bool) -> Self {22 Self {23 size,24 utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),25 }26 }2728 const fn refcnt(&self) -> u32 {29 self.utf8_refcnt & REFCNT_MASK30 }31 const fn is_utf8(&self) -> bool {32 self.utf8_refcnt & UTF8_MASK != 033 }3435 fn set_refcnt(&mut self, cnt: u32) {36 assert_eq!(cnt & UTF8_MASK, 0);37 // Reset all bits expect last38 self.utf8_refcnt &= UTF8_MASK;39 // Store refcnt40 self.utf8_refcnt |= cnt;41 }42 fn set_is_utf8(&mut self) {43 self.utf8_refcnt |= UTF8_MASK;44 }45}4647/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST48pub struct Inner(NonNull<u8>);49impl Inner {50 /// # Safety51 /// `is_utf8` should only be set if data is really checked to be utf852 /// # Panics53 /// If data is larger than 4GB54 // we allocate with correct alignment55 #[allow(clippy::cast_ptr_alignment)]56 unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {57 // SAFETY:58 // - layout has non-zero size, and correct align59 // - data is written right after allocation60 // - new allocation can't overlap with passed slice61 unsafe {62 let data = alloc::alloc(Layout::from_size_align_unchecked(63 mem::size_of::<InnerHeader>() + bytes.len(),64 mem::align_of::<InnerHeader>(),65 ));66 assert!(!data.is_null());67 *data.cast::<InnerHeader>() =68 InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);69 ptr::copy_nonoverlapping(70 bytes.as_ptr(),71 data.add(mem::size_of::<InnerHeader>()),72 bytes.len(),73 );74 Self(NonNull::new_unchecked(data))75 }76 }77 pub fn new_bytes(bytes: &[u8]) -> Self {78 // SAFETY: is_utf8 is not set79 unsafe { Self::new_raw(bytes, false) }80 }81 #[allow(dead_code)]82 pub fn new_str(str: &str) -> Self {83 // SAFETY: strings always utf884 unsafe { Self::new_raw(str.as_bytes(), true) }85 }8687 // `slice::from_raw_parts` is not yet stabilized88 #[allow(clippy::missing_const_for_fn)]89 pub fn as_slice(&self) -> &[u8] {90 let header = Self::header(self);91 // SAFETY: data is not null, and it is correctly initialized92 let size = unsafe { (*header).size };93 // SAFETY: bytes after data is allocated to be exactly data.size in length94 unsafe {95 slice::from_raw_parts(96 self.0.as_ptr().add(mem::size_of::<InnerHeader>()),97 size as usize,98 )99 }100 }101102 /// # Safety103 /// Data should be checked to be utf8 via [`check_utf8`] first104 pub unsafe fn as_str_unchecked(&self) -> &str {105 // SAFETY: data is checked106 unsafe { str::from_utf8_unchecked(self.as_slice()) }107 }108109 /// Check data to be utf-8110 ///111 /// Positive results are cached112 pub fn check_utf8(this: &Self) -> bool {113 let header = Self::header_mut(this);114 // SAFETY: header is initialized115 if unsafe { (*header).is_utf8() } {116 return true;117 }118119 if str::from_utf8(this.as_slice()).is_ok() {120 // SAFETY: header is initialized121 unsafe { (*header).set_is_utf8() };122 true123 } else {124 false125 }126 }127128 /// Marks data as utf-8129 ///130 /// # Safety131 /// data should be really utf-8132 pub unsafe fn assume_utf8(this: &Self) {133 let header = Self::header_mut(this);134 // SAFETY: header is correct135 unsafe { (*header).set_is_utf8() }136 }137138 const fn header(this: &Self) -> *const InnerHeader {139 // in `new`, we allocate with correct alignment140 #![allow(clippy::cast_ptr_alignment)]141 this.0.as_ptr() as *const InnerHeader142 }143 const fn header_mut(this: &Self) -> *mut InnerHeader {144 // in `new`, we allocate with correct alignment145 #![allow(clippy::cast_ptr_alignment)]146 this.0.as_ptr().cast::<InnerHeader>()147 }148149 fn clone(this: &Self) -> Self {150 let header = Self::header_mut(this);151 // SAFETY: header is initialized152 unsafe {153 let refcnt = (*header).refcnt() + 1;154 (*header).set_refcnt(refcnt);155 }156 Self(this.0)157 }158159 pub fn ptr_eq(a: &Self, b: &Self) -> bool {160 a.0 == b.0161 }162 pub const fn as_ptr(this: &Self) -> *const u8 {163 // SAFETY: data is initialized164 unsafe { this.0.as_ptr().add(mem::size_of::<InnerHeader>()) }165 }166167 pub const fn strong_count(this: &Self) -> u32 {168 let header = Self::header(this);169 // SAFETY: header is initialized170 unsafe { (*header).refcnt() }171 }172}173174impl Clone for Inner {175 fn clone(&self) -> Self {176 Self::clone(self)177 }178}179180impl Drop for Inner {181 fn drop(&mut self) {182 #[cold]183 #[inline(never)]184 fn dealloc(val: &Inner) {185 let header = Inner::header_mut(val);186 // SAFETY: size is correct, layout is valid187 unsafe {188 alloc::dealloc(189 val.0.as_ptr(),190 Layout::from_size_align_unchecked(191 mem::size_of::<InnerHeader>() + (*header).size as usize,192 mem::align_of::<InnerHeader>(),193 ),194 );195 }196 }197 let header = Self::header_mut(self);198 // SAFETY: header is initialized199 let refcnt = unsafe {200 let refcnt = (*header).refcnt() - 1;201 (*header).set_refcnt(refcnt);202 refcnt203 };204 if refcnt == 0 {205 dealloc(self);206 }207 }208}209210impl PartialEq for Inner {211 fn eq(&self, other: &Self) -> bool {212 self.0 == other.0 || self.as_slice().eq(other.as_slice())213 }214}215impl Hash for Inner {216 fn hash<H: Hasher>(&self, state: &mut H) {217 self.as_slice().hash(state);218 }219}220impl Eq for Inner {}221impl PartialOrd for Inner {222 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {223 self.as_slice().partial_cmp(other.as_slice())224 }225}226impl Ord for Inner {227 fn cmp(&self, other: &Self) -> cmp::Ordering {228 self.as_slice().cmp(other.as_slice())229 }230}231232impl Borrow<[u8]> for Inner {233 fn borrow(&self) -> &[u8] {234 self.as_slice()235 }236}1use std::{2 alloc::{self, Layout},3 borrow::Borrow,4 cell::UnsafeCell,5 cmp,6 hash::{Hash, Hasher},7 mem,8 ptr::{self, NonNull},9 slice, str,10};1112const UTF8_MASK: u32 = 1 << 31;13const REFCNT_MASK: u32 = !UTF8_MASK;1415#[repr(C)]16struct InnerHeader {17 size: u32,18 // MSB is checked utf8 flag, rest - refcnt19 utf8_refcnt: u32,20}21impl InnerHeader {22 const fn new(size: u32, is_utf8: bool) -> Self {23 Self {24 size,25 utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),26 }27 }2829 const fn refcnt(&self) -> u32 {30 self.utf8_refcnt & REFCNT_MASK31 }32 const fn is_utf8(&self) -> bool {33 self.utf8_refcnt & UTF8_MASK != 034 }3536 fn set_refcnt(&mut self, cnt: u32) {37 assert_eq!(cnt & UTF8_MASK, 0);38 // Reset all bits expect last39 self.utf8_refcnt &= UTF8_MASK;40 // Store refcnt41 self.utf8_refcnt |= cnt;42 }43 fn set_is_utf8(&mut self) {44 self.utf8_refcnt |= UTF8_MASK;45 }46}4748/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST49pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);50impl Inner {51 /// # Safety52 /// `is_utf8` should only be set if data is really checked to be utf853 /// # Panics54 /// If data is larger than 4GB55 // we allocate with correct alignment56 #[allow(clippy::cast_ptr_alignment)]57 unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {58 // SAFETY:59 // - layout has non-zero size, and correct align60 // - data is written right after allocation61 // - new allocation can't overlap with passed slice62 unsafe {63 let data: *mut InnerHeader = alloc::alloc(Layout::from_size_align_unchecked(64 mem::size_of::<InnerHeader>() + bytes.len(),65 mem::align_of::<InnerHeader>(),66 ))67 .cast();68 assert!(!data.is_null());69 *data = InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);70 ptr::copy_nonoverlapping(bytes.as_ptr(), data.offset(1).cast::<u8>(), bytes.len());71 Self(UnsafeCell::new(NonNull::new_unchecked(data)))72 }73 }74 pub fn new_bytes(bytes: &[u8]) -> Self {75 // SAFETY: is_utf8 is not set76 unsafe { Self::new_raw(bytes, false) }77 }78 #[allow(dead_code)]79 pub fn new_str(str: &str) -> Self {80 // SAFETY: strings always utf881 unsafe { Self::new_raw(str.as_bytes(), true) }82 }8384 // `slice::from_raw_parts` is not yet stabilized85 #[allow(clippy::missing_const_for_fn)]86 pub fn as_slice(&self) -> &[u8] {87 let header = Self::header(self);88 // SAFETY: data is not null, and it is correctly initialized89 let size = unsafe { (*header).size };90 // SAFETY: bytes after data is allocated to be exactly data.size in length91 unsafe {92 slice::from_raw_parts(93 (*self.0.get()).as_ptr().offset(1).cast::<u8>(),94 size as usize,95 )96 }97 }9899 /// # Safety100 /// Data should be checked to be utf8 via [`check_utf8`] first101 pub unsafe fn as_str_unchecked(&self) -> &str {102 // SAFETY: data is checked103 unsafe { str::from_utf8_unchecked(self.as_slice()) }104 }105106 /// Check data to be utf-8107 ///108 /// Positive results are cached109 pub fn check_utf8(this: &Self) -> bool {110 let header = Self::header_mut(this);111 // SAFETY: header is initialized112 if unsafe { (*header).is_utf8() } {113 return true;114 }115116 if str::from_utf8(this.as_slice()).is_ok() {117 // SAFETY: header is initialized118 unsafe { (*header).set_is_utf8() };119 true120 } else {121 false122 }123 }124125 /// Marks data as utf-8126 ///127 /// # Safety128 /// data should be really utf-8129 pub unsafe fn assume_utf8(this: &Self) {130 let header = Self::header_mut(this);131 // SAFETY: header is correct132 unsafe { (*header).set_is_utf8() }133 }134135 fn header(this: &Self) -> *const InnerHeader {136 // Safety: in `new`, we allocate with correct alignment137 unsafe { (*this.0.get()).as_ptr() }138 }139 fn header_mut(this: &Self) -> *mut InnerHeader {140 // Safety: in `new`, we allocate with correct alignment141 unsafe { (*this.0.get()).as_mut() }142 }143144 fn clone(this: &Self) -> Self {145 let header = Self::header_mut(this);146 // SAFETY: header is initialized147 unsafe {148 let refcnt = (*header).refcnt() + 1;149 (*header).set_refcnt(refcnt);150 Self(UnsafeCell::new(*this.0.get()))151 }152 }153154 pub fn ptr_eq(a: &Self, b: &Self) -> bool {155 Self::as_ptr(a) == Self::as_ptr(b)156 }157 pub fn as_ptr(this: &Self) -> *const u8 {158 // SAFETY: data is initialized159 unsafe { (*this.0.get()).as_ptr().offset(1).cast() }160 }161162 pub fn strong_count(this: &Self) -> u32 {163 let header = Self::header(this);164 // SAFETY: header is initialized165 unsafe { (*header).refcnt() }166 }167}168169impl Clone for Inner {170 fn clone(&self) -> Self {171 Self::clone(self)172 }173}174175impl Drop for Inner {176 fn drop(&mut self) {177 #[cold]178 #[inline(never)]179 fn dealloc(val: &Inner) {180 let header = Inner::header_mut(val);181 // Safety: Data is valid yet182 let size = unsafe { (*header).size as usize };183 // SAFETY: size is correct, layout is valid, data will not be used after this, as refcn == 0184 unsafe {185 alloc::dealloc(186 header.cast(),187 Layout::from_size_align_unchecked(188 mem::size_of::<InnerHeader>() + size,189 mem::align_of::<InnerHeader>(),190 ),191 );192 }193 }194 let header = Self::header_mut(self);195 // SAFETY: header is initialized196 let refcnt = unsafe {197 let refcnt = (*header).refcnt() - 1;198 (*header).set_refcnt(refcnt);199 refcnt200 };201 if refcnt == 0 {202 dealloc(self);203 }204 }205}206207impl PartialEq for Inner {208 fn eq(&self, other: &Self) -> bool {209 Self::as_ptr(self) == Self::as_ptr(other) || self.as_slice().eq(other.as_slice())210 }211}212impl Hash for Inner {213 fn hash<H: Hasher>(&self, state: &mut H) {214 self.as_slice().hash(state);215 }216}217impl Eq for Inner {}218impl PartialOrd for Inner {219 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {220 self.as_slice().partial_cmp(other.as_slice())221 }222}223impl Ord for Inner {224 fn cmp(&self, other: &Self) -> cmp::Ordering {225 self.as_slice().cmp(other.as_slice())226 }227}228229impl Borrow<[u8]> for Inner {230 fn borrow(&self) -> &[u8] {231 self.as_slice()232 }233}crates/jrsonnet-interner/src/lib.rsdiffbeforeafterboth--- a/crates/jrsonnet-interner/src/lib.rs
+++ b/crates/jrsonnet-interner/src/lib.rs
@@ -174,6 +174,7 @@
}
// First reference - current object, second - POOL
if Inner::strong_count(&self.0) <= 2 {
+ eprintln!("unpool");
unpool(&self.0);
}
}
@@ -274,3 +275,16 @@
// SAFETY: Rust strings always utf8
unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }
}
+
+#[cfg(test)]
+mod tests {
+ use crate::IStr;
+
+ #[test]
+ fn simple() {
+ let a = IStr::from("a");
+ let b = IStr::from("a");
+
+ assert_eq!(a.as_ptr(), b.as_ptr());
+ }
+}