difftreelog
fix(interner) data should be in UnsafeCell
in: master
2 files changed
crates/jrsonnet-interner/src/inner.rsdiffbeforeafterboth--- a/crates/jrsonnet-interner/src/inner.rs
+++ b/crates/jrsonnet-interner/src/inner.rs
@@ -1,6 +1,7 @@
use std::{
alloc::{self, Layout},
borrow::Borrow,
+ cell::UnsafeCell,
cmp,
hash::{Hash, Hasher},
mem,
@@ -45,7 +46,7 @@
}
/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST
-pub struct Inner(NonNull<u8>);
+pub struct Inner(UnsafeCell<NonNull<InnerHeader>>);
impl Inner {
/// # Safety
/// `is_utf8` should only be set if data is really checked to be utf8
@@ -59,19 +60,15 @@
// - data is written right after allocation
// - new allocation can't overlap with passed slice
unsafe {
- let data = alloc::alloc(Layout::from_size_align_unchecked(
+ let data: *mut InnerHeader = alloc::alloc(Layout::from_size_align_unchecked(
mem::size_of::<InnerHeader>() + bytes.len(),
mem::align_of::<InnerHeader>(),
- ));
+ ))
+ .cast();
assert!(!data.is_null());
- *data.cast::<InnerHeader>() =
- InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);
- ptr::copy_nonoverlapping(
- bytes.as_ptr(),
- data.add(mem::size_of::<InnerHeader>()),
- bytes.len(),
- );
- Self(NonNull::new_unchecked(data))
+ *data = InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);
+ ptr::copy_nonoverlapping(bytes.as_ptr(), data.offset(1).cast::<u8>(), bytes.len());
+ Self(UnsafeCell::new(NonNull::new_unchecked(data)))
}
}
pub fn new_bytes(bytes: &[u8]) -> Self {
@@ -93,7 +90,7 @@
// SAFETY: bytes after data is allocated to be exactly data.size in length
unsafe {
slice::from_raw_parts(
- self.0.as_ptr().add(mem::size_of::<InnerHeader>()),
+ (*self.0.get()).as_ptr().offset(1).cast::<u8>(),
size as usize,
)
}
@@ -135,15 +132,13 @@
unsafe { (*header).set_is_utf8() }
}
- const fn header(this: &Self) -> *const InnerHeader {
- // in `new`, we allocate with correct alignment
- #![allow(clippy::cast_ptr_alignment)]
- this.0.as_ptr() as *const InnerHeader
+ fn header(this: &Self) -> *const InnerHeader {
+ // Safety: in `new`, we allocate with correct alignment
+ unsafe { (*this.0.get()).as_ptr() }
}
- const fn header_mut(this: &Self) -> *mut InnerHeader {
- // in `new`, we allocate with correct alignment
- #![allow(clippy::cast_ptr_alignment)]
- this.0.as_ptr().cast::<InnerHeader>()
+ fn header_mut(this: &Self) -> *mut InnerHeader {
+ // Safety: in `new`, we allocate with correct alignment
+ unsafe { (*this.0.get()).as_mut() }
}
fn clone(this: &Self) -> Self {
@@ -152,19 +147,19 @@
unsafe {
let refcnt = (*header).refcnt() + 1;
(*header).set_refcnt(refcnt);
+ Self(UnsafeCell::new(*this.0.get()))
}
- Self(this.0)
}
pub fn ptr_eq(a: &Self, b: &Self) -> bool {
- a.0 == b.0
+ Self::as_ptr(a) == Self::as_ptr(b)
}
- pub const fn as_ptr(this: &Self) -> *const u8 {
+ pub fn as_ptr(this: &Self) -> *const u8 {
// SAFETY: data is initialized
- unsafe { this.0.as_ptr().add(mem::size_of::<InnerHeader>()) }
+ unsafe { (*this.0.get()).as_ptr().offset(1).cast() }
}
- pub const fn strong_count(this: &Self) -> u32 {
+ pub fn strong_count(this: &Self) -> u32 {
let header = Self::header(this);
// SAFETY: header is initialized
unsafe { (*header).refcnt() }
@@ -183,12 +178,14 @@
#[inline(never)]
fn dealloc(val: &Inner) {
let header = Inner::header_mut(val);
- // SAFETY: size is correct, layout is valid
+ // Safety: the header is still valid here, it has not been deallocated yet
+ let size = unsafe { (*header).size as usize };
+ // SAFETY: size is correct, layout is valid, data will not be used after this, as refcnt == 0
unsafe {
alloc::dealloc(
- val.0.as_ptr(),
+ header.cast(),
Layout::from_size_align_unchecked(
- mem::size_of::<InnerHeader>() + (*header).size as usize,
+ mem::size_of::<InnerHeader>() + size,
mem::align_of::<InnerHeader>(),
),
);
@@ -209,7 +206,7 @@
impl PartialEq for Inner {
fn eq(&self, other: &Self) -> bool {
- self.0 == other.0 || self.as_slice().eq(other.as_slice())
+ Self::as_ptr(self) == Self::as_ptr(other) || self.as_slice().eq(other.as_slice())
}
}
impl Hash for Inner {
crates/jrsonnet-interner/src/lib.rsdiffbeforeafterboth1#![deny(2 unsafe_op_in_unsafe_fn,3 clippy::missing_safety_doc,4 clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9 borrow::Cow,10 cell::RefCell,11 fmt::{self, Display},12 hash::{BuildHasherDefault, Hash, Hasher},13 ops::Deref,14 str,15};1617use hashbrown::HashMap;18use jrsonnet_gcmodule::Trace;19use rustc_hash::FxHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30 fn is_type_tracked() -> bool {31 false32 }33}3435impl IStr {36 #[must_use]37 pub fn empty() -> Self {38 "".into()39 }40 #[must_use]41 pub fn as_str(&self) -> &str {42 self as &str43 }4445 #[must_use]46 pub fn cast_bytes(self) -> IBytes {47 IBytes(self.0.clone())48 }49}5051impl Deref for IStr {52 type Target = str;5354 fn deref(&self) -> &Self::Target {55 // SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-856 unsafe { self.0.as_str_unchecked() }57 }58}5960impl PartialEq for IStr {61 fn eq(&self, other: &Self) -> bool {62 // all IStr should be inlined into same pool63 Inner::ptr_eq(&self.0, &other.0)64 }65}6667impl PartialEq<str> for IStr {68 fn eq(&self, other: &str) -> bool {69 self as &str == other70 }71}7273impl Hash for IStr {74 fn hash<H: Hasher>(&self, state: &mut H) {75 // IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address76 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);77 }78}7980impl Drop for IStr {81 fn drop(&mut self) {82 #[cold]83 #[inline(never)]84 fn unpool(inner: &Inner) {85 // May fail on program termination86 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));87 if res.is_ok() {88 debug_assert_eq!(Inner::strong_count(inner), 1);89 }90 }91 // First 
reference - current object, second - POOL92 if Inner::strong_count(&self.0) <= 2 {93 unpool(&self.0);94 }95 }96}9798impl fmt::Debug for IStr {99 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {100 fmt::Debug::fmt(self as &str, f)101 }102}103104impl Display for IStr {105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {106 fmt::Display::fmt(self as &str, f)107 }108}109110/// Interned byte array111#[derive(Clone, PartialOrd, Ord, Eq)]112pub struct IBytes(Inner);113impl Trace for IBytes {114 fn is_type_tracked() -> bool {115 false116 }117}118119impl IBytes {120 #[must_use]121 pub fn cast_str(self) -> Option<IStr> {122 if Inner::check_utf8(&self.0) {123 Some(IStr(self.0.clone()))124 } else {125 None126 }127 }128 /// # Safety129 /// data should be valid utf8130 unsafe fn cast_str_unchecked(self) -> IStr {131 // SAFETY: data is utf8132 unsafe { Inner::assume_utf8(&self.0) };133 IStr(self.0.clone())134 }135136 #[must_use]137 pub fn as_slice(&self) -> &[u8] {138 self.0.as_slice()139 }140}141142impl Deref for IBytes {143 type Target = [u8];144145 fn deref(&self) -> &Self::Target {146 self.0.as_slice()147 }148}149150impl PartialEq for IBytes {151 fn eq(&self, other: &Self) -> bool {152 // all IStr should be inlined into same pool153 Inner::ptr_eq(&self.0, &other.0)154 }155}156157impl Hash for IBytes {158 fn hash<H: Hasher>(&self, state: &mut H) {159 // IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address160 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);161 }162}163164impl Drop for IBytes {165 fn drop(&mut self) {166 #[cold]167 #[inline(never)]168 fn unpool(inner: &Inner) {169 // May fail on program termination170 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));171 if res.is_ok() {172 debug_assert_eq!(Inner::strong_count(inner), 1);173 }174 }175 // First reference - current object, second - POOL176 if Inner::strong_count(&self.0) <= 2 {177 unpool(&self.0);178 }179 
}180}181182impl fmt::Debug for IBytes {183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {184 fmt::Debug::fmt(self as &[u8], f)185 }186}187188impl<'c> From<Cow<'c, str>> for IStr {189 fn from(v: Cow<'c, str>) -> Self {190 intern_str(&v)191 }192}193impl From<&str> for IStr {194 fn from(v: &str) -> Self {195 intern_str(v)196 }197}198impl From<String> for IStr {199 fn from(s: String) -> Self {200 s.as_str().into()201 }202}203impl From<&String> for IStr {204 fn from(s: &String) -> Self {205 s.as_str().into()206 }207}208impl From<&[u8]> for IBytes {209 fn from(v: &[u8]) -> Self {210 intern_bytes(v)211 }212}213214#[cfg(feature = "serde")]215impl serde::Serialize for IStr {216 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>217 where218 S: serde::Serializer,219 {220 self.as_str().serialize(serializer)221 }222}223224#[cfg(feature = "serde")]225impl<'de> serde::Deserialize<'de> for IStr {226 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>227 where228 D: serde::Deserializer<'de>,229 {230 let str = <&str>::deserialize(deserializer)?;231 Ok(intern_str(str))232 }233}234235#[cfg(feature = "structdump")]236impl structdump::Codegen for IStr {237 fn gen_code(238 &self,239 res: &mut structdump::CodegenResult,240 _unique: bool,241 ) -> structdump::TokenStream {242 let s: &str = self;243 res.add_code(244 structdump::quote! {245 structdump_import::IStr::from(#s)246 },247 Some(structdump::quote![structdump_import::IStr]),248 false,249 )250 }251}252253thread_local! 
{254 static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));255}256257#[must_use]258pub fn intern_bytes(bytes: &[u8]) -> IBytes {259 POOL.with(|pool| {260 let mut pool = pool.borrow_mut();261 let entry = pool.raw_entry_mut().from_key(bytes);262 match entry {263 hashbrown::hash_map::RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),264 hashbrown::hash_map::RawEntryMut::Vacant(e) => {265 let (k, _) = e.insert(Inner::new_bytes(bytes), ());266 IBytes(k.clone())267 }268 }269 })270}271272#[must_use]273pub fn intern_str(str: &str) -> IStr {274 // SAFETY: Rust strings always utf8275 unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }276}1#![deny(2 unsafe_op_in_unsafe_fn,3 clippy::missing_safety_doc,4 clippy::undocumented_unsafe_blocks5)]6#![warn(clippy::pedantic, clippy::nursery)]7#![allow(clippy::missing_const_for_fn)]8use std::{9 borrow::Cow,10 cell::RefCell,11 fmt::{self, Display},12 hash::{BuildHasherDefault, Hash, Hasher},13 ops::Deref,14 str,15};1617use hashbrown::HashMap;18use jrsonnet_gcmodule::Trace;19use rustc_hash::FxHasher;2021mod inner;22use inner::Inner;2324/// Interned string25///26/// Provides O(1) comparsions and hashing, cheap copy, and cheap conversion to [`IBytes`]27#[derive(Clone, PartialOrd, Ord, Eq)]28pub struct IStr(Inner);29impl Trace for IStr {30 fn is_type_tracked() -> bool {31 false32 }33}3435impl IStr {36 #[must_use]37 pub fn empty() -> Self {38 "".into()39 }40 #[must_use]41 pub fn as_str(&self) -> &str {42 self as &str43 }4445 #[must_use]46 pub fn cast_bytes(self) -> IBytes {47 IBytes(self.0.clone())48 }49}5051impl Deref for IStr {52 type Target = str;5354 fn deref(&self) -> &Self::Target {55 // SAFETY: Inner::check_utf8 is called on IStr construction, data is utf-856 unsafe { self.0.as_str_unchecked() }57 }58}5960impl PartialEq for IStr {61 fn eq(&self, other: &Self) -> bool {62 // all IStr should be inlined into same 
pool63 Inner::ptr_eq(&self.0, &other.0)64 }65}6667impl PartialEq<str> for IStr {68 fn eq(&self, other: &str) -> bool {69 self as &str == other70 }71}7273impl Hash for IStr {74 fn hash<H: Hasher>(&self, state: &mut H) {75 // IStr is always obtained from pool, where no string have duplicate, thus every unique string has unique address76 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);77 }78}7980impl Drop for IStr {81 fn drop(&mut self) {82 #[cold]83 #[inline(never)]84 fn unpool(inner: &Inner) {85 // May fail on program termination86 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));87 if res.is_ok() {88 debug_assert_eq!(Inner::strong_count(inner), 1);89 }90 }91 // First reference - current object, second - POOL92 if Inner::strong_count(&self.0) <= 2 {93 unpool(&self.0);94 }95 }96}9798impl fmt::Debug for IStr {99 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {100 fmt::Debug::fmt(self as &str, f)101 }102}103104impl Display for IStr {105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {106 fmt::Display::fmt(self as &str, f)107 }108}109110/// Interned byte array111#[derive(Clone, PartialOrd, Ord, Eq)]112pub struct IBytes(Inner);113impl Trace for IBytes {114 fn is_type_tracked() -> bool {115 false116 }117}118119impl IBytes {120 #[must_use]121 pub fn cast_str(self) -> Option<IStr> {122 if Inner::check_utf8(&self.0) {123 Some(IStr(self.0.clone()))124 } else {125 None126 }127 }128 /// # Safety129 /// data should be valid utf8130 unsafe fn cast_str_unchecked(self) -> IStr {131 // SAFETY: data is utf8132 unsafe { Inner::assume_utf8(&self.0) };133 IStr(self.0.clone())134 }135136 #[must_use]137 pub fn as_slice(&self) -> &[u8] {138 self.0.as_slice()139 }140}141142impl Deref for IBytes {143 type Target = [u8];144145 fn deref(&self) -> &Self::Target {146 self.0.as_slice()147 }148}149150impl PartialEq for IBytes {151 fn eq(&self, other: &Self) -> bool {152 // all IStr should be inlined into same pool153 Inner::ptr_eq(&self.0, 
&other.0)154 }155}156157impl Hash for IBytes {158 fn hash<H: Hasher>(&self, state: &mut H) {159 // IBytes is always obtained from pool, where no string have duplicate, thus every unique string has unique address160 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);161 }162}163164impl Drop for IBytes {165 fn drop(&mut self) {166 #[cold]167 #[inline(never)]168 fn unpool(inner: &Inner) {169 // May fail on program termination170 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));171 if res.is_ok() {172 debug_assert_eq!(Inner::strong_count(inner), 1);173 }174 }175 // First reference - current object, second - POOL176 if Inner::strong_count(&self.0) <= 2 {177 eprintln!("unpool");178 unpool(&self.0);179 }180 }181}182183impl fmt::Debug for IBytes {184 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {185 fmt::Debug::fmt(self as &[u8], f)186 }187}188189impl<'c> From<Cow<'c, str>> for IStr {190 fn from(v: Cow<'c, str>) -> Self {191 intern_str(&v)192 }193}194impl From<&str> for IStr {195 fn from(v: &str) -> Self {196 intern_str(v)197 }198}199impl From<String> for IStr {200 fn from(s: String) -> Self {201 s.as_str().into()202 }203}204impl From<&String> for IStr {205 fn from(s: &String) -> Self {206 s.as_str().into()207 }208}209impl From<&[u8]> for IBytes {210 fn from(v: &[u8]) -> Self {211 intern_bytes(v)212 }213}214215#[cfg(feature = "serde")]216impl serde::Serialize for IStr {217 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>218 where219 S: serde::Serializer,220 {221 self.as_str().serialize(serializer)222 }223}224225#[cfg(feature = "serde")]226impl<'de> serde::Deserialize<'de> for IStr {227 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>228 where229 D: serde::Deserializer<'de>,230 {231 let str = <&str>::deserialize(deserializer)?;232 Ok(intern_str(str))233 }234}235236#[cfg(feature = "structdump")]237impl structdump::Codegen for IStr {238 fn gen_code(239 &self,240 res: &mut structdump::CodegenResult,241 
_unique: bool,242 ) -> structdump::TokenStream {243 let s: &str = self;244 res.add_code(245 structdump::quote! {246 structdump_import::IStr::from(#s)247 },248 Some(structdump::quote![structdump_import::IStr]),249 false,250 )251 }252}253254thread_local! {255 static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));256}257258#[must_use]259pub fn intern_bytes(bytes: &[u8]) -> IBytes {260 POOL.with(|pool| {261 let mut pool = pool.borrow_mut();262 let entry = pool.raw_entry_mut().from_key(bytes);263 match entry {264 hashbrown::hash_map::RawEntryMut::Occupied(i) => IBytes(i.get_key_value().0.clone()),265 hashbrown::hash_map::RawEntryMut::Vacant(e) => {266 let (k, _) = e.insert(Inner::new_bytes(bytes), ());267 IBytes(k.clone())268 }269 }270 })271}272273#[must_use]274pub fn intern_str(str: &str) -> IStr {275 // SAFETY: Rust strings always utf8276 unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }277}278279#[cfg(test)]280mod tests {281 use crate::IStr;282283 #[test]284 fn simple() {285 let a = IStr::from("a");286 let b = IStr::from("a");287288 assert_eq!(a.as_ptr(), b.as_ptr());289 }290}