difftreelog
perf specialized Rc for interner
in: master
3 files changed
crates/jrsonnet-interner/Cargo.tomldiffbeforeafterboth--- a/crates/jrsonnet-interner/Cargo.toml
+++ b/crates/jrsonnet-interner/Cargo.toml
@@ -6,7 +6,12 @@
license = "MIT"
edition = "2021"
+[features]
+default = ["serde"]
+serde = ["dep:serde"]
+
[dependencies]
-serde = { version = "1.0" }
+serde = { version = "1.0", optional = true }
rustc-hash = "1.1"
gcmodule = { git = "https://github.com/CertainLach/gcmodule", branch = "jrsonnet" }
+hashbrown = { version = "0.12.1", features = ["inline-more"] }
crates/jrsonnet-interner/src/inner.rsdiffbeforeafterboth--- /dev/null
+++ b/crates/jrsonnet-interner/src/inner.rs
@@ -0,0 +1,234 @@
+use std::{
+ alloc::{self, Layout},
+ borrow::Borrow,
+ cmp,
+ hash::{Hash, Hasher},
+ mem,
+ ptr::{self, NonNull},
+ slice, str,
+};
+
+const UTF8_MASK: u32 = 1 << 31;
+const REFCNT_MASK: u32 = !UTF8_MASK;
+
+#[repr(C)]
+struct InnerHeader {
+ size: u32,
+ // MSB is checked utf8 flag, rest - refcnt
+ utf8_refcnt: u32,
+}
+impl InnerHeader {
+ const fn new(size: u32, is_utf8: bool) -> Self {
+ Self {
+ size,
+ utf8_refcnt: 1 | (if is_utf8 { UTF8_MASK } else { 0 }),
+ }
+ }
+
+ const fn refcnt(&self) -> u32 {
+ self.utf8_refcnt & REFCNT_MASK
+ }
+ const fn is_utf8(&self) -> bool {
+ self.utf8_refcnt & UTF8_MASK != 0
+ }
+
+ fn set_refcnt(&mut self, cnt: u32) {
+ assert_eq!(cnt & UTF8_MASK, 0);
+ // Reset all bits expect last
+ self.utf8_refcnt &= UTF8_MASK;
+ // Store refcnt
+ self.utf8_refcnt |= cnt;
+ }
+ fn set_is_utf8(&mut self) {
+ self.utf8_refcnt |= UTF8_MASK;
+ }
+}
+
+/// Similar to Rc<[u8]>, but stores all data (refcnt, size) inline, instead of being DST
+pub struct Inner(NonNull<u8>);
+impl Inner {
+ /// # Safety
+ /// `is_utf8` should only be set if data is really checked to be utf8
+ /// # Panics
+ /// If data is larger than 4GB
+ // we allocate with correct alignment
+ #[allow(clippy::cast_ptr_alignment)]
+ unsafe fn new_raw(bytes: &[u8], is_utf8: bool) -> Self {
+ // SAFETY:
+ // - layout has non-zero size, and correct align
+ // - data is written right after allocation
+ // - new allocation can't overlap with passed slice
+ unsafe {
+ let data = alloc::alloc(Layout::from_size_align_unchecked(
+ mem::size_of::<InnerHeader>() + bytes.len(),
+ mem::align_of::<InnerHeader>(),
+ ));
+ assert!(!data.is_null());
+ *data.cast::<InnerHeader>() =
+ InnerHeader::new(bytes.len().try_into().expect("bytes > 4GB"), is_utf8);
+ ptr::copy_nonoverlapping(
+ bytes.as_ptr(),
+ data.add(mem::size_of::<InnerHeader>()),
+ bytes.len(),
+ );
+ Self(NonNull::new_unchecked(data))
+ }
+ }
+ pub fn new_bytes(bytes: &[u8]) -> Self {
+ // SAFETY: is_utf8 is not set
+ unsafe { Self::new_raw(bytes, false) }
+ }
+ #[allow(dead_code)]
+ pub fn new_str(str: &str) -> Self {
+ // SAFETY: strings always utf8
+ unsafe { Self::new_raw(str.as_bytes(), true) }
+ }
+
+ pub fn as_slice(&self) -> &[u8] {
+ let header = Self::header(self);
+ // SAFETY: data is not null, and it is correctly initialized
+ let size = unsafe { (*header).size };
+ // SAFETY: bytes after data is allocated to be exactly data.size in length
+ unsafe {
+ slice::from_raw_parts(
+ self.0.as_ptr().add(mem::size_of::<InnerHeader>()),
+ size as usize,
+ )
+ }
+ }
+
+ /// # Safety
+ /// Data should be checked to be utf8 via [`check_utf8`] first
+ pub unsafe fn as_str_unchecked(&self) -> &str {
+ // SAFETY: data is checked
+ unsafe { str::from_utf8_unchecked(self.as_slice()) }
+ }
+
+ /// Check data to be utf-8
+ ///
+ /// Positive results are cached
+ pub fn check_utf8(this: &Self) -> bool {
+ let header = Self::header_mut(this);
+ // SAFETY: header is initialized
+ if unsafe { (*header).is_utf8() } {
+ return true;
+ }
+
+ if str::from_utf8(this.as_slice()).is_ok() {
+ // SAFETY: header is initialized
+ unsafe { (*header).set_is_utf8() };
+ true
+ } else {
+ false
+ }
+ }
+
+ /// Marks data as utf-8
+ ///
+ /// # Safety
+ /// data should be really utf-8
+ pub unsafe fn assume_utf8(this: &Self) {
+ let header = Self::header_mut(this);
+ // SAFETY: header is correct
+ unsafe { (*header).set_is_utf8() }
+ }
+
+ const fn header(this: &Self) -> *const InnerHeader {
+ // in `new`, we allocate with correct alignment
+ #![allow(clippy::cast_ptr_alignment)]
+ this.0.as_ptr() as *const InnerHeader
+ }
+ const fn header_mut(this: &Self) -> *mut InnerHeader {
+ // in `new`, we allocate with correct alignment
+ #![allow(clippy::cast_ptr_alignment)]
+ this.0.as_ptr().cast::<InnerHeader>()
+ }
+
+ fn clone(this: &Self) -> Self {
+ let header = Self::header_mut(this);
+ // SAFETY: header is initialized
+ unsafe {
+ let refcnt = (*header).refcnt() + 1;
+ (*header).set_refcnt(refcnt);
+ }
+ Self(this.0)
+ }
+
+ pub fn ptr_eq(a: &Self, b: &Self) -> bool {
+ a.0 == b.0
+ }
+ pub fn as_ptr(this: &Self) -> *const u8 {
+ // SAFETY: data is initialized
+ unsafe { this.0.as_ptr().add(mem::size_of::<InnerHeader>()) }
+ }
+
+ pub const fn strong_count(this: &Self) -> u32 {
+ let header = Self::header(this);
+ // SAFETY: header is initialized
+ unsafe { (*header).refcnt() }
+ }
+}
+
+impl Clone for Inner {
+ fn clone(&self) -> Self {
+ Self::clone(self)
+ }
+}
+
+impl Drop for Inner {
+ fn drop(&mut self) {
+ #[cold]
+ #[inline(never)]
+ fn dealloc(val: &Inner) {
+ let header = Inner::header_mut(val);
+ // SAFETY: size is correct, layout is valid
+ unsafe {
+ alloc::dealloc(
+ val.0.as_ptr(),
+ Layout::from_size_align_unchecked(
+ mem::size_of::<InnerHeader>() + (*header).size as usize,
+ mem::align_of::<InnerHeader>(),
+ ),
+ );
+ }
+ }
+ let header = Self::header_mut(self);
+ // SAFETY: header is initialized
+ let refcnt = unsafe {
+ let refcnt = (*header).refcnt() - 1;
+ (*header).set_refcnt(refcnt);
+ refcnt
+ };
+ if refcnt == 0 {
+ dealloc(self);
+ }
+ }
+}
+
+impl PartialEq for Inner {
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0 || self.as_slice().eq(other.as_slice())
+ }
+}
+impl Hash for Inner {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.as_slice().hash(state);
+ }
+}
+impl Eq for Inner {}
+impl PartialOrd for Inner {
+ fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+ self.as_slice().partial_cmp(other.as_slice())
+ }
+}
+impl Ord for Inner {
+ fn cmp(&self, other: &Self) -> cmp::Ordering {
+ self.as_slice().cmp(other.as_slice())
+ }
+}
+
+impl Borrow<[u8]> for Inner {
+ fn borrow(&self) -> &[u8] {
+ self.as_slice()
+ }
+}
crates/jrsonnet-interner/src/lib.rsdiffbeforeafterboth1use std::{2 borrow::Cow,3 cell::RefCell,4 convert::TryFrom,5 fmt::{self, Display},6 hash::{BuildHasherDefault, Hash, Hasher},7 ops::Deref,8 rc::Rc,9 str::Utf8Error,10};1112use gcmodule::Trace;13use rustc_hash::FxHashMap;14use serde::{Deserialize, Serialize};1516#[derive(Clone, PartialOrd, Ord, Eq)]17pub struct IStr(Rc<str>);18impl Trace for IStr {19 fn is_type_tracked() -> bool {20 false21 }22}2324impl Deref for IStr {25 type Target = str;2627 fn deref(&self) -> &Self::Target {28 &self.029 }30}3132impl PartialEq for IStr {33 fn eq(&self, other: &Self) -> bool {34 // It is ok, since all IStr should be inlined into same pool35 Rc::ptr_eq(&self.0, &other.0)36 }37}3839impl PartialEq<str> for IStr {40 fn eq(&self, other: &str) -> bool {41 &self.0 as &str == other42 }43}4445impl Hash for IStr {46 fn hash<H: Hasher>(&self, state: &mut H) {47 state.write_usize(Rc::as_ptr(&self.0) as *const () as usize)48 }49}5051impl Drop for IStr {52 fn drop(&mut self) {53 // First reference - current object, second - POOL54 if Rc::strong_count(&self.0) <= 2 {55 let _result = STR_POOL.try_with(|pool| pool.borrow_mut().remove(&self.0));56 }57 }58}5960impl fmt::Debug for IStr {61 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {62 write!(f, "{:?}", &self.0)63 }64}6566impl Display for IStr {67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {68 f.write_str(&self.0)69 }70}7172thread_local! {73 static STR_POOL: RefCell<FxHashMap<Rc<str>, ()>> = RefCell::new(FxHashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));74}7576impl From<&str> for IStr {77 fn from(str: &str) -> Self {78 IStr(STR_POOL.with(|pool| {79 let mut pool = pool.borrow_mut();80 if let Some((k, _)) = pool.get_key_value(str) {81 k.clone()82 } else {83 let rc: Rc<str> = str.into();84 pool.insert(rc.clone(), ());85 rc86 }87 }))88 }89}9091impl TryFrom<&[u8]> for IStr {92 type Error = Utf8Error;9394 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {95 let str = std::str::from_utf8(value)?;96 Ok(str.into())97 }98}99100impl From<String> for IStr {101 fn from(str: String) -> Self {102 (&str as &str).into()103 }104}105106impl<'i> From<Cow<'i, str>> for IStr {107 fn from(c: Cow<'i, str>) -> Self {108 (&c as &str).into()109 }110}111112impl Serialize for IStr {113 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>114 where115 S: serde::Serializer,116 {117 (&self.0 as &str).serialize(serializer)118 }119}120121impl<'de> Deserialize<'de> for IStr {122 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>123 where124 D: serde::Deserializer<'de>,125 {126 let s = <&str>::deserialize(deserializer)?;127 Ok(s.into())128 }129}