git.delta.rocks / jrsonnet / refs/commits / c11576e16a8b

diff | tree | log

perf specialized Rc for interner

Yaroslav Bolyukin, 2022-05-26, parent: #32f6ee5 (.patch | .diff)
in: master

3 files changed

modified: crates/jrsonnet-interner/Cargo.toml (diff | before | after | both)
6license = "MIT"6license = "MIT"
7edition = "2021"7edition = "2021"
8
9[features]
10default = ["serde"]
11serde = ["dep:serde"]
812
9[dependencies]13[dependencies]
10serde = { version = "1.0" }14serde = { version = "1.0", optional = true }
11rustc-hash = "1.1"15rustc-hash = "1.1"
12gcmodule = { git = "https://github.com/CertainLach/gcmodule", branch = "jrsonnet" }16gcmodule = { git = "https://github.com/CertainLach/gcmodule", branch = "jrsonnet" }
17hashbrown = { version = "0.12.1", features = ["inline-more"] }
1318
added: crates/jrsonnet-interner/src/inner.rs (diff | before | after | both)

no changes

modified: crates/jrsonnet-interner/src/lib.rs (diff | before | after | both)
1#![deny(
2 unsafe_op_in_unsafe_fn,
3 clippy::missing_safety_doc,
4 clippy::undocumented_unsafe_blocks
5)]
6#![warn(clippy::pedantic, clippy::nursery)]
1use std::{7use std::{
2 borrow::Cow,8 borrow::Cow,
3 cell::RefCell,9 cell::RefCell,
4 convert::TryFrom,
5 fmt::{self, Display},10 fmt::{self, Display},
6 hash::{BuildHasherDefault, Hash, Hasher},11 hash::{BuildHasherDefault, Hash, Hasher},
7 ops::Deref,12 ops::Deref,
8 rc::Rc,
9 str::Utf8Error,13 str,
10};14};
1115
12use gcmodule::Trace;16use gcmodule::Trace;
17use hashbrown::HashMap;
13use rustc_hash::FxHashMap;18use rustc_hash::FxHasher;
19
20mod inner;
14use serde::{Deserialize, Serialize};21use inner::Inner;
1522
23/// Interned string
24///
25/// Provides O(1) comparisons and hashing, cheap copy, and cheap conversion to [`IBytes`]
16#[derive(Clone, PartialOrd, Ord, Eq)]26#[derive(Clone, PartialOrd, Ord, Eq)]
17pub struct IStr(Rc<str>);27pub struct IStr(Inner);
18impl Trace for IStr {28impl Trace for IStr {
19 fn is_type_tracked() -> bool {29 fn is_type_tracked() -> bool {
20 false30 false
21 }31 }
22}32}
33
34impl IStr {
35 #[must_use]
36 pub fn as_str(&self) -> &str {
37 self as &str
38 }
39
40 #[must_use]
41 pub fn cast_bytes(self) -> IBytes {
42 IBytes(self.0.clone())
43 }
44}
2345
24impl Deref for IStr {46impl Deref for IStr {
25 type Target = str;47 type Target = str;
2648
27 fn deref(&self) -> &Self::Target {49 fn deref(&self) -> &Self::Target {
50 // SAFETY: an IStr is only built after Inner::check_utf8 or Inner::assume_utf8 (see IBytes::cast_str / cast_str_unchecked), so the data is valid UTF-8
28 &self.051 unsafe { self.0.as_str_unchecked() }
29 }52 }
30}53}
3154
32impl PartialEq for IStr {55impl PartialEq for IStr {
33 fn eq(&self, other: &Self) -> bool {56 fn eq(&self, other: &Self) -> bool {
34 // It is ok, since all IStr should be inlined into same pool57 // all IStr should be inlined into same pool
35 Rc::ptr_eq(&self.0, &other.0)58 Inner::ptr_eq(&self.0, &other.0)
36 }59 }
37}60}
3861
39impl PartialEq<str> for IStr {62impl PartialEq<str> for IStr {
40 fn eq(&self, other: &str) -> bool {63 fn eq(&self, other: &str) -> bool {
41 &self.0 as &str == other64 self as &str == other
42 }65 }
43}66}
4467
45impl Hash for IStr {68impl Hash for IStr {
46 fn hash<H: Hasher>(&self, state: &mut H) {69 fn hash<H: Hasher>(&self, state: &mut H) {
70 // IStr is always obtained from the pool, which holds no duplicates, so every unique string has a unique address
47 state.write_usize(Rc::as_ptr(&self.0) as *const () as usize)71 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);
48 }72 }
49}73}
5074
51impl Drop for IStr {75impl Drop for IStr {
52 fn drop(&mut self) {76 fn drop(&mut self) {
77 #[cold]
78 #[inline(never)]
79 fn unpool(inner: &Inner) {
80 // May fail on program termination
81 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));
82 if res.is_ok() {
83 debug_assert_eq!(Inner::strong_count(inner), 1);
84 }
85 }
53 // First reference - current object, second - POOL86 // First reference - current object, second - POOL
54 if Rc::strong_count(&self.0) <= 2 {87 if Inner::strong_count(&self.0) <= 2 {
55 let _result = STR_POOL.try_with(|pool| pool.borrow_mut().remove(&self.0));88 unpool(&self.0);
56 }89 }
57 }90 }
58}91}
5992
60impl fmt::Debug for IStr {93impl fmt::Debug for IStr {
61 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {94 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62 write!(f, "{:?}", &self.0)95 fmt::Debug::fmt(self as &str, f)
63 }96 }
64}97}
6598
66impl Display for IStr {99impl Display for IStr {
67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {100 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68 f.write_str(&self.0)101 fmt::Display::fmt(self as &str, f)
69 }102 }
70}103}
104
105/// Interned byte array
106#[derive(Clone, PartialOrd, Ord, Eq)]
107pub struct IBytes(Inner);
108impl Trace for IBytes {
109 fn is_type_tracked() -> bool {
110 false
111 }
112}
71113
72thread_local! {114impl IBytes {
115 #[must_use]
73 static STR_POOL: RefCell<FxHashMap<Rc<str>, ()>> = RefCell::new(FxHashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));116 pub fn cast_str(self) -> Option<IStr> {
117 if Inner::check_utf8(&self.0) {
118 Some(IStr(self.0.clone()))
119 } else {
120 None
121 }
122 }
123 /// # Safety
124 /// The data must be valid UTF-8
125 unsafe fn cast_str_unchecked(self) -> IStr {
126 // SAFETY: data is utf8
127 unsafe { Inner::assume_utf8(&self.0) };
128 IStr(self.0.clone())
129 }
74}130}
131
132impl Deref for IBytes {
133 type Target = [u8];
134
135 fn deref(&self) -> &Self::Target {
136 self.0.as_slice()
137 }
138}
139
140impl PartialEq for IBytes {
141 fn eq(&self, other: &Self) -> bool {
142 // all IBytes are interned into the same pool
143 Inner::ptr_eq(&self.0, &other.0)
144 }
145}
75146
76impl From<&str> for IStr {147impl Hash for IBytes {
77 fn from(str: &str) -> Self {148 fn hash<H: Hasher>(&self, state: &mut H) {
149 // IBytes is always obtained from the pool, which holds no duplicates, so every unique byte string has a unique address
78 IStr(STR_POOL.with(|pool| {150 state.write_usize(Inner::as_ptr(&self.0).cast::<()>() as usize);
79 let mut pool = pool.borrow_mut();
80 if let Some((k, _)) = pool.get_key_value(str) {
81 k.clone()
82 } else {
83 let rc: Rc<str> = str.into();
84 pool.insert(rc.clone(), ());
85 rc
86 }
87 }))
88 }151 }
89}152}
90153
91impl TryFrom<&[u8]> for IStr {154impl Drop for IBytes {
92 type Error = Utf8Error;155 fn drop(&mut self) {
93156 #[cold]
157 #[inline(never)]
94 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {158 fn unpool(inner: &Inner) {
159 // May fail on program termination
95 let str = std::str::from_utf8(value)?;160 let res = POOL.try_with(|pool| pool.borrow_mut().remove(inner));
96 Ok(str.into())161 if res.is_ok() {
162 debug_assert_eq!(Inner::strong_count(inner), 1);
163 }
97 }164 }
165 // First reference - current object, second - POOL
166 if Inner::strong_count(&self.0) <= 2 {
167 unpool(&self.0);
168 }
169 }
98}170}
99171
100impl From<String> for IStr {172impl fmt::Debug for IBytes {
101 fn from(str: String) -> Self {173 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102 (&str as &str).into()174 fmt::Debug::fmt(self as &[u8], f)
103 }175 }
104}176}
105177
106impl<'i> From<Cow<'i, str>> for IStr {178impl<'c> From<Cow<'c, str>> for IStr {
107 fn from(c: Cow<'i, str>) -> Self {179 fn from(v: Cow<'c, str>) -> Self {
108 (&c as &str).into()180 intern_str(&v)
109 }181 }
110}182}
183impl From<&str> for IStr {
184 fn from(v: &str) -> Self {
185 intern_str(v)
186 }
187}
188impl From<String> for IStr {
189 fn from(s: String) -> Self {
190 s.as_str().into()
191 }
192}
193impl From<&[u8]> for IBytes {
194 fn from(v: &[u8]) -> Self {
195 intern_bytes(v)
196 }
197}
111198
112impl Serialize for IStr {199impl serde::Serialize for IStr {
113 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>200 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
114 where201 where
115 S: serde::Serializer,202 S: serde::Serializer,
116 {203 {
117 (&self.0 as &str).serialize(serializer)204 self.as_str().serialize(serializer)
118 }205 }
119}206}
120207
121impl<'de> Deserialize<'de> for IStr {208impl<'de> serde::Deserialize<'de> for IStr {
122 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>209 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
123 where210 where
124 D: serde::Deserializer<'de>,211 D: serde::Deserializer<'de>,
125 {212 {
126 let s = <&str>::deserialize(deserializer)?;213 let str = <&str>::deserialize(deserializer)?;
127 Ok(s.into())214 Ok(intern_str(str))
128 }215 }
129}216}
217
218thread_local! {
219 static POOL: RefCell<HashMap<Inner, (), BuildHasherDefault<FxHasher>>> = RefCell::new(HashMap::with_capacity_and_hasher(200, BuildHasherDefault::default()));
220}
221
222#[must_use]
223pub fn intern_bytes(bytes: &[u8]) -> IBytes {
224 POOL.with(|pool| {
225 let mut pool = pool.borrow_mut();
226 let entry = pool.raw_entry_mut().from_key(bytes);
227 match entry {
228 hashbrown::hash_map::RawEntryMut::Occupied(mut i) => {
229 IBytes(i.get_key_value().0.clone())
230 }
231 hashbrown::hash_map::RawEntryMut::Vacant(e) => {
232 let (k, _) = e.insert(Inner::new_bytes(bytes), ());
233 IBytes(k.clone())
234 }
235 }
236 })
237}
238
239#[must_use]
240pub fn intern_str(str: &str) -> IStr {
241 // SAFETY: a Rust `str` is always valid UTF-8
242 unsafe { intern_bytes(str.as_bytes()).cast_str_unchecked() }
243}
130244