/// Re-exports of third-party crates used by this crate.
///
/// Code generated by `declare_new_type!` refers to `nohash_hasher` through
/// `$crate::external`, so the invoking crate can resolve it via this module.
pub mod external {
    pub use nohash_hasher;

    // Only re-exported when this crate's `serde` feature is enabled.
    #[cfg(feature = "serde")]
    pub use serde;
}
/// Hashes `value` to a `u64` using an `ahash` hasher built from fixed seeds.
///
/// The seeds are the constants `(0, 1, 2, 3)` rather than random values, so
/// the hasher is configured identically on every call.
#[inline]
fn hash(value: impl std::hash::Hash) -> u64 {
    use std::hash::{BuildHasher, Hasher};

    let fixed_state = ahash::RandomState::with_seeds(0, 1, 2, 3);
    let mut state = BuildHasher::build_hasher(&fixed_state);
    std::hash::Hash::hash(&value, &mut state);
    Hasher::finish(&state)
}
/// A handle to an interned string, stored together with that string's hash.
///
/// The handle is `Copy`: it consists of a `u64` and a `&'static str`.
#[derive(Clone, Copy, Eq)]
pub struct InternedString {
    /// Precomputed hash of the string; used for equality and hashing.
    hash: u64,

    /// The interned text itself.
    string: &'static str,
}

// `Borrow<str>` must never be implemented for `InternedString`: its `Hash` impl
// feeds only the precomputed `u64` to the hasher, which is not how `str` hashes
// itself, and `Borrow` requires the borrowed and owning forms to hash identically.
static_assertions::assert_not_impl_any!(InternedString: std::borrow::Borrow<str>);
impl InternedString {
    /// Interns `string` in the global interner and returns a handle to it.
    #[inline]
    pub fn new(string: &str) -> Self {
        global_intern(string)
    }

    /// Returns the interned text.
    #[inline]
    pub fn as_str(&self) -> &'static str {
        let Self { string, .. } = *self;
        string
    }

    /// Returns the precomputed hash stored alongside the string.
    #[inline]
    pub fn hash(&self) -> u64 {
        let Self { hash, .. } = *self;
        hash
    }
}
impl From<&str> for InternedString {
#[inline]
fn from(string: &str) -> Self {
Self::new(string)
}
}
impl From<String> for InternedString {
#[inline]
fn from(string: String) -> Self {
Self::new(&string)
}
}
/// Equality compares only the precomputed hashes, never the string contents.
impl PartialEq for InternedString {
    #[inline]
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_hash, rhs_hash) = (self.hash, rhs.hash);
        lhs_hash == rhs_hash
    }
}
/// Hashing feeds the precomputed `u64` to the hasher; the string is not re-hashed.
impl std::hash::Hash for InternedString {
    #[inline]
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        H::write_u64(state, self.hash);
    }
}

// The `Hash` impl above issues exactly one `write_u64`, which is what
// `nohash_hasher` expects from types that opt in through `IsEnabled`.
impl nohash_hasher::IsEnabled for InternedString {}
/// Partial ordering; always `Some`, because `InternedString` is totally ordered.
impl PartialOrd for InternedString {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // Delegate to `Ord::cmp` so the two orderings can never drift apart.
        Some(self.cmp(other))
    }
}
/// Total ordering by string contents (not by hash).
impl Ord for InternedString {
    #[inline]
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        Ord::cmp(self.as_str(), rhs.as_str())
    }
}
/// Cheap view of the interned text as a `&str`.
impl AsRef<str> for InternedString {
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Dereferences to the interned text, making `str` methods callable on the handle.
impl std::ops::Deref for InternedString {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.string
    }
}
/// Debug-formats exactly like the underlying `str` (the hash is not shown).
impl std::fmt::Debug for InternedString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(self.as_str(), f)
    }
}
/// Displays exactly like the underlying `str`, honoring the formatter's options.
impl std::fmt::Display for InternedString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.as_str(), f)
    }
}
/// Serializes as a plain string; the hash is not written.
#[cfg(feature = "serde")]
impl serde::Serialize for InternedString {
    #[inline]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}
/// Deserializes a string and interns it in the global interner.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for InternedString {
    #[inline]
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let owned = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        Ok(global_intern(&owned))
    }
}
/// Storage behind the interner: one leaked `&'static str` per distinct hash.
#[derive(Default)]
struct StringInterner {
    /// Maps a string's hash to the stored string.
    ///
    /// The keys are already hashes, hence the `nohash_hasher` map type.
    map: nohash_hasher::IntMap<u64, &'static str>,
}
impl StringInterner {
    /// Number of entries currently stored.
    // Kept for the unit tests in this file, hence the lint exemption.
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.map.len()
    }

    /// Returns the handle for `string`, storing the string first if its hash is new.
    ///
    /// A newly stored string is copied into a leaked heap allocation so that it
    /// can be handed out as `&'static str`.
    ///
    /// Entries are keyed by hash alone: if the hash is already present, the
    /// previously stored string is returned without comparing it to `string`.
    pub fn intern(&mut self, string: &str) -> InternedString {
        let key = hash(string);
        let stored: &'static str = *self
            .map
            .entry(key)
            .or_insert_with(|| Box::leak(Box::<str>::from(string)));

        InternedString {
            hash: key,
            string: stored,
        }
    }

    /// Sums, over all entries, the key size, the size of a `&str`, and the string's length in bytes.
    ///
    /// Any capacity the map holds beyond its entries is not counted.
    pub fn bytes_used(&self) -> usize {
        let per_entry_overhead = std::mem::size_of::<u64>() + std::mem::size_of::<&str>();
        self.map
            .values()
            .map(|text| per_entry_overhead + text.len())
            .sum()
    }
}
/// Declares a newtype that wraps `InternedString`.
///
/// ```ignore
/// declare_new_type!(
///     /// A type-safe name.
///     pub struct MyName;
/// );
/// ```
///
/// Attributes (including doc comments) and the visibility written on the
/// struct are applied to the generated type. The type gets `new`, `as_str`
/// and `hash` methods, conversions from `&str` and `String`, `AsRef<str>`,
/// `Deref<Target = str>`, `Debug`, `Display`, and comparisons against `&str`.
#[macro_export]
macro_rules! declare_new_type {
    (
        // Captures doc comments and any other attributes placed on the struct.
        $(#[$meta:meta])*
        $vis:vis struct $StructName:ident;
    ) => {
        $(#[$meta])*
        #[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
        // NOTE: `feature = "serde"` and the `serde::` paths are emitted as written,
        // so they are resolved in the crate that invokes this macro.
        #[cfg_attr(
            feature = "serde",
            derive(serde::Deserialize, serde::Serialize),
        )]
        // Honor the visibility the caller wrote instead of forcing `pub`.
        $vis struct $StructName($crate::InternedString);

        impl $StructName {
            /// Interns `string` and wraps the resulting handle.
            #[inline]
            pub fn new(string: &str) -> Self {
                Self($crate::InternedString::new(string))
            }

            /// Returns the interned text.
            #[inline]
            pub fn as_str(&self) -> &'static str {
                self.0.as_str()
            }

            /// Returns the precomputed hash of the interned string.
            #[inline]
            pub fn hash(&self) -> u64 {
                self.0.hash()
            }
        }

        impl $crate::external::nohash_hasher::IsEnabled for $StructName {}

        impl From<&str> for $StructName {
            #[inline]
            fn from(string: &str) -> Self {
                Self::new(string)
            }
        }

        impl From<String> for $StructName {
            #[inline]
            fn from(string: String) -> Self {
                Self::new(&string)
            }
        }

        impl AsRef<str> for $StructName {
            #[inline]
            fn as_ref(&self) -> &str {
                self.as_str()
            }
        }

        impl ::std::ops::Deref for $StructName {
            type Target = str;

            #[inline]
            fn deref(&self) -> &str {
                self.as_str()
            }
        }

        impl ::std::fmt::Debug for $StructName {
            #[inline]
            fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
                // Fully qualified: a bare `.fmt(f)` is ambiguous when the invoking
                // module also has `std::fmt::Display` in scope.
                <str as ::std::fmt::Debug>::fmt(self.as_str(), f)
            }
        }

        impl ::std::fmt::Display for $StructName {
            #[inline]
            fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
                // Fully qualified: a bare `.fmt(f)` is ambiguous when the invoking
                // module also has `std::fmt::Debug` in scope.
                <str as ::std::fmt::Display>::fmt(self.as_str(), f)
            }
        }

        impl<'a> PartialEq<&'a str> for $StructName {
            #[inline]
            fn eq(&self, other: &&'a str) -> bool {
                self.as_str() == *other
            }
        }

        impl<'a> PartialEq<&'a str> for &$StructName {
            #[inline]
            fn eq(&self, other: &&'a str) -> bool {
                self.as_str() == *other
            }
        }

        impl<'a> PartialEq<$StructName> for &'a str {
            #[inline]
            fn eq(&self, other: &$StructName) -> bool {
                *self == other.as_str()
            }
        }
    };
}
use once_cell::sync::Lazy;
use parking_lot::Mutex;
/// The global interner, created empty on first access and guarded by a mutex.
static GLOBAL_INTERNER: Lazy<Mutex<StringInterner>> = Lazy::new(|| {
    let interner = StringInterner::default();
    Mutex::new(interner)
});
/// Reports the byte count computed by the global interner's `bytes_used`.
///
/// Takes the global interner's lock for the duration of the computation.
pub fn bytes_used() -> usize {
    let interner = GLOBAL_INTERNER.lock();
    interner.bytes_used()
}
/// Interns `string` in the global interner, holding its lock while doing so.
fn global_intern(string: &str) -> InternedString {
    let mut interner = GLOBAL_INTERNER.lock();
    interner.intern(string)
}
/// Checks that a local interner deduplicates equal strings and keeps
/// different strings apart.
#[test]
fn test_interner() {
    let mut interner = StringInterner::default();
    assert_eq!(interner.len(), 0);

    // Interning the same text twice must reuse the existing entry.
    let first = interner.intern("Hello World!");
    assert_eq!(interner.len(), 1);
    let again = interner.intern("Hello World!");
    assert_eq!(interner.len(), 1);
    assert_eq!(first, again);

    // A different text adds a second entry.
    let other = interner.intern("Another string");
    assert_eq!(interner.len(), 2);

    assert!(first.hash == again.hash);
    assert!(first.hash != other.hash);
}
/// Checks that a type generated by `declare_new_type!` interns, compares,
/// and exposes its text.
#[test]
fn test_newtype_macro() {
    // Most of the generated API is unused in this test.
    #![allow(dead_code)]

    declare_new_type!(
        pub struct MyString;
    );

    let first = MyString::new("test");
    let second = MyString::new("test");

    assert_eq!(first, second);
    assert_eq!(first.as_str(), "test");
}
/// Compile-time guard: types generated by `declare_new_type!` must not
/// implement `Borrow<str>`.
#[test]
fn do_not_implement_borrow() {
    declare_new_type!(
        pub struct MyString;
    );

    // Compilation fails here if `MyString: Borrow<str>` ever starts to hold.
    static_assertions::assert_not_impl_any!(MyString: std::borrow::Borrow<str>);
}