zerotrie/
zerotrie.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::reader;
6
7use core::borrow::Borrow;
8
9#[cfg(feature = "alloc")]
10use crate::{
11    builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
12};
13#[cfg(feature = "alloc")]
14use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
15#[cfg(feature = "litemap")]
16use litemap::LiteMap;
17
18/// A data structure that compactly maps from byte sequences to integers.
19///
20/// There are several variants of `ZeroTrie` which are very similar but are optimized
21/// for different use cases:
22///
23/// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
24///   Only stores ASCII-encoded strings. Can be const-constructed!
25/// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
26///   strings. It also scales better to large data. Cannot be const-constructed.
27/// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
28///
29/// You can create a `ZeroTrie` directly, in which case the most appropriate
30/// backing implementation will be chosen.
31///
32/// # Backing Store
33///
34/// The data structure has a flexible backing data store. The only requirement for most
35/// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
36/// ZeroTrie types:
37///
38/// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
39/// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
40/// - `ZeroTrie<Vec<u8>>` (fully owned data)
41/// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
42/// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
43/// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
44///
45/// # Examples
46///
47/// ```
48/// use litemap::LiteMap;
49/// use zerotrie::ZeroTrie;
50///
51/// let mut map = LiteMap::<&[u8], usize>::new_vec();
52/// map.insert("foo".as_bytes(), 1);
53/// map.insert("bar".as_bytes(), 2);
54/// map.insert("bazzoo".as_bytes(), 3);
55///
56/// let trie = ZeroTrie::try_from(&map)?;
57///
58/// assert_eq!(trie.get("foo"), Some(1));
59/// assert_eq!(trie.get("bar"), Some(2));
60/// assert_eq!(trie.get("bazzoo"), Some(3));
61/// assert_eq!(trie.get("unknown"), None);
62///
63/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
64/// ```
65#[derive(Debug, Clone, Copy, PartialEq, Eq)]
66// Note: The absence of the following derive does not cause any test failures in this crate
67#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
68pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>);
69
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub(crate) enum ZeroTrieFlavor<Store> {
72    SimpleAscii(ZeroTrieSimpleAscii<Store>),
73    PerfectHash(ZeroTriePerfectHash<Store>),
74    ExtendedCapacity(ZeroTrieExtendedCapacity<Store>),
75}
76
77/// A data structure that compactly maps from ASCII strings to integers.
78///
79/// For more information, see [`ZeroTrie`].
80///
81/// # Examples
82///
83/// ```
84/// use litemap::LiteMap;
85/// use zerotrie::ZeroTrieSimpleAscii;
86///
87/// let mut map = LiteMap::new_vec();
88/// map.insert(&b"foo"[..], 1);
89/// map.insert(b"bar", 2);
90/// map.insert(b"bazzoo", 3);
91///
92/// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
93///
94/// assert_eq!(trie.get(b"foo"), Some(1));
95/// assert_eq!(trie.get(b"bar"), Some(2));
96/// assert_eq!(trie.get(b"bazzoo"), Some(3));
97/// assert_eq!(trie.get(b"unknown"), None);
98///
99/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
100/// ```
101///
102/// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
103///
104/// ```
105/// use zerotrie::ZeroTrieSimpleAscii;
106///
107/// // A trie with two values: "abc" and "abcdef"
108/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
109///
110/// assert!(matches!(trie.get(b"ab\xFF"), None));
111/// ```
112#[repr(transparent)]
113#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
114#[cfg_attr(feature = "databake", derive(databake::Bake))]
115#[cfg_attr(feature = "databake", databake(path = zerotrie))]
116#[allow(clippy::exhaustive_structs)] // databake hidden fields
117pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
118    #[doc(hidden)] // for databake, but there are no invariants
119    pub store: Store,
120}
121
122impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
123    fn transparent_ref_from_store(s: &Store) -> &Self {
124        unsafe {
125            // Safety: Self is transparent over Store
126            core::mem::transmute(s)
127        }
128    }
129}
130
131impl<Store> ZeroTrieSimpleAscii<Store> {
132    /// Wrap this specific ZeroTrie variant into a ZeroTrie.
133    #[inline]
134    pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
135        ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
136    }
137}
138
139/// A data structure that compactly maps from ASCII strings to integers
140/// in a case-insensitive way.
141///
142/// # Examples
143///
144/// ```
145/// use litemap::LiteMap;
146/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
147///
148/// let mut map = LiteMap::new_vec();
149/// map.insert(&b"foo"[..], 1);
150/// map.insert(b"Bar", 2);
151/// map.insert(b"Bazzoo", 3);
152///
153/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
154///
155/// assert_eq!(trie.get(b"foo"), Some(1));
156/// assert_eq!(trie.get(b"bar"), Some(2));
157/// assert_eq!(trie.get(b"BAR"), Some(2));
158/// assert_eq!(trie.get(b"bazzoo"), Some(3));
159/// assert_eq!(trie.get(b"unknown"), None);
160///
161/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
162/// ```
163///
164/// Strings with different cases of the same character at the same offset are not allowed:
165///
166/// ```
167/// use litemap::LiteMap;
168/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
169///
170/// let mut map = LiteMap::new_vec();
171/// map.insert(&b"bar"[..], 1);
172/// // OK: 'r' and 'Z' are different letters
173/// map.insert(b"baZ", 2);
174/// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
175/// map.insert(b"baR", 2);
176///
177/// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
178/// ```
179#[repr(transparent)]
180#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
181#[cfg_attr(feature = "databake", derive(databake::Bake))]
182#[cfg_attr(feature = "databake", databake(path = zerotrie))]
183#[allow(clippy::exhaustive_structs)] // databake hidden fields
184pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
185    #[doc(hidden)] // for databake, but there are no invariants
186    pub store: Store,
187}
188
189impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
190    fn transparent_ref_from_store(s: &Store) -> &Self {
191        unsafe {
192            // Safety: Self is transparent over Store
193            core::mem::transmute(s)
194        }
195    }
196}
197
198// Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
199
200/// A data structure that compactly maps from byte strings to integers.
201///
202/// For more information, see [`ZeroTrie`].
203///
204/// # Examples
205///
206/// ```
207/// use litemap::LiteMap;
208/// use zerotrie::ZeroTriePerfectHash;
209///
210/// let mut map = LiteMap::<&[u8], usize>::new_vec();
211/// map.insert("foo".as_bytes(), 1);
212/// map.insert("bår".as_bytes(), 2);
213/// map.insert("båzzøø".as_bytes(), 3);
214///
215/// let trie = ZeroTriePerfectHash::try_from(&map)?;
216///
217/// assert_eq!(trie.get("foo".as_bytes()), Some(1));
218/// assert_eq!(trie.get("bår".as_bytes()), Some(2));
219/// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
220/// assert_eq!(trie.get("bazzoo".as_bytes()), None);
221///
222/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
223/// ```
224#[repr(transparent)]
225#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
226#[cfg_attr(feature = "databake", derive(databake::Bake))]
227#[cfg_attr(feature = "databake", databake(path = zerotrie))]
228#[allow(clippy::exhaustive_structs)] // databake hidden fields
229pub struct ZeroTriePerfectHash<Store: ?Sized> {
230    #[doc(hidden)] // for databake, but there are no invariants
231    pub store: Store,
232}
233
234impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
235    fn transparent_ref_from_store(s: &Store) -> &Self {
236        unsafe {
237            // Safety: Self is transparent over Store
238            core::mem::transmute(s)
239        }
240    }
241}
242
243impl<Store> ZeroTriePerfectHash<Store> {
244    /// Wrap this specific ZeroTrie variant into a ZeroTrie.
245    #[inline]
246    pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
247        ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
248    }
249}
250
251/// A data structure that maps from a large number of byte strings to integers.
252///
253/// For more information, see [`ZeroTrie`].
254#[repr(transparent)]
255#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
256#[cfg_attr(feature = "databake", derive(databake::Bake))]
257#[cfg_attr(feature = "databake", databake(path = zerotrie))]
258#[allow(clippy::exhaustive_structs)] // databake hidden fields
259pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
260    #[doc(hidden)] // for databake, but there are no invariants
261    pub store: Store,
262}
263
264impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
265    fn transparent_ref_from_store(s: &Store) -> &Self {
266        unsafe {
267            // Safety: Self is transparent over Store
268            core::mem::transmute(s)
269        }
270    }
271}
272
273impl<Store> ZeroTrieExtendedCapacity<Store> {
274    /// Wrap this specific ZeroTrie variant into a ZeroTrie.
275    #[inline]
276    pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
277        ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
278    }
279}
280
281macro_rules! impl_zerotrie_subtype {
282    ($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
283        impl<Store> $name<Store> {
284            /// Create a trie directly from a store.
285            ///
286            /// If the store does not contain valid bytes, unexpected behavior may occur.
287            #[inline]
288            pub const fn from_store(store: Store) -> Self {
289                Self { store }
290            }
291            /// Takes the byte store from this trie.
292            #[inline]
293            pub fn into_store(self) -> Store {
294                self.store
295            }
296            /// Converts this trie's store to a different store implementing the `From` trait.
297            ///
298            #[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
299            ///
300            /// # Examples
301            ///
302            /// ```
303            /// use std::borrow::Cow;
304            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
305            ///
306            #[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
307            #[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
308            ///
309            /// assert_eq!(cow.get(b"abc"), Some(5));
310            /// ```
311            pub fn convert_store<X: From<Store>>(self) -> $name<X> {
312                $name::<X>::from_store(X::from(self.store))
313            }
314        }
315        impl<Store> $name<Store>
316        where
317        Store: AsRef<[u8]> + ?Sized,
318        {
319            /// Queries the trie for a string.
320            pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
321                // TODO: Should this be AsRef or Borrow?
322                reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
323            }
324            /// Returns `true` if the trie is empty.
325            #[inline]
326            pub fn is_empty(&self) -> bool {
327                self.store.as_ref().is_empty()
328            }
329            /// Returns the size of the trie in number of bytes.
330            ///
331            /// To get the number of keys in the trie, use `.iter().count()`:
332            ///
333            /// ```
334            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
335            ///
336            /// // A trie with two values: "abc" and "abcdef"
337            #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
338            ///
339            /// assert_eq!(8, trie.byte_len());
340            /// assert_eq!(2, trie.iter().count());
341            /// ```
342            #[inline]
343            pub fn byte_len(&self) -> usize {
344                self.store.as_ref().len()
345            }
346            /// Returns the bytes contained in the underlying store.
347            #[inline]
348            pub fn as_bytes(&self) -> &[u8] {
349                self.store.as_ref()
350            }
351            /// Returns this trie as a reference transparent over a byte slice.
352            #[inline]
353            pub fn as_borrowed(&self) -> &$name<[u8]> {
354                $name::from_bytes(self.store.as_ref())
355            }
356            /// Returns a trie with a store borrowing from this trie.
357            #[inline]
358            pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
359                $name::from_store(self.store.as_ref())
360            }
361        }
362        impl<Store> AsRef<$name<[u8]>> for $name<Store>
363        where
364        Store: AsRef<[u8]> + ?Sized,
365        {
366            #[inline]
367            fn as_ref(&self) -> &$name<[u8]> {
368                self.as_borrowed()
369            }
370        }
371        #[cfg(feature = "alloc")]
372        impl<Store> $name<Store>
373        where
374        Store: AsRef<[u8]> + ?Sized,
375        {
376            /// Converts a possibly-borrowed $name to an owned one.
377            ///
378            /// ✨ *Enabled with the `alloc` Cargo feature.*
379            ///
380            /// # Examples
381            ///
382            /// ```
383            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
384            ///
385            #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
386            #[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
387            ///
388            /// assert_eq!(trie.get(b"abc"), Some(5));
389            /// assert_eq!(owned.get(b"abc"), Some(5));
390            /// ```
391            #[inline]
392            pub fn to_owned(&self) -> $name<Vec<u8>> {
393                $name::from_store(
394                    Vec::from(self.store.as_ref()),
395                )
396            }
397            /// Returns an iterator over the key/value pairs in this trie.
398            ///
399            /// ✨ *Enabled with the `alloc` Cargo feature.*
400            ///
401            /// # Examples
402            ///
403            /// ```
404            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
405            ///
406            /// // A trie with two values: "abc" and "abcdef"
407            #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
408            ///
409            /// let mut it = trie.iter();
410            /// assert_eq!(it.next(), Some(("abc".into(), 0)));
411            /// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
412            /// assert_eq!(it.next(), None);
413            /// ```
414            #[inline]
415            #[allow(clippy::type_complexity)]
416            pub fn iter(&self) -> $iter_ty {
417                 $iter_fn(self.as_bytes())
418            }
419        }
420        impl $name<[u8]> {
421            /// Casts from a byte slice to a reference to a trie with the same lifetime.
422            ///
423            /// If the bytes are not a valid trie, unexpected behavior may occur.
424            #[inline]
425            pub fn from_bytes(trie: &[u8]) -> &Self {
426                Self::transparent_ref_from_store(trie)
427            }
428        }
429        #[cfg(feature = "alloc")]
430        impl $name<Vec<u8>> {
431            pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
432                use crate::options::ZeroTrieWithOptions;
433                ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
434                    items,
435                    Self::OPTIONS,
436                )
437                .map(|s| Self {
438                    store: s.to_bytes(),
439                })
440            }
441        }
442        #[cfg(feature = "alloc")]
443        impl<'a, K> FromIterator<(K, usize)> for $name<Vec<u8>>
444        where
445            K: AsRef<[u8]>
446        {
447            fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
448                use crate::options::ZeroTrieWithOptions;
449                use crate::builder::nonconst::ZeroTrieBuilder;
450                ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
451                    iter,
452                    Self::OPTIONS
453                )
454                .map(|s| Self {
455                    store: s.to_bytes(),
456                })
457                .unwrap()
458            }
459        }
460        #[cfg(feature = "alloc")]
461        impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
462        where
463            K: Borrow<[u8]>
464        {
465            type Error = crate::error::ZeroTrieBuildError;
466            fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
467                let tuples: Vec<(&[u8], usize)> = map
468                    .iter()
469                    .map(|(k, v)| (k.borrow(), *v))
470                    .collect();
471                let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
472                Self::try_from_tuple_slice(byte_str_slice)
473            }
474        }
475        #[cfg(feature = "alloc")]
476        impl<Store> $name<Store>
477        where
478            Store: AsRef<[u8]> + ?Sized
479        {
480            /// Exports the data from this ZeroTrie type into a BTreeMap.
481            ///
482            /// ✨ *Enabled with the `alloc` Cargo feature.*
483            ///
484            /// # Examples
485            ///
486            /// ```
487            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
488            /// use std::collections::BTreeMap;
489            ///
490            #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
491            /// let items = trie.to_btreemap();
492            ///
493            /// assert_eq!(items.len(), 2);
494            ///
495            #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
496            ///     .into_iter()
497            ///     .collect();
498            /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
499            /// ```
500            pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
501                self.iter().collect()
502            }
503            #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
504            pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
505                self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
506            }
507        }
508        #[cfg(feature = "alloc")]
509        impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
510        where
511            Store: AsRef<[u8]> + ?Sized,
512        {
513            #[inline]
514            fn from(other: &$name<Store>) -> Self {
515                other.to_btreemap()
516            }
517        }
518        #[cfg(feature = "litemap")]
519        impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
520        where
521            K: Borrow<[u8]>,
522            S: litemap::store::StoreIterable<'a, K, usize>,
523        {
524            type Error = crate::error::ZeroTrieBuildError;
525            fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
526                let tuples: Vec<(&[u8], usize)> = map
527                    .iter()
528                    .map(|(k, v)| (k.borrow(), *v))
529                    .collect();
530                let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
531                Self::try_from_tuple_slice(byte_str_slice)
532            }
533        }
534        #[cfg(feature = "litemap")]
535        impl<Store> $name<Store>
536        where
537            Store: AsRef<[u8]> + ?Sized,
538        {
539            /// Exports the data from this ZeroTrie type into a LiteMap.
540            ///
541            /// ✨ *Enabled with the `litemap` Cargo feature.*
542            ///
543            /// # Examples
544            ///
545            /// ```
546            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
547            /// use litemap::LiteMap;
548            ///
549            #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
550            ///
551            /// let items = trie.to_litemap();
552            /// assert_eq!(items.len(), 2);
553            ///
554            #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
555            ///     .iter()
556            ///     .map(|(k, v)| (k, *v))
557            ///     .collect();
558            /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
559            /// ```
560            pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
561                self.iter().collect()
562            }
563            #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
564            pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
565                self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
566            }
567        }
568        #[cfg(feature = "litemap")]
569        impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
570        where
571            Store: AsRef<[u8]> + ?Sized,
572        {
573            #[inline]
574            fn from(other: &$name<Store>) -> Self {
575                other.to_litemap()
576            }
577        }
578        #[cfg(feature = "litemap")]
579        impl $name<Vec<u8>>
580        {
581            #[cfg(feature = "serde")]
582            pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
583                let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
584                Self::try_from_tuple_slice(lm_borrowed.as_slice())
585            }
586        }
587        // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
588        impl Borrow<$name<[u8]>> for $name<&[u8]> {
589            #[inline]
590            fn borrow(&self) -> &$name<[u8]> {
591                self.as_borrowed()
592            }
593        }
594        // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
595        #[cfg(feature = "alloc")]
596        impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
597            #[inline]
598            fn borrow(&self) -> &$name<[u8]> {
599                self.as_borrowed()
600            }
601        }
602        // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
603        #[cfg(feature = "alloc")]
604        impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
605            #[inline]
606            fn borrow(&self) -> &$name<[u8]> {
607                self.as_borrowed()
608            }
609        }
610        #[cfg(feature = "alloc")]
611        impl alloc::borrow::ToOwned for $name<[u8]> {
612            type Owned = $name<Box<[u8]>>;
613            #[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
614            ///
615            #[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
616            ///
617            /// ✨ *Enabled with the `alloc` Cargo feature.*
618            ///
619            /// # Examples
620            ///
621            /// ```
622            /// use std::borrow::Cow;
623            #[doc = concat!("use zerotrie::", stringify!($name), ";")]
624            ///
625            #[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
626            /// assert_eq!(trie.get(b"abc"), Some(5));
627            /// ```
628            fn to_owned(&self) -> Self::Owned {
629                let bytes: &[u8] = self.store.as_ref();
630                $name::from_store(
631                    Vec::from(bytes).into_boxed_slice(),
632                )
633            }
634        }
635        // TODO(#2778): Auto-derive these impls based on the repr(transparent).
636        //
637        // Safety (based on the safety checklist on the VarULE trait):
638        //  1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
639        //     over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
640        //  2. `$name` is aligned to 1 byte for the same reason
641        //  3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
642        //  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
643        //  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
644        //  6. `parse_bytes()` is left to its default impl
645        //  7. byte equality is semantic equality
646        #[cfg(feature = "zerovec")]
647        unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
648        where
649            Store: zerovec::ule::VarULE,
650        {
651            #[inline]
652            fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
653                Store::validate_bytes(bytes)
654            }
655            #[inline]
656            unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
657                // Safety: we can pass down the validity invariant to Store
658                Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
659            }
660        }
661        #[cfg(feature = "zerofrom")]
662        impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
663        where
664            Store2: zerofrom::ZeroFrom<'zf, Store1>,
665        {
666            #[inline]
667            fn zero_from(other: &'zf $name<Store1>) -> Self {
668                $name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
669            }
670        }
671    };
672}
673
674#[cfg(feature = "alloc")]
675fn string_to_box_u8(input: String) -> Box<[u8]> {
676    input.into_boxed_str().into_boxed_bytes()
677}
678
679#[doc(hidden)] // subject to change
680#[cfg(feature = "alloc")]
681pub type ZeroTrieStringIterator<'a> =
682    core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>;
683
684impl_zerotrie_subtype!(
685    ZeroTrieSimpleAscii,
686    String,
687    reader::get_iter_ascii_or_panic,
688    ZeroTrieStringIterator,
689    string_to_box_u8
690);
691impl_zerotrie_subtype!(
692    ZeroAsciiIgnoreCaseTrie,
693    String,
694    reader::get_iter_ascii_or_panic,
695    ZeroTrieStringIterator,
696    string_to_box_u8
697);
698impl_zerotrie_subtype!(
699    ZeroTriePerfectHash,
700    Vec<u8>,
701    reader::get_iter_phf,
702    reader::ZeroTrieIterator<'_>,
703    Vec::into_boxed_slice
704);
705impl_zerotrie_subtype!(
706    ZeroTrieExtendedCapacity,
707    Vec<u8>,
708    reader::get_iter_phf,
709    reader::ZeroTrieIterator<'_>,
710    Vec::into_boxed_slice
711);
712
713macro_rules! impl_dispatch {
714    ($self:ident, $inner_fn:ident()) => {
715        match $self.0 {
716            ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
717            ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
718            ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
719        }
720    };
721    ($self:ident, $inner_fn:ident().into_zerotrie()) => {
722        match $self.0 {
723            ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
724            ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
725            ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
726        }
727    };
728    (&$self:ident, $inner_fn:ident()) => {
729        match &$self.0 {
730            ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
731            ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
732            ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
733        }
734    };
735    ($self:ident, $inner_fn:ident($arg:ident)) => {
736        match $self.0 {
737            ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
738            ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
739            ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
740        }
741    };
742    (&$self:ident, $inner_fn:ident($arg:ident)) => {
743        match &$self.0 {
744            ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
745            ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
746            ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
747        }
748    };
749    (&$self:ident, $trait:ident::$inner_fn:ident()) => {
750        match &$self.0 {
751            ZeroTrieFlavor::SimpleAscii(subtype) => {
752                ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
753            }
754            ZeroTrieFlavor::PerfectHash(subtype) => {
755                ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
756            }
757            ZeroTrieFlavor::ExtendedCapacity(subtype) => {
758                ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
759            }
760        }
761    };
762}
763
764impl<Store> ZeroTrie<Store> {
765    /// Takes the byte store from this trie.
766    pub fn into_store(self) -> Store {
767        impl_dispatch!(self, into_store())
768    }
769    /// Converts this trie's store to a different store implementing the `From` trait.
770    ///
771    /// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
772    pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
773    where
774        NewStore: From<Store>,
775    {
776        impl_dispatch!(self, convert_store().into_zerotrie())
777    }
778}
779
780impl<Store> ZeroTrie<Store>
781where
782    Store: AsRef<[u8]>,
783{
784    /// Queries the trie for a string.
785    pub fn get<K>(&self, key: K) -> Option<usize>
786    where
787        K: AsRef<[u8]>,
788    {
789        impl_dispatch!(&self, get(key))
790    }
791    /// Returns `true` if the trie is empty.
792    pub fn is_empty(&self) -> bool {
793        impl_dispatch!(&self, is_empty())
794    }
795    /// Returns the size of the trie in number of bytes.
796    ///
797    /// To get the number of keys in the trie, use `.iter().count()`.
798    pub fn byte_len(&self) -> usize {
799        impl_dispatch!(&self, byte_len())
800    }
801}
802
803#[cfg(feature = "alloc")]
804impl<Store> ZeroTrie<Store>
805where
806    Store: AsRef<[u8]>,
807{
808    /// Exports the data from this ZeroTrie into a BTreeMap.
809    pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
810        impl_dispatch!(&self, to_btreemap_bytes())
811    }
812}
813
814#[cfg(feature = "litemap")]
815impl<Store> ZeroTrie<Store>
816where
817    Store: AsRef<[u8]>,
818{
819    /// Exports the data from this ZeroTrie into a LiteMap.
820    pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
821        impl_dispatch!(&self, to_litemap_bytes())
822    }
823}
824
825#[cfg(feature = "alloc")]
826impl ZeroTrie<Vec<u8>> {
827    pub(crate) fn try_from_tuple_slice(
828        items: &[(&ByteStr, usize)],
829    ) -> Result<Self, ZeroTrieBuildError> {
830        let is_all_ascii = items.iter().all(|(s, _)| s.is_all_ascii());
831        if is_all_ascii && items.len() < 512 {
832            ZeroTrieSimpleAscii::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
833        } else {
834            ZeroTriePerfectHash::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
835        }
836    }
837}
838
839#[cfg(feature = "alloc")]
840impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
841where
842    K: AsRef<[u8]>,
843{
844    fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
845        // We need two Vecs because the first one anchors the `K`s that the second one borrows.
846        let items = Vec::from_iter(iter);
847        let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect();
848        items.sort();
849        let byte_str_slice = ByteStr::from_byte_slice_with_value(&items);
850        #[allow(clippy::unwrap_used)] // FromIterator is panicky
851        Self::try_from_tuple_slice(byte_str_slice).unwrap()
852    }
853}
854
855#[cfg(feature = "databake")]
856impl<Store> databake::Bake for ZeroTrie<Store>
857where
858    Store: databake::Bake,
859{
860    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
861        use databake::*;
862        let inner = impl_dispatch!(&self, bake(env));
863        quote! { #inner.into_zerotrie() }
864    }
865}
866
867#[cfg(feature = "databake")]
868impl<Store> databake::BakeSize for ZeroTrie<Store>
869where
870    Store: databake::BakeSize,
871{
872    fn borrows_size(&self) -> usize {
873        impl_dispatch!(&self, borrows_size())
874    }
875}
876
877#[cfg(feature = "zerofrom")]
878impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
879where
880    Store2: zerofrom::ZeroFrom<'zf, Store1>,
881{
882    fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
883        use zerofrom::ZeroFrom;
884        impl_dispatch!(&other, ZeroFrom::zero_from())
885    }
886}