zerovec/varzerovec/
slice.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6use super::vec::VarZeroVecInner;
7use super::*;
8use crate::ule::*;
9use core::cmp::{Ord, Ordering, PartialOrd};
10use core::fmt;
11use core::marker::PhantomData;
12use core::mem;
13
14use core::ops::Index;
15use core::ops::Range;
16
17/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
18/// where `T` is not `Sized`.
19///
20/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
21/// owned data and as such is ideal for deserialization since most human readable
22/// serialization formats cannot unconditionally deserialize zero-copy.
23///
24/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
25/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
26/// using `VarZeroVec<ZeroSlice<T>>`.
27///
28/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
29/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
30///
31/// This type can be nested within itself to allow for multi-level nested `Vec`s.
32///
33/// # Examples
34///
35/// ## Nested Slices
36///
37/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
38///
39/// ```rust
40/// use zerovec::{VarZeroSlice, VarZeroVec};
41/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
42/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
43/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
44/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
45/// let strings_12 = vec![&*strings_1, &*strings_2];
46/// let strings_34 = vec![&*strings_3, &*strings_4];
47/// let all_strings = vec![strings_12, strings_34];
48///
49/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
50/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
51/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
52/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
53/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
54/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
55/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
56///
57/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
58///     .iter()
59///     .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
60///         v.iter()
61///             .map(|x: &VarZeroSlice<_>| {
62///                 x.as_varzerovec()
63///                     .iter()
64///                     .map(|s| s.to_owned())
65///                     .collect::<Vec<String>>()
66///             })
67///             .collect::<Vec<_>>()
68///     })
69///     .collect::<Vec<_>>();
70/// assert_eq!(reconstructed, all_strings);
71///
72/// let bytes = vzv_all.as_bytes();
73/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
74///     VarZeroVec::parse_bytes(bytes).unwrap();
75/// assert_eq!(vzv_from_bytes, vzv_all);
76/// ```
77///
78/// ## Iterate over Windows
79///
80/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
81/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
82///
83/// ```
84/// use zerovec::VarZeroVec;
85///
86/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
87/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
88///
89/// let mut it = vzv.iter().peekable();
90/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
91///     // Evaluate (x, y) here.
92/// #   pairs.push((x, y));
93/// }
94/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
95/// ```
96//
97// safety invariant: The slice MUST be one which parses to
98// a valid VarZeroVecComponents<T>
99#[repr(transparent)]
100pub struct VarZeroSlice<T: ?Sized, F = Index16> {
101    marker: PhantomData<(F, T)>,
102    /// The original slice this was constructed from
103    entire_slice: [u8],
104}
105
106impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
107    /// Construct a new empty VarZeroSlice
108    pub const fn new_empty() -> &'static Self {
109        // The empty VZV is special-cased to the empty slice
110        unsafe { mem::transmute(&[] as &[u8]) }
111    }
112
113    /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
114    #[inline]
115    pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
116        unsafe {
117            // safety: VarZeroSlice is guaranteed to parse here
118            VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
119        }
120    }
121
122    /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
123    ///
124    /// # Safety
125    ///
126    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
127    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
128        // self is really just a wrapper around a byte slice
129        mem::transmute(bytes)
130    }
131
132    /// Get the number of elements in this slice
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// # use zerovec::VarZeroVec;
138    ///
139    /// let strings = vec!["foo", "bar", "baz", "quux"];
140    /// let vec = VarZeroVec::<str>::from(&strings);
141    ///
142    /// assert_eq!(vec.len(), 4);
143    /// ```
144    pub fn len(&self) -> usize {
145        self.as_components().len()
146    }
147
148    /// Returns `true` if the slice contains no elements.
149    ///
150    /// # Examples
151    ///
152    /// ```
153    /// # use zerovec::VarZeroVec;
154    ///
155    /// let strings: Vec<String> = vec![];
156    /// let vec = VarZeroVec::<str>::from(&strings);
157    ///
158    /// assert!(vec.is_empty());
159    /// ```
160    pub fn is_empty(&self) -> bool {
161        self.as_components().is_empty()
162    }
163
164    /// Obtain an iterator over this slice's elements
165    ///
166    /// # Example
167    ///
168    /// ```rust
169    /// # use zerovec::VarZeroVec;
170    ///
171    /// let strings = vec!["foo", "bar", "baz", "quux"];
172    /// let vec = VarZeroVec::<str>::from(&strings);
173    ///
174    /// let mut iter_results: Vec<&str> = vec.iter().collect();
175    /// assert_eq!(iter_results[0], "foo");
176    /// assert_eq!(iter_results[1], "bar");
177    /// assert_eq!(iter_results[2], "baz");
178    /// assert_eq!(iter_results[3], "quux");
179    /// ```
180    pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
181        self.as_components().iter()
182    }
183
184    /// Get one of this slice's elements, returning `None` if the index is out of bounds
185    ///
186    /// # Example
187    ///
188    /// ```rust
189    /// # use zerovec::VarZeroVec;
190    ///
191    /// let strings = vec!["foo", "bar", "baz", "quux"];
192    /// let vec = VarZeroVec::<str>::from(&strings);
193    ///
194    /// let mut iter_results: Vec<&str> = vec.iter().collect();
195    /// assert_eq!(vec.get(0), Some("foo"));
196    /// assert_eq!(vec.get(1), Some("bar"));
197    /// assert_eq!(vec.get(2), Some("baz"));
198    /// assert_eq!(vec.get(3), Some("quux"));
199    /// assert_eq!(vec.get(4), None);
200    /// ```
201    pub fn get(&self, idx: usize) -> Option<&T> {
202        self.as_components().get(idx)
203    }
204
205    /// Get one of this slice's elements
206    ///
207    /// # Safety
208    ///
209    /// `index` must be in range
210    ///
211    /// # Example
212    ///
213    /// ```rust
214    /// # use zerovec::VarZeroVec;
215    ///
216    /// let strings = vec!["foo", "bar", "baz", "quux"];
217    /// let vec = VarZeroVec::<str>::from(&strings);
218    ///
219    /// let mut iter_results: Vec<&str> = vec.iter().collect();
220    /// unsafe {
221    ///     assert_eq!(vec.get_unchecked(0), "foo");
222    ///     assert_eq!(vec.get_unchecked(1), "bar");
223    ///     assert_eq!(vec.get_unchecked(2), "baz");
224    ///     assert_eq!(vec.get_unchecked(3), "quux");
225    /// }
226    /// ```
227    pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
228        self.as_components().get_unchecked(idx)
229    }
230
231    /// Obtain an owned `Vec<Box<T>>` out of this
232    #[cfg(feature = "alloc")]
233    pub fn to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
234        self.as_components().to_vec()
235    }
236
237    /// Get a reference to the entire encoded backing buffer of this slice
238    ///
239    /// The bytes can be passed back to [`Self::parse_bytes()`].
240    ///
241    /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
242    ///
243    /// # Example
244    ///
245    /// ```rust
246    /// # use zerovec::VarZeroVec;
247    ///
248    /// let strings = vec!["foo", "bar", "baz"];
249    /// let vzv = VarZeroVec::<str>::from(&strings);
250    ///
251    /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
252    /// ```
253    #[inline]
254    pub const fn as_bytes(&self) -> &[u8] {
255        &self.entire_slice
256    }
257
258    /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
259    ///
260    /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
261    /// it is more efficient to perform this conversion first
262    pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
263        VarZeroVec(VarZeroVecInner::Borrowed(self))
264    }
265
266    /// Parse a VarZeroSlice from a slice of the appropriate format
267    ///
268    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
269    pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
270        <Self as VarULE>::parse_bytes(slice)
271    }
272}
273
274impl<T, F> VarZeroSlice<T, F>
275where
276    T: VarULE,
277    T: ?Sized,
278    T: Ord,
279    F: VarZeroVecFormat,
280{
281    /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
282    /// the standard library function [`binary_search`].
283    ///
284    /// # Example
285    ///
286    /// ```
287    /// # use zerovec::VarZeroVec;
288    ///
289    /// let strings = vec!["a", "b", "f", "g"];
290    /// let vec = VarZeroVec::<str>::from(&strings);
291    ///
292    /// assert_eq!(vec.binary_search("f"), Ok(2));
293    /// assert_eq!(vec.binary_search("e"), Err(2));
294    /// ```
295    ///
296    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
297    #[inline]
298    pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
299        self.as_components().binary_search(x)
300    }
301
302    /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
303    ///
304    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
305    /// to the behavior of the standard library function [`binary_search`].
306    ///
307    /// The index is returned relative to the start of the range.
308    ///
309    /// # Example
310    ///
311    /// ```
312    /// # use zerovec::VarZeroVec;
313    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
314    /// let vec = VarZeroVec::<str>::from(&strings);
315    ///
316    /// // Same behavior as binary_search when the range covers the whole slice:
317    /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
318    /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
319    ///
320    /// // Will not look outside of the range:
321    /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
322    /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
323    ///
324    /// // Will return indices relative to the start of the range:
325    /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
326    /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
327    ///
328    /// // Will return `None` if the range is out of bounds:
329    /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
330    /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
331    /// ```
332    ///
333    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
334    #[inline]
335    pub fn binary_search_in_range(
336        &self,
337        x: &T,
338        range: Range<usize>,
339    ) -> Option<Result<usize, usize>> {
340        self.as_components().binary_search_in_range(x, range)
341    }
342}
343
344impl<T, F> VarZeroSlice<T, F>
345where
346    T: VarULE,
347    T: ?Sized,
348    F: VarZeroVecFormat,
349{
350    /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
351    /// the standard library function [`binary_search_by`].
352    ///
353    /// # Example
354    ///
355    /// ```
356    /// # use zerovec::VarZeroVec;
357    /// let strings = vec!["a", "b", "f", "g"];
358    /// let vec = VarZeroVec::<str>::from(&strings);
359    ///
360    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
361    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
362    /// ```
363    ///
364    /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
365    #[inline]
366    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
367        self.as_components().binary_search_by(predicate)
368    }
369
370    /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
371    ///
372    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
373    /// to the behavior of the standard library function [`binary_search`].
374    ///
375    /// The index is returned relative to the start of the range.
376    ///
377    /// # Example
378    ///
379    /// ```
380    /// # use zerovec::VarZeroVec;
381    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
382    /// let vec = VarZeroVec::<str>::from(&strings);
383    ///
384    /// // Same behavior as binary_search when the range covers the whole slice:
385    /// assert_eq!(
386    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
387    ///     Some(Ok(3))
388    /// );
389    /// assert_eq!(
390    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
391    ///     Some(Err(4))
392    /// );
393    ///
394    /// // Will not look outside of the range:
395    /// assert_eq!(
396    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
397    ///     Some(Err(1))
398    /// );
399    /// assert_eq!(
400    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
401    ///     Some(Err(0))
402    /// );
403    ///
404    /// // Will return indices relative to the start of the range:
405    /// assert_eq!(
406    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
407    ///     Some(Ok(2))
408    /// );
409    /// assert_eq!(
410    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
411    ///     Some(Err(3))
412    /// );
413    ///
414    /// // Will return `None` if the range is out of bounds:
415    /// assert_eq!(
416    ///     vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
417    ///     None
418    /// );
419    /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
420    /// ```
421    ///
422    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
423    pub fn binary_search_in_range_by(
424        &self,
425        predicate: impl FnMut(&T) -> Ordering,
426        range: Range<usize>,
427    ) -> Option<Result<usize, usize>> {
428        self.as_components()
429            .binary_search_in_range_by(predicate, range)
430    }
431}
432// Safety (based on the safety checklist on the VarULE trait):
433//  1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
434//     `[u8]` slice which satisfies this invariant)
435//  2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
436//     `[u8]` slice which satisfies this invariant)
437//  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
438//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
439//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
440//  6. `as_bytes()` is equivalent to a regular transmute of the underlying data
441//  7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
442unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
443    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
444        let _: VarZeroVecComponents<T, F> =
445            VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::<Self>())?;
446        Ok(())
447    }
448
449    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
450        // self is really just a wrapper around a byte slice
451        mem::transmute(bytes)
452    }
453
454    fn as_bytes(&self) -> &[u8] {
455        &self.entire_slice
456    }
457}
458
459impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
460    type Output = T;
461    fn index(&self, index: usize) -> &Self::Output {
462        #[allow(clippy::panic)] // documented
463        match self.get(index) {
464            Some(x) => x,
465            None => panic!(
466                "index out of bounds: the len is {} but the index is {index}",
467                self.len()
468            ),
469        }
470    }
471}
472
473impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
474where
475    T: VarULE,
476    T: ?Sized,
477    T: PartialEq,
478    F: VarZeroVecFormat,
479{
480    #[inline]
481    fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
482        // VarULE has an API guarantee that this is equivalent
483        // to `T::VarULE::eq()`
484        self.entire_slice.eq(&other.entire_slice)
485    }
486}
487
488impl<T, F> Eq for VarZeroSlice<T, F>
489where
490    T: VarULE,
491    T: ?Sized,
492    T: Eq,
493    F: VarZeroVecFormat,
494{
495}
496
497impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
498    #[inline]
499    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
500        self.iter().partial_cmp(other.iter())
501    }
502}
503
504impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
505    #[inline]
506    fn cmp(&self, other: &Self) -> Ordering {
507        self.iter().cmp(other.iter())
508    }
509}
510
511impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
512where
513    T: fmt::Debug,
514{
515    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516        f.debug_list().entries(self.iter()).finish()
517    }
518}
519
520impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
521    fn as_ref(&self) -> &VarZeroSlice<T, F> {
522        self
523    }
524}