zerotrie/
cursor.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Types for walking stepwise through a trie.
6//!
7//! For examples, see the `.cursor()` functions
8//! and the `Cursor` types in this module.
9
10use crate::reader;
11use crate::ZeroAsciiIgnoreCaseTrie;
12use crate::ZeroTrieSimpleAscii;
13
14use core::fmt;
15
16impl<Store> ZeroTrieSimpleAscii<Store>
17where
18    Store: AsRef<[u8]> + ?Sized,
19{
20    /// Gets a cursor into the current trie.
21    ///
22    /// Useful to query a trie with data that is not a slice.
23    ///
24    /// This is currently supported only on [`ZeroTrieSimpleAscii`]
25    /// and [`ZeroAsciiIgnoreCaseTrie`].
26    ///
27    /// # Examples
28    ///
29    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
30    ///
31    /// ```
32    /// use core::fmt::Write;
33    /// use zerotrie::ZeroTrieSimpleAscii;
34    ///
35    /// // A trie with two values: "abc" and "abcdef"
36    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
37    ///
38    /// // Get out the value for "abc"
39    /// let mut cursor = trie.cursor();
40    /// write!(&mut cursor, "abc");
41    /// assert_eq!(cursor.take_value(), Some(0));
42    /// ```
43    ///
44    /// Find the longest prefix match:
45    ///
46    /// ```
47    /// use zerotrie::ZeroTrieSimpleAscii;
48    ///
49    /// // A trie with two values: "abc" and "abcdef"
50    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
51    ///
52    /// // Find the longest prefix of the string "abcdxy":
53    /// let query = b"abcdxy";
54    /// let mut longest_prefix = 0;
55    /// let mut cursor = trie.cursor();
56    /// for (i, b) in query.iter().enumerate() {
57    ///     // Checking is_empty() is not required, but it is
58    ///     // good for efficiency
59    ///     if cursor.is_empty() {
60    ///         break;
61    ///     }
62    ///     if cursor.take_value().is_some() {
63    ///         longest_prefix = i;
64    ///     }
65    ///     cursor.step(*b);
66    /// }
67    ///
68    /// // The longest prefix is "abc" which is length 3:
69    /// assert_eq!(longest_prefix, 3);
70    /// ```
71    #[inline]
72    pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
73        ZeroTrieSimpleAsciiCursor {
74            trie: self.as_borrowed_slice(),
75        }
76    }
77}
78
79impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
80where
81    Store: AsRef<[u8]> + ?Sized,
82{
83    /// Gets a cursor into the current trie.
84    ///
85    /// Useful to query a trie with data that is not a slice.
86    ///
87    /// This is currently supported only on [`ZeroTrieSimpleAscii`]
88    /// and [`ZeroAsciiIgnoreCaseTrie`].
89    ///
90    /// # Examples
91    ///
92    /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
93    ///
94    /// ```
95    /// use core::fmt::Write;
96    /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
97    ///
98    /// // A trie with two values: "aBc" and "aBcdEf"
99    /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
100    ///
101    /// // Get out the value for "abc" (case-insensitive!)
102    /// let mut cursor = trie.cursor();
103    /// write!(&mut cursor, "abc");
104    /// assert_eq!(cursor.take_value(), Some(0));
105    /// ```
106    ///
107    /// For more examples, see [`ZeroTrieSimpleAscii::cursor`].
108    #[inline]
109    pub fn cursor(&self) -> ZeroAsciiIgnoreCaseTrieCursor {
110        ZeroAsciiIgnoreCaseTrieCursor {
111            trie: self.as_borrowed_slice(),
112        }
113    }
114}
115
116impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
117    /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
118    /// having to doubly anchor the trie to the stack.
119    #[inline]
120    pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
121        ZeroTrieSimpleAsciiCursor { trie: self }
122    }
123}
124
125impl<'a> ZeroAsciiIgnoreCaseTrie<&'a [u8]> {
126    /// Same as [`ZeroAsciiIgnoreCaseTrie::cursor()`] but moves self to avoid
127    /// having to doubly anchor the trie to the stack.
128    #[inline]
129    pub fn into_cursor(self) -> ZeroAsciiIgnoreCaseTrieCursor<'a> {
130        ZeroAsciiIgnoreCaseTrieCursor { trie: self }
131    }
132}
133
134/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
135///
136/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
137// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
138#[derive(Debug, Clone)]
139pub struct ZeroTrieSimpleAsciiCursor<'a> {
140    trie: ZeroTrieSimpleAscii<&'a [u8]>,
141}
142
143/// A cursor into a [`ZeroAsciiIgnoreCaseTrie`], useful for stepwise lookup.
144///
145/// For examples, see [`ZeroAsciiIgnoreCaseTrie::cursor()`].
146// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
147#[derive(Debug, Clone)]
148pub struct ZeroAsciiIgnoreCaseTrieCursor<'a> {
149    trie: ZeroAsciiIgnoreCaseTrie<&'a [u8]>,
150}
151
152/// Information about a probed edge.
153#[derive(Debug, Copy, Clone, PartialEq, Eq)]
154#[non_exhaustive] // no need to destructure or construct this in userland
155pub struct AsciiProbeResult {
156    /// The character's byte value between this node and its parent.
157    pub byte: u8,
158    /// The number of siblings of this node, _including itself_.
159    pub total_siblings: u8,
160}
161
162impl ZeroTrieSimpleAsciiCursor<'_> {
163    /// Steps the cursor one character into the trie based on the character's byte value.
164    ///
165    /// # Examples
166    ///
167    /// Unrolled loop checking for string presence at every step:
168    ///
169    /// ```
170    /// use zerotrie::ZeroTrieSimpleAscii;
171    ///
172    /// // A trie with two values: "abc" and "abcdef"
173    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
174    ///
175    /// // Search the trie for the string "abcdxy"
176    /// let mut cursor = trie.cursor();
177    /// assert_eq!(cursor.take_value(), None); // ""
178    /// cursor.step(b'a');
179    /// assert_eq!(cursor.take_value(), None); // "a"
180    /// cursor.step(b'b');
181    /// assert_eq!(cursor.take_value(), None); // "ab"
182    /// cursor.step(b'c');
183    /// assert_eq!(cursor.take_value(), Some(0)); // "abc"
184    /// cursor.step(b'd');
185    /// assert_eq!(cursor.take_value(), None); // "abcd"
186    /// assert!(!cursor.is_empty());
187    /// cursor.step(b'x'); // no strings have the prefix "abcdx"
188    /// assert!(cursor.is_empty());
189    /// assert_eq!(cursor.take_value(), None); // "abcdx"
190    /// cursor.step(b'y');
191    /// assert_eq!(cursor.take_value(), None); // "abcdxy"
192    /// ```
193    ///
194    /// If the byte is not ASCII, the cursor will become empty:
195    ///
196    /// ```
197    /// use zerotrie::ZeroTrieSimpleAscii;
198    ///
199    /// // A trie with two values: "abc" and "abcdef"
200    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
201    ///
202    /// let mut cursor = trie.cursor();
203    /// assert_eq!(cursor.take_value(), None); // ""
204    /// cursor.step(b'a');
205    /// assert_eq!(cursor.take_value(), None); // "a"
206    /// cursor.step(b'b');
207    /// assert_eq!(cursor.take_value(), None); // "ab"
208    /// cursor.step(b'\xFF');
209    /// assert!(cursor.is_empty());
210    /// assert_eq!(cursor.take_value(), None);
211    /// ```
212    #[inline]
213    pub fn step(&mut self, byte: u8) {
214        reader::step_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, byte);
215    }
216
217    /// Takes the value at the current position.
218    ///
219    /// Calling this function on a new cursor is equivalent to calling `.get()`
220    /// with the empty string (except that it can only be called once).
221    ///
222    /// # Examples
223    ///
224    /// ```
225    /// use zerotrie::ZeroTrieSimpleAscii;
226    ///
227    /// // A trie with two values: "" and "abc"
228    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
229    ///
230    /// assert_eq!(Some(0), trie.get(""));
231    /// let mut cursor = trie.cursor();
232    /// assert_eq!(Some(0), cursor.take_value());
233    /// assert_eq!(None, cursor.take_value());
234    /// ```
235    #[inline]
236    pub fn take_value(&mut self) -> Option<usize> {
237        reader::take_value(&mut self.trie.store)
238    }
239
240    /// Steps the cursor one character into the trie based on an edge index,
241    /// returning the corresponding character as a byte.
242    ///
243    /// This function is similar to [`Self::step()`], but it takes an index instead of a char.
244    /// This enables stepwise iteration over the contents of the trie.
245    ///
246    /// If there are multiple possibilities for the next byte, the `index` argument allows
247    /// visiting them in order. Since this function steps the cursor, the cursor must be
248    /// cloned (a cheap operation) in order to visit multiple children.
249    ///
250    /// # Examples
251    ///
252    /// Continually query index 0 to extract the first item from a trie:
253    ///
254    /// ```
255    /// use zerotrie::ZeroTrieSimpleAscii;
256    ///
257    /// let data: &[(String, usize)] = &[
258    ///     ("ab".to_string(), 111),
259    ///     ("abcxyz".to_string(), 22),
260    ///     ("abde".to_string(), 333),
261    ///     ("afg".to_string(), 44),
262    /// ];
263    ///
264    /// let trie: ZeroTrieSimpleAscii<Vec<u8>> =
265    ///     data.iter().map(|(s, v)| (s.as_str(), *v)).collect();
266    ///
267    /// let mut cursor = trie.cursor();
268    /// let mut key = String::new();
269    /// let value = loop {
270    ///     if let Some(value) = cursor.take_value() {
271    ///         break value;
272    ///     }
273    ///     let probe_result = cursor.probe(0).unwrap();
274    ///     key.push(char::from(probe_result.byte));
275    /// };
276    ///
277    /// assert_eq!(key, "ab");
278    /// assert_eq!(value, 111);
279    /// ```
280    ///
281    /// Stepwise iterate over all entries in the trie:
282    ///
283    /// ```
284    /// # use zerotrie::ZeroTrieSimpleAscii;
285    /// # let data: &[(String, usize)] = &[
286    /// #     ("ab".to_string(), 111),
287    /// #     ("abcxyz".to_string(), 22),
288    /// #     ("abde".to_string(), 333),
289    /// #     ("afg".to_string(), 44)
290    /// # ];
291    /// # let trie: ZeroTrieSimpleAscii<Vec<u8>> = data
292    /// #     .iter()
293    /// #     .map(|(s, v)| (s.as_str(), *v))
294    /// #     .collect();
295    /// // (trie built as in previous example)
296    ///
297    /// // Initialize the iteration at the first child of the trie.
298    /// let mut stack = Vec::from([(trie.cursor(), 0, 0)]);
299    /// let mut key = Vec::new();
300    /// let mut results = Vec::new();
301    /// loop {
302    ///     let Some((mut cursor, index, suffix_len)) = stack.pop() else {
303    ///         // Nothing left in the trie.
304    ///         break;
305    ///     };
306    ///     // Check to see if there is a value at the current node.
307    ///     if let Some(value) = cursor.take_value() {
308    ///         results.push((String::from_utf8(key.clone()).unwrap(), value));
309    ///     }
310    ///     // Now check for children of the current node.
311    ///     let mut sub_cursor = cursor.clone();
312    ///     if let Some(probe_result) = sub_cursor.probe(index) {
313    ///         // Found a child. Add the current byte edge to the key.
314    ///         key.push(probe_result.byte);
315    ///         // Add the child to the stack, and also add back the current
316    ///         // node if there are more siblings to visit.
317    ///         if index + 1 < probe_result.total_siblings as usize {
318    ///             stack.push((cursor, index + 1, suffix_len));
319    ///             stack.push((sub_cursor, 0, 1));
320    ///         } else {
321    ///             stack.push((sub_cursor, 0, suffix_len + 1));
322    ///         }
323    ///     } else {
324    ///         // No more children. Pop this node's bytes from the key.
325    ///         for _ in 0..suffix_len {
326    ///             key.pop();
327    ///         }
328    ///     }
329    /// }
330    ///
331    /// assert_eq!(&results, data);
332    /// ```
333    pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
334        reader::probe_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, index)
335    }
336
337    /// Checks whether the cursor points to an empty trie.
338    ///
339    /// Use this to determine when to stop iterating.
340    #[inline]
341    pub fn is_empty(&self) -> bool {
342        self.trie.is_empty()
343    }
344}
345
346impl ZeroAsciiIgnoreCaseTrieCursor<'_> {
347    /// Steps the cursor one byte into the trie.
348    ///
349    /// Returns the byte if matched, which may be a different case than the input byte.
350    /// If this function returns `None`, any lookup loops can be terminated.
351    ///
352    /// # Examples
353    ///
354    /// Normalize the case of a value by stepping through an ignore-case trie:
355    ///
356    /// ```
357    /// use std::borrow::Cow;
358    /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
359    ///
360    /// // A trie with two values: "aBc" and "aBcdEf"
361    /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
362    ///
363    /// // Get out the value for "abc" and normalize the key string
364    /// let mut cursor = trie.cursor();
365    /// let mut key_str = Cow::Borrowed("abc".as_bytes());
366    /// let mut i = 0;
367    /// let value = loop {
368    ///     let Some(&input_byte) = key_str.get(i) else {
369    ///         break cursor.take_value();
370    ///     };
371    ///     let Some(matched_byte) = cursor.step(input_byte) else {
372    ///         break None;
373    ///     };
374    ///     if matched_byte != input_byte {
375    ///         key_str.to_mut()[i] = matched_byte;
376    ///     }
377    ///     i += 1;
378    /// };
379    ///
380    /// assert_eq!(value, Some(0));
381    /// assert_eq!(&*key_str, "aBc".as_bytes());
382    /// ```
383    ///
384    /// For more examples, see [`ZeroTrieSimpleAsciiCursor::step`].
385    #[inline]
386    pub fn step(&mut self, byte: u8) -> Option<u8> {
387        reader::step_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, byte)
388    }
389
390    /// Takes the value at the current position.
391    ///
392    /// For more details, see [`ZeroTrieSimpleAsciiCursor::take_value`].
393    #[inline]
394    pub fn take_value(&mut self) -> Option<usize> {
395        reader::take_value(&mut self.trie.store)
396    }
397
398    /// Probes the next byte in the cursor.
399    ///
400    /// For more details, see [`ZeroTrieSimpleAsciiCursor::probe`].
401    pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
402        reader::probe_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, index)
403    }
404
405    /// Checks whether the cursor points to an empty trie.
406    ///
407    /// For more details, see [`ZeroTrieSimpleAsciiCursor::is_empty`].
408    #[inline]
409    pub fn is_empty(&self) -> bool {
410        self.trie.is_empty()
411    }
412}
413
414impl fmt::Write for ZeroTrieSimpleAsciiCursor<'_> {
415    /// Steps the cursor through each ASCII byte of the string.
416    ///
417    /// If the string contains non-ASCII chars, an error is returned.
418    ///
419    /// # Examples
420    ///
421    /// ```
422    /// use core::fmt::Write;
423    /// use zerotrie::ZeroTrieSimpleAscii;
424    ///
425    /// // A trie with two values: "abc" and "abcdef"
426    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
427    ///
428    /// let mut cursor = trie.cursor();
429    /// cursor.write_str("abcdxy").expect("all ASCII");
430    /// cursor.write_str("🚂").expect_err("non-ASCII");
431    /// ```
432    fn write_str(&mut self, s: &str) -> fmt::Result {
433        for b in s.bytes() {
434            if !b.is_ascii() {
435                return Err(fmt::Error);
436            }
437            self.step(b);
438        }
439        Ok(())
440    }
441
442    /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
443    /// an error if the char is non-ASCII.
444    ///
445    /// # Examples
446    ///
447    /// ```
448    /// use core::fmt::Write;
449    /// use zerotrie::ZeroTrieSimpleAscii;
450    ///
451    /// // A trie with two values: "abc" and "abcdef"
452    /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
453    ///
454    /// let mut cursor = trie.cursor();
455    /// cursor.write_char('a').expect("ASCII");
456    /// cursor.write_char('x').expect("ASCII");
457    /// cursor.write_char('🚂').expect_err("non-ASCII");
458    /// ```
459    fn write_char(&mut self, c: char) -> fmt::Result {
460        if !c.is_ascii() {
461            return Err(fmt::Error);
462        }
463        self.step(c as u8);
464        Ok(())
465    }
466}
467
468impl fmt::Write for ZeroAsciiIgnoreCaseTrieCursor<'_> {
469    /// Steps the cursor through each ASCII byte of the string.
470    ///
471    /// If the string contains non-ASCII chars, an error is returned.
472    fn write_str(&mut self, s: &str) -> fmt::Result {
473        for b in s.bytes() {
474            if !b.is_ascii() {
475                return Err(fmt::Error);
476            }
477            self.step(b);
478        }
479        Ok(())
480    }
481
482    /// Equivalent to [`ZeroAsciiIgnoreCaseTrieCursor::step()`], except returns
483    /// an error if the char is non-ASCII.
484    fn write_char(&mut self, c: char) -> fmt::Result {
485        if !c.is_ascii() {
486            return Err(fmt::Error);
487        }
488        self.step(c as u8);
489        Ok(())
490    }
491}