zerotrie/cursor.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Types for walking stepwise through a trie.
6//!
7//! For examples, see the `.cursor()` functions
8//! and the `Cursor` types in this module.
9
10use crate::reader;
11use crate::ZeroAsciiIgnoreCaseTrie;
12use crate::ZeroTrieSimpleAscii;
13
14use core::fmt;
15
16impl<Store> ZeroTrieSimpleAscii<Store>
17where
18 Store: AsRef<[u8]> + ?Sized,
19{
20 /// Gets a cursor into the current trie.
21 ///
22 /// Useful to query a trie with data that is not a slice.
23 ///
24 /// This is currently supported only on [`ZeroTrieSimpleAscii`]
25 /// and [`ZeroAsciiIgnoreCaseTrie`].
26 ///
27 /// # Examples
28 ///
29 /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
30 ///
31 /// ```
32 /// use core::fmt::Write;
33 /// use zerotrie::ZeroTrieSimpleAscii;
34 ///
35 /// // A trie with two values: "abc" and "abcdef"
36 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
37 ///
38 /// // Get out the value for "abc"
39 /// let mut cursor = trie.cursor();
40 /// write!(&mut cursor, "abc");
41 /// assert_eq!(cursor.take_value(), Some(0));
42 /// ```
43 ///
44 /// Find the longest prefix match:
45 ///
46 /// ```
47 /// use zerotrie::ZeroTrieSimpleAscii;
48 ///
49 /// // A trie with two values: "abc" and "abcdef"
50 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
51 ///
52 /// // Find the longest prefix of the string "abcdxy":
53 /// let query = b"abcdxy";
54 /// let mut longest_prefix = 0;
55 /// let mut cursor = trie.cursor();
56 /// for (i, b) in query.iter().enumerate() {
57 /// // Checking is_empty() is not required, but it is
58 /// // good for efficiency
59 /// if cursor.is_empty() {
60 /// break;
61 /// }
62 /// if cursor.take_value().is_some() {
63 /// longest_prefix = i;
64 /// }
65 /// cursor.step(*b);
66 /// }
67 ///
68 /// // The longest prefix is "abc" which is length 3:
69 /// assert_eq!(longest_prefix, 3);
70 /// ```
71 #[inline]
72 pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
73 ZeroTrieSimpleAsciiCursor {
74 trie: self.as_borrowed_slice(),
75 }
76 }
77}
78
79impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
80where
81 Store: AsRef<[u8]> + ?Sized,
82{
83 /// Gets a cursor into the current trie.
84 ///
85 /// Useful to query a trie with data that is not a slice.
86 ///
87 /// This is currently supported only on [`ZeroTrieSimpleAscii`]
88 /// and [`ZeroAsciiIgnoreCaseTrie`].
89 ///
90 /// # Examples
91 ///
92 /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
93 ///
94 /// ```
95 /// use core::fmt::Write;
96 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
97 ///
98 /// // A trie with two values: "aBc" and "aBcdEf"
99 /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
100 ///
101 /// // Get out the value for "abc" (case-insensitive!)
102 /// let mut cursor = trie.cursor();
103 /// write!(&mut cursor, "abc");
104 /// assert_eq!(cursor.take_value(), Some(0));
105 /// ```
106 ///
107 /// For more examples, see [`ZeroTrieSimpleAscii::cursor`].
108 #[inline]
109 pub fn cursor(&self) -> ZeroAsciiIgnoreCaseTrieCursor {
110 ZeroAsciiIgnoreCaseTrieCursor {
111 trie: self.as_borrowed_slice(),
112 }
113 }
114}
115
116impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
117 /// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
118 /// having to doubly anchor the trie to the stack.
119 #[inline]
120 pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
121 ZeroTrieSimpleAsciiCursor { trie: self }
122 }
123}
124
125impl<'a> ZeroAsciiIgnoreCaseTrie<&'a [u8]> {
126 /// Same as [`ZeroAsciiIgnoreCaseTrie::cursor()`] but moves self to avoid
127 /// having to doubly anchor the trie to the stack.
128 #[inline]
129 pub fn into_cursor(self) -> ZeroAsciiIgnoreCaseTrieCursor<'a> {
130 ZeroAsciiIgnoreCaseTrieCursor { trie: self }
131 }
132}
133
134/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
135///
136/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
137// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
138#[derive(Debug, Clone)]
139pub struct ZeroTrieSimpleAsciiCursor<'a> {
140 trie: ZeroTrieSimpleAscii<&'a [u8]>,
141}
142
143/// A cursor into a [`ZeroAsciiIgnoreCaseTrie`], useful for stepwise lookup.
144///
145/// For examples, see [`ZeroAsciiIgnoreCaseTrie::cursor()`].
146// Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
147#[derive(Debug, Clone)]
148pub struct ZeroAsciiIgnoreCaseTrieCursor<'a> {
149 trie: ZeroAsciiIgnoreCaseTrie<&'a [u8]>,
150}
151
152/// Information about a probed edge.
153#[derive(Debug, Copy, Clone, PartialEq, Eq)]
154#[non_exhaustive] // no need to destructure or construct this in userland
155pub struct AsciiProbeResult {
156 /// The character's byte value between this node and its parent.
157 pub byte: u8,
158 /// The number of siblings of this node, _including itself_.
159 pub total_siblings: u8,
160}
161
162impl ZeroTrieSimpleAsciiCursor<'_> {
163 /// Steps the cursor one character into the trie based on the character's byte value.
164 ///
165 /// # Examples
166 ///
167 /// Unrolled loop checking for string presence at every step:
168 ///
169 /// ```
170 /// use zerotrie::ZeroTrieSimpleAscii;
171 ///
172 /// // A trie with two values: "abc" and "abcdef"
173 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
174 ///
175 /// // Search the trie for the string "abcdxy"
176 /// let mut cursor = trie.cursor();
177 /// assert_eq!(cursor.take_value(), None); // ""
178 /// cursor.step(b'a');
179 /// assert_eq!(cursor.take_value(), None); // "a"
180 /// cursor.step(b'b');
181 /// assert_eq!(cursor.take_value(), None); // "ab"
182 /// cursor.step(b'c');
183 /// assert_eq!(cursor.take_value(), Some(0)); // "abc"
184 /// cursor.step(b'd');
185 /// assert_eq!(cursor.take_value(), None); // "abcd"
186 /// assert!(!cursor.is_empty());
187 /// cursor.step(b'x'); // no strings have the prefix "abcdx"
188 /// assert!(cursor.is_empty());
189 /// assert_eq!(cursor.take_value(), None); // "abcdx"
190 /// cursor.step(b'y');
191 /// assert_eq!(cursor.take_value(), None); // "abcdxy"
192 /// ```
193 ///
194 /// If the byte is not ASCII, the cursor will become empty:
195 ///
196 /// ```
197 /// use zerotrie::ZeroTrieSimpleAscii;
198 ///
199 /// // A trie with two values: "abc" and "abcdef"
200 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
201 ///
202 /// let mut cursor = trie.cursor();
203 /// assert_eq!(cursor.take_value(), None); // ""
204 /// cursor.step(b'a');
205 /// assert_eq!(cursor.take_value(), None); // "a"
206 /// cursor.step(b'b');
207 /// assert_eq!(cursor.take_value(), None); // "ab"
208 /// cursor.step(b'\xFF');
209 /// assert!(cursor.is_empty());
210 /// assert_eq!(cursor.take_value(), None);
211 /// ```
212 #[inline]
213 pub fn step(&mut self, byte: u8) {
214 reader::step_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, byte);
215 }
216
217 /// Takes the value at the current position.
218 ///
219 /// Calling this function on a new cursor is equivalent to calling `.get()`
220 /// with the empty string (except that it can only be called once).
221 ///
222 /// # Examples
223 ///
224 /// ```
225 /// use zerotrie::ZeroTrieSimpleAscii;
226 ///
227 /// // A trie with two values: "" and "abc"
228 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
229 ///
230 /// assert_eq!(Some(0), trie.get(""));
231 /// let mut cursor = trie.cursor();
232 /// assert_eq!(Some(0), cursor.take_value());
233 /// assert_eq!(None, cursor.take_value());
234 /// ```
235 #[inline]
236 pub fn take_value(&mut self) -> Option<usize> {
237 reader::take_value(&mut self.trie.store)
238 }
239
240 /// Steps the cursor one character into the trie based on an edge index,
241 /// returning the corresponding character as a byte.
242 ///
243 /// This function is similar to [`Self::step()`], but it takes an index instead of a char.
244 /// This enables stepwise iteration over the contents of the trie.
245 ///
246 /// If there are multiple possibilities for the next byte, the `index` argument allows
247 /// visiting them in order. Since this function steps the cursor, the cursor must be
248 /// cloned (a cheap operation) in order to visit multiple children.
249 ///
250 /// # Examples
251 ///
252 /// Continually query index 0 to extract the first item from a trie:
253 ///
254 /// ```
255 /// use zerotrie::ZeroTrieSimpleAscii;
256 ///
257 /// let data: &[(String, usize)] = &[
258 /// ("ab".to_string(), 111),
259 /// ("abcxyz".to_string(), 22),
260 /// ("abde".to_string(), 333),
261 /// ("afg".to_string(), 44),
262 /// ];
263 ///
264 /// let trie: ZeroTrieSimpleAscii<Vec<u8>> =
265 /// data.iter().map(|(s, v)| (s.as_str(), *v)).collect();
266 ///
267 /// let mut cursor = trie.cursor();
268 /// let mut key = String::new();
269 /// let value = loop {
270 /// if let Some(value) = cursor.take_value() {
271 /// break value;
272 /// }
273 /// let probe_result = cursor.probe(0).unwrap();
274 /// key.push(char::from(probe_result.byte));
275 /// };
276 ///
277 /// assert_eq!(key, "ab");
278 /// assert_eq!(value, 111);
279 /// ```
280 ///
281 /// Stepwise iterate over all entries in the trie:
282 ///
283 /// ```
284 /// # use zerotrie::ZeroTrieSimpleAscii;
285 /// # let data: &[(String, usize)] = &[
286 /// # ("ab".to_string(), 111),
287 /// # ("abcxyz".to_string(), 22),
288 /// # ("abde".to_string(), 333),
289 /// # ("afg".to_string(), 44)
290 /// # ];
291 /// # let trie: ZeroTrieSimpleAscii<Vec<u8>> = data
292 /// # .iter()
293 /// # .map(|(s, v)| (s.as_str(), *v))
294 /// # .collect();
295 /// // (trie built as in previous example)
296 ///
297 /// // Initialize the iteration at the first child of the trie.
298 /// let mut stack = Vec::from([(trie.cursor(), 0, 0)]);
299 /// let mut key = Vec::new();
300 /// let mut results = Vec::new();
301 /// loop {
302 /// let Some((mut cursor, index, suffix_len)) = stack.pop() else {
303 /// // Nothing left in the trie.
304 /// break;
305 /// };
306 /// // Check to see if there is a value at the current node.
307 /// if let Some(value) = cursor.take_value() {
308 /// results.push((String::from_utf8(key.clone()).unwrap(), value));
309 /// }
310 /// // Now check for children of the current node.
311 /// let mut sub_cursor = cursor.clone();
312 /// if let Some(probe_result) = sub_cursor.probe(index) {
313 /// // Found a child. Add the current byte edge to the key.
314 /// key.push(probe_result.byte);
315 /// // Add the child to the stack, and also add back the current
316 /// // node if there are more siblings to visit.
317 /// if index + 1 < probe_result.total_siblings as usize {
318 /// stack.push((cursor, index + 1, suffix_len));
319 /// stack.push((sub_cursor, 0, 1));
320 /// } else {
321 /// stack.push((sub_cursor, 0, suffix_len + 1));
322 /// }
323 /// } else {
324 /// // No more children. Pop this node's bytes from the key.
325 /// for _ in 0..suffix_len {
326 /// key.pop();
327 /// }
328 /// }
329 /// }
330 ///
331 /// assert_eq!(&results, data);
332 /// ```
333 pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
334 reader::probe_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, index)
335 }
336
337 /// Checks whether the cursor points to an empty trie.
338 ///
339 /// Use this to determine when to stop iterating.
340 #[inline]
341 pub fn is_empty(&self) -> bool {
342 self.trie.is_empty()
343 }
344}
345
346impl ZeroAsciiIgnoreCaseTrieCursor<'_> {
347 /// Steps the cursor one byte into the trie.
348 ///
349 /// Returns the byte if matched, which may be a different case than the input byte.
350 /// If this function returns `None`, any lookup loops can be terminated.
351 ///
352 /// # Examples
353 ///
354 /// Normalize the case of a value by stepping through an ignore-case trie:
355 ///
356 /// ```
357 /// use std::borrow::Cow;
358 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
359 ///
360 /// // A trie with two values: "aBc" and "aBcdEf"
361 /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
362 ///
363 /// // Get out the value for "abc" and normalize the key string
364 /// let mut cursor = trie.cursor();
365 /// let mut key_str = Cow::Borrowed("abc".as_bytes());
366 /// let mut i = 0;
367 /// let value = loop {
368 /// let Some(&input_byte) = key_str.get(i) else {
369 /// break cursor.take_value();
370 /// };
371 /// let Some(matched_byte) = cursor.step(input_byte) else {
372 /// break None;
373 /// };
374 /// if matched_byte != input_byte {
375 /// key_str.to_mut()[i] = matched_byte;
376 /// }
377 /// i += 1;
378 /// };
379 ///
380 /// assert_eq!(value, Some(0));
381 /// assert_eq!(&*key_str, "aBc".as_bytes());
382 /// ```
383 ///
384 /// For more examples, see [`ZeroTrieSimpleAsciiCursor::step`].
385 #[inline]
386 pub fn step(&mut self, byte: u8) -> Option<u8> {
387 reader::step_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, byte)
388 }
389
390 /// Takes the value at the current position.
391 ///
392 /// For more details, see [`ZeroTrieSimpleAsciiCursor::take_value`].
393 #[inline]
394 pub fn take_value(&mut self) -> Option<usize> {
395 reader::take_value(&mut self.trie.store)
396 }
397
398 /// Probes the next byte in the cursor.
399 ///
400 /// For more details, see [`ZeroTrieSimpleAsciiCursor::probe`].
401 pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
402 reader::probe_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, index)
403 }
404
405 /// Checks whether the cursor points to an empty trie.
406 ///
407 /// For more details, see [`ZeroTrieSimpleAsciiCursor::is_empty`].
408 #[inline]
409 pub fn is_empty(&self) -> bool {
410 self.trie.is_empty()
411 }
412}
413
414impl fmt::Write for ZeroTrieSimpleAsciiCursor<'_> {
415 /// Steps the cursor through each ASCII byte of the string.
416 ///
417 /// If the string contains non-ASCII chars, an error is returned.
418 ///
419 /// # Examples
420 ///
421 /// ```
422 /// use core::fmt::Write;
423 /// use zerotrie::ZeroTrieSimpleAscii;
424 ///
425 /// // A trie with two values: "abc" and "abcdef"
426 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
427 ///
428 /// let mut cursor = trie.cursor();
429 /// cursor.write_str("abcdxy").expect("all ASCII");
430 /// cursor.write_str("🚂").expect_err("non-ASCII");
431 /// ```
432 fn write_str(&mut self, s: &str) -> fmt::Result {
433 for b in s.bytes() {
434 if !b.is_ascii() {
435 return Err(fmt::Error);
436 }
437 self.step(b);
438 }
439 Ok(())
440 }
441
442 /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
443 /// an error if the char is non-ASCII.
444 ///
445 /// # Examples
446 ///
447 /// ```
448 /// use core::fmt::Write;
449 /// use zerotrie::ZeroTrieSimpleAscii;
450 ///
451 /// // A trie with two values: "abc" and "abcdef"
452 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
453 ///
454 /// let mut cursor = trie.cursor();
455 /// cursor.write_char('a').expect("ASCII");
456 /// cursor.write_char('x').expect("ASCII");
457 /// cursor.write_char('🚂').expect_err("non-ASCII");
458 /// ```
459 fn write_char(&mut self, c: char) -> fmt::Result {
460 if !c.is_ascii() {
461 return Err(fmt::Error);
462 }
463 self.step(c as u8);
464 Ok(())
465 }
466}
467
468impl fmt::Write for ZeroAsciiIgnoreCaseTrieCursor<'_> {
469 /// Steps the cursor through each ASCII byte of the string.
470 ///
471 /// If the string contains non-ASCII chars, an error is returned.
472 fn write_str(&mut self, s: &str) -> fmt::Result {
473 for b in s.bytes() {
474 if !b.is_ascii() {
475 return Err(fmt::Error);
476 }
477 self.step(b);
478 }
479 Ok(())
480 }
481
482 /// Equivalent to [`ZeroAsciiIgnoreCaseTrieCursor::step()`], except returns
483 /// an error if the char is non-ASCII.
484 fn write_char(&mut self, c: char) -> fmt::Result {
485 if !c.is_ascii() {
486 return Err(fmt::Error);
487 }
488 self.step(c as u8);
489 Ok(())
490 }
491}