widestring/utfstr/
iter.rs

1use crate::{
2    debug_fmt_char_iter, decode_utf16, decode_utf32,
3    iter::{DecodeUtf16, DecodeUtf32},
4};
5use core::{
6    borrow::Borrow,
7    iter::Peekable,
8    ops::{Index, Range},
9};
10#[allow(unused_imports)]
11use core::{
12    fmt::Write,
13    iter::{Copied, DoubleEndedIterator, ExactSizeIterator, FlatMap, FusedIterator},
14    slice::Iter,
15};
16
17/// An iterator over the [`char`]s of a UTF-16 string slice
18///
19/// This struct is created by the [`chars`][crate::Utf16Str::chars] method on
20/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
21#[derive(Clone)]
22pub struct CharsUtf16<'a> {
23    iter: DecodeUtf16<Copied<Iter<'a, u16>>>,
24}
25
26impl<'a> CharsUtf16<'a> {
27    pub(super) fn new(s: &'a [u16]) -> Self {
28        Self {
29            iter: decode_utf16(s.iter().copied()),
30        }
31    }
32}
33
34impl Iterator for CharsUtf16<'_> {
35    type Item = char;
36
37    #[inline]
38    fn next(&mut self) -> Option<Self::Item> {
39        // Utf16Str already ensures valid surrogate pairs
40        self.iter.next().map(|r| r.unwrap())
41    }
42
43    #[inline]
44    fn size_hint(&self) -> (usize, Option<usize>) {
45        self.iter.size_hint()
46    }
47}
48
49impl FusedIterator for CharsUtf16<'_> {}
50
51impl DoubleEndedIterator for CharsUtf16<'_> {
52    #[inline]
53    fn next_back(&mut self) -> Option<Self::Item> {
54        self.iter.next_back().map(|r| r.unwrap())
55    }
56}
57
58impl core::fmt::Debug for CharsUtf16<'_> {
59    #[inline]
60    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
61        debug_fmt_char_iter(self.clone(), f)
62    }
63}
64
65impl core::fmt::Display for CharsUtf16<'_> {
66    #[inline]
67    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
68        self.clone().try_for_each(|c| f.write_char(c))
69    }
70}
71
72/// An iterator over the [`char`]s of a UTF-32 string slice
73///
74/// This struct is created by the [`chars`][crate::Utf32Str::chars] method on
75/// [`Utf32Str`][crate::Utf32Str]. See its documentation for more.
76#[derive(Clone)]
77pub struct CharsUtf32<'a> {
78    iter: DecodeUtf32<Copied<Iter<'a, u32>>>,
79}
80
81impl<'a> CharsUtf32<'a> {
82    pub(super) fn new(s: &'a [u32]) -> Self {
83        Self {
84            iter: decode_utf32(s.iter().copied()),
85        }
86    }
87}
88
89impl Iterator for CharsUtf32<'_> {
90    type Item = char;
91
92    #[inline]
93    fn next(&mut self) -> Option<Self::Item> {
94        // Utf32Str already ensures valid code points
95        self.iter.next().map(|r| r.unwrap())
96    }
97
98    #[inline]
99    fn size_hint(&self) -> (usize, Option<usize>) {
100        self.iter.size_hint()
101    }
102}
103
104impl DoubleEndedIterator for CharsUtf32<'_> {
105    #[inline]
106    fn next_back(&mut self) -> Option<Self::Item> {
107        // Utf32Str already ensures valid code points
108        self.iter.next_back().map(|r| r.unwrap())
109    }
110}
111
112impl FusedIterator for CharsUtf32<'_> {}
113
114impl ExactSizeIterator for CharsUtf32<'_> {
115    #[inline]
116    fn len(&self) -> usize {
117        self.iter.len()
118    }
119}
120
121impl core::fmt::Debug for CharsUtf32<'_> {
122    #[inline]
123    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
124        debug_fmt_char_iter(self.clone(), f)
125    }
126}
127
128impl core::fmt::Display for CharsUtf32<'_> {
129    #[inline]
130    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
131        self.clone().try_for_each(|c| f.write_char(c))
132    }
133}
134
135/// An iterator over the [`char`]s of a string slice, and their positions
136///
137/// This struct is created by the [`char_indices`][crate::Utf16Str::char_indices] method on
138/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
139#[derive(Debug, Clone)]
140pub struct CharIndicesUtf16<'a> {
141    forward_offset: usize,
142    back_offset: usize,
143    iter: CharsUtf16<'a>,
144}
145
146impl CharIndicesUtf16<'_> {
147    /// Returns the position of the next character, or the length of the underlying string if
148    /// there are no more characters.
149    #[inline]
150    pub fn offset(&self) -> usize {
151        self.forward_offset
152    }
153}
154
155impl<'a> CharIndicesUtf16<'a> {
156    pub(super) fn new(s: &'a [u16]) -> Self {
157        Self {
158            forward_offset: 0,
159            back_offset: s.len(),
160            iter: CharsUtf16::new(s),
161        }
162    }
163}
164
165impl Iterator for CharIndicesUtf16<'_> {
166    type Item = (usize, char);
167
168    #[inline]
169    fn next(&mut self) -> Option<Self::Item> {
170        let result = self.iter.next();
171        if let Some(c) = result {
172            let offset = self.forward_offset;
173            self.forward_offset += c.len_utf16();
174            Some((offset, c))
175        } else {
176            None
177        }
178    }
179
180    #[inline]
181    fn size_hint(&self) -> (usize, Option<usize>) {
182        self.iter.size_hint()
183    }
184}
185
186impl FusedIterator for CharIndicesUtf16<'_> {}
187
188impl DoubleEndedIterator for CharIndicesUtf16<'_> {
189    #[inline]
190    fn next_back(&mut self) -> Option<Self::Item> {
191        let result = self.iter.next_back();
192        if let Some(c) = result {
193            self.back_offset -= c.len_utf16();
194            Some((self.back_offset, c))
195        } else {
196            None
197        }
198    }
199}
200
201/// An iterator over the [`char`]s of a string slice, and their positions
202///
203/// This struct is created by the [`char_indices`][crate::Utf32Str::char_indices] method on
204/// [`Utf32Str`][crate::Utf32Str]. See its documentation for more.
205#[derive(Debug, Clone)]
206pub struct CharIndicesUtf32<'a> {
207    forward_offset: usize,
208    back_offset: usize,
209    iter: CharsUtf32<'a>,
210}
211
212impl CharIndicesUtf32<'_> {
213    /// Returns the position of the next character, or the length of the underlying string if
214    /// there are no more characters.
215    #[inline]
216    pub fn offset(&self) -> usize {
217        self.forward_offset
218    }
219}
220
221impl<'a> CharIndicesUtf32<'a> {
222    pub(super) fn new(s: &'a [u32]) -> Self {
223        Self {
224            forward_offset: 0,
225            back_offset: s.len(),
226            iter: CharsUtf32::new(s),
227        }
228    }
229}
230
231impl Iterator for CharIndicesUtf32<'_> {
232    type Item = (usize, char);
233
234    #[inline]
235    fn next(&mut self) -> Option<Self::Item> {
236        let result = self.iter.next();
237        if let Some(c) = result {
238            let offset = self.forward_offset;
239            self.forward_offset += 1;
240            Some((offset, c))
241        } else {
242            None
243        }
244    }
245
246    #[inline]
247    fn size_hint(&self) -> (usize, Option<usize>) {
248        self.iter.size_hint()
249    }
250}
251
252impl FusedIterator for CharIndicesUtf32<'_> {}
253
254impl DoubleEndedIterator for CharIndicesUtf32<'_> {
255    #[inline]
256    fn next_back(&mut self) -> Option<Self::Item> {
257        let result = self.iter.next_back();
258        if let Some(c) = result {
259            self.back_offset -= 1;
260            Some((self.back_offset, c))
261        } else {
262            None
263        }
264    }
265}
266
267impl ExactSizeIterator for CharIndicesUtf32<'_> {
268    #[inline]
269    fn len(&self) -> usize {
270        self.iter.len()
271    }
272}
273
274/// The return type of [`Utf16Str::escape_debug`][crate::Utf16Str::escape_debug].
275#[derive(Debug, Clone)]
276pub struct EscapeDebug<I> {
277    iter: FlatMap<I, core::char::EscapeDebug, fn(char) -> core::char::EscapeDebug>,
278}
279
280impl<'a> EscapeDebug<CharsUtf16<'a>> {
281    pub(super) fn new(s: &'a [u16]) -> Self {
282        Self {
283            iter: CharsUtf16::new(s).flat_map(|c| c.escape_debug()),
284        }
285    }
286}
287
288impl<'a> EscapeDebug<CharsUtf32<'a>> {
289    pub(super) fn new(s: &'a [u32]) -> Self {
290        Self {
291            iter: CharsUtf32::new(s).flat_map(|c| c.escape_debug()),
292        }
293    }
294}
295
296/// The return type of [`Utf16Str::escape_default`][crate::Utf16Str::escape_default].
297#[derive(Debug, Clone)]
298pub struct EscapeDefault<I> {
299    iter: FlatMap<I, core::char::EscapeDefault, fn(char) -> core::char::EscapeDefault>,
300}
301
302impl<'a> EscapeDefault<CharsUtf16<'a>> {
303    pub(super) fn new(s: &'a [u16]) -> Self {
304        Self {
305            iter: CharsUtf16::new(s).flat_map(|c| c.escape_default()),
306        }
307    }
308}
309
310impl<'a> EscapeDefault<CharsUtf32<'a>> {
311    pub(super) fn new(s: &'a [u32]) -> Self {
312        Self {
313            iter: CharsUtf32::new(s).flat_map(|c| c.escape_default()),
314        }
315    }
316}
317
318/// The return type of [`Utf16Str::escape_unicode`][crate::Utf16Str::escape_unicode].
319#[derive(Debug, Clone)]
320pub struct EscapeUnicode<I> {
321    iter: FlatMap<I, core::char::EscapeUnicode, fn(char) -> core::char::EscapeUnicode>,
322}
323
324impl<'a> EscapeUnicode<CharsUtf16<'a>> {
325    pub(super) fn new(s: &'a [u16]) -> Self {
326        Self {
327            iter: CharsUtf16::new(s).flat_map(|c| c.escape_unicode()),
328        }
329    }
330}
331
332impl<'a> EscapeUnicode<CharsUtf32<'a>> {
333    pub(super) fn new(s: &'a [u32]) -> Self {
334        Self {
335            iter: CharsUtf32::new(s).flat_map(|c| c.escape_unicode()),
336        }
337    }
338}
339
340macro_rules! escape_impls {
341    ($($name:ident),+) => {$(
342        impl<I> core::fmt::Display for $name<I> where I: Iterator<Item = char> + Clone {
343            #[inline]
344            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
345                self.clone().try_for_each(|c| f.write_char(c))
346            }
347        }
348
349        impl< I> Iterator for $name<I> where I: Iterator<Item = char> {
350            type Item = char;
351
352            #[inline]
353            fn next(&mut self) -> Option<Self::Item> {
354                self.iter.next()
355            }
356
357            #[inline]
358            fn size_hint(&self) -> (usize, Option<usize>) {
359                let (lower, upper) = self.iter.size_hint();
360                // Worst case, every char has to be unicode escaped as \u{NNNNNN}
361                (lower, upper.and_then(|len| len.checked_mul(10)))
362            }
363        }
364
365        impl<I> FusedIterator for $name<I> where I: Iterator<Item = char> + FusedIterator {}
366    )+}
367}
368
369escape_impls!(EscapeDebug, EscapeDefault, EscapeUnicode);
370
371/// An iterator over the [`u16`] code units of a UTF-16 string slice
372///
373/// This struct is created by the [`code_units`][crate::Utf16Str::code_units] method on
374/// [`Utf16Str`][crate::Utf16Str]. See its documentation for more.
375#[derive(Debug, Clone)]
376pub struct CodeUnits<'a> {
377    iter: Copied<Iter<'a, u16>>,
378}
379
380impl<'a> CodeUnits<'a> {
381    pub(super) fn new(s: &'a [u16]) -> Self {
382        Self {
383            iter: s.iter().copied(),
384        }
385    }
386}
387
388impl Iterator for CodeUnits<'_> {
389    type Item = u16;
390
391    #[inline]
392    fn next(&mut self) -> Option<Self::Item> {
393        self.iter.next()
394    }
395
396    #[inline]
397    fn size_hint(&self) -> (usize, Option<usize>) {
398        self.iter.size_hint()
399    }
400}
401
402impl FusedIterator for CodeUnits<'_> {}
403
404impl DoubleEndedIterator for CodeUnits<'_> {
405    #[inline]
406    fn next_back(&mut self) -> Option<Self::Item> {
407        self.iter.next_back()
408    }
409}
410
411impl ExactSizeIterator for CodeUnits<'_> {
412    #[inline]
413    fn len(&self) -> usize {
414        self.iter.len()
415    }
416}
417
/// An iterator over the lines of a [`crate::Utf16Str`], [`crate::Utf32Str`], or other wide string
/// that has the char_indices method. Returns string slices.
///
/// A line ends at `'\n'`; a `'\r'` immediately before that `'\n'` is not part of the line.
/// Any other `'\r'` (including one at the very end of the string) is kept.
///
/// This struct is created with one of:
/// 1. The [`lines`][crate::Utf16Str::lines] method on [`crate::Utf16Str`]
/// 2. The [`lines`][crate::Utf32Str::lines] method on [`crate::Utf32Str`]
/// 3. etc.
///
/// See their documentation for more.
#[derive(Debug, Clone)]
pub struct Lines<'a, Str, CharIndices>
where
    Str: Borrow<Str> + Index<Range<usize>, Output = Str> + ?Sized,
    CharIndices: IntoIterator<Item = (usize, char)>,
{
    // String being split; every yielded line is a sub-slice of it
    str: &'a Str,
    // Length of `str` in the same units as the offsets produced by `char_indices`
    str_len: usize,
    // Remaining `(offset, char)` pairs of `str`
    char_indices: Peekable<CharIndices::IntoIter>,
}

impl<'a, Str, CharIndices> Lines<'a, Str, CharIndices>
where
    Str: Borrow<Str> + Index<Range<usize>, Output = Str> + ?Sized,
    CharIndices: IntoIterator<Item = (usize, char)>,
{
    /// Creates a line iterator over `str`.
    ///
    /// `str_len` must be the length of `str` and `char_indices` must yield the characters of
    /// `str` with their offsets; both are used to compute the slicing ranges.
    pub(crate) fn new(str: &'a Str, str_len: usize, char_indices: CharIndices) -> Self {
        Self {
            str,
            str_len,
            char_indices: char_indices.into_iter().peekable(),
        }
    }
}

impl<'a, Str, CharIndices> Iterator for Lines<'a, Str, CharIndices>
where
    Str: Borrow<Str> + Index<Range<usize>, Output = Str> + ?Sized,
    CharIndices: IntoIterator<Item = (usize, char)>,
{
    type Item = &'a Str;

    /// Returns the next line without its `"\n"` / `"\r\n"` terminator, or `None` once every
    /// character has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let (line_start, mut ch) = self.char_indices.next()?;
        let mut index = line_start;
        // Stays at `line_start` when the line is empty (the first char is already '\n')
        let mut line_end = line_start;

        while ch != '\n' {
            match self.char_indices.next() {
                Some((next_index, next_ch)) => {
                    // A '\r' is tentatively left out of the line: if the following char is
                    // '\n' the loop stops with the '\r' excluded, otherwise the next pass
                    // moves `line_end` past it again.
                    line_end = if ch == '\r' { index } else { next_index };
                    index = next_index;
                    ch = next_ch;
                }
                None => {
                    // Input ended without '\n': the line runs to the end of the string,
                    // keeping a trailing bare '\r' if there is one.
                    line_end = self.str_len;
                    break;
                }
            }
        }

        Some(&self.str[line_start..line_end])
    }
}

// Since CharIndicesUtf16 is a FusedIterator, so is Lines: once the wrapped char-index
// iterator is exhausted, `next` keeps returning `None`.
// The `?Sized` bound mirrors the struct and `Iterator` impl so that this marker also applies
// when `Str` is an unsized string type.
impl<'a, Str, CharIndices> FusedIterator for Lines<'a, Str, CharIndices>
where
    Str: Borrow<Str> + Index<Range<usize>, Output = Str> + ?Sized,
    CharIndices: IntoIterator<Item = (usize, char)>,
{
}