icu_collections/
iterator_utils.rs
1use crate::codepointtrie::CodePointMapRange;
6
7pub(crate) struct RangeListIteratorCoalescer<I, T> {
10 iter: I,
11 peek: Option<CodePointMapRange<T>>,
12}
13
14impl<I, T: Eq> RangeListIteratorCoalescer<I, T>
15where
16 I: Iterator<Item = CodePointMapRange<T>>,
17{
18 pub fn new(iter: I) -> Self {
19 Self { iter, peek: None }
20 }
21}
22
23impl<I, T: Eq> Iterator for RangeListIteratorCoalescer<I, T>
24where
25 I: Iterator<Item = CodePointMapRange<T>>,
26{
27 type Item = CodePointMapRange<T>;
28
29 fn next(&mut self) -> Option<Self::Item> {
30 let mut ret = if let Some(peek) = self.peek.take() {
33 peek
34 } else if let Some(next) = self.iter.next() {
35 next
36 } else {
37 return None;
39 };
40
41 #[allow(clippy::while_let_on_iterator)]
43 while let Some(next) = self.iter.next() {
45 if *next.range.start() == ret.range.end() + 1 && next.value == ret.value {
46 ret.range = *ret.range.start()..=*next.range.end();
48 } else {
49 self.peek = Some(next);
52 return Some(ret);
53 }
54 }
55
56 Some(ret)
58 }
59}
60
61#[cfg(test)]
62mod tests {
63 use core::fmt::Debug;
64 use icu::collections::codepointinvlist::CodePointInversionListBuilder;
65 use icu::properties::props::{BinaryProperty, EnumeratedProperty};
66 use icu::properties::{CodePointMapData, CodePointSetData};
67
68 fn test_set<P: BinaryProperty>(name: &str) {
69 let mut builder = CodePointInversionListBuilder::new();
70 let mut builder_complement = CodePointInversionListBuilder::new();
71
72 for range in CodePointSetData::new::<P>().iter_ranges() {
73 builder.add_range32(range)
74 }
75
76 for range in CodePointSetData::new::<P>().iter_ranges_complemented() {
77 builder_complement.add_range32(range)
78 }
79
80 builder.complement();
81 let set1 = builder.build();
82 let set2 = builder_complement.build();
83 assert_eq!(set1, set2, "Set {name} failed to complement correctly");
84 }
85
86 fn test_map<T: EnumeratedProperty + Debug>(value: T, name: &str) {
87 let mut builder = CodePointInversionListBuilder::new();
88 let mut builder_complement = CodePointInversionListBuilder::new();
89
90 for range in CodePointMapData::<T>::new().iter_ranges_for_value(value) {
91 builder.add_range32(range)
92 }
93
94 for range in CodePointMapData::<T>::new().iter_ranges_for_value_complemented(value) {
95 builder_complement.add_range32(range)
96 }
97
98 builder.complement();
99 let set1 = builder.build();
100 let set2 = builder_complement.build();
101 assert_eq!(
102 set1, set2,
103 "Map {name} failed to complement correctly with value {value:?}"
104 );
105 }
106
107 #[test]
108 fn test_complement_sets() {
109 use icu::properties::props::*;
110 test_set::<AsciiHexDigit>("ASCII_Hex_Digit");
113 test_set::<Alnum>("Alnum");
114 test_set::<Alphabetic>("Alphabetic");
115 test_set::<BidiControl>("Bidi_Control");
116 test_set::<BidiMirrored>("Bidi_Mirrored");
117 test_set::<Blank>("Blank");
118 test_set::<Cased>("Cased");
119 test_set::<CaseIgnorable>("Case_Ignorable");
120 test_set::<FullCompositionExclusion>("Full_Composition_Exclusion");
121 test_set::<ChangesWhenCasefolded>("Changes_When_Casefolded");
122 test_set::<ChangesWhenCasemapped>("Changes_When_Casemapped");
123 test_set::<ChangesWhenNfkcCasefolded>("Changes_When_NFKC_Casefolded");
124 test_set::<ChangesWhenLowercased>("Changes_When_Lowercased");
125 test_set::<ChangesWhenTitlecased>("Changes_When_Titlecased");
126 test_set::<ChangesWhenUppercased>("Changes_When_Uppercased");
127 test_set::<Dash>("Dash");
128 test_set::<Deprecated>("Deprecated");
129 test_set::<DefaultIgnorableCodePoint>("Default_Ignorable_Code_Point");
130 test_set::<Diacritic>("Diacritic");
131 test_set::<EmojiModifierBase>("Emoji_Modifier_Base");
132 test_set::<EmojiComponent>("Emoji_Component");
133 test_set::<EmojiModifier>("Emoji_Modifier");
134 test_set::<Emoji>("Emoji");
135 test_set::<EmojiPresentation>("Emoji_Presentation");
136 test_set::<Extender>("Extender");
137 test_set::<ExtendedPictographic>("Extended_Pictographic");
138 test_set::<Graph>("Graph");
139 test_set::<GraphemeBase>("Grapheme_Base");
140 test_set::<GraphemeExtend>("Grapheme_Extend");
141 test_set::<GraphemeLink>("Grapheme_Link");
142 test_set::<HexDigit>("Hex_Digit");
143 test_set::<Hyphen>("Hyphen");
144 test_set::<IdContinue>("Id_Continue");
145 test_set::<Ideographic>("Ideographic");
146 test_set::<IdStart>("Id_Start");
147 test_set::<IdsBinaryOperator>("Ids_Binary_Operator");
148 test_set::<IdsTrinaryOperator>("Ids_Trinary_Operator");
149 test_set::<JoinControl>("Join_Control");
150 test_set::<LogicalOrderException>("Logical_Order_Exception");
151 test_set::<Lowercase>("Lowercase");
152 test_set::<Math>("Math");
153 test_set::<NoncharacterCodePoint>("Noncharacter_Code_Point");
154 test_set::<NfcInert>("NFC_Inert");
155 test_set::<NfdInert>("NFD_Inert");
156 test_set::<NfkcInert>("NFKC_Inert");
157 test_set::<NfkdInert>("NFKD_Inert");
158 test_set::<PatternSyntax>("Pattern_Syntax");
159 test_set::<PatternWhiteSpace>("Pattern_White_Space");
160 test_set::<PrependedConcatenationMark>("Prepended_Concatenation_Mark");
161 test_set::<Print>("Print");
162 test_set::<QuotationMark>("Quotation_Mark");
163 test_set::<Radical>("Radical");
164 test_set::<RegionalIndicator>("Regional_Indicator");
165 test_set::<SoftDotted>("Soft_Dotted");
166 test_set::<SegmentStarter>("Segment_Starter");
167 test_set::<CaseSensitive>("Case_Sensitive");
168 test_set::<SentenceTerminal>("Sentence_Terminal");
169 test_set::<TerminalPunctuation>("Terminal_Punctuation");
170 test_set::<UnifiedIdeograph>("Unified_Ideograph");
171 test_set::<Uppercase>("Uppercase");
172 test_set::<VariationSelector>("Variation_Selector");
173 test_set::<WhiteSpace>("White_Space");
174 test_set::<Xdigit>("Xdigit");
175 test_set::<XidContinue>("XID_Continue");
176 test_set::<XidStart>("XID_Start");
177 }
178
179 #[test]
180 fn test_complement_maps() {
181 use icu::properties::props::{GeneralCategory, Script};
182 test_map(GeneralCategory::UppercaseLetter, "gc");
183 test_map(GeneralCategory::OtherPunctuation, "gc");
184 test_map(Script::Devanagari, "script");
185 test_map(Script::Latin, "script");
186 test_map(Script::Common, "script");
187 }
188}