icu_collections/codepointinvliststringlist/
mod.rs
1#[cfg(feature = "alloc")]
13use crate::codepointinvlist::CodePointInversionListBuilder;
14use crate::codepointinvlist::{CodePointInversionList, CodePointInversionListULE};
15#[cfg(feature = "alloc")]
16use alloc::string::{String, ToString};
17#[cfg(feature = "alloc")]
18use alloc::vec::Vec;
19use displaydoc::Display;
20use yoke::Yokeable;
21use zerofrom::ZeroFrom;
22use zerovec::{VarZeroSlice, VarZeroVec};
23
24#[zerovec::make_varule(CodePointInversionListAndStringListULE)]
29#[zerovec::skip_derive(Ord)]
30#[zerovec::derive(Debug)]
31#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
32#[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV, ToOwned))]
33#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
35#[cfg_attr(feature = "serde", zerovec::derive(Serialize, Deserialize, Debug))]
36pub struct CodePointInversionListAndStringList<'data> {
37 #[cfg_attr(feature = "serde", serde(borrow))]
38 #[zerovec::varule(CodePointInversionListULE)]
39 cp_inv_list: CodePointInversionList<'data>,
40 #[cfg_attr(feature = "serde", serde(borrow))]
45 str_list: VarZeroVec<'data, str>,
46}
47
48#[cfg(feature = "databake")]
49impl databake::Bake for CodePointInversionListAndStringList<'_> {
50 fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
51 env.insert("icu_collections");
52 let cp_inv_list = self.cp_inv_list.bake(env);
53 let str_list = self.str_list.bake(env);
54 databake::quote! {
56 icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList::from_parts_unchecked(#cp_inv_list, #str_list)
57 }
58 }
59}
60
61#[cfg(feature = "databake")]
62impl databake::BakeSize for CodePointInversionListAndStringList<'_> {
63 fn borrows_size(&self) -> usize {
64 self.cp_inv_list.borrows_size() + self.str_list.borrows_size()
65 }
66}
67
68impl<'data> CodePointInversionListAndStringList<'data> {
69 pub fn try_from(
72 cp_inv_list: CodePointInversionList<'data>,
73 str_list: VarZeroVec<'data, str>,
74 ) -> Result<Self, InvalidStringList> {
75 {
82 let mut it = str_list.iter();
83 if let Some(mut x) = it.next() {
84 if x.len() == 1 {
85 return Err(InvalidStringList::InvalidStringLength(
86 #[cfg(feature = "alloc")]
87 x.to_string(),
88 ));
89 }
90 for y in it {
91 if x.len() == 1 {
92 return Err(InvalidStringList::InvalidStringLength(
93 #[cfg(feature = "alloc")]
94 x.to_string(),
95 ));
96 } else if x == y {
97 return Err(InvalidStringList::StringListNotUnique(
98 #[cfg(feature = "alloc")]
99 x.to_string(),
100 ));
101 } else if x > y {
102 return Err(InvalidStringList::StringListNotSorted(
103 #[cfg(feature = "alloc")]
104 x.to_string(),
105 #[cfg(feature = "alloc")]
106 y.to_string(),
107 ));
108 }
109
110 x = y;
112 }
113 }
114 }
115
116 Ok(CodePointInversionListAndStringList {
117 cp_inv_list,
118 str_list,
119 })
120 }
121
122 #[doc(hidden)] pub const fn from_parts_unchecked(
124 cp_inv_list: CodePointInversionList<'data>,
125 str_list: VarZeroVec<'data, str>,
126 ) -> Self {
127 CodePointInversionListAndStringList {
128 cp_inv_list,
129 str_list,
130 }
131 }
132
133 pub fn size(&self) -> usize {
137 self.cp_inv_list.size() + self.str_list.len()
138 }
139
140 pub fn has_strings(&self) -> bool {
142 !self.str_list.is_empty()
143 }
144
145 pub fn contains_str(&self, s: &str) -> bool {
167 let mut chars = s.chars();
168 if let Some(first_char) = chars.next() {
169 if chars.next().is_none() {
170 return self.contains(first_char);
171 }
172 }
173 self.str_list.binary_search(s).is_ok()
174 }
175
176 pub fn contains32(&self, cp: u32) -> bool {
196 self.cp_inv_list.contains32(cp)
197 }
198
199 pub fn contains(&self, ch: char) -> bool {
219 self.contains32(ch as u32)
220 }
221
222 pub fn code_points(&self) -> &CodePointInversionList<'data> {
224 &self.cp_inv_list
225 }
226
227 pub fn strings(&self) -> &VarZeroSlice<str> {
229 &self.str_list
230 }
231}
232
233#[cfg(feature = "alloc")]
234impl<'a> FromIterator<&'a str> for CodePointInversionListAndStringList<'_> {
235 fn from_iter<I>(it: I) -> Self
236 where
237 I: IntoIterator<Item = &'a str>,
238 {
239 let mut builder = CodePointInversionListBuilder::new();
240 let mut strings = Vec::<&str>::new();
241 for s in it {
242 let mut chars = s.chars();
243 if let Some(first_char) = chars.next() {
244 if chars.next().is_none() {
245 builder.add_char(first_char);
246 continue;
247 }
248 }
249 strings.push(s);
250 }
251
252 strings.sort_unstable();
255 strings.dedup();
256
257 let cp_inv_list = builder.build();
258 let str_list = VarZeroVec::<str>::from(&strings);
259
260 CodePointInversionListAndStringList {
261 cp_inv_list,
262 str_list,
263 }
264 }
265}
266
267#[derive(Display, Debug)]
269pub enum InvalidStringList {
270 #[cfg_attr(feature = "alloc", displaydoc("Invalid string length for string: {0}"))]
272 InvalidStringLength(#[cfg(feature = "alloc")] String),
273 #[cfg_attr(feature = "alloc", displaydoc("String list has duplicate: {0}"))]
275 StringListNotUnique(#[cfg(feature = "alloc")] String),
276 #[cfg_attr(
278 feature = "alloc",
279 displaydoc("Strings in string list not in sorted order: ({0}, {1})")
280 )]
281 StringListNotSorted(
282 #[cfg(feature = "alloc")] String,
283 #[cfg(feature = "alloc")] String,
284 ),
285}
286
287#[cfg(test)]
288mod tests {
289 use super::*;
290
291 #[test]
292 fn test_size_has_strings() {
293 let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
294 let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
295 let str_slice = &["ascii_max", "bmp_max", "unicode_max", "zero"];
296 let str_list = VarZeroVec::<str>::from(str_slice);
297
298 let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
299
300 assert!(cpilsl.has_strings());
301 assert_eq!(8, cpilsl.size());
302 }
303
304 #[test]
305 fn test_empty_string_allowed() {
306 let cp_slice = &[0, 1, 0x7F, 0x80, 0xFFFF, 0x1_0000, 0x10_FFFF, 0x11_0000];
307 let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
308 let str_slice = &["", "ascii_max", "bmp_max", "unicode_max", "zero"];
309 let str_list = VarZeroVec::<str>::from(str_slice);
310
311 let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list).unwrap();
312
313 assert!(cpilsl.has_strings());
314 assert_eq!(9, cpilsl.size());
315 }
316
317 #[test]
318 fn test_invalid_string() {
319 let cp_slice = &[0, 1];
320 let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
321 let str_slice = &["a"];
322 let str_list = VarZeroVec::<str>::from(str_slice);
323
324 let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
325
326 assert!(matches!(
327 cpilsl,
328 Err(InvalidStringList::InvalidStringLength(_))
329 ));
330 }
331
332 #[test]
333 fn test_invalid_string_list_has_duplicate() {
334 let cp_slice = &[0, 1];
335 let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
336 let str_slice = &["abc", "abc"];
337 let str_list = VarZeroVec::<str>::from(str_slice);
338
339 let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
340
341 assert!(matches!(
342 cpilsl,
343 Err(InvalidStringList::StringListNotUnique(_))
344 ));
345 }
346
347 #[test]
348 fn test_invalid_string_list_not_sorted() {
349 let cp_slice = &[0, 1];
350 let cp_list = CodePointInversionList::try_from_u32_inversion_list_slice(cp_slice).unwrap();
351 let str_slice = &["xyz", "abc"];
352 let str_list = VarZeroVec::<str>::from(str_slice);
353
354 let cpilsl = CodePointInversionListAndStringList::try_from(cp_list, str_list);
355
356 assert!(matches!(
357 cpilsl,
358 Err(InvalidStringList::StringListNotSorted(_, _))
359 ));
360 }
361
362 #[test]
363 fn test_from_iter_invariants() {
364 let in_strs_1 = ["a", "abc", "xyz", "abc"];
365 let in_strs_2 = ["xyz", "abc", "a", "abc"];
366
367 let cpilsl_1 = CodePointInversionListAndStringList::from_iter(in_strs_1);
368 let cpilsl_2 = CodePointInversionListAndStringList::from_iter(in_strs_2);
369
370 assert_eq!(cpilsl_1, cpilsl_2);
371
372 assert!(cpilsl_1.has_strings());
373 assert!(cpilsl_1.contains_str("abc"));
374 assert!(cpilsl_1.contains_str("xyz"));
375 assert!(!cpilsl_1.contains_str("def"));
376
377 assert_eq!(1, cpilsl_1.cp_inv_list.size());
378 assert!(cpilsl_1.contains('a'));
379 assert!(!cpilsl_1.contains('0'));
380 assert!(!cpilsl_1.contains('q'));
381
382 assert_eq!(3, cpilsl_1.size());
383 }
384}