1#[cfg(feature = "serde")]
6use alloc::format;
7#[cfg(feature = "serde")]
8use alloc::string::String;
9#[cfg(feature = "alloc")]
10use alloc::vec::Vec;
11use core::{char, ops::RangeBounds, ops::RangeInclusive};
12use potential_utf::PotentialCodePoint;
13use yoke::Yokeable;
14use zerofrom::ZeroFrom;
15use zerovec::{ule::AsULE, zerovec, ZeroVec};
16
17use super::InvalidSetError;
18use crate::codepointinvlist::utils::{deconstruct_range, is_valid_zv};
19
20const BMP_MAX: u32 = 0xFFFF;
22
23const BMP_INV_LIST_VEC: ZeroVec<PotentialCodePoint> = zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24(BMP_MAX + 1)]);
25
26const ALL_VEC: ZeroVec<PotentialCodePoint> = zerovec!(PotentialCodePoint; PotentialCodePoint::to_unaligned; [PotentialCodePoint::from_u24(0x0), PotentialCodePoint::from_u24((char::MAX as u32) + 1)]);
28
29#[zerovec::make_varule(CodePointInversionListULE)]
34#[zerovec::skip_derive(Ord)]
35#[zerovec::derive(Debug)]
36#[derive(Debug, Eq, PartialEq, Clone, Yokeable, ZeroFrom)]
37#[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV, ToOwned))]
38pub struct CodePointInversionList<'data> {
39 inv_list: ZeroVec<'data, PotentialCodePoint>,
45 size: u32,
46}
47
48#[cfg(feature = "serde")]
49impl<'de: 'a, 'a> serde::Deserialize<'de> for CodePointInversionList<'a> {
50 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
51 where
52 D: serde::Deserializer<'de>,
53 {
54 use serde::de::Error;
55
56 let parsed_inv_list = if deserializer.is_human_readable() {
57 let parsed_strings = Vec::<alloc::borrow::Cow<'de, str>>::deserialize(deserializer)?;
58 let mut inv_list = ZeroVec::new_owned(Vec::with_capacity(parsed_strings.len() * 2));
59 for range in parsed_strings {
60 fn internal(range: &str) -> Option<(u32, u32)> {
61 let (start, range) = UnicodeCodePoint::parse(range)?;
62 if range.is_empty() {
63 return Some((start.0, start.0));
64 }
65 let (hyphen, range) = UnicodeCodePoint::parse(range)?;
66 if hyphen.0 != '-' as u32 {
67 return None;
68 }
69 let (end, range) = UnicodeCodePoint::parse(range)?;
70 range.is_empty().then_some((start.0, end.0))
71 }
72 let (start, end) = internal(&range).ok_or_else(|| Error::custom(format!(
73 "Cannot deserialize invalid inversion list for CodePointInversionList: {range:?}"
74 )))?;
75 inv_list.with_mut(|v| {
76 v.push(PotentialCodePoint::from_u24(start).to_unaligned());
77 v.push(PotentialCodePoint::from_u24(end + 1).to_unaligned());
78 });
79 }
80 inv_list
81 } else {
82 ZeroVec::<PotentialCodePoint>::deserialize(deserializer)?
83 };
84 CodePointInversionList::try_from_inversion_list(parsed_inv_list).map_err(|e| {
85 Error::custom(format!(
86 "Cannot deserialize invalid inversion list for CodePointInversionList: {e:?}"
87 ))
88 })
89 }
90}
91
92#[cfg(feature = "databake")]
93impl databake::Bake for CodePointInversionList<'_> {
94 fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
95 env.insert("icu_collections");
96 let inv_list = self.inv_list.bake(env);
97 let size = self.size.bake(env);
98 databake::quote! { unsafe {
100 #[allow(unused_unsafe)]
101 icu_collections::codepointinvlist::CodePointInversionList::from_parts_unchecked(#inv_list, #size)
102 }}
103 }
104}
105
106#[cfg(feature = "databake")]
107impl databake::BakeSize for CodePointInversionList<'_> {
108 fn borrows_size(&self) -> usize {
109 self.inv_list.borrows_size()
110 }
111}
112
113#[cfg(feature = "serde")]
114#[derive(Debug, Copy, Clone)]
115struct UnicodeCodePoint(u32);
116
117#[cfg(feature = "serde")]
118impl UnicodeCodePoint {
119 fn from_u32(cp: u32) -> Result<Self, String> {
120 if cp <= char::MAX as u32 {
121 Ok(Self(cp))
122 } else {
123 Err(format!("Not a Unicode code point {}", cp))
124 }
125 }
126
127 fn parse(value: &str) -> Option<(Self, &str)> {
128 Some(if let Some(hex) = value.strip_prefix("U+") {
129 let (escape, remainder) = (hex.get(..4)?, hex.get(4..)?);
130 (Self(u32::from_str_radix(escape, 16).ok()?), remainder)
131 } else {
132 let c = value.chars().next()?;
133 (Self(c as u32), value.get(c.len_utf8()..)?)
134 })
135 }
136}
137
138#[cfg(feature = "serde")]
139impl core::fmt::Display for UnicodeCodePoint {
140 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
141 match self.0 {
142 s @ 0xD800..=0xDFFF => write!(f, "U+{s:X}"),
143 c => write!(
146 f,
147 "{}",
148 char::from_u32(c).unwrap_or(char::REPLACEMENT_CHARACTER)
149 ),
150 }
151 }
152}
153
154#[cfg(feature = "serde")]
155impl serde::Serialize for CodePointInversionList<'_> {
156 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
157 where
158 S: serde::Serializer,
159 {
160 if serializer.is_human_readable() {
161 use serde::ser::Error;
162 use serde::ser::SerializeSeq;
163 let mut seq = serializer.serialize_seq(Some(self.inv_list.len() / 2))?;
164 for range in self.iter_ranges() {
165 let start = UnicodeCodePoint::from_u32(*range.start()).map_err(S::Error::custom)?;
166 if range.start() == range.end() {
167 seq.serialize_element(&format!("{start}"))?;
168 } else {
169 let end = UnicodeCodePoint::from_u32(*range.end()).map_err(S::Error::custom)?;
170 seq.serialize_element(&format!("{start}-{end}",))?;
171 }
172 }
173 seq.end()
174 } else {
175 self.inv_list.serialize(serializer)
179 }
180 }
181}
182
183impl<'data> CodePointInversionList<'data> {
184 pub fn try_from_inversion_list(
222 inv_list: ZeroVec<'data, PotentialCodePoint>,
223 ) -> Result<Self, InvalidSetError> {
224 #[allow(clippy::indexing_slicing)] if is_valid_zv(&inv_list) {
226 let size = inv_list
227 .as_ule_slice()
228 .chunks(2)
229 .map(|end_points| {
230 u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[1]))
231 - u32::from(<PotentialCodePoint as AsULE>::from_unaligned(end_points[0]))
232 })
233 .sum::<u32>();
234 Ok(Self { inv_list, size })
235 } else {
236 Err(InvalidSetError(
237 #[cfg(feature = "alloc")]
238 inv_list.to_vec(),
239 ))
240 }
241 }
242
243 #[doc(hidden)] pub const unsafe fn from_parts_unchecked(
246 inv_list: ZeroVec<'data, PotentialCodePoint>,
247 size: u32,
248 ) -> Self {
249 Self { inv_list, size }
250 }
251
252 #[cfg(feature = "alloc")]
283 pub fn try_from_u32_inversion_list_slice(inv_list: &[u32]) -> Result<Self, InvalidSetError> {
284 let inv_list_zv: ZeroVec<PotentialCodePoint> = inv_list
285 .iter()
286 .copied()
287 .map(PotentialCodePoint::from_u24)
288 .collect();
289 CodePointInversionList::try_from_inversion_list(inv_list_zv)
290 }
291
292 #[cfg(feature = "alloc")]
294 pub fn into_owned(self) -> CodePointInversionList<'static> {
295 CodePointInversionList {
296 inv_list: self.inv_list.into_owned(),
297 size: self.size,
298 }
299 }
300
301 #[cfg(feature = "alloc")]
303 pub fn get_inversion_list_vec(&self) -> Vec<u32> {
304 self.as_inversion_list().iter().map(u32::from).collect()
305 }
306
307 pub fn all() -> Self {
327 Self {
328 inv_list: ALL_VEC,
329 size: (char::MAX as u32) + 1,
330 }
331 }
332
333 pub fn bmp() -> Self {
355 Self {
356 inv_list: BMP_INV_LIST_VEC,
357 size: BMP_MAX + 1,
358 }
359 }
360
361 #[cfg(feature = "alloc")]
365 pub(crate) fn as_inversion_list(&self) -> &ZeroVec<PotentialCodePoint> {
366 &self.inv_list
367 }
368
369 pub fn iter_chars(&self) -> impl Iterator<Item = char> + '_ {
388 #[allow(clippy::indexing_slicing)] self.inv_list
390 .as_ule_slice()
391 .chunks(2)
392 .flat_map(|pair| {
393 u32::from(PotentialCodePoint::from_unaligned(pair[0]))
394 ..u32::from(PotentialCodePoint::from_unaligned(pair[1]))
395 })
396 .filter_map(char::from_u32)
397 }
398
399 pub fn iter_ranges(&self) -> impl ExactSizeIterator<Item = RangeInclusive<u32>> + '_ {
421 #[allow(clippy::indexing_slicing)] self.inv_list.as_ule_slice().chunks(2).map(|pair| {
423 let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
424 let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
425 range_start..=(range_limit - 1)
426 })
427 }
428
429 pub fn iter_ranges_complemented(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ {
452 let inv_ule = self.inv_list.as_ule_slice();
453 let middle = inv_ule.get(1..inv_ule.len() - 1).unwrap_or(&[]);
454 let beginning = if let Some(first) = self.inv_list.first() {
455 let first = u32::from(first);
456 if first == 0 {
457 None
458 } else {
459 Some(0..=first - 1)
460 }
461 } else {
462 None
463 };
464 let end = if let Some(last) = self.inv_list.last() {
465 let last = u32::from(last);
466 if last == char::MAX as u32 {
467 None
468 } else {
469 Some(last..=char::MAX as u32)
470 }
471 } else {
472 None
473 };
474 #[allow(clippy::indexing_slicing)] let chunks = middle.chunks(2).map(|pair| {
476 let range_start = u32::from(PotentialCodePoint::from_unaligned(pair[0]));
477 let range_limit = u32::from(PotentialCodePoint::from_unaligned(pair[1]));
478 range_start..=(range_limit - 1)
479 });
480 beginning.into_iter().chain(chunks).chain(end)
481 }
482
483 pub fn get_range_count(&self) -> usize {
485 self.inv_list.len() / 2
486 }
487
488 pub fn get_nth_range(&self, idx: usize) -> Option<RangeInclusive<u32>> {
491 let start_idx = idx * 2;
492 let end_idx = start_idx + 1;
493 let start = u32::from(self.inv_list.get(start_idx)?);
494 let end = u32::from(self.inv_list.get(end_idx)?);
495 Some(start..=(end - 1))
496 }
497
498 pub fn size(&self) -> usize {
500 if self.is_empty() {
501 return 0;
502 }
503 self.size as usize
504 }
505
506 pub fn is_empty(&self) -> bool {
508 self.inv_list.is_empty()
509 }
510
511 fn contains_query(&self, query: u32) -> Option<usize> {
516 let query = PotentialCodePoint::try_from(query).ok()?;
517 match self.inv_list.binary_search(&query) {
518 Ok(pos) => {
519 if pos % 2 == 0 {
520 Some(pos)
521 } else {
522 None
523 }
524 }
525 Err(pos) => {
526 if pos % 2 != 0 && pos < self.inv_list.len() {
527 Some(pos - 1)
528 } else {
529 None
530 }
531 }
532 }
533 }
534
535 pub fn contains(&self, query: char) -> bool {
553 self.contains_query(query as u32).is_some()
554 }
555
556 pub fn contains32(&self, query: u32) -> bool {
579 self.contains_query(query).is_some()
580 }
581
582 pub fn contains_range(&self, range: impl RangeBounds<char>) -> bool {
625 let (from, till) = deconstruct_range(range);
626 if from >= till {
627 return false;
628 }
629 match self.contains_query(from) {
630 Some(pos) => {
631 if let Some(x) = self.inv_list.get(pos + 1) {
632 (till) <= x.into()
633 } else {
634 debug_assert!(
635 false,
636 "Inversion list query should not return out of bounds index"
637 );
638 false
639 }
640 }
641 None => false,
642 }
643 }
644
645 pub fn contains_set(&self, set: &Self) -> bool {
673 if set.size() > self.size() {
674 return false;
675 }
676
677 let mut set_ranges = set.iter_ranges();
678 let mut check_elem = set_ranges.next();
679
680 let ranges = self.iter_ranges();
681 for range in ranges {
682 match check_elem {
683 Some(ref check_range) => {
684 if check_range.start() >= range.start()
685 && check_range.end() <= &(range.end() + 1)
686 {
687 check_elem = set_ranges.next();
688 }
689 }
690 _ => break,
691 }
692 }
693 check_elem.is_none()
694 }
695
696 pub fn span(&self, span_str: &str, contained: bool) -> usize {
714 span_str
715 .chars()
716 .take_while(|&x| self.contains(x) == contained)
717 .count()
718 }
719
720 pub fn span_back(&self, span_str: &str, contained: bool) -> usize {
737 span_str.len()
738 - span_str
739 .chars()
740 .rev()
741 .take_while(|&x| self.contains(x) == contained)
742 .count()
743 }
744}
745
746#[cfg(test)]
747mod tests {
748 use super::{CodePointInversionList, InvalidSetError};
749 use std::{char, vec::Vec};
750 use zerovec::ZeroVec;
751
752 #[test]
753 fn test_codepointinversionlist_try_from_vec() {
754 let ex = vec![0x2, 0x3, 0x4, 0x5];
755 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
756 assert_eq!(ex, check.get_inversion_list_vec());
757 assert_eq!(2, check.size());
758 }
759
760 #[test]
761 fn test_codepointinversionlist_try_from_vec_error() {
762 let check = vec![0x1, 0x1, 0x2, 0x3, 0x4];
763 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&check);
764 assert!(matches!(set, Err(InvalidSetError(_))));
765 if let Err(InvalidSetError(actual)) = set {
766 assert_eq!(
767 &check,
768 &actual.into_iter().map(u32::from).collect::<Vec<_>>()
769 );
770 }
771 }
772
773 #[test]
775 fn test_codepointinversionlist_contains_query() {
776 let ex = vec![0x41, 0x46, 0x4B, 0x55];
777 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
778 assert!(check.contains_query(0x40).is_none());
779 assert_eq!(check.contains_query(0x41).unwrap(), 0);
780 assert_eq!(check.contains_query(0x44).unwrap(), 0);
781 assert!(check.contains_query(0x46).is_none());
782 assert_eq!(check.contains_query(0x4C).unwrap(), 2);
783 assert!(check.contains_query(0x56).is_none());
784 }
785
786 #[test]
787 fn test_codepointinversionlist_contains() {
788 let ex = vec![0x2, 0x5, 0xA, 0xF];
789 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
790 assert!(check.contains(0x2 as char));
791 assert!(check.contains(0x4 as char));
792 assert!(check.contains(0xA as char));
793 assert!(check.contains(0xE as char));
794 }
795
796 #[test]
797 fn test_codepointinversionlist_contains_false() {
798 let ex = vec![0x2, 0x5, 0xA, 0xF];
799 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
800 assert!(!check.contains(0x1 as char));
801 assert!(!check.contains(0x5 as char));
802 assert!(!check.contains(0x9 as char));
803 assert!(!check.contains(0xF as char));
804 assert!(!check.contains(0x10 as char));
805 }
806
807 #[test]
808 fn test_codepointinversionlist_contains_range() {
809 let ex = vec![0x41, 0x46, 0x4B, 0x55];
810 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
811 assert!(check.contains_range('A'..='E')); assert!(check.contains_range('C'..'D')); assert!(check.contains_range('L'..'P')); assert!(!check.contains_range('L'..='U')); }
816
817 #[test]
818 fn test_codepointinversionlist_contains_range_false() {
819 let ex = vec![0x41, 0x46, 0x4B, 0x55];
820 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
821 assert!(!check.contains_range('!'..'A')); assert!(!check.contains_range('F'..'K')); assert!(!check.contains_range('U'..)); }
825
826 #[test]
827 fn test_codepointinversionlist_contains_range_invalid() {
828 let check = CodePointInversionList::all();
829 assert!(!check.contains_range('A'..'!')); assert!(!check.contains_range('A'..'A')); }
832
833 #[test]
834 fn test_codepointinversionlist_contains_set_u() {
835 let ex = vec![0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x6E];
836 let u = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
837 let inside = vec![0xF, 0x14, 0x2C, 0x31, 0x46, 0x50, 0x64, 0x6D];
838 let s = CodePointInversionList::try_from_u32_inversion_list_slice(&inside).unwrap();
839 assert!(u.contains_set(&s));
840 }
841
842 #[test]
843 fn test_codepointinversionlist_contains_set_u_false() {
844 let ex = vec![0xA, 0x14, 0x28, 0x32, 0x46, 0x50, 0x64, 0x78];
845 let u = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
846 let outside = vec![0x0, 0xA, 0x16, 0x2C, 0x32, 0x46, 0x4F, 0x51, 0x6D, 0x6F];
847 let s = CodePointInversionList::try_from_u32_inversion_list_slice(&outside).unwrap();
848 assert!(!u.contains_set(&s));
849 }
850
851 #[test]
852 fn test_codepointinversionlist_size() {
853 let ex = vec![0x2, 0x5, 0xA, 0xF];
854 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
855 assert_eq!(8, check.size());
856 let check = CodePointInversionList::all();
857 let expected = (char::MAX as u32) + 1;
858 assert_eq!(expected as usize, check.size());
859 let inv_list_vec = vec![];
860 let check = CodePointInversionList {
861 inv_list: ZeroVec::from_slice_or_alloc(&inv_list_vec),
862 size: 0,
863 };
864 assert_eq!(check.size(), 0);
865 }
866
867 #[test]
868 fn test_codepointinversionlist_is_empty() {
869 let inv_list_vec = vec![];
870 let check = CodePointInversionList {
871 inv_list: ZeroVec::from_slice_or_alloc(&inv_list_vec),
872 size: 0,
873 };
874 assert!(check.is_empty());
875 }
876
877 #[test]
878 fn test_codepointinversionlist_is_not_empty() {
879 let check = CodePointInversionList::all();
880 assert!(!check.is_empty());
881 }
882
883 #[test]
884 fn test_codepointinversionlist_iter_chars() {
885 let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
886 let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
887 let mut iter = check.iter_chars();
888 assert_eq!(Some('A'), iter.next());
889 assert_eq!(Some('B'), iter.next());
890 assert_eq!(Some('C'), iter.next());
891 assert_eq!(Some('E'), iter.next());
892 assert_eq!(None, iter.next());
893 }
894
895 #[test]
896 fn test_codepointinversionlist_iter_ranges() {
897 let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
898 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
899 let mut ranges = set.iter_ranges();
900 assert_eq!(Some(0x41..=0x43), ranges.next());
901 assert_eq!(Some(0x45..=0x45), ranges.next());
902 assert_eq!(Some(0xD800..=0xD800), ranges.next());
903 assert_eq!(None, ranges.next());
904 }
905
906 #[test]
907 fn test_codepointinversionlist_iter_ranges_exactsizeiter_trait() {
908 let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
909 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
910 let ranges = set.iter_ranges();
911 assert_eq!(3, ranges.len());
912 }
913
914 #[test]
915 fn test_codepointinversionlist_range_count() {
916 let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
917 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
918 assert_eq!(3, set.get_range_count());
919 }
920
921 #[test]
922 fn test_codepointinversionlist_get_nth_range() {
923 let ex = vec![0x41, 0x44, 0x45, 0x46, 0xD800, 0xD801];
924 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
925 assert_eq!(Some(0x41..=0x43), set.get_nth_range(0));
926 assert_eq!(Some(0x45..=0x45), set.get_nth_range(1));
927 assert_eq!(Some(0xD800..=0xD800), set.get_nth_range(2));
928 assert_eq!(None, set.get_nth_range(3));
929 }
930
931 #[test]
934 fn test_codepointinversionlist_iter_ranges_with_max_code_point() {
935 let ex = vec![0x80, (char::MAX as u32) + 1];
936 let set = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
937 let mut ranges = set.iter_ranges();
938 assert_eq!(Some(0x80..=(char::MAX as u32)), ranges.next());
939 assert_eq!(None, ranges.next());
940 }
941
942 #[test]
943 fn test_codepointinversionlist_span_contains() {
944 let ex = vec![0x41, 0x44, 0x46, 0x4B]; let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
946 assert_eq!(check.span("ABCDE", true), 3);
947 assert_eq!(check.span("E", true), 0);
948 }
949
950 #[test]
951 fn test_codepointinversionlist_span_does_not_contain() {
952 let ex = vec![0x41, 0x44, 0x46, 0x4B]; let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
954 assert_eq!(check.span("DEF", false), 2);
955 assert_eq!(check.span("KLMA", false), 3);
956 }
957
958 #[test]
959 fn test_codepointinversionlist_span_back_contains() {
960 let ex = vec![0x41, 0x44, 0x46, 0x4B]; let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
962 assert_eq!(check.span_back("XYZABFH", true), 3);
963 assert_eq!(check.span_back("ABCXYZ", true), 6);
964 }
965
966 #[test]
967 fn test_codepointinversionlist_span_back_does_not_contain() {
968 let ex = vec![0x41, 0x44, 0x46, 0x4B]; let check = CodePointInversionList::try_from_u32_inversion_list_slice(&ex).unwrap();
970 assert_eq!(check.span_back("ABCXYZ", false), 3);
971 assert_eq!(check.span_back("XYZABC", false), 6);
972 }
973
974 #[test]
975 fn test_uniset_to_inv_list() {
976 let inv_list = [
977 0x9, 0xE, 0x20, 0x21, 0x85, 0x86, 0xA0, 0xA1, 0x1626, 0x1627, 0x2000, 0x2003, 0x2028,
978 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
979 ];
980 let s: CodePointInversionList =
981 CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
982 let round_trip_inv_list = s.get_inversion_list_vec();
983 assert_eq!(
984 round_trip_inv_list.into_iter().collect::<ZeroVec<u32>>(),
985 inv_list
986 );
987 }
988
989 #[test]
990 fn test_serde_serialize() {
991 let inv_list = [0x41, 0x46, 0x4B, 0x55];
992 let uniset = CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
993 let json_str = serde_json::to_string(&uniset).unwrap();
994 assert_eq!(json_str, r#"["A-E","K-T"]"#);
995 }
996
997 #[test]
998 fn test_serde_serialize_surrogates() {
999 let inv_list = [0xDFAB, 0xDFFF];
1000 let uniset = CodePointInversionList::try_from_u32_inversion_list_slice(&inv_list).unwrap();
1001 let json_str = serde_json::to_string(&uniset).unwrap();
1002 assert_eq!(json_str, r#"["U+DFAB-U+DFFE"]"#);
1003 }
1004
1005 #[test]
1006 fn test_serde_deserialize() {
1007 let inv_list_str = r#"["A-E","K-T"]"#;
1008 let exp_inv_list = [0x41, 0x46, 0x4B, 0x55];
1009 let exp_uniset =
1010 CodePointInversionList::try_from_u32_inversion_list_slice(&exp_inv_list).unwrap();
1011 let act_uniset: CodePointInversionList = serde_json::from_str(inv_list_str).unwrap();
1012 assert_eq!(act_uniset, exp_uniset);
1013 }
1014
1015 #[test]
1016 fn test_serde_deserialize_surrogates() {
1017 let inv_list_str = r#"["U+DFAB-U+DFFE"]"#;
1018 let exp_inv_list = [0xDFAB, 0xDFFF];
1019 let exp_uniset =
1020 CodePointInversionList::try_from_u32_inversion_list_slice(&exp_inv_list).unwrap();
1021 let act_uniset: CodePointInversionList = serde_json::from_str(inv_list_str).unwrap();
1022 assert_eq!(act_uniset, exp_uniset);
1023 }
1024
1025 #[test]
1026 fn test_serde_deserialize_invalid() {
1027 assert!(serde_json::from_str::<CodePointInversionList>("[65,70,98775,85]").is_err());
1028 assert!(serde_json::from_str::<CodePointInversionList>("[65,70,U+FFFFFFFFFF,85]").is_err());
1029 }
1030
1031 #[test]
1032 fn test_serde_with_postcard_roundtrip() -> Result<(), postcard::Error> {
1033 let set = CodePointInversionList::bmp();
1034 let set_serialized: Vec<u8> = postcard::to_allocvec(&set).unwrap();
1035 let set_deserialized: CodePointInversionList =
1036 postcard::from_bytes::<CodePointInversionList>(&set_serialized)?;
1037
1038 assert_eq!(&set, &set_deserialized);
1039 assert!(!set_deserialized.inv_list.is_owned());
1040
1041 Ok(())
1042 }
1043
1044 #[test]
1045 fn databake() {
1046 databake::test_bake!(
1047 CodePointInversionList<'static>,
1048 const,
1049 unsafe {
1050 #[allow(unused_unsafe)]
1051 crate::codepointinvlist::CodePointInversionList::from_parts_unchecked(
1052 unsafe {
1053 zerovec::ZeroVec::from_bytes_unchecked(
1054 b"0\0\0\0:\0\0\0A\0\0\0G\0\0\0a\0\0\0g\0\0\0",
1055 )
1056 },
1057 22u32,
1058 )
1059 },
1060 icu_collections,
1061 [zerovec],
1062 );
1063 }
1064}