icu_normalizer/uts46.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! Bundles the part of UTS 46 that makes sense to implement as a
//! normalization.
//!
//! This is meant to be used as a building block of a UTS 46
//! implementation, such as the `idna` crate.
10
11use crate::ComposingNormalizer;
12use crate::ComposingNormalizerBorrowed;
13use crate::NormalizerNfcV1;
14use crate::NormalizerNfdTablesV1;
15use crate::NormalizerNfkdTablesV1;
16use crate::NormalizerUts46DataV1;
17use icu_provider::DataError;
18use icu_provider::DataProvider;
19
// Implementation note: Despite merely wrapping a `ComposingNormalizer`,
// having a `Uts46Mapper` serves two purposes:
//
// 1. Denying public access to parts of the `ComposingNormalizer` API
//    that don't work when the data contains markers for ignorables.
// 2. Providing a place where additional iterator pre-processing or
//    post-processing can take place if needed in the future. (At the
//    time of writing, such processing looked necessary, but it turned
//    out not to be needed after all.)
29
30/// A borrowed version of a mapper that knows how to performs the
31/// subsets of UTS 46 processing documented on the methods.
32#[derive(Debug)]
33pub struct Uts46MapperBorrowed<'a> {
34 normalizer: ComposingNormalizerBorrowed<'a>,
35}
36
#[cfg(feature = "compiled_data")]
impl Default for Uts46MapperBorrowed<'static> {
    /// Equivalent to [`Uts46MapperBorrowed::new`]: constructs the mapper
    /// with compiled data.
    fn default() -> Self {
        Uts46MapperBorrowed::new()
    }
}
43
44impl Uts46MapperBorrowed<'static> {
45 /// Cheaply converts a [`Uts46MapperBorrowed<'static>`] into a [`Uts46Mapper`].
46 ///
47 /// Note: Due to branching and indirection, using [`Uts46Mapper`] might inhibit some
48 /// compile-time optimizations that are possible with [`Uts46MapperBorrowed`].
49 pub const fn static_to_owned(self) -> Uts46Mapper {
50 Uts46Mapper {
51 normalizer: self.normalizer.static_to_owned(),
52 }
53 }
54
55 /// Construct with compiled data.
56 #[cfg(feature = "compiled_data")]
57 pub const fn new() -> Self {
58 Uts46MapperBorrowed {
59 normalizer: ComposingNormalizerBorrowed::new_uts46(),
60 }
61 }
62}
63
64impl Uts46MapperBorrowed<'_> {
65 /// Returns an iterator adaptor that turns an `Iterator` over `char`
66 /// into an iterator yielding a `char` sequence that gets the following
67 /// operations from the "Map" and "Normalize" steps of the "Processing"
68 /// section of UTS 46 lazily applied to it:
69 ///
70 /// 1. The _ignored_ characters are ignored.
71 /// 2. The _mapped_ characters are mapped.
72 /// 3. The _disallowed_ characters are replaced with U+FFFD,
73 /// which itself is a disallowed character.
74 /// 4. The _deviation_ characters are treated as _mapped_ or _valid_
75 /// as appropriate.
76 /// 5. The _disallowed_STD3_valid_ characters are treated as allowed.
77 /// 6. The _disallowed_STD3_mapped_ characters are treated as
78 /// _mapped_.
79 /// 7. The result is normalized to NFC.
80 ///
81 /// Notably:
82 ///
83 /// * The STD3 or WHATWG ASCII deny list should be implemented as a
84 /// post-processing step.
85 /// * Transitional processing is not performed. Transitional mapping
86 /// would be a pre-processing step, but transitional processing is
87 /// deprecated, and none of Firefox, Safari, or Chrome use it.
88 pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>(
89 &'delegate self,
90 iter: I,
91 ) -> impl Iterator<Item = char> + 'delegate {
92 self.normalizer
93 .normalize_iter_private(iter, crate::IgnorableBehavior::Ignored)
94 }
95
96 /// Returns an iterator adaptor that turns an `Iterator` over `char`
97 /// into an iterator yielding a `char` sequence that gets the following
98 /// operations from the NFC check and statucs steps of the "Validity
99 /// Criteria" section of UTS 46 lazily applied to it:
100 ///
101 /// 1. The _ignored_ characters are treated as _disallowed_.
102 /// 2. The _mapped_ characters are mapped.
103 /// 3. The _disallowed_ characters are replaced with U+FFFD,
104 /// which itself is a disallowed character.
105 /// 4. The _deviation_ characters are treated as _mapped_ or _valid_
106 /// as appropriate.
107 /// 5. The _disallowed_STD3_valid_ characters are treated as allowed.
108 /// 6. The _disallowed_STD3_mapped_ characters are treated as
109 /// _mapped_.
110 /// 7. The result is normalized to NFC.
111 ///
112 /// Notably:
113 ///
114 /// * The STD3 or WHATWG ASCII deny list should be implemented as a
115 /// post-processing step.
116 /// * Transitional processing is not performed. Transitional mapping
117 /// would be a pre-processing step, but transitional processing is
118 /// deprecated, and none of Firefox, Safari, or Chrome use it.
119 /// * The output needs to be compared with input to see if anything
120 /// changed. This check catches failures to adhere to the normalization
121 /// and status requirements. In particular, this comparison results
122 /// in _mapped_ characters resulting in error like "Validity Criteria"
123 /// requires.
124 pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>(
125 &'delegate self,
126 iter: I,
127 ) -> impl Iterator<Item = char> + 'delegate {
128 self.normalizer
129 .normalize_iter_private(iter, crate::IgnorableBehavior::ReplacementCharacter)
130 }
131}
132
133/// A mapper that knows how to performs the subsets of UTS 46 processing
134/// documented on the methods.
135#[derive(Debug)]
136pub struct Uts46Mapper {
137 normalizer: ComposingNormalizer,
138}
139
#[cfg(feature = "compiled_data")]
impl Default for Uts46Mapper {
    /// Builds the owned mapper from compiled data by converting the
    /// borrowed mapper returned by [`Uts46Mapper::new`].
    fn default() -> Self {
        let borrowed = Self::new();
        borrowed.static_to_owned()
    }
}
146
147impl Uts46Mapper {
148 /// Constructs a borrowed version of this type for more efficient querying.
149 pub fn as_borrowed(&self) -> Uts46MapperBorrowed<'_> {
150 Uts46MapperBorrowed {
151 normalizer: self.normalizer.as_borrowed(),
152 }
153 }
154
155 /// Construct with compiled data.
156 #[cfg(feature = "compiled_data")]
157 #[allow(clippy::new_ret_no_self)]
158 pub const fn new() -> Uts46MapperBorrowed<'static> {
159 Uts46MapperBorrowed::new()
160 }
161
162 /// Construct with provider.
163 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
164 pub fn try_new<D>(provider: &D) -> Result<Self, DataError>
165 where
166 D: DataProvider<NormalizerUts46DataV1>
167 + DataProvider<NormalizerNfdTablesV1>
168 + DataProvider<NormalizerNfkdTablesV1>
169 // UTS 46 tables merged into NormalizerNfkdTablesV1
170 + DataProvider<NormalizerNfcV1>
171 + ?Sized,
172 {
173 let normalizer = ComposingNormalizer::try_new_uts46_unstable(provider)?;
174
175 Ok(Uts46Mapper { normalizer })
176 }
177}