icu_locale_core/extensions/transform/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locale::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//!     "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//!     "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38#[cfg(feature = "alloc")]
39use core::str::FromStr;
40
41pub use fields::Fields;
42#[doc(inline)]
43pub use key::{key, Key};
44pub use value::Value;
45
46#[cfg(feature = "alloc")]
47use super::ExtensionType;
48#[cfg(feature = "alloc")]
49use crate::parser::SubtagIterator;
50#[cfg(feature = "alloc")]
51use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52#[cfg(feature = "alloc")]
53use crate::shortvec::ShortBoxSlice;
54use crate::subtags;
55#[cfg(feature = "alloc")]
56use crate::subtags::Language;
57use crate::LanguageIdentifier;
58#[cfg(feature = "alloc")]
59use litemap::LiteMap;
60
/// The `t` marker that prefixes a serialized transform extension, as a string slice.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";
/// The `t` marker that prefixes a serialized transform extension, as a single character.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
63
/// A list of [`Unicode BCP47 T Extensions`] as defined in the [`Unicode Locale
/// Identifier`] specification.
66///
/// The transform extension carries information about the source language or script of
/// transformed content, including content that has been transliterated, transcribed,
/// or translated, or in some other way influenced by the source (see [`RFC 6497`] for details).
70///
71/// # Examples
72///
73/// ```
74/// use icu::locale::extensions::transform::{Key, Value};
75/// use icu::locale::{LanguageIdentifier, Locale};
76///
77/// let mut loc: Locale =
78///     "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
79///
80/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
81///
82/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
83/// let key: Key = "h0".parse().expect("Parsing key failed.");
84/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
85/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
86/// ```
87/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
88/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
89/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
90#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
91#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
92pub struct Transform {
93    /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
94    pub lang: Option<LanguageIdentifier>,
95    /// The key-value pairs present in this locale extension, with each extension key subtag
96    /// associated to its provided value subtag.
97    pub fields: Fields,
98}
99
100impl Transform {
101    /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use icu::locale::extensions::transform::Transform;
107    ///
108    /// assert_eq!(Transform::new(), Transform::default());
109    /// ```
110    #[inline]
111    pub const fn new() -> Self {
112        Self {
113            lang: None,
114            fields: Fields::new(),
115        }
116    }
117
118    /// A constructor which takes a str slice, parses it and
119    /// produces a well-formed [`Transform`].
120    #[inline]
121    #[cfg(feature = "alloc")]
122    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
123        Self::try_from_utf8(s.as_bytes())
124    }
125
126    /// See [`Self::try_from_str`]
127    #[cfg(feature = "alloc")]
128    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
129        let mut iter = SubtagIterator::new(code_units);
130
131        let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
132        if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
133            return Self::try_from_iter(&mut iter);
134        }
135
136        Err(ParseError::InvalidExtension)
137    }
138
139    /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use icu::locale::Locale;
145    ///
146    /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
147    ///
148    /// assert!(!loc.extensions.transform.is_empty());
149    /// ```
150    pub fn is_empty(&self) -> bool {
151        self.lang.is_none() && self.fields.is_empty()
152    }
153
154    /// Clears the transform extension, effectively removing it from the locale.
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// use icu::locale::Locale;
160    ///
161    /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
162    /// loc.extensions.transform.clear();
163    /// assert_eq!(loc, "en-US".parse().unwrap());
164    /// ```
165    pub fn clear(&mut self) {
166        self.lang = None;
167        self.fields.clear();
168    }
169
170    #[allow(clippy::type_complexity)]
171    pub(crate) fn as_tuple(
172        &self,
173    ) -> (
174        Option<(
175            subtags::Language,
176            Option<subtags::Script>,
177            Option<subtags::Region>,
178            &subtags::Variants,
179        )>,
180        &Fields,
181    ) {
182        (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
183    }
184
185    /// Returns an ordering suitable for use in [`BTreeSet`].
186    ///
187    /// The ordering may or may not be equivalent to string ordering, and it
188    /// may or may not be stable across ICU4X releases.
189    ///
190    /// [`BTreeSet`]: alloc::collections::BTreeSet
191    pub fn total_cmp(&self, other: &Self) -> Ordering {
192        self.as_tuple().cmp(&other.as_tuple())
193    }
194
195    #[cfg(feature = "alloc")]
196    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
197        let mut tlang = None;
198        let mut tfields = LiteMap::new();
199
200        if let Some(subtag) = iter.peek() {
201            if Language::try_from_utf8(subtag).is_ok() {
202                tlang = Some(parse_language_identifier_from_iter(
203                    iter,
204                    ParserMode::Partial,
205                )?);
206            }
207        }
208
209        let mut current_tkey = None;
210        let mut current_tvalue = ShortBoxSlice::new();
211        let mut has_current_tvalue = false;
212
213        while let Some(subtag) = iter.peek() {
214            if let Some(tkey) = current_tkey {
215                if let Ok(val) = Value::parse_subtag(subtag) {
216                    has_current_tvalue = true;
217                    if let Some(val) = val {
218                        current_tvalue.push(val);
219                    }
220                } else {
221                    if !has_current_tvalue {
222                        return Err(ParseError::InvalidExtension);
223                    }
224                    tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
225                    current_tkey = None;
226                    current_tvalue = ShortBoxSlice::new();
227                    has_current_tvalue = false;
228                    continue;
229                }
230            } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
231                current_tkey = Some(tkey);
232            } else {
233                break;
234            }
235
236            iter.next();
237        }
238
239        if let Some(tkey) = current_tkey {
240            if !has_current_tvalue {
241                return Err(ParseError::InvalidExtension);
242            }
243            tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
244        }
245
246        if tlang.is_none() && tfields.is_empty() {
247            Err(ParseError::InvalidExtension)
248        } else {
249            Ok(Self {
250                lang: tlang,
251                fields: tfields.into(),
252            })
253        }
254    }
255
256    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
257    where
258        F: FnMut(&str) -> Result<(), E>,
259    {
260        if self.is_empty() {
261            return Ok(());
262        }
263        if with_ext {
264            f(TRANSFORM_EXT_STR)?;
265        }
266        if let Some(lang) = &self.lang {
267            lang.for_each_subtag_str_lowercased(f)?;
268        }
269        self.fields.for_each_subtag_str(f)
270    }
271}
272
/// Allows `"t-...".parse::<Transform>()`; parsing follows [`Transform::try_from_utf8`].
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
282
283writeable::impl_display_with_writeable!(Transform);
284
285impl writeable::Writeable for Transform {
286    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
287        if self.is_empty() {
288            return Ok(());
289        }
290        sink.write_char(TRANSFORM_EXT_CHAR)?;
291        if let Some(lang) = &self.lang {
292            sink.write_char('-')?;
293            lang.write_lowercased_to(sink)?;
294        }
295        if !self.fields.is_empty() {
296            sink.write_char('-')?;
297            writeable::Writeable::write_to(&self.fields, sink)?;
298        }
299        Ok(())
300    }
301
302    fn writeable_length_hint(&self) -> writeable::LengthHint {
303        if self.is_empty() {
304            return writeable::LengthHint::exact(0);
305        }
306        let mut result = writeable::LengthHint::exact(1);
307        if let Some(lang) = &self.lang {
308            result += writeable::Writeable::writeable_length_hint(lang) + 1;
309        }
310        if !self.fields.is_empty() {
311            result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
312        }
313        result
314    }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    /// A full extension round-trips through `FromStr` and `Display`, while a bare
    /// `t` with no content is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let parsed = "t-en-us-h0-hybrid"
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(parsed.to_string(), "t-en-us-h0-hybrid");

        let bare_marker = "t".parse::<Transform>();
        assert!(bare_marker.is_err());
    }
}