icu_locale_core/extensions/unicode/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locale::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locale::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//!     loc.extensions.unicode.keywords.get(&key!("hc")),
21//!     Some(&value!("h12"))
22//! );
23//! assert!(loc
24//!     .extensions
25//!     .unicode
26//!     .attributes
27//!     .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod subdivision;
34mod value;
35
36use core::cmp::Ordering;
37#[cfg(feature = "alloc")]
38use core::str::FromStr;
39
40#[doc(inline)]
41pub use attribute::{attribute, Attribute};
42pub use attributes::Attributes;
43#[doc(inline)]
44pub use key::{key, Key};
45pub use keywords::Keywords;
46#[doc(inline)]
47pub use subdivision::{subdivision_suffix, SubdivisionId, SubdivisionSuffix};
48#[doc(inline)]
49pub use value::{value, Value};
50
51#[cfg(feature = "alloc")]
52use super::ExtensionType;
53#[cfg(feature = "alloc")]
54use crate::parser::ParseError;
55#[cfg(feature = "alloc")]
56use crate::parser::SubtagIterator;
57
58pub(crate) const UNICODE_EXT_CHAR: char = 'u';
59pub(crate) const UNICODE_EXT_STR: &str = "u";
60
61/// Unicode Extensions provide information about user preferences in a given locale.
62///
63/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
64/// Identifier`] specification.
65///
66/// Unicode extensions provide subtags that specify language and/or locale-based behavior
67/// or refinements to language tags, according to work done by the Unicode Consortium.
68/// (See [`RFC 6067`] for details).
69///
70/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
71/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
72/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
73///
74/// # Examples
75///
76/// ```
77/// use icu::locale::extensions::unicode::{key, value};
78/// use icu::locale::Locale;
79///
80/// let loc: Locale =
81///     "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
82///
83/// assert_eq!(
84///     loc.extensions.unicode.keywords.get(&key!("ca")),
85///     Some(&value!("buddhist"))
86/// );
87/// ```
88#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
89#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
90pub struct Unicode {
91    /// The key-value pairs present in this locale extension, with each extension key subtag
92    /// associated to its provided value subtag.
93    pub keywords: Keywords,
94    /// A canonically ordered sequence of single standalone subtags for this locale extension.
95    pub attributes: Attributes,
96}
97
98impl Unicode {
99    /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// use icu::locale::extensions::unicode::Unicode;
105    ///
106    /// assert_eq!(Unicode::new(), Unicode::default());
107    /// ```
108    #[inline]
109    pub const fn new() -> Self {
110        Self {
111            keywords: Keywords::new(),
112            attributes: Attributes::new(),
113        }
114    }
115
116    /// A constructor which takes a str slice, parses it and
117    /// produces a well-formed [`Unicode`].
118    #[inline]
119    #[cfg(feature = "alloc")]
120    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
121        Self::try_from_utf8(s.as_bytes())
122    }
123
124    /// See [`Self::try_from_str`]
125    #[cfg(feature = "alloc")]
126    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
127        let mut iter = SubtagIterator::new(code_units);
128
129        let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
130        if let ExtensionType::Unicode = ExtensionType::try_from_byte_slice(ext)? {
131            return Self::try_from_iter(&mut iter);
132        }
133
134        Err(ParseError::InvalidExtension)
135    }
136
137    /// Returns [`true`] if there list of keywords and attributes is empty.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use icu::locale::Locale;
143    ///
144    /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
145    ///
146    /// assert!(!loc.extensions.unicode.is_empty());
147    /// ```
148    pub fn is_empty(&self) -> bool {
149        self.keywords.is_empty() && self.attributes.is_empty()
150    }
151
152    /// Clears all Unicode extension keywords and attributes, effectively removing
153    /// the Unicode extension.
154    ///
155    /// # Example
156    ///
157    /// ```
158    /// use icu::locale::Locale;
159    ///
160    /// let mut loc: Locale =
161    ///     "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
162    /// loc.extensions.unicode.clear();
163    /// assert_eq!(loc, "und-t-mul".parse().unwrap());
164    /// ```
165    pub fn clear(&mut self) {
166        self.keywords.clear();
167        self.attributes.clear();
168    }
169
170    pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) {
171        (&self.attributes, &self.keywords)
172    }
173
174    /// Returns an ordering suitable for use in [`BTreeSet`].
175    ///
176    /// The ordering may or may not be equivalent to string ordering, and it
177    /// may or may not be stable across ICU4X releases.
178    ///
179    /// [`BTreeSet`]: alloc::collections::BTreeSet
180    pub fn total_cmp(&self, other: &Self) -> Ordering {
181        self.as_tuple().cmp(&other.as_tuple())
182    }
183
184    #[cfg(feature = "alloc")]
185    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
186        let attributes = Attributes::try_from_iter(iter)?;
187        let keywords = Keywords::try_from_iter(iter)?;
188
189        // Ensure we've defined at least one attribute or keyword
190        if attributes.is_empty() && keywords.is_empty() {
191            return Err(ParseError::InvalidExtension);
192        }
193
194        Ok(Self {
195            keywords,
196            attributes,
197        })
198    }
199
200    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
201    where
202        F: FnMut(&str) -> Result<(), E>,
203    {
204        if !self.is_empty() {
205            if with_ext {
206                f(UNICODE_EXT_STR)?;
207            }
208            self.attributes.for_each_subtag_str(f)?;
209            self.keywords.for_each_subtag_str(f)?;
210        }
211        Ok(())
212    }
213}
214
/// Allows `str::parse::<Unicode>()`; accepts the same input as
/// [`Unicode::try_from_str`].
#[cfg(feature = "alloc")]
impl FromStr for Unicode {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
224
225writeable::impl_display_with_writeable!(Unicode);
226
227impl writeable::Writeable for Unicode {
228    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
229        sink.write_char(UNICODE_EXT_CHAR)?;
230
231        if !self.attributes.is_empty() {
232            sink.write_char('-')?;
233            writeable::Writeable::write_to(&self.attributes, sink)?;
234        }
235        if !self.keywords.is_empty() {
236            sink.write_char('-')?;
237            writeable::Writeable::write_to(&self.keywords, sink)?;
238        }
239        Ok(())
240    }
241
242    fn writeable_length_hint(&self) -> writeable::LengthHint {
243        if self.is_empty() {
244            return writeable::LengthHint::exact(0);
245        }
246        let mut result = writeable::LengthHint::exact(1);
247        if !self.attributes.is_empty() {
248            result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
249        }
250        if !self.keywords.is_empty() {
251            result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
252        }
253        result
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed extension survives a parse/format round trip, while a bare
    /// singleton with no attributes or keywords is rejected.
    #[test]
    fn test_unicode_extension_fromstr() {
        let parsed = "u-foo-hc-h12"
            .parse::<Unicode>()
            .expect("Failed to parse Unicode");
        assert_eq!(parsed.to_string(), "u-foo-hc-h12");

        let bare_singleton = "u".parse::<Unicode>();
        assert!(bare_singleton.is_err());
    }
}
269}