icu_locale_core/extensions/unicode/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locale::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locale::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//! loc.extensions.unicode.keywords.get(&key!("hc")),
21//! Some(&value!("h12"))
22//! );
23//! assert!(loc
24//! .extensions
25//! .unicode
26//! .attributes
27//! .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod subdivision;
34mod value;
35
36use core::cmp::Ordering;
37#[cfg(feature = "alloc")]
38use core::str::FromStr;
39
40#[doc(inline)]
41pub use attribute::{attribute, Attribute};
42pub use attributes::Attributes;
43#[doc(inline)]
44pub use key::{key, Key};
45pub use keywords::Keywords;
46#[doc(inline)]
47pub use subdivision::{subdivision_suffix, SubdivisionId, SubdivisionSuffix};
48#[doc(inline)]
49pub use value::{value, Value};
50
51#[cfg(feature = "alloc")]
52use super::ExtensionType;
53#[cfg(feature = "alloc")]
54use crate::parser::ParseError;
55#[cfg(feature = "alloc")]
56use crate::parser::SubtagIterator;
57
/// Singleton character written first when a [`Unicode`] extension is serialized.
pub(crate) const UNICODE_EXT_CHAR: char = 'u';
/// String form of the same singleton, passed as the leading subtag when the
/// extension's subtags are visited one by one with the extension marker included.
pub(crate) const UNICODE_EXT_STR: &str = "u";
60
61/// Unicode Extensions provide information about user preferences in a given locale.
62///
63/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
64/// Identifier`] specification.
65///
66/// Unicode extensions provide subtags that specify language and/or locale-based behavior
67/// or refinements to language tags, according to work done by the Unicode Consortium.
68/// (See [`RFC 6067`] for details).
69///
70/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
71/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
72/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
73///
74/// # Examples
75///
76/// ```
77/// use icu::locale::extensions::unicode::{key, value};
78/// use icu::locale::Locale;
79///
80/// let loc: Locale =
81/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
82///
83/// assert_eq!(
84/// loc.extensions.unicode.keywords.get(&key!("ca")),
85/// Some(&value!("buddhist"))
86/// );
87/// ```
88#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
89#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
90pub struct Unicode {
91 /// The key-value pairs present in this locale extension, with each extension key subtag
92 /// associated to its provided value subtag.
93 pub keywords: Keywords,
94 /// A canonically ordered sequence of single standalone subtags for this locale extension.
95 pub attributes: Attributes,
96}
97
98impl Unicode {
99 /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use icu::locale::extensions::unicode::Unicode;
105 ///
106 /// assert_eq!(Unicode::new(), Unicode::default());
107 /// ```
108 #[inline]
109 pub const fn new() -> Self {
110 Self {
111 keywords: Keywords::new(),
112 attributes: Attributes::new(),
113 }
114 }
115
116 /// A constructor which takes a str slice, parses it and
117 /// produces a well-formed [`Unicode`].
118 #[inline]
119 #[cfg(feature = "alloc")]
120 pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
121 Self::try_from_utf8(s.as_bytes())
122 }
123
124 /// See [`Self::try_from_str`]
125 #[cfg(feature = "alloc")]
126 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
127 let mut iter = SubtagIterator::new(code_units);
128
129 let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
130 if let ExtensionType::Unicode = ExtensionType::try_from_byte_slice(ext)? {
131 return Self::try_from_iter(&mut iter);
132 }
133
134 Err(ParseError::InvalidExtension)
135 }
136
137 /// Returns [`true`] if there list of keywords and attributes is empty.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use icu::locale::Locale;
143 ///
144 /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
145 ///
146 /// assert!(!loc.extensions.unicode.is_empty());
147 /// ```
148 pub fn is_empty(&self) -> bool {
149 self.keywords.is_empty() && self.attributes.is_empty()
150 }
151
152 /// Clears all Unicode extension keywords and attributes, effectively removing
153 /// the Unicode extension.
154 ///
155 /// # Example
156 ///
157 /// ```
158 /// use icu::locale::Locale;
159 ///
160 /// let mut loc: Locale =
161 /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
162 /// loc.extensions.unicode.clear();
163 /// assert_eq!(loc, "und-t-mul".parse().unwrap());
164 /// ```
165 pub fn clear(&mut self) {
166 self.keywords.clear();
167 self.attributes.clear();
168 }
169
170 pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) {
171 (&self.attributes, &self.keywords)
172 }
173
174 /// Returns an ordering suitable for use in [`BTreeSet`].
175 ///
176 /// The ordering may or may not be equivalent to string ordering, and it
177 /// may or may not be stable across ICU4X releases.
178 ///
179 /// [`BTreeSet`]: alloc::collections::BTreeSet
180 pub fn total_cmp(&self, other: &Self) -> Ordering {
181 self.as_tuple().cmp(&other.as_tuple())
182 }
183
184 #[cfg(feature = "alloc")]
185 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
186 let attributes = Attributes::try_from_iter(iter)?;
187 let keywords = Keywords::try_from_iter(iter)?;
188
189 // Ensure we've defined at least one attribute or keyword
190 if attributes.is_empty() && keywords.is_empty() {
191 return Err(ParseError::InvalidExtension);
192 }
193
194 Ok(Self {
195 keywords,
196 attributes,
197 })
198 }
199
200 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
201 where
202 F: FnMut(&str) -> Result<(), E>,
203 {
204 if !self.is_empty() {
205 if with_ext {
206 f(UNICODE_EXT_STR)?;
207 }
208 self.attributes.for_each_subtag_str(f)?;
209 self.keywords.for_each_subtag_str(f)?;
210 }
211 Ok(())
212 }
213}
214
/// Enables `str::parse::<Unicode>()`; the accepted syntax and the errors are those
/// of [`Unicode::try_from_utf8`].
#[cfg(feature = "alloc")]
impl FromStr for Unicode {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
224
225writeable::impl_display_with_writeable!(Unicode);
226
227impl writeable::Writeable for Unicode {
228 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
229 sink.write_char(UNICODE_EXT_CHAR)?;
230
231 if !self.attributes.is_empty() {
232 sink.write_char('-')?;
233 writeable::Writeable::write_to(&self.attributes, sink)?;
234 }
235 if !self.keywords.is_empty() {
236 sink.write_char('-')?;
237 writeable::Writeable::write_to(&self.keywords, sink)?;
238 }
239 Ok(())
240 }
241
242 fn writeable_length_hint(&self) -> writeable::LengthHint {
243 if self.is_empty() {
244 return writeable::LengthHint::exact(0);
245 }
246 let mut result = writeable::LengthHint::exact(1);
247 if !self.attributes.is_empty() {
248 result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
249 }
250 if !self.keywords.is_empty() {
251 result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
252 }
253 result
254 }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed extension round-trips through parsing and display, while a
    /// singleton with nothing after it is rejected.
    #[test]
    fn test_unicode_extension_fromstr() {
        let parsed = "u-foo-hc-h12"
            .parse::<Unicode>()
            .expect("Failed to parse Unicode");
        assert_eq!(parsed.to_string(), "u-foo-hc-h12");

        // No attribute or keyword follows the singleton here.
        let bare = "u".parse::<Unicode>();
        assert!(bare.is_err());
    }
}
269}