icu_locale_core/parser/
langid.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5pub use super::errors::ParseError;
6use crate::extensions::unicode::{Attribute, Key, Value};
7use crate::extensions::ExtensionType;
8use crate::parser::SubtagIterator;
9#[cfg(feature = "alloc")]
10use crate::shortvec::ShortBoxSlice;
11use crate::subtags::Subtag;
12#[cfg(feature = "alloc")]
13use crate::LanguageIdentifier;
14use crate::{extensions, subtags};
15
/// Selects how the parsers in this module treat the subtags that follow the
/// language subtag.
#[derive(Clone, Copy, PartialEq)]
pub enum ParserMode {
    /// Every remaining subtag has to parse as a script, region or variant;
    /// anything else is reported as an invalid subtag.
    LanguageIdentifier,
    /// Language-identifier parsing stops at the first one-byte subtag, which
    /// is left unconsumed so that extension parsing can pick it up.
    Locale,
    /// Like [`ParserMode::Locale`] with respect to one-byte subtags, but
    /// parsing also stops (instead of failing) at the first subtag that is
    /// not a script, region or variant.
    // Nothing in this file constructs this variant, hence the lint exemption.
    #[allow(dead_code)]
    Partial,
}
23
/// Tracks which kinds of subtag may still be accepted while scanning the part
/// of a language identifier that follows the language subtag.
#[derive(Clone, Copy, PartialEq)]
enum ParserPosition {
    /// Nothing after the language has been consumed yet: a script, a region
    /// or a variant may come next.
    Script,
    /// A script has been consumed: a region or a variant may come next.
    Region,
    /// A region or a variant has been consumed: only variants may follow.
    Variant,
}
30
/// Parses a language identifier from the front of `iter`.
///
/// The first subtag must be a valid language. After it, an optional script,
/// an optional region and any number of variants are accepted, in that order.
/// Variants are inserted at the position reported by `binary_search`, so they
/// end up stored in sorted order.
///
/// Parsing ends when `iter` is exhausted. In every mode other than
/// [`ParserMode::LanguageIdentifier`] it also ends at the first one-byte
/// subtag, and in [`ParserMode::Partial`] at the first subtag that is not a
/// script, region or variant. The subtag that ends parsing is only peeked at,
/// never consumed.
///
/// # Errors
///
/// - [`ParseError::InvalidLanguage`] if `iter` yields no subtag at all.
/// - Whatever error `subtags::Language::try_from_utf8` reports for the first
///   subtag.
/// - [`ParseError::InvalidSubtag`] if a variant is repeated, or if (outside
///   `Partial` mode) a subtag is none of the kinds still allowed at its
///   position.
#[cfg(feature = "alloc")]
pub fn parse_language_identifier_from_iter(
    iter: &mut SubtagIterator,
    mode: ParserMode,
) -> Result<LanguageIdentifier, ParseError> {
    let language = match iter.next() {
        Some(first) => subtags::Language::try_from_utf8(first)?,
        None => return Err(ParseError::InvalidLanguage),
    };

    let mut script = None;
    let mut region = None;
    let mut variants = ShortBoxSlice::new();
    let mut position = ParserPosition::Script;

    while let Some(subtag) = iter.peek() {
        // Outside plain language-identifier mode a one-byte subtag marks the
        // end of the language identifier; leave it for the caller.
        if subtag.len() == 1 && mode != ParserMode::LanguageIdentifier {
            break;
        }

        // A script is only possible directly after the language.
        if position == ParserPosition::Script {
            if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
                script = Some(s);
                position = ParserPosition::Region;
                iter.next();
                continue;
            }
        }

        // A region is possible until a region or a variant has been seen.
        if position != ParserPosition::Variant {
            if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
                region = Some(r);
                position = ParserPosition::Variant;
                iter.next();
                continue;
            }
        }

        // Everything else has to be a variant.
        match subtags::Variant::try_from_utf8(subtag) {
            Ok(v) => match variants.binary_search(&v) {
                Err(idx) => {
                    variants.insert(idx, v);
                }
                // A repeated variant is only rejected once we are already in
                // the variant section; before that the list is still empty,
                // so this arm cannot be reached from the earlier positions.
                Ok(_) if position == ParserPosition::Variant => {
                    return Err(ParseError::InvalidSubtag);
                }
                Ok(_) => {}
            },
            Err(_) if mode == ParserMode::Partial => break,
            Err(_) => return Err(ParseError::InvalidSubtag),
        }
        position = ParserPosition::Variant;
        iter.next();
    }

    Ok(LanguageIdentifier {
        language,
        script,
        region,
        variants: subtags::Variants::from_short_slice_unchecked(variants),
    })
}
105
/// Parses the bytes `t` as a language identifier.
///
/// This wraps `t` in a fresh [`SubtagIterator`] and forwards to
/// [`parse_language_identifier_from_iter`] with the given `mode`; the result
/// and any error are returned unchanged.
#[cfg(feature = "alloc")]
pub fn parse_language_identifier(
    t: &[u8],
    mode: ParserMode,
) -> Result<LanguageIdentifier, ParseError> {
    parse_language_identifier_from_iter(&mut SubtagIterator::new(t), mode)
}
114
115#[allow(clippy::type_complexity)]
116pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
117    mut iter: SubtagIterator,
118    mode: ParserMode,
119) -> Result<
120    (
121        subtags::Language,
122        Option<subtags::Script>,
123        Option<subtags::Region>,
124        Option<subtags::Variant>,
125        Option<(extensions::unicode::Key, Option<Subtag>)>,
126    ),
127    ParseError,
128> {
129    let language;
130    let mut script = None;
131    let mut region = None;
132    let mut variant = None;
133    let mut keyword = None;
134
135    if let (i, Some(subtag)) = iter.next_const() {
136        iter = i;
137        match subtags::Language::try_from_utf8(subtag) {
138            Ok(l) => language = l,
139            Err(e) => return Err(e),
140        }
141    } else {
142        return Err(ParseError::InvalidLanguage);
143    }
144
145    let mut position = ParserPosition::Script;
146
147    while let Some(subtag) = iter.peek() {
148        if !matches!(mode, ParserMode::LanguageIdentifier) && subtag.len() == 1 {
149            break;
150        }
151
152        if matches!(position, ParserPosition::Script) {
153            if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
154                script = Some(s);
155                position = ParserPosition::Region;
156            } else if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
157                region = Some(r);
158                position = ParserPosition::Variant;
159            } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
160                // We cannot handle multiple variants in a const context
161                debug_assert!(variant.is_none());
162                variant = Some(v);
163                position = ParserPosition::Variant;
164            } else if matches!(mode, ParserMode::Partial) {
165                break;
166            } else {
167                return Err(ParseError::InvalidSubtag);
168            }
169        } else if matches!(position, ParserPosition::Region) {
170            if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
171                region = Some(s);
172                position = ParserPosition::Variant;
173            } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
174                // We cannot handle multiple variants in a const context
175                debug_assert!(variant.is_none());
176                variant = Some(v);
177                position = ParserPosition::Variant;
178            } else if matches!(mode, ParserMode::Partial) {
179                break;
180            } else {
181                return Err(ParseError::InvalidSubtag);
182            }
183        } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
184            debug_assert!(matches!(position, ParserPosition::Variant));
185            if variant.is_some() {
186                // We cannot handle multiple variants in a const context
187                return Err(ParseError::InvalidSubtag);
188            }
189            variant = Some(v);
190        } else if matches!(mode, ParserMode::Partial) {
191            break;
192        } else {
193            return Err(ParseError::InvalidSubtag);
194        }
195
196        iter = iter.next_const().0;
197    }
198
199    if matches!(mode, ParserMode::Locale) {
200        if let Some(subtag) = iter.peek() {
201            match ExtensionType::try_from_utf8(subtag) {
202                Ok(ExtensionType::Unicode) => {
203                    iter = iter.next_const().0;
204                    if let Some(peek) = iter.peek() {
205                        if Attribute::try_from_utf8(peek).is_ok() {
206                            // We cannot handle Attributes in a const context
207                            return Err(ParseError::InvalidSubtag);
208                        }
209                    }
210
211                    let mut key = None;
212                    let mut current_type = None;
213
214                    while let Some(peek) = iter.peek() {
215                        if peek.len() == 2 {
216                            if key.is_some() {
217                                // We cannot handle more than one Key in a const context
218                                return Err(ParseError::InvalidSubtag);
219                            }
220                            match Key::try_from_utf8(peek) {
221                                Ok(k) => key = Some(k),
222                                Err(e) => return Err(e),
223                            };
224                        } else if key.is_some() {
225                            match Value::parse_subtag_from_utf8(peek) {
226                                Ok(Some(t)) => {
227                                    if current_type.is_some() {
228                                        // We cannot handle more than one type in a const context
229                                        return Err(ParseError::InvalidSubtag);
230                                    }
231                                    current_type = Some(t);
232                                }
233                                Ok(None) => {}
234                                Err(e) => return Err(e),
235                            }
236                        } else {
237                            break;
238                        }
239                        iter = iter.next_const().0;
240                    }
241                    if let Some(k) = key {
242                        keyword = Some((k, current_type));
243                    }
244                }
245                // We cannot handle Transform, Private, Other extensions in a const context
246                Ok(_) => return Err(ParseError::InvalidSubtag),
247                Err(e) => return Err(e),
248            }
249        }
250    }
251
252    Ok((language, script, region, variant, keyword))
253}
254
255#[allow(clippy::type_complexity)]
256pub const fn parse_language_identifier_with_single_variant(
257    t: &[u8],
258    mode: ParserMode,
259) -> Result<
260    (
261        subtags::Language,
262        Option<subtags::Script>,
263        Option<subtags::Region>,
264        Option<subtags::Variant>,
265    ),
266    ParseError,
267> {
268    let iter = SubtagIterator::new(t);
269    match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
270        Ok((l, s, r, v, _)) => Ok((l, s, r, v)),
271        Err(e) => Err(e),
272    }
273}