icu_locale_core/parser/
langid.rs
1pub use super::errors::ParseError;
6use crate::extensions::unicode::{Attribute, Key, Value};
7use crate::extensions::ExtensionType;
8use crate::parser::SubtagIterator;
9#[cfg(feature = "alloc")]
10use crate::shortvec::ShortBoxSlice;
11use crate::subtags::Subtag;
12#[cfg(feature = "alloc")]
13use crate::LanguageIdentifier;
14use crate::{extensions, subtags};
15
/// Selects how much of the input the parsers in this module consume.
#[derive(Clone, Copy, PartialEq)]
pub enum ParserMode {
    /// The input is a bare language identifier: single-byte subtags do not
    /// end parsing; they are handled like any other subtag.
    LanguageIdentifier,
    /// The input is a locale: the language identifier part ends at the first
    /// single-byte subtag. The const parser in this module then goes on to
    /// read a Unicode extension.
    Locale,
    /// Like the non-identifier modes, stops at the first single-byte subtag,
    /// and additionally stops without an error at the first subtag that does
    /// not fit at the current position.
    // Allowed to be unused: nothing in this file constructs this variant.
    #[allow(dead_code)]
    Partial,
}
23
/// Tracks which kinds of subtag may still appear while walking the subtags
/// that follow the language.
#[derive(Clone, Copy, PartialEq)]
enum ParserPosition {
    /// Directly after the language: a script, a region or a variant may follow.
    Script,
    /// After the script: a region or a variant may follow.
    Region,
    /// After the region or the first variant: only variants may follow.
    Variant,
}
30
/// Parses a language identifier from the subtags yielded by `iter`.
///
/// The first subtag must be a language. The following subtags are read in the
/// order script, region, variants, each of them optional. Variants are stored
/// in sorted order.
///
/// Unless `mode` is [`ParserMode::LanguageIdentifier`], parsing stops at the
/// first single-byte subtag, which is left unconsumed in `iter`. In
/// [`ParserMode::Partial`], parsing also stops, without an error, at the first
/// subtag that does not fit at the current position; that subtag is left
/// unconsumed as well.
///
/// # Errors
///
/// - [`ParseError::InvalidLanguage`] if `iter` yields no subtag at all.
/// - Whatever error `subtags::Language::try_from_utf8` reports for the first
///   subtag.
/// - [`ParseError::InvalidSubtag`] if a subtag does not fit at its position
///   (outside of `Partial` mode), or if a variant that is already recorded is
///   seen again once only variants are admissible.
#[cfg(feature = "alloc")]
pub fn parse_language_identifier_from_iter(
    iter: &mut SubtagIterator,
    mode: ParserMode,
) -> Result<LanguageIdentifier, ParseError> {
    let language = match iter.next() {
        Some(first) => subtags::Language::try_from_utf8(first)?,
        None => return Err(ParseError::InvalidLanguage),
    };

    let mut script = None;
    let mut region = None;
    let mut variants = ShortBoxSlice::new();
    let mut position = ParserPosition::Script;

    while let Some(subtag) = iter.peek() {
        // A single-byte subtag ends the identifier in every mode except
        // `LanguageIdentifier`; it is only peeked, never consumed here.
        if subtag.len() == 1 && mode != ParserMode::LanguageIdentifier {
            break;
        }

        match position {
            ParserPosition::Script | ParserPosition::Region => {
                // A script is only attempted directly after the language.
                let script_candidate = match position {
                    ParserPosition::Script => subtags::Script::try_from_utf8(subtag).ok(),
                    ParserPosition::Region | ParserPosition::Variant => None,
                };

                if let Some(parsed) = script_candidate {
                    script = Some(parsed);
                    position = ParserPosition::Region;
                } else if let Ok(parsed) = subtags::Region::try_from_utf8(subtag) {
                    region = Some(parsed);
                    position = ParserPosition::Variant;
                } else if let Ok(parsed) = subtags::Variant::try_from_utf8(subtag) {
                    // Insert at the slot reported by the search so the list
                    // stays sorted.
                    if let Err(slot) = variants.binary_search(&parsed) {
                        variants.insert(slot, parsed);
                    }
                    position = ParserPosition::Variant;
                } else if mode == ParserMode::Partial {
                    break;
                } else {
                    return Err(ParseError::InvalidSubtag);
                }
            }
            ParserPosition::Variant => match subtags::Variant::try_from_utf8(subtag) {
                Ok(parsed) => match variants.binary_search(&parsed) {
                    Err(slot) => {
                        variants.insert(slot, parsed);
                    }
                    // The variant is already present: reject the repetition.
                    Ok(_) => return Err(ParseError::InvalidSubtag),
                },
                Err(_) if mode == ParserMode::Partial => break,
                Err(_) => return Err(ParseError::InvalidSubtag),
            },
        }

        // The peeked subtag was accepted; consume it.
        iter.next();
    }

    Ok(LanguageIdentifier {
        language,
        script,
        region,
        variants: subtags::Variants::from_short_slice_unchecked(variants),
    })
}
105
/// Parses a language identifier from the raw bytes `t`.
///
/// Wraps `t` in a [`SubtagIterator`] and hands it, together with `mode`, to
/// [`parse_language_identifier_from_iter`]; the result of that call is
/// returned unchanged.
#[cfg(feature = "alloc")]
pub fn parse_language_identifier(
    t: &[u8],
    mode: ParserMode,
) -> Result<LanguageIdentifier, ParseError> {
    parse_language_identifier_from_iter(&mut SubtagIterator::new(t), mode)
}
114
115#[allow(clippy::type_complexity)]
116pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
117 mut iter: SubtagIterator,
118 mode: ParserMode,
119) -> Result<
120 (
121 subtags::Language,
122 Option<subtags::Script>,
123 Option<subtags::Region>,
124 Option<subtags::Variant>,
125 Option<(extensions::unicode::Key, Option<Subtag>)>,
126 ),
127 ParseError,
128> {
129 let language;
130 let mut script = None;
131 let mut region = None;
132 let mut variant = None;
133 let mut keyword = None;
134
135 if let (i, Some(subtag)) = iter.next_const() {
136 iter = i;
137 match subtags::Language::try_from_utf8(subtag) {
138 Ok(l) => language = l,
139 Err(e) => return Err(e),
140 }
141 } else {
142 return Err(ParseError::InvalidLanguage);
143 }
144
145 let mut position = ParserPosition::Script;
146
147 while let Some(subtag) = iter.peek() {
148 if !matches!(mode, ParserMode::LanguageIdentifier) && subtag.len() == 1 {
149 break;
150 }
151
152 if matches!(position, ParserPosition::Script) {
153 if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
154 script = Some(s);
155 position = ParserPosition::Region;
156 } else if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
157 region = Some(r);
158 position = ParserPosition::Variant;
159 } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
160 debug_assert!(variant.is_none());
162 variant = Some(v);
163 position = ParserPosition::Variant;
164 } else if matches!(mode, ParserMode::Partial) {
165 break;
166 } else {
167 return Err(ParseError::InvalidSubtag);
168 }
169 } else if matches!(position, ParserPosition::Region) {
170 if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
171 region = Some(s);
172 position = ParserPosition::Variant;
173 } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
174 debug_assert!(variant.is_none());
176 variant = Some(v);
177 position = ParserPosition::Variant;
178 } else if matches!(mode, ParserMode::Partial) {
179 break;
180 } else {
181 return Err(ParseError::InvalidSubtag);
182 }
183 } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
184 debug_assert!(matches!(position, ParserPosition::Variant));
185 if variant.is_some() {
186 return Err(ParseError::InvalidSubtag);
188 }
189 variant = Some(v);
190 } else if matches!(mode, ParserMode::Partial) {
191 break;
192 } else {
193 return Err(ParseError::InvalidSubtag);
194 }
195
196 iter = iter.next_const().0;
197 }
198
199 if matches!(mode, ParserMode::Locale) {
200 if let Some(subtag) = iter.peek() {
201 match ExtensionType::try_from_utf8(subtag) {
202 Ok(ExtensionType::Unicode) => {
203 iter = iter.next_const().0;
204 if let Some(peek) = iter.peek() {
205 if Attribute::try_from_utf8(peek).is_ok() {
206 return Err(ParseError::InvalidSubtag);
208 }
209 }
210
211 let mut key = None;
212 let mut current_type = None;
213
214 while let Some(peek) = iter.peek() {
215 if peek.len() == 2 {
216 if key.is_some() {
217 return Err(ParseError::InvalidSubtag);
219 }
220 match Key::try_from_utf8(peek) {
221 Ok(k) => key = Some(k),
222 Err(e) => return Err(e),
223 };
224 } else if key.is_some() {
225 match Value::parse_subtag_from_utf8(peek) {
226 Ok(Some(t)) => {
227 if current_type.is_some() {
228 return Err(ParseError::InvalidSubtag);
230 }
231 current_type = Some(t);
232 }
233 Ok(None) => {}
234 Err(e) => return Err(e),
235 }
236 } else {
237 break;
238 }
239 iter = iter.next_const().0;
240 }
241 if let Some(k) = key {
242 keyword = Some((k, current_type));
243 }
244 }
245 Ok(_) => return Err(ParseError::InvalidSubtag),
247 Err(e) => return Err(e),
248 }
249 }
250 }
251
252 Ok((language, script, region, variant, keyword))
253}
254
255#[allow(clippy::type_complexity)]
256pub const fn parse_language_identifier_with_single_variant(
257 t: &[u8],
258 mode: ParserMode,
259) -> Result<
260 (
261 subtags::Language,
262 Option<subtags::Script>,
263 Option<subtags::Region>,
264 Option<subtags::Variant>,
265 ),
266 ParseError,
267> {
268 let iter = SubtagIterator::new(t);
269 match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
270 Ok((l, s, r, v, _)) => Ok((l, s, r, v)),
271 Err(e) => Err(e),
272 }
273}