pub struct LanguageIdentifier {
pub language: Language,
pub script: Option<Script>,
pub region: Option<Region>,
pub variants: Variants,
}
Expand description
A core struct representing a Unicode BCP47 Language Identifier.
§Ordering
This type deliberately does not implement `Ord` or `PartialOrd` because there are
multiple possible orderings. Depending on your use case, two orderings are available:
- A string ordering, suitable for stable serialization:
LanguageIdentifier::strict_cmp
- A struct ordering, suitable for use with a BTreeSet:
LanguageIdentifier::total_cmp
See issue: https://github.com/unicode-org/icu4x/issues/1215
§Parsing
Unicode recognizes three levels of standard conformance for any language identifier:
- well-formed - syntactically correct
- valid - well-formed and only uses registered language, region, script and variant subtags…
- canonical - valid and no deprecated codes or structure.
At the moment, parsing normalizes a well-formed language identifier, converting
`_` separators to `-` and adjusting casing to conform to the Unicode standard.
Any syntactically invalid subtags will cause the parsing to fail with an error.
This operation normalizes syntax to be well-formed. No legacy subtag replacement is performed.
For validation and canonicalization, see `LocaleCanonicalizer`.
§Examples
Simple example:
use icu::locale::{
langid,
subtags::{language, region},
};
let li = langid!("en-US");
assert_eq!(li.language, language!("en"));
assert_eq!(li.script, None);
assert_eq!(li.region, Some(region!("US")));
assert_eq!(li.variants.len(), 0);
More complex example:
use icu::locale::{
langid,
subtags::{language, region, script, variant},
};
let li = langid!("eN-latn-Us-Valencia");
assert_eq!(li.language, language!("en"));
assert_eq!(li.script, Some(script!("Latn")));
assert_eq!(li.region, Some(region!("US")));
assert_eq!(li.variants.get(0), Some(&variant!("valencia")));
Fields§
§language: Language
Language subtag of the language identifier.
script: Option<Script>
Script subtag of the language identifier.
region: Option<Region>
Region subtag of the language identifier.
variants: Variants
Variant subtags of the language identifier.
Implementations§
Source§impl LanguageIdentifier
impl LanguageIdentifier
Sourcepub const fn is_unknown(&self) -> bool
pub const fn is_unknown(&self) -> bool
Whether this `LanguageIdentifier` equals `LanguageIdentifier::UNKNOWN`.
Sourcepub fn strict_cmp(&self, other: &[u8]) -> Ordering
pub fn strict_cmp(&self, other: &[u8]) -> Ordering
Compare this `LanguageIdentifier` with BCP-47 bytes.
The return value is equivalent to what would happen if you first converted this
`LanguageIdentifier` to a BCP-47 string and then performed a byte comparison.
This function is case-sensitive and results in a total order, so it is appropriate for
binary search. The only argument producing `Ordering::Equal` is `self.to_string()`.
§Examples
Sorting a list of langids with this method requires converting one of them to a string:
use icu::locale::LanguageIdentifier;
use std::cmp::Ordering;
use writeable::Writeable;
// Random input order:
let bcp47_strings: &[&str] = &[
"ar-Latn",
"zh-Hant-TW",
"zh-TW",
"und-fonipa",
"zh-Hant",
"ar-SA",
];
let mut langids = bcp47_strings
.iter()
.map(|s| s.parse().unwrap())
.collect::<Vec<LanguageIdentifier>>();
langids.sort_by(|a, b| {
let b = b.write_to_string();
a.strict_cmp(b.as_bytes())
});
let strict_cmp_strings = langids
.iter()
.map(|l| l.to_string())
.collect::<Vec<String>>();
// Output ordering, sorted alphabetically
let expected_ordering: &[&str] = &[
"ar-Latn",
"ar-SA",
"und-fonipa",
"zh-Hant",
"zh-Hant-TW",
"zh-TW",
];
assert_eq!(expected_ordering, strict_cmp_strings);
Sourcepub fn total_cmp(&self, other: &Self) -> Ordering
pub fn total_cmp(&self, other: &Self) -> Ordering
Compare this `LanguageIdentifier` with another `LanguageIdentifier` field-by-field.
The result is a total ordering sufficient for use in a `BTreeSet`.
Unlike `LanguageIdentifier::strict_cmp`, the ordering may or may not be equivalent
to string ordering, and it may or may not be stable across ICU4X releases.
§Examples
This method returns a nonsensical ordering derived from the fields of the struct:
use icu::locale::LanguageIdentifier;
use std::cmp::Ordering;
// Input strings, sorted alphabetically
let bcp47_strings: &[&str] = &[
"ar-Latn",
"ar-SA",
"und-fonipa",
"zh-Hant",
"zh-Hant-TW",
"zh-TW",
];
assert!(bcp47_strings.windows(2).all(|w| w[0] < w[1]));
let mut langids = bcp47_strings
.iter()
.map(|s| s.parse().unwrap())
.collect::<Vec<LanguageIdentifier>>();
langids.sort_by(LanguageIdentifier::total_cmp);
let total_cmp_strings = langids
.iter()
.map(|l| l.to_string())
.collect::<Vec<String>>();
// Output ordering, sorted arbitrarily
let expected_ordering: &[&str] = &[
"ar-SA",
"ar-Latn",
"und-fonipa",
"zh-TW",
"zh-Hant",
"zh-Hant-TW",
];
assert_eq!(expected_ordering, total_cmp_strings);
Use a wrapper to add a `LanguageIdentifier` to a `BTreeSet`:
use icu::locale::LanguageIdentifier;
use std::cmp::Ordering;
use std::collections::BTreeSet;
#[derive(PartialEq, Eq)]
struct LanguageIdentifierTotalOrd(LanguageIdentifier);
impl Ord for LanguageIdentifierTotalOrd {
fn cmp(&self, other: &Self) -> Ordering {
self.0.total_cmp(&other.0)
}
}
impl PartialOrd for LanguageIdentifierTotalOrd {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
let _: BTreeSet<LanguageIdentifierTotalOrd> = unimplemented!();
Sourcepub fn normalizing_eq(&self, other: &str) -> bool
pub fn normalizing_eq(&self, other: &str) -> bool
Compare this `LanguageIdentifier` with a potentially unnormalized BCP-47 string.
The return value is equivalent to what would happen if you first parsed the
BCP-47 string to a `LanguageIdentifier` and then performed a structural comparison.
§Examples
use icu::locale::LanguageIdentifier;
let bcp47_strings: &[&str] = &[
"pl-LaTn-pL",
"uNd",
"UnD-adlm",
"uNd-GB",
"UND-FONIPA",
"ZH",
];
for a in bcp47_strings {
assert!(a.parse::<LanguageIdentifier>().unwrap().normalizing_eq(a));
}
Trait Implementations§
Source§impl Clone for LanguageIdentifier
impl Clone for LanguageIdentifier
Source§fn clone(&self) -> LanguageIdentifier
fn clone(&self) -> LanguageIdentifier
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for LanguageIdentifier
impl Debug for LanguageIdentifier
Source§impl Display for LanguageIdentifier
This trait is implemented for compatibility with `fmt!`.
To create a string, `Writeable::write_to_string` is usually more efficient.
impl Display for LanguageIdentifier
This trait is implemented for compatibility with `fmt!`.
To create a string, `Writeable::write_to_string` is usually more efficient.
Source§impl From<&LanguageIdentifier> for (Language, Option<Script>, Option<Region>)
Convert from a LanguageIdentifier
to an LSR tuple.
impl From<&LanguageIdentifier> for (Language, Option<Script>, Option<Region>)
Convert from a LanguageIdentifier
to an LSR tuple.
§Examples
use icu::locale::{
langid,
subtags::{language, region, script},
};
let lid = langid!("en-Latn-US");
let (lang, script, region) = (&lid).into();
assert_eq!(lang, language!("en"));
assert_eq!(script, Some(script!("Latn")));
assert_eq!(region, Some(region!("US")));
Source§fn from(langid: &LanguageIdentifier) -> Self
fn from(langid: &LanguageIdentifier) -> Self
Source§impl From<&LanguageIdentifier> for DataLocale
impl From<&LanguageIdentifier> for DataLocale
Source§fn from(langid: &LanguageIdentifier) -> Self
fn from(langid: &LanguageIdentifier) -> Self
Source§impl From<&LanguageIdentifier> for LocalePreferences
impl From<&LanguageIdentifier> for LocalePreferences
Source§fn from(lid: &LanguageIdentifier) -> Self
fn from(lid: &LanguageIdentifier) -> Self
Source§impl From<(Language, Option<Script>, Option<Region>)> for LanguageIdentifier
Convert from an LSR tuple to a LanguageIdentifier
.
impl From<(Language, Option<Script>, Option<Region>)> for LanguageIdentifier
Convert from an LSR tuple to a LanguageIdentifier
.
§Examples
use icu::locale::{
langid,
subtags::{language, region, script},
LanguageIdentifier,
};
let lang = language!("en");
let script = script!("Latn");
let region = region!("US");
assert_eq!(
LanguageIdentifier::from((lang, Some(script), Some(region))),
langid!("en-Latn-US")
);
Source§impl From<Language> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::language, LanguageIdentifier};
assert_eq!(LanguageIdentifier::from(language!("en")), langid!("en"));
impl From<Language> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::language, LanguageIdentifier};
assert_eq!(LanguageIdentifier::from(language!("en")), langid!("en"));
Source§impl From<LanguageIdentifier> for DataLocale
impl From<LanguageIdentifier> for DataLocale
Source§fn from(langid: LanguageIdentifier) -> Self
fn from(langid: LanguageIdentifier) -> Self
Source§impl From<LanguageIdentifier> for Locale
impl From<LanguageIdentifier> for Locale
Source§fn from(id: LanguageIdentifier) -> Self
fn from(id: LanguageIdentifier) -> Self
Source§impl From<Locale> for LanguageIdentifier
impl From<Locale> for LanguageIdentifier
Source§impl From<Option<Region>> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::region, LanguageIdentifier};
assert_eq!(
LanguageIdentifier::from(Some(region!("US"))),
langid!("und-US")
);
impl From<Option<Region>> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::region, LanguageIdentifier};
assert_eq!(
LanguageIdentifier::from(Some(region!("US"))),
langid!("und-US")
);
Source§impl From<Option<Script>> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::script, LanguageIdentifier};
assert_eq!(
LanguageIdentifier::from(Some(script!("latn"))),
langid!("und-Latn")
);
impl From<Option<Script>> for LanguageIdentifier
§Examples
use icu::locale::{langid, subtags::script, LanguageIdentifier};
assert_eq!(
LanguageIdentifier::from(Some(script!("latn"))),
langid!("und-Latn")
);
Source§impl Hash for LanguageIdentifier
impl Hash for LanguageIdentifier
Source§impl PartialEq for LanguageIdentifier
impl PartialEq for LanguageIdentifier
Source§impl Writeable for LanguageIdentifier
impl Writeable for LanguageIdentifier
Source§fn write_to<W: Write + ?Sized>(&self, sink: &mut W) -> Result
fn write_to<W: Write + ?Sized>(&self, sink: &mut W) -> Result
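Writes a string to the given sink. Errors from the sink are bubbled up.
The default implementation delegates to `write_to_parts`, and discards any
`Part` annotations.
Source§fn writeable_length_hint(&self) -> LengthHint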
fn writeable_length_hint(&self) -> LengthHint
Source§fn write_to_parts<S>(&self, sink: &mut S) -> Result<(), Error>where
S: PartsWrite + ?Sized,
fn write_to_parts<S>(&self, sink: &mut S) -> Result<(), Error>where
S: PartsWrite + ?Sized,
Writes bytes and `Part` annotations to the given sink. Errors from the
sink are bubbled up. The default implementation delegates to `write_to`,
and doesn’t produce any `Part` annotations.