1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
45//! Options for building and reading from a ZeroTrie.
6//!
7//! These options are internal to the crate. A small selection of options
8//! are exported by way of the different public types on this crate.
/// Selects the lookup strategy used at branch nodes of the ZeroTrie.
#[derive(Clone, Copy)]
pub(crate) enum PhfMode {
    /// Every branch node is resolved with binary search.
    BinaryOnly,
    /// Large branch nodes are resolved with the perfect hash function.
    UsePhf,
}
1819impl PhfMode {
20#[cfg(feature = "serde")]
21const fn to_u8_flag(self) -> u8 {
22match self {
23Self::BinaryOnly => 0,
24Self::UsePhf => 0x1,
25 }
26 }
27}
/// Selects whether the ZeroTrie may hold non-ASCII data.
#[derive(Clone, Copy)]
pub(crate) enum AsciiMode {
    /// Only ASCII is supported; encountering non-ASCII is an error.
    AsciiOnly,
    /// All data is supported; non-ASCII bytes are stored in span nodes.
    BinarySpans,
}
3738impl AsciiMode {
39#[cfg(feature = "serde")]
40const fn to_u8_flag(self) -> u8 {
41match self {
42Self::AsciiOnly => 0,
43Self::BinarySpans => 0x2,
44 }
45 }
46}
/// Selects whether the capacity limit of the ZeroTrie is enforced.
#[derive(Clone, Copy)]
pub(crate) enum CapacityMode {
    /// A trie that requires a branch of more than 2^32 bytes is an error.
    Normal,
    /// The trie is constructed without returning that error.
    Extended,
}
5657impl CapacityMode {
58#[cfg(feature = "serde")]
59const fn to_u8_flag(self) -> u8 {
60match self {
61Self::Normal => 0,
62Self::Extended => 0x4,
63 }
64 }
65}
/// Policy for strings at a node that differ only in ASCII case, such as "abc" and "Abc"
#[derive(Clone, Copy)]
pub(crate) enum CaseSensitivity {
    /// Every string is accepted; ordering is by byte value.
    Sensitive,
    /// Strings differing in case are rejected; ordering is as if
    /// `to_ascii_lowercase` is called.
    IgnoreCase,
}
7576impl CaseSensitivity {
77#[cfg(feature = "serde")]
78const fn to_u8_flag(self) -> u8 {
79match self {
80Self::Sensitive => 0,
81Self::IgnoreCase => 0x8,
82 }
83 }
84}
8586#[derive(Copy, Clone)]
87pub(crate) struct ZeroTrieBuilderOptions {
88pub phf_mode: PhfMode,
89pub ascii_mode: AsciiMode,
90pub capacity_mode: CapacityMode,
91pub case_sensitivity: CaseSensitivity,
92}
9394impl ZeroTrieBuilderOptions {
95#[cfg(feature = "serde")]
96pub(crate) const fn to_u8_flags(self) -> u8 {
97self.phf_mode.to_u8_flag()
98 | self.ascii_mode.to_u8_flag()
99 | self.capacity_mode.to_u8_flag()
100 | self.case_sensitivity.to_u8_flag()
101 }
102}
103104pub(crate) trait ZeroTrieWithOptions {
105const OPTIONS: ZeroTrieBuilderOptions;
106}
107108/// All branch nodes are binary search
109/// and there are no span nodes.
110impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
111const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
112 phf_mode: PhfMode::BinaryOnly,
113 ascii_mode: AsciiMode::AsciiOnly,
114 capacity_mode: CapacityMode::Normal,
115 case_sensitivity: CaseSensitivity::Sensitive,
116 };
117}
118119impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> {
120#[cfg(feature = "serde")]
121pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
122}
123124/// All branch nodes are binary search
125/// and nodes use case-insensitive matching.
126impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
127const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
128 phf_mode: PhfMode::BinaryOnly,
129 ascii_mode: AsciiMode::AsciiOnly,
130 capacity_mode: CapacityMode::Normal,
131 case_sensitivity: CaseSensitivity::IgnoreCase,
132 };
133}
134135/// Branch nodes could be either binary search or PHF.
136impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
137const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
138 phf_mode: PhfMode::UsePhf,
139 ascii_mode: AsciiMode::BinarySpans,
140 capacity_mode: CapacityMode::Normal,
141 case_sensitivity: CaseSensitivity::Sensitive,
142 };
143}
144145/// No limited capacity assertion.
146impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
147const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
148 phf_mode: PhfMode::UsePhf,
149 ascii_mode: AsciiMode::BinarySpans,
150 capacity_mode: CapacityMode::Extended,
151 case_sensitivity: CaseSensitivity::Sensitive,
152 };
153}