zerotrie/
options.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Options for building and reading from a ZeroTrie.
//!
//! These options are internal to the crate. A small selection of options
//! is exported by way of the different public types on this crate.

/// Whether to use the perfect hash function in the ZeroTrie.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum PhfMode {
    /// Use binary search for all branch nodes.
    BinaryOnly,
    /// Use the perfect hash function for large branch nodes.
    UsePhf,
}

impl PhfMode {
    /// Returns this mode's bit for the packed options byte:
    /// `0x1` for [`PhfMode::UsePhf`] and `0` for [`PhfMode::BinaryOnly`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::BinaryOnly => 0,
            Self::UsePhf => 0x1,
        }
    }
}
28
/// Whether to support non-ASCII data in the ZeroTrie.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum AsciiMode {
    /// Support only ASCII, returning an error if non-ASCII is found.
    AsciiOnly,
    /// Support all data, creating span nodes for non-ASCII bytes.
    BinarySpans,
}

impl AsciiMode {
    /// Returns this mode's bit for the packed options byte:
    /// `0x2` for [`AsciiMode::BinarySpans`] and `0` for [`AsciiMode::AsciiOnly`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::AsciiOnly => 0,
            Self::BinarySpans => 0x2,
        }
    }
}
47
/// Whether to enforce a limit to the capacity of the ZeroTrie.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum CapacityMode {
    /// Return an error if the trie requires a branch of more than 2^32 bytes.
    Normal,
    /// Construct the trie without returning an error.
    Extended,
}

impl CapacityMode {
    /// Returns this mode's bit for the packed options byte:
    /// `0x4` for [`CapacityMode::Extended`] and `0` for [`CapacityMode::Normal`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::Normal => 0,
            Self::Extended => 0x4,
        }
    }
}
66
/// How to handle strings with mixed ASCII case at a node, such as "abc" and "Abc".
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum CaseSensitivity {
    /// Allow all strings and sort them by byte value.
    Sensitive,
    /// Reject strings with different case and sort them as if `to_ascii_lowercase` is called.
    IgnoreCase,
}

impl CaseSensitivity {
    /// Returns this mode's bit for the packed options byte:
    /// `0x8` for [`CaseSensitivity::IgnoreCase`] and `0` for [`CaseSensitivity::Sensitive`].
    ///
    /// Only compiled when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        match self {
            Self::Sensitive => 0,
            Self::IgnoreCase => 0x8,
        }
    }
}
85
86#[derive(Copy, Clone)]
87pub(crate) struct ZeroTrieBuilderOptions {
88    pub phf_mode: PhfMode,
89    pub ascii_mode: AsciiMode,
90    pub capacity_mode: CapacityMode,
91    pub case_sensitivity: CaseSensitivity,
92}
93
94impl ZeroTrieBuilderOptions {
95    #[cfg(feature = "serde")]
96    pub(crate) const fn to_u8_flags(self) -> u8 {
97        self.phf_mode.to_u8_flag()
98            | self.ascii_mode.to_u8_flag()
99            | self.capacity_mode.to_u8_flag()
100            | self.case_sensitivity.to_u8_flag()
101    }
102}
103
104pub(crate) trait ZeroTrieWithOptions {
105    const OPTIONS: ZeroTrieBuilderOptions;
106}
107
108/// All branch nodes are binary search
109/// and there are no span nodes.
110impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
111    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
112        phf_mode: PhfMode::BinaryOnly,
113        ascii_mode: AsciiMode::AsciiOnly,
114        capacity_mode: CapacityMode::Normal,
115        case_sensitivity: CaseSensitivity::Sensitive,
116    };
117}
118
119impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> {
120    #[cfg(feature = "serde")]
121    pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
122}
123
124/// All branch nodes are binary search
125/// and nodes use case-insensitive matching.
126impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
127    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
128        phf_mode: PhfMode::BinaryOnly,
129        ascii_mode: AsciiMode::AsciiOnly,
130        capacity_mode: CapacityMode::Normal,
131        case_sensitivity: CaseSensitivity::IgnoreCase,
132    };
133}
134
135/// Branch nodes could be either binary search or PHF.
136impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
137    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
138        phf_mode: PhfMode::UsePhf,
139        ascii_mode: AsciiMode::BinarySpans,
140        capacity_mode: CapacityMode::Normal,
141        case_sensitivity: CaseSensitivity::Sensitive,
142    };
143}
144
145/// No limited capacity assertion.
146impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
147    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
148        phf_mode: PhfMode::UsePhf,
149        ascii_mode: AsciiMode::BinarySpans,
150        capacity_mode: CapacityMode::Extended,
151        case_sensitivity: CaseSensitivity::Sensitive,
152    };
153}