widestring/
macros.rs

// Defines one exported string-literal macro per invocation.
//
// Arguments, in order:
// * doc comments / attributes, forwarded onto the generated macro,
// * `$name`: name of the generated macro,
// * `$extra_len`: number of zeroed `u16` slots left after the encoded text (`1` leaves room for a
//   nul terminator); when it is greater than zero, a NUL code point in the text is rejected,
// * `$str` and `$fn`: a type at the crate root and its constructor, which is called with a
//   reference to the encoded array.
//
// The generated macro names every type, variant, and macro through `$crate::internals::core` so
// that its expansion cannot be affected by items the caller has shadowed or left out of scope.
macro_rules! implement_utf16_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                const _WIDESTRING_U16_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // Code units needed for the text itself plus the reserved trailing slots.
                const _WIDESTRING_U16_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf16(_WIDESTRING_U16_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U16_MACRO_UTF16: [$crate::internals::core::primitive::u16;
                        _WIDESTRING_U16_MACRO_LEN] = {
                    // Zero-initialised, so any reserved trailing slot already holds the terminator.
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u16; _WIDESTRING_U16_MACRO_LEN] = [0; _WIDESTRING_U16_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U16_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        _widestring_bytes = _widestring_rest;
                        if $extra_len > 0 && _widestring_ch == 0 {
                            // Fully qualified so the expansion does not rely on a `panic!` being
                            // in scope at the call site.
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf16
                        if (_widestring_ch & 0xFFFF) == _widestring_ch {
                            // Fits in a single code unit.
                            _widestring_buffer[_widestring_i] = _widestring_ch as $crate::internals::core::primitive::u16;
                            _widestring_i += 1;
                        } else {
                            // Supplementary plane: split into a high/low surrogate pair.
                            let _widestring_code = _widestring_ch - 0x1_0000;
                            _widestring_buffer[_widestring_i] = 0xD800 | ((_widestring_code >> 10) as $crate::internals::core::primitive::u16);
                            _widestring_buffer[_widestring_i + 1] = 0xDC00 | ((_widestring_code as $crate::internals::core::primitive::u16) & 0x3FF);
                            _widestring_i += 2;
                        }
                    }
                    _widestring_buffer
                };
                // SAFETY: the array was produced above by re-encoding a `&str`, so it holds
                // well-formed UTF-16. When a trailing slot is reserved, NUL code points were
                // rejected and that slot still holds its initial zero.
                // NOTE(review): presumably not every `$fn` is an `unsafe fn`, hence the `allow`.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U16_MACRO_UTF16) }
            }};
        }
    }
}
39
40implement_utf16_macro! {
41    /// Converts a string literal into a `const` UTF-16 string slice of type
42    /// [`Utf16Str`][crate::Utf16Str].
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// # #[cfg(feature = "alloc")] {
48    /// use widestring::{utf16str, Utf16Str, Utf16String};
49    ///
50    /// const STRING: &Utf16Str = utf16str!("My string");
51    /// assert_eq!(Utf16String::from_str("My string"), STRING);
52    /// # }
53    /// ```
54    utf16str 0 Utf16Str from_slice_unchecked
55}
56
57implement_utf16_macro! {
58    /// Converts a string literal into a `const` UTF-16 string slice of type
59    /// [`U16Str`][crate::U16Str].
60    ///
61    /// The resulting `const` string slice will always be valid UTF-16.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// # #[cfg(feature = "alloc")] {
67    /// use widestring::{u16str, U16Str, U16String};
68    ///
69    /// const STRING: &U16Str = u16str!("My string");
70    /// assert_eq!(U16String::from_str("My string"), STRING);
71    /// # }
72    /// ```
73    u16str 0 U16Str from_slice
74}
75
76implement_utf16_macro! {
77    /// Converts a string literal into a `const` UTF-16 string slice of type
78    /// [`U16CStr`][crate::U16CStr].
79    ///
80    /// The resulting `const` string slice will always be valid UTF-16 and include a nul terminator.
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// # #[cfg(feature = "alloc")] {
86    /// use widestring::{u16cstr, U16CStr, U16CString};
87    ///
88    /// const STRING: &U16CStr = u16cstr!("My string");
89    /// assert_eq!(U16CString::from_str("My string").unwrap(), STRING);
90    /// # }
91    /// ```
92    u16cstr 1 U16CStr from_slice_unchecked
93}
94
// Defines one exported string-literal macro per invocation.
//
// Arguments, in order:
// * doc comments / attributes, forwarded onto the generated macro,
// * `$name`: name of the generated macro,
// * `$extra_len`: number of zeroed `u32` slots left after the decoded text (`1` leaves room for a
//   nul terminator); when it is greater than zero, a NUL code point in the text is rejected,
// * `$str` and `$fn`: a type at the crate root and its constructor, which is called with a
//   reference to the resulting array.
//
// The generated macro names every type, variant, and macro through `$crate::internals::core` so
// that its expansion cannot be affected by items the caller has shadowed or left out of scope.
macro_rules! implement_utf32_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                const _WIDESTRING_U32_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // One element per code point plus the reserved trailing slots.
                const _WIDESTRING_U32_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf32(_WIDESTRING_U32_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U32_MACRO_UTF32: [$crate::internals::core::primitive::u32;
                        _WIDESTRING_U32_MACRO_LEN] = {
                    // Zero-initialised, so any reserved trailing slot already holds the terminator.
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u32; _WIDESTRING_U32_MACRO_LEN] = [0; _WIDESTRING_U32_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U32_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        if $extra_len > 0 && _widestring_ch == 0 {
                            // Fully qualified so the expansion does not rely on a `panic!` being
                            // in scope at the call site.
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        _widestring_bytes = _widestring_rest;
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    _widestring_buffer
                };
                // SAFETY: every element was decoded above from a `&str`, so the array holds
                // valid code points. When a trailing slot is reserved, NUL code points were
                // rejected and that slot still holds its initial zero.
                // NOTE(review): presumably not every `$fn` is an `unsafe fn`, hence the `allow`.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U32_MACRO_UTF32) }
            }};
        }
    }
}
125
126implement_utf32_macro! {
127    /// Converts a string literal into a `const` UTF-32 string slice of type
128    /// [`Utf32Str`][crate::Utf32Str].
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// # #[cfg(feature = "alloc")] {
134    /// use widestring::{utf32str, Utf32Str, Utf32String};
135    ///
136    /// const STRING: &Utf32Str = utf32str!("My string");
137    /// assert_eq!(Utf32String::from_str("My string"), STRING);
138    /// # }
139    /// ```
140    utf32str 0 Utf32Str from_slice_unchecked
141}
142
143implement_utf32_macro! {
144    /// Converts a string literal into a `const` UTF-32 string slice of type
145    /// [`U32Str`][crate::U32Str].
146    ///
147    /// The resulting `const` string slice will always be valid UTF-32.
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// # #[cfg(feature = "alloc")] {
153    /// use widestring::{u32str, U32Str, U32String};
154    ///
155    /// const STRING: &U32Str = u32str!("My string");
156    /// assert_eq!(U32String::from_str("My string"), STRING);
157    /// # }
158    /// ```
159    u32str 0 U32Str from_slice
160}
161
162implement_utf32_macro! {
163    /// Converts a string literal into a `const` UTF-32 string slice of type
164    /// [`U32CStr`][crate::U32CStr].
165    ///
166    /// The resulting `const` string slice will always be valid UTF-32 and include a nul terminator.
167    ///
168    /// # Examples
169    ///
170    /// ```
171    /// # #[cfg(feature = "alloc")] {
172    /// use widestring::{u32cstr, U32CStr, U32CString};
173    ///
174    /// const STRING: &U32CStr = u32cstr!("My string");
175    /// assert_eq!(U32CString::from_str("My string").unwrap(), STRING);
176    /// # }
177    /// ```
178    u32cstr 1 U32CStr from_slice_unchecked
179}
180
/// Platform-selected alias of the [`u16str`] or [`u32str`] macro, meant to be paired with the
/// [`WideStr`][crate::WideStr] type alias.
///
/// On non-Windows targets this expands to [`u32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widestr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `u32str!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        u32str!($literal)
    }};
}
192
/// Platform-selected alias of the [`utf16str`] or [`utf32str`] macro, meant to be paired with
/// the [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// On non-Windows targets this expands to [`utf32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! wideutfstr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `utf32str!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        utf32str!($literal)
    }};
}
204
/// Platform-selected alias of the [`u16cstr`] or [`u32cstr`] macro, meant to be paired with the
/// [`WideCStr`][crate::WideCStr] type alias.
///
/// On non-Windows targets this expands to [`u32cstr`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widecstr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `u32cstr!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        u32cstr!($literal)
    }};
}
216
/// Platform-selected alias of the [`u16str`] or [`u32str`] macro, meant to be paired with the
/// [`WideStr`][crate::WideStr] type alias.
///
/// On Windows targets this expands to [`u16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! widestr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `u16str!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        u16str!($literal)
    }};
}
228
/// Platform-selected alias of the [`utf16str`] or [`utf32str`] macro, meant to be paired with
/// the [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// On Windows targets this expands to [`utf16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! wideutfstr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `utf16str!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        utf16str!($literal)
    }};
}
240
/// Platform-selected alias of the [`u16cstr`] or [`u32cstr`] macro, meant to be paired with the
/// [`WideCStr`][crate::WideCStr] type alias.
///
/// On Windows targets this expands to [`u16cstr`].
#[cfg(windows)]
#[macro_export]
macro_rules! widecstr {
    ($literal:expr) => {{
        // Bring the crate root into scope so that the unqualified `u16cstr!` below can resolve
        // at the call site; the import goes unused when the macro is already in scope.
        #[allow(unused_imports)]
        use $crate::*;
        u16cstr!($literal)
    }};
}
252
/// Includes a UTF-16 encoded file as a [`Utf16Str`][crate::Utf16Str].
///
/// This uses [`include_bytes`](core::include_bytes) to accomplish this, and decodes the bytes at
/// compile time:
///
/// - A leading byte-order mark selects little- or big-endian decoding and is not part of the
///   resulting string. Without one, the file is read as little-endian.
/// - A trailing `0x0000` code unit is not part of the resulting string.
///
/// Compilation fails if the file has an odd number of bytes or contains an unpaired surrogate.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{include_utf16str, Utf16Str, Utf16String};
///
/// const STRING: &Utf16Str = include_utf16str!("example.txt");
/// assert_eq!(Utf16String::from_str("My string"), STRING);
/// # }
/// ```
#[macro_export]
macro_rules! include_utf16str {
    ($text:expr) => {{
        const _WIDESTRING_U16_INCLUDE_MACRO_U8: &[$crate::internals::core::primitive::u8] =
            $crate::internals::core::include_bytes!($text);
        // Number of `u16` code units in the file, byte-order mark and trailing NUL included.
        const _WIDESTRING_U16_INCLUDE_MACRO_LEN: $crate::internals::core::primitive::usize = {
            let _widestring_len =
                <[$crate::internals::core::primitive::u8]>::len(_WIDESTRING_U16_INCLUDE_MACRO_U8);
            // Types and macros are named through `$crate::internals::core` so the expansion
            // does not depend on what the caller has shadowed or left out of scope.
            let _widestring_unit_size = $crate::internals::core::mem::size_of::<
                $crate::internals::core::primitive::u16,
            >();
            if _widestring_len % _widestring_unit_size != 0 {
                $crate::internals::core::panic!("file not encoded as UTF-16")
            }
            _widestring_len / _widestring_unit_size
        };
        // Decoded code units, whether a byte-order mark was found, and whether the file ended
        // with a `0x0000` code unit.
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16: (
            [$crate::internals::core::primitive::u16; _WIDESTRING_U16_INCLUDE_MACRO_LEN],
            $crate::internals::core::primitive::bool,
            $crate::internals::core::primitive::bool,
        ) = {
            let mut _widestring_buffer: [$crate::internals::core::primitive::u16;
                _WIDESTRING_U16_INCLUDE_MACRO_LEN] = [0; _WIDESTRING_U16_INCLUDE_MACRO_LEN];
            let mut _widestring_bytes = _WIDESTRING_U16_INCLUDE_MACRO_U8;
            let mut _widestring_i = 0;
            let mut _widestring_decode = $crate::internals::DecodeUtf16 {
                bom: $crate::internals::core::option::Option::None,
                eof: false,
                next: $crate::internals::core::option::Option::None,
                forward_buf: $crate::internals::core::option::Option::None,
                back_buf: $crate::internals::core::option::Option::None,
            };

            // The decoder is consumed and handed back by every call, so it is re-bound in both
            // arms; `Err` signals that the input is exhausted.
            loop {
                match $crate::internals::DecodeUtf16::next_code_point(
                    _widestring_decode,
                    _widestring_bytes,
                ) {
                    $crate::internals::core::result::Result::Ok((
                        _widestring_new_decode,
                        _widestring_ch,
                        _widestring_rest,
                    )) => {
                        _widestring_decode = _widestring_new_decode;

                        _widestring_bytes = _widestring_rest;
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    $crate::internals::core::result::Result::Err(_widestring_new_decode) => {
                        _widestring_decode = _widestring_new_decode;
                        break;
                    }
                }
            }

            (
                _widestring_buffer,
                $crate::internals::core::matches!(
                    _widestring_decode.bom,
                    $crate::internals::core::option::Option::Some(
                        $crate::internals::core::option::Option::Some(_)
                    )
                ),
                _widestring_decode.eof,
            )
        };
        // Drop the byte-order mark from the front and the NUL from the back, when present.
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED:
            &[$crate::internals::core::primitive::u16] = {
            match &_WIDESTRING_U16_INCLUDE_MACRO_UTF16 {
                (buffer, false, false) => buffer,
                ([_bom, rest @ ..], true, false) => rest,
                ([rest @ .., _eof], false, true) => rest,
                ([_bom, rest @ .., _eof], true, true) => rest,
            }
        };

        // SAFETY: `DecodeUtf16::next_code_point` panics on any unpaired surrogate, so reaching
        // this point means the buffer holds well-formed UTF-16.
        #[allow(unused_unsafe)]
        unsafe {
            $crate::Utf16Str::from_slice_unchecked(_WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED)
        }
    }};
}
342
343#[doc(hidden)]
344#[allow(missing_debug_implementations)]
345pub mod internals {
346    pub use core;
347
348    // A const implementation of https://github.com/rust-lang/rust/blob/d902752866cbbdb331e3cf28ff6bba86ab0f6c62/library/core/src/str/mod.rs#L509-L537
349    // Assumes `utf8` is a valid &str
350    pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
351        const CONT_MASK: u8 = 0b0011_1111;
352        match utf8 {
353            [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
354            [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
355                (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
356                rest,
357            )),
358            [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
359                (((*one & 0b0000_1111) as u32) << 12)
360                    | (((*two & CONT_MASK) as u32) << 6)
361                    | ((*three & CONT_MASK) as u32),
362                rest,
363            )),
364            [one, two, three, four, rest @ ..] => Some((
365                (((*one & 0b0000_0111) as u32) << 18)
366                    | (((*two & CONT_MASK) as u32) << 12)
367                    | (((*three & CONT_MASK) as u32) << 6)
368                    | ((*four & CONT_MASK) as u32),
369                rest,
370            )),
371            [..] => None,
372        }
373    }
374
    /// Byte order announced by a byte-order mark at the start of a UTF-16 byte stream.
    pub enum BoM {
        /// The stream starts with the bytes `FF FE`; code units are decoded as little-endian.
        Little,
        /// The stream starts with the bytes `FE FF`; code units are decoded as big-endian.
        Big,
    }
379
380    pub struct DecodeUtf16 {
381        pub bom: Option<Option<BoM>>,
382        pub eof: bool,
383        pub next: Option<u16>,
384        pub forward_buf: Option<u16>,
385        pub back_buf: Option<u16>,
386    }
387
388    impl DecodeUtf16 {
389        pub const fn next_code_point(
390            mut self,
391            mut utf16: &[u8],
392        ) -> Result<(Self, u16, &[u8]), Self> {
393            if let [one, two] = utf16 {
394                if u16::from_le_bytes([*one, *two]) == 0x0000 {
395                    self.eof = true;
396                }
397            }
398
399            if self.bom.is_none() {
400                if let [one, two, ..] = utf16 {
401                    let ch = u16::from_le_bytes([*one, *two]);
402                    if ch == 0xfeff {
403                        self.bom = Some(Some(BoM::Little));
404                    } else if ch == 0xfffe {
405                        self.bom = Some(Some(BoM::Big));
406                    } else {
407                        self.bom = Some(None);
408                    }
409                }
410            }
411
412            // Copied from `DecodeUtf16`
413            if let Some(u) = self.next {
414                self.next = None;
415                return Ok((self, u, utf16));
416            }
417
418            let u = if let Some(u) = self.forward_buf {
419                self.forward_buf = None;
420                u
421            } else if let [one, two, rest @ ..] = utf16 {
422                utf16 = rest;
423                match self.bom {
424                    Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
425                    _ => u16::from_le_bytes([*one, *two]),
426                }
427            } else if let Some(u) = self.back_buf {
428                self.back_buf = None;
429                u
430            } else {
431                return Err(self);
432            };
433
434            if !crate::is_utf16_surrogate(u) {
435                Ok((self, u, utf16))
436            } else if crate::is_utf16_low_surrogate(u) {
437                panic!("unpaired surrogate found")
438            } else {
439                let u2 = if let [one, two, rest @ ..] = utf16 {
440                    utf16 = rest;
441                    match self.bom {
442                        Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
443                        _ => u16::from_le_bytes([*one, *two]),
444                    }
445                } else if let Some(u) = self.back_buf {
446                    self.back_buf = None;
447                    u
448                } else {
449                    panic!("unpaired surrogate found")
450                };
451
452                if !crate::is_utf16_low_surrogate(u2) {
453                    panic!("unpaired surrogate found")
454                }
455
456                self.next = Some(u2);
457
458                Ok((self, u, utf16))
459            }
460        }
461    }
462
463    // A const implementation of `s.chars().map(|ch| ch.len_utf16()).sum()`
464    pub const fn length_as_utf16(s: &str) -> usize {
465        let mut bytes = s.as_bytes();
466        let mut len = 0;
467        while let Some((ch, rest)) = next_code_point(bytes) {
468            bytes = rest;
469            len += if (ch & 0xFFFF) == ch { 1 } else { 2 };
470        }
471        len
472    }
473
474    // A const implementation of `s.chars().len()`
475    pub const fn length_as_utf32(s: &str) -> usize {
476        let mut bytes = s.as_bytes();
477        let mut len = 0;
478        while let Some((_, rest)) = next_code_point(bytes) {
479            bytes = rest;
480            len += 1;
481        }
482        len
483    }
484}
485
#[cfg(all(test, feature = "alloc"))]
mod test {
    use crate::{
        U16CStr, U16Str, U32CStr, U32Str, Utf16Str, Utf16String, Utf32Str, Utf32String, WideCStr,
        WideStr, WideString,
    };

    // Every macro is fed the same literal, which contains characters both inside and outside
    // the Basic Multilingual Plane. The included files are compared against that same text.
    const UTF16STR_TEST: &Utf16Str = utf16str!("⚧️🏳️‍⚧️➡️s");
    const UTF16STR_INCLUDE_LE_TEST: &Utf16Str = include_utf16str!("test_le.txt");
    const UTF16STR_INCLUDE_BE_TEST: &Utf16Str = include_utf16str!("test_be.txt");
    const U16STR_TEST: &U16Str = u16str!("⚧️🏳️‍⚧️➡️s");
    const U16CSTR_TEST: &U16CStr = u16cstr!("⚧️🏳️‍⚧️➡️s");
    const UTF32STR_TEST: &Utf32Str = utf32str!("⚧️🏳️‍⚧️➡️s");
    const U32STR_TEST: &U32Str = u32str!("⚧️🏳️‍⚧️➡️s");
    const U32CSTR_TEST: &U32CStr = u32cstr!("⚧️🏳️‍⚧️➡️s");
    const WIDESTR_TEST: &WideStr = widestr!("⚧️🏳️‍⚧️➡️s");
    const WIDECSTR_TEST: &WideCStr = widecstr!("⚧️🏳️‍⚧️➡️s");

    /// Checks each compile-time string against the same text converted at run time.
    #[test]
    fn str_macros() {
        // UTF-16 flavours, including both included files.
        let utf16 = Utf16String::from_str("⚧️🏳️‍⚧️➡️s");
        assert_eq!(&utf16, UTF16STR_TEST);
        assert_eq!(&utf16, UTF16STR_INCLUDE_LE_TEST);
        assert_eq!(&utf16, UTF16STR_INCLUDE_BE_TEST);
        assert_eq!(&utf16, U16STR_TEST);
        assert_eq!(&utf16, U16CSTR_TEST);
        let utf16_has_terminator = matches!(U16CSTR_TEST.as_slice_with_nul().last(), Some(&0));
        assert!(utf16_has_terminator);

        // UTF-32 flavours.
        let utf32 = Utf32String::from_str("⚧️🏳️‍⚧️➡️s");
        assert_eq!(&utf32, UTF32STR_TEST);
        assert_eq!(&utf32, U32STR_TEST);
        assert_eq!(&utf32, U32CSTR_TEST);
        let utf32_has_terminator = matches!(U32CSTR_TEST.as_slice_with_nul().last(), Some(&0));
        assert!(utf32_has_terminator);

        // Platform-dependent aliases.
        let wide = WideString::from_str("⚧️🏳️‍⚧️➡️s");
        assert_eq!(&wide, WIDESTR_TEST);
        assert_eq!(&wide, WIDECSTR_TEST);
        let wide_has_terminator = matches!(WIDECSTR_TEST.as_slice_with_nul().last(), Some(&0));
        assert!(wide_has_terminator);
    }
}
525}