widestring/
macros.rs

// Generates one exported `macro_rules!` macro that encodes a constant `&str` expression as UTF-16
// during constant evaluation.
//
// Arguments, in order:
// * zero or more attributes (normally the doc comment), re-emitted on the generated macro,
// * `$name`: name of the generated macro,
// * `$extra_len`: number of additional zeroed `u16` slots appended after the encoded text; when it
//   is greater than zero, a NUL code point in the text aborts constant evaluation with a panic,
// * `$str`: crate-root type that receives the encoded array,
// * `$fn`: associated function of `$str` that is called with a reference to the array.
macro_rules! implement_utf16_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                // Binding the argument to a `const` forces it to be a constant `&str` expression.
                const _WIDESTRING_U16_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // Number of `u16` slots: the UTF-16 length of the text plus the requested padding.
                const _WIDESTRING_U16_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf16(_WIDESTRING_U16_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U16_MACRO_UTF16: [$crate::internals::core::primitive::u16;
                        _WIDESTRING_U16_MACRO_LEN] = {
                    // Zero-initialised, so any `$extra_len` slots past the text stay zero.
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u16; _WIDESTRING_U16_MACRO_LEN] = [0; _WIDESTRING_U16_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U16_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        _widestring_bytes = _widestring_rest;
                        if $extra_len > 0 && _widestring_ch == 0 {
                            // Path-qualified like every other name used here, so the expansion
                            // does not depend on what `panic!` resolves to at the call site.
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf16
                        if _widestring_ch & 0xFFFF == _widestring_ch {
                            // The code point fits into a single code unit.
                            _widestring_buffer[_widestring_i] = _widestring_ch as $crate::internals::core::primitive::u16;
                            _widestring_i += 1;
                        } else {
                            // Anything larger is split into a high/low surrogate pair.
                            let _widestring_code = _widestring_ch - 0x1_0000;
                            _widestring_buffer[_widestring_i] = 0xD800 | ((_widestring_code >> 10) as $crate::internals::core::primitive::u16);
                            _widestring_buffer[_widestring_i + 1] = 0xDC00 | ((_widestring_code as $crate::internals::core::primitive::u16) & 0x3FF);
                            _widestring_i += 2;
                        }
                    }
                    _widestring_buffer
                };
                // The same expansion is used with every `$fn` passed in; the lint is silenced for
                // the case where `$fn` turns out not to need the `unsafe` block.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U16_MACRO_UTF16) }
            }};
        }
    }
}
39
implement_utf16_macro! {
    /// Converts a string literal into a `const` UTF-16 string slice of type
    /// [`Utf16Str`][crate::Utf16Str].
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{utf16str, Utf16Str, Utf16String};
    ///
    /// const STRING: &Utf16Str = utf16str!("My string");
    /// assert_eq!(Utf16String::from_str("My string"), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units, target type, constructor.
    utf16str 0 Utf16Str from_slice_unchecked
}
56
implement_utf16_macro! {
    /// Converts a string literal into a `const` UTF-16 string slice of type
    /// [`U16Str`][crate::U16Str].
    ///
    /// The resulting `const` string slice will always be valid UTF-16.
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{u16str, U16Str, U16String};
    ///
    /// const STRING: &U16Str = u16str!("My string");
    /// assert_eq!(U16String::from_str("My string"), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units, target type, constructor.
    u16str 0 U16Str from_slice
}
75
implement_utf16_macro! {
    /// Converts a string literal into a `const` UTF-16 string slice of type
    /// [`U16CStr`][crate::U16CStr].
    ///
    /// The resulting `const` string slice will always be valid UTF-16 and include a nul terminator.
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`. If the text itself contains a nul value, constant evaluation
    /// panics and compilation fails.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{u16cstr, U16CStr, U16CString};
    ///
    /// const STRING: &U16CStr = u16cstr!("My string");
    /// assert_eq!(U16CString::from_str("My string").unwrap(), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units (the terminator), target type, constructor.
    u16cstr 1 U16CStr from_slice_unchecked
}
94
// Generates one exported `macro_rules!` macro that encodes a constant `&str` expression as UTF-32
// during constant evaluation.
//
// Arguments, in order:
// * zero or more attributes (normally the doc comment), re-emitted on the generated macro,
// * `$name`: name of the generated macro,
// * `$extra_len`: number of additional zeroed `u32` slots appended after the encoded text; when it
//   is greater than zero, a NUL code point in the text aborts constant evaluation with a panic,
// * `$str`: crate-root type that receives the encoded array,
// * `$fn`: associated function of `$str` that is called with a reference to the array.
macro_rules! implement_utf32_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                // Binding the argument to a `const` forces it to be a constant `&str` expression.
                const _WIDESTRING_U32_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // Number of `u32` slots: one per code point plus the requested padding.
                const _WIDESTRING_U32_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf32(_WIDESTRING_U32_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U32_MACRO_UTF32: [$crate::internals::core::primitive::u32;
                        _WIDESTRING_U32_MACRO_LEN] = {
                    // Zero-initialised, so any `$extra_len` slots past the text stay zero.
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u32; _WIDESTRING_U32_MACRO_LEN] = [0; _WIDESTRING_U32_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U32_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        _widestring_bytes = _widestring_rest;
                        if $extra_len > 0 && _widestring_ch == 0 {
                            // Path-qualified like every other name used here, so the expansion
                            // does not depend on what `panic!` resolves to at the call site.
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        // Every code point is stored as exactly one UTF-32 unit.
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    _widestring_buffer
                };
                // The same expansion is used with every `$fn` passed in; the lint is silenced for
                // the case where `$fn` turns out not to need the `unsafe` block.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U32_MACRO_UTF32) }
            }};
        }
    }
}
125
implement_utf32_macro! {
    /// Converts a string literal into a `const` UTF-32 string slice of type
    /// [`Utf32Str`][crate::Utf32Str].
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{utf32str, Utf32Str, Utf32String};
    ///
    /// const STRING: &Utf32Str = utf32str!("My string");
    /// assert_eq!(Utf32String::from_str("My string"), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units, target type, constructor.
    utf32str 0 Utf32Str from_slice_unchecked
}
142
implement_utf32_macro! {
    /// Converts a string literal into a `const` UTF-32 string slice of type
    /// [`U32Str`][crate::U32Str].
    ///
    /// The resulting `const` string slice will always be valid UTF-32.
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{u32str, U32Str, U32String};
    ///
    /// const STRING: &U32Str = u32str!("My string");
    /// assert_eq!(U32String::from_str("My string"), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units, target type, constructor.
    u32str 0 U32Str from_slice
}
161
implement_utf32_macro! {
    /// Converts a string literal into a `const` UTF-32 string slice of type
    /// [`U32CStr`][crate::U32CStr].
    ///
    /// The resulting `const` string slice will always be valid UTF-32 and include a nul terminator.
    ///
    /// The conversion is done during constant evaluation, so the argument has to be a constant
    /// expression of type `&str`. If the text itself contains a nul value, constant evaluation
    /// panics and compilation fails.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use widestring::{u32cstr, U32CStr, U32CString};
    ///
    /// const STRING: &U32CStr = u32cstr!("My string");
    /// assert_eq!(U32CString::from_str("My string").unwrap(), STRING);
    /// # }
    /// ```
    // Generated macro name, extra zeroed code units (the terminator), target type, constructor.
    u32cstr 1 U32CStr from_slice_unchecked
}
180
/// Alias for [`u16str`] or [`u32str`] macros depending on platform. Intended to be used when using
/// [`WideStr`][crate::WideStr] type alias.
///
/// This definition is the one compiled for non-Windows targets; it forwards to [`u32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widestr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `u32str!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        u32str!($text)
    }};
}
191
/// Alias for [`utf16str`] or [`utf32str`] macros depending on platform. Intended to be used when
/// using [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// This definition is the one compiled for non-Windows targets; it forwards to [`utf32str`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! wideutfstr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `utf32str!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        utf32str!($text)
    }};
}
202
/// Alias for [`u16cstr`] or [`u32cstr`] macros depending on platform. Intended to be used when
/// using [`WideCStr`][crate::WideCStr] type alias.
///
/// This definition is the one compiled for non-Windows targets; it forwards to [`u32cstr`].
#[cfg(not(windows))]
#[macro_export]
macro_rules! widecstr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `u32cstr!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        u32cstr!($text)
    }};
}
213
/// Alias for [`u16str`] or [`u32str`] macros depending on platform. Intended to be used when using
/// [`WideStr`][crate::WideStr] type alias.
///
/// This definition is the one compiled for Windows targets; it forwards to [`u16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! widestr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `u16str!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        u16str!($text)
    }};
}
224
/// Alias for [`utf16str`] or [`utf32str`] macros depending on platform. Intended to be used when
/// using [`WideUtfStr`][crate::WideUtfStr] type alias.
///
/// This definition is the one compiled for Windows targets; it forwards to [`utf16str`].
#[cfg(windows)]
#[macro_export]
macro_rules! wideutfstr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `utf16str!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        utf16str!($text)
    }};
}
235
/// Alias for [`u16cstr`] or [`u32cstr`] macros depending on platform. Intended to be used when
/// using [`WideCStr`][crate::WideCStr] type alias.
///
/// This definition is the one compiled for Windows targets; it forwards to [`u16cstr`].
#[cfg(windows)]
#[macro_export]
macro_rules! widecstr {
    ($text:expr) => {{
        // The glob import brings the crate-root macros into scope, so `u16cstr!` resolves even if
        // the caller has not imported it.
        use $crate::*;
        u16cstr!($text)
    }};
}
246
/// Includes a UTF-16 encoded file as a [`Utf16Str`][crate::Utf16Str].
///
/// This uses [`include_bytes`](core::include_bytes) to accomplish this.
///
/// If the file starts with a byte order mark, the mark selects the byte order and is left out of
/// the resulting string; without one the file is read as little-endian. A nul code unit at the
/// very end of the file is left out as well.
///
/// Compilation fails if the file has an odd number of bytes or contains an unpaired surrogate.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use widestring::{include_utf16str, Utf16Str, Utf16String};
///
/// const STRING: &Utf16Str = include_utf16str!("example.txt");
/// assert_eq!(Utf16String::from_str("My string"), STRING);
/// # }
/// ```
#[macro_export]
macro_rules! include_utf16str {
    ($text:expr) => {{
        const _WIDESTRING_U16_INCLUDE_MACRO_U8: &[$crate::internals::core::primitive::u8] =
            $crate::internals::core::include_bytes!($text);
        // Number of `u16` code units in the file; an odd byte count cannot be UTF-16.
        const _WIDESTRING_U16_INCLUDE_MACRO_LEN: $crate::internals::core::primitive::usize = {
            let _widestring_len =
                <[$crate::internals::core::primitive::u8]>::len(_WIDESTRING_U16_INCLUDE_MACRO_U8);
            if _widestring_len
                % $crate::internals::core::mem::size_of::<$crate::internals::core::primitive::u16>()
                != 0
            {
                $crate::internals::core::panic!("file not encoded as UTF-16")
            }
            _widestring_len / 2
        };
        // Decoded code units plus two flags: "starts with a byte order mark" and "ends with a nul
        // code unit". Both markers are still present in the array at this point.
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16: (
            [$crate::internals::core::primitive::u16; _WIDESTRING_U16_INCLUDE_MACRO_LEN],
            $crate::internals::core::primitive::bool,
            $crate::internals::core::primitive::bool,
        ) = {
            let mut _widestring_buffer: [$crate::internals::core::primitive::u16;
                _WIDESTRING_U16_INCLUDE_MACRO_LEN] = [0; _WIDESTRING_U16_INCLUDE_MACRO_LEN];
            let mut _widestring_bytes = _WIDESTRING_U16_INCLUDE_MACRO_U8;
            let mut _widestring_i = 0;
            let mut _widestring_decode = $crate::internals::DecodeUtf16 {
                bom: $crate::internals::core::option::Option::None,
                eof: false,
                next: $crate::internals::core::option::Option::None,
                forward_buf: $crate::internals::core::option::Option::None,
                back_buf: $crate::internals::core::option::Option::None,
            };

            // The decoder state is passed by value and handed back on every step.
            loop {
                match $crate::internals::DecodeUtf16::next_code_point(
                    _widestring_decode,
                    _widestring_bytes,
                ) {
                    $crate::internals::core::result::Result::Ok((
                        _widestring_new_decode,
                        _widestring_ch,
                        _widestring_rest,
                    )) => {
                        _widestring_decode = _widestring_new_decode;

                        _widestring_bytes = _widestring_rest;
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    // No code unit left: keep the final state for the flags below.
                    $crate::internals::core::result::Result::Err(_widestring_new_decode) => {
                        _widestring_decode = _widestring_new_decode;
                        break;
                    }
                }
            }

            (
                _widestring_buffer,
                if let $crate::internals::core::option::Option::Some(
                    $crate::internals::core::option::Option::Some(_),
                ) = _widestring_decode.bom
                {
                    true
                } else {
                    false
                },
                _widestring_decode.eof,
            )
        };
        // Strip the byte order mark and the trailing nul, if present. This works on a slice rather
        // than on the fixed-size array: array patterns naming more elements than the array holds
        // are rejected at compile time, which would make files shorter than two code units
        // impossible to include.
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED:
            &[$crate::internals::core::primitive::u16] = {
            let (_widestring_units, _widestring_has_bom, _widestring_has_eof) =
                &_WIDESTRING_U16_INCLUDE_MACRO_UTF16;
            let mut _widestring_trimmed: &[$crate::internals::core::primitive::u16] =
                _widestring_units;
            if *_widestring_has_bom {
                if let [_, _widestring_rest @ ..] = _widestring_trimmed {
                    _widestring_trimmed = _widestring_rest;
                }
            }
            if *_widestring_has_eof {
                if let [_widestring_rest @ .., _] = _widestring_trimmed {
                    _widestring_trimmed = _widestring_rest;
                }
            }
            _widestring_trimmed
        };

        // The decoder above panics on every unpaired surrogate, so only well-formed UTF-16
        // reaches the unchecked constructor.
        #[allow(unused_unsafe)]
        unsafe {
            $crate::Utf16Str::from_slice_unchecked(_WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED)
        }
    }};
}
336
337#[doc(hidden)]
338#[allow(missing_debug_implementations)]
339pub mod internals {
340    pub use core;
341
342    // A const implementation of https://github.com/rust-lang/rust/blob/d902752866cbbdb331e3cf28ff6bba86ab0f6c62/library/core/src/str/mod.rs#L509-L537
343    // Assumes `utf8` is a valid &str
344    pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
345        const CONT_MASK: u8 = 0b0011_1111;
346        match utf8 {
347            [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
348            [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
349                (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
350                rest,
351            )),
352            [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
353                (((*one & 0b0000_1111) as u32) << 12)
354                    | (((*two & CONT_MASK) as u32) << 6)
355                    | ((*three & CONT_MASK) as u32),
356                rest,
357            )),
358            [one, two, three, four, rest @ ..] => Some((
359                (((*one & 0b0000_0111) as u32) << 18)
360                    | (((*two & CONT_MASK) as u32) << 12)
361                    | (((*three & CONT_MASK) as u32) << 6)
362                    | ((*four & CONT_MASK) as u32),
363                rest,
364            )),
365            [..] => None,
366        }
367    }
368
    /// Byte order mark recognised at the start of UTF-16 input by
    /// [`DecodeUtf16::next_code_point`].
    pub enum BoM {
        /// The first two bytes, read as little-endian, are `0xFEFF`.
        Little,
        /// The first two bytes, read as little-endian, are `0xFFFE`; code units are then decoded
        /// as big-endian.
        Big,
    }
373
    /// State of the `const` UTF-16 decoder. It is passed to and returned from
    /// [`DecodeUtf16::next_code_point`] by value.
    pub struct DecodeUtf16 {
        /// `None` until the first call that sees at least two bytes; afterwards `Some(None)` when
        /// the input does not start with a byte order mark, or `Some(Some(_))` with the mark found.
        pub bom: Option<Option<BoM>>,
        /// Set once the remaining input consists of exactly one code unit that is zero.
        pub eof: bool,
        /// Low surrogate that was already consumed together with its high surrogate and is
        /// returned by the following call.
        pub next: Option<u16>,
        /// Code unit that, when set, is returned before any bytes are read from the input.
        /// Nothing in this file sets it to `Some`.
        pub forward_buf: Option<u16>,
        /// Code unit that, when set, is used once the input has run out of bytes.
        /// Nothing in this file sets it to `Some`.
        pub back_buf: Option<u16>,
    }
381
382    impl DecodeUtf16 {
383        pub const fn next_code_point(
384            mut self,
385            mut utf16: &[u8],
386        ) -> Result<(Self, u16, &[u8]), Self> {
387            if let [one, two] = utf16 {
388                if u16::from_le_bytes([*one, *two]) == 0x0000 {
389                    self.eof = true;
390                }
391            }
392
393            if self.bom.is_none() {
394                if let [one, two, ..] = utf16 {
395                    let ch = u16::from_le_bytes([*one, *two]);
396                    if ch == 0xfeff {
397                        self.bom = Some(Some(BoM::Little));
398                    } else if ch == 0xfffe {
399                        self.bom = Some(Some(BoM::Big));
400                    } else {
401                        self.bom = Some(None);
402                    }
403                }
404            }
405
406            // Copied from `DecodeUtf16`
407            if let Some(u) = self.next {
408                self.next = None;
409                return Ok((self, u, utf16));
410            }
411
412            let u = if let Some(u) = self.forward_buf {
413                self.forward_buf = None;
414                u
415            } else if let [one, two, rest @ ..] = utf16 {
416                utf16 = rest;
417                match self.bom {
418                    Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
419                    _ => u16::from_le_bytes([*one, *two]),
420                }
421            } else if let Some(u) = self.back_buf {
422                self.back_buf = None;
423                u
424            } else {
425                return Err(self);
426            };
427
428            if !crate::is_utf16_surrogate(u) {
429                Ok((self, u, utf16))
430            } else if crate::is_utf16_low_surrogate(u) {
431                panic!("unpaired surrogate found")
432            } else {
433                let u2 = if let [one, two, rest @ ..] = utf16 {
434                    utf16 = rest;
435                    match self.bom {
436                        Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
437                        _ => u16::from_le_bytes([*one, *two]),
438                    }
439                } else if let Some(u) = self.back_buf {
440                    self.back_buf = None;
441                    u
442                } else {
443                    panic!("unpaired surrogate found")
444                };
445
446                if !crate::is_utf16_low_surrogate(u2) {
447                    panic!("unpaired surrogate found")
448                }
449
450                self.next = Some(u2);
451
452                Ok((self, u, utf16))
453            }
454        }
455    }
456
457    // A const implementation of `s.chars().map(|ch| ch.len_utf16()).sum()`
458    pub const fn length_as_utf16(s: &str) -> usize {
459        let mut bytes = s.as_bytes();
460        let mut len = 0;
461        while let Some((ch, rest)) = next_code_point(bytes) {
462            bytes = rest;
463            len += if (ch & 0xFFFF) == ch { 1 } else { 2 };
464        }
465        len
466    }
467
468    // A const implementation of `s.chars().len()`
469    pub const fn length_as_utf32(s: &str) -> usize {
470        let mut bytes = s.as_bytes();
471        let mut len = 0;
472        while let Some((_, rest)) = next_code_point(bytes) {
473            bytes = rest;
474            len += 1;
475        }
476        len
477    }
478}
479
// Checks every string macro against the run-time conversion of the same text.
#[cfg(all(test, feature = "alloc"))]
mod test {
    use crate::{
        U16CStr, U16Str, U32CStr, U32Str, Utf16Str, Utf16String, Utf32Str, Utf32String, WideCStr,
        WideStr, WideString,
    };

    // UTF-16 flavours, including the two file-based fixtures (one per byte order).
    const UTF16STR_TEST: &Utf16Str = utf16str!("⚧️🏳️‍⚧️➡️s");
    const UTF16STR_INCLUDE_LE_TEST: &Utf16Str = include_utf16str!("test_le.txt");
    const UTF16STR_INCLUDE_BE_TEST: &Utf16Str = include_utf16str!("test_be.txt");
    const U16STR_TEST: &U16Str = u16str!("⚧️🏳️‍⚧️➡️s");
    const U16CSTR_TEST: &U16CStr = u16cstr!("⚧️🏳️‍⚧️➡️s");

    // UTF-32 flavours.
    const UTF32STR_TEST: &Utf32Str = utf32str!("⚧️🏳️‍⚧️➡️s");
    const U32STR_TEST: &U32Str = u32str!("⚧️🏳️‍⚧️➡️s");
    const U32CSTR_TEST: &U32CStr = u32cstr!("⚧️🏳️‍⚧️➡️s");

    // Platform-dependent aliases.
    const WIDESTR_TEST: &WideStr = widestr!("⚧️🏳️‍⚧️➡️s");
    const WIDECSTR_TEST: &WideCStr = widecstr!("⚧️🏳️‍⚧️➡️s");

    #[test]
    fn str_macros() {
        {
            let expected_utf16 = Utf16String::from_str("⚧️🏳️‍⚧️➡️s");
            assert_eq!(&expected_utf16, UTF16STR_TEST);
            assert_eq!(&expected_utf16, UTF16STR_INCLUDE_LE_TEST);
            assert_eq!(&expected_utf16, UTF16STR_INCLUDE_BE_TEST);
            assert_eq!(&expected_utf16, U16STR_TEST);
            assert_eq!(&expected_utf16, U16CSTR_TEST);
            // The C-string flavour must carry its terminator.
            assert_eq!(U16CSTR_TEST.as_slice_with_nul().last(), Some(&0));
        }

        {
            let expected_utf32 = Utf32String::from_str("⚧️🏳️‍⚧️➡️s");
            assert_eq!(&expected_utf32, UTF32STR_TEST);
            assert_eq!(&expected_utf32, U32STR_TEST);
            assert_eq!(&expected_utf32, U32CSTR_TEST);
            assert_eq!(U32CSTR_TEST.as_slice_with_nul().last(), Some(&0));
        }

        {
            let expected_wide = WideString::from_str("⚧️🏳️‍⚧️➡️s");
            assert_eq!(&expected_wide, WIDESTR_TEST);
            assert_eq!(&expected_wide, WIDECSTR_TEST);
            assert_eq!(WIDECSTR_TEST.as_slice_with_nul().last(), Some(&0));
        }
    }
}
519}