// Generates an exported macro `$name!` that transcodes a UTF-8 string constant to UTF-16 at
// compile time and hands the resulting array to `$crate::$str::$fn`.
//
// Arguments, in order:
// * any number of attributes (doc comments included), forwarded onto the generated macro;
// * `$name`: name of the generated macro;
// * `$extra_len`: number of extra code units reserved after the text. The buffer is
//   zero-initialised, so `1` leaves a trailing NUL; any value above `0` also turns a NUL inside
//   the text into a compile-time panic;
// * `$str` and `$fn`: a type at the crate root and the associated function that receives a
//   reference to the encoded `[u16; N]`.
//
// Everything the generated macro touches is spelled with a full `$crate::internals::core::...`
// path (including `panic!`) so the expansion does not depend on the prelude or on names that are
// in scope at the call site.
macro_rules! implement_utf16_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                const _WIDESTRING_U16_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // Code units needed for the text plus the reserved trailing units.
                const _WIDESTRING_U16_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf16(_WIDESTRING_U16_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U16_MACRO_UTF16: [$crate::internals::core::primitive::u16;
                    _WIDESTRING_U16_MACRO_LEN] = {
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u16; _WIDESTRING_U16_MACRO_LEN] = [0; _WIDESTRING_U16_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U16_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        _widestring_bytes = _widestring_rest;
                        // Only reject NUL when a terminator slot was requested.
                        if $extra_len > 0 && _widestring_ch == 0 {
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        if _widestring_ch & 0xFFFF == _widestring_ch {
                            // Fits in a single code unit.
                            _widestring_buffer[_widestring_i] = _widestring_ch as $crate::internals::core::primitive::u16;
                            _widestring_i += 1;
                        } else {
                            // Above U+FFFF: split into a high/low surrogate pair.
                            let _widestring_code = _widestring_ch - 0x1_0000;
                            _widestring_buffer[_widestring_i] = 0xD800 | ((_widestring_code >> 10) as $crate::internals::core::primitive::u16);
                            _widestring_buffer[_widestring_i + 1] = 0xDC00 | ((_widestring_code as $crate::internals::core::primitive::u16) & 0x3FF);
                            _widestring_i += 2;
                        }
                    }
                    _widestring_buffer
                };
                // `$fn` is not `unsafe` for every instantiation, hence the `allow`.
                // SAFETY: the buffer was transcoded from a `&str`, and when `$extra_len > 0` it
                // contains no NUL before the reserved zero-filled tail.
                // NOTE(review): presumably this is exactly what the `*_unchecked` constructors
                // require; confirm against their documented contracts.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U16_MACRO_UTF16) }
            }};
        }
    }
}
39
implement_utf16_macro! {
    /// Encodes a string constant as UTF-16 at compile time and yields a `&Utf16Str`.
    ///
    /// The argument must be usable as the initializer of a `const` `&str`. The encoded code
    /// units are passed to `Utf16Str::from_slice_unchecked`.
    utf16str 0 Utf16Str from_slice_unchecked
}

implement_utf16_macro! {
    /// Encodes a string constant as UTF-16 at compile time and yields a `&U16Str`.
    ///
    /// The argument must be usable as the initializer of a `const` `&str`. The encoded code
    /// units are passed to `U16Str::from_slice`; no NUL terminator is added.
    u16str 0 U16Str from_slice
}

implement_utf16_macro! {
    /// Encodes a string constant as NUL-terminated UTF-16 at compile time and yields a
    /// `&U16CStr`.
    ///
    /// One extra zero code unit is reserved after the text as the terminator. A NUL character
    /// inside the text aborts compilation with the message
    /// `invalid NUL value found in string literal`.
    u16cstr 1 U16CStr from_slice_unchecked
}
94
// Generates an exported macro `$name!` that transcodes a UTF-8 string constant to UTF-32 at
// compile time and hands the resulting array to `$crate::$str::$fn`.
//
// Arguments, in order:
// * any number of attributes (doc comments included), forwarded onto the generated macro;
// * `$name`: name of the generated macro;
// * `$extra_len`: number of extra code units reserved after the text. The buffer is
//   zero-initialised, so `1` leaves a trailing NUL; any value above `0` also turns a NUL inside
//   the text into a compile-time panic;
// * `$str` and `$fn`: a type at the crate root and the associated function that receives a
//   reference to the encoded `[u32; N]`.
//
// Everything the generated macro touches is spelled with a full `$crate::internals::core::...`
// path (including `panic!`) so the expansion does not depend on the prelude or on names that are
// in scope at the call site.
macro_rules! implement_utf32_macro {
    ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
        $(#[$m])*
        #[macro_export]
        macro_rules! $name {
            ($text:expr) => {{
                const _WIDESTRING_U32_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
                // One code unit per code point plus the reserved trailing units.
                const _WIDESTRING_U32_MACRO_LEN: $crate::internals::core::primitive::usize =
                    $crate::internals::length_as_utf32(_WIDESTRING_U32_MACRO_UTF8) + $extra_len;
                const _WIDESTRING_U32_MACRO_UTF32: [$crate::internals::core::primitive::u32;
                    _WIDESTRING_U32_MACRO_LEN] = {
                    let mut _widestring_buffer: [$crate::internals::core::primitive::u32; _WIDESTRING_U32_MACRO_LEN] = [0; _WIDESTRING_U32_MACRO_LEN];
                    let mut _widestring_bytes = _WIDESTRING_U32_MACRO_UTF8.as_bytes();
                    let mut _widestring_i = 0;
                    while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
                        // Only reject NUL when a terminator slot was requested.
                        if $extra_len > 0 && _widestring_ch == 0 {
                            $crate::internals::core::panic!("invalid NUL value found in string literal");
                        }
                        _widestring_bytes = _widestring_rest;
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    _widestring_buffer
                };
                // `$fn` is not `unsafe` for every instantiation, hence the `allow`.
                // SAFETY: the buffer holds the code points decoded from a `&str`, and when
                // `$extra_len > 0` it contains no NUL before the reserved zero-filled tail.
                // NOTE(review): presumably this is exactly what the `*_unchecked` constructors
                // require; confirm against their documented contracts.
                #[allow(unused_unsafe)]
                unsafe { $crate::$str::$fn(&_WIDESTRING_U32_MACRO_UTF32) }
            }};
        }
    }
}
125
implement_utf32_macro! {
    /// Encodes a string constant as UTF-32 at compile time and yields a `&Utf32Str`.
    ///
    /// The argument must be usable as the initializer of a `const` `&str`. The decoded code
    /// points are passed to `Utf32Str::from_slice_unchecked`.
    utf32str 0 Utf32Str from_slice_unchecked
}

implement_utf32_macro! {
    /// Encodes a string constant as UTF-32 at compile time and yields a `&U32Str`.
    ///
    /// The argument must be usable as the initializer of a `const` `&str`. The decoded code
    /// points are passed to `U32Str::from_slice`; no NUL terminator is added.
    u32str 0 U32Str from_slice
}

implement_utf32_macro! {
    /// Encodes a string constant as NUL-terminated UTF-32 at compile time and yields a
    /// `&U32CStr`.
    ///
    /// One extra zero code unit is reserved after the text as the terminator. A NUL character
    /// inside the text aborts compilation with the message
    /// `invalid NUL value found in string literal`.
    u32cstr 1 U32CStr from_slice_unchecked
}
180
/// Platform alias: on non-Windows targets this expands to `u32str!` with the same argument.
#[cfg(not(windows))]
#[macro_export]
macro_rules! widestr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `u32str!` resolves by bare name at the
        // call site.
        use $crate::*;
        u32str!($text)
    }};
}
191
/// Platform alias: on non-Windows targets this expands to `utf32str!` with the same argument.
#[cfg(not(windows))]
#[macro_export]
macro_rules! wideutfstr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `utf32str!` resolves by bare name at the
        // call site.
        use $crate::*;
        utf32str!($text)
    }};
}
202
/// Platform alias: on non-Windows targets this expands to `u32cstr!` with the same argument.
#[cfg(not(windows))]
#[macro_export]
macro_rules! widecstr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `u32cstr!` resolves by bare name at the
        // call site.
        use $crate::*;
        u32cstr!($text)
    }};
}
213
/// Platform alias: on Windows targets this expands to `u16str!` with the same argument.
#[cfg(windows)]
#[macro_export]
macro_rules! widestr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `u16str!` resolves by bare name at the
        // call site.
        use $crate::*;
        u16str!($text)
    }};
}
224
/// Platform alias: on Windows targets this expands to `utf16str!` with the same argument.
#[cfg(windows)]
#[macro_export]
macro_rules! wideutfstr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `utf16str!` resolves by bare name at the
        // call site.
        use $crate::*;
        utf16str!($text)
    }};
}
235
/// Platform alias: on Windows targets this expands to `u16cstr!` with the same argument.
#[cfg(windows)]
#[macro_export]
macro_rules! widecstr {
    ($text:expr) => {{
        // Glob-import the crate root so the exported `u16cstr!` resolves by bare name at the
        // call site.
        use $crate::*;
        u16cstr!($text)
    }};
}
246
/// Includes a UTF-16 encoded file at compile time and yields a `&Utf16Str`.
///
/// The argument is handed to `core::include_bytes!`. The bytes are decoded two at a time:
///
/// * if the first code unit is a byte-order mark it selects the byte order and is removed from
///   the result; without one the data is read as little-endian;
/// * if the final code unit is `0x0000` it is removed from the result.
///
/// Compilation fails with `file not encoded as UTF-16` when the file length is not a multiple of
/// two bytes, and with `unpaired surrogate found` when a surrogate has no partner.
#[macro_export]
macro_rules! include_utf16str {
    ($text:expr) => {{
        const _WIDESTRING_U16_INCLUDE_MACRO_U8: &[$crate::internals::core::primitive::u8] =
            $crate::internals::core::include_bytes!($text);
        // Number of 16-bit code units in the file (BOM and trailing NUL included).
        const _WIDESTRING_U16_INCLUDE_MACRO_LEN: $crate::internals::core::primitive::usize = {
            let _widestring_len =
                <[$crate::internals::core::primitive::u8]>::len(_WIDESTRING_U16_INCLUDE_MACRO_U8);
            let _widestring_unit_size = $crate::internals::core::mem::size_of::<
                $crate::internals::core::primitive::u16,
            >();
            if _widestring_len % _widestring_unit_size != 0 {
                $crate::internals::core::panic!("file not encoded as UTF-16")
            }
            _widestring_len / _widestring_unit_size
        };
        // (decoded code units, "starts with a BOM", "ends with a NUL")
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16: (
            [$crate::internals::core::primitive::u16; _WIDESTRING_U16_INCLUDE_MACRO_LEN],
            $crate::internals::core::primitive::bool,
            $crate::internals::core::primitive::bool,
        ) = {
            let mut _widestring_buffer: [$crate::internals::core::primitive::u16;
                _WIDESTRING_U16_INCLUDE_MACRO_LEN] = [0; _WIDESTRING_U16_INCLUDE_MACRO_LEN];
            let mut _widestring_bytes = _WIDESTRING_U16_INCLUDE_MACRO_U8;
            let mut _widestring_i = 0;
            let mut _widestring_decode = $crate::internals::DecodeUtf16 {
                bom: $crate::internals::core::option::Option::None,
                eof: false,
                next: $crate::internals::core::option::Option::None,
                forward_buf: $crate::internals::core::option::Option::None,
                back_buf: $crate::internals::core::option::Option::None,
            };

            // The decoder is consumed and handed back on every step; `Err` means "no input left".
            loop {
                match $crate::internals::DecodeUtf16::next_code_point(
                    _widestring_decode,
                    _widestring_bytes,
                ) {
                    $crate::internals::core::result::Result::Ok((
                        _widestring_new_decode,
                        _widestring_ch,
                        _widestring_rest,
                    )) => {
                        _widestring_decode = _widestring_new_decode;

                        _widestring_bytes = _widestring_rest;
                        _widestring_buffer[_widestring_i] = _widestring_ch;
                        _widestring_i += 1;
                    }
                    $crate::internals::core::result::Result::Err(_widestring_new_decode) => {
                        _widestring_decode = _widestring_new_decode;
                        break;
                    }
                }
            }

            (
                _widestring_buffer,
                $crate::internals::core::matches!(
                    _widestring_decode.bom,
                    $crate::internals::core::option::Option::Some(
                        $crate::internals::core::option::Option::Some(_)
                    )
                ),
                _widestring_decode.eof,
            )
        };
        const _WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED:
            &[$crate::internals::core::primitive::u16] = {
            let (_widestring_units, _widestring_has_bom, _widestring_has_eof) =
                &_WIDESTRING_U16_INCLUDE_MACRO_UTF16;
            // Match on a slice, not on the fixed-size array: array patterns are checked against
            // the array length at compile time, so arms such as `[_bom, rest @ .., _eof]` would
            // reject files with fewer than two code units even when that arm is never taken.
            let _widestring_units: &[$crate::internals::core::primitive::u16] = _widestring_units;
            match (_widestring_units, *_widestring_has_bom, *_widestring_has_eof) {
                (rest, false, false) => rest,
                ([_bom, rest @ ..], true, false) => rest,
                ([rest @ .., _eof], false, true) => rest,
                ([_bom, rest @ .., _eof], true, true) => rest,
                // Either flag implies at least one code unit, and a single unit cannot be both
                // a BOM and a NUL, so this arm only satisfies exhaustiveness.
                _ => $crate::internals::core::panic!(
                    "inconsistent BOM/NUL flags while trimming UTF-16 data"
                ),
            }
        };

        // SAFETY: every code unit came out of `DecodeUtf16::next_code_point`, which panics on
        // unpaired surrogates.
        // NOTE(review): presumably well-formed UTF-16 is the whole contract of
        // `from_slice_unchecked`; confirm against its documentation.
        #[allow(unused_unsafe)]
        unsafe {
            $crate::Utf16Str::from_slice_unchecked(_WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED)
        }
    }};
}
336
337#[doc(hidden)]
338#[allow(missing_debug_implementations)]
339pub mod internals {
340 pub use core;
341
342 pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
345 const CONT_MASK: u8 = 0b0011_1111;
346 match utf8 {
347 [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
348 [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
349 (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
350 rest,
351 )),
352 [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
353 (((*one & 0b0000_1111) as u32) << 12)
354 | (((*two & CONT_MASK) as u32) << 6)
355 | ((*three & CONT_MASK) as u32),
356 rest,
357 )),
358 [one, two, three, four, rest @ ..] => Some((
359 (((*one & 0b0000_0111) as u32) << 18)
360 | (((*two & CONT_MASK) as u32) << 12)
361 | (((*three & CONT_MASK) as u32) << 6)
362 | ((*four & CONT_MASK) as u32),
363 rest,
364 )),
365 [..] => None,
366 }
367 }
368
    /// Byte order announced by a byte-order mark at the start of UTF-16 data.
    pub enum BoM {
        /// The first two bytes, read as little-endian, equal `0xFEFF`.
        Little,
        /// The first two bytes, read as little-endian, equal `0xFFFE`, i.e. the mark was stored
        /// big-endian.
        Big,
    }
373
    /// State threaded through [`DecodeUtf16::next_code_point`] while decoding UTF-16 bytes.
    pub struct DecodeUtf16 {
        /// `None` until the first code unit has been inspected; afterwards `Some(None)` when
        /// there was no byte-order mark, or `Some(Some(order))` when there was one.
        pub bom: Option<Option<BoM>>,
        /// Set once exactly two bytes remain and both are zero.
        pub eof: bool,
        /// Low surrogate that was already consumed and is returned by the following call.
        pub next: Option<u16>,
        /// Code unit taken in preference to the input bytes. Nothing in this file stores a value
        /// here.
        pub forward_buf: Option<u16>,
        /// Code unit taken once the input bytes are exhausted. Nothing in this file stores a
        /// value here.
        pub back_buf: Option<u16>,
    }
381
382 impl DecodeUtf16 {
383 pub const fn next_code_point(
384 mut self,
385 mut utf16: &[u8],
386 ) -> Result<(Self, u16, &[u8]), Self> {
387 if let [one, two] = utf16 {
388 if u16::from_le_bytes([*one, *two]) == 0x0000 {
389 self.eof = true;
390 }
391 }
392
393 if self.bom.is_none() {
394 if let [one, two, ..] = utf16 {
395 let ch = u16::from_le_bytes([*one, *two]);
396 if ch == 0xfeff {
397 self.bom = Some(Some(BoM::Little));
398 } else if ch == 0xfffe {
399 self.bom = Some(Some(BoM::Big));
400 } else {
401 self.bom = Some(None);
402 }
403 }
404 }
405
406 if let Some(u) = self.next {
408 self.next = None;
409 return Ok((self, u, utf16));
410 }
411
412 let u = if let Some(u) = self.forward_buf {
413 self.forward_buf = None;
414 u
415 } else if let [one, two, rest @ ..] = utf16 {
416 utf16 = rest;
417 match self.bom {
418 Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
419 _ => u16::from_le_bytes([*one, *two]),
420 }
421 } else if let Some(u) = self.back_buf {
422 self.back_buf = None;
423 u
424 } else {
425 return Err(self);
426 };
427
428 if !crate::is_utf16_surrogate(u) {
429 Ok((self, u, utf16))
430 } else if crate::is_utf16_low_surrogate(u) {
431 panic!("unpaired surrogate found")
432 } else {
433 let u2 = if let [one, two, rest @ ..] = utf16 {
434 utf16 = rest;
435 match self.bom {
436 Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
437 _ => u16::from_le_bytes([*one, *two]),
438 }
439 } else if let Some(u) = self.back_buf {
440 self.back_buf = None;
441 u
442 } else {
443 panic!("unpaired surrogate found")
444 };
445
446 if !crate::is_utf16_low_surrogate(u2) {
447 panic!("unpaired surrogate found")
448 }
449
450 self.next = Some(u2);
451
452 Ok((self, u, utf16))
453 }
454 }
455 }
456
457 pub const fn length_as_utf16(s: &str) -> usize {
459 let mut bytes = s.as_bytes();
460 let mut len = 0;
461 while let Some((ch, rest)) = next_code_point(bytes) {
462 bytes = rest;
463 len += if (ch & 0xFFFF) == ch { 1 } else { 2 };
464 }
465 len
466 }
467
468 pub const fn length_as_utf32(s: &str) -> usize {
470 let mut bytes = s.as_bytes();
471 let mut len = 0;
472 while let Some((_, rest)) = next_code_point(bytes) {
473 bytes = rest;
474 len += 1;
475 }
476 len
477 }
478}
479
#[cfg(all(test, feature = "alloc"))]
mod test {
    use crate::{
        U16CStr, U16Str, U32CStr, U32Str, Utf16Str, Utf16String, Utf32Str, Utf32String, WideCStr,
        WideStr, WideString,
    };

    // Every macro is expanded in `const` position so that a failure shows up at compile time.

    // 16-bit variants, plus the same text read back from little- and big-endian files.
    const UTF16STR_TEST: &Utf16Str = utf16str!("⚧️🏳️⚧️➡️s");
    const UTF16STR_INCLUDE_LE_TEST: &Utf16Str = include_utf16str!("test_le.txt");
    const UTF16STR_INCLUDE_BE_TEST: &Utf16Str = include_utf16str!("test_be.txt");
    const U16STR_TEST: &U16Str = u16str!("⚧️🏳️⚧️➡️s");
    const U16CSTR_TEST: &U16CStr = u16cstr!("⚧️🏳️⚧️➡️s");

    // 32-bit variants.
    const UTF32STR_TEST: &Utf32Str = utf32str!("⚧️🏳️⚧️➡️s");
    const U32STR_TEST: &U32Str = u32str!("⚧️🏳️⚧️➡️s");
    const U32CSTR_TEST: &U32CStr = u32cstr!("⚧️🏳️⚧️➡️s");

    // Platform-width aliases.
    const WIDESTR_TEST: &WideStr = widestr!("⚧️🏳️⚧️➡️s");
    const WIDECSTR_TEST: &WideCStr = widecstr!("⚧️🏳️⚧️➡️s");

    /// Each compile-time string must equal the same text converted at run time, and the C-string
    /// variants must end in a NUL terminator.
    #[test]
    fn str_macros() {
        let utf16 = Utf16String::from_str("⚧️🏳️⚧️➡️s");
        assert_eq!(&utf16, UTF16STR_TEST);
        assert_eq!(&utf16, UTF16STR_INCLUDE_LE_TEST);
        assert_eq!(&utf16, UTF16STR_INCLUDE_BE_TEST);
        assert_eq!(&utf16, U16STR_TEST);
        assert_eq!(&utf16, U16CSTR_TEST);
        assert_eq!(U16CSTR_TEST.as_slice_with_nul().last(), Some(&0));

        let utf32 = Utf32String::from_str("⚧️🏳️⚧️➡️s");
        assert_eq!(&utf32, UTF32STR_TEST);
        assert_eq!(&utf32, U32STR_TEST);
        assert_eq!(&utf32, U32CSTR_TEST);
        assert_eq!(U32CSTR_TEST.as_slice_with_nul().last(), Some(&0));

        let wide = WideString::from_str("⚧️🏳️⚧️➡️s");
        assert_eq!(&wide, WIDESTR_TEST);
        assert_eq!(&wide, WIDECSTR_TEST);
        assert_eq!(WIDECSTR_TEST.as_slice_with_nul().last(), Some(&0));
    }
}