1macro_rules! implement_utf16_macro {
2 ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
3 $(#[$m])*
4 #[macro_export]
5 macro_rules! $name {
6 ($text:expr) => {{
7 const _WIDESTRING_U16_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
8 const _WIDESTRING_U16_MACRO_LEN: $crate::internals::core::primitive::usize =
9 $crate::internals::length_as_utf16(_WIDESTRING_U16_MACRO_UTF8) + $extra_len;
10 const _WIDESTRING_U16_MACRO_UTF16: [$crate::internals::core::primitive::u16;
11 _WIDESTRING_U16_MACRO_LEN] = {
12 let mut _widestring_buffer: [$crate::internals::core::primitive::u16; _WIDESTRING_U16_MACRO_LEN] = [0; _WIDESTRING_U16_MACRO_LEN];
13 let mut _widestring_bytes = _WIDESTRING_U16_MACRO_UTF8.as_bytes();
14 let mut _widestring_i = 0;
15 while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
16 _widestring_bytes = _widestring_rest;
17 if $extra_len > 0 && _widestring_ch == 0 {
18 panic!("invalid NUL value found in string literal");
19 }
20 if _widestring_ch & 0xFFFF == _widestring_ch {
22 _widestring_buffer[_widestring_i] = _widestring_ch as $crate::internals::core::primitive::u16;
23 _widestring_i += 1;
24 } else {
25 let _widestring_code = _widestring_ch - 0x1_0000;
26 _widestring_buffer[_widestring_i] = 0xD800 | ((_widestring_code >> 10) as $crate::internals::core::primitive::u16);
27 _widestring_buffer[_widestring_i + 1] = 0xDC00 | ((_widestring_code as $crate::internals::core::primitive::u16) & 0x3FF);
28 _widestring_i += 2;
29 }
30 }
31 _widestring_buffer
32 };
33 #[allow(unused_unsafe)]
34 unsafe { $crate::$str::$fn(&_WIDESTRING_U16_MACRO_UTF16) }
35 }};
36 }
37 }
38}
39
40implement_utf16_macro! {
41 utf16str 0 Utf16Str from_slice_unchecked
55}
56
57implement_utf16_macro! {
58 u16str 0 U16Str from_slice
74}
75
76implement_utf16_macro! {
77 u16cstr 1 U16CStr from_slice_unchecked
93}
94
95macro_rules! implement_utf32_macro {
96 ($(#[$m:meta])* $name:ident $extra_len:literal $str:ident $fn:ident) => {
97 $(#[$m])*
98 #[macro_export]
99 macro_rules! $name {
100 ($text:expr) => {{
101 const _WIDESTRING_U32_MACRO_UTF8: &$crate::internals::core::primitive::str = $text;
102 const _WIDESTRING_U32_MACRO_LEN: $crate::internals::core::primitive::usize =
103 $crate::internals::length_as_utf32(_WIDESTRING_U32_MACRO_UTF8) + $extra_len;
104 const _WIDESTRING_U32_MACRO_UTF32: [$crate::internals::core::primitive::u32;
105 _WIDESTRING_U32_MACRO_LEN] = {
106 let mut _widestring_buffer: [$crate::internals::core::primitive::u32; _WIDESTRING_U32_MACRO_LEN] = [0; _WIDESTRING_U32_MACRO_LEN];
107 let mut _widestring_bytes = _WIDESTRING_U32_MACRO_UTF8.as_bytes();
108 let mut _widestring_i = 0;
109 while let $crate::internals::core::option::Option::Some((_widestring_ch, _widestring_rest)) = $crate::internals::next_code_point(_widestring_bytes) {
110 if $extra_len > 0 && _widestring_ch == 0 {
111 panic!("invalid NUL value found in string literal");
112 }
113 _widestring_bytes = _widestring_rest;
114 _widestring_buffer[_widestring_i] = _widestring_ch;
115 _widestring_i += 1;
116 }
117 _widestring_buffer
118 };
119 #[allow(unused_unsafe)]
120 unsafe { $crate::$str::$fn(&_WIDESTRING_U32_MACRO_UTF32) }
121 }};
122 }
123 }
124}
125
126implement_utf32_macro! {
127 utf32str 0 Utf32Str from_slice_unchecked
141}
142
143implement_utf32_macro! {
144 u32str 0 U32Str from_slice
160}
161
162implement_utf32_macro! {
163 u32cstr 1 U32CStr from_slice_unchecked
179}
180
181#[cfg(not(windows))]
184#[macro_export]
185macro_rules! widestr {
186 ($text:expr) => {{
187 #[allow(unused_imports)]
188 use $crate::*;
189 u32str!($text)
190 }};
191}
192
193#[cfg(not(windows))]
196#[macro_export]
197macro_rules! wideutfstr {
198 ($text:expr) => {{
199 #[allow(unused_imports)]
200 use $crate::*;
201 utf32str!($text)
202 }};
203}
204
205#[cfg(not(windows))]
208#[macro_export]
209macro_rules! widecstr {
210 ($text:expr) => {{
211 #[allow(unused_imports)]
212 use $crate::*;
213 u32cstr!($text)
214 }};
215}
216
217#[cfg(windows)]
220#[macro_export]
221macro_rules! widestr {
222 ($text:expr) => {{
223 #[allow(unused_imports)]
224 use $crate::*;
225 u16str!($text)
226 }};
227}
228
229#[cfg(windows)]
232#[macro_export]
233macro_rules! wideutfstr {
234 ($text:expr) => {{
235 #[allow(unused_imports)]
236 use $crate::*;
237 utf16str!($text)
238 }};
239}
240
241#[cfg(windows)]
244#[macro_export]
245macro_rules! widecstr {
246 ($text:expr) => {{
247 #[allow(unused_imports)]
248 use $crate::*;
249 u16cstr!($text)
250 }};
251}
252
253#[macro_export]
268macro_rules! include_utf16str {
269 ($text:expr) => {{
270 const _WIDESTRING_U16_INCLUDE_MACRO_U8: &[$crate::internals::core::primitive::u8] =
271 $crate::internals::core::include_bytes!($text);
272 const _WIDESTRING_U16_INCLUDE_MACRO_LEN: $crate::internals::core::primitive::usize = {
273 let _widestring_len =
274 <[$crate::internals::core::primitive::u8]>::len(_WIDESTRING_U16_INCLUDE_MACRO_U8);
275 if _widestring_len % $crate::internals::core::mem::size_of::<u16>() != 0 {
276 panic!("file not encoded as UTF-16")
277 }
278 _widestring_len / 2
279 };
280 const _WIDESTRING_U16_INCLUDE_MACRO_UTF16: (
281 [$crate::internals::core::primitive::u16; _WIDESTRING_U16_INCLUDE_MACRO_LEN],
282 bool,
283 bool,
284 ) = {
285 let mut _widestring_buffer: [$crate::internals::core::primitive::u16;
286 _WIDESTRING_U16_INCLUDE_MACRO_LEN] = [0; _WIDESTRING_U16_INCLUDE_MACRO_LEN];
287 let mut _widestring_bytes = _WIDESTRING_U16_INCLUDE_MACRO_U8;
288 let mut _widestring_i = 0;
289 let mut _widestring_decode = $crate::internals::DecodeUtf16 {
290 bom: $crate::internals::core::option::Option::None,
291 eof: false,
292 next: $crate::internals::core::option::Option::None,
293 forward_buf: $crate::internals::core::option::Option::None,
294 back_buf: $crate::internals::core::option::Option::None,
295 };
296
297 loop {
298 match $crate::internals::DecodeUtf16::next_code_point(
299 _widestring_decode,
300 _widestring_bytes,
301 ) {
302 Ok((_widestring_new_decode, _widestring_ch, _widestring_rest)) => {
303 _widestring_decode = _widestring_new_decode;
304
305 _widestring_bytes = _widestring_rest;
306 _widestring_buffer[_widestring_i] = _widestring_ch;
307 _widestring_i += 1;
308 }
309 Err(_widestring_new_decode) => {
310 _widestring_decode = _widestring_new_decode;
311 break;
312 }
313 }
314 }
315
316 (
317 _widestring_buffer,
318 if let Some(Some(_)) = _widestring_decode.bom {
319 true
320 } else {
321 false
322 },
323 _widestring_decode.eof,
324 )
325 };
326 const _WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED:
327 &[$crate::internals::core::primitive::u16] = {
328 match &_WIDESTRING_U16_INCLUDE_MACRO_UTF16 {
329 (buffer, false, false) => buffer,
330 ([_bom, rest @ ..], true, false) => rest,
331 ([rest @ .., _eof], false, true) => rest,
332 ([_bom, rest @ .., _eof], true, true) => rest,
333 }
334 };
335
336 #[allow(unused_unsafe)]
337 unsafe {
338 $crate::Utf16Str::from_slice_unchecked(_WIDESTRING_U16_INCLUDE_MACRO_UTF16_TRIMMED)
339 }
340 }};
341}
342
343#[doc(hidden)]
344#[allow(missing_debug_implementations)]
345pub mod internals {
346 pub use core;
347
348 pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
351 const CONT_MASK: u8 = 0b0011_1111;
352 match utf8 {
353 [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
354 [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
355 (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
356 rest,
357 )),
358 [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
359 (((*one & 0b0000_1111) as u32) << 12)
360 | (((*two & CONT_MASK) as u32) << 6)
361 | ((*three & CONT_MASK) as u32),
362 rest,
363 )),
364 [one, two, three, four, rest @ ..] => Some((
365 (((*one & 0b0000_0111) as u32) << 18)
366 | (((*two & CONT_MASK) as u32) << 12)
367 | (((*three & CONT_MASK) as u32) << 6)
368 | ((*four & CONT_MASK) as u32),
369 rest,
370 )),
371 [..] => None,
372 }
373 }
374
375 pub enum BoM {
376 Little,
377 Big,
378 }
379
380 pub struct DecodeUtf16 {
381 pub bom: Option<Option<BoM>>,
382 pub eof: bool,
383 pub next: Option<u16>,
384 pub forward_buf: Option<u16>,
385 pub back_buf: Option<u16>,
386 }
387
388 impl DecodeUtf16 {
389 pub const fn next_code_point(
390 mut self,
391 mut utf16: &[u8],
392 ) -> Result<(Self, u16, &[u8]), Self> {
393 if let [one, two] = utf16 {
394 if u16::from_le_bytes([*one, *two]) == 0x0000 {
395 self.eof = true;
396 }
397 }
398
399 if self.bom.is_none() {
400 if let [one, two, ..] = utf16 {
401 let ch = u16::from_le_bytes([*one, *two]);
402 if ch == 0xfeff {
403 self.bom = Some(Some(BoM::Little));
404 } else if ch == 0xfffe {
405 self.bom = Some(Some(BoM::Big));
406 } else {
407 self.bom = Some(None);
408 }
409 }
410 }
411
412 if let Some(u) = self.next {
414 self.next = None;
415 return Ok((self, u, utf16));
416 }
417
418 let u = if let Some(u) = self.forward_buf {
419 self.forward_buf = None;
420 u
421 } else if let [one, two, rest @ ..] = utf16 {
422 utf16 = rest;
423 match self.bom {
424 Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
425 _ => u16::from_le_bytes([*one, *two]),
426 }
427 } else if let Some(u) = self.back_buf {
428 self.back_buf = None;
429 u
430 } else {
431 return Err(self);
432 };
433
434 if !crate::is_utf16_surrogate(u) {
435 Ok((self, u, utf16))
436 } else if crate::is_utf16_low_surrogate(u) {
437 panic!("unpaired surrogate found")
438 } else {
439 let u2 = if let [one, two, rest @ ..] = utf16 {
440 utf16 = rest;
441 match self.bom {
442 Some(Some(BoM::Big)) => u16::from_be_bytes([*one, *two]),
443 _ => u16::from_le_bytes([*one, *two]),
444 }
445 } else if let Some(u) = self.back_buf {
446 self.back_buf = None;
447 u
448 } else {
449 panic!("unpaired surrogate found")
450 };
451
452 if !crate::is_utf16_low_surrogate(u2) {
453 panic!("unpaired surrogate found")
454 }
455
456 self.next = Some(u2);
457
458 Ok((self, u, utf16))
459 }
460 }
461 }
462
463 pub const fn length_as_utf16(s: &str) -> usize {
465 let mut bytes = s.as_bytes();
466 let mut len = 0;
467 while let Some((ch, rest)) = next_code_point(bytes) {
468 bytes = rest;
469 len += if (ch & 0xFFFF) == ch { 1 } else { 2 };
470 }
471 len
472 }
473
474 pub const fn length_as_utf32(s: &str) -> usize {
476 let mut bytes = s.as_bytes();
477 let mut len = 0;
478 while let Some((_, rest)) = next_code_point(bytes) {
479 bytes = rest;
480 len += 1;
481 }
482 len
483 }
484}
485
486#[cfg(all(test, feature = "alloc"))]
487mod test {
488 use crate::{
489 U16CStr, U16Str, U32CStr, U32Str, Utf16Str, Utf16String, Utf32Str, Utf32String, WideCStr,
490 WideStr, WideString,
491 };
492
493 const UTF16STR_TEST: &Utf16Str = utf16str!("⚧️🏳️⚧️➡️s");
494 const UTF16STR_INCLUDE_LE_TEST: &Utf16Str = include_utf16str!("test_le.txt");
495 const UTF16STR_INCLUDE_BE_TEST: &Utf16Str = include_utf16str!("test_be.txt");
496 const U16STR_TEST: &U16Str = u16str!("⚧️🏳️⚧️➡️s");
497 const U16CSTR_TEST: &U16CStr = u16cstr!("⚧️🏳️⚧️➡️s");
498 const UTF32STR_TEST: &Utf32Str = utf32str!("⚧️🏳️⚧️➡️s");
499 const U32STR_TEST: &U32Str = u32str!("⚧️🏳️⚧️➡️s");
500 const U32CSTR_TEST: &U32CStr = u32cstr!("⚧️🏳️⚧️➡️s");
501 const WIDESTR_TEST: &WideStr = widestr!("⚧️🏳️⚧️➡️s");
502 const WIDECSTR_TEST: &WideCStr = widecstr!("⚧️🏳️⚧️➡️s");
503
504 #[test]
505 fn str_macros() {
506 let str = Utf16String::from_str("⚧️🏳️⚧️➡️s");
507 assert_eq!(&str, UTF16STR_TEST);
508 assert_eq!(&str, UTF16STR_INCLUDE_LE_TEST);
509 assert_eq!(&str, UTF16STR_INCLUDE_BE_TEST);
510 assert_eq!(&str, U16STR_TEST);
511 assert_eq!(&str, U16CSTR_TEST);
512 assert!(matches!(U16CSTR_TEST.as_slice_with_nul().last(), Some(&0)));
513
514 let str = Utf32String::from_str("⚧️🏳️⚧️➡️s");
515 assert_eq!(&str, UTF32STR_TEST);
516 assert_eq!(&str, U32STR_TEST);
517 assert_eq!(&str, U32CSTR_TEST);
518 assert!(matches!(U32CSTR_TEST.as_slice_with_nul().last(), Some(&0)));
519
520 let str = WideString::from_str("⚧️🏳️⚧️➡️s");
521 assert_eq!(&str, WIDESTR_TEST);
522 assert_eq!(&str, WIDECSTR_TEST);
523 assert!(matches!(WIDECSTR_TEST.as_slice_with_nul().last(), Some(&0)));
524 }
525}