widestring/
utfstr.rs

1//! UTF string slices.
2//!
3//! This module contains UTF string slices and related types.
4
5use crate::{
6    error::{Utf16Error, Utf32Error},
7    is_utf16_low_surrogate,
8    iter::{EncodeUtf16, EncodeUtf32, EncodeUtf8},
9    validate_utf16, validate_utf32, U16Str, U32Str,
10};
11#[cfg(feature = "alloc")]
12use crate::{Utf16String, Utf32String};
13#[cfg(feature = "alloc")]
14#[allow(unused_imports)]
15use alloc::{borrow::Cow, boxed::Box, string::String};
16#[allow(unused_imports)]
17use core::{
18    convert::{AsMut, AsRef, TryFrom},
19    fmt::Write,
20    ops::{Index, IndexMut, RangeBounds},
21    slice::SliceIndex,
22};
23
24mod iter;
25
26pub use iter::*;
27
/// Returns the number of `u16` code units needed to encode `c` in UTF-16.
///
/// This is a free-function form of [`char::len_utf16`] so that it can be handed to
/// `utfstr_common_impl!` by name as its `char_len_fn` argument.
#[inline]
const fn char_len_utf16(c: char) -> usize {
    char::len_utf16(c)
}
32
/// Returns the number of `u32` code units needed to encode a `char` in UTF-32.
///
/// The result is always `1`. The argument is ignored; it is only there so this function has the
/// same signature as [`char_len_utf16`] and can be passed to `utfstr_common_impl!` as its
/// `char_len_fn` argument.
#[inline]
const fn char_len_utf32(_c: char) -> usize {
    1
}
37
38macro_rules! utfstr_common_impl {
39    {
40        $(#[$utfstr_meta:meta])*
41        struct $utfstr:ident([$uchar:ty]);
42        type UtfString = $utfstring:ident;
43        type UStr = $ustr:ident;
44        type UCStr = $ucstr:ident;
45        type UtfError = $utferror:ident;
46        char_len_fn = $char_len_fn:ident;
47        $(#[$from_slice_unchecked_meta:meta])*
48        fn from_slice_unchecked() -> {}
49        $(#[$from_slice_unchecked_mut_meta:meta])*
50        fn from_slice_unchecked_mut() -> {}
51        $(#[$from_boxed_slice_unchecked_meta:meta])*
52        fn from_boxed_slice_unchecked() -> {}
53        $(#[$get_unchecked_meta:meta])*
54        fn get_unchecked() -> {}
55        $(#[$get_unchecked_mut_meta:meta])*
56        fn get_unchecked_mut() -> {}
57        $(#[$len_meta:meta])*
58        fn len() -> {}
59    } => {
60        $(#[$utfstr_meta])*
61        #[allow(clippy::derive_hash_xor_eq)]
62        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
63        pub struct $utfstr {
64            pub(crate) inner: [$uchar],
65        }
66
67        impl $utfstr {
68            $(#[$from_slice_unchecked_meta])*
69            #[allow(trivial_casts)]
70            #[inline]
71            #[must_use]
72            pub const unsafe fn from_slice_unchecked(s: &[$uchar]) -> &Self {
73                &*(s as *const [$uchar] as *const Self)
74            }
75
76            $(#[$from_slice_unchecked_mut_meta])*
77            #[allow(trivial_casts)]
78            #[inline]
79            #[must_use]
80            pub unsafe fn from_slice_unchecked_mut(s: &mut [$uchar]) -> &mut Self {
81                &mut *(s as *mut [$uchar] as *mut Self)
82            }
83
84            $(#[$from_boxed_slice_unchecked_meta])*
85            #[inline]
86            #[cfg(feature = "alloc")]
87            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
88            #[must_use]
89            pub unsafe fn from_boxed_slice_unchecked(s: Box<[$uchar]>) -> Box<Self> {
90                Box::from_raw(Box::into_raw(s) as *mut Self)
91            }
92
93            $(#[$get_unchecked_meta])*
94            #[inline]
95            #[must_use]
96            pub unsafe fn get_unchecked<I>(&self, index: I) -> &Self
97            where
98                I: SliceIndex<[$uchar], Output = [$uchar]>,
99            {
100                Self::from_slice_unchecked(self.inner.get_unchecked(index))
101            }
102
103            $(#[$get_unchecked_mut_meta])*
104            #[inline]
105            #[must_use]
106            pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut Self
107            where
108                I: SliceIndex<[$uchar], Output = [$uchar]>,
109            {
110                Self::from_slice_unchecked_mut(self.inner.get_unchecked_mut(index))
111            }
112
113            $(#[$len_meta])*
114            #[inline]
115            #[must_use]
116            pub const fn len(&self) -> usize {
117                self.inner.len()
118            }
119
120            /// Returns `true` if the string has a length of zero.
121            #[inline]
122            #[must_use]
123            pub const fn is_empty(&self) -> bool {
124                self.inner.is_empty()
125            }
126
127            /// Converts a string to a slice of its underlying elements.
128            ///
129            /// To convert the slice back into a string slice, use the
130            /// [`from_slice`][Self::from_slice] function.
131            #[inline]
132            #[must_use]
133            pub const fn as_slice(&self) -> &[$uchar] {
134                &self.inner
135            }
136
137            /// Converts a mutable string to a mutable slice of its underlying elements.
138            ///
139            /// # Safety
140            ///
141            /// This function is unsafe because you can violate the invariants of this type when
            /// mutating the slice. The caller must ensure that the contents of the slice are valid
143            /// UTF before the borrow ends and the underlying string is used.
144            ///
145            /// Use of this string type whose contents have been mutated to invalid UTF is
146            /// undefined behavior.
147            #[inline]
148            #[must_use]
149            pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
150                &mut self.inner
151            }
152
153            /// Converts a string slice to a raw pointer.
154            ///
155            /// This pointer will be pointing to the first element of the string slice.
156            ///
157            /// The caller must ensure that the returned pointer is never written to. If you need to
158            /// mutate the contents of the string slice, use [`as_mut_ptr`][Self::as_mut_ptr].
159            #[inline]
160            #[must_use]
161            pub const fn as_ptr(&self) -> *const $uchar {
162                self.inner.as_ptr()
163            }
164
165            /// Converts a mutable string slice to a mutable pointer.
166            ///
167            /// This pointer will be pointing to the first element of the string slice.
168            #[inline]
169            #[must_use]
170            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
171                self.inner.as_mut_ptr()
172            }
173
174            /// Returns this string as a wide string slice of undefined encoding.
175            #[inline]
176            #[must_use]
177            pub const fn as_ustr(&self) -> &$ustr {
178                $ustr::from_slice(self.as_slice())
179            }
180
181            /// Returns a string slice with leading and trailing whitespace removed.
182            ///
183            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
184            /// `White_Space`.
185            #[must_use]
186            pub fn trim(&self) -> &Self {
187                self.trim_start().trim_end()
188            }
189
190            /// Returns a string slice with leading whitespace removed.
191            ///
192            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
193            /// `White_Space`.
194            ///
195            /// # Text directionality
196            ///
197            /// A string is a sequence of elements. `start` in this context means the first position
198            /// of that sequence; for a left-to-right language like English or Russian, this will be
            /// the left side, and for right-to-left languages like Arabic or Hebrew, this will be
            /// the right side.
201            #[must_use]
202            pub fn trim_start(&self) -> &Self {
203                if let Some((index, _)) = self.char_indices().find(|(_, c)| !c.is_whitespace()) {
204                    &self[index..]
205                } else {
206                    <&Self as Default>::default()
207                }
208            }
209
210            /// Returns a string slice with trailing whitespace removed.
211            ///
212            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
213            /// `White_Space`.
214            ///
215            /// # Text directionality
216            ///
217            /// A string is a sequence of elements. `end` in this context means the last position of
218            /// that sequence; for a left-to-right language like English or Russian, this will be
            /// the right side, and for right-to-left languages like Arabic or Hebrew, this will be
            /// the left side.
221            #[must_use]
222            pub fn trim_end(&self) -> &Self {
223                if let Some((index, c)) = self.char_indices().rfind(|(_, c)| !c.is_whitespace()) {
224                    &self[..index + $char_len_fn(c)]
225                } else {
226                    <&Self as Default>::default()
227                }
228            }
229
230            /// Converts a boxed string into a boxed slice without copying or allocating.
231            #[inline]
232            #[cfg(feature = "alloc")]
233            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
234            #[must_use]
235            pub fn into_boxed_slice(self: Box<Self>) -> Box<[$uchar]> {
236                // SAFETY: from_raw pointer is from into_raw
237                unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) }
238            }
239
240            /// Converts a boxed string slice into an owned UTF string without copying or
241            /// allocating.
242            #[inline]
243            #[cfg(feature = "alloc")]
244            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
245            #[must_use]
246            pub fn into_utfstring(self: Box<Self>) -> $utfstring {
247                unsafe { $utfstring::from_vec_unchecked(self.into_boxed_slice().into_vec()) }
248            }
249
250            /// Creates a new owned string by repeating this string `n` times.
251            ///
252            /// # Panics
253            ///
254            /// This function will panic if the capacity would overflow.
255            #[inline]
256            #[cfg(feature = "alloc")]
257            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
258            #[must_use]
259            pub fn repeat(&self, n: usize) -> $utfstring {
260                unsafe { $utfstring::from_vec_unchecked(self.as_slice().repeat(n)) }
261            }
262        }
263
264        impl AsMut<$utfstr> for $utfstr {
265            #[inline]
266            fn as_mut(&mut self) -> &mut $utfstr {
267                self
268            }
269        }
270
271        impl AsRef<$utfstr> for $utfstr {
272            #[inline]
273            fn as_ref(&self) -> &$utfstr {
274                self
275            }
276        }
277
278        impl AsRef<[$uchar]> for $utfstr {
279            #[inline]
280            fn as_ref(&self) -> &[$uchar] {
281                self.as_slice()
282            }
283        }
284
285        impl AsRef<$ustr> for $utfstr {
286            #[inline]
287            fn as_ref(&self) -> &$ustr {
288                self.as_ustr()
289            }
290        }
291
292        impl core::fmt::Debug for $utfstr {
293            #[inline]
294            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
295                f.write_char('"')?;
296                self.escape_debug().try_for_each(|c| f.write_char(c))?;
297                f.write_char('"')
298            }
299        }
300
301        impl Default for &$utfstr {
302            #[inline]
303            fn default() -> Self {
304                // SAFETY: Empty slice is always valid
305                unsafe { $utfstr::from_slice_unchecked(&[]) }
306            }
307        }
308
        impl Default for &mut $utfstr {
            /// Returns an empty mutable string slice.
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked_mut(&mut []) }
            }
        }
316
317        impl core::fmt::Display for $utfstr {
318            #[inline]
319            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
320                self.chars().try_for_each(|c| f.write_char(c))
321            }
322        }
323
324        #[cfg(feature = "alloc")]
325        impl From<Box<$utfstr>> for Box<[$uchar]> {
326            #[inline]
327            fn from(value: Box<$utfstr>) -> Self {
328                value.into_boxed_slice()
329            }
330        }
331
332        impl<'a> From<&'a $utfstr> for &'a $ustr {
333            #[inline]
334            fn from(value: &'a $utfstr) -> Self {
335                value.as_ustr()
336            }
337        }
338
339        impl<'a> From<&'a $utfstr> for &'a [$uchar] {
340            #[inline]
341            fn from(value: &'a $utfstr) -> Self {
342                value.as_slice()
343            }
344        }
345
346        #[cfg(feature = "std")]
347        impl From<&$utfstr> for std::ffi::OsString {
348            #[inline]
349            fn from(value: &$utfstr) -> std::ffi::OsString {
350                value.as_ustr().to_os_string()
351            }
352        }
353
354        impl PartialEq<$utfstr> for &$utfstr {
355            #[inline]
356            fn eq(&self, other: &$utfstr) -> bool {
357                self.as_slice() == other.as_slice()
358            }
359        }
360
361        #[cfg(feature = "alloc")]
362        impl<'a, 'b> PartialEq<Cow<'a, $utfstr>> for &'b $utfstr {
363            #[inline]
364            fn eq(&self, other: &Cow<'a, $utfstr>) -> bool {
365                self == other.as_ref()
366            }
367        }
368
369        #[cfg(feature = "alloc")]
370        impl PartialEq<$utfstr> for Cow<'_, $utfstr> {
371            #[inline]
372            fn eq(&self, other: &$utfstr) -> bool {
373                self.as_ref() == other
374            }
375        }
376
377        #[cfg(feature = "alloc")]
378        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, $utfstr> {
379            #[inline]
380            fn eq(&self, other: &&'a $utfstr) -> bool {
381                self.as_ref() == *other
382            }
383        }
384
385        impl PartialEq<$ustr> for $utfstr {
386            #[inline]
387            fn eq(&self, other: &$ustr) -> bool {
388                self.as_slice() == other.as_slice()
389            }
390        }
391
392        impl PartialEq<$utfstr> for $ustr {
393            #[inline]
394            fn eq(&self, other: &$utfstr) -> bool {
395                self.as_slice() == other.as_slice()
396            }
397        }
398
399        impl PartialEq<crate::$ucstr> for $utfstr {
400            #[inline]
401            fn eq(&self, other: &crate::$ucstr) -> bool {
402                self.as_slice() == other.as_slice()
403            }
404        }
405
406        impl PartialEq<$utfstr> for crate::$ucstr {
407            #[inline]
408            fn eq(&self, other: &$utfstr) -> bool {
409                self.as_slice() == other.as_slice()
410            }
411        }
412
        impl PartialEq<str> for $utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                // `str` is UTF-8 while this type stores `$uchar` elements, so the raw buffers
                // cannot be compared directly; compare the decoded `char` sequences instead.
                self.chars().eq(other.chars())
            }
        }
419
420        impl PartialEq<&str> for $utfstr {
421            #[inline]
422            fn eq(&self, other: &&str) -> bool {
423                self.chars().eq(other.chars())
424            }
425        }
426
427        impl PartialEq<str> for &$utfstr {
428            #[inline]
429            fn eq(&self, other: &str) -> bool {
430                self.chars().eq(other.chars())
431            }
432        }
433
434        impl PartialEq<$utfstr> for str {
435            #[inline]
436            fn eq(&self, other: &$utfstr) -> bool {
437                self.chars().eq(other.chars())
438            }
439        }
440
441        impl PartialEq<$utfstr> for &str {
442            #[inline]
443            fn eq(&self, other: &$utfstr) -> bool {
444                self.chars().eq(other.chars())
445            }
446        }
447
448        #[cfg(feature = "alloc")]
449        impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b $utfstr {
450            #[inline]
451            fn eq(&self, other: &Cow<'a, str>) -> bool {
452                self == other.as_ref()
453            }
454        }
455
456        #[cfg(feature = "alloc")]
457        impl PartialEq<$utfstr> for Cow<'_, str> {
458            #[inline]
459            fn eq(&self, other: &$utfstr) -> bool {
460                self.as_ref() == other
461            }
462        }
463
464        #[cfg(feature = "alloc")]
465        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, str> {
466            #[inline]
467            fn eq(&self, other: &&'a $utfstr) -> bool {
468                self.as_ref() == *other
469            }
470        }
471
472        impl<'a> TryFrom<&'a $ustr> for &'a $utfstr {
473            type Error = $utferror;
474
475            #[inline]
476            fn try_from(value: &'a $ustr) -> Result<Self, Self::Error> {
477                $utfstr::from_ustr(value)
478            }
479        }
480
481        impl<'a> TryFrom<&'a crate::$ucstr> for &'a $utfstr {
482            type Error = $utferror;
483
484            #[inline]
485            fn try_from(value: &'a crate::$ucstr) -> Result<Self, Self::Error> {
486                $utfstr::from_ucstr(value)
487            }
488        }
489    };
490}
491
492utfstr_common_impl! {
493    /// UTF-16 string slice for [`Utf16String`][crate::Utf16String].
494    ///
495    /// [`Utf16Str`] is to [`Utf16String`][crate::Utf16String] as [`str`] is to [`String`].
496    ///
497    /// [`Utf16Str`] slices are string slices that are always valid UTF-16 encoding. This is unlike
498    /// the [`U16Str`][U16Str] string slices, which may not have valid encoding. In this way,
499    /// [`Utf16Str`] string slices most resemble native [`str`] slices of all the types in this
500    /// crate.
501    ///
502    /// # Examples
503    ///
504    /// The easiest way to use [`Utf16Str`] is with the [`utf16str!`][crate::utf16str] macro to
505    /// convert string literals into string slices at compile time:
506    ///
507    /// ```
508    /// use widestring::utf16str;
509    /// let hello = utf16str!("Hello, world!");
510    /// ```
511    ///
512    /// You can also convert a [`u16`] slice directly, provided it is valid UTF-16:
513    ///
514    /// ```
515    /// use widestring::Utf16Str;
516    ///
517    /// let sparkle_heart = [0xd83d, 0xdc96];
518    /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
519    ///
520    /// assert_eq!("💖", sparkle_heart);
521    /// ```
522    struct Utf16Str([u16]);
523
524    type UtfString = Utf16String;
525    type UStr = U16Str;
526    type UCStr = U16CStr;
527    type UtfError = Utf16Error;
528    char_len_fn = char_len_utf16;
529
530    /// Converts a slice to a string slice without checking that the string contains valid UTF-16.
531    ///
532    /// See the safe version, [`from_slice`][Self::from_slice], for more information.
533    ///
534    /// # Safety
535    ///
536    /// This function is unsafe because it does not check that the slice passed to it is valid
537    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
538    /// [`Utf16Str`] is always valid UTF-16.
539    ///
540    /// # Examples
541    ///
542    /// ```
543    /// use widestring::Utf16Str;
544    ///
545    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
546    /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked(&sparkle_heart) };
547    ///
548    /// assert_eq!("💖", sparkle_heart);
549    /// ```
550    fn from_slice_unchecked() -> {}
551
552    /// Converts a mutable slice to a mutable string slice without checking that the string contains
553    /// valid UTF-16.
554    ///
555    /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
556    ///
557    /// # Safety
558    ///
559    /// This function is unsafe because it does not check that the slice passed to it is valid
560    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
561    /// [`Utf16Str`] is always valid UTF-16.
562    ///
563    /// # Examples
564    ///
565    /// ```
566    /// use widestring::Utf16Str;
567    ///
568    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
569    /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked_mut(&mut sparkle_heart) };
570    ///
571    /// assert_eq!("💖", sparkle_heart);
572    /// ```
573    fn from_slice_unchecked_mut() -> {}
574
575    /// Converts a boxed slice to a boxed string slice without checking that the string contains
576    /// valid UTF-16.
577    ///
578    /// # Safety
579    ///
580    /// This function is unsafe because it does not check if the string slice is valid UTF-16, and
581    /// [`Utf16Str`] must always be valid UTF-16.
582    fn from_boxed_slice_unchecked() -> {}
583
584    /// Returns an unchecked subslice of this string slice.
585    ///
586    /// This is the unchecked alternative to indexing the string slice.
587    ///
588    /// # Safety
589    ///
590    /// Callers of this function are responsible that these preconditions are satisfied:
591    ///
592    /// - The starting index must not exceed the ending index;
593    /// - Indexes must be within bounds of the original slice;
594    /// - Indexes must lie on UTF-16 sequence boundaries.
595    ///
596    /// Failing that, the returned string slice may reference invalid memory or violate the
597    /// invariants communicated by the type.
598    ///
599    /// # Examples
600    ///
601    /// ```
602    /// # use widestring::{utf16str};
603    /// let v = utf16str!("⚧️🏳️‍⚧️➡️s");
604    /// unsafe {
605    ///     assert_eq!(utf16str!("⚧️"), v.get_unchecked(..2));
606    ///     assert_eq!(utf16str!("🏳️‍⚧️"), v.get_unchecked(2..8));
607    ///     assert_eq!(utf16str!("➡️"), v.get_unchecked(8..10));
608    ///     assert_eq!(utf16str!("s"), v.get_unchecked(10..));
609    /// }
610    /// ```
611    fn get_unchecked() -> {}
612
    /// Returns a mutable, unchecked subslice of this string slice.
614    ///
615    /// This is the unchecked alternative to indexing the string slice.
616    ///
617    /// # Safety
618    ///
619    /// Callers of this function are responsible that these preconditions are satisfied:
620    ///
621    /// - The starting index must not exceed the ending index;
622    /// - Indexes must be within bounds of the original slice;
623    /// - Indexes must lie on UTF-16 sequence boundaries.
624    ///
625    /// Failing that, the returned string slice may reference invalid memory or violate the
626    /// invariants communicated by the type.
627    ///
628    /// # Examples
629    ///
630    /// ```
631    /// # use widestring::{utf16str};
632    /// # #[cfg(feature = "alloc")] {
633    /// let mut v = utf16str!("⚧️🏳️‍⚧️➡️s").to_owned();
634    /// unsafe {
635    ///     assert_eq!(utf16str!("⚧️"), v.get_unchecked_mut(..2));
636    ///     assert_eq!(utf16str!("🏳️‍⚧️"), v.get_unchecked_mut(2..8));
637    ///     assert_eq!(utf16str!("➡️"), v.get_unchecked_mut(8..10));
638    ///     assert_eq!(utf16str!("s"), v.get_unchecked_mut(10..));
639    /// }
640    /// # }
641    /// ```
642    fn get_unchecked_mut() -> {}
643
644    /// Returns the length of `self`.
645    ///
646    /// This length is in `u16` values, not [`char`]s or graphemes. In other words, it may not be
    /// what a human considers the length of the string.
648    ///
649    /// # Examples
650    ///
651    /// ```
652    /// # use widestring::utf16str;
653    /// assert_eq!(utf16str!("foo").len(), 3);
654    ///
655    /// let complex = utf16str!("⚧️🏳️‍⚧️➡️s");
656    /// assert_eq!(complex.len(), 11);
657    /// assert_eq!(complex.chars().count(), 10);
658    /// ```
659    fn len() -> {}
660}
661
662utfstr_common_impl! {
663    /// UTF-32 string slice for [`Utf32String`][crate::Utf32String].
664    ///
665    /// [`Utf32Str`] is to [`Utf32String`][crate::Utf32String] as [`str`] is to [`String`].
666    ///
667    /// [`Utf32Str`] slices are string slices that are always valid UTF-32 encoding. This is unlike
    /// the [`U32Str`][crate::U32Str] string slices, which may not have valid encoding. In this way,
669    /// [`Utf32Str`] string slices most resemble native [`str`] slices of all the types in this
670    /// crate.
671    ///
672    /// # Examples
673    ///
674    /// The easiest way to use [`Utf32Str`] is with the [`utf32str!`][crate::utf32str] macro to
675    /// convert string literals into string slices at compile time:
676    ///
677    /// ```
678    /// use widestring::utf32str;
679    /// let hello = utf32str!("Hello, world!");
680    /// ```
681    ///
682    /// You can also convert a [`u32`] slice directly, provided it is valid UTF-32:
683    ///
684    /// ```
685    /// use widestring::Utf32Str;
686    ///
687    /// let sparkle_heart = [0x1f496];
688    /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
689    ///
690    /// assert_eq!("💖", sparkle_heart);
691    /// ```
692    ///
693    /// Since [`char`] slices are valid UTF-32, a slice of [`char`]s can be easily converted to a
694    /// string slice:
695    ///
696    /// ```
697    /// use widestring::Utf32Str;
698    ///
699    /// let sparkle_heart = ['💖'; 3];
700    /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
701    ///
702    /// assert_eq!("💖💖💖", sparkle_heart);
703    /// ```
704    struct Utf32Str([u32]);
705
706    type UtfString = Utf32String;
707    type UStr = U32Str;
708    type UCStr = U32CStr;
709    type UtfError = Utf32Error;
710    char_len_fn = char_len_utf32;
711
712    /// Converts a slice to a string slice without checking that the string contains valid UTF-32.
713    ///
714    /// See the safe version, [`from_slice`][Self::from_slice], for more information.
715    ///
716    /// # Safety
717    ///
718    /// This function is unsafe because it does not check that the slice passed to it is valid
719    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
720    /// [`Utf32Str`] is always valid UTF-32.
721    ///
722    /// # Examples
723    ///
724    /// ```
725    /// use widestring::Utf32Str;
726    ///
727    /// let sparkle_heart = vec![0x1f496];
728    /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked(&sparkle_heart) };
729    ///
730    /// assert_eq!("💖", sparkle_heart);
731    /// ```
732    fn from_slice_unchecked() -> {}
733
734    /// Converts a mutable slice to a mutable string slice without checking that the string contains
735    /// valid UTF-32.
736    ///
737    /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
738    ///
739    /// # Safety
740    ///
741    /// This function is unsafe because it does not check that the slice passed to it is valid
742    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
743    /// [`Utf32Str`] is always valid UTF-32.
744    ///
745    /// # Examples
746    ///
747    /// ```
748    /// use widestring::Utf32Str;
749    ///
750    /// let mut sparkle_heart = vec![0x1f496];
751    /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked_mut(&mut sparkle_heart) };
752    ///
753    /// assert_eq!("💖", sparkle_heart);
754    /// ```
755    fn from_slice_unchecked_mut() -> {}
756
757    /// Converts a boxed slice to a boxed string slice without checking that the string contains
758    /// valid UTF-32.
759    ///
760    /// # Safety
761    ///
762    /// This function is unsafe because it does not check if the string slice is valid UTF-32, and
763    /// [`Utf32Str`] must always be valid UTF-32.
764    fn from_boxed_slice_unchecked() -> {}
765
766    /// Returns an unchecked subslice of this string slice.
767    ///
768    /// This is the unchecked alternative to indexing the string slice.
769    ///
770    /// # Safety
771    ///
772    /// Callers of this function are responsible that these preconditions are satisfied:
773    ///
774    /// - The starting index must not exceed the ending index;
775    /// - Indexes must be within bounds of the original slice;
776    ///
777    /// Failing that, the returned string slice may reference invalid memory or violate the
778    /// invariants communicated by the type.
779    ///
780    /// # Examples
781    ///
782    /// ```
783    /// # use widestring::utf32str;
784    /// let v = utf32str!("⚧️🏳️‍⚧️➡️s");
785    /// unsafe {
786    ///     assert_eq!(utf32str!("⚧️"), v.get_unchecked(..2));
787    ///     assert_eq!(utf32str!("🏳️‍⚧️"), v.get_unchecked(2..7));
788    ///     assert_eq!(utf32str!("➡️"), v.get_unchecked(7..9));
789    ///     assert_eq!(utf32str!("s"), v.get_unchecked(9..))
790    /// }
791    /// ```
792    fn get_unchecked() -> {}
793
    /// Returns a mutable, unchecked subslice of this string slice.
795    ///
796    /// This is the unchecked alternative to indexing the string slice.
797    ///
798    /// # Safety
799    ///
800    /// Callers of this function are responsible that these preconditions are satisfied:
801    ///
802    /// - The starting index must not exceed the ending index;
803    /// - Indexes must be within bounds of the original slice;
804    ///
805    /// Failing that, the returned string slice may reference invalid memory or violate the
806    /// invariants communicated by the type.
807    ///
808    /// # Examples
809    ///
810    /// ```
811    /// # use widestring::utf32str;
812    /// # #[cfg(feature = "alloc")] {
813    /// let mut v = utf32str!("⚧️🏳️‍⚧️➡️s").to_owned();
814    /// unsafe {
815    ///     assert_eq!(utf32str!("⚧️"), v.get_unchecked_mut(..2));
816    ///     assert_eq!(utf32str!("🏳️‍⚧️"), v.get_unchecked_mut(2..7));
817    ///     assert_eq!(utf32str!("➡️"), v.get_unchecked_mut(7..9));
818    ///     assert_eq!(utf32str!("s"), v.get_unchecked_mut(9..))
819    /// }
820    /// # }
821    /// ```
822    fn get_unchecked_mut() -> {}
823
824    /// Returns the length of `self`.
825    ///
826    /// This length is in the number of [`char`]s in the slice, not graphemes. In other words, it
    /// may not be what a human considers the length of the string.
828    ///
829    /// # Examples
830    ///
831    /// ```
832    /// # use widestring::utf32str;
833    /// assert_eq!(utf32str!("foo").len(), 3);
834    ///
835    /// let complex = utf32str!("⚧️🏳️‍⚧️➡️s");
836    /// assert_eq!(complex.len(), 10);
837    /// assert_eq!(complex.chars().count(), 10);
838    /// ```
839    fn len() -> {}
840}
841
842impl Utf16Str {
843    /// Converts a slice of UTF-16 data to a string slice.
844    ///
845    /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
846    /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
847    /// then does the conversion.
848    ///
849    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
850    /// the validity check, there is an unsafe version of this function,
851    /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
852    /// the check.
853    ///
854    /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
855    ///
856    /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
857    /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
858    /// after converting from UTF-8 to UTF-16.
859    ///
860    /// # Errors
861    ///
862    /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
863    /// is not UTF-16.
864    ///
865    /// # Examples
866    ///
867    /// ```
868    /// use widestring::Utf16Str;
869    ///
870    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
871    /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
872    ///
873    /// assert_eq!("💖", sparkle_heart);
874    /// ```
875    ///
876    /// With incorrect values that return an error:
877    ///
878    /// ```
879    /// use widestring::Utf16Str;
880    ///
881    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
882    ///
883    /// assert!(Utf16Str::from_slice(&sparkle_heart).is_err());
884    /// ```
885    pub fn from_slice(s: &[u16]) -> Result<&Self, Utf16Error> {
886        validate_utf16(s)?;
887        // SAFETY: Just validated
888        Ok(unsafe { Self::from_slice_unchecked(s) })
889    }
890
891    /// Converts a mutable slice of UTF-16 data to a mutable string slice.
892    ///
893    /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
894    /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
895    /// then does the conversion.
896    ///
897    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
898    /// the validity check, there is an unsafe version of this function,
899    /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
900    /// but skips the check.
901    ///
902    /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
903    ///
904    /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
905    /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
906    /// after converting from UTF-8 to UTF-16.
907    ///
908    /// # Errors
909    ///
910    /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
911    /// is not UTF-16.
912    ///
913    /// # Examples
914    ///
915    /// ```
916    /// use widestring::Utf16Str;
917    ///
918    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
919    /// let sparkle_heart = Utf16Str::from_slice_mut(&mut sparkle_heart).unwrap();
920    ///
921    /// assert_eq!("💖", sparkle_heart);
922    /// ```
923    ///
924    /// With incorrect values that return an error:
925    ///
926    /// ```
927    /// use widestring::Utf16Str;
928    ///
929    /// let mut sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
930    ///
931    /// assert!(Utf16Str::from_slice_mut(&mut sparkle_heart).is_err());
932    /// ```
933    pub fn from_slice_mut(s: &mut [u16]) -> Result<&mut Self, Utf16Error> {
934        validate_utf16(s)?;
935        // SAFETY: Just validated
936        Ok(unsafe { Self::from_slice_unchecked_mut(s) })
937    }
938
939    /// Converts a wide string slice of undefined encoding to a UTF-16 string slice without checking
940    /// if the string slice is valid UTF-16.
941    ///
942    /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
943    ///
944    /// # Safety
945    ///
946    /// This function is unsafe because it does not check that the string slice passed to it is
947    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
948    /// the [`Utf16Str`] is always valid UTF-16.
949    ///
950    /// # Examples
951    ///
952    /// ```
953    /// use widestring::{Utf16Str, u16str};
954    ///
955    /// let sparkle_heart = u16str!("💖");
956    /// let sparkle_heart = unsafe { Utf16Str::from_ustr_unchecked(sparkle_heart) };
957    ///
958    /// assert_eq!("💖", sparkle_heart);
959    /// ```
960    #[must_use]
961    pub const unsafe fn from_ustr_unchecked(s: &U16Str) -> &Self {
962        Self::from_slice_unchecked(s.as_slice())
963    }
964
965    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice
966    /// without checking if the string slice is valid UTF-16.
967    ///
968    /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
969    ///
970    /// # Safety
971    ///
972    /// This function is unsafe because it does not check that the string slice passed to it is
973    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
974    /// the [`Utf16Str`] is always valid UTF-16.
975    #[must_use]
976    pub unsafe fn from_ustr_unchecked_mut(s: &mut U16Str) -> &mut Self {
977        Self::from_slice_unchecked_mut(s.as_mut_slice())
978    }
979
980    /// Converts a wide string slice of undefined encoding to a UTF-16 string slice.
981    ///
982    /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
983    /// [`U16Str`] does not contain valid UTF-16 data.
984    ///
985    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
986    /// the validity check, there is an unsafe version of this function,
987    /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
988    /// but skips the check.
989    ///
990    /// # Errors
991    ///
992    /// Returns an error if the string slice is not UTF-16 with a description as to why the
993    /// provided string slice is not UTF-16.
994    ///
995    /// # Examples
996    ///
997    /// ```
998    /// use widestring::{Utf16Str, u16str};
999    ///
1000    /// let sparkle_heart = u16str!("💖");
1001    /// let sparkle_heart = Utf16Str::from_ustr(sparkle_heart).unwrap();
1002    ///
1003    /// assert_eq!("💖", sparkle_heart);
1004    /// ```
1005    #[inline]
1006    pub fn from_ustr(s: &U16Str) -> Result<&Self, Utf16Error> {
1007        Self::from_slice(s.as_slice())
1008    }
1009
1010    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice.
1011    ///
1012    /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
1013    /// [`U16Str`] does not contain valid UTF-16 data.
1014    ///
1015    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1016    /// the validity check, there is an unsafe version of this function,
1017    /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1018    /// but skips the check.
1019    ///
1020    /// # Errors
1021    ///
1022    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1023    /// provided string slice is not UTF-16.
1024    #[inline]
1025    pub fn from_ustr_mut(s: &mut U16Str) -> Result<&mut Self, Utf16Error> {
1026        Self::from_slice_mut(s.as_mut_slice())
1027    }
1028
1029    /// Converts a wide C string slice to a UTF-16 string slice without checking if the
1030    /// string slice is valid UTF-16.
1031    ///
1032    /// The resulting string slice does *not* contain the nul terminator.
1033    ///
1034    /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1035    ///
1036    /// # Safety
1037    ///
1038    /// This function is unsafe because it does not check that the string slice passed to it is
1039    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1040    /// the [`Utf16Str`] is always valid UTF-16.
1041    ///
1042    /// # Examples
1043    ///
1044    /// ```
1045    /// use widestring::{Utf16Str, u16cstr};
1046    ///
1047    /// let sparkle_heart = u16cstr!("💖");
1048    /// let sparkle_heart = unsafe { Utf16Str::from_ucstr_unchecked(sparkle_heart) };
1049    ///
1050    /// assert_eq!("💖", sparkle_heart);
1051    /// ```
1052    #[inline]
1053    #[must_use]
1054    pub unsafe fn from_ucstr_unchecked(s: &crate::U16CStr) -> &Self {
1055        Self::from_slice_unchecked(s.as_slice())
1056    }
1057
1058    /// Converts a mutable wide C string slice to a mutable UTF-16 string slice without
1059    /// checking if the string slice is valid UTF-16.
1060    ///
1061    /// The resulting string slice does *not* contain the nul terminator.
1062    ///
1063    /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1064    ///
1065    /// # Safety
1066    ///
1067    /// This function is unsafe because it does not check that the string slice passed to it is
1068    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1069    /// the [`Utf16Str`] is always valid UTF-16.
1070    #[inline]
1071    #[must_use]
1072    pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U16CStr) -> &mut Self {
1073        Self::from_slice_unchecked_mut(s.as_mut_slice())
1074    }
1075
1076    /// Converts a wide C string slice to a UTF-16 string slice.
1077    ///
1078    /// The resulting string slice does *not* contain the nul terminator.
1079    ///
1080    /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1081    /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1082    ///
1083    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1084    /// the validity check, there is an unsafe version of this function,
1085    /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1086    /// but skips the check.
1087    ///
1088    /// # Errors
1089    ///
1090    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1091    /// provided string slice is not UTF-16.
1092    ///
1093    /// # Examples
1094    ///
1095    /// ```
1096    /// use widestring::{Utf16Str, u16cstr};
1097    ///
1098    /// let sparkle_heart = u16cstr!("💖");
1099    /// let sparkle_heart = Utf16Str::from_ucstr(sparkle_heart).unwrap();
1100    ///
1101    /// assert_eq!("💖", sparkle_heart);
1102    /// ```
1103    #[inline]
1104    pub fn from_ucstr(s: &crate::U16CStr) -> Result<&Self, Utf16Error> {
1105        Self::from_slice(s.as_slice())
1106    }
1107
1108    /// Converts a mutable wide C string slice to a mutable UTF-16 string slice.
1109    ///
1110    /// The resulting string slice does *not* contain the nul terminator.
1111    ///
1112    /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1113    /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1114    ///
1115    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1116    /// the validity check, there is an unsafe version of this function,
1117    /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1118    /// but skips the check.
1119    ///
1120    /// # Safety
1121    ///
1122    /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1123    /// when mutating the slice (i.e. by adding interior nul values).
1124    ///
1125    /// # Errors
1126    ///
1127    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1128    /// provided string slice is not UTF-16.
1129    #[inline]
1130    pub unsafe fn from_ucstr_mut(s: &mut crate::U16CStr) -> Result<&mut Self, Utf16Error> {
1131        Self::from_slice_mut(s.as_mut_slice())
1132    }
1133
1134    /// Converts to a standard UTF-8 [`String`].
1135    ///
1136    /// Because this string is always valid UTF-16, the conversion is lossless and non-fallible.
1137    #[inline]
1138    #[allow(clippy::inherent_to_string_shadow_display)]
1139    #[cfg(feature = "alloc")]
1140    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1141    #[must_use]
1142    pub fn to_string(&self) -> String {
1143        String::from_utf16(self.as_slice()).unwrap()
1144    }
1145
1146    /// Checks that `index`-th value is the value in a UTF-16 code point sequence or the end of the
1147    /// string.
1148    ///
1149    /// Returns `true` if the value at `index` is not a UTF-16 surrogate value, or if the value at
1150    /// `index` is the first value of a surrogate pair (the "high" surrogate). Returns `false` if
1151    /// the value at `index` is the second value of a surrogate pair (a.k.a the "low" surrogate).
1152    ///
1153    /// The start and end of the string (when `index == self.len()`) are considered to be
1154    /// boundaries.
1155    ///
1156    /// Returns `false` if `index is greater than `self.len()`.
1157    ///
1158    /// # Examples
1159    ///
1160    /// ```
1161    /// # use widestring::utf16str;
1162    /// let s = utf16str!("Sparkle 💖 Heart");
1163    /// assert!(s.is_char_boundary(0));
1164    ///
1165    /// // high surrogate of `💖`
1166    /// assert!(s.is_char_boundary(8));
1167    /// // low surrogate of `💖`
1168    /// assert!(!s.is_char_boundary(9));
1169    ///
1170    /// assert!(s.is_char_boundary(s.len()));
1171    /// ```
1172    #[inline]
1173    #[must_use]
1174    pub const fn is_char_boundary(&self, index: usize) -> bool {
1175        if index > self.len() {
1176            false
1177        } else if index == self.len() {
1178            true
1179        } else {
1180            !is_utf16_low_surrogate(self.inner[index])
1181        }
1182    }
1183
1184    /// Returns a subslice of this string.
1185    ///
1186    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1187    /// equivalent indexing operation would panic.
1188    ///
1189    /// # Examples
1190    ///
1191    /// ```
1192    /// # use widestring::{utf16str};
1193    /// let v = utf16str!("⚧️🏳️‍⚧️➡️s");
1194    ///
1195    /// assert_eq!(Some(utf16str!("⚧️")), v.get(..2));
1196    /// assert_eq!(Some(utf16str!("🏳️‍⚧️")), v.get(2..8));
1197    /// assert_eq!(Some(utf16str!("➡️")), v.get(8..10));
1198    /// assert_eq!(Some(utf16str!("s")), v.get(10..));
1199    ///
1200    /// assert!(v.get(3..4).is_none());
1201    /// ```
1202    #[inline]
1203    #[must_use]
1204    pub fn get<I>(&self, index: I) -> Option<&Self>
1205    where
1206        I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1207    {
1208        // TODO: Use SliceIndex directly when it is stabilized
1209        let range = crate::range_check(index, ..self.len())?;
1210        if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1211            return None;
1212        }
1213
1214        // SAFETY: range_check verified bounds, and we just verified char boundaries
1215        Some(unsafe { self.get_unchecked(range) })
1216    }
1217
1218    /// Returns a mutable subslice of this string.
1219    ///
1220    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1221    /// equivalent indexing operation would panic.
1222    ///
1223    /// # Examples
1224    ///
1225    /// ```
1226    /// # use widestring::{utf16str};
1227    /// # #[cfg(feature = "alloc")] {
1228    /// let mut v = utf16str!("⚧️🏳️‍⚧️➡️s").to_owned();
1229    ///
1230    /// assert_eq!(utf16str!("⚧️"), v.get_mut(..2).unwrap());
1231    /// assert_eq!(utf16str!("🏳️‍⚧️"), v.get_mut(2..8).unwrap());
1232    /// assert_eq!(utf16str!("➡️"), v.get_mut(8..10).unwrap());
1233    /// assert_eq!(utf16str!("s"), v.get_mut(10..).unwrap());
1234    ///
1235    /// assert!(v.get_mut(3..4).is_none());
1236    /// # }
1237    /// ```
1238    #[inline]
1239    #[must_use]
1240    pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1241    where
1242        I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1243    {
1244        // TODO: Use SliceIndex directly when it is stabilized
1245        let range = crate::range_check(index, ..self.len())?;
1246        if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1247            return None;
1248        }
1249
1250        // SAFETY: range_check verified bounds, and we just verified char boundaries
1251        Some(unsafe { self.get_unchecked_mut(range) })
1252    }
1253
1254    /// Divide one string slice into two at an index.
1255    ///
1256    /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1257    /// the boundary of a UTF-16 code point.
1258    ///
1259    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1260    /// the end of the string slice.
1261    ///
1262    /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1263    ///
1264    /// # Panics
1265    ///
1266    /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1267    /// code point of the string slice.
1268    ///
1269    /// # Examples
1270    ///
1271    /// ```
1272    /// # use widestring::utf16str;
1273    /// let s = utf16str!("Per Martin-Löf");
1274    ///
1275    /// let (first, last) = s.split_at(3);
1276    ///
1277    /// assert_eq!("Per", first);
1278    /// assert_eq!(" Martin-Löf", last);
1279    /// ```
1280    #[inline]
1281    #[must_use]
1282    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1283        assert!(self.is_char_boundary(mid));
1284        let (a, b) = self.inner.split_at(mid);
1285        unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1286    }
1287
1288    /// Divide one mutable string slice into two at an index.
1289    ///
1290    /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1291    /// the boundary of a UTF-16 code point.
1292    ///
1293    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1294    /// the end of the string slice.
1295    ///
1296    /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1297    ///
1298    /// # Panics
1299    ///
1300    /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1301    /// code point of the string slice.
1302    ///
1303    /// # Examples
1304    ///
1305    /// ```
1306    /// # use widestring::utf16str;
1307    /// # #[cfg(feature = "alloc")] {
1308    /// let mut s = utf16str!("Per Martin-Löf").to_owned();
1309    ///
1310    /// let (first, last) = s.split_at_mut(3);
1311    ///
1312    /// assert_eq!("Per", first);
1313    /// assert_eq!(" Martin-Löf", last);
1314    /// # }
1315    /// ```
1316    #[inline]
1317    #[must_use]
1318    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1319        assert!(self.is_char_boundary(mid));
1320        let (a, b) = self.inner.split_at_mut(mid);
1321        unsafe {
1322            (
1323                Self::from_slice_unchecked_mut(a),
1324                Self::from_slice_unchecked_mut(b),
1325            )
1326        }
1327    }
1328
1329    /// Returns an iterator over the [`char`]s of a string slice.
1330    ///
1331    /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1332    /// [`char`]. This method returns such an iterator.
1333    ///
1334    /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1335    /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1336    /// actually want. This functionality is not provided by this crate.
1337    #[inline]
1338    #[must_use]
1339    pub fn chars(&self) -> CharsUtf16<'_> {
1340        CharsUtf16::new(self.as_slice())
1341    }
1342
1343    /// Returns an iterator over the [`char`]s of a string slice and their positions.
1344    ///
1345    /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1346    /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
1347    ///
1348    /// The iterator yields tuples. The position is first, the [`char`] is second.
1349    #[inline]
1350    #[must_use]
1351    pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
1352        CharIndicesUtf16::new(self.as_slice())
1353    }
1354
1355    /// An iterator over the [`u16`] code units of a string slice.
1356    ///
1357    /// As a UTF-16 string slice consists of a sequence of [`u16`] code units, we can iterate
1358    /// through a string slice by each code unit. This method returns such an iterator.
1359    #[must_use]
1360    pub fn code_units(&self) -> CodeUnits<'_> {
1361        CodeUnits::new(self.as_slice())
1362    }
1363
1364    /// Returns an iterator of bytes over the string encoded as UTF-8.
1365    #[must_use]
1366    pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf16<'_>> {
1367        crate::encode_utf8(self.chars())
1368    }
1369
1370    /// Returns an iterator of [`u32`] over the sting encoded as UTF-32.
1371    #[must_use]
1372    pub fn encode_utf32(&self) -> EncodeUtf32<CharsUtf16<'_>> {
1373        crate::encode_utf32(self.chars())
1374    }
1375
1376    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
1377    #[inline]
1378    #[must_use]
1379    pub fn escape_debug(&self) -> EscapeDebug<CharsUtf16<'_>> {
1380        EscapeDebug::<CharsUtf16>::new(self.as_slice())
1381    }
1382
1383    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
1384    #[inline]
1385    #[must_use]
1386    pub fn escape_default(&self) -> EscapeDefault<CharsUtf16<'_>> {
1387        EscapeDefault::<CharsUtf16>::new(self.as_slice())
1388    }
1389
1390    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
1391    #[inline]
1392    #[must_use]
1393    pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf16<'_>> {
1394        EscapeUnicode::<CharsUtf16>::new(self.as_slice())
1395    }
1396
1397    /// Returns the lowercase equivalent of this string slice, as a new [`Utf16String`].
1398    ///
1399    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1400    /// `Lowercase`.
1401    ///
1402    /// Since some characters can expand into multiple characters when changing the case, this
1403    /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
1404    #[inline]
1405    #[cfg(feature = "alloc")]
1406    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1407    #[must_use]
1408    pub fn to_lowercase(&self) -> Utf16String {
1409        let mut s = Utf16String::with_capacity(self.len());
1410        for c in self.chars() {
1411            for lower in c.to_lowercase() {
1412                s.push(lower);
1413            }
1414        }
1415        s
1416    }
1417
1418    /// Returns the uppercase equivalent of this string slice, as a new [`Utf16String`].
1419    ///
1420    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1421    /// `Uppercase`.
1422    ///
1423    /// Since some characters can expand into multiple characters when changing the case, this
1424    /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
1425    #[inline]
1426    #[cfg(feature = "alloc")]
1427    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1428    #[must_use]
1429    pub fn to_uppercase(&self) -> Utf16String {
1430        let mut s = Utf16String::with_capacity(self.len());
1431        for c in self.chars() {
1432            for lower in c.to_uppercase() {
1433                s.push(lower);
1434            }
1435        }
1436        s
1437    }
1438
1439    /// Returns an iterator over the lines of a [`Utf16Str`], as string slices.
1440    ///
1441    /// Lines are split at line endings that are either newlines (`\n`) or
1442    /// sequences of a carriage return followed by a line feed (`\r\n`).
1443    ///
1444    /// Line terminators are not included in the lines returned by the iterator.
1445    ///
1446    /// Note that any carriage return (`\r`) not immediately followed by a
1447    /// line feed (`\n`) does not split a line. These carriage returns are
1448    /// thereby included in the produced lines.
1449    ///
1450    /// The final line ending is optional. A string that ends with a final line
1451    /// ending will return the same lines as an otherwise identical string
1452    /// without a final line ending.
1453    ///
1454    /// # Examples
1455    ///
1456    /// Basic usage:
1457    ///
1458    /// ```
1459    /// use widestring::utf16str;
1460    ///
1461    /// let text = utf16str!("foo\r\nbar\n\nbaz\r");
1462    /// let mut lines = text.lines();
1463    ///
1464    /// assert_eq!(Some(utf16str!("foo")), lines.next());
1465    /// assert_eq!(Some(utf16str!("bar")), lines.next());
1466    /// assert_eq!(Some(utf16str!("")), lines.next());
1467    /// // Trailing carriage return is included in the last line
1468    /// assert_eq!(Some(utf16str!("baz\r")), lines.next());
1469    ///
1470    /// assert_eq!(None, lines.next());
1471    /// ```
1472    ///
1473    /// The final line does not require any ending:
1474    ///
1475    /// ```
1476    /// use widestring::utf16str;
1477    ///
1478    /// let text = utf16str!("foo\nbar\n\r\nbaz");
1479    /// let mut lines = text.lines();
1480    ///
1481    /// assert_eq!(Some(utf16str!("foo")), lines.next());
1482    /// assert_eq!(Some(utf16str!("bar")), lines.next());
1483    /// assert_eq!(Some(utf16str!("")), lines.next());
1484    /// assert_eq!(Some(utf16str!("baz")), lines.next());
1485    ///
1486    /// assert_eq!(None, lines.next());
1487    /// ```
1488    pub fn lines(&self) -> Lines<'_, Utf16Str, CharIndicesUtf16<'_>> {
1489        Lines::new(self, self.len(), self.char_indices())
1490    }
1491}
1492
1493impl Utf32Str {
1494    /// Converts a slice of UTF-32 data to a string slice.
1495    ///
1496    /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1497    /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1498    /// then does the conversion.
1499    ///
1500    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1501    /// the validity check, there is an unsafe version of this function,
1502    /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
1503    /// the check.
1504    ///
1505    /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1506    ///
1507    /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1508    /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1509    /// after converting from UTF-8 to UTF-32.
1510    ///
1511    /// # Errors
1512    ///
1513    /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1514    /// is not UTF-32.
1515    ///
1516    /// # Examples
1517    ///
1518    /// ```
1519    /// use widestring::Utf32Str;
1520    ///
1521    /// let sparkle_heart = vec![0x1f496];
1522    /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
1523    ///
1524    /// assert_eq!("💖", sparkle_heart);
1525    /// ```
1526    ///
1527    /// With incorrect values that return an error:
1528    ///
1529    /// ```
1530    /// use widestring::Utf32Str;
1531    ///
1532    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1533    ///
1534    /// assert!(Utf32Str::from_slice(&sparkle_heart).is_err());
1535    /// ```
1536    pub fn from_slice(s: &[u32]) -> Result<&Self, Utf32Error> {
1537        validate_utf32(s)?;
1538        // SAFETY: Just validated
1539        Ok(unsafe { Self::from_slice_unchecked(s) })
1540    }
1541
1542    /// Converts a mutable slice of UTF-32 data to a mutable string slice.
1543    ///
1544    /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1545    /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1546    /// then does the conversion.
1547    ///
1548    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1549    /// the validity check, there is an unsafe version of this function,
1550    /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
1551    /// but skips the check.
1552    ///
1553    /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1554    ///
1555    /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1556    /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1557    /// after converting from UTF-8 to UTF-32.
1558    ///
1559    /// # Errors
1560    ///
1561    /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1562    /// is not UTF-32.
1563    ///
1564    /// # Examples
1565    ///
1566    /// ```
1567    /// use widestring::Utf32Str;
1568    ///
1569    /// let mut sparkle_heart = vec![0x1f496];
1570    /// let sparkle_heart = Utf32Str::from_slice_mut(&mut sparkle_heart).unwrap();
1571    ///
1572    /// assert_eq!("💖", sparkle_heart);
1573    /// ```
1574    ///
1575    /// With incorrect values that return an error:
1576    ///
1577    /// ```
1578    /// use widestring::Utf32Str;
1579    ///
1580    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1581    ///
1582    /// assert!(Utf32Str::from_slice_mut(&mut sparkle_heart).is_err());
1583    /// ```
1584    pub fn from_slice_mut(s: &mut [u32]) -> Result<&mut Self, Utf32Error> {
1585        validate_utf32(s)?;
1586        // SAFETY: Just validated
1587        Ok(unsafe { Self::from_slice_unchecked_mut(s) })
1588    }
1589
1590    /// Converts a wide string slice of undefined encoding to a UTF-32 string slice without checking
1591    /// if the string slice is valid UTF-32.
1592    ///
1593    /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
1594    ///
1595    /// # Safety
1596    ///
1597    /// This function is unsafe because it does not check that the string slice passed to it is
1598    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1599    /// the [`Utf32Str`] is always valid UTF-32.
1600    ///
1601    /// # Examples
1602    ///
1603    /// ```
1604    /// use widestring::{Utf32Str, u32str};
1605    ///
1606    /// let sparkle_heart = u32str!("💖");
1607    /// let sparkle_heart = unsafe { Utf32Str::from_ustr_unchecked(sparkle_heart) };
1608    ///
1609    /// assert_eq!("💖", sparkle_heart);
1610    /// ```
1611    #[inline]
1612    #[must_use]
1613    pub const unsafe fn from_ustr_unchecked(s: &crate::U32Str) -> &Self {
1614        Self::from_slice_unchecked(s.as_slice())
1615    }
1616
1617    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice
1618    /// without checking if the string slice is valid UTF-32.
1619    ///
1620    /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
1621    ///
1622    /// # Safety
1623    ///
1624    /// This function is unsafe because it does not check that the string slice passed to it is
1625    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1626    /// the [`Utf32Str`] is always valid UTF-32.
1627    #[inline]
1628    #[must_use]
1629    pub unsafe fn from_ustr_unchecked_mut(s: &mut crate::U32Str) -> &mut Self {
1630        Self::from_slice_unchecked_mut(s.as_mut_slice())
1631    }
1632
1633    /// Converts a wide string slice of undefined encoding to a UTF-32 string slice.
1634    ///
1635    /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1636    /// [`U32Str`] does not contain valid UTF-32 data.
1637    ///
1638    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1639    /// the validity check, there is an unsafe version of this function,
1640    /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
1641    /// but skips the check.
1642    ///
1643    /// # Errors
1644    ///
1645    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1646    /// provided string slice is not UTF-32.
1647    ///
1648    /// # Examples
1649    ///
1650    /// ```
1651    /// use widestring::{Utf32Str, u32str};
1652    ///
1653    /// let sparkle_heart = u32str!("💖");
1654    /// let sparkle_heart = Utf32Str::from_ustr(sparkle_heart).unwrap();
1655    ///
1656    /// assert_eq!("💖", sparkle_heart);
1657    /// ```
1658    #[inline]
1659    pub fn from_ustr(s: &crate::U32Str) -> Result<&Self, Utf32Error> {
1660        Self::from_slice(s.as_slice())
1661    }
1662
1663    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice.
1664    ///
1665    /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1666    /// [`U32Str`] does not contain valid UTF-32 data.
1667    ///
1668    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1669    /// the validity check, there is an unsafe version of this function,
1670    /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1671    /// but skips the check.
1672    ///
1673    /// # Errors
1674    ///
1675    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1676    /// provided string slice is not UTF-32.
1677    #[inline]
1678    pub fn from_ustr_mut(s: &mut crate::U32Str) -> Result<&mut Self, Utf32Error> {
1679        Self::from_slice_mut(s.as_mut_slice())
1680    }
1681
1682    /// Converts a wide C string slice to a UTF-32 string slice without checking if the
1683    /// string slice is valid UTF-32.
1684    ///
1685    /// The resulting string slice does *not* contain the nul terminator.
1686    ///
1687    /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1688    ///
1689    /// # Safety
1690    ///
1691    /// This function is unsafe because it does not check that the string slice passed to it is
1692    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1693    /// the [`Utf32Str`] is always valid UTF-32.
1694    ///
1695    /// # Examples
1696    ///
1697    /// ```
1698    /// use widestring::{Utf32Str, u32cstr};
1699    ///
1700    /// let sparkle_heart = u32cstr!("💖");
1701    /// let sparkle_heart = unsafe { Utf32Str::from_ucstr_unchecked(sparkle_heart) };
1702    ///
1703    /// assert_eq!("💖", sparkle_heart);
1704    /// ```
1705    #[inline]
1706    #[must_use]
1707    pub unsafe fn from_ucstr_unchecked(s: &crate::U32CStr) -> &Self {
1708        Self::from_slice_unchecked(s.as_slice())
1709    }
1710
1711    /// Converts a mutable wide C string slice to a mutable UTF-32 string slice without
1712    /// checking if the string slice is valid UTF-32.
1713    ///
1714    /// The resulting string slice does *not* contain the nul terminator.
1715    ///
1716    /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1717    ///
1718    /// # Safety
1719    ///
1720    /// This function is unsafe because it does not check that the string slice passed to it is
1721    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1722    /// the [`Utf32Str`] is always valid UTF-32.
1723    #[inline]
1724    #[must_use]
1725    pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U32CStr) -> &mut Self {
1726        Self::from_slice_unchecked_mut(s.as_mut_slice())
1727    }
1728
1729    /// Converts a wide C string slice to a UTF-32 string slice.
1730    ///
1731    /// The resulting string slice does *not* contain the nul terminator.
1732    ///
1733    /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1734    /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1735    ///
1736    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1737    /// the validity check, there is an unsafe version of this function,
1738    /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1739    /// but skips the check.
1740    ///
1741    /// # Errors
1742    ///
1743    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1744    /// provided string slice is not UTF-32.
1745    ///
1746    /// # Examples
1747    ///
1748    /// ```
1749    /// use widestring::{Utf32Str, u32cstr};
1750    ///
1751    /// let sparkle_heart = u32cstr!("💖");
1752    /// let sparkle_heart = Utf32Str::from_ucstr(sparkle_heart).unwrap();
1753    ///
1754    /// assert_eq!("💖", sparkle_heart);
1755    /// ```
1756    #[inline]
1757    pub fn from_ucstr(s: &crate::U32CStr) -> Result<&Self, Utf32Error> {
1758        Self::from_slice(s.as_slice())
1759    }
1760
1761    /// Converts a mutable wide C string slice to a mutable UTF-32 string slice.
1762    ///
1763    /// The resulting string slice does *not* contain the nul terminator.
1764    ///
1765    /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1766    /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1767    ///
1768    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1769    /// the validity check, there is an unsafe version of this function,
1770    /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1771    /// but skips the check.
1772    ///
1773    /// # Safety
1774    ///
1775    /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1776    /// when mutating the slice (i.e. by adding interior nul values).
1777    ///
1778    /// # Errors
1779    ///
1780    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1781    /// provided string slice is not UTF-32.
1782    #[inline]
1783    pub unsafe fn from_ucstr_mut(s: &mut crate::U32CStr) -> Result<&mut Self, Utf32Error> {
1784        Self::from_slice_mut(s.as_mut_slice())
1785    }
1786
1787    /// Converts a slice of [`char`]s to a string slice.
1788    ///
1789    /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1790    ///
1791    /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1792    ///
1793    /// # Examples
1794    ///
1795    /// ```
1796    /// use widestring::Utf32Str;
1797    ///
1798    /// let sparkle_heart = ['💖'];
1799    /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
1800    ///
1801    /// assert_eq!("💖", sparkle_heart);
1802    /// ```
1803    #[allow(trivial_casts)]
1804    #[inline]
1805    #[must_use]
1806    pub const fn from_char_slice(s: &[char]) -> &Self {
1807        // SAFETY: char slice is always valid UTF-32
1808        unsafe { Self::from_slice_unchecked(&*(s as *const [char] as *const [u32])) }
1809    }
1810
1811    /// Converts a mutable slice of [`char`]s to a string slice.
1812    ///
1813    /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1814    ///
1815    /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1816    ///
1817    /// # Examples
1818    ///
1819    /// ```
1820    /// use widestring::Utf32Str;
1821    ///
1822    /// let mut sparkle_heart = ['💖'];
1823    /// let sparkle_heart = Utf32Str::from_char_slice_mut(&mut sparkle_heart);
1824    ///
1825    /// assert_eq!("💖", sparkle_heart);
1826    /// ```
1827    #[allow(trivial_casts)]
1828    #[inline]
1829    #[must_use]
1830    pub fn from_char_slice_mut(s: &mut [char]) -> &mut Self {
1831        // SAFETY: char slice is always valid UTF-32
1832        unsafe { Self::from_slice_unchecked_mut(&mut *(s as *mut [char] as *mut [u32])) }
1833    }
1834
1835    /// Converts a string slice into a slice of [`char`]s.
1836    #[allow(trivial_casts)]
1837    #[inline]
1838    #[must_use]
1839    pub const fn as_char_slice(&self) -> &[char] {
1840        // SAFETY: Self should be valid UTF-32 so chars will be in range
1841        unsafe { &*(self.as_slice() as *const [u32] as *const [char]) }
1842    }
1843
1844    /// Converts a mutable string slice into a mutable slice of [`char`]s.
1845    #[allow(trivial_casts)]
1846    #[inline]
1847    #[must_use]
1848    pub fn as_char_slice_mut(&mut self) -> &mut [char] {
1849        // SAFETY: Self should be valid UTF-32 so chars will be in range
1850        unsafe { &mut *(self.as_mut_slice() as *mut [u32] as *mut [char]) }
1851    }
1852
1853    /// Converts to a standard UTF-8 [`String`].
1854    ///
1855    /// Because this string is always valid UTF-32, the conversion is lossless and non-fallible.
1856    #[inline]
1857    #[allow(clippy::inherent_to_string_shadow_display)]
1858    #[cfg(feature = "alloc")]
1859    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1860    #[must_use]
1861    pub fn to_string(&self) -> String {
1862        let mut s = String::with_capacity(self.len());
1863        s.extend(self.as_char_slice());
1864        s
1865    }
1866
1867    /// Returns a subslice of this string.
1868    ///
1869    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1870    /// equivalent indexing operation would panic.
1871    ///
1872    /// # Examples
1873    ///
1874    /// ```
1875    /// # use widestring::{utf32str};
1876    /// let v = utf32str!("⚧️🏳️‍⚧️➡️s");
1877    ///
1878    /// assert_eq!(Some(utf32str!("⚧️")), v.get(..2));
1879    /// assert_eq!(Some(utf32str!("🏳️‍⚧️")), v.get(2..7));
1880    /// assert_eq!(Some(utf32str!("➡️")), v.get(7..9));
1881    /// assert_eq!(Some(utf32str!("s")), v.get(9..));
1882    /// ```
1883    #[inline]
1884    #[must_use]
1885    pub fn get<I>(&self, index: I) -> Option<&Self>
1886    where
1887        I: SliceIndex<[u32], Output = [u32]>,
1888    {
1889        // TODO: Use SliceIndex directly when it is stabilized
1890        // SAFETY: subslice has already been verified
1891        self.inner
1892            .get(index)
1893            .map(|s| unsafe { Self::from_slice_unchecked(s) })
1894    }
1895
1896    /// Returns a mutable subslice of this string.
1897    ///
1898    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1899    /// equivalent indexing operation would panic.
1900    ///
1901    /// # Examples
1902    ///
1903    /// ```
1904    /// # use widestring::{utf32str};
1905    /// # #[cfg(feature = "alloc")] {
1906    /// let mut v = utf32str!("⚧️🏳️‍⚧️➡️s").to_owned();
1907    ///
1908    /// assert_eq!(utf32str!("⚧️"), v.get_mut(..2).unwrap());
1909    /// assert_eq!(utf32str!("🏳️‍⚧️"), v.get_mut(2..7).unwrap());
1910    /// assert_eq!(utf32str!("➡️"), v.get_mut(7..9).unwrap());
1911    /// assert_eq!(utf32str!("s"), v.get_mut(9..).unwrap());
1912    /// # }
1913    /// ```
1914    #[inline]
1915    #[must_use]
1916    pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1917    where
1918        I: SliceIndex<[u32], Output = [u32]>,
1919    {
1920        // TODO: Use SliceIndex directly when it is stabilized
1921        // SAFETY: subslice has already been verified
1922        self.inner
1923            .get_mut(index)
1924            .map(|s| unsafe { Self::from_slice_unchecked_mut(s) })
1925    }
1926
1927    /// Divide one string slice into two at an index.
1928    ///
1929    /// The argument, `mid`, should be an offset from the start of the string.
1930    ///
1931    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1932    /// the end of the string slice.
1933    ///
1934    /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1935    ///
1936    /// # Panics
1937    ///
1938    /// Panics if `mid` is past the end of the last code point of the string slice.
1939    ///
1940    /// # Examples
1941    ///
1942    /// ```
1943    /// # use widestring::utf32str;
1944    /// let s = utf32str!("Per Martin-Löf");
1945    ///
1946    /// let (first, last) = s.split_at(3);
1947    ///
1948    /// assert_eq!("Per", first);
1949    /// assert_eq!(" Martin-Löf", last);
1950    /// ```
1951    #[inline]
1952    #[must_use]
1953    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1954        let (a, b) = self.inner.split_at(mid);
1955        unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1956    }
1957
1958    /// Divide one mutable string slice into two at an index.
1959    ///
1960    /// The argument, `mid`, should be an offset from the start of the string.
1961    ///
1962    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1963    /// the end of the string slice.
1964    ///
1965    /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1966    ///
1967    /// # Panics
1968    ///
1969    /// Panics if `mid` is past the end of the last code point of the string slice.
1970    ///
1971    /// # Examples
1972    ///
1973    /// ```
1974    /// # use widestring::utf32str;
1975    /// # #[cfg(feature = "alloc")] {
1976    /// let mut s = utf32str!("Per Martin-Löf").to_owned();
1977    ///
1978    /// let (first, last) = s.split_at_mut(3);
1979    ///
1980    /// assert_eq!("Per", first);
1981    /// assert_eq!(" Martin-Löf", last);
1982    /// # }
1983    /// ```
1984    #[inline]
1985    #[must_use]
1986    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1987        let (a, b) = self.inner.split_at_mut(mid);
1988        unsafe {
1989            (
1990                Self::from_slice_unchecked_mut(a),
1991                Self::from_slice_unchecked_mut(b),
1992            )
1993        }
1994    }
1995
1996    /// Returns an iterator over the [`char`]s of a string slice.
1997    ///
1998    /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
1999    /// [`char`]. This method returns such an iterator.
2000    ///
2001    /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
2002    /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
2003    /// actually want. This functionality is not provided by this crate.
2004    #[inline]
2005    #[must_use]
2006    pub fn chars(&self) -> CharsUtf32<'_> {
2007        CharsUtf32::new(self.as_slice())
2008    }
2009
2010    /// Returns an iterator over the [`char`]s of a string slice and their positions.
2011    ///
2012    /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
2013    /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
2014    ///
2015    /// The iterator yields tuples. The position is first, the [`char`] is second.
2016    #[inline]
2017    #[must_use]
2018    pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
2019        CharIndicesUtf32::new(self.as_slice())
2020    }
2021
2022    /// Returns an iterator of bytes over the string encoded as UTF-8.
2023    #[must_use]
2024    pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf32<'_>> {
2025        crate::encode_utf8(self.chars())
2026    }
2027
2028    /// Returns an iterator of [`u16`] over the sting encoded as UTF-16.
2029    #[must_use]
2030    pub fn encode_utf16(&self) -> EncodeUtf16<CharsUtf32<'_>> {
2031        crate::encode_utf16(self.chars())
2032    }
2033
2034    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
2035    #[inline]
2036    #[must_use]
2037    pub fn escape_debug(&self) -> EscapeDebug<CharsUtf32<'_>> {
2038        EscapeDebug::<CharsUtf32>::new(self.as_slice())
2039    }
2040
2041    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
2042    #[inline]
2043    #[must_use]
2044    pub fn escape_default(&self) -> EscapeDefault<CharsUtf32<'_>> {
2045        EscapeDefault::<CharsUtf32>::new(self.as_slice())
2046    }
2047
2048    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
2049    #[inline]
2050    #[must_use]
2051    pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf32<'_>> {
2052        EscapeUnicode::<CharsUtf32>::new(self.as_slice())
2053    }
2054
2055    /// Returns the lowercase equivalent of this string slice, as a new [`Utf32String`].
2056    ///
2057    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
2058    /// `Lowercase`.
2059    ///
2060    /// Since some characters can expand into multiple characters when changing the case, this
2061    /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2062    #[inline]
2063    #[cfg(feature = "alloc")]
2064    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2065    #[must_use]
2066    pub fn to_lowercase(&self) -> Utf32String {
2067        let mut s = Utf32String::with_capacity(self.len());
2068        for c in self.chars() {
2069            for lower in c.to_lowercase() {
2070                s.push(lower);
2071            }
2072        }
2073        s
2074    }
2075
2076    /// Returns the uppercase equivalent of this string slice, as a new [`Utf32String`].
2077    ///
2078    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
2079    /// `Uppercase`.
2080    ///
2081    /// Since some characters can expand into multiple characters when changing the case, this
2082    /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2083    #[inline]
2084    #[cfg(feature = "alloc")]
2085    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2086    #[must_use]
2087    pub fn to_uppercase(&self) -> Utf32String {
2088        let mut s = Utf32String::with_capacity(self.len());
2089        for c in self.chars() {
2090            for lower in c.to_uppercase() {
2091                s.push(lower);
2092            }
2093        }
2094        s
2095    }
2096
2097    /// Returns an iterator over the lines of a [`Utf32Str`], as string slices.
2098    ///
2099    /// Lines are split at line endings that are either newlines (`\n`) or
2100    /// sequences of a carriage return followed by a line feed (`\r\n`).
2101    ///
2102    /// Line terminators are not included in the lines returned by the iterator.
2103    ///
2104    /// Note that any carriage return (`\r`) not immediately followed by a
2105    /// line feed (`\n`) does not split a line. These carriage returns are
2106    /// thereby included in the produced lines.
2107    ///
2108    /// The final line ending is optional. A string that ends with a final line
2109    /// ending will return the same lines as an otherwise identical string
2110    /// without a final line ending.
2111    ///
2112    /// # Examples
2113    ///
2114    /// Basic usage:
2115    ///
2116    /// ```
2117    /// use widestring::utf32str;
2118    ///
2119    /// let text = utf32str!("foo\r\nbar\n\nbaz\r");
2120    /// let mut lines = text.lines();
2121    ///
2122    /// assert_eq!(Some(utf32str!("foo")), lines.next());
2123    /// assert_eq!(Some(utf32str!("bar")), lines.next());
2124    /// assert_eq!(Some(utf32str!("")), lines.next());
2125    /// // Trailing carriage return is included in the last line
2126    /// assert_eq!(Some(utf32str!("baz\r")), lines.next());
2127    ///
2128    /// assert_eq!(None, lines.next());
2129    /// ```
2130    ///
2131    /// The final line does not require any ending:
2132    ///
2133    /// ```
2134    /// use widestring::utf32str;
2135    ///
2136    /// let text = utf32str!("foo\nbar\n\r\nbaz");
2137    /// let mut lines = text.lines();
2138    ///
2139    /// assert_eq!(Some(utf32str!("foo")), lines.next());
2140    /// assert_eq!(Some(utf32str!("bar")), lines.next());
2141    /// assert_eq!(Some(utf32str!("")), lines.next());
2142    /// assert_eq!(Some(utf32str!("baz")), lines.next());
2143    ///
2144    /// assert_eq!(None, lines.next());
2145    /// ```
2146    pub fn lines(&self) -> Lines<'_, Utf32Str, CharIndicesUtf32<'_>> {
2147        Lines::new(self, self.len(), self.char_indices())
2148    }
2149}
2150
2151impl AsMut<[char]> for Utf32Str {
2152    #[inline]
2153    fn as_mut(&mut self) -> &mut [char] {
2154        self.as_char_slice_mut()
2155    }
2156}
2157
2158impl AsRef<[char]> for Utf32Str {
2159    #[inline]
2160    fn as_ref(&self) -> &[char] {
2161        self.as_char_slice()
2162    }
2163}
2164
2165impl<'a> From<&'a [char]> for &'a Utf32Str {
2166    #[inline]
2167    fn from(value: &'a [char]) -> Self {
2168        Utf32Str::from_char_slice(value)
2169    }
2170}
2171
2172impl<'a> From<&'a mut [char]> for &'a mut Utf32Str {
2173    #[inline]
2174    fn from(value: &'a mut [char]) -> Self {
2175        Utf32Str::from_char_slice_mut(value)
2176    }
2177}
2178
2179impl<'a> From<&'a Utf32Str> for &'a [char] {
2180    #[inline]
2181    fn from(value: &'a Utf32Str) -> Self {
2182        value.as_char_slice()
2183    }
2184}
2185
2186impl<'a> From<&'a mut Utf32Str> for &'a mut [char] {
2187    #[inline]
2188    fn from(value: &'a mut Utf32Str) -> Self {
2189        value.as_char_slice_mut()
2190    }
2191}
2192
2193impl<I> Index<I> for Utf16Str
2194where
2195    I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2196{
2197    type Output = Utf16Str;
2198
2199    #[inline]
2200    fn index(&self, index: I) -> &Self::Output {
2201        self.get(index)
2202            .expect("index out of bounds or not on char boundary")
2203    }
2204}
2205
2206impl<I> Index<I> for Utf32Str
2207where
2208    I: SliceIndex<[u32], Output = [u32]>,
2209{
2210    type Output = Utf32Str;
2211
2212    #[inline]
2213    fn index(&self, index: I) -> &Self::Output {
2214        self.get(index).expect("index out of bounds")
2215    }
2216}
2217
2218impl<I> IndexMut<I> for Utf16Str
2219where
2220    I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2221{
2222    #[inline]
2223    fn index_mut(&mut self, index: I) -> &mut Self::Output {
2224        self.get_mut(index)
2225            .expect("index out of bounds or not on char boundary")
2226    }
2227}
2228
2229impl<I> IndexMut<I> for Utf32Str
2230where
2231    I: SliceIndex<[u32], Output = [u32]>,
2232{
2233    #[inline]
2234    fn index_mut(&mut self, index: I) -> &mut Self::Output {
2235        self.get_mut(index).expect("index out of bounds")
2236    }
2237}
2238
2239impl PartialEq<[char]> for Utf32Str {
2240    #[inline]
2241    fn eq(&self, other: &[char]) -> bool {
2242        self.as_char_slice() == other
2243    }
2244}
2245
2246impl PartialEq<Utf32Str> for [char] {
2247    #[inline]
2248    fn eq(&self, other: &Utf32Str) -> bool {
2249        self == other.as_char_slice()
2250    }
2251}
2252
2253impl PartialEq<Utf16Str> for Utf32Str {
2254    #[inline]
2255    fn eq(&self, other: &Utf16Str) -> bool {
2256        self.chars().eq(other.chars())
2257    }
2258}
2259
2260impl PartialEq<Utf32Str> for Utf16Str {
2261    #[inline]
2262    fn eq(&self, other: &Utf32Str) -> bool {
2263        self.chars().eq(other.chars())
2264    }
2265}
2266
2267impl PartialEq<&Utf16Str> for Utf32Str {
2268    #[inline]
2269    fn eq(&self, other: &&Utf16Str) -> bool {
2270        self.chars().eq(other.chars())
2271    }
2272}
2273
2274impl PartialEq<&Utf32Str> for Utf16Str {
2275    #[inline]
2276    fn eq(&self, other: &&Utf32Str) -> bool {
2277        self.chars().eq(other.chars())
2278    }
2279}
2280
2281impl PartialEq<Utf16Str> for &Utf32Str {
2282    #[inline]
2283    fn eq(&self, other: &Utf16Str) -> bool {
2284        self.chars().eq(other.chars())
2285    }
2286}
2287
2288impl PartialEq<Utf32Str> for &Utf16Str {
2289    #[inline]
2290    fn eq(&self, other: &Utf32Str) -> bool {
2291        self.chars().eq(other.chars())
2292    }
2293}
2294
2295impl<'a> TryFrom<&'a [u16]> for &'a Utf16Str {
2296    type Error = Utf16Error;
2297
2298    #[inline]
2299    fn try_from(value: &'a [u16]) -> Result<Self, Self::Error> {
2300        Utf16Str::from_slice(value)
2301    }
2302}
2303
2304impl<'a> TryFrom<&'a mut [u16]> for &'a mut Utf16Str {
2305    type Error = Utf16Error;
2306
2307    #[inline]
2308    fn try_from(value: &'a mut [u16]) -> Result<Self, Self::Error> {
2309        Utf16Str::from_slice_mut(value)
2310    }
2311}
2312
2313impl<'a> TryFrom<&'a [u32]> for &'a Utf32Str {
2314    type Error = Utf32Error;
2315
2316    #[inline]
2317    fn try_from(value: &'a [u32]) -> Result<Self, Self::Error> {
2318        Utf32Str::from_slice(value)
2319    }
2320}
2321
2322impl<'a> TryFrom<&'a mut [u32]> for &'a mut Utf32Str {
2323    type Error = Utf32Error;
2324
2325    #[inline]
2326    fn try_from(value: &'a mut [u32]) -> Result<Self, Self::Error> {
2327        Utf32Str::from_slice_mut(value)
2328    }
2329}
2330
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on non-Windows targets, where the alias resolves to
/// [`Utf32Str`].
#[cfg(not(windows))]
pub type WideUtfStr = Utf32Str;
2335
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on Windows targets, where the alias resolves to [`Utf16Str`].
#[cfg(windows)]
pub type WideUtfStr = Utf16Str;
2340
#[cfg(test)]
mod test {
    use crate::*;

    /// Checks `trim_start` and `trim_end` on UTF-16 strings, including non-ASCII text and
    /// characters outside the Basic Multilingual Plane.
    #[test]
    fn utf16_trim() {
        let tabbed = utf16str!(" Hello\tworld\t");
        assert_eq!(utf16str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf16str!("  English  ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf16str!("  עברית  ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());

        let leading_heart = utf16str!("      💕Heart ");
        assert_eq!(utf16str!("💕Heart "), leading_heart.trim_start());

        let trailing_heart = utf16str!(" Heart💕      ");
        assert_eq!(utf16str!(" Heart💕"), trailing_heart.trim_end());
    }

    /// Checks `trim_start` and `trim_end` on UTF-32 strings, mirroring the UTF-16 cases.
    #[test]
    fn utf32_trim() {
        let tabbed = utf32str!(" Hello\tworld\t");
        assert_eq!(utf32str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf32str!("  English  ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf32str!("  עברית  ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());

        let leading_heart = utf32str!("      💕Heart ");
        assert_eq!(utf32str!("💕Heart "), leading_heart.trim_start());

        let trailing_heart = utf32str!(" Heart💕      ");
        assert_eq!(utf32str!(" Heart💕"), trailing_heart.trim_end());
    }
}