widestring/
utfstr.rs

Help
1//! UTF string slices.
2//!
3//! This module contains UTF string slices and related types.
4
5use crate::{
6    error::{Utf16Error, Utf32Error},
7    is_utf16_low_surrogate,
8    iter::{EncodeUtf16, EncodeUtf32, EncodeUtf8},
9    validate_utf16, validate_utf32, U16Str, U32Str,
10};
11#[cfg(feature = "alloc")]
12use crate::{Utf16String, Utf32String};
13#[cfg(feature = "alloc")]
14#[allow(unused_imports)]
15use alloc::{borrow::Cow, boxed::Box, string::String};
16#[allow(unused_imports)]
17use core::{
18    convert::{AsMut, AsRef, TryFrom},
19    fmt::Write,
20    ops::{Index, IndexMut, RangeBounds},
21    slice::SliceIndex,
22};
23
24mod iter;
25
26pub use iter::*;
27
// Generates a UTF string slice type together with the inherent methods and trait
// implementations that are identical for every supported encoding.
//
// Inputs:
// - `struct $utfstr([$uchar])`: name of the generated type and its element type, preceded by the
//   attributes (usually documentation) to attach to the type.
// - `UtfString`: the owned counterpart, used by the `alloc`-only conversions.
// - `UStr` / `UCStr`: the wide string slice types of undefined encoding that the generated type
//   converts to and compares against.
// - `UtfError`: the error type reported by the `TryFrom` conversions.
// - `fn name() -> {}`: one entry per method whose documentation differs between encodings; only
//   the attributes in front of each entry are used.
macro_rules! utfstr_common_impl {
    {
        $(#[$utfstr_meta:meta])*
        struct $utfstr:ident([$uchar:ty]);
        type UtfString = $utfstring:ident;
        type UStr = $ustr:ident;
        type UCStr = $ucstr:ident;
        type UtfError = $utferror:ident;
        $(#[$from_slice_unchecked_meta:meta])*
        fn from_slice_unchecked() -> {}
        $(#[$from_slice_unchecked_mut_meta:meta])*
        fn from_slice_unchecked_mut() -> {}
        $(#[$from_boxed_slice_unchecked_meta:meta])*
        fn from_boxed_slice_unchecked() -> {}
        $(#[$get_unchecked_meta:meta])*
        fn get_unchecked() -> {}
        $(#[$get_unchecked_mut_meta:meta])*
        fn get_unchecked_mut() -> {}
        $(#[$len_meta:meta])*
        fn len() -> {}
    } => {
        $(#[$utfstr_meta])*
        #[allow(clippy::derive_hash_xor_eq)]
        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $utfstr {
            pub(crate) inner: [$uchar],
        }

        impl $utfstr {
            $(#[$from_slice_unchecked_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub const unsafe fn from_slice_unchecked(s: &[$uchar]) -> &Self {
                // SAFETY: the caller guarantees that `s` is valid UTF; the cast only reinterprets
                // the slice as the wrapper whose single field is that slice.
                &*(s as *const [$uchar] as *const Self)
            }

            $(#[$from_slice_unchecked_mut_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub unsafe fn from_slice_unchecked_mut(s: &mut [$uchar]) -> &mut Self {
                // SAFETY: the caller guarantees that `s` is valid UTF; the cast only reinterprets
                // the slice as the wrapper whose single field is that slice.
                &mut *(s as *mut [$uchar] as *mut Self)
            }

            $(#[$from_boxed_slice_unchecked_meta])*
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub unsafe fn from_boxed_slice_unchecked(s: Box<[$uchar]>) -> Box<Self> {
                // SAFETY: the pointer handed to `from_raw` comes straight from `into_raw`, and the
                // caller guarantees that the contents are valid UTF.
                Box::from_raw(Box::into_raw(s) as *mut Self)
            }

            $(#[$get_unchecked_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked<I>(&self, index: I) -> &Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller guarantees that `index` is in bounds and delimits valid UTF.
                Self::from_slice_unchecked(self.inner.get_unchecked(index))
            }

            $(#[$get_unchecked_mut_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller guarantees that `index` is in bounds and delimits valid UTF.
                Self::from_slice_unchecked_mut(self.inner.get_unchecked_mut(index))
            }

            $(#[$len_meta])*
            #[inline]
            #[must_use]
            pub const fn len(&self) -> usize {
                self.inner.len()
            }

            /// Returns `true` if the string has a length of zero.
            #[inline]
            #[must_use]
            pub const fn is_empty(&self) -> bool {
                self.inner.is_empty()
            }

            /// Converts a string to a slice of its underlying elements.
            ///
            /// To convert the slice back into a string slice, use the
            /// [`from_slice`][Self::from_slice] function.
            #[inline]
            #[must_use]
            pub const fn as_slice(&self) -> &[$uchar] {
                &self.inner
            }

            /// Converts a mutable string to a mutable slice of its underlying elements.
            ///
            /// # Safety
            ///
            /// This function is unsafe because you can violate the invariants of this type when
            /// mutating the slice. The caller must ensure that the contents of the slice are valid
            /// UTF before the borrow ends and the underlying string is used.
            ///
            /// Use of this string type whose contents have been mutated to invalid UTF is
            /// undefined behavior.
            #[inline]
            #[must_use]
            pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
                &mut self.inner
            }

            /// Converts a string slice to a raw pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            ///
            /// The caller must ensure that the returned pointer is never written to. If you need to
            /// mutate the contents of the string slice, use [`as_mut_ptr`][Self::as_mut_ptr].
            #[inline]
            #[must_use]
            pub const fn as_ptr(&self) -> *const $uchar {
                self.inner.as_ptr()
            }

            /// Converts a mutable string slice to a mutable pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
                self.inner.as_mut_ptr()
            }

            /// Returns this string as a wide string slice of undefined encoding.
            #[inline]
            #[must_use]
            pub const fn as_ustr(&self) -> &$ustr {
                $ustr::from_slice(self.as_slice())
            }

            /// Returns a string slice with leading and trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            #[must_use]
            pub fn trim(&self) -> &Self {
                self.trim_start().trim_end()
            }

            /// Returns a string slice with leading whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `start` in this context means the first position
            /// of that sequence; for a left-to-right language like English or Russian, this will be
            /// left side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// right side.
            #[must_use]
            pub fn trim_start(&self) -> &Self {
                // The index of a character is the offset of its first element, so slicing from it
                // always starts on a character boundary.
                match self.char_indices().find(|(_, c)| !c.is_whitespace()) {
                    Some((index, _)) => &self[index..],
                    None => <&Self as Default>::default(),
                }
            }

            /// Returns a string slice with trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// The returned slice always ends on a character boundary, including when the last
            /// remaining character is encoded with more than one element.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `end` in this context means the last position of
            /// that sequence; for a left-to-right language like English or Russian, this will be
            /// right side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// left side.
            #[must_use]
            pub fn trim_end(&self) -> &Self {
                // Walk backwards over the trailing whitespace and remember where the earliest of
                // those characters starts. Cutting there (instead of one element past the start of
                // the last kept character) never splits a multi-element character in half.
                let mut end = self.len();
                for (index, c) in self.char_indices().rev() {
                    if !c.is_whitespace() {
                        break;
                    }
                    end = index;
                }
                &self[..end]
            }

            /// Converts a boxed string into a boxed slice without copying or allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_boxed_slice(self: Box<Self>) -> Box<[$uchar]> {
                // SAFETY: from_raw pointer is from into_raw
                unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) }
            }

            /// Converts a boxed string slice into an owned UTF string without copying or
            /// allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_utfstring(self: Box<Self>) -> $utfstring {
                // SAFETY: the elements come from a string slice that is already valid UTF
                unsafe { $utfstring::from_vec_unchecked(self.into_boxed_slice().into_vec()) }
            }

            /// Creates a new owned string by repeating this string `n` times.
            ///
            /// # Panics
            ///
            /// This function will panic if the capacity would overflow.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn repeat(&self, n: usize) -> $utfstring {
                // SAFETY: repeating a valid UTF sequence back to back yields valid UTF
                unsafe { $utfstring::from_vec_unchecked(self.as_slice().repeat(n)) }
            }
        }

        impl AsMut<$utfstr> for $utfstr {
            #[inline]
            fn as_mut(&mut self) -> &mut $utfstr {
                self
            }
        }

        impl AsRef<$utfstr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$utfstr {
                self
            }
        }

        impl AsRef<[$uchar]> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &[$uchar] {
                self.as_slice()
            }
        }

        impl AsRef<$ustr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$ustr {
                self.as_ustr()
            }
        }

        // Debug output is the escaped string wrapped in double quotes.
        impl core::fmt::Debug for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.write_char('"')?;
                self.escape_debug().try_for_each(|c| f.write_char(c))?;
                f.write_char('"')
            }
        }

        impl Default for &$utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked(&[]) }
            }
        }

        impl Default for &mut $utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked_mut(&mut []) }
            }
        }

        // Display writes the characters of the string one by one, unescaped.
        impl core::fmt::Display for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                self.chars().try_for_each(|c| f.write_char(c))
            }
        }

        #[cfg(feature = "alloc")]
        impl From<Box<$utfstr>> for Box<[$uchar]> {
            #[inline]
            fn from(value: Box<$utfstr>) -> Self {
                value.into_boxed_slice()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a $ustr {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_ustr()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a [$uchar] {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_slice()
            }
        }

        #[cfg(feature = "std")]
        impl From<&$utfstr> for std::ffi::OsString {
            #[inline]
            fn from(value: &$utfstr) -> std::ffi::OsString {
                value.as_ustr().to_os_string()
            }
        }

        // Comparisons between UTF string slices (and wide strings of the same element type)
        // compare the underlying elements directly.

        impl PartialEq<$utfstr> for &$utfstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, $utfstr>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, $utfstr>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, $utfstr> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, $utfstr> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        impl PartialEq<$ustr> for $utfstr {
            #[inline]
            fn eq(&self, other: &$ustr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for $ustr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<crate::$ucstr> for $utfstr {
            #[inline]
            fn eq(&self, other: &crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for crate::$ucstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        // Comparisons against native `str` values go character by character because the two
        // sides use different encodings.

        impl PartialEq<str> for $utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<&str> for $utfstr {
            #[inline]
            fn eq(&self, other: &&str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<str> for &$utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for &str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, str>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, str> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, str> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        // Fallible conversions from wide strings of undefined encoding.

        impl<'a> TryFrom<&'a $ustr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a $ustr) -> Result<Self, Self::Error> {
                $utfstr::from_ustr(value)
            }
        }

        impl<'a> TryFrom<&'a crate::$ucstr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a crate::$ucstr) -> Result<Self, Self::Error> {
                $utfstr::from_ucstr(value)
            }
        }
    };
}
480
481utfstr_common_impl! {
482    /// UTF-16 string slice for [`Utf16String`][crate::Utf16String].
483    ///
484    /// [`Utf16Str`] is to [`Utf16String`][crate::Utf16String] as [`str`] is to [`String`].
485    ///
486    /// [`Utf16Str`] slices are string slices that are always valid UTF-16 encoding. This is unlike
487    /// the [`U16Str`][crate::U16Str] string slices, which may not have valid encoding. In this way,
488    /// [`Utf16Str`] string slices most resemble native [`str`] slices of all the types in this
489    /// crate.
490    ///
491    /// # Examples
492    ///
493    /// The easiest way to use [`Utf16Str`] is with the [`utf16str!`][crate::utf16str] macro to
494    /// convert string literals into string slices at compile time:
495    ///
496    /// ```
497    /// use widestring::utf16str;
498    /// let hello = utf16str!("Hello, world!");
499    /// ```
500    ///
501    /// You can also convert a [`u16`] slice directly, provided it is valid UTF-16:
502    ///
503    /// ```
504    /// use widestring::Utf16Str;
505    ///
506    /// let sparkle_heart = [0xd83d, 0xdc96];
507    /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
508    ///
509    /// assert_eq!("💖", sparkle_heart);
510    /// ```
511    struct Utf16Str([u16]);
512
513    type UtfString = Utf16String;
514    type UStr = U16Str;
515    type UCStr = U16CStr;
516    type UtfError = Utf16Error;
517
518    /// Converts a slice to a string slice without checking that the string contains valid UTF-16.
519    ///
520    /// See the safe version, [`from_slice`][Self::from_slice], for more information.
521    ///
522    /// # Safety
523    ///
524    /// This function is unsafe because it does not check that the slice passed to it is valid
525    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
526    /// [`Utf16Str`] is always valid UTF-16.
527    ///
528    /// # Examples
529    ///
530    /// ```
531    /// use widestring::Utf16Str;
532    ///
533    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
534    /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked(&sparkle_heart) };
535    ///
536    /// assert_eq!("💖", sparkle_heart);
537    /// ```
538    fn from_slice_unchecked() -> {}
539
540    /// Converts a mutable slice to a mutable string slice without checking that the string contains
541    /// valid UTF-16.
542    ///
543    /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
544    ///
545    /// # Safety
546    ///
547    /// This function is unsafe because it does not check that the slice passed to it is valid
548    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
549    /// [`Utf16Str`] is always valid UTF-16.
550    ///
551    /// # Examples
552    ///
553    /// ```
554    /// use widestring::Utf16Str;
555    ///
556    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
557    /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked_mut(&mut sparkle_heart) };
558    ///
559    /// assert_eq!("💖", sparkle_heart);
560    /// ```
561    fn from_slice_unchecked_mut() -> {}
562
563    /// Converts a boxed slice to a boxed string slice without checking that the string contains
564    /// valid UTF-16.
565    ///
566    /// # Safety
567    ///
568    /// This function is unsafe because it does not check if the string slice is valid UTF-16, and
569    /// [`Utf16Str`] must always be valid UTF-16.
570    fn from_boxed_slice_unchecked() -> {}
571
572    /// Returns an unchecked subslice of this string slice.
573    ///
574    /// This is the unchecked alternative to indexing the string slice.
575    ///
576    /// # Safety
577    ///
578    /// Callers of this function are responsible that these preconditions are satisfied:
579    ///
580    /// - The starting index must not exceed the ending index;
581    /// - Indexes must be within bounds of the original slice;
582    /// - Indexes must lie on UTF-16 sequence boundaries.
583    ///
584    /// Failing that, the returned string slice may reference invalid memory or violate the
585    /// invariants communicated by the type.
586    ///
587    /// # Examples
588    ///
589    /// ```
590    /// # use widestring::{utf16str};
591    /// let v = utf16str!("⚧️🏳️‍⚧️➡️s");
592    /// unsafe {
593    ///     assert_eq!(utf16str!("⚧️"), v.get_unchecked(..2));
594    ///     assert_eq!(utf16str!("🏳️‍⚧️"), v.get_unchecked(2..8));
595    ///     assert_eq!(utf16str!("➡️"), v.get_unchecked(8..10));
596    ///     assert_eq!(utf16str!("s"), v.get_unchecked(10..));
597    /// }
598    /// ```
599    fn get_unchecked() -> {}
600
601    /// Returns a mutable, unchecked subslice of this string slice
602    ///
603    /// This is the unchecked alternative to indexing the string slice.
604    ///
605    /// # Safety
606    ///
607    /// Callers of this function are responsible that these preconditions are satisfied:
608    ///
609    /// - The starting index must not exceed the ending index;
610    /// - Indexes must be within bounds of the original slice;
611    /// - Indexes must lie on UTF-16 sequence boundaries.
612    ///
613    /// Failing that, the returned string slice may reference invalid memory or violate the
614    /// invariants communicated by the type.
615    ///
616    /// # Examples
617    ///
618    /// ```
619    /// # use widestring::{utf16str};
620    /// # #[cfg(feature = "alloc")] {
621    /// let mut v = utf16str!("⚧️🏳️‍⚧️➡️s").to_owned();
622    /// unsafe {
623    ///     assert_eq!(utf16str!("⚧️"), v.get_unchecked_mut(..2));
624    ///     assert_eq!(utf16str!("🏳️‍⚧️"), v.get_unchecked_mut(2..8));
625    ///     assert_eq!(utf16str!("➡️"), v.get_unchecked_mut(8..10));
626    ///     assert_eq!(utf16str!("s"), v.get_unchecked_mut(10..));
627    /// }
628    /// # }
629    /// ```
630    fn get_unchecked_mut() -> {}
631
632    /// Returns the length of `self`.
633    ///
634    /// This length is in `u16` values, not [`char`]s or graphemes. In other words, it may not be
635    /// what human considers the length of the string.
636    ///
637    /// # Examples
638    ///
639    /// ```
640    /// # use widestring::utf16str;
641    /// assert_eq!(utf16str!("foo").len(), 3);
642    ///
643    /// let complex = utf16str!("⚧️🏳️‍⚧️➡️s");
644    /// assert_eq!(complex.len(), 11);
645    /// assert_eq!(complex.chars().count(), 10);
646    /// ```
647    fn len() -> {}
648}
649
650utfstr_common_impl! {
651    /// UTF-32 string slice for [`Utf32String`][crate::Utf32String].
652    ///
653    /// [`Utf32Str`] is to [`Utf32String`][crate::Utf32String] as [`str`] is to [`String`].
654    ///
655    /// [`Utf32Str`] slices are string slices that are always valid UTF-32 encoding. This is unlike
656    /// the [`U32Str`][crate::U16Str] string slices, which may not have valid encoding. In this way,
657    /// [`Utf32Str`] string slices most resemble native [`str`] slices of all the types in this
658    /// crate.
659    ///
660    /// # Examples
661    ///
662    /// The easiest way to use [`Utf32Str`] is with the [`utf32str!`][crate::utf32str] macro to
663    /// convert string literals into string slices at compile time:
664    ///
665    /// ```
666    /// use widestring::utf32str;
667    /// let hello = utf32str!("Hello, world!");
668    /// ```
669    ///
670    /// You can also convert a [`u32`] slice directly, provided it is valid UTF-32:
671    ///
672    /// ```
673    /// use widestring::Utf32Str;
674    ///
675    /// let sparkle_heart = [0x1f496];
676    /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
677    ///
678    /// assert_eq!("💖", sparkle_heart);
679    /// ```
680    ///
681    /// Since [`char`] slices are valid UTF-32, a slice of [`char`]s can be easily converted to a
682    /// string slice:
683    ///
684    /// ```
685    /// use widestring::Utf32Str;
686    ///
687    /// let sparkle_heart = ['💖'; 3];
688    /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
689    ///
690    /// assert_eq!("💖💖💖", sparkle_heart);
691    /// ```
692    struct Utf32Str([u32]);
693
694    type UtfString = Utf32String;
695    type UStr = U32Str;
696    type UCStr = U32CStr;
697    type UtfError = Utf32Error;
698
699    /// Converts a slice to a string slice without checking that the string contains valid UTF-32.
700    ///
701    /// See the safe version, [`from_slice`][Self::from_slice], for more information.
702    ///
703    /// # Safety
704    ///
705    /// This function is unsafe because it does not check that the slice passed to it is valid
706    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
707    /// [`Utf32Str`] is always valid UTF-32.
708    ///
709    /// # Examples
710    ///
711    /// ```
712    /// use widestring::Utf32Str;
713    ///
714    /// let sparkle_heart = vec![0x1f496];
715    /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked(&sparkle_heart) };
716    ///
717    /// assert_eq!("💖", sparkle_heart);
718    /// ```
719    fn from_slice_unchecked() -> {}
720
721    /// Converts a mutable slice to a mutable string slice without checking that the string contains
722    /// valid UTF-32.
723    ///
724    /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
725    ///
726    /// # Safety
727    ///
728    /// This function is unsafe because it does not check that the slice passed to it is valid
729    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
730    /// [`Utf32Str`] is always valid UTF-32.
731    ///
732    /// # Examples
733    ///
734    /// ```
735    /// use widestring::Utf32Str;
736    ///
737    /// let mut sparkle_heart = vec![0x1f496];
738    /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked_mut(&mut sparkle_heart) };
739    ///
740    /// assert_eq!("💖", sparkle_heart);
741    /// ```
742    fn from_slice_unchecked_mut() -> {}
743
744    /// Converts a boxed slice to a boxed string slice without checking that the string contains
745    /// valid UTF-32.
746    ///
747    /// # Safety
748    ///
749    /// This function is unsafe because it does not check if the string slice is valid UTF-32, and
750    /// [`Utf32Str`] must always be valid UTF-32.
751    fn from_boxed_slice_unchecked() -> {}
752
753    /// Returns an unchecked subslice of this string slice.
754    ///
755    /// This is the unchecked alternative to indexing the string slice.
756    ///
757    /// # Safety
758    ///
759    /// Callers of this function are responsible that these preconditions are satisfied:
760    ///
761    /// - The starting index must not exceed the ending index;
762    /// - Indexes must be within bounds of the original slice;
763    ///
764    /// Failing that, the returned string slice may reference invalid memory or violate the
765    /// invariants communicated by the type.
766    ///
767    /// # Examples
768    ///
769    /// ```
770    /// # use widestring::utf32str;
771    /// let v = utf32str!("⚧️🏳️‍⚧️➡️s");
772    /// unsafe {
773    ///     assert_eq!(utf32str!("⚧️"), v.get_unchecked(..2));
774    ///     assert_eq!(utf32str!("🏳️‍⚧️"), v.get_unchecked(2..7));
775    ///     assert_eq!(utf32str!("➡️"), v.get_unchecked(7..9));
776    ///     assert_eq!(utf32str!("s"), v.get_unchecked(9..))
777    /// }
778    /// ```
779    fn get_unchecked() -> {}
780
781    /// Returns a mutable, unchecked subslice of this string slice
782    ///
783    /// This is the unchecked alternative to indexing the string slice.
784    ///
785    /// # Safety
786    ///
787    /// Callers of this function are responsible that these preconditions are satisfied:
788    ///
789    /// - The starting index must not exceed the ending index;
790    /// - Indexes must be within bounds of the original slice;
791    ///
792    /// Failing that, the returned string slice may reference invalid memory or violate the
793    /// invariants communicated by the type.
794    ///
795    /// # Examples
796    ///
797    /// ```
798    /// # use widestring::utf32str;
799    /// # #[cfg(feature = "alloc")] {
800    /// let mut v = utf32str!("⚧️🏳️‍⚧️➡️s").to_owned();
801    /// unsafe {
802    ///     assert_eq!(utf32str!("⚧️"), v.get_unchecked_mut(..2));
803    ///     assert_eq!(utf32str!("🏳️‍⚧️"), v.get_unchecked_mut(2..7));
804    ///     assert_eq!(utf32str!("➡️"), v.get_unchecked_mut(7..9));
805    ///     assert_eq!(utf32str!("s"), v.get_unchecked_mut(9..))
806    /// }
807    /// # }
808    /// ```
809    fn get_unchecked_mut() -> {}
810
811    /// Returns the length of `self`.
812    ///
813    /// This length is in the number of [`char`]s in the slice, not graphemes. In other words, it
814    /// may not be what human considers the length of the string.
815    ///
816    /// # Examples
817    ///
818    /// ```
819    /// # use widestring::utf32str;
820    /// assert_eq!(utf32str!("foo").len(), 3);
821    ///
822    /// let complex = utf32str!("⚧️🏳️‍⚧️➡️s");
823    /// assert_eq!(complex.len(), 10);
824    /// assert_eq!(complex.chars().count(), 10);
825    /// ```
826    fn len() -> {}
827}
828
829impl Utf16Str {
830    /// Converts a slice of UTF-16 data to a string slice.
831    ///
832    /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
833    /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
834    /// then does the conversion.
835    ///
836    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
837    /// the validity check, there is an unsafe version of this function,
838    /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
839    /// the check.
840    ///
841    /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
842    ///
843    /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
844    /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
845    /// after converting from UTF-8 to UTF-16.
846    ///
847    /// # Errors
848    ///
849    /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
850    /// is not UTF-16.
851    ///
852    /// # Examples
853    ///
854    /// ```
855    /// use widestring::Utf16Str;
856    ///
857    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
858    /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
859    ///
860    /// assert_eq!("💖", sparkle_heart);
861    /// ```
862    ///
863    /// With incorrect values that return an error:
864    ///
865    /// ```
866    /// use widestring::Utf16Str;
867    ///
868    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
869    ///
870    /// assert!(Utf16Str::from_slice(&sparkle_heart).is_err());
871    /// ```
872    pub fn from_slice(s: &[u16]) -> Result<&Self, Utf16Error> {
873        validate_utf16(s)?;
874        // SAFETY: Just validated
875        Ok(unsafe { Self::from_slice_unchecked(s) })
876    }
877
878    /// Converts a mutable slice of UTF-16 data to a mutable string slice.
879    ///
880    /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
881    /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
882    /// then does the conversion.
883    ///
884    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
885    /// the validity check, there is an unsafe version of this function,
886    /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
887    /// but skips the check.
888    ///
889    /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
890    ///
891    /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
892    /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
893    /// after converting from UTF-8 to UTF-16.
894    ///
895    /// # Errors
896    ///
897    /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
898    /// is not UTF-16.
899    ///
900    /// # Examples
901    ///
902    /// ```
903    /// use widestring::Utf16Str;
904    ///
905    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
906    /// let sparkle_heart = Utf16Str::from_slice_mut(&mut sparkle_heart).unwrap();
907    ///
908    /// assert_eq!("💖", sparkle_heart);
909    /// ```
910    ///
911    /// With incorrect values that return an error:
912    ///
913    /// ```
914    /// use widestring::Utf16Str;
915    ///
916    /// let mut sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
917    ///
918    /// assert!(Utf16Str::from_slice_mut(&mut sparkle_heart).is_err());
919    /// ```
920    pub fn from_slice_mut(s: &mut [u16]) -> Result<&mut Self, Utf16Error> {
921        validate_utf16(s)?;
922        // SAFETY: Just validated
923        Ok(unsafe { Self::from_slice_unchecked_mut(s) })
924    }
925
926    /// Converts a wide string slice of undefined encoding to a UTF-16 string slice without checking
927    /// if the string slice is valid UTF-16.
928    ///
929    /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
930    ///
931    /// # Safety
932    ///
933    /// This function is unsafe because it does not check that the string slice passed to it is
934    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
935    /// the [`Utf16Str`] is always valid UTF-16.
936    ///
937    /// # Examples
938    ///
939    /// ```
940    /// use widestring::{Utf16Str, u16str};
941    ///
942    /// let sparkle_heart = u16str!("💖");
943    /// let sparkle_heart = unsafe { Utf16Str::from_ustr_unchecked(sparkle_heart) };
944    ///
945    /// assert_eq!("💖", sparkle_heart);
946    /// ```
947    #[must_use]
948    pub const unsafe fn from_ustr_unchecked(s: &U16Str) -> &Self {
949        Self::from_slice_unchecked(s.as_slice())
950    }
951
952    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice
953    /// without checking if the string slice is valid UTF-16.
954    ///
955    /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
956    ///
957    /// # Safety
958    ///
959    /// This function is unsafe because it does not check that the string slice passed to it is
960    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
961    /// the [`Utf16Str`] is always valid UTF-16.
962    #[must_use]
963    pub unsafe fn from_ustr_unchecked_mut(s: &mut U16Str) -> &mut Self {
964        Self::from_slice_unchecked_mut(s.as_mut_slice())
965    }
966
967    /// Converts a wide string slice of undefined encoding to a UTF-16 string slice.
968    ///
969    /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
970    /// [`U16Str`] does not contain valid UTF-16 data.
971    ///
972    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
973    /// the validity check, there is an unsafe version of this function,
974    /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
975    /// but skips the check.
976    ///
977    /// # Errors
978    ///
979    /// Returns an error if the string slice is not UTF-16 with a description as to why the
980    /// provided string slice is not UTF-16.
981    ///
982    /// # Examples
983    ///
984    /// ```
985    /// use widestring::{Utf16Str, u16str};
986    ///
987    /// let sparkle_heart = u16str!("💖");
988    /// let sparkle_heart = Utf16Str::from_ustr(sparkle_heart).unwrap();
989    ///
990    /// assert_eq!("💖", sparkle_heart);
991    /// ```
992    #[inline]
993    pub fn from_ustr(s: &U16Str) -> Result<&Self, Utf16Error> {
994        Self::from_slice(s.as_slice())
995    }
996
997    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice.
998    ///
999    /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
1000    /// [`U16Str`] does not contain valid UTF-16 data.
1001    ///
1002    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1003    /// the validity check, there is an unsafe version of this function,
1004    /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1005    /// but skips the check.
1006    ///
1007    /// # Errors
1008    ///
1009    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1010    /// provided string slice is not UTF-16.
1011    #[inline]
1012    pub fn from_ustr_mut(s: &mut U16Str) -> Result<&mut Self, Utf16Error> {
1013        Self::from_slice_mut(s.as_mut_slice())
1014    }
1015
1016    /// Converts a wide C string slice to a UTF-16 string slice without checking if the
1017    /// string slice is valid UTF-16.
1018    ///
1019    /// The resulting string slice does *not* contain the nul terminator.
1020    ///
1021    /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1022    ///
1023    /// # Safety
1024    ///
1025    /// This function is unsafe because it does not check that the string slice passed to it is
1026    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1027    /// the [`Utf16Str`] is always valid UTF-16.
1028    ///
1029    /// # Examples
1030    ///
1031    /// ```
1032    /// use widestring::{Utf16Str, u16cstr};
1033    ///
1034    /// let sparkle_heart = u16cstr!("💖");
1035    /// let sparkle_heart = unsafe { Utf16Str::from_ucstr_unchecked(sparkle_heart) };
1036    ///
1037    /// assert_eq!("💖", sparkle_heart);
1038    /// ```
1039    #[inline]
1040    #[must_use]
1041    pub unsafe fn from_ucstr_unchecked(s: &crate::U16CStr) -> &Self {
1042        Self::from_slice_unchecked(s.as_slice())
1043    }
1044
1045    /// Converts a mutable wide C string slice to a mutable UTF-16 string slice without
1046    /// checking if the string slice is valid UTF-16.
1047    ///
1048    /// The resulting string slice does *not* contain the nul terminator.
1049    ///
1050    /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1051    ///
1052    /// # Safety
1053    ///
1054    /// This function is unsafe because it does not check that the string slice passed to it is
1055    /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1056    /// the [`Utf16Str`] is always valid UTF-16.
1057    #[inline]
1058    #[must_use]
1059    pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U16CStr) -> &mut Self {
1060        Self::from_slice_unchecked_mut(s.as_mut_slice())
1061    }
1062
1063    /// Converts a wide C string slice to a UTF-16 string slice.
1064    ///
1065    /// The resulting string slice does *not* contain the nul terminator.
1066    ///
1067    /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1068    /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1069    ///
1070    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1071    /// the validity check, there is an unsafe version of this function,
1072    /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1073    /// but skips the check.
1074    ///
1075    /// # Errors
1076    ///
1077    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1078    /// provided string slice is not UTF-16.
1079    ///
1080    /// # Examples
1081    ///
1082    /// ```
1083    /// use widestring::{Utf16Str, u16cstr};
1084    ///
1085    /// let sparkle_heart = u16cstr!("💖");
1086    /// let sparkle_heart = Utf16Str::from_ucstr(sparkle_heart).unwrap();
1087    ///
1088    /// assert_eq!("💖", sparkle_heart);
1089    /// ```
1090    #[inline]
1091    pub fn from_ucstr(s: &crate::U16CStr) -> Result<&Self, Utf16Error> {
1092        Self::from_slice(s.as_slice())
1093    }
1094
1095    /// Converts a mutable wide C string slice to a mutable UTF-16 string slice.
1096    ///
1097    /// The resulting string slice does *not* contain the nul terminator.
1098    ///
1099    /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1100    /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1101    ///
1102    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1103    /// the validity check, there is an unsafe version of this function,
1104    /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1105    /// but skips the check.
1106    ///
1107    /// # Safety
1108    ///
1109    /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1110    /// when mutating the slice (i.e. by adding interior nul values).
1111    ///
1112    /// # Errors
1113    ///
1114    /// Returns an error if the string slice is not UTF-16 with a description as to why the
1115    /// provided string slice is not UTF-16.
1116    #[inline]
1117    pub unsafe fn from_ucstr_mut(s: &mut crate::U16CStr) -> Result<&mut Self, Utf16Error> {
1118        Self::from_slice_mut(s.as_mut_slice())
1119    }
1120
1121    /// Converts to a standard UTF-8 [`String`].
1122    ///
1123    /// Because this string is always valid UTF-16, the conversion is lossless and non-fallible.
1124    #[inline]
1125    #[allow(clippy::inherent_to_string_shadow_display)]
1126    #[cfg(feature = "alloc")]
1127    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1128    #[must_use]
1129    pub fn to_string(&self) -> String {
1130        String::from_utf16(self.as_slice()).unwrap()
1131    }
1132
1133    /// Checks that `index`-th value is the value in a UTF-16 code point sequence or the end of the
1134    /// string.
1135    ///
1136    /// Returns `true` if the value at `index` is not a UTF-16 surrogate value, or if the value at
1137    /// `index` is the first value of a surrogate pair (the "high" surrogate). Returns `false` if
1138    /// the value at `index` is the second value of a surrogate pair (a.k.a the "low" surrogate).
1139    ///
1140    /// The start and end of the string (when `index == self.len()`) are considered to be
1141    /// boundaries.
1142    ///
1143    /// Returns `false` if `index is greater than `self.len()`.
1144    ///
1145    /// # Examples
1146    ///
1147    /// ```
1148    /// # use widestring::utf16str;
1149    /// let s = utf16str!("Sparkle 💖 Heart");
1150    /// assert!(s.is_char_boundary(0));
1151    ///
1152    /// // high surrogate of `💖`
1153    /// assert!(s.is_char_boundary(8));
1154    /// // low surrogate of `💖`
1155    /// assert!(!s.is_char_boundary(9));
1156    ///
1157    /// assert!(s.is_char_boundary(s.len()));
1158    /// ```
1159    #[inline]
1160    #[must_use]
1161    pub const fn is_char_boundary(&self, index: usize) -> bool {
1162        if index > self.len() {
1163            false
1164        } else if index == self.len() {
1165            true
1166        } else {
1167            !is_utf16_low_surrogate(self.inner[index])
1168        }
1169    }
1170
1171    /// Returns a subslice of this string.
1172    ///
1173    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1174    /// equivalent indexing operation would panic.
1175    ///
1176    /// # Examples
1177    ///
1178    /// ```
1179    /// # use widestring::{utf16str};
1180    /// let v = utf16str!("⚧️🏳️‍⚧️➡️s");
1181    ///
1182    /// assert_eq!(Some(utf16str!("⚧️")), v.get(..2));
1183    /// assert_eq!(Some(utf16str!("🏳️‍⚧️")), v.get(2..8));
1184    /// assert_eq!(Some(utf16str!("➡️")), v.get(8..10));
1185    /// assert_eq!(Some(utf16str!("s")), v.get(10..));
1186    ///
1187    /// assert!(v.get(3..4).is_none());
1188    /// ```
1189    #[inline]
1190    #[must_use]
1191    pub fn get<I>(&self, index: I) -> Option<&Self>
1192    where
1193        I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1194    {
1195        // TODO: Use SliceIndex directly when it is stabilized
1196        let range = crate::range_check(index, ..self.len())?;
1197        if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1198            return None;
1199        }
1200
1201        // SAFETY: range_check verified bounds, and we just verified char boundaries
1202        Some(unsafe { self.get_unchecked(range) })
1203    }
1204
1205    /// Returns a mutable subslice of this string.
1206    ///
1207    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1208    /// equivalent indexing operation would panic.
1209    ///
1210    /// # Examples
1211    ///
1212    /// ```
1213    /// # use widestring::{utf16str};
1214    /// # #[cfg(feature = "alloc")] {
1215    /// let mut v = utf16str!("⚧️🏳️‍⚧️➡️s").to_owned();
1216    ///
1217    /// assert_eq!(utf16str!("⚧️"), v.get_mut(..2).unwrap());
1218    /// assert_eq!(utf16str!("🏳️‍⚧️"), v.get_mut(2..8).unwrap());
1219    /// assert_eq!(utf16str!("➡️"), v.get_mut(8..10).unwrap());
1220    /// assert_eq!(utf16str!("s"), v.get_mut(10..).unwrap());
1221    ///
1222    /// assert!(v.get_mut(3..4).is_none());
1223    /// # }
1224    /// ```
1225    #[inline]
1226    #[must_use]
1227    pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1228    where
1229        I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1230    {
1231        // TODO: Use SliceIndex directly when it is stabilized
1232        let range = crate::range_check(index, ..self.len())?;
1233        if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1234            return None;
1235        }
1236
1237        // SAFETY: range_check verified bounds, and we just verified char boundaries
1238        Some(unsafe { self.get_unchecked_mut(range) })
1239    }
1240
1241    /// Divide one string slice into two at an index.
1242    ///
1243    /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1244    /// the boundary of a UTF-16 code point.
1245    ///
1246    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1247    /// the end of the string slice.
1248    ///
1249    /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1250    ///
1251    /// # Panics
1252    ///
1253    /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1254    /// code point of the string slice.
1255    ///
1256    /// # Examples
1257    ///
1258    /// ```
1259    /// # use widestring::utf16str;
1260    /// let s = utf16str!("Per Martin-Löf");
1261    ///
1262    /// let (first, last) = s.split_at(3);
1263    ///
1264    /// assert_eq!("Per", first);
1265    /// assert_eq!(" Martin-Löf", last);
1266    /// ```
1267    #[inline]
1268    #[must_use]
1269    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1270        assert!(self.is_char_boundary(mid));
1271        let (a, b) = self.inner.split_at(mid);
1272        unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1273    }
1274
1275    /// Divide one mutable string slice into two at an index.
1276    ///
1277    /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1278    /// the boundary of a UTF-16 code point.
1279    ///
1280    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1281    /// the end of the string slice.
1282    ///
1283    /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1284    ///
1285    /// # Panics
1286    ///
1287    /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1288    /// code point of the string slice.
1289    ///
1290    /// # Examples
1291    ///
1292    /// ```
1293    /// # use widestring::utf16str;
1294    /// # #[cfg(feature = "alloc")] {
1295    /// let mut s = utf16str!("Per Martin-Löf").to_owned();
1296    ///
1297    /// let (first, last) = s.split_at_mut(3);
1298    ///
1299    /// assert_eq!("Per", first);
1300    /// assert_eq!(" Martin-Löf", last);
1301    /// # }
1302    /// ```
1303    #[inline]
1304    #[must_use]
1305    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1306        assert!(self.is_char_boundary(mid));
1307        let (a, b) = self.inner.split_at_mut(mid);
1308        unsafe {
1309            (
1310                Self::from_slice_unchecked_mut(a),
1311                Self::from_slice_unchecked_mut(b),
1312            )
1313        }
1314    }
1315
1316    /// Returns an iterator over the [`char`]s of a string slice.
1317    ///
1318    /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1319    /// [`char`]. This method returns such an iterator.
1320    ///
1321    /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1322    /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1323    /// actually want. This functionality is not provided by this crate.
1324    #[inline]
1325    #[must_use]
1326    pub fn chars(&self) -> CharsUtf16<'_> {
1327        CharsUtf16::new(self.as_slice())
1328    }
1329
1330    /// Returns an iterator over the [`char`]s of a string slice and their positions.
1331    ///
1332    /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1333    /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
1334    ///
1335    /// The iterator yields tuples. The position is first, the [`char`] is second.
1336    #[inline]
1337    #[must_use]
1338    pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
1339        CharIndicesUtf16::new(self.as_slice())
1340    }
1341
1342    /// An iterator over the [`u16`] code units of a string slice.
1343    ///
1344    /// As a UTF-16 string slice consists of a sequence of [`u16`] code units, we can iterate
1345    /// through a string slice by each code unit. This method returns such an iterator.
1346    #[must_use]
1347    pub fn code_units(&self) -> CodeUnits<'_> {
1348        CodeUnits::new(self.as_slice())
1349    }
1350
1351    /// Returns an iterator of bytes over the string encoded as UTF-8.
1352    #[must_use]
1353    pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf16<'_>> {
1354        crate::encode_utf8(self.chars())
1355    }
1356
1357    /// Returns an iterator of [`u32`] over the sting encoded as UTF-32.
1358    #[must_use]
1359    pub fn encode_utf32(&self) -> EncodeUtf32<CharsUtf16<'_>> {
1360        crate::encode_utf32(self.chars())
1361    }
1362
1363    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
1364    #[inline]
1365    #[must_use]
1366    pub fn escape_debug(&self) -> EscapeDebug<CharsUtf16<'_>> {
1367        EscapeDebug::<CharsUtf16>::new(self.as_slice())
1368    }
1369
1370    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
1371    #[inline]
1372    #[must_use]
1373    pub fn escape_default(&self) -> EscapeDefault<CharsUtf16<'_>> {
1374        EscapeDefault::<CharsUtf16>::new(self.as_slice())
1375    }
1376
1377    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
1378    #[inline]
1379    #[must_use]
1380    pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf16<'_>> {
1381        EscapeUnicode::<CharsUtf16>::new(self.as_slice())
1382    }
1383
1384    /// Returns the lowercase equivalent of this string slice, as a new [`Utf16String`].
1385    ///
1386    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1387    /// `Lowercase`.
1388    ///
1389    /// Since some characters can expand into multiple characters when changing the case, this
1390    /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
1391    #[inline]
1392    #[cfg(feature = "alloc")]
1393    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1394    #[must_use]
1395    pub fn to_lowercase(&self) -> Utf16String {
1396        let mut s = Utf16String::with_capacity(self.len());
1397        for c in self.chars() {
1398            for lower in c.to_lowercase() {
1399                s.push(lower);
1400            }
1401        }
1402        s
1403    }
1404
1405    /// Returns the uppercase equivalent of this string slice, as a new [`Utf16String`].
1406    ///
1407    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1408    /// `Uppercase`.
1409    ///
1410    /// Since some characters can expand into multiple characters when changing the case, this
1411    /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
1412    #[inline]
1413    #[cfg(feature = "alloc")]
1414    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1415    #[must_use]
1416    pub fn to_uppercase(&self) -> Utf16String {
1417        let mut s = Utf16String::with_capacity(self.len());
1418        for c in self.chars() {
1419            for lower in c.to_uppercase() {
1420                s.push(lower);
1421            }
1422        }
1423        s
1424    }
1425
1426    /// Returns an iterator over the lines of a [`Utf16Str`], as string slices.
1427    ///
1428    /// Lines are split at line endings that are either newlines (`\n`) or
1429    /// sequences of a carriage return followed by a line feed (`\r\n`).
1430    ///
1431    /// Line terminators are not included in the lines returned by the iterator.
1432    ///
1433    /// Note that any carriage return (`\r`) not immediately followed by a
1434    /// line feed (`\n`) does not split a line. These carriage returns are
1435    /// thereby included in the produced lines.
1436    ///
1437    /// The final line ending is optional. A string that ends with a final line
1438    /// ending will return the same lines as an otherwise identical string
1439    /// without a final line ending.
1440    ///
1441    /// # Examples
1442    ///
1443    /// Basic usage:
1444    ///
1445    /// ```
1446    /// use widestring::utf16str;
1447    ///
1448    /// let text = utf16str!("foo\r\nbar\n\nbaz\r");
1449    /// let mut lines = text.lines();
1450    ///
1451    /// assert_eq!(Some(utf16str!("foo")), lines.next());
1452    /// assert_eq!(Some(utf16str!("bar")), lines.next());
1453    /// assert_eq!(Some(utf16str!("")), lines.next());
1454    /// // Trailing carriage return is included in the last line
1455    /// assert_eq!(Some(utf16str!("baz\r")), lines.next());
1456    ///
1457    /// assert_eq!(None, lines.next());
1458    /// ```
1459    ///
1460    /// The final line does not require any ending:
1461    ///
1462    /// ```
1463    /// use widestring::utf16str;
1464    ///
1465    /// let text = utf16str!("foo\nbar\n\r\nbaz");
1466    /// let mut lines = text.lines();
1467    ///
1468    /// assert_eq!(Some(utf16str!("foo")), lines.next());
1469    /// assert_eq!(Some(utf16str!("bar")), lines.next());
1470    /// assert_eq!(Some(utf16str!("")), lines.next());
1471    /// assert_eq!(Some(utf16str!("baz")), lines.next());
1472    ///
1473    /// assert_eq!(None, lines.next());
1474    /// ```
1475    pub fn lines(&self) -> Lines<'_, Utf16Str, CharIndicesUtf16<'_>> {
1476        Lines::new(self, self.len(), self.char_indices())
1477    }
1478}
1479
1480impl Utf32Str {
1481    /// Converts a slice of UTF-32 data to a string slice.
1482    ///
1483    /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1484    /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1485    /// then does the conversion.
1486    ///
1487    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1488    /// the validity check, there is an unsafe version of this function,
1489    /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
1490    /// the check.
1491    ///
1492    /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1493    ///
1494    /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1495    /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1496    /// after converting from UTF-8 to UTF-32.
1497    ///
1498    /// # Errors
1499    ///
1500    /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1501    /// is not UTF-32.
1502    ///
1503    /// # Examples
1504    ///
1505    /// ```
1506    /// use widestring::Utf32Str;
1507    ///
1508    /// let sparkle_heart = vec![0x1f496];
1509    /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
1510    ///
1511    /// assert_eq!("💖", sparkle_heart);
1512    /// ```
1513    ///
1514    /// With incorrect values that return an error:
1515    ///
1516    /// ```
1517    /// use widestring::Utf32Str;
1518    ///
1519    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1520    ///
1521    /// assert!(Utf32Str::from_slice(&sparkle_heart).is_err());
1522    /// ```
1523    pub fn from_slice(s: &[u32]) -> Result<&Self, Utf32Error> {
1524        validate_utf32(s)?;
1525        // SAFETY: Just validated
1526        Ok(unsafe { Self::from_slice_unchecked(s) })
1527    }
1528
1529    /// Converts a mutable slice of UTF-32 data to a mutable string slice.
1530    ///
1531    /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1532    /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1533    /// then does the conversion.
1534    ///
1535    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1536    /// the validity check, there is an unsafe version of this function,
1537    /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
1538    /// but skips the check.
1539    ///
1540    /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1541    ///
1542    /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1543    /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1544    /// after converting from UTF-8 to UTF-32.
1545    ///
1546    /// # Errors
1547    ///
1548    /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1549    /// is not UTF-32.
1550    ///
1551    /// # Examples
1552    ///
1553    /// ```
1554    /// use widestring::Utf32Str;
1555    ///
1556    /// let mut sparkle_heart = vec![0x1f496];
1557    /// let sparkle_heart = Utf32Str::from_slice_mut(&mut sparkle_heart).unwrap();
1558    ///
1559    /// assert_eq!("💖", sparkle_heart);
1560    /// ```
1561    ///
1562    /// With incorrect values that return an error:
1563    ///
1564    /// ```
1565    /// use widestring::Utf32Str;
1566    ///
1567    /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1568    ///
1569    /// assert!(Utf32Str::from_slice_mut(&mut sparkle_heart).is_err());
1570    /// ```
1571    pub fn from_slice_mut(s: &mut [u32]) -> Result<&mut Self, Utf32Error> {
1572        validate_utf32(s)?;
1573        // SAFETY: Just validated
1574        Ok(unsafe { Self::from_slice_unchecked_mut(s) })
1575    }
1576
1577    /// Converts a wide string slice of undefined encoding to a UTF-32 string slice without checking
1578    /// if the string slice is valid UTF-32.
1579    ///
1580    /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
1581    ///
1582    /// # Safety
1583    ///
1584    /// This function is unsafe because it does not check that the string slice passed to it is
1585    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1586    /// the [`Utf32Str`] is always valid UTF-32.
1587    ///
1588    /// # Examples
1589    ///
1590    /// ```
1591    /// use widestring::{Utf32Str, u32str};
1592    ///
1593    /// let sparkle_heart = u32str!("💖");
1594    /// let sparkle_heart = unsafe { Utf32Str::from_ustr_unchecked(sparkle_heart) };
1595    ///
1596    /// assert_eq!("💖", sparkle_heart);
1597    /// ```
1598    #[inline]
1599    #[must_use]
1600    pub const unsafe fn from_ustr_unchecked(s: &crate::U32Str) -> &Self {
1601        Self::from_slice_unchecked(s.as_slice())
1602    }
1603
1604    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice
1605    /// without checking if the string slice is valid UTF-32.
1606    ///
1607    /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
1608    ///
1609    /// # Safety
1610    ///
1611    /// This function is unsafe because it does not check that the string slice passed to it is
1612    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1613    /// the [`Utf32Str`] is always valid UTF-32.
1614    #[inline]
1615    #[must_use]
1616    pub unsafe fn from_ustr_unchecked_mut(s: &mut crate::U32Str) -> &mut Self {
1617        Self::from_slice_unchecked_mut(s.as_mut_slice())
1618    }
1619
1620    /// Converts a wide string slice of undefined encoding to a UTF-32 string slice.
1621    ///
1622    /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1623    /// [`U32Str`] does not contain valid UTF-32 data.
1624    ///
1625    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1626    /// the validity check, there is an unsafe version of this function,
1627    /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
1628    /// but skips the check.
1629    ///
1630    /// # Errors
1631    ///
1632    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1633    /// provided string slice is not UTF-32.
1634    ///
1635    /// # Examples
1636    ///
1637    /// ```
1638    /// use widestring::{Utf32Str, u32str};
1639    ///
1640    /// let sparkle_heart = u32str!("💖");
1641    /// let sparkle_heart = Utf32Str::from_ustr(sparkle_heart).unwrap();
1642    ///
1643    /// assert_eq!("💖", sparkle_heart);
1644    /// ```
1645    #[inline]
1646    pub fn from_ustr(s: &crate::U32Str) -> Result<&Self, Utf32Error> {
1647        Self::from_slice(s.as_slice())
1648    }
1649
1650    /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice.
1651    ///
1652    /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1653    /// [`U32Str`] does not contain valid UTF-32 data.
1654    ///
1655    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1656    /// the validity check, there is an unsafe version of this function,
1657    /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1658    /// but skips the check.
1659    ///
1660    /// # Errors
1661    ///
1662    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1663    /// provided string slice is not UTF-32.
1664    #[inline]
1665    pub fn from_ustr_mut(s: &mut crate::U32Str) -> Result<&mut Self, Utf32Error> {
1666        Self::from_slice_mut(s.as_mut_slice())
1667    }
1668
1669    /// Converts a wide C string slice to a UTF-32 string slice without checking if the
1670    /// string slice is valid UTF-32.
1671    ///
1672    /// The resulting string slice does *not* contain the nul terminator.
1673    ///
1674    /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1675    ///
1676    /// # Safety
1677    ///
1678    /// This function is unsafe because it does not check that the string slice passed to it is
1679    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1680    /// the [`Utf32Str`] is always valid UTF-32.
1681    ///
1682    /// # Examples
1683    ///
1684    /// ```
1685    /// use widestring::{Utf32Str, u32cstr};
1686    ///
1687    /// let sparkle_heart = u32cstr!("💖");
1688    /// let sparkle_heart = unsafe { Utf32Str::from_ucstr_unchecked(sparkle_heart) };
1689    ///
1690    /// assert_eq!("💖", sparkle_heart);
1691    /// ```
1692    #[inline]
1693    #[must_use]
1694    pub unsafe fn from_ucstr_unchecked(s: &crate::U32CStr) -> &Self {
1695        Self::from_slice_unchecked(s.as_slice())
1696    }
1697
1698    /// Converts a mutable wide C string slice to a mutable UTF-32 string slice without
1699    /// checking if the string slice is valid UTF-32.
1700    ///
1701    /// The resulting string slice does *not* contain the nul terminator.
1702    ///
1703    /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1704    ///
1705    /// # Safety
1706    ///
1707    /// This function is unsafe because it does not check that the string slice passed to it is
1708    /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1709    /// the [`Utf32Str`] is always valid UTF-32.
1710    #[inline]
1711    #[must_use]
1712    pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U32CStr) -> &mut Self {
1713        Self::from_slice_unchecked_mut(s.as_mut_slice())
1714    }
1715
1716    /// Converts a wide C string slice to a UTF-32 string slice.
1717    ///
1718    /// The resulting string slice does *not* contain the nul terminator.
1719    ///
1720    /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1721    /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1722    ///
1723    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1724    /// the validity check, there is an unsafe version of this function,
1725    /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1726    /// but skips the check.
1727    ///
1728    /// # Errors
1729    ///
1730    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1731    /// provided string slice is not UTF-32.
1732    ///
1733    /// # Examples
1734    ///
1735    /// ```
1736    /// use widestring::{Utf32Str, u32cstr};
1737    ///
1738    /// let sparkle_heart = u32cstr!("💖");
1739    /// let sparkle_heart = Utf32Str::from_ucstr(sparkle_heart).unwrap();
1740    ///
1741    /// assert_eq!("💖", sparkle_heart);
1742    /// ```
1743    #[inline]
1744    pub fn from_ucstr(s: &crate::U32CStr) -> Result<&Self, Utf32Error> {
1745        Self::from_slice(s.as_slice())
1746    }
1747
1748    /// Converts a mutable wide C string slice to a mutable UTF-32 string slice.
1749    ///
1750    /// The resulting string slice does *not* contain the nul terminator.
1751    ///
1752    /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1753    /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1754    ///
1755    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1756    /// the validity check, there is an unsafe version of this function,
1757    /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1758    /// but skips the check.
1759    ///
1760    /// # Safety
1761    ///
1762    /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1763    /// when mutating the slice (i.e. by adding interior nul values).
1764    ///
1765    /// # Errors
1766    ///
1767    /// Returns an error if the string slice is not UTF-32 with a description as to why the
1768    /// provided string slice is not UTF-32.
1769    #[inline]
1770    pub unsafe fn from_ucstr_mut(s: &mut crate::U32CStr) -> Result<&mut Self, Utf32Error> {
1771        Self::from_slice_mut(s.as_mut_slice())
1772    }
1773
1774    /// Converts a slice of [`char`]s to a string slice.
1775    ///
1776    /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1777    ///
1778    /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1779    ///
1780    /// # Examples
1781    ///
1782    /// ```
1783    /// use widestring::Utf32Str;
1784    ///
1785    /// let sparkle_heart = ['💖'];
1786    /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
1787    ///
1788    /// assert_eq!("💖", sparkle_heart);
1789    /// ```
1790    #[allow(trivial_casts)]
1791    #[inline]
1792    #[must_use]
1793    pub const fn from_char_slice(s: &[char]) -> &Self {
1794        // SAFETY: char slice is always valid UTF-32
1795        unsafe { Self::from_slice_unchecked(&*(s as *const [char] as *const [u32])) }
1796    }
1797
1798    /// Converts a mutable slice of [`char`]s to a string slice.
1799    ///
1800    /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1801    ///
1802    /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1803    ///
1804    /// # Examples
1805    ///
1806    /// ```
1807    /// use widestring::Utf32Str;
1808    ///
1809    /// let mut sparkle_heart = ['💖'];
1810    /// let sparkle_heart = Utf32Str::from_char_slice_mut(&mut sparkle_heart);
1811    ///
1812    /// assert_eq!("💖", sparkle_heart);
1813    /// ```
1814    #[allow(trivial_casts)]
1815    #[inline]
1816    #[must_use]
1817    pub fn from_char_slice_mut(s: &mut [char]) -> &mut Self {
1818        // SAFETY: char slice is always valid UTF-32
1819        unsafe { Self::from_slice_unchecked_mut(&mut *(s as *mut [char] as *mut [u32])) }
1820    }
1821
1822    /// Converts a string slice into a slice of [`char`]s.
1823    #[allow(trivial_casts)]
1824    #[inline]
1825    #[must_use]
1826    pub const fn as_char_slice(&self) -> &[char] {
1827        // SAFETY: Self should be valid UTF-32 so chars will be in range
1828        unsafe { &*(self.as_slice() as *const [u32] as *const [char]) }
1829    }
1830
1831    /// Converts a mutable string slice into a mutable slice of [`char`]s.
1832    #[allow(trivial_casts)]
1833    #[inline]
1834    #[must_use]
1835    pub fn as_char_slice_mut(&mut self) -> &mut [char] {
1836        // SAFETY: Self should be valid UTF-32 so chars will be in range
1837        unsafe { &mut *(self.as_mut_slice() as *mut [u32] as *mut [char]) }
1838    }
1839
1840    /// Converts to a standard UTF-8 [`String`].
1841    ///
1842    /// Because this string is always valid UTF-32, the conversion is lossless and non-fallible.
1843    #[inline]
1844    #[allow(clippy::inherent_to_string_shadow_display)]
1845    #[cfg(feature = "alloc")]
1846    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1847    #[must_use]
1848    pub fn to_string(&self) -> String {
1849        let mut s = String::with_capacity(self.len());
1850        s.extend(self.as_char_slice());
1851        s
1852    }
1853
1854    /// Returns a subslice of this string.
1855    ///
1856    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1857    /// equivalent indexing operation would panic.
1858    ///
1859    /// # Examples
1860    ///
1861    /// ```
1862    /// # use widestring::{utf32str};
1863    /// let v = utf32str!("⚧️🏳️‍⚧️➡️s");
1864    ///
1865    /// assert_eq!(Some(utf32str!("⚧️")), v.get(..2));
1866    /// assert_eq!(Some(utf32str!("🏳️‍⚧️")), v.get(2..7));
1867    /// assert_eq!(Some(utf32str!("➡️")), v.get(7..9));
1868    /// assert_eq!(Some(utf32str!("s")), v.get(9..));
1869    /// ```
1870    #[inline]
1871    #[must_use]
1872    pub fn get<I>(&self, index: I) -> Option<&Self>
1873    where
1874        I: SliceIndex<[u32], Output = [u32]>,
1875    {
1876        // TODO: Use SliceIndex directly when it is stabilized
1877        // SAFETY: subslice has already been verified
1878        self.inner
1879            .get(index)
1880            .map(|s| unsafe { Self::from_slice_unchecked(s) })
1881    }
1882
1883    /// Returns a mutable subslice of this string.
1884    ///
1885    /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1886    /// equivalent indexing operation would panic.
1887    ///
1888    /// # Examples
1889    ///
1890    /// ```
1891    /// # use widestring::{utf32str};
1892    /// # #[cfg(feature = "alloc")] {
1893    /// let mut v = utf32str!("⚧️🏳️‍⚧️➡️s").to_owned();
1894    ///
1895    /// assert_eq!(utf32str!("⚧️"), v.get_mut(..2).unwrap());
1896    /// assert_eq!(utf32str!("🏳️‍⚧️"), v.get_mut(2..7).unwrap());
1897    /// assert_eq!(utf32str!("➡️"), v.get_mut(7..9).unwrap());
1898    /// assert_eq!(utf32str!("s"), v.get_mut(9..).unwrap());
1899    /// # }
1900    /// ```
1901    #[inline]
1902    #[must_use]
1903    pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1904    where
1905        I: SliceIndex<[u32], Output = [u32]>,
1906    {
1907        // TODO: Use SliceIndex directly when it is stabilized
1908        // SAFETY: subslice has already been verified
1909        self.inner
1910            .get_mut(index)
1911            .map(|s| unsafe { Self::from_slice_unchecked_mut(s) })
1912    }
1913
1914    /// Divide one string slice into two at an index.
1915    ///
1916    /// The argument, `mid`, should be an offset from the start of the string.
1917    ///
1918    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1919    /// the end of the string slice.
1920    ///
1921    /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1922    ///
1923    /// # Panics
1924    ///
1925    /// Panics if `mid` is past the end of the last code point of the string slice.
1926    ///
1927    /// # Examples
1928    ///
1929    /// ```
1930    /// # use widestring::utf32str;
1931    /// let s = utf32str!("Per Martin-Löf");
1932    ///
1933    /// let (first, last) = s.split_at(3);
1934    ///
1935    /// assert_eq!("Per", first);
1936    /// assert_eq!(" Martin-Löf", last);
1937    /// ```
1938    #[inline]
1939    #[must_use]
1940    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1941        let (a, b) = self.inner.split_at(mid);
1942        unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1943    }
1944
1945    /// Divide one mutable string slice into two at an index.
1946    ///
1947    /// The argument, `mid`, should be an offset from the start of the string.
1948    ///
1949    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1950    /// the end of the string slice.
1951    ///
1952    /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1953    ///
1954    /// # Panics
1955    ///
1956    /// Panics if `mid` is past the end of the last code point of the string slice.
1957    ///
1958    /// # Examples
1959    ///
1960    /// ```
1961    /// # use widestring::utf32str;
1962    /// # #[cfg(feature = "alloc")] {
1963    /// let mut s = utf32str!("Per Martin-Löf").to_owned();
1964    ///
1965    /// let (first, last) = s.split_at_mut(3);
1966    ///
1967    /// assert_eq!("Per", first);
1968    /// assert_eq!(" Martin-Löf", last);
1969    /// # }
1970    /// ```
1971    #[inline]
1972    #[must_use]
1973    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1974        let (a, b) = self.inner.split_at_mut(mid);
1975        unsafe {
1976            (
1977                Self::from_slice_unchecked_mut(a),
1978                Self::from_slice_unchecked_mut(b),
1979            )
1980        }
1981    }
1982
1983    /// Returns an iterator over the [`char`]s of a string slice.
1984    ///
1985    /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
1986    /// [`char`]. This method returns such an iterator.
1987    ///
1988    /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1989    /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1990    /// actually want. This functionality is not provided by this crate.
1991    #[inline]
1992    #[must_use]
1993    pub fn chars(&self) -> CharsUtf32<'_> {
1994        CharsUtf32::new(self.as_slice())
1995    }
1996
1997    /// Returns an iterator over the [`char`]s of a string slice and their positions.
1998    ///
1999    /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
2000    /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
2001    ///
2002    /// The iterator yields tuples. The position is first, the [`char`] is second.
2003    #[inline]
2004    #[must_use]
2005    pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
2006        CharIndicesUtf32::new(self.as_slice())
2007    }
2008
2009    /// Returns an iterator of bytes over the string encoded as UTF-8.
2010    #[must_use]
2011    pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf32<'_>> {
2012        crate::encode_utf8(self.chars())
2013    }
2014
2015    /// Returns an iterator of [`u16`] over the sting encoded as UTF-16.
2016    #[must_use]
2017    pub fn encode_utf16(&self) -> EncodeUtf16<CharsUtf32<'_>> {
2018        crate::encode_utf16(self.chars())
2019    }
2020
2021    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
2022    #[inline]
2023    #[must_use]
2024    pub fn escape_debug(&self) -> EscapeDebug<CharsUtf32<'_>> {
2025        EscapeDebug::<CharsUtf32>::new(self.as_slice())
2026    }
2027
2028    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
2029    #[inline]
2030    #[must_use]
2031    pub fn escape_default(&self) -> EscapeDefault<CharsUtf32<'_>> {
2032        EscapeDefault::<CharsUtf32>::new(self.as_slice())
2033    }
2034
2035    /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
2036    #[inline]
2037    #[must_use]
2038    pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf32<'_>> {
2039        EscapeUnicode::<CharsUtf32>::new(self.as_slice())
2040    }
2041
2042    /// Returns the lowercase equivalent of this string slice, as a new [`Utf32String`].
2043    ///
2044    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
2045    /// `Lowercase`.
2046    ///
2047    /// Since some characters can expand into multiple characters when changing the case, this
2048    /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2049    #[inline]
2050    #[cfg(feature = "alloc")]
2051    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2052    #[must_use]
2053    pub fn to_lowercase(&self) -> Utf32String {
2054        let mut s = Utf32String::with_capacity(self.len());
2055        for c in self.chars() {
2056            for lower in c.to_lowercase() {
2057                s.push(lower);
2058            }
2059        }
2060        s
2061    }
2062
2063    /// Returns the uppercase equivalent of this string slice, as a new [`Utf32String`].
2064    ///
2065    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
2066    /// `Uppercase`.
2067    ///
2068    /// Since some characters can expand into multiple characters when changing the case, this
2069    /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2070    #[inline]
2071    #[cfg(feature = "alloc")]
2072    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2073    #[must_use]
2074    pub fn to_uppercase(&self) -> Utf32String {
2075        let mut s = Utf32String::with_capacity(self.len());
2076        for c in self.chars() {
2077            for lower in c.to_uppercase() {
2078                s.push(lower);
2079            }
2080        }
2081        s
2082    }
2083
2084    /// Returns an iterator over the lines of a [`Utf32Str`], as string slices.
2085    ///
2086    /// Lines are split at line endings that are either newlines (`\n`) or
2087    /// sequences of a carriage return followed by a line feed (`\r\n`).
2088    ///
2089    /// Line terminators are not included in the lines returned by the iterator.
2090    ///
2091    /// Note that any carriage return (`\r`) not immediately followed by a
2092    /// line feed (`\n`) does not split a line. These carriage returns are
2093    /// thereby included in the produced lines.
2094    ///
2095    /// The final line ending is optional. A string that ends with a final line
2096    /// ending will return the same lines as an otherwise identical string
2097    /// without a final line ending.
2098    ///
2099    /// # Examples
2100    ///
2101    /// Basic usage:
2102    ///
2103    /// ```
2104    /// use widestring::utf32str;
2105    ///
2106    /// let text = utf32str!("foo\r\nbar\n\nbaz\r");
2107    /// let mut lines = text.lines();
2108    ///
2109    /// assert_eq!(Some(utf32str!("foo")), lines.next());
2110    /// assert_eq!(Some(utf32str!("bar")), lines.next());
2111    /// assert_eq!(Some(utf32str!("")), lines.next());
2112    /// // Trailing carriage return is included in the last line
2113    /// assert_eq!(Some(utf32str!("baz\r")), lines.next());
2114    ///
2115    /// assert_eq!(None, lines.next());
2116    /// ```
2117    ///
2118    /// The final line does not require any ending:
2119    ///
2120    /// ```
2121    /// use widestring::utf32str;
2122    ///
2123    /// let text = utf32str!("foo\nbar\n\r\nbaz");
2124    /// let mut lines = text.lines();
2125    ///
2126    /// assert_eq!(Some(utf32str!("foo")), lines.next());
2127    /// assert_eq!(Some(utf32str!("bar")), lines.next());
2128    /// assert_eq!(Some(utf32str!("")), lines.next());
2129    /// assert_eq!(Some(utf32str!("baz")), lines.next());
2130    ///
2131    /// assert_eq!(None, lines.next());
2132    /// ```
2133    pub fn lines(&self) -> Lines<'_, Utf32Str, CharIndicesUtf32<'_>> {
2134        Lines::new(self, self.len(), self.char_indices())
2135    }
2136}
2137
2138impl AsMut<[char]> for Utf32Str {
2139    #[inline]
2140    fn as_mut(&mut self) -> &mut [char] {
2141        self.as_char_slice_mut()
2142    }
2143}
2144
2145impl AsRef<[char]> for Utf32Str {
2146    #[inline]
2147    fn as_ref(&self) -> &[char] {
2148        self.as_char_slice()
2149    }
2150}
2151
2152impl<'a> From<&'a [char]> for &'a Utf32Str {
2153    #[inline]
2154    fn from(value: &'a [char]) -> Self {
2155        Utf32Str::from_char_slice(value)
2156    }
2157}
2158
2159impl<'a> From<&'a mut [char]> for &'a mut Utf32Str {
2160    #[inline]
2161    fn from(value: &'a mut [char]) -> Self {
2162        Utf32Str::from_char_slice_mut(value)
2163    }
2164}
2165
2166impl<'a> From<&'a Utf32Str> for &'a [char] {
2167    #[inline]
2168    fn from(value: &'a Utf32Str) -> Self {
2169        value.as_char_slice()
2170    }
2171}
2172
2173impl<'a> From<&'a mut Utf32Str> for &'a mut [char] {
2174    #[inline]
2175    fn from(value: &'a mut Utf32Str) -> Self {
2176        value.as_char_slice_mut()
2177    }
2178}
2179
2180impl<I> Index<I> for Utf16Str
2181where
2182    I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2183{
2184    type Output = Utf16Str;
2185
2186    #[inline]
2187    fn index(&self, index: I) -> &Self::Output {
2188        self.get(index)
2189            .expect("index out of bounds or not on char boundary")
2190    }
2191}
2192
2193impl<I> Index<I> for Utf32Str
2194where
2195    I: SliceIndex<[u32], Output = [u32]>,
2196{
2197    type Output = Utf32Str;
2198
2199    #[inline]
2200    fn index(&self, index: I) -> &Self::Output {
2201        self.get(index).expect("index out of bounds")
2202    }
2203}
2204
2205impl<I> IndexMut<I> for Utf16Str
2206where
2207    I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2208{
2209    #[inline]
2210    fn index_mut(&mut self, index: I) -> &mut Self::Output {
2211        self.get_mut(index)
2212            .expect("index out of bounds or not on char boundary")
2213    }
2214}
2215
2216impl<I> IndexMut<I> for Utf32Str
2217where
2218    I: SliceIndex<[u32], Output = [u32]>,
2219{
2220    #[inline]
2221    fn index_mut(&mut self, index: I) -> &mut Self::Output {
2222        self.get_mut(index).expect("index out of bounds")
2223    }
2224}
2225
2226impl PartialEq<[char]> for Utf32Str {
2227    #[inline]
2228    fn eq(&self, other: &[char]) -> bool {
2229        self.as_char_slice() == other
2230    }
2231}
2232
2233impl PartialEq<Utf32Str> for [char] {
2234    #[inline]
2235    fn eq(&self, other: &Utf32Str) -> bool {
2236        self == other.as_char_slice()
2237    }
2238}
2239
2240impl PartialEq<Utf16Str> for Utf32Str {
2241    #[inline]
2242    fn eq(&self, other: &Utf16Str) -> bool {
2243        self.chars().eq(other.chars())
2244    }
2245}
2246
2247impl PartialEq<Utf32Str> for Utf16Str {
2248    #[inline]
2249    fn eq(&self, other: &Utf32Str) -> bool {
2250        self.chars().eq(other.chars())
2251    }
2252}
2253
2254impl PartialEq<&Utf16Str> for Utf32Str {
2255    #[inline]
2256    fn eq(&self, other: &&Utf16Str) -> bool {
2257        self.chars().eq(other.chars())
2258    }
2259}
2260
2261impl PartialEq<&Utf32Str> for Utf16Str {
2262    #[inline]
2263    fn eq(&self, other: &&Utf32Str) -> bool {
2264        self.chars().eq(other.chars())
2265    }
2266}
2267
2268impl PartialEq<Utf16Str> for &Utf32Str {
2269    #[inline]
2270    fn eq(&self, other: &Utf16Str) -> bool {
2271        self.chars().eq(other.chars())
2272    }
2273}
2274
2275impl PartialEq<Utf32Str> for &Utf16Str {
2276    #[inline]
2277    fn eq(&self, other: &Utf32Str) -> bool {
2278        self.chars().eq(other.chars())
2279    }
2280}
2281
2282impl<'a> TryFrom<&'a [u16]> for &'a Utf16Str {
2283    type Error = Utf16Error;
2284
2285    #[inline]
2286    fn try_from(value: &'a [u16]) -> Result<Self, Self::Error> {
2287        Utf16Str::from_slice(value)
2288    }
2289}
2290
2291impl<'a> TryFrom<&'a mut [u16]> for &'a mut Utf16Str {
2292    type Error = Utf16Error;
2293
2294    #[inline]
2295    fn try_from(value: &'a mut [u16]) -> Result<Self, Self::Error> {
2296        Utf16Str::from_slice_mut(value)
2297    }
2298}
2299
2300impl<'a> TryFrom<&'a [u32]> for &'a Utf32Str {
2301    type Error = Utf32Error;
2302
2303    #[inline]
2304    fn try_from(value: &'a [u32]) -> Result<Self, Self::Error> {
2305        Utf32Str::from_slice(value)
2306    }
2307}
2308
2309impl<'a> TryFrom<&'a mut [u32]> for &'a mut Utf32Str {
2310    type Error = Utf32Error;
2311
2312    #[inline]
2313    fn try_from(value: &'a mut [u32]) -> Result<Self, Self::Error> {
2314        Utf32Str::from_slice_mut(value)
2315    }
2316}
2317
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on every target that is not Windows, where the alias resolves to
/// [`Utf32Str`].
#[cfg(not(windows))]
pub type WideUtfStr = Utf32Str;
2322
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on Windows targets, where the alias resolves to [`Utf16Str`].
#[cfg(windows)]
pub type WideUtfStr = Utf16Str;
2327
#[cfg(test)]
mod test {
    use crate::*;

    /// `trim_start` on UTF-16 strings removes leading whitespace and leaves the rest intact.
    #[test]
    fn utf16_trim() {
        let tabbed = utf16str!(" Hello\tworld\t");
        assert_eq!(utf16str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf16str!("  English  ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf16str!("  עברית  ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());
    }

    /// `trim_start` on UTF-32 strings removes leading whitespace and leaves the rest intact.
    #[test]
    fn utf32_trim() {
        let tabbed = utf32str!(" Hello\tworld\t");
        assert_eq!(utf32str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf32str!("  English  ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf32str!("  עברית  ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());
    }
}
widestring/utfstr.rs

widestring/
utfstr.rs