widestring/
ustr.rs

//! Wide string slices with undefined encoding.
//!
//! This module contains wide string slices and related types.
4
5use crate::utfstr::Lines;
6#[cfg(feature = "alloc")]
7use crate::{
8    error::{Utf16Error, Utf32Error},
9    U16String, U32String,
10};
11#[cfg(feature = "alloc")]
12#[allow(unused_imports)]
13use alloc::{boxed::Box, string::String, vec::Vec};
14use core::{
15    char,
16    fmt::Write,
17    ops::{Index, IndexMut, Range},
18    slice::{self, SliceIndex},
19};
20
21mod iter;
22
23pub use iter::*;
24
// Generates a wide string slice type plus the inherent methods and trait impls that are
// identical for every element width.
//
// Inputs:
// - `struct $ustr([$uchar]);` names the slice type and its element type (attributes placed
//   before it, typically docs, are forwarded to the generated struct);
// - `type UString = ...;` names the owned counterpart, used only by `alloc`-gated items;
// - `type UCStr = ...;` names the crate-level nul-terminated counterpart used in comparisons;
// - attributes placed before `fn display() -> {}` are forwarded to the generated `display`.
macro_rules! ustr_common_impl {
    {
        $(#[$ustr_meta:meta])*
        struct $ustr:ident([$uchar:ty]);
        type UString = $ustring:ident;
        type UCStr = $ucstr:ident;
        $(#[$display_meta:meta])*
        fn display() -> {}
    } => {
        $(#[$ustr_meta])*
        // `Hash` is derived while further `PartialEq` impls are written by hand below; all of
        // them compare the same underlying elements.
        #[allow(clippy::derive_hash_xor_eq)]
        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
        // The pointer casts between `[$uchar]` and this type below rely on both having the same
        // layout and pointer metadata, which only `repr(transparent)` guarantees.
        #[repr(transparent)]
        pub struct $ustr {
            pub(crate) inner: [$uchar],
        }

        impl $ustr {
            /// Coerces a value into a wide string slice.
            #[inline]
            #[must_use]
            pub fn new<S: AsRef<Self> + ?Sized>(s: &S) -> &Self {
                s.as_ref()
            }

            /// Constructs a wide string slice from a pointer and a length.
            ///
            /// The `len` argument is the number of elements, **not** the number of bytes. No
            /// copying or allocation is performed, the resulting value is a direct reference to
            /// the pointer bytes.
            ///
            /// # Safety
            ///
            /// This function is unsafe as there is no guarantee that the given pointer is valid
            /// for `len` elements.
            ///
            /// In addition, the data must meet the safety conditions of
            /// [std::slice::from_raw_parts]. In particular, the returned string reference *must
            /// not be mutated* for the duration of lifetime `'a`, except inside an
            /// [`UnsafeCell`][std::cell::UnsafeCell].
            ///
            /// # Panics
            ///
            /// This function panics if `p` is null.
            ///
            /// # Caveat
            ///
            /// The lifetime for the returned string is inferred from its usage. To prevent
            /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
            /// is safe in the context, such as by providing a helper function taking the lifetime
            /// of a host value for the string, or by explicit annotation.
            #[inline]
            #[must_use]
            pub unsafe fn from_ptr<'a>(p: *const $uchar, len: usize) -> &'a Self {
                assert!(!p.is_null());
                // SAFETY: the caller guarantees the `slice::from_raw_parts` contract for
                // `p`/`len`, and the wrapper is `repr(transparent)` over `[$uchar]`, so the cast
                // keeps the same address and length metadata.
                let slice: *const [$uchar] = slice::from_raw_parts(p, len);
                &*(slice as *const $ustr)
            }

            /// Constructs a mutable wide string slice from a mutable pointer and a length.
            ///
            /// The `len` argument is the number of elements, **not** the number of bytes. No
            /// copying or allocation is performed, the resulting value is a direct reference to
            /// the pointer bytes.
            ///
            /// # Safety
            ///
            /// This function is unsafe as there is no guarantee that the given pointer is valid
            /// for `len` elements.
            ///
            /// In addition, the data must meet the safety conditions of
            /// [std::slice::from_raw_parts_mut].
            ///
            /// # Panics
            ///
            /// This function panics if `p` is null.
            ///
            /// # Caveat
            ///
            /// The lifetime for the returned string is inferred from its usage. To prevent
            /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
            /// is safe in the context, such as by providing a helper function taking the lifetime
            /// of a host value for the string, or by explicit annotation.
            #[inline]
            #[must_use]
            pub unsafe fn from_ptr_mut<'a>(p: *mut $uchar, len: usize) -> &'a mut Self {
                assert!(!p.is_null());
                // SAFETY: the caller guarantees the `slice::from_raw_parts_mut` contract for
                // `p`/`len`, and the wrapper is `repr(transparent)` over `[$uchar]`, so the cast
                // keeps the same address and length metadata.
                let slice: *mut [$uchar] = slice::from_raw_parts_mut(p, len);
                &mut *(slice as *mut $ustr)
            }

            /// Constructs a wide string slice from a slice of character data.
            ///
            /// No checks are performed on the slice. It may be of any encoding and may contain
            /// invalid or malformed data for that encoding.
            #[inline]
            #[must_use]
            pub const fn from_slice(slice: &[$uchar]) -> &Self {
                let ptr: *const [$uchar] = slice;
                // SAFETY: `ptr` comes from a valid shared reference and the wrapper is
                // `repr(transparent)` over `[$uchar]`, so the reborrow is valid for the same
                // lifetime.
                unsafe { &*(ptr as *const $ustr) }
            }

            /// Constructs a mutable wide string slice from a mutable slice of character data.
            ///
            /// No checks are performed on the slice. It may be of any encoding and may contain
            /// invalid or malformed data for that encoding.
            #[inline]
            #[must_use]
            pub fn from_slice_mut(slice: &mut [$uchar]) -> &mut Self {
                let ptr: *mut [$uchar] = slice;
                // SAFETY: `ptr` comes from a valid unique reference and the wrapper is
                // `repr(transparent)` over `[$uchar]`, so the reborrow is valid and still unique.
                unsafe { &mut *(ptr as *mut $ustr) }
            }

            /// Copies the string reference to a new owned wide string.
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[inline]
            #[must_use]
            pub fn to_ustring(&self) -> $ustring {
                $ustring::from_vec(&self.inner)
            }

            /// Converts to a slice of the underlying elements of the string.
            #[inline]
            #[must_use]
            pub const fn as_slice(&self) -> &[$uchar] {
                &self.inner
            }

            /// Converts to a mutable slice of the underlying elements of the string.
            #[inline]
            #[must_use]
            pub fn as_mut_slice(&mut self) -> &mut [$uchar] {
                &mut self.inner
            }

            /// Returns a raw pointer to the string.
            ///
            /// The caller must ensure that the string outlives the pointer this function returns,
            /// or else it will end up pointing to garbage.
            ///
            /// The caller must also ensure that the memory the pointer (non-transitively) points
            /// to is never written to (except inside an `UnsafeCell`) using this pointer or any
            /// pointer derived from it. If you need to mutate the contents of the string, use
            /// [`as_mut_ptr`][Self::as_mut_ptr].
            ///
            /// Modifying the container referenced by this string may cause its buffer to be
            /// reallocated, which would also make any pointers to it invalid.
            #[inline]
            #[must_use]
            pub const fn as_ptr(&self) -> *const $uchar {
                self.inner.as_ptr()
            }

            /// Returns an unsafe mutable raw pointer to the string.
            ///
            /// The caller must ensure that the string outlives the pointer this function returns,
            /// or else it will end up pointing to garbage.
            ///
            /// Modifying the container referenced by this string may cause its buffer to be
            /// reallocated, which would also make any pointers to it invalid.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
                self.inner.as_mut_ptr()
            }

            /// Returns the two raw pointers spanning the string slice.
            ///
            /// The returned range is half-open, which means that the end pointer points one past
            /// the last element of the slice. This way, an empty slice is represented by two
            /// equal pointers, and the difference between the two pointers represents the size of
            /// the slice.
            ///
            /// See [`as_ptr`][Self::as_ptr] for warnings on using these pointers. The end pointer
            /// requires extra caution, as it does not point to a valid element in the slice.
            ///
            /// This function is useful for interacting with foreign interfaces which use two
            /// pointers to refer to a range of elements in memory, as is common in C++.
            #[inline]
            #[must_use]
            pub fn as_ptr_range(&self) -> Range<*const $uchar> {
                self.inner.as_ptr_range()
            }

            /// Returns the two unsafe mutable pointers spanning the string slice.
            ///
            /// The returned range is half-open, which means that the end pointer points one past
            /// the last element of the slice. This way, an empty slice is represented by two
            /// equal pointers, and the difference between the two pointers represents the size of
            /// the slice.
            ///
            /// See [`as_mut_ptr`][Self::as_mut_ptr] for warnings on using these pointers. The end
            /// pointer requires extra caution, as it does not point to a valid element in the
            /// slice.
            ///
            /// This function is useful for interacting with foreign interfaces which use two
            /// pointers to refer to a range of elements in memory, as is common in C++.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr_range(&mut self) -> Range<*mut $uchar> {
                self.inner.as_mut_ptr_range()
            }

            /// Returns the length of the string as number of elements (**not** number of bytes).
            #[inline]
            #[must_use]
            pub const fn len(&self) -> usize {
                self.inner.len()
            }

            /// Returns whether this string contains no data.
            #[inline]
            #[must_use]
            pub const fn is_empty(&self) -> bool {
                self.inner.is_empty()
            }

            /// Converts a boxed wide string slice into an owned wide string without copying or
            /// allocating.
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_ustring(self: Box<Self>) -> $ustring {
                // SAFETY: the pointer was just produced by `Box::into_raw`, and the wrapper is
                // `repr(transparent)` over `[$uchar]`, so it may be re-boxed as the inner slice.
                let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) };
                $ustring {
                    inner: boxed.into_vec(),
                }
            }

            $(#[$display_meta])*
            #[inline]
            #[must_use]
            pub fn display(&self) -> Display<'_, $ustr> {
                Display { str: self }
            }

            /// Returns a subslice of the string.
            ///
            /// This is the non-panicking alternative to indexing the string. Returns [`None`]
            /// whenever equivalent indexing operation would panic.
            #[inline]
            #[must_use]
            pub fn get<I>(&self, i: I) -> Option<&Self>
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                self.inner.get(i).map(Self::from_slice)
            }

            /// Returns a mutable subslice of the string.
            ///
            /// This is the non-panicking alternative to indexing the string. Returns [`None`]
            /// whenever equivalent indexing operation would panic.
            #[inline]
            #[must_use]
            pub fn get_mut<I>(&mut self, i: I) -> Option<&mut Self>
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                self.inner.get_mut(i).map(Self::from_slice_mut)
            }

            /// Returns an unchecked subslice of the string.
            ///
            /// This is the unchecked alternative to indexing the string.
            ///
            /// # Safety
            ///
            /// Callers of this function are responsible that these preconditions are satisfied:
            ///
            /// - The starting index must not exceed the ending index;
            /// - Indexes must be within bounds of the original slice.
            ///
            /// Failing that, the returned string slice may reference invalid memory.
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked<I>(&self, i: I) -> &Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller upholds the bounds preconditions listed above.
                Self::from_slice(self.inner.get_unchecked(i))
            }

            /// Returns a mutable, unchecked subslice of the string.
            ///
            /// This is the unchecked alternative to indexing the string.
            ///
            /// # Safety
            ///
            /// Callers of this function are responsible that these preconditions are satisfied:
            ///
            /// - The starting index must not exceed the ending index;
            /// - Indexes must be within bounds of the original slice.
            ///
            /// Failing that, the returned string slice may reference invalid memory.
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked_mut<I>(&mut self, i: I) -> &mut Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller upholds the bounds preconditions listed above.
                Self::from_slice_mut(self.inner.get_unchecked_mut(i))
            }

            /// Divide one string slice into two at an index.
            ///
            /// The argument, `mid`, should be an offset from the start of the string.
            ///
            /// The two slices returned go from the start of the string slice to `mid`, and from
            /// `mid` to the end of the string slice.
            ///
            /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut]
            /// method.
            #[inline]
            #[must_use]
            pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
                let split = self.inner.split_at(mid);
                (Self::from_slice(split.0), Self::from_slice(split.1))
            }

            /// Divide one mutable string slice into two at an index.
            ///
            /// The argument, `mid`, should be an offset from the start of the string.
            ///
            /// The two slices returned go from the start of the string slice to `mid`, and from
            /// `mid` to the end of the string slice.
            ///
            /// To get immutable string slices instead, see the [`split_at`][Self::split_at]
            /// method.
            #[inline]
            #[must_use]
            pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
                let split = self.inner.split_at_mut(mid);
                (Self::from_slice_mut(split.0), Self::from_slice_mut(split.1))
            }

            /// Creates a new owned string by repeating this string `n` times.
            ///
            /// # Panics
            ///
            /// This function will panic if the capacity would overflow.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn repeat(&self, n: usize) -> $ustring {
                $ustring::from_vec(self.as_slice().repeat(n))
            }
        }

        impl AsMut<$ustr> for $ustr {
            #[inline]
            fn as_mut(&mut self) -> &mut $ustr {
                self
            }
        }

        impl AsMut<[$uchar]> for $ustr {
            #[inline]
            fn as_mut(&mut self) -> &mut [$uchar] {
                self.as_mut_slice()
            }
        }

        impl AsRef<$ustr> for $ustr {
            #[inline]
            fn as_ref(&self) -> &Self {
                self
            }
        }

        impl AsRef<[$uchar]> for $ustr {
            #[inline]
            fn as_ref(&self) -> &[$uchar] {
                self.as_slice()
            }
        }

        impl Default for &$ustr {
            #[inline]
            fn default() -> Self {
                $ustr::from_slice(&[])
            }
        }

        impl Default for &mut $ustr {
            #[inline]
            fn default() -> Self {
                $ustr::from_slice_mut(&mut [])
            }
        }

        impl<'a> From<&'a [$uchar]> for &'a $ustr {
            #[inline]
            fn from(value: &'a [$uchar]) -> Self {
                $ustr::from_slice(value)
            }
        }

        impl<'a> From<&'a mut [$uchar]> for &'a $ustr {
            #[inline]
            fn from(value: &'a mut [$uchar]) -> Self {
                $ustr::from_slice(value)
            }
        }

        impl<'a> From<&'a mut [$uchar]> for &'a mut $ustr {
            #[inline]
            fn from(value: &'a mut [$uchar]) -> Self {
                $ustr::from_slice_mut(value)
            }
        }

        impl<'a> From<&'a $ustr> for &'a [$uchar] {
            #[inline]
            fn from(value: &'a $ustr) -> Self {
                value.as_slice()
            }
        }

        impl<'a> From<&'a mut $ustr> for &'a mut [$uchar] {
            #[inline]
            fn from(value: &'a mut $ustr) -> Self {
                value.as_mut_slice()
            }
        }

        #[cfg(feature = "std")]
        impl From<&$ustr> for std::ffi::OsString {
            #[inline]
            fn from(s: &$ustr) -> std::ffi::OsString {
                s.to_os_string()
            }
        }

        impl<I> Index<I> for $ustr
        where
            I: SliceIndex<[$uchar], Output = [$uchar]>,
        {
            type Output = Self;

            #[inline]
            fn index(&self, index: I) -> &Self::Output {
                Self::from_slice(&self.inner[index])
            }
        }

        impl<I> IndexMut<I> for $ustr
        where
            I: SliceIndex<[$uchar], Output = [$uchar]>,
        {
            #[inline]
            fn index_mut(&mut self, index: I) -> &mut Self::Output {
                Self::from_slice_mut(&mut self.inner[index])
            }
        }

        impl PartialEq<$ustr> for &$ustr {
            #[inline]
            fn eq(&self, other: &$ustr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<&$ustr> for $ustr {
            #[inline]
            fn eq(&self, other: &&$ustr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<crate::$ucstr> for $ustr {
            #[inline]
            fn eq(&self, other: &crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<crate::$ucstr> for &$ustr {
            #[inline]
            fn eq(&self, other: &crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<&crate::$ucstr> for $ustr {
            #[inline]
            fn eq(&self, other: &&crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialOrd<crate::$ucstr> for $ustr {
            #[inline]
            fn partial_cmp(&self, other: &crate::$ucstr) -> Option<core::cmp::Ordering> {
                self.partial_cmp(other.as_ustr())
            }
        }
    };
}
523
524ustr_common_impl! {
525    /// 16-bit wide string slice with undefined encoding.
526    ///
527    /// [`U16Str`] is to [`U16String`][crate::U16String] as [`OsStr`][std::ffi::OsStr] is to
528    /// [`OsString`][std::ffi::OsString].
529    ///
530    /// [`U16Str`] are string slices that do not have a defined encoding. While it is sometimes
531    /// assumed that they contain possibly invalid or ill-formed UTF-16 data, they may be used for
532    /// any wide encoded string. This is because [`U16Str`] is intended to be used with FFI
533    /// functions, where proper encoding cannot be guaranteed. If you need string slices that are
534    /// always valid UTF-16 strings, use [`Utf16Str`][crate::Utf16Str] instead.
535    ///
536    /// Because [`U16Str`] does not have a defined encoding, no restrictions are placed on mutating
537    /// or indexing the slice. This means that even if the string contained properly encoded UTF-16
538    /// or other encoding data, mutationing or indexing may result in malformed data. Convert to a
539    /// [`Utf16Str`][crate::Utf16Str] if retaining proper UTF-16 encoding is desired.
540    ///
541    /// # FFI considerations
542    ///
543    /// [`U16Str`] is not aware of nul values and may or may not be nul-terminated. It is intended
544    /// to be used with FFI functions that directly use string length, where the strings are known
545    /// to have proper nul-termination already, or where strings are merely being passed through
546    /// without modification.
547    ///
548    /// [`U16CStr`][crate::U16CStr] should be used instead if nul-aware strings are required.
549    ///
550    /// # Examples
551    ///
552    /// The easiest way to use [`U16Str`] outside of FFI is with the [`u16str!`][crate::u16str]
553    /// macro to convert string literals into UTF-16 string slices at compile time:
554    ///
555    /// ```
556    /// use widestring::u16str;
557    /// let hello = u16str!("Hello, world!");
558    /// ```
559    ///
560    /// You can also convert any [`u16`] slice directly:
561    ///
562    /// ```
563    /// use widestring::{u16str, U16Str};
564    ///
565    /// let sparkle_heart = [0xd83d, 0xdc96];
566    /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
567    ///
568    /// assert_eq!(u16str!("💖"), sparkle_heart);
569    ///
570    /// // This unpaired UTf-16 surrogate is invalid UTF-16, but is perfectly valid in U16Str
571    /// let malformed_utf16 = [0x0, 0xd83d]; // Note that nul values are also valid an untouched
572    /// let s = U16Str::from_slice(&malformed_utf16);
573    ///
574    /// assert_eq!(s.len(), 2);
575    /// ```
576    ///
577    /// When working with a FFI, it is useful to create a [`U16Str`] from a pointer and a length:
578    ///
579    /// ```
580    /// use widestring::{u16str, U16Str};
581    ///
582    /// let sparkle_heart = [0xd83d, 0xdc96];
583    /// let sparkle_heart = unsafe {
584    ///     U16Str::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len())
585    /// };
586    /// assert_eq!(u16str!("💖"), sparkle_heart);
587    /// ```
588    struct U16Str([u16]);
589
590    type UString = U16String;
591    type UCStr = U16CStr;
592
593    /// Returns an object that implements [`Display`][std::fmt::Display] for printing
594    /// strings that may contain non-Unicode data.
595    ///
596    /// This method assumes this string is intended to be UTF-16 encoding, but handles
597    /// ill-formed UTF-16 sequences lossily. The returned struct implements
598    /// the [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy
599    /// UTF-16 decoding but no heap allocations are performed, such as by
600    /// [`to_string_lossy`][Self::to_string_lossy].
601    ///
602    /// By default, invalid Unicode data is replaced with
603    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
604    /// to simply skip any invalid Uncode data and forego the replacement, you may use the
605    /// [alternate formatting][std::fmt#sign0] with `{:#}`.
606    ///
607    /// # Examples
608    ///
609    /// Basic usage:
610    ///
611    /// ```
612    /// use widestring::U16Str;
613    ///
614    /// // 𝄞mus<invalid>ic<invalid>
615    /// let s = U16Str::from_slice(&[
616    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
617    /// ]);
618    ///
619    /// assert_eq!(format!("{}", s.display()),
620    /// "𝄞mus�ic�"
621    /// );
622    /// ```
623    ///
624    /// Using alternate formatting style to skip invalid values entirely:
625    ///
626    /// ```
627    /// use widestring::U16Str;
628    ///
629    /// // 𝄞mus<invalid>ic<invalid>
630    /// let s = U16Str::from_slice(&[
631    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
632    /// ]);
633    ///
634    /// assert_eq!(format!("{:#}", s.display()),
635    /// "𝄞music"
636    /// );
637    /// ```
638    fn display() -> {}
639}
640
641ustr_common_impl! {
642    /// 32-bit wide string slice with undefined encoding.
643    ///
644    /// [`U32Str`] is to [`U32String`][crate::U32String] as [`OsStr`][std::ffi::OsStr] is to
645    /// [`OsString`][std::ffi::OsString].
646    ///
647    /// [`U32Str`] are string slices that do not have a defined encoding. While it is sometimes
648    /// assumed that they contain possibly invalid or ill-formed UTF-32 data, they may be used for
649    /// any wide encoded string. This is because [`U32Str`] is intended to be used with FFI
650    /// functions, where proper encoding cannot be guaranteed. If you need string slices that are
651    /// always valid UTF-32 strings, use [`Utf32Str`][crate::Utf32Str] instead.
652    ///
653    /// Because [`U32Str`] does not have a defined encoding, no restrictions are placed on mutating
654    /// or indexing the slice. This means that even if the string contained properly encoded UTF-32
655    /// or other encoding data, mutationing or indexing may result in malformed data. Convert to a
656    /// [`Utf32Str`][crate::Utf32Str] if retaining proper UTF-32 encoding is desired.
657    ///
658    /// # FFI considerations
659    ///
660    /// [`U32Str`] is not aware of nul values and may or may not be nul-terminated. It is intended
661    /// to be used with FFI functions that directly use string length, where the strings are known
662    /// to have proper nul-termination already, or where strings are merely being passed through
663    /// without modification.
664    ///
665    /// [`U32CStr`][crate::U32CStr] should be used instead if nul-aware strings are required.
666    ///
667    /// # Examples
668    ///
669    /// The easiest way to use [`U32Str`] outside of FFI is with the [`u32str!`][crate::u32str]
670    /// macro to convert string literals into UTF-32 string slices at compile time:
671    ///
672    /// ```
673    /// use widestring::u32str;
674    /// let hello = u32str!("Hello, world!");
675    /// ```
676    ///
677    /// You can also convert any [`u32`] slice directly:
678    ///
679    /// ```
680    /// use widestring::{u32str, U32Str};
681    ///
682    /// let sparkle_heart = [0x1f496];
683    /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
684    ///
685    /// assert_eq!(u32str!("💖"), sparkle_heart);
686    ///
687    /// // This UTf-16 surrogate is invalid UTF-32, but is perfectly valid in U32Str
688    /// let malformed_utf32 = [0x0, 0xd83d]; // Note that nul values are also valid an untouched
689    /// let s = U32Str::from_slice(&malformed_utf32);
690    ///
691    /// assert_eq!(s.len(), 2);
692    /// ```
693    ///
694    /// When working with a FFI, it is useful to create a [`U32Str`] from a pointer and a length:
695    ///
696    /// ```
697    /// use widestring::{u32str, U32Str};
698    ///
699    /// let sparkle_heart = [0x1f496];
700    /// let sparkle_heart = unsafe {
701    ///     U32Str::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len())
702    /// };
703    /// assert_eq!(u32str!("💖"), sparkle_heart);
704    /// ```
705    struct U32Str([u32]);
706
707    type UString = U32String;
708    type UCStr = U32CStr;
709
710    /// Returns an object that implements [`Display`][std::fmt::Display] for printing
711    /// strings that may contain non-Unicode data.
712    ///
713    /// This method assumes this string is intended to be UTF-32 encoding, but handles
714    /// ill-formed UTF-32 sequences lossily. The returned struct implements
715    /// the [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy
716    /// UTF-32 decoding but no heap allocations are performed, such as by
717    /// [`to_string_lossy`][Self::to_string_lossy].
718    ///
719    /// By default, invalid Unicode data is replaced with
720    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
721    /// to simply skip any invalid Uncode data and forego the replacement, you may use the
722    /// [alternate formatting][std::fmt#sign0] with `{:#}`.
723    ///
724    /// # Examples
725    ///
726    /// Basic usage:
727    ///
728    /// ```
729    /// use widestring::U32Str;
730    ///
731    /// // 𝄞mus<invalid>ic<invalid>
732    /// let s = U32Str::from_slice(&[
733    ///     0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
734    /// ]);
735    ///
736    /// assert_eq!(format!("{}", s.display()),
737    /// "𝄞mus�ic�"
738    /// );
739    /// ```
740    ///
741    /// Using alternate formatting style to skip invalid values entirely:
742    ///
743    /// ```
744    /// use widestring::U32Str;
745    ///
746    /// // 𝄞mus<invalid>ic<invalid>
747    /// let s = U32Str::from_slice(&[
748    ///     0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
749    /// ]);
750    ///
751    /// assert_eq!(format!("{:#}", s.display()),
752    /// "𝄞music"
753    /// );
754    /// ```
755    fn display() -> {}
756}
757
758impl U16Str {
759    /// Decodes a string reference to an owned [`OsString`][std::ffi::OsString].
760    ///
761    /// This makes a string copy of the [`U16Str`]. Since [`U16Str`] makes no guarantees that its
762    /// encoding is UTF-16 or that the data valid UTF-16, there is no guarantee that the resulting
763    /// [`OsString`][std::ffi::OsString] will have a valid underlying encoding either.
764    ///
765    /// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
766    /// some platforms this may make an encoding conversions, while on other platforms (such as
767    /// windows) no changes to the string will be made.
768    ///
769    /// # Examples
770    ///
771    /// ```rust
772    /// use widestring::U16String;
773    /// use std::ffi::OsString;
774    /// let s = "MyString";
775    /// // Create a wide string from the string
776    /// let wstr = U16String::from_str(s);
777    /// // Create an OsString from the wide string
778    /// let osstr = wstr.to_os_string();
779    ///
780    /// assert_eq!(osstr, OsString::from(s));
781    /// ```
782    #[cfg(feature = "std")]
783    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
784    #[inline]
785    #[must_use]
786    pub fn to_os_string(&self) -> std::ffi::OsString {
787        crate::platform::os_from_wide(&self.inner)
788    }
789
790    /// Decodes this string to a [`String`] if it contains valid UTF-16 data.
791    ///
792    /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such.
793    ///
794    /// # Failures
795    ///
796    /// Returns an error if the string contains any invalid UTF-16 data.
797    ///
798    /// # Examples
799    ///
800    /// ```rust
801    /// use widestring::U16String;
802    /// let s = "MyString";
803    /// // Create a wide string from the string
804    /// let wstr = U16String::from_str(s);
805    /// // Create a regular string from the wide string
806    /// let s2 = wstr.to_string().unwrap();
807    ///
808    /// assert_eq!(s2, s);
809    /// ```
810    #[cfg(feature = "alloc")]
811    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
812    #[inline]
813    pub fn to_string(&self) -> Result<String, Utf16Error> {
814        // Perform conversion ourselves to use our own error types with additional info
815        let mut s = String::with_capacity(self.len());
816        for (index, result) in self.chars().enumerate() {
817            let c = result.map_err(|e| Utf16Error::empty(index, e))?;
818            s.push(c);
819        }
820        Ok(s)
821    }
822
823    /// Decodes the string to a [`String`] even if it is invalid UTF-16 data.
824    ///
825    /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. Any
826    /// invalid sequences are replaced with
827    /// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
828    /// �
829    ///
830    /// # Examples
831    ///
832    /// ```rust
833    /// use widestring::U16String;
834    /// let s = "MyString";
835    /// // Create a wide string from the string
836    /// let wstr = U16String::from_str(s);
837    /// // Create a regular string from the wide string
838    /// let lossy = wstr.to_string_lossy();
839    ///
840    /// assert_eq!(lossy, s);
841    /// ```
842    #[cfg(feature = "alloc")]
843    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
844    #[inline]
845    #[must_use]
846    pub fn to_string_lossy(&self) -> String {
847        String::from_utf16_lossy(&self.inner)
848    }
849
850    /// Returns an iterator over the [`char`][prim@char]s of a string slice.
851    ///
852    /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
853    /// may consist of invalid UTF-16, the iterator returned by this method
854    /// is an iterator over `Result<char, DecodeUtf16Error>` instead of [`char`][prim@char]s
855    /// directly. If you would like a lossy iterator over [`chars`][prim@char]s directly, instead
856    /// use [`chars_lossy`][Self::chars_lossy].
857    ///
    /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
    /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
    /// what you actually want. That functionality is not provided by this crate.
861    #[inline]
862    #[must_use]
863    pub fn chars(&self) -> CharsUtf16<'_> {
864        CharsUtf16::new(self.as_slice())
865    }
866
867    /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
868    ///
869    /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
870    /// may consist of invalid UTF-16, the iterator returned by this method will replace unpaired
871    /// surrogates with
872    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
873    /// version of [`chars`][Self::chars].
874    ///
    /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
    /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
    /// what you actually want. That functionality is not provided by this crate.
878    #[inline]
879    #[must_use]
880    pub fn chars_lossy(&self) -> CharsLossyUtf16<'_> {
881        CharsLossyUtf16::new(self.as_slice())
882    }
883
884    /// Returns an iterator over the chars of a string slice, and their positions.
885    ///
886    /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
887    /// may consist of invalid UTF-16, the iterator returned by this method is an iterator over
888    /// `Result<char, DecodeUtf16Error>` as well as their positions, instead of
889    /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
890    /// [`chars`][prim@char]s directly, instead use
891    /// [`char_indices_lossy`][Self::char_indices_lossy].
892    ///
893    /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
894    #[inline]
895    #[must_use]
896    pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
897        CharIndicesUtf16::new(self.as_slice())
898    }
899
900    /// Returns a lossy iterator over the chars of a string slice, and their positions.
901    ///
902    /// As this string slice may consist of invalid UTF-16, the iterator returned by this method
903    /// will replace unpaired surrogates with
904    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
905    /// positions of all characters. This is a lossy version of
906    /// [`char_indices`][Self::char_indices].
907    ///
908    /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
909    #[inline]
910    #[must_use]
911    pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf16<'_> {
912        CharIndicesLossyUtf16::new(self.as_slice())
913    }
914
915    /// Returns an iterator over the lines of a [`U16Str`], as string slices.
916    ///
917    /// Lines are split at line endings that are either newlines (`\n`) or
918    /// sequences of a carriage return followed by a line feed (`\r\n`).
919    ///
920    /// Line terminators are not included in the lines returned by the iterator.
921    ///
922    /// Note that any carriage return (`\r`) not immediately followed by a
923    /// line feed (`\n`) does not split a line. These carriage returns are
924    /// thereby included in the produced lines.
925    ///
926    /// The final line ending is optional. A string that ends with a final line
927    /// ending will return the same lines as an otherwise identical string
928    /// without a final line ending.
929    ///
930    /// # Examples
931    ///
932    /// Basic usage:
933    ///
934    /// ```
935    /// use widestring::{u16str};
936    ///
937    /// let text = u16str!("foo\r\nbar\n\nbaz\r");
938    /// let mut lines = text.lines_lossy();
939    ///
940    /// assert_eq!(Some(u16str!("foo")), lines.next());
941    /// assert_eq!(Some(u16str!("bar")), lines.next());
942    /// assert_eq!(Some(u16str!("")), lines.next());
943    /// // Trailing carriage return is included in the last line
944    /// assert_eq!(Some(u16str!("baz\r")), lines.next());
945    ///
946    /// assert_eq!(None, lines.next());
947    /// ```
948    ///
949    /// The final line does not require any ending:
950    ///
951    /// ```
952    /// use widestring::{u16str};
953    ///
954    /// let text = u16str!("foo\nbar\n\r\nbaz");
955    /// let mut lines = text.lines_lossy();
956    ///
957    /// assert_eq!(Some(u16str!("foo")), lines.next());
958    /// assert_eq!(Some(u16str!("bar")), lines.next());
959    /// assert_eq!(Some(u16str!("")), lines.next());
960    /// assert_eq!(Some(u16str!("baz")), lines.next());
961    ///
962    /// assert_eq!(None, lines.next());
963    /// ```
964    pub fn lines_lossy(&self) -> Lines<'_, Self, CharIndicesLossyUtf16<'_>> {
965        Lines::new(self, self.len(), self.char_indices_lossy())
966    }
967}
968
969impl U32Str {
970    /// Constructs a [`U32Str`] from a [`char`][prim@char] pointer and a length.
971    ///
972    /// The `len` argument is the number of `char` elements, **not** the number of bytes. No copying
973    /// or allocation is performed, the resulting value is a direct reference to the pointer bytes.
974    ///
975    /// # Safety
976    ///
977    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
978    /// elements.
979    ///
980    /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
981    /// In particular, the returned string reference *must not be mutated* for the duration of
982    /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
983    ///
984    /// # Panics
985    ///
986    /// This function panics if `p` is null.
987    ///
988    /// # Caveat
989    ///
990    /// The lifetime for the returned string is inferred from its usage. To prevent accidental
991    /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
992    /// context, such as by providing a helper function taking the lifetime of a host value for the
993    /// string, or by explicit annotation.
994    #[inline]
995    #[must_use]
996    pub unsafe fn from_char_ptr<'a>(p: *const char, len: usize) -> &'a Self {
997        Self::from_ptr(p as *const u32, len)
998    }
999
1000    /// Constructs a mutable [`U32Str`] from a mutable [`char`][prim@char] pointer and a length.
1001    ///
1002    /// The `len` argument is the number of `char` elements, **not** the number of bytes. No copying
1003    /// or allocation is performed, the resulting value is a direct reference to the pointer bytes.
1004    ///
1005    /// # Safety
1006    ///
1007    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
1008    /// elements.
1009    ///
1010    /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1011    ///
1012    /// # Panics
1013    ///
1014    /// This function panics if `p` is null.
1015    ///
1016    /// # Caveat
1017    ///
1018    /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1019    /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1020    /// context, such as by providing a helper function taking the lifetime of a host value for the
1021    /// string, or by explicit annotation.
1022    #[inline]
1023    #[must_use]
1024    pub unsafe fn from_char_ptr_mut<'a>(p: *mut char, len: usize) -> &'a mut Self {
1025        Self::from_ptr_mut(p as *mut u32, len)
1026    }
1027
1028    /// Constructs a [`U32Str`] from a [`char`][prim@char] slice.
1029    ///
1030    /// No checks are performed on the slice.
1031    #[inline]
1032    #[must_use]
1033    pub fn from_char_slice(slice: &[char]) -> &Self {
1034        let ptr: *const [char] = slice;
1035        unsafe { &*(ptr as *const Self) }
1036    }
1037
1038    /// Constructs a mutable [`U32Str`] from a mutable [`char`][prim@char] slice.
1039    ///
1040    /// No checks are performed on the slice.
1041    #[inline]
1042    #[must_use]
1043    pub fn from_char_slice_mut(slice: &mut [char]) -> &mut Self {
1044        let ptr: *mut [char] = slice;
1045        unsafe { &mut *(ptr as *mut Self) }
1046    }
1047
1048    /// Decodes a string to an owned [`OsString`][std::ffi::OsString].
1049    ///
    /// This makes a string copy of the [`U32Str`]. Since [`U32Str`] makes no guarantees that its
    /// encoding is UTF-32 or that the data is valid UTF-32, the conversion is lossy: it goes
    /// through [`to_string_lossy`][Self::to_string_lossy] before becoming an
    /// [`OsString`][std::ffi::OsString].
1053    ///
    /// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
    /// some platforms this may perform an encoding conversion, while on other platforms no changes
    /// to the string will be made.
1057    ///
1058    /// # Examples
1059    ///
1060    /// ```rust
1061    /// use widestring::U32String;
1062    /// use std::ffi::OsString;
1063    /// let s = "MyString";
1064    /// // Create a wide string from the string
1065    /// let wstr = U32String::from_str(s);
1066    /// // Create an OsString from the wide string
1067    /// let osstr = wstr.to_os_string();
1068    ///
1069    /// assert_eq!(osstr, OsString::from(s));
1070    /// ```
1071    #[cfg(feature = "std")]
1072    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1073    #[inline]
1074    #[must_use]
1075    pub fn to_os_string(&self) -> std::ffi::OsString {
1076        self.to_string_lossy().into()
1077    }
1078
1079    /// Decodes the string to a [`String`] if it contains valid UTF-32 data.
1080    ///
1081    /// This method assumes this string is encoded as UTF-32 and attempts to decode it as such.
1082    ///
1083    /// # Failures
1084    ///
1085    /// Returns an error if the string contains any invalid UTF-32 data.
1086    ///
1087    /// # Examples
1088    ///
1089    /// ```rust
1090    /// use widestring::U32String;
1091    /// let s = "MyString";
1092    /// // Create a wide string from the string
1093    /// let wstr = U32String::from_str(s);
1094    /// // Create a regular string from the wide string
1095    /// let s2 = wstr.to_string().unwrap();
1096    ///
1097    /// assert_eq!(s2, s);
1098    /// ```
1099    #[cfg(feature = "alloc")]
1100    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1101    pub fn to_string(&self) -> Result<String, Utf32Error> {
1102        let mut s = String::with_capacity(self.len());
1103        for (index, result) in self.chars().enumerate() {
1104            let c = result.map_err(|e| Utf32Error::empty(index, e))?;
1105            s.push(c);
1106        }
1107        Ok(s)
1108    }
1109
1110    /// Decodes the string reference to a [`String`] even if it is invalid UTF-32 data.
1111    ///
1112    /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. Any
1113    /// invalid sequences are replaced with
1114    /// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
1115    /// �
1116    ///
1117    /// # Examples
1118    ///
1119    /// ```rust
1120    /// use widestring::U32String;
1121    /// let s = "MyString";
1122    /// // Create a wide string from the string
1123    /// let wstr = U32String::from_str(s);
1124    /// // Create a regular string from the wide string
1125    /// let lossy = wstr.to_string_lossy();
1126    ///
1127    /// assert_eq!(lossy, s);
1128    /// ```
1129    #[cfg(feature = "alloc")]
1130    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1131    #[must_use]
1132    pub fn to_string_lossy(&self) -> String {
1133        let chars: Vec<char> = self
1134            .inner
1135            .iter()
1136            .map(|&c| char::from_u32(c).unwrap_or(char::REPLACEMENT_CHARACTER))
1137            .collect();
1138        let size = chars.iter().map(|c| c.len_utf8()).sum();
1139        let mut vec = alloc::vec![0; size];
1140        let mut i = 0;
1141        for c in chars {
1142            c.encode_utf8(&mut vec[i..]);
1143            i += c.len_utf8();
1144        }
1145        unsafe { String::from_utf8_unchecked(vec) }
1146    }
1147
1148    /// Returns an iterator over the [`char`][prim@char]s of a string slice.
1149    ///
1150    /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1151    /// may consist of invalid UTF-32, the iterator returned by this method
1152    /// is an iterator over `Result<char, DecodeUtf32Error>` instead of [`char`][prim@char]s
1153    /// directly. If you would like a lossy iterator over [`chars`][prim@char]s directly, instead
1154    /// use [`chars_lossy`][Self::chars_lossy].
1155    ///
    /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
    /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
    /// what you actually want. That functionality is not provided by this crate.
1159    #[inline]
1160    #[must_use]
1161    pub fn chars(&self) -> CharsUtf32<'_> {
1162        CharsUtf32::new(self.as_slice())
1163    }
1164
1165    /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
1166    ///
    /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
    /// may consist of invalid UTF-32, the iterator returned by this method will replace invalid
    /// values with
    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
    /// version of [`chars`][Self::chars].
1172    ///
    /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
    /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
    /// what you actually want. That functionality is not provided by this crate.
1176    #[inline]
1177    #[must_use]
1178    pub fn chars_lossy(&self) -> CharsLossyUtf32<'_> {
1179        CharsLossyUtf32::new(self.as_slice())
1180    }
1181
1182    /// Returns an iterator over the chars of a string slice, and their positions.
1183    ///
1184    /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1185    /// may consist of invalid UTF-32, the iterator returned by this method is an iterator over
1186    /// `Result<char, DecodeUtf32Error>` as well as their positions, instead of
1187    /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
1188    /// [`chars`][prim@char]s directly, instead use
1189    /// [`char_indices_lossy`][Self::char_indices_lossy].
1190    ///
1191    /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1192    #[inline]
1193    #[must_use]
1194    pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
1195        CharIndicesUtf32::new(self.as_slice())
1196    }
1197
1198    /// Returns a lossy iterator over the chars of a string slice, and their positions.
1199    ///
1200    /// As this string slice may consist of invalid UTF-32, the iterator returned by this method
1201    /// will replace invalid values with
1202    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
1203    /// positions of all characters. This is a lossy version of
1204    /// [`char_indices`][Self::char_indices].
1205    ///
1206    /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1207    #[inline]
1208    #[must_use]
1209    pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf32<'_> {
1210        CharIndicesLossyUtf32::new(self.as_slice())
1211    }
1212
1213    /// Returns an iterator over the lines of a [`U32Str`], as string slices.
1214    ///
1215    /// Lines are split at line endings that are either newlines (`\n`) or
1216    /// sequences of a carriage return followed by a line feed (`\r\n`).
1217    ///
1218    /// Line terminators are not included in the lines returned by the iterator.
1219    ///
1220    /// Note that any carriage return (`\r`) not immediately followed by a
1221    /// line feed (`\n`) does not split a line. These carriage returns are
1222    /// thereby included in the produced lines.
1223    ///
1224    /// The final line ending is optional. A string that ends with a final line
1225    /// ending will return the same lines as an otherwise identical string
1226    /// without a final line ending.
1227    ///
1228    /// # Examples
1229    ///
1230    /// Basic usage:
1231    ///
1232    /// ```
1233    /// use widestring::{u32str};
1234    ///
1235    /// let text = u32str!("foo\r\nbar\n\nbaz\r");
1236    /// let mut lines = text.lines_lossy();
1237    ///
1238    /// assert_eq!(Some(u32str!("foo")), lines.next());
1239    /// assert_eq!(Some(u32str!("bar")), lines.next());
1240    /// assert_eq!(Some(u32str!("")), lines.next());
1241    /// // Trailing carriage return is included in the last line
1242    /// assert_eq!(Some(u32str!("baz\r")), lines.next());
1243    ///
1244    /// assert_eq!(None, lines.next());
1245    /// ```
1246    ///
1247    /// The final line does not require any ending:
1248    ///
1249    /// ```
1250    /// use widestring::{u32str};
1251    ///
1252    /// let text = u32str!("foo\nbar\n\r\nbaz");
1253    /// let mut lines = text.lines_lossy();
1254    ///
1255    /// assert_eq!(Some(u32str!("foo")), lines.next());
1256    /// assert_eq!(Some(u32str!("bar")), lines.next());
1257    /// assert_eq!(Some(u32str!("")), lines.next());
1258    /// assert_eq!(Some(u32str!("baz")), lines.next());
1259    ///
1260    /// assert_eq!(None, lines.next());
1261    /// ```
1262    pub fn lines_lossy(&self) -> Lines<'_, Self, CharIndicesLossyUtf32<'_>> {
1263        Lines::new(self, self.len(), self.char_indices_lossy())
1264    }
1265}
1266
1267impl core::fmt::Debug for U16Str {
1268    #[inline]
1269    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1270        crate::debug_fmt_u16(self.as_slice(), f)
1271    }
1272}
1273
1274impl core::fmt::Debug for U32Str {
1275    #[inline]
1276    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1277        crate::debug_fmt_u32(self.as_slice(), f)
1278    }
1279}
1280
1281impl<'a> From<&'a [char]> for &'a U32Str {
1282    #[inline]
1283    fn from(value: &'a [char]) -> Self {
1284        U32Str::from_char_slice(value)
1285    }
1286}
1287
1288impl<'a> From<&'a mut [char]> for &'a mut U32Str {
1289    #[inline]
1290    fn from(value: &'a mut [char]) -> Self {
1291        U32Str::from_char_slice_mut(value)
1292    }
1293}
1294
/// Alias for [`U16Str`] or [`U32Str`] depending on platform. Intended to match typical C `wchar_t`
/// size on platform.
///
/// On targets other than Windows (`cfg(not(windows))`) this is [`U32Str`].
#[cfg(not(windows))]
pub type WideStr = U32Str;
1299
/// Alias for [`U16Str`] or [`U32Str`] depending on platform. Intended to match typical C `wchar_t`
/// size on platform.
///
/// On Windows targets (`cfg(windows)`) this is [`U16Str`].
#[cfg(windows)]
pub type WideStr = U16Str;
1304
/// Helper struct for printing wide string values with [`format!`] and `{}`.
///
/// A wide string might contain ill-formed UTF encoding. This struct implements the
/// [`Display`][std::fmt::Display] trait so that the string is decoded lossily while it is being
/// written, without performing the heap allocation that
/// [`to_string_lossy`][U16Str::to_string_lossy] requires. It is created by the
/// [`display`][U16Str::display] method on [`U16Str`] and [`U32Str`].
///
/// By default, invalid Unicode data is replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish to simply
/// skip any invalid Unicode data and forego the replacement, you may use the
/// [alternate formatting][std::fmt#sign0] with `{:#}`.
pub struct Display<'a, S: ?Sized> {
    // Borrowed wide string that the `Debug` and `Display` implementations read from.
    str: &'a S,
}
1319
1320impl core::fmt::Debug for Display<'_, U16Str> {
1321    #[inline]
1322    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1323        core::fmt::Debug::fmt(&self.str, f)
1324    }
1325}
1326
1327impl core::fmt::Debug for Display<'_, U32Str> {
1328    #[inline]
1329    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1330        core::fmt::Debug::fmt(&self.str, f)
1331    }
1332}
1333
1334impl core::fmt::Display for Display<'_, U16Str> {
1335    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1336        for c in crate::decode_utf16_lossy(self.str.as_slice().iter().copied()) {
1337            // Allow alternate {:#} format which skips replacment chars entirely
1338            if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
1339                f.write_char(c)?;
1340            }
1341        }
1342        Ok(())
1343    }
1344}
1345
1346impl core::fmt::Display for Display<'_, U32Str> {
1347    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1348        for c in crate::decode_utf32_lossy(self.str.as_slice().iter().copied()) {
1349            // Allow alternate {:#} format which skips replacment chars entirely
1350            if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
1351                f.write_char(c)?;
1352            }
1353        }
1354        Ok(())
1355    }
1356}