widestring/ustr.rs
1//! Wide string slices with undefined encoding.
2//!
3//! This module contains wide string slices and related types.
4
5use crate::utfstr::Lines;
6#[cfg(feature = "alloc")]
7use crate::{
8 error::{Utf16Error, Utf32Error},
9 U16String, U32String,
10};
11#[cfg(feature = "alloc")]
12#[allow(unused_imports)]
13use alloc::{boxed::Box, string::String, vec::Vec};
14use core::{
15 char,
16 fmt::Write,
17 ops::{Index, IndexMut, Range},
18 slice::{self, SliceIndex},
19};
20
21mod iter;
22
23pub use iter::*;
24
25macro_rules! ustr_common_impl {
26 {
27 $(#[$ustr_meta:meta])*
28 struct $ustr:ident([$uchar:ty]);
29 type UString = $ustring:ident;
30 type UCStr = $ucstr:ident;
31 $(#[$display_meta:meta])*
32 fn display() -> {}
33 } => {
34 $(#[$ustr_meta])*
35 #[allow(clippy::derive_hash_xor_eq)]
36 #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
37 pub struct $ustr {
38 pub(crate) inner: [$uchar],
39 }
40
41 impl $ustr {
42 /// Coerces a value into a wide string slice.
43 #[inline]
44 #[must_use]
45 pub fn new<S: AsRef<Self> + ?Sized>(s: &S) -> &Self {
46 s.as_ref()
47 }
48
49 /// Constructs a wide string slice from a pointer and a length.
50 ///
51 /// The `len` argument is the number of elements, **not** the number of bytes. No
52 /// copying or allocation is performed, the resulting value is a direct reference to the
53 /// pointer bytes.
54 ///
55 /// # Safety
56 ///
57 /// This function is unsafe as there is no guarantee that the given pointer is valid for
58 /// `len` elements.
59 ///
60 /// In addition, the data must meet the safety conditions of
61 /// [std::slice::from_raw_parts]. In particular, the returned string reference *must not
62 /// be mutated* for the duration of lifetime `'a`, except inside an
63 /// [`UnsafeCell`][std::cell::UnsafeCell].
64 ///
65 /// # Panics
66 ///
67 /// This function panics if `p` is null.
68 ///
69 /// # Caveat
70 ///
71 /// The lifetime for the returned string is inferred from its usage. To prevent
72 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
73 /// is safe in the context, such as by providing a helper function taking the lifetime
74 /// of a host value for the string, or by explicit annotation.
75 #[inline]
76 #[must_use]
77 pub unsafe fn from_ptr<'a>(p: *const $uchar, len: usize) -> &'a Self {
78 assert!(!p.is_null());
79 let slice: *const [$uchar] = slice::from_raw_parts(p, len);
80 &*(slice as *const $ustr)
81 }
82
83 /// Constructs a mutable wide string slice from a mutable pointer and a length.
84 ///
85 /// The `len` argument is the number of elements, **not** the number of bytes. No
86 /// copying or allocation is performed, the resulting value is a direct reference to the
87 /// pointer bytes.
88 ///
89 /// # Safety
90 ///
91 /// This function is unsafe as there is no guarantee that the given pointer is valid for
92 /// `len` elements.
93 ///
94 /// In addition, the data must meet the safety conditions of
95 /// [std::slice::from_raw_parts_mut].
96 ///
97 /// # Panics
98 ///
99 /// This function panics if `p` is null.
100 ///
101 /// # Caveat
102 ///
103 /// The lifetime for the returned string is inferred from its usage. To prevent
104 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
105 /// is safe in the context, such as by providing a helper function taking the lifetime
106 /// of a host value for the string, or by explicit annotation.
107 #[inline]
108 #[must_use]
109 pub unsafe fn from_ptr_mut<'a>(p: *mut $uchar, len: usize) -> &'a mut Self {
110 assert!(!p.is_null());
111 let slice: *mut [$uchar] = slice::from_raw_parts_mut(p, len);
112 &mut *(slice as *mut $ustr)
113 }
114
115 /// Constructs a wide string slice from a slice of character data.
116 ///
117 /// No checks are performed on the slice. It may be of any encoding and may contain
118 /// invalid or malformed data for that encoding.
119 #[inline]
120 #[must_use]
121 pub const fn from_slice(slice: &[$uchar]) -> &Self {
122 let ptr: *const [$uchar] = slice;
123 unsafe { &*(ptr as *const $ustr) }
124 }
125
126 /// Constructs a mutable wide string slice from a mutable slice of character data.
127 ///
128 /// No checks are performed on the slice. It may be of any encoding and may contain
129 /// invalid or malformed data for that encoding.
130 #[inline]
131 #[must_use]
132 pub fn from_slice_mut(slice: &mut [$uchar]) -> &mut Self {
133 let ptr: *mut [$uchar] = slice;
134 unsafe { &mut *(ptr as *mut $ustr) }
135 }
136
137 /// Copies the string reference to a new owned wide string.
138 #[cfg(feature = "alloc")]
139 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
140 #[inline]
141 #[must_use]
142 pub fn to_ustring(&self) -> $ustring {
143 $ustring::from_vec(&self.inner)
144 }
145
146 /// Converts to a slice of the underlying elements of the string.
147 #[inline]
148 #[must_use]
149 pub const fn as_slice(&self) -> &[$uchar] {
150 &self.inner
151 }
152
153 /// Converts to a mutable slice of the underlying elements of the string.
154 #[must_use]
155 pub fn as_mut_slice(&mut self) -> &mut [$uchar] {
156 &mut self.inner
157 }
158
159 /// Returns a raw pointer to the string.
160 ///
161 /// The caller must ensure that the string outlives the pointer this function returns,
162 /// or else it will end up pointing to garbage.
163 ///
164 /// The caller must also ensure that the memory the pointer (non-transitively) points to
165 /// is never written to (except inside an `UnsafeCell`) using this pointer or any
166 /// pointer derived from it. If you need to mutate the contents of the string, use
167 /// [`as_mut_ptr`][Self::as_mut_ptr].
168 ///
169 /// Modifying the container referenced by this string may cause its buffer to be
170 /// reallocated, which would also make any pointers to it invalid.
171 #[inline]
172 #[must_use]
173 pub const fn as_ptr(&self) -> *const $uchar {
174 self.inner.as_ptr()
175 }
176
177 /// Returns an unsafe mutable raw pointer to the string.
178 ///
179 /// The caller must ensure that the string outlives the pointer this function returns,
180 /// or else it will end up pointing to garbage.
181 ///
182 /// Modifying the container referenced by this string may cause its buffer to be
183 /// reallocated, which would also make any pointers to it invalid.
184 #[inline]
185 #[must_use]
186 pub fn as_mut_ptr(&mut self) -> *mut $uchar {
187 self.inner.as_mut_ptr()
188 }
189
190 /// Returns the two raw pointers spanning the string slice.
191 ///
192 /// The returned range is half-open, which means that the end pointer points one past
193 /// the last element of the slice. This way, an empty slice is represented by two equal
194 /// pointers, and the difference between the two pointers represents the size of the
195 /// slice.
196 ///
197 /// See [`as_ptr`][Self::as_ptr] for warnings on using these pointers. The end pointer
198 /// requires extra caution, as it does not point to a valid element in the slice.
199 ///
200 /// This function is useful for interacting with foreign interfaces which use two
201 /// pointers to refer to a range of elements in memory, as is common in C++.
202 #[inline]
203 #[must_use]
204 pub fn as_ptr_range(&self) -> Range<*const $uchar> {
205 self.inner.as_ptr_range()
206 }
207
208 /// Returns the two unsafe mutable pointers spanning the string slice.
209 ///
210 /// The returned range is half-open, which means that the end pointer points one past
211 /// the last element of the slice. This way, an empty slice is represented by two equal
212 /// pointers, and the difference between the two pointers represents the size of the
213 /// slice.
214 ///
215 /// See [`as_mut_ptr`][Self::as_mut_ptr] for warnings on using these pointers. The end
216 /// pointer requires extra caution, as it does not point to a valid element in the
217 /// slice.
218 ///
219 /// This function is useful for interacting with foreign interfaces which use two
220 /// pointers to refer to a range of elements in memory, as is common in C++.
221 #[inline]
222 #[must_use]
223 pub fn as_mut_ptr_range(&mut self) -> Range<*mut $uchar> {
224 self.inner.as_mut_ptr_range()
225 }
226
227 /// Returns the length of the string as number of elements (**not** number of bytes).
228 #[inline]
229 #[must_use]
230 pub const fn len(&self) -> usize {
231 self.inner.len()
232 }
233
234 /// Returns whether this string contains no data.
235 #[inline]
236 #[must_use]
237 pub const fn is_empty(&self) -> bool {
238 self.inner.is_empty()
239 }
240
241 /// Converts a boxed wide string slice into an owned wide string without copying or
242 /// allocating.
243 #[cfg(feature = "alloc")]
244 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
245 #[must_use]
246 pub fn into_ustring(self: Box<Self>) -> $ustring {
247 let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) };
248 $ustring {
249 inner: boxed.into_vec(),
250 }
251 }
252
253 $(#[$display_meta])*
254 #[inline]
255 #[must_use]
256 pub fn display(&self) -> Display<'_, $ustr> {
257 Display { str: self }
258 }
259
260 /// Returns a subslice of the string.
261 ///
262 /// This is the non-panicking alternative to indexing the string. Returns [`None`]
263 /// whenever equivalent indexing operation would panic.
264 #[inline]
265 #[must_use]
266 pub fn get<I>(&self, i: I) -> Option<&Self>
267 where
268 I: SliceIndex<[$uchar], Output = [$uchar]>,
269 {
270 self.inner.get(i).map(Self::from_slice)
271 }
272
273 /// Returns a mutable subslice of the string.
274 ///
275 /// This is the non-panicking alternative to indexing the string. Returns [`None`]
276 /// whenever equivalent indexing operation would panic.
277 #[inline]
278 #[must_use]
279 pub fn get_mut<I>(&mut self, i: I) -> Option<&mut Self>
280 where
281 I: SliceIndex<[$uchar], Output = [$uchar]>,
282 {
283 self.inner.get_mut(i).map(Self::from_slice_mut)
284 }
285
286 /// Returns an unchecked subslice of the string.
287 ///
288 /// This is the unchecked alternative to indexing the string.
289 ///
290 /// # Safety
291 ///
292 /// Callers of this function are responsible that these preconditions are satisfied:
293 ///
294 /// - The starting index must not exceed the ending index;
295 /// - Indexes must be within bounds of the original slice.
296 ///
297 /// Failing that, the returned string slice may reference invalid memory.
298 #[inline]
299 #[must_use]
300 pub unsafe fn get_unchecked<I>(&self, i: I) -> &Self
301 where
302 I: SliceIndex<[$uchar], Output = [$uchar]>,
303 {
304 Self::from_slice(self.inner.get_unchecked(i))
305 }
306
 /// Returns a mutable, unchecked subslice of the string.
308 ///
309 /// This is the unchecked alternative to indexing the string.
310 ///
311 /// # Safety
312 ///
313 /// Callers of this function are responsible that these preconditions are satisfied:
314 ///
315 /// - The starting index must not exceed the ending index;
316 /// - Indexes must be within bounds of the original slice.
317 ///
318 /// Failing that, the returned string slice may reference invalid memory.
319 #[inline]
320 #[must_use]
321 pub unsafe fn get_unchecked_mut<I>(&mut self, i: I) -> &mut Self
322 where
323 I: SliceIndex<[$uchar], Output = [$uchar]>,
324 {
325 Self::from_slice_mut(self.inner.get_unchecked_mut(i))
326 }
327
328 /// Divide one string slice into two at an index.
329 ///
330 /// The argument, `mid`, should be an offset from the start of the string.
331 ///
332 /// The two slices returned go from the start of the string slice to `mid`, and from
333 /// `mid` to the end of the string slice.
334 ///
335 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut]
336 /// method.
337 #[inline]
338 #[must_use]
339 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
340 let split = self.inner.split_at(mid);
341 (Self::from_slice(split.0), Self::from_slice(split.1))
342 }
343
344 /// Divide one mutable string slice into two at an index.
345 ///
346 /// The argument, `mid`, should be an offset from the start of the string.
347 ///
348 /// The two slices returned go from the start of the string slice to `mid`, and from
349 /// `mid` to the end of the string slice.
350 ///
351 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
352 #[inline]
353 #[must_use]
354 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
355 let split = self.inner.split_at_mut(mid);
356 (Self::from_slice_mut(split.0), Self::from_slice_mut(split.1))
357 }
358
359 /// Creates a new owned string by repeating this string `n` times.
360 ///
361 /// # Panics
362 ///
363 /// This function will panic if the capacity would overflow.
364 #[inline]
365 #[cfg(feature = "alloc")]
366 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
367 #[must_use]
368 pub fn repeat(&self, n: usize) -> $ustring {
369 $ustring::from_vec(self.as_slice().repeat(n))
370 }
371 }
372
373 impl AsMut<$ustr> for $ustr {
374 #[inline]
375 fn as_mut(&mut self) -> &mut $ustr {
376 self
377 }
378 }
379
380 impl AsMut<[$uchar]> for $ustr {
381 #[inline]
382 fn as_mut(&mut self) -> &mut [$uchar] {
383 self.as_mut_slice()
384 }
385 }
386
387 impl AsRef<$ustr> for $ustr {
388 #[inline]
389 fn as_ref(&self) -> &Self {
390 self
391 }
392 }
393
394 impl AsRef<[$uchar]> for $ustr {
395 #[inline]
396 fn as_ref(&self) -> &[$uchar] {
397 self.as_slice()
398 }
399 }
400
401 impl Default for &$ustr {
402 #[inline]
403 fn default() -> Self {
404 $ustr::from_slice(&[])
405 }
406 }
407
 // The default mutable string slice is the empty string.
 impl Default for &mut $ustr {
     #[inline]
     fn default() -> Self {
         // `&mut []` is written inline on purpose: the empty array literal is what makes
         // a mutable borrow with an unconstrained lifetime acceptable here.
         $ustr::from_slice_mut(&mut [])
     }
 }
414
 // Borrowing conversion from an element slice; no validation and no copy, see `from_slice`.
 impl<'a> From<&'a [$uchar]> for &'a $ustr {
     #[inline]
     fn from(value: &'a [$uchar]) -> Self {
         $ustr::from_slice(value)
     }
 }
421
422 impl<'a> From<&'a mut [$uchar]> for &'a $ustr {
423 #[inline]
424 fn from(value: &'a mut [$uchar]) -> Self {
425 $ustr::from_slice(value)
426 }
427 }
428
 // Borrowing conversion from a mutable element slice; no validation and no copy, see
 // `from_slice_mut`.
 impl<'a> From<&'a mut [$uchar]> for &'a mut $ustr {
     #[inline]
     fn from(value: &'a mut [$uchar]) -> Self {
         $ustr::from_slice_mut(value)
     }
 }
435
436 impl<'a> From<&'a $ustr> for &'a [$uchar] {
437 #[inline]
438 fn from(value: &'a $ustr) -> Self {
439 value.as_slice()
440 }
441 }
442
443 impl<'a> From<&'a mut $ustr> for &'a mut [$uchar] {
444 #[inline]
445 fn from(value: &'a mut $ustr) -> Self {
446 value.as_mut_slice()
447 }
448 }
449
450 #[cfg(feature = "std")]
451 impl From<&$ustr> for std::ffi::OsString {
452 #[inline]
453 fn from(s: &$ustr) -> std::ffi::OsString {
454 s.to_os_string()
455 }
456 }
457
458 impl<I> Index<I> for $ustr
459 where
460 I: SliceIndex<[$uchar], Output = [$uchar]>,
461 {
462 type Output = Self;
463
464 #[inline]
465 fn index(&self, index: I) -> &Self::Output {
466 Self::from_slice(&self.inner[index])
467 }
468 }
469
470 impl<I> IndexMut<I> for $ustr
471 where
472 I: SliceIndex<[$uchar], Output = [$uchar]>,
473 {
474 #[inline]
475 fn index_mut(&mut self, index: I) -> &mut Self::Output {
476 Self::from_slice_mut(&mut self.inner[index])
477 }
478 }
479
480 impl PartialEq<$ustr> for &$ustr {
481 #[inline]
482 fn eq(&self, other: &$ustr) -> bool {
483 self.as_slice() == other.as_slice()
484 }
485 }
486
487 impl PartialEq<&$ustr> for $ustr {
488 #[inline]
489 fn eq(&self, other: &&$ustr) -> bool {
490 self.as_slice() == other.as_slice()
491 }
492 }
493
494 impl PartialEq<crate::$ucstr> for $ustr {
495 #[inline]
496 fn eq(&self, other: &crate::$ucstr) -> bool {
497 self.as_slice() == other.as_slice()
498 }
499 }
500
501 impl PartialEq<crate::$ucstr> for &$ustr {
502 #[inline]
503 fn eq(&self, other: &crate::$ucstr) -> bool {
504 self.as_slice() == other.as_slice()
505 }
506 }
507
508 impl PartialEq<&crate::$ucstr> for $ustr {
509 #[inline]
510 fn eq(&self, other: &&crate::$ucstr) -> bool {
511 self.as_slice() == other.as_slice()
512 }
513 }
514
515 impl PartialOrd<crate::$ucstr> for $ustr {
516 #[inline]
517 fn partial_cmp(&self, other: &crate::$ucstr) -> Option<core::cmp::Ordering> {
518 self.partial_cmp(other.as_ustr())
519 }
520 }
521 };
522}
523
524ustr_common_impl! {
525 /// 16-bit wide string slice with undefined encoding.
526 ///
527 /// [`U16Str`] is to [`U16String`][crate::U16String] as [`OsStr`][std::ffi::OsStr] is to
528 /// [`OsString`][std::ffi::OsString].
529 ///
530 /// [`U16Str`] are string slices that do not have a defined encoding. While it is sometimes
531 /// assumed that they contain possibly invalid or ill-formed UTF-16 data, they may be used for
532 /// any wide encoded string. This is because [`U16Str`] is intended to be used with FFI
533 /// functions, where proper encoding cannot be guaranteed. If you need string slices that are
534 /// always valid UTF-16 strings, use [`Utf16Str`][crate::Utf16Str] instead.
535 ///
536 /// Because [`U16Str`] does not have a defined encoding, no restrictions are placed on mutating
537 /// or indexing the slice. This means that even if the string contained properly encoded UTF-16
 /// or other encoding data, mutating or indexing may result in malformed data. Convert to a
539 /// [`Utf16Str`][crate::Utf16Str] if retaining proper UTF-16 encoding is desired.
540 ///
541 /// # FFI considerations
542 ///
543 /// [`U16Str`] is not aware of nul values and may or may not be nul-terminated. It is intended
544 /// to be used with FFI functions that directly use string length, where the strings are known
545 /// to have proper nul-termination already, or where strings are merely being passed through
546 /// without modification.
547 ///
548 /// [`U16CStr`][crate::U16CStr] should be used instead if nul-aware strings are required.
549 ///
550 /// # Examples
551 ///
552 /// The easiest way to use [`U16Str`] outside of FFI is with the [`u16str!`][crate::u16str]
553 /// macro to convert string literals into UTF-16 string slices at compile time:
554 ///
555 /// ```
556 /// use widestring::u16str;
557 /// let hello = u16str!("Hello, world!");
558 /// ```
559 ///
560 /// You can also convert any [`u16`] slice directly:
561 ///
562 /// ```
563 /// use widestring::{u16str, U16Str};
564 ///
565 /// let sparkle_heart = [0xd83d, 0xdc96];
566 /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
567 ///
568 /// assert_eq!(u16str!("💖"), sparkle_heart);
569 ///
 /// // This unpaired UTF-16 surrogate is invalid UTF-16, but is perfectly valid in U16Str
 /// let malformed_utf16 = [0x0, 0xd83d]; // Note that nul values are also valid and untouched
572 /// let s = U16Str::from_slice(&malformed_utf16);
573 ///
574 /// assert_eq!(s.len(), 2);
575 /// ```
576 ///
577 /// When working with a FFI, it is useful to create a [`U16Str`] from a pointer and a length:
578 ///
579 /// ```
580 /// use widestring::{u16str, U16Str};
581 ///
582 /// let sparkle_heart = [0xd83d, 0xdc96];
583 /// let sparkle_heart = unsafe {
584 /// U16Str::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len())
585 /// };
586 /// assert_eq!(u16str!("💖"), sparkle_heart);
587 /// ```
588 struct U16Str([u16]);
589
590 type UString = U16String;
591 type UCStr = U16CStr;
592
593 /// Returns an object that implements [`Display`][std::fmt::Display] for printing
594 /// strings that may contain non-Unicode data.
595 ///
596 /// This method assumes this string is intended to be UTF-16 encoding, but handles
597 /// ill-formed UTF-16 sequences lossily. The returned struct implements
598 /// the [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy
599 /// UTF-16 decoding but no heap allocations are performed, such as by
600 /// [`to_string_lossy`][Self::to_string_lossy].
601 ///
602 /// By default, invalid Unicode data is replaced with
603 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
 /// to simply skip any invalid Unicode data and forgo the replacement, you may use the
605 /// [alternate formatting][std::fmt#sign0] with `{:#}`.
606 ///
607 /// # Examples
608 ///
609 /// Basic usage:
610 ///
611 /// ```
612 /// use widestring::U16Str;
613 ///
614 /// // 𝄞mus<invalid>ic<invalid>
615 /// let s = U16Str::from_slice(&[
616 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
617 /// ]);
618 ///
619 /// assert_eq!(format!("{}", s.display()),
620 /// "𝄞mus�ic�"
621 /// );
622 /// ```
623 ///
624 /// Using alternate formatting style to skip invalid values entirely:
625 ///
626 /// ```
627 /// use widestring::U16Str;
628 ///
629 /// // 𝄞mus<invalid>ic<invalid>
630 /// let s = U16Str::from_slice(&[
631 /// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
632 /// ]);
633 ///
634 /// assert_eq!(format!("{:#}", s.display()),
635 /// "𝄞music"
636 /// );
637 /// ```
638 fn display() -> {}
639}
640
641ustr_common_impl! {
642 /// 32-bit wide string slice with undefined encoding.
643 ///
644 /// [`U32Str`] is to [`U32String`][crate::U32String] as [`OsStr`][std::ffi::OsStr] is to
645 /// [`OsString`][std::ffi::OsString].
646 ///
647 /// [`U32Str`] are string slices that do not have a defined encoding. While it is sometimes
648 /// assumed that they contain possibly invalid or ill-formed UTF-32 data, they may be used for
649 /// any wide encoded string. This is because [`U32Str`] is intended to be used with FFI
650 /// functions, where proper encoding cannot be guaranteed. If you need string slices that are
651 /// always valid UTF-32 strings, use [`Utf32Str`][crate::Utf32Str] instead.
652 ///
653 /// Because [`U32Str`] does not have a defined encoding, no restrictions are placed on mutating
654 /// or indexing the slice. This means that even if the string contained properly encoded UTF-32
 /// or other encoding data, mutating or indexing may result in malformed data. Convert to a
656 /// [`Utf32Str`][crate::Utf32Str] if retaining proper UTF-32 encoding is desired.
657 ///
658 /// # FFI considerations
659 ///
660 /// [`U32Str`] is not aware of nul values and may or may not be nul-terminated. It is intended
661 /// to be used with FFI functions that directly use string length, where the strings are known
662 /// to have proper nul-termination already, or where strings are merely being passed through
663 /// without modification.
664 ///
665 /// [`U32CStr`][crate::U32CStr] should be used instead if nul-aware strings are required.
666 ///
667 /// # Examples
668 ///
669 /// The easiest way to use [`U32Str`] outside of FFI is with the [`u32str!`][crate::u32str]
670 /// macro to convert string literals into UTF-32 string slices at compile time:
671 ///
672 /// ```
673 /// use widestring::u32str;
674 /// let hello = u32str!("Hello, world!");
675 /// ```
676 ///
677 /// You can also convert any [`u32`] slice directly:
678 ///
679 /// ```
680 /// use widestring::{u32str, U32Str};
681 ///
682 /// let sparkle_heart = [0x1f496];
683 /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
684 ///
685 /// assert_eq!(u32str!("💖"), sparkle_heart);
686 ///
 /// // This UTF-16 surrogate is invalid UTF-32, but is perfectly valid in U32Str
 /// let malformed_utf32 = [0x0, 0xd83d]; // Note that nul values are also valid and untouched
689 /// let s = U32Str::from_slice(&malformed_utf32);
690 ///
691 /// assert_eq!(s.len(), 2);
692 /// ```
693 ///
694 /// When working with a FFI, it is useful to create a [`U32Str`] from a pointer and a length:
695 ///
696 /// ```
697 /// use widestring::{u32str, U32Str};
698 ///
699 /// let sparkle_heart = [0x1f496];
700 /// let sparkle_heart = unsafe {
701 /// U32Str::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len())
702 /// };
703 /// assert_eq!(u32str!("💖"), sparkle_heart);
704 /// ```
705 struct U32Str([u32]);
706
707 type UString = U32String;
708 type UCStr = U32CStr;
709
710 /// Returns an object that implements [`Display`][std::fmt::Display] for printing
711 /// strings that may contain non-Unicode data.
712 ///
713 /// This method assumes this string is intended to be UTF-32 encoding, but handles
714 /// ill-formed UTF-32 sequences lossily. The returned struct implements
715 /// the [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy
716 /// UTF-32 decoding but no heap allocations are performed, such as by
717 /// [`to_string_lossy`][Self::to_string_lossy].
718 ///
719 /// By default, invalid Unicode data is replaced with
720 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
 /// to simply skip any invalid Unicode data and forgo the replacement, you may use the
722 /// [alternate formatting][std::fmt#sign0] with `{:#}`.
723 ///
724 /// # Examples
725 ///
726 /// Basic usage:
727 ///
728 /// ```
729 /// use widestring::U32Str;
730 ///
731 /// // 𝄞mus<invalid>ic<invalid>
732 /// let s = U32Str::from_slice(&[
733 /// 0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
734 /// ]);
735 ///
736 /// assert_eq!(format!("{}", s.display()),
737 /// "𝄞mus�ic�"
738 /// );
739 /// ```
740 ///
741 /// Using alternate formatting style to skip invalid values entirely:
742 ///
743 /// ```
744 /// use widestring::U32Str;
745 ///
746 /// // 𝄞mus<invalid>ic<invalid>
747 /// let s = U32Str::from_slice(&[
748 /// 0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
749 /// ]);
750 ///
751 /// assert_eq!(format!("{:#}", s.display()),
752 /// "𝄞music"
753 /// );
754 /// ```
755 fn display() -> {}
756}
757
758impl U16Str {
759 /// Decodes a string reference to an owned [`OsString`][std::ffi::OsString].
760 ///
761 /// This makes a string copy of the [`U16Str`]. Since [`U16Str`] makes no guarantees that its
 /// encoding is UTF-16 or that the data is valid UTF-16, there is no guarantee that the resulting
 /// [`OsString`][std::ffi::OsString] will have a valid underlying encoding either.
 ///
 /// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
 /// some platforms this may perform an encoding conversion, while on other platforms (such as
 /// Windows) no changes to the string will be made.
768 ///
769 /// # Examples
770 ///
771 /// ```rust
772 /// use widestring::U16String;
773 /// use std::ffi::OsString;
774 /// let s = "MyString";
775 /// // Create a wide string from the string
776 /// let wstr = U16String::from_str(s);
777 /// // Create an OsString from the wide string
778 /// let osstr = wstr.to_os_string();
779 ///
780 /// assert_eq!(osstr, OsString::from(s));
781 /// ```
782 #[cfg(feature = "std")]
783 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
784 #[inline]
785 #[must_use]
786 pub fn to_os_string(&self) -> std::ffi::OsString {
787 crate::platform::os_from_wide(&self.inner)
788 }
789
790 /// Decodes this string to a [`String`] if it contains valid UTF-16 data.
791 ///
792 /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such.
793 ///
794 /// # Failures
795 ///
796 /// Returns an error if the string contains any invalid UTF-16 data.
797 ///
798 /// # Examples
799 ///
800 /// ```rust
801 /// use widestring::U16String;
802 /// let s = "MyString";
803 /// // Create a wide string from the string
804 /// let wstr = U16String::from_str(s);
805 /// // Create a regular string from the wide string
806 /// let s2 = wstr.to_string().unwrap();
807 ///
808 /// assert_eq!(s2, s);
809 /// ```
810 #[cfg(feature = "alloc")]
811 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
812 #[inline]
813 pub fn to_string(&self) -> Result<String, Utf16Error> {
814 // Perform conversion ourselves to use our own error types with additional info
815 let mut s = String::with_capacity(self.len());
816 for (index, result) in self.chars().enumerate() {
817 let c = result.map_err(|e| Utf16Error::empty(index, e))?;
818 s.push(c);
819 }
820 Ok(s)
821 }
822
823 /// Decodes the string to a [`String`] even if it is invalid UTF-16 data.
824 ///
825 /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. Any
826 /// invalid sequences are replaced with
827 /// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
828 /// �
829 ///
830 /// # Examples
831 ///
832 /// ```rust
833 /// use widestring::U16String;
834 /// let s = "MyString";
835 /// // Create a wide string from the string
836 /// let wstr = U16String::from_str(s);
837 /// // Create a regular string from the wide string
838 /// let lossy = wstr.to_string_lossy();
839 ///
840 /// assert_eq!(lossy, s);
841 /// ```
842 #[cfg(feature = "alloc")]
843 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
844 #[inline]
845 #[must_use]
846 pub fn to_string_lossy(&self) -> String {
847 String::from_utf16_lossy(&self.inner)
848 }
849
850 /// Returns an iterator over the [`char`][prim@char]s of a string slice.
851 ///
852 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
853 /// may consist of invalid UTF-16, the iterator returned by this method
854 /// is an iterator over `Result<char, DecodeUtf16Error>` instead of [`char`][prim@char]s
 /// directly. If you would like a lossy iterator over [`char`][prim@char]s directly, instead
856 /// use [`chars_lossy`][Self::chars_lossy].
857 ///
858 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
859 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
 /// what you actually want. That functionality is not provided by this crate.
861 #[inline]
862 #[must_use]
863 pub fn chars(&self) -> CharsUtf16<'_> {
864 CharsUtf16::new(self.as_slice())
865 }
866
867 /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
868 ///
869 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
870 /// may consist of invalid UTF-16, the iterator returned by this method will replace unpaired
871 /// surrogates with
872 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
873 /// version of [`chars`][Self::chars].
874 ///
875 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
876 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
 /// what you actually want. That functionality is not provided by this crate.
878 #[inline]
879 #[must_use]
880 pub fn chars_lossy(&self) -> CharsLossyUtf16<'_> {
881 CharsLossyUtf16::new(self.as_slice())
882 }
883
884 /// Returns an iterator over the chars of a string slice, and their positions.
885 ///
886 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
887 /// may consist of invalid UTF-16, the iterator returned by this method is an iterator over
888 /// `Result<char, DecodeUtf16Error>` as well as their positions, instead of
 /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
 /// [`char`][prim@char]s directly, instead use
891 /// [`char_indices_lossy`][Self::char_indices_lossy].
892 ///
893 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
894 #[inline]
895 #[must_use]
896 pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
897 CharIndicesUtf16::new(self.as_slice())
898 }
899
900 /// Returns a lossy iterator over the chars of a string slice, and their positions.
901 ///
902 /// As this string slice may consist of invalid UTF-16, the iterator returned by this method
903 /// will replace unpaired surrogates with
904 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
905 /// positions of all characters. This is a lossy version of
906 /// [`char_indices`][Self::char_indices].
907 ///
908 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
909 #[inline]
910 #[must_use]
911 pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf16<'_> {
912 CharIndicesLossyUtf16::new(self.as_slice())
913 }
914
915 /// Returns an iterator over the lines of a [`U16Str`], as string slices.
916 ///
917 /// Lines are split at line endings that are either newlines (`\n`) or
918 /// sequences of a carriage return followed by a line feed (`\r\n`).
919 ///
920 /// Line terminators are not included in the lines returned by the iterator.
921 ///
922 /// Note that any carriage return (`\r`) not immediately followed by a
923 /// line feed (`\n`) does not split a line. These carriage returns are
924 /// thereby included in the produced lines.
925 ///
926 /// The final line ending is optional. A string that ends with a final line
927 /// ending will return the same lines as an otherwise identical string
928 /// without a final line ending.
929 ///
930 /// # Examples
931 ///
932 /// Basic usage:
933 ///
934 /// ```
935 /// use widestring::{u16str};
936 ///
937 /// let text = u16str!("foo\r\nbar\n\nbaz\r");
938 /// let mut lines = text.lines_lossy();
939 ///
940 /// assert_eq!(Some(u16str!("foo")), lines.next());
941 /// assert_eq!(Some(u16str!("bar")), lines.next());
942 /// assert_eq!(Some(u16str!("")), lines.next());
943 /// // Trailing carriage return is included in the last line
944 /// assert_eq!(Some(u16str!("baz\r")), lines.next());
945 ///
946 /// assert_eq!(None, lines.next());
947 /// ```
948 ///
949 /// The final line does not require any ending:
950 ///
951 /// ```
952 /// use widestring::{u16str};
953 ///
954 /// let text = u16str!("foo\nbar\n\r\nbaz");
955 /// let mut lines = text.lines_lossy();
956 ///
957 /// assert_eq!(Some(u16str!("foo")), lines.next());
958 /// assert_eq!(Some(u16str!("bar")), lines.next());
959 /// assert_eq!(Some(u16str!("")), lines.next());
960 /// assert_eq!(Some(u16str!("baz")), lines.next());
961 ///
962 /// assert_eq!(None, lines.next());
963 /// ```
964 pub fn lines_lossy(&self) -> Lines<'_, Self, CharIndicesLossyUtf16<'_>> {
965 Lines::new(self, self.len(), self.char_indices_lossy())
966 }
967}
968
969impl U32Str {
970 /// Constructs a [`U32Str`] from a [`char`][prim@char] pointer and a length.
971 ///
972 /// The `len` argument is the number of `char` elements, **not** the number of bytes. No copying
973 /// or allocation is performed, the resulting value is a direct reference to the pointer bytes.
974 ///
975 /// # Safety
976 ///
977 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
978 /// elements.
979 ///
980 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
981 /// In particular, the returned string reference *must not be mutated* for the duration of
982 /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
983 ///
984 /// # Panics
985 ///
986 /// This function panics if `p` is null.
987 ///
988 /// # Caveat
989 ///
990 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
991 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
992 /// context, such as by providing a helper function taking the lifetime of a host value for the
993 /// string, or by explicit annotation.
994 #[inline]
995 #[must_use]
996 pub unsafe fn from_char_ptr<'a>(p: *const char, len: usize) -> &'a Self {
997 Self::from_ptr(p as *const u32, len)
998 }
999
1000 /// Constructs a mutable [`U32Str`] from a mutable [`char`][prim@char] pointer and a length.
1001 ///
1002 /// The `len` argument is the number of `char` elements, **not** the number of bytes. No copying
1003 /// or allocation is performed, the resulting value is a direct reference to the pointer bytes.
1004 ///
1005 /// # Safety
1006 ///
1007 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
1008 /// elements.
1009 ///
1010 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1011 ///
1012 /// # Panics
1013 ///
1014 /// This function panics if `p` is null.
1015 ///
1016 /// # Caveat
1017 ///
1018 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1019 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1020 /// context, such as by providing a helper function taking the lifetime of a host value for the
1021 /// string, or by explicit annotation.
1022 #[inline]
1023 #[must_use]
1024 pub unsafe fn from_char_ptr_mut<'a>(p: *mut char, len: usize) -> &'a mut Self {
1025 Self::from_ptr_mut(p as *mut u32, len)
1026 }
1027
1028 /// Constructs a [`U32Str`] from a [`char`][prim@char] slice.
1029 ///
1030 /// No checks are performed on the slice.
1031 #[inline]
1032 #[must_use]
1033 pub fn from_char_slice(slice: &[char]) -> &Self {
1034 let ptr: *const [char] = slice;
1035 unsafe { &*(ptr as *const Self) }
1036 }
1037
1038 /// Constructs a mutable [`U32Str`] from a mutable [`char`][prim@char] slice.
1039 ///
1040 /// No checks are performed on the slice.
1041 #[inline]
1042 #[must_use]
1043 pub fn from_char_slice_mut(slice: &mut [char]) -> &mut Self {
1044 let ptr: *mut [char] = slice;
1045 unsafe { &mut *(ptr as *mut Self) }
1046 }
1047
/// Decodes the string to an owned [`OsString`][std::ffi::OsString].
///
/// This makes a string copy of the [`U32Str`]. The conversion goes through
/// [`to_string_lossy`][Self::to_string_lossy], so the data is assumed to be UTF-32 and any
/// value that is not a valid [`char`][prim@char] is replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER] before the result is
/// turned into an [`OsString`][std::ffi::OsString].
///
/// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
/// some platforms this may perform an encoding conversion, while on other platforms no changes
/// to the string will be made.
///
/// # Examples
///
/// ```rust
/// use widestring::U32String;
/// use std::ffi::OsString;
/// let s = "MyString";
/// // Create a wide string from the string
/// let wstr = U32String::from_str(s);
/// // Create an OsString from the wide string
/// let osstr = wstr.to_os_string();
///
/// assert_eq!(osstr, OsString::from(s));
/// ```
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
#[inline]
#[must_use]
pub fn to_os_string(&self) -> std::ffi::OsString {
    let decoded = self.to_string_lossy();
    std::ffi::OsString::from(decoded)
}
1078
/// Decodes the string to a [`String`], provided it holds valid UTF-32 data.
///
/// The string is assumed to be encoded as UTF-32 and is decoded as such.
///
/// # Failures
///
/// Returns an error as soon as invalid UTF-32 data is encountered. The error is built from the
/// index of the offending item and the underlying decode error.
///
/// # Examples
///
/// ```rust
/// use widestring::U32String;
/// let s = "MyString";
/// // Create a wide string from the string
/// let wstr = U32String::from_str(s);
/// // Create a regular string from the wide string
/// let s2 = wstr.to_string().unwrap();
///
/// assert_eq!(s2, s);
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub fn to_string(&self) -> Result<String, Utf32Error> {
    // Reserve one byte per element up front; the buffer grows if multi-byte chars appear.
    let mut decoded = String::with_capacity(self.len());
    for (index, result) in self.chars().enumerate() {
        match result {
            Ok(c) => decoded.push(c),
            Err(e) => return Err(Utf32Error::empty(index, e)),
        }
    }
    Ok(decoded)
}
1109
/// Decodes the string reference to a [`String`] even if it is invalid UTF-32 data.
///
/// This method assumes this string is encoded as UTF-32 and attempts to decode it as such. Any
/// value that is not a valid [`char`][prim@char] is replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
/// �
///
/// # Examples
///
/// ```rust
/// use widestring::U32String;
/// let s = "MyString";
/// // Create a wide string from the string
/// let wstr = U32String::from_str(s);
/// // Create a regular string from the wide string
/// let lossy = wstr.to_string_lossy();
///
/// assert_eq!(lossy, s);
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_string_lossy(&self) -> String {
    // Collecting `char`s straight into a `String` performs the UTF-8 encoding safely, so no
    // intermediate `Vec<char>`, hand-sized byte buffer, or `unsafe` conversion is needed.
    self.inner
        .iter()
        .map(|&unit| char::from_u32(unit).unwrap_or(char::REPLACEMENT_CHARACTER))
        .collect()
}
1147
1148 /// Returns an iterator over the [`char`][prim@char]s of a string slice.
1149 ///
1150 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1151 /// may consist of invalid UTF-32, the iterator returned by this method
1152 /// is an iterator over `Result<char, DecodeUtf32Error>` instead of [`char`][prim@char]s
1153 /// directly. If you would like a lossy iterator over [`chars`][prim@char]s directly, instead
1154 /// use [`chars_lossy`][Self::chars_lossy].
1155 ///
1156 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1157 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1158 /// what you actually want. That functionality is not provided by by this crate.
1159 #[inline]
1160 #[must_use]
1161 pub fn chars(&self) -> CharsUtf32<'_> {
1162 CharsUtf32::new(self.as_slice())
1163 }
1164
1165 /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
1166 ///
1167 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1168 /// may consist of invalid UTF-32, the iterator returned by this method will replace unpaired
1169 /// surrogates with
1170 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
1171 /// version of [`chars`][Self::chars].
1172 ///
1173 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1174 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1175 /// what you actually want. That functionality is not provided by by this crate.
1176 #[inline]
1177 #[must_use]
1178 pub fn chars_lossy(&self) -> CharsLossyUtf32<'_> {
1179 CharsLossyUtf32::new(self.as_slice())
1180 }
1181
1182 /// Returns an iterator over the chars of a string slice, and their positions.
1183 ///
1184 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1185 /// may consist of invalid UTF-32, the iterator returned by this method is an iterator over
1186 /// `Result<char, DecodeUtf32Error>` as well as their positions, instead of
1187 /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
1188 /// [`chars`][prim@char]s directly, instead use
1189 /// [`char_indices_lossy`][Self::char_indices_lossy].
1190 ///
1191 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1192 #[inline]
1193 #[must_use]
1194 pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
1195 CharIndicesUtf32::new(self.as_slice())
1196 }
1197
1198 /// Returns a lossy iterator over the chars of a string slice, and their positions.
1199 ///
1200 /// As this string slice may consist of invalid UTF-32, the iterator returned by this method
1201 /// will replace invalid values with
1202 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
1203 /// positions of all characters. This is a lossy version of
1204 /// [`char_indices`][Self::char_indices].
1205 ///
1206 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1207 #[inline]
1208 #[must_use]
1209 pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf32<'_> {
1210 CharIndicesLossyUtf32::new(self.as_slice())
1211 }
1212
1213 /// Returns an iterator over the lines of a [`U32Str`], as string slices.
1214 ///
1215 /// Lines are split at line endings that are either newlines (`\n`) or
1216 /// sequences of a carriage return followed by a line feed (`\r\n`).
1217 ///
1218 /// Line terminators are not included in the lines returned by the iterator.
1219 ///
1220 /// Note that any carriage return (`\r`) not immediately followed by a
1221 /// line feed (`\n`) does not split a line. These carriage returns are
1222 /// thereby included in the produced lines.
1223 ///
1224 /// The final line ending is optional. A string that ends with a final line
1225 /// ending will return the same lines as an otherwise identical string
1226 /// without a final line ending.
1227 ///
1228 /// # Examples
1229 ///
1230 /// Basic usage:
1231 ///
1232 /// ```
1233 /// use widestring::{u32str};
1234 ///
1235 /// let text = u32str!("foo\r\nbar\n\nbaz\r");
1236 /// let mut lines = text.lines_lossy();
1237 ///
1238 /// assert_eq!(Some(u32str!("foo")), lines.next());
1239 /// assert_eq!(Some(u32str!("bar")), lines.next());
1240 /// assert_eq!(Some(u32str!("")), lines.next());
1241 /// // Trailing carriage return is included in the last line
1242 /// assert_eq!(Some(u32str!("baz\r")), lines.next());
1243 ///
1244 /// assert_eq!(None, lines.next());
1245 /// ```
1246 ///
1247 /// The final line does not require any ending:
1248 ///
1249 /// ```
1250 /// use widestring::{u32str};
1251 ///
1252 /// let text = u32str!("foo\nbar\n\r\nbaz");
1253 /// let mut lines = text.lines_lossy();
1254 ///
1255 /// assert_eq!(Some(u32str!("foo")), lines.next());
1256 /// assert_eq!(Some(u32str!("bar")), lines.next());
1257 /// assert_eq!(Some(u32str!("")), lines.next());
1258 /// assert_eq!(Some(u32str!("baz")), lines.next());
1259 ///
1260 /// assert_eq!(None, lines.next());
1261 /// ```
1262 pub fn lines_lossy(&self) -> Lines<'_, Self, CharIndicesLossyUtf32<'_>> {
1263 Lines::new(self, self.len(), self.char_indices_lossy())
1264 }
1265}
1266
1267impl core::fmt::Debug for U16Str {
1268 #[inline]
1269 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1270 crate::debug_fmt_u16(self.as_slice(), f)
1271 }
1272}
1273
1274impl core::fmt::Debug for U32Str {
1275 #[inline]
1276 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1277 crate::debug_fmt_u32(self.as_slice(), f)
1278 }
1279}
1280
1281impl<'a> From<&'a [char]> for &'a U32Str {
1282 #[inline]
1283 fn from(value: &'a [char]) -> Self {
1284 U32Str::from_char_slice(value)
1285 }
1286}
1287
1288impl<'a> From<&'a mut [char]> for &'a mut U32Str {
1289 #[inline]
1290 fn from(value: &'a mut [char]) -> Self {
1291 U32Str::from_char_slice_mut(value)
1292 }
1293}
1294
/// Alias for [`U16Str`] or [`U32Str`] depending on platform. Intended to match typical C `wchar_t`
/// size on platform.
///
/// This definition applies to every target that is not Windows and resolves to [`U32Str`].
#[cfg(not(windows))]
pub type WideStr = U32Str;
1299
/// Alias for [`U16Str`] or [`U32Str`] depending on platform. Intended to match typical C `wchar_t`
/// size on platform.
///
/// This definition applies to Windows targets and resolves to [`U16Str`].
#[cfg(windows)]
pub type WideStr = U16Str;
1304
/// Helper struct for printing wide string values with [`format!`] and `{}`.
///
/// A wide string might contain ill-formed UTF encoding. This struct implements the
/// [`Display`][std::fmt::Display] trait so that the string is decoded lossily without performing
/// the heap allocation that, for example, [`to_string_lossy`][U16Str::to_string_lossy] needs. It
/// is created by the [`display`][U16Str::display] method on [`U16Str`] and [`U32Str`].
///
/// By default, invalid Unicode data is replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish to simply
/// skip any invalid Unicode data and forego the replacement, you may use the
/// [alternate formatting][std::fmt#sign0] with `{:#}`.
pub struct Display<'a, S>
where
    S: ?Sized,
{
    // Borrowed wide string to be formatted.
    str: &'a S,
}
1319
1320impl core::fmt::Debug for Display<'_, U16Str> {
1321 #[inline]
1322 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1323 core::fmt::Debug::fmt(&self.str, f)
1324 }
1325}
1326
1327impl core::fmt::Debug for Display<'_, U32Str> {
1328 #[inline]
1329 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1330 core::fmt::Debug::fmt(&self.str, f)
1331 }
1332}
1333
1334impl core::fmt::Display for Display<'_, U16Str> {
1335 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1336 for c in crate::decode_utf16_lossy(self.str.as_slice().iter().copied()) {
1337 // Allow alternate {:#} format which skips replacment chars entirely
1338 if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
1339 f.write_char(c)?;
1340 }
1341 }
1342 Ok(())
1343 }
1344}
1345
1346impl core::fmt::Display for Display<'_, U32Str> {
1347 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1348 for c in crate::decode_utf32_lossy(self.str.as_slice().iter().copied()) {
1349 // Allow alternate {:#} format which skips replacment chars entirely
1350 if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
1351 f.write_char(c)?;
1352 }
1353 }
1354 Ok(())
1355 }
1356}