widestring/utfstr.rs
1//! UTF string slices.
2//!
3//! This module contains UTF string slices and related types.
4
5use crate::{
6 error::{Utf16Error, Utf32Error},
7 is_utf16_low_surrogate,
8 iter::{EncodeUtf16, EncodeUtf32, EncodeUtf8},
9 validate_utf16, validate_utf32, U16Str, U32Str,
10};
11#[cfg(feature = "alloc")]
12use crate::{Utf16String, Utf32String};
13#[cfg(feature = "alloc")]
14#[allow(unused_imports)]
15use alloc::{borrow::Cow, boxed::Box, string::String};
16#[allow(unused_imports)]
17use core::{
18 convert::{AsMut, AsRef, TryFrom},
19 fmt::Write,
20 ops::{Index, IndexMut, RangeBounds},
21 slice::SliceIndex,
22};
23
24mod iter;
25
26pub use iter::*;
27
/// Returns how many `u16` code units are needed to encode `c` as UTF-16.
#[inline]
const fn char_len_utf16(c: char) -> usize {
    char::len_utf16(c)
}
32
/// Returns how many `u32` code units are needed to encode a `char` as UTF-32.
///
/// The answer is always one, so the argument is ignored; the parameter exists only so this
/// function has the same shape as `char_len_utf16`.
#[inline]
const fn char_len_utf32(_c: char) -> usize {
    const UNITS_PER_CHAR: usize = 1;
    UNITS_PER_CHAR
}
37
// Generates the API shared by the validated UTF string slice types.
//
// One invocation defines the slice type itself (a wrapper around a slice of code units), its
// unchecked constructors and accessors, whitespace trimming, boxed conversions, and the
// conversion/comparison trait impls against the untyped wide string types, `str` and `Cow`.
//
// Inputs:
// - `struct Name([unit]);`   name of the generated type and its code unit type
// - `type UtfString`         owned counterpart of the generated type
// - `type UStr` / `UCStr`    wide string slice types of undefined encoding (plain / nul-terminated)
// - `type UtfError`          validation error returned by the `TryFrom` impls
// - `char_len_fn`            function giving the number of code units one `char` occupies
// - `fn name() -> {}`        placeholders that only carry the doc attributes for that method
macro_rules! utfstr_common_impl {
    {
        $(#[$utfstr_meta:meta])*
        struct $utfstr:ident([$uchar:ty]);
        type UtfString = $utfstring:ident;
        type UStr = $ustr:ident;
        type UCStr = $ucstr:ident;
        type UtfError = $utferror:ident;
        char_len_fn = $char_len_fn:ident;
        $(#[$from_slice_unchecked_meta:meta])*
        fn from_slice_unchecked() -> {}
        $(#[$from_slice_unchecked_mut_meta:meta])*
        fn from_slice_unchecked_mut() -> {}
        $(#[$from_boxed_slice_unchecked_meta:meta])*
        fn from_boxed_slice_unchecked() -> {}
        $(#[$get_unchecked_meta:meta])*
        fn get_unchecked() -> {}
        $(#[$get_unchecked_mut_meta:meta])*
        fn get_unchecked_mut() -> {}
        $(#[$len_meta:meta])*
        fn len() -> {}
    } => {
        $(#[$utfstr_meta])*
        #[allow(clippy::derive_hash_xor_eq)]
        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
        // The constructors below reinterpret slice pointers as pointers to this type, which is
        // only guaranteed to be layout-compatible when the wrapper is `repr(transparent)`.
        #[repr(transparent)]
        pub struct $utfstr {
            pub(crate) inner: [$uchar],
        }

        impl $utfstr {
            $(#[$from_slice_unchecked_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub const unsafe fn from_slice_unchecked(s: &[$uchar]) -> &Self {
                // SAFETY: `Self` is a `repr(transparent)` wrapper around the slice type, so the
                // pointer cast preserves address, length metadata and layout. Validity of the
                // encoding is the caller's obligation.
                &*(s as *const [$uchar] as *const Self)
            }

            $(#[$from_slice_unchecked_mut_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub unsafe fn from_slice_unchecked_mut(s: &mut [$uchar]) -> &mut Self {
                // SAFETY: same layout argument as `from_slice_unchecked`; the unique borrow of `s`
                // is carried over to the returned reference. Validity of the encoding is the
                // caller's obligation.
                &mut *(s as *mut [$uchar] as *mut Self)
            }

            $(#[$from_boxed_slice_unchecked_meta])*
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub unsafe fn from_boxed_slice_unchecked(s: Box<[$uchar]>) -> Box<Self> {
                // SAFETY: the raw pointer comes straight from `Box::into_raw` and `Self` has the
                // same layout as the slice, so the allocation is handed back unchanged. Validity
                // of the encoding is the caller's obligation.
                Box::from_raw(Box::into_raw(s) as *mut Self)
            }

            $(#[$get_unchecked_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked<I>(&self, index: I) -> &Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller guarantees `index` is in bounds and that the resulting
                // subslice is itself valid for this string type.
                Self::from_slice_unchecked(self.inner.get_unchecked(index))
            }

            $(#[$get_unchecked_mut_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                // SAFETY: the caller guarantees `index` is in bounds and that the resulting
                // subslice is itself valid for this string type.
                Self::from_slice_unchecked_mut(self.inner.get_unchecked_mut(index))
            }

            $(#[$len_meta])*
            #[inline]
            #[must_use]
            pub const fn len(&self) -> usize {
                self.inner.len()
            }

            /// Returns `true` if the string has a length of zero.
            #[inline]
            #[must_use]
            pub const fn is_empty(&self) -> bool {
                self.inner.is_empty()
            }

            /// Converts a string to a slice of its underlying elements.
            ///
            /// To convert the slice back into a string slice, use the
            /// [`from_slice`][Self::from_slice] function.
            #[inline]
            #[must_use]
            pub const fn as_slice(&self) -> &[$uchar] {
                &self.inner
            }

            /// Converts a mutable string to a mutable slice of its underlying elements.
            ///
            /// # Safety
            ///
            /// This function is unsafe because you can violate the invariants of this type when
            /// mutating the slice. The caller must ensure that the contents of the slice is valid
            /// UTF before the borrow ends and the underlying string is used.
            ///
            /// Use of this string type whose contents have been mutated to invalid UTF is
            /// undefined behavior.
            #[inline]
            #[must_use]
            pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
                &mut self.inner
            }

            /// Converts a string slice to a raw pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            ///
            /// The caller must ensure that the returned pointer is never written to. If you need to
            /// mutate the contents of the string slice, use [`as_mut_ptr`][Self::as_mut_ptr].
            #[inline]
            #[must_use]
            pub const fn as_ptr(&self) -> *const $uchar {
                self.inner.as_ptr()
            }

            /// Converts a mutable string slice to a mutable pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
                self.inner.as_mut_ptr()
            }

            /// Returns this string as a wide string slice of undefined encoding.
            #[inline]
            #[must_use]
            pub const fn as_ustr(&self) -> &$ustr {
                $ustr::from_slice(self.as_slice())
            }

            /// Returns a string slice with leading and trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            #[must_use]
            pub fn trim(&self) -> &Self {
                self.trim_start().trim_end()
            }

            /// Returns a string slice with leading whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// The result is always a subslice of `self`; if the string consists only of
            /// whitespace, it is the empty slice at the end of `self`.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `start` in this context means the first position
            /// of that sequence; for a left-to-right language like English or Russian, this will be
            /// left side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// right side.
            #[must_use]
            pub fn trim_start(&self) -> &Self {
                // Falling back to `len()` keeps the all-whitespace result inside `self` instead
                // of handing out an unrelated static empty slice.
                let start = self
                    .char_indices()
                    .find(|(_, c)| !c.is_whitespace())
                    .map_or(self.len(), |(index, _)| index);
                &self[start..]
            }

            /// Returns a string slice with trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// The result is always a subslice of `self`; if the string consists only of
            /// whitespace, it is the empty slice at the start of `self`.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `end` in this context means the last position of
            /// that sequence; for a left-to-right language like English or Russian, this will be
            /// right side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// left side.
            #[must_use]
            pub fn trim_end(&self) -> &Self {
                // The cut point is one past the last non-whitespace char, hence the added char
                // length; falling back to `0` keeps the all-whitespace result inside `self`.
                let end = self
                    .char_indices()
                    .rfind(|(_, c)| !c.is_whitespace())
                    .map_or(0, |(index, c)| index + $char_len_fn(c));
                &self[..end]
            }

            /// Converts a boxed string into a boxed slice without copying or allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_boxed_slice(self: Box<Self>) -> Box<[$uchar]> {
                // SAFETY: from_raw pointer is from into_raw, and `Self` has the same layout as
                // the slice it wraps
                unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) }
            }

            /// Converts a boxed string slice into an owned UTF string without copying or
            /// allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_utfstring(self: Box<Self>) -> $utfstring {
                // SAFETY: the vector holds exactly the elements of `self`, which this type
                // requires to be valid UTF
                unsafe { $utfstring::from_vec_unchecked(self.into_boxed_slice().into_vec()) }
            }

            /// Creates a new owned string by repeating this string `n` times.
            ///
            /// # Panics
            ///
            /// This function will panic if the capacity would overflow.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn repeat(&self, n: usize) -> $utfstring {
                // SAFETY: `self` is valid UTF, and concatenating whole copies of a valid sequence
                // cannot split or create an invalid sequence
                unsafe { $utfstring::from_vec_unchecked(self.as_slice().repeat(n)) }
            }
        }

        impl AsMut<$utfstr> for $utfstr {
            #[inline]
            fn as_mut(&mut self) -> &mut $utfstr {
                self
            }
        }

        impl AsRef<$utfstr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$utfstr {
                self
            }
        }

        impl AsRef<[$uchar]> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &[$uchar] {
                self.as_slice()
            }
        }

        impl AsRef<$ustr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$ustr {
                self.as_ustr()
            }
        }

        impl core::fmt::Debug for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                // Quoted and escaped char by char
                f.write_char('"')?;
                self.escape_debug().try_for_each(|c| f.write_char(c))?;
                f.write_char('"')
            }
        }

        impl Default for &$utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked(&[]) }
            }
        }

        impl Default for &mut $utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked_mut(&mut []) }
            }
        }

        impl core::fmt::Display for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                self.chars().try_for_each(|c| f.write_char(c))
            }
        }

        #[cfg(feature = "alloc")]
        impl From<Box<$utfstr>> for Box<[$uchar]> {
            #[inline]
            fn from(value: Box<$utfstr>) -> Self {
                value.into_boxed_slice()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a $ustr {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_ustr()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a [$uchar] {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_slice()
            }
        }

        #[cfg(feature = "std")]
        impl From<&$utfstr> for std::ffi::OsString {
            #[inline]
            fn from(value: &$utfstr) -> std::ffi::OsString {
                value.as_ustr().to_os_string()
            }
        }

        // Comparisons between string slices of the same encoding compare code units directly.

        impl PartialEq<$utfstr> for &$utfstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, $utfstr>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, $utfstr>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, $utfstr> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, $utfstr> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        impl PartialEq<$ustr> for $utfstr {
            #[inline]
            fn eq(&self, other: &$ustr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for $ustr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<crate::$ucstr> for $utfstr {
            #[inline]
            fn eq(&self, other: &crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for crate::$ucstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        // Comparisons against `str` go through decoded `char`s, since the code units differ.

        impl PartialEq<str> for $utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<&str> for $utfstr {
            #[inline]
            fn eq(&self, other: &&str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<str> for &$utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for &str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, str>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, str> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, str> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        impl<'a> TryFrom<&'a $ustr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a $ustr) -> Result<Self, Self::Error> {
                $utfstr::from_ustr(value)
            }
        }

        impl<'a> TryFrom<&'a crate::$ucstr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a crate::$ucstr) -> Result<Self, Self::Error> {
                $utfstr::from_ucstr(value)
            }
        }
    };
}
491
492utfstr_common_impl! {
493 /// UTF-16 string slice for [`Utf16String`][crate::Utf16String].
494 ///
495 /// [`Utf16Str`] is to [`Utf16String`][crate::Utf16String] as [`str`] is to [`String`].
496 ///
497 /// [`Utf16Str`] slices are string slices that are always valid UTF-16 encoding. This is unlike
498 /// the [`U16Str`][U16Str] string slices, which may not have valid encoding. In this way,
499 /// [`Utf16Str`] string slices most resemble native [`str`] slices of all the types in this
500 /// crate.
501 ///
502 /// # Examples
503 ///
504 /// The easiest way to use [`Utf16Str`] is with the [`utf16str!`][crate::utf16str] macro to
505 /// convert string literals into string slices at compile time:
506 ///
507 /// ```
508 /// use widestring::utf16str;
509 /// let hello = utf16str!("Hello, world!");
510 /// ```
511 ///
512 /// You can also convert a [`u16`] slice directly, provided it is valid UTF-16:
513 ///
514 /// ```
515 /// use widestring::Utf16Str;
516 ///
517 /// let sparkle_heart = [0xd83d, 0xdc96];
518 /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
519 ///
520 /// assert_eq!("💖", sparkle_heart);
521 /// ```
522 struct Utf16Str([u16]);
523
524 type UtfString = Utf16String;
525 type UStr = U16Str;
526 type UCStr = U16CStr;
527 type UtfError = Utf16Error;
528 char_len_fn = char_len_utf16;
529
530 /// Converts a slice to a string slice without checking that the string contains valid UTF-16.
531 ///
532 /// See the safe version, [`from_slice`][Self::from_slice], for more information.
533 ///
534 /// # Safety
535 ///
536 /// This function is unsafe because it does not check that the slice passed to it is valid
537 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
538 /// [`Utf16Str`] is always valid UTF-16.
539 ///
540 /// # Examples
541 ///
542 /// ```
543 /// use widestring::Utf16Str;
544 ///
545 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
546 /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked(&sparkle_heart) };
547 ///
548 /// assert_eq!("💖", sparkle_heart);
549 /// ```
550 fn from_slice_unchecked() -> {}
551
552 /// Converts a mutable slice to a mutable string slice without checking that the string contains
553 /// valid UTF-16.
554 ///
555 /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
556 ///
557 /// # Safety
558 ///
559 /// This function is unsafe because it does not check that the slice passed to it is valid
560 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
561 /// [`Utf16Str`] is always valid UTF-16.
562 ///
563 /// # Examples
564 ///
565 /// ```
566 /// use widestring::Utf16Str;
567 ///
568 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
569 /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked_mut(&mut sparkle_heart) };
570 ///
571 /// assert_eq!("💖", sparkle_heart);
572 /// ```
573 fn from_slice_unchecked_mut() -> {}
574
575 /// Converts a boxed slice to a boxed string slice without checking that the string contains
576 /// valid UTF-16.
577 ///
578 /// # Safety
579 ///
580 /// This function is unsafe because it does not check if the string slice is valid UTF-16, and
581 /// [`Utf16Str`] must always be valid UTF-16.
582 fn from_boxed_slice_unchecked() -> {}
583
584 /// Returns an unchecked subslice of this string slice.
585 ///
586 /// This is the unchecked alternative to indexing the string slice.
587 ///
588 /// # Safety
589 ///
590 /// Callers of this function are responsible that these preconditions are satisfied:
591 ///
592 /// - The starting index must not exceed the ending index;
593 /// - Indexes must be within bounds of the original slice;
594 /// - Indexes must lie on UTF-16 sequence boundaries.
595 ///
596 /// Failing that, the returned string slice may reference invalid memory or violate the
597 /// invariants communicated by the type.
598 ///
599 /// # Examples
600 ///
601 /// ```
602 /// # use widestring::{utf16str};
603 /// let v = utf16str!("⚧️🏳️⚧️➡️s");
604 /// unsafe {
605 /// assert_eq!(utf16str!("⚧️"), v.get_unchecked(..2));
606 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked(2..8));
607 /// assert_eq!(utf16str!("➡️"), v.get_unchecked(8..10));
608 /// assert_eq!(utf16str!("s"), v.get_unchecked(10..));
609 /// }
610 /// ```
611 fn get_unchecked() -> {}
612
613 /// Returns a mutable, unchecked subslice of this string slice.
614 ///
615 /// This is the unchecked alternative to indexing the string slice.
616 ///
617 /// # Safety
618 ///
619 /// Callers of this function are responsible that these preconditions are satisfied:
620 ///
621 /// - The starting index must not exceed the ending index;
622 /// - Indexes must be within bounds of the original slice;
623 /// - Indexes must lie on UTF-16 sequence boundaries.
624 ///
625 /// Failing that, the returned string slice may reference invalid memory or violate the
626 /// invariants communicated by the type.
627 ///
628 /// # Examples
629 ///
630 /// ```
631 /// # use widestring::{utf16str};
632 /// # #[cfg(feature = "alloc")] {
633 /// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
634 /// unsafe {
635 /// assert_eq!(utf16str!("⚧️"), v.get_unchecked_mut(..2));
636 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked_mut(2..8));
637 /// assert_eq!(utf16str!("➡️"), v.get_unchecked_mut(8..10));
638 /// assert_eq!(utf16str!("s"), v.get_unchecked_mut(10..));
639 /// }
640 /// # }
641 /// ```
642 fn get_unchecked_mut() -> {}
643
644 /// Returns the length of `self`.
645 ///
646 /// This length is in `u16` values, not [`char`]s or graphemes. In other words, it may not be
647 /// what a human considers the length of the string.
648 ///
649 /// # Examples
650 ///
651 /// ```
652 /// # use widestring::utf16str;
653 /// assert_eq!(utf16str!("foo").len(), 3);
654 ///
655 /// let complex = utf16str!("⚧️🏳️⚧️➡️s");
656 /// assert_eq!(complex.len(), 11);
657 /// assert_eq!(complex.chars().count(), 10);
658 /// ```
659 fn len() -> {}
660}
661
662utfstr_common_impl! {
663 /// UTF-32 string slice for [`Utf32String`][crate::Utf32String].
664 ///
665 /// [`Utf32Str`] is to [`Utf32String`][crate::Utf32String] as [`str`] is to [`String`].
666 ///
667 /// [`Utf32Str`] slices are string slices that are always valid UTF-32 encoding. This is unlike
668 /// the [`U32Str`][crate::U32Str] string slices, which may not have valid encoding. In this way,
669 /// [`Utf32Str`] string slices most resemble native [`str`] slices of all the types in this
670 /// crate.
671 ///
672 /// # Examples
673 ///
674 /// The easiest way to use [`Utf32Str`] is with the [`utf32str!`][crate::utf32str] macro to
675 /// convert string literals into string slices at compile time:
676 ///
677 /// ```
678 /// use widestring::utf32str;
679 /// let hello = utf32str!("Hello, world!");
680 /// ```
681 ///
682 /// You can also convert a [`u32`] slice directly, provided it is valid UTF-32:
683 ///
684 /// ```
685 /// use widestring::Utf32Str;
686 ///
687 /// let sparkle_heart = [0x1f496];
688 /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
689 ///
690 /// assert_eq!("💖", sparkle_heart);
691 /// ```
692 ///
693 /// Since [`char`] slices are valid UTF-32, a slice of [`char`]s can be easily converted to a
694 /// string slice:
695 ///
696 /// ```
697 /// use widestring::Utf32Str;
698 ///
699 /// let sparkle_heart = ['💖'; 3];
700 /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
701 ///
702 /// assert_eq!("💖💖💖", sparkle_heart);
703 /// ```
704 struct Utf32Str([u32]);
705
706 type UtfString = Utf32String;
707 type UStr = U32Str;
708 type UCStr = U32CStr;
709 type UtfError = Utf32Error;
710 char_len_fn = char_len_utf32;
711
712 /// Converts a slice to a string slice without checking that the string contains valid UTF-32.
713 ///
714 /// See the safe version, [`from_slice`][Self::from_slice], for more information.
715 ///
716 /// # Safety
717 ///
718 /// This function is unsafe because it does not check that the slice passed to it is valid
719 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
720 /// [`Utf32Str`] is always valid UTF-32.
721 ///
722 /// # Examples
723 ///
724 /// ```
725 /// use widestring::Utf32Str;
726 ///
727 /// let sparkle_heart = vec![0x1f496];
728 /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked(&sparkle_heart) };
729 ///
730 /// assert_eq!("💖", sparkle_heart);
731 /// ```
732 fn from_slice_unchecked() -> {}
733
734 /// Converts a mutable slice to a mutable string slice without checking that the string contains
735 /// valid UTF-32.
736 ///
737 /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
738 ///
739 /// # Safety
740 ///
741 /// This function is unsafe because it does not check that the slice passed to it is valid
742 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
743 /// [`Utf32Str`] is always valid UTF-32.
744 ///
745 /// # Examples
746 ///
747 /// ```
748 /// use widestring::Utf32Str;
749 ///
750 /// let mut sparkle_heart = vec![0x1f496];
751 /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked_mut(&mut sparkle_heart) };
752 ///
753 /// assert_eq!("💖", sparkle_heart);
754 /// ```
755 fn from_slice_unchecked_mut() -> {}
756
757 /// Converts a boxed slice to a boxed string slice without checking that the string contains
758 /// valid UTF-32.
759 ///
760 /// # Safety
761 ///
762 /// This function is unsafe because it does not check if the string slice is valid UTF-32, and
763 /// [`Utf32Str`] must always be valid UTF-32.
764 fn from_boxed_slice_unchecked() -> {}
765
766 /// Returns an unchecked subslice of this string slice.
767 ///
768 /// This is the unchecked alternative to indexing the string slice.
769 ///
770 /// # Safety
771 ///
772 /// Callers of this function are responsible that these preconditions are satisfied:
773 ///
774 /// - The starting index must not exceed the ending index;
775 /// - Indexes must be within bounds of the original slice;
776 ///
777 /// Failing that, the returned string slice may reference invalid memory or violate the
778 /// invariants communicated by the type.
779 ///
780 /// # Examples
781 ///
782 /// ```
783 /// # use widestring::utf32str;
784 /// let v = utf32str!("⚧️🏳️⚧️➡️s");
785 /// unsafe {
786 /// assert_eq!(utf32str!("⚧️"), v.get_unchecked(..2));
787 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked(2..7));
788 /// assert_eq!(utf32str!("➡️"), v.get_unchecked(7..9));
789 /// assert_eq!(utf32str!("s"), v.get_unchecked(9..))
790 /// }
791 /// ```
792 fn get_unchecked() -> {}
793
794 /// Returns a mutable, unchecked subslice of this string slice.
795 ///
796 /// This is the unchecked alternative to indexing the string slice.
797 ///
798 /// # Safety
799 ///
800 /// Callers of this function are responsible that these preconditions are satisfied:
801 ///
802 /// - The starting index must not exceed the ending index;
803 /// - Indexes must be within bounds of the original slice;
804 ///
805 /// Failing that, the returned string slice may reference invalid memory or violate the
806 /// invariants communicated by the type.
807 ///
808 /// # Examples
809 ///
810 /// ```
811 /// # use widestring::utf32str;
812 /// # #[cfg(feature = "alloc")] {
813 /// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
814 /// unsafe {
815 /// assert_eq!(utf32str!("⚧️"), v.get_unchecked_mut(..2));
816 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked_mut(2..7));
817 /// assert_eq!(utf32str!("➡️"), v.get_unchecked_mut(7..9));
818 /// assert_eq!(utf32str!("s"), v.get_unchecked_mut(9..))
819 /// }
820 /// # }
821 /// ```
822 fn get_unchecked_mut() -> {}
823
824 /// Returns the length of `self`.
825 ///
826 /// This length is in the number of [`char`]s in the slice, not graphemes. In other words, it
827 /// may not be what a human considers the length of the string.
828 ///
829 /// # Examples
830 ///
831 /// ```
832 /// # use widestring::utf32str;
833 /// assert_eq!(utf32str!("foo").len(), 3);
834 ///
835 /// let complex = utf32str!("⚧️🏳️⚧️➡️s");
836 /// assert_eq!(complex.len(), 10);
837 /// assert_eq!(complex.chars().count(), 10);
838 /// ```
839 fn len() -> {}
840}
841
842impl Utf16Str {
843 /// Converts a slice of UTF-16 data to a string slice.
844 ///
845 /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
846 /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
847 /// then does the conversion.
848 ///
849 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
850 /// the validity check, there is an unsafe version of this function,
851 /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
852 /// the check.
853 ///
854 /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
855 ///
856 /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
857 /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
858 /// after converting from UTF-8 to UTF-16.
859 ///
860 /// # Errors
861 ///
862 /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
863 /// is not UTF-16.
864 ///
865 /// # Examples
866 ///
867 /// ```
868 /// use widestring::Utf16Str;
869 ///
870 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
871 /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
872 ///
873 /// assert_eq!("💖", sparkle_heart);
874 /// ```
875 ///
876 /// With incorrect values that return an error:
877 ///
878 /// ```
879 /// use widestring::Utf16Str;
880 ///
881 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
882 ///
883 /// assert!(Utf16Str::from_slice(&sparkle_heart).is_err());
884 /// ```
885 pub fn from_slice(s: &[u16]) -> Result<&Self, Utf16Error> {
886 validate_utf16(s)?;
887 // SAFETY: Just validated
888 Ok(unsafe { Self::from_slice_unchecked(s) })
889 }
890
891 /// Converts a mutable slice of UTF-16 data to a mutable string slice.
892 ///
893 /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
894 /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
895 /// then does the conversion.
896 ///
897 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
898 /// the validity check, there is an unsafe version of this function,
899 /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
900 /// but skips the check.
901 ///
902 /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
903 ///
904 /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
905 /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
906 /// after converting from UTF-8 to UTF-16.
907 ///
908 /// # Errors
909 ///
910 /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
911 /// is not UTF-16.
912 ///
913 /// # Examples
914 ///
915 /// ```
916 /// use widestring::Utf16Str;
917 ///
918 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
919 /// let sparkle_heart = Utf16Str::from_slice_mut(&mut sparkle_heart).unwrap();
920 ///
921 /// assert_eq!("💖", sparkle_heart);
922 /// ```
923 ///
924 /// With incorrect values that return an error:
925 ///
926 /// ```
927 /// use widestring::Utf16Str;
928 ///
929 /// let mut sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
930 ///
931 /// assert!(Utf16Str::from_slice_mut(&mut sparkle_heart).is_err());
932 /// ```
933 pub fn from_slice_mut(s: &mut [u16]) -> Result<&mut Self, Utf16Error> {
934 validate_utf16(s)?;
935 // SAFETY: Just validated
936 Ok(unsafe { Self::from_slice_unchecked_mut(s) })
937 }
938
939 /// Converts a wide string slice of undefined encoding to a UTF-16 string slice without checking
940 /// if the string slice is valid UTF-16.
941 ///
942 /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
943 ///
944 /// # Safety
945 ///
946 /// This function is unsafe because it does not check that the string slice passed to it is
947 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
948 /// the [`Utf16Str`] is always valid UTF-16.
949 ///
950 /// # Examples
951 ///
952 /// ```
953 /// use widestring::{Utf16Str, u16str};
954 ///
955 /// let sparkle_heart = u16str!("💖");
956 /// let sparkle_heart = unsafe { Utf16Str::from_ustr_unchecked(sparkle_heart) };
957 ///
958 /// assert_eq!("💖", sparkle_heart);
959 /// ```
960 #[must_use]
961 pub const unsafe fn from_ustr_unchecked(s: &U16Str) -> &Self {
962 Self::from_slice_unchecked(s.as_slice())
963 }
964
965 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice
966 /// without checking if the string slice is valid UTF-16.
967 ///
968 /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
969 ///
970 /// # Safety
971 ///
972 /// This function is unsafe because it does not check that the string slice passed to it is
973 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
974 /// the [`Utf16Str`] is always valid UTF-16.
975 #[must_use]
976 pub unsafe fn from_ustr_unchecked_mut(s: &mut U16Str) -> &mut Self {
977 Self::from_slice_unchecked_mut(s.as_mut_slice())
978 }
979
980 /// Converts a wide string slice of undefined encoding to a UTF-16 string slice.
981 ///
982 /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
983 /// [`U16Str`] does not contain valid UTF-16 data.
984 ///
985 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
986 /// the validity check, there is an unsafe version of this function,
987 /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
988 /// but skips the check.
989 ///
990 /// # Errors
991 ///
992 /// Returns an error if the string slice is not UTF-16 with a description as to why the
993 /// provided string slice is not UTF-16.
994 ///
995 /// # Examples
996 ///
997 /// ```
998 /// use widestring::{Utf16Str, u16str};
999 ///
1000 /// let sparkle_heart = u16str!("💖");
1001 /// let sparkle_heart = Utf16Str::from_ustr(sparkle_heart).unwrap();
1002 ///
1003 /// assert_eq!("💖", sparkle_heart);
1004 /// ```
1005 #[inline]
1006 pub fn from_ustr(s: &U16Str) -> Result<&Self, Utf16Error> {
1007 Self::from_slice(s.as_slice())
1008 }
1009
1010 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice.
1011 ///
1012 /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
1013 /// [`U16Str`] does not contain valid UTF-16 data.
1014 ///
1015 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1016 /// the validity check, there is an unsafe version of this function,
1017 /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1018 /// but skips the check.
1019 ///
1020 /// # Errors
1021 ///
1022 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1023 /// provided string slice is not UTF-16.
1024 #[inline]
1025 pub fn from_ustr_mut(s: &mut U16Str) -> Result<&mut Self, Utf16Error> {
1026 Self::from_slice_mut(s.as_mut_slice())
1027 }
1028
1029 /// Converts a wide C string slice to a UTF-16 string slice without checking if the
1030 /// string slice is valid UTF-16.
1031 ///
1032 /// The resulting string slice does *not* contain the nul terminator.
1033 ///
1034 /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1035 ///
1036 /// # Safety
1037 ///
1038 /// This function is unsafe because it does not check that the string slice passed to it is
1039 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1040 /// the [`Utf16Str`] is always valid UTF-16.
1041 ///
1042 /// # Examples
1043 ///
1044 /// ```
1045 /// use widestring::{Utf16Str, u16cstr};
1046 ///
1047 /// let sparkle_heart = u16cstr!("💖");
1048 /// let sparkle_heart = unsafe { Utf16Str::from_ucstr_unchecked(sparkle_heart) };
1049 ///
1050 /// assert_eq!("💖", sparkle_heart);
1051 /// ```
1052 #[inline]
1053 #[must_use]
1054 pub unsafe fn from_ucstr_unchecked(s: &crate::U16CStr) -> &Self {
1055 Self::from_slice_unchecked(s.as_slice())
1056 }
1057
1058 /// Converts a mutable wide C string slice to a mutable UTF-16 string slice without
1059 /// checking if the string slice is valid UTF-16.
1060 ///
1061 /// The resulting string slice does *not* contain the nul terminator.
1062 ///
1063 /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1064 ///
1065 /// # Safety
1066 ///
1067 /// This function is unsafe because it does not check that the string slice passed to it is
1068 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1069 /// the [`Utf16Str`] is always valid UTF-16.
1070 #[inline]
1071 #[must_use]
1072 pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U16CStr) -> &mut Self {
1073 Self::from_slice_unchecked_mut(s.as_mut_slice())
1074 }
1075
1076 /// Converts a wide C string slice to a UTF-16 string slice.
1077 ///
1078 /// The resulting string slice does *not* contain the nul terminator.
1079 ///
1080 /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1081 /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1082 ///
1083 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1084 /// the validity check, there is an unsafe version of this function,
1085 /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1086 /// but skips the check.
1087 ///
1088 /// # Errors
1089 ///
1090 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1091 /// provided string slice is not UTF-16.
1092 ///
1093 /// # Examples
1094 ///
1095 /// ```
1096 /// use widestring::{Utf16Str, u16cstr};
1097 ///
1098 /// let sparkle_heart = u16cstr!("💖");
1099 /// let sparkle_heart = Utf16Str::from_ucstr(sparkle_heart).unwrap();
1100 ///
1101 /// assert_eq!("💖", sparkle_heart);
1102 /// ```
1103 #[inline]
1104 pub fn from_ucstr(s: &crate::U16CStr) -> Result<&Self, Utf16Error> {
1105 Self::from_slice(s.as_slice())
1106 }
1107
1108 /// Converts a mutable wide C string slice to a mutable UTF-16 string slice.
1109 ///
1110 /// The resulting string slice does *not* contain the nul terminator.
1111 ///
1112 /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1113 /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1114 ///
1115 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1116 /// the validity check, there is an unsafe version of this function,
1117 /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1118 /// but skips the check.
1119 ///
1120 /// # Safety
1121 ///
1122 /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1123 /// when mutating the slice (i.e. by adding interior nul values).
1124 ///
1125 /// # Errors
1126 ///
1127 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1128 /// provided string slice is not UTF-16.
1129 #[inline]
1130 pub unsafe fn from_ucstr_mut(s: &mut crate::U16CStr) -> Result<&mut Self, Utf16Error> {
1131 Self::from_slice_mut(s.as_mut_slice())
1132 }
1133
/// Produces a standard UTF-8 [`String`] with the same contents.
///
/// The string is always valid UTF-16, so the conversion is lossless and cannot fail.
#[inline]
#[allow(clippy::inherent_to_string_shadow_display)]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_string(&self) -> String {
    let decoded = String::from_utf16(self.as_slice());
    // `self` is always valid UTF-16, so decoding is not expected to fail.
    decoded.unwrap()
}
1145
1146 /// Checks that `index`-th value is the value in a UTF-16 code point sequence or the end of the
1147 /// string.
1148 ///
1149 /// Returns `true` if the value at `index` is not a UTF-16 surrogate value, or if the value at
1150 /// `index` is the first value of a surrogate pair (the "high" surrogate). Returns `false` if
1151 /// the value at `index` is the second value of a surrogate pair (a.k.a the "low" surrogate).
1152 ///
1153 /// The start and end of the string (when `index == self.len()`) are considered to be
1154 /// boundaries.
1155 ///
1156 /// Returns `false` if `index is greater than `self.len()`.
1157 ///
1158 /// # Examples
1159 ///
1160 /// ```
1161 /// # use widestring::utf16str;
1162 /// let s = utf16str!("Sparkle 💖 Heart");
1163 /// assert!(s.is_char_boundary(0));
1164 ///
1165 /// // high surrogate of `💖`
1166 /// assert!(s.is_char_boundary(8));
1167 /// // low surrogate of `💖`
1168 /// assert!(!s.is_char_boundary(9));
1169 ///
1170 /// assert!(s.is_char_boundary(s.len()));
1171 /// ```
1172 #[inline]
1173 #[must_use]
1174 pub const fn is_char_boundary(&self, index: usize) -> bool {
1175 if index > self.len() {
1176 false
1177 } else if index == self.len() {
1178 true
1179 } else {
1180 !is_utf16_low_surrogate(self.inner[index])
1181 }
1182 }
1183
1184 /// Returns a subslice of this string.
1185 ///
1186 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1187 /// equivalent indexing operation would panic.
1188 ///
1189 /// # Examples
1190 ///
1191 /// ```
1192 /// # use widestring::{utf16str};
1193 /// let v = utf16str!("⚧️🏳️⚧️➡️s");
1194 ///
1195 /// assert_eq!(Some(utf16str!("⚧️")), v.get(..2));
1196 /// assert_eq!(Some(utf16str!("🏳️⚧️")), v.get(2..8));
1197 /// assert_eq!(Some(utf16str!("➡️")), v.get(8..10));
1198 /// assert_eq!(Some(utf16str!("s")), v.get(10..));
1199 ///
1200 /// assert!(v.get(3..4).is_none());
1201 /// ```
1202 #[inline]
1203 #[must_use]
1204 pub fn get<I>(&self, index: I) -> Option<&Self>
1205 where
1206 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1207 {
1208 // TODO: Use SliceIndex directly when it is stabilized
1209 let range = crate::range_check(index, ..self.len())?;
1210 if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1211 return None;
1212 }
1213
1214 // SAFETY: range_check verified bounds, and we just verified char boundaries
1215 Some(unsafe { self.get_unchecked(range) })
1216 }
1217
1218 /// Returns a mutable subslice of this string.
1219 ///
1220 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1221 /// equivalent indexing operation would panic.
1222 ///
1223 /// # Examples
1224 ///
1225 /// ```
1226 /// # use widestring::{utf16str};
1227 /// # #[cfg(feature = "alloc")] {
1228 /// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
1229 ///
1230 /// assert_eq!(utf16str!("⚧️"), v.get_mut(..2).unwrap());
1231 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_mut(2..8).unwrap());
1232 /// assert_eq!(utf16str!("➡️"), v.get_mut(8..10).unwrap());
1233 /// assert_eq!(utf16str!("s"), v.get_mut(10..).unwrap());
1234 ///
1235 /// assert!(v.get_mut(3..4).is_none());
1236 /// # }
1237 /// ```
1238 #[inline]
1239 #[must_use]
1240 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1241 where
1242 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1243 {
1244 // TODO: Use SliceIndex directly when it is stabilized
1245 let range = crate::range_check(index, ..self.len())?;
1246 if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1247 return None;
1248 }
1249
1250 // SAFETY: range_check verified bounds, and we just verified char boundaries
1251 Some(unsafe { self.get_unchecked_mut(range) })
1252 }
1253
1254 /// Divide one string slice into two at an index.
1255 ///
1256 /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1257 /// the boundary of a UTF-16 code point.
1258 ///
1259 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1260 /// the end of the string slice.
1261 ///
1262 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1263 ///
1264 /// # Panics
1265 ///
1266 /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1267 /// code point of the string slice.
1268 ///
1269 /// # Examples
1270 ///
1271 /// ```
1272 /// # use widestring::utf16str;
1273 /// let s = utf16str!("Per Martin-Löf");
1274 ///
1275 /// let (first, last) = s.split_at(3);
1276 ///
1277 /// assert_eq!("Per", first);
1278 /// assert_eq!(" Martin-Löf", last);
1279 /// ```
1280 #[inline]
1281 #[must_use]
1282 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1283 assert!(self.is_char_boundary(mid));
1284 let (a, b) = self.inner.split_at(mid);
1285 unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1286 }
1287
1288 /// Divide one mutable string slice into two at an index.
1289 ///
1290 /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1291 /// the boundary of a UTF-16 code point.
1292 ///
1293 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1294 /// the end of the string slice.
1295 ///
1296 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1297 ///
1298 /// # Panics
1299 ///
1300 /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1301 /// code point of the string slice.
1302 ///
1303 /// # Examples
1304 ///
1305 /// ```
1306 /// # use widestring::utf16str;
1307 /// # #[cfg(feature = "alloc")] {
1308 /// let mut s = utf16str!("Per Martin-Löf").to_owned();
1309 ///
1310 /// let (first, last) = s.split_at_mut(3);
1311 ///
1312 /// assert_eq!("Per", first);
1313 /// assert_eq!(" Martin-Löf", last);
1314 /// # }
1315 /// ```
1316 #[inline]
1317 #[must_use]
1318 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1319 assert!(self.is_char_boundary(mid));
1320 let (a, b) = self.inner.split_at_mut(mid);
1321 unsafe {
1322 (
1323 Self::from_slice_unchecked_mut(a),
1324 Self::from_slice_unchecked_mut(b),
1325 )
1326 }
1327 }
1328
1329 /// Returns an iterator over the [`char`]s of a string slice.
1330 ///
1331 /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1332 /// [`char`]. This method returns such an iterator.
1333 ///
1334 /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1335 /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1336 /// actually want. This functionality is not provided by this crate.
1337 #[inline]
1338 #[must_use]
1339 pub fn chars(&self) -> CharsUtf16<'_> {
1340 CharsUtf16::new(self.as_slice())
1341 }
1342
1343 /// Returns an iterator over the [`char`]s of a string slice and their positions.
1344 ///
1345 /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1346 /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
1347 ///
1348 /// The iterator yields tuples. The position is first, the [`char`] is second.
1349 #[inline]
1350 #[must_use]
1351 pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
1352 CharIndicesUtf16::new(self.as_slice())
1353 }
1354
1355 /// An iterator over the [`u16`] code units of a string slice.
1356 ///
1357 /// As a UTF-16 string slice consists of a sequence of [`u16`] code units, we can iterate
1358 /// through a string slice by each code unit. This method returns such an iterator.
1359 #[must_use]
1360 pub fn code_units(&self) -> CodeUnits<'_> {
1361 CodeUnits::new(self.as_slice())
1362 }
1363
1364 /// Returns an iterator of bytes over the string encoded as UTF-8.
1365 #[must_use]
1366 pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf16<'_>> {
1367 crate::encode_utf8(self.chars())
1368 }
1369
1370 /// Returns an iterator of [`u32`] over the sting encoded as UTF-32.
1371 #[must_use]
1372 pub fn encode_utf32(&self) -> EncodeUtf32<CharsUtf16<'_>> {
1373 crate::encode_utf32(self.chars())
1374 }
1375
1376 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
1377 #[inline]
1378 #[must_use]
1379 pub fn escape_debug(&self) -> EscapeDebug<CharsUtf16<'_>> {
1380 EscapeDebug::<CharsUtf16>::new(self.as_slice())
1381 }
1382
1383 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
1384 #[inline]
1385 #[must_use]
1386 pub fn escape_default(&self) -> EscapeDefault<CharsUtf16<'_>> {
1387 EscapeDefault::<CharsUtf16>::new(self.as_slice())
1388 }
1389
1390 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
1391 #[inline]
1392 #[must_use]
1393 pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf16<'_>> {
1394 EscapeUnicode::<CharsUtf16>::new(self.as_slice())
1395 }
1396
/// Builds a new [`Utf16String`] holding the lowercase equivalent of this string slice.
///
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
/// `Lowercase`.
///
/// Changing case can expand a single character into several, so the result is returned as a
/// new [`Utf16String`] rather than written back in-place.
#[inline]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_lowercase(&self) -> Utf16String {
    let mut lowered = Utf16String::with_capacity(self.len());
    // One input char may map to several output chars, hence the flattening.
    self.chars()
        .flat_map(|c| c.to_lowercase())
        .for_each(|c| lowered.push(c));
    lowered
}
1417
/// Builds a new [`Utf16String`] holding the uppercase equivalent of this string slice.
///
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
/// `Uppercase`.
///
/// Changing case can expand a single character into several, so the result is returned as a
/// new [`Utf16String`] rather than written back in-place.
#[inline]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_uppercase(&self) -> Utf16String {
    let mut raised = Utf16String::with_capacity(self.len());
    // One input char may map to several output chars, hence the flattening.
    self.chars()
        .flat_map(|c| c.to_uppercase())
        .for_each(|c| raised.push(c));
    raised
}
1438
1439 /// Returns an iterator over the lines of a [`Utf16Str`], as string slices.
1440 ///
1441 /// Lines are split at line endings that are either newlines (`\n`) or
1442 /// sequences of a carriage return followed by a line feed (`\r\n`).
1443 ///
1444 /// Line terminators are not included in the lines returned by the iterator.
1445 ///
1446 /// Note that any carriage return (`\r`) not immediately followed by a
1447 /// line feed (`\n`) does not split a line. These carriage returns are
1448 /// thereby included in the produced lines.
1449 ///
1450 /// The final line ending is optional. A string that ends with a final line
1451 /// ending will return the same lines as an otherwise identical string
1452 /// without a final line ending.
1453 ///
1454 /// # Examples
1455 ///
1456 /// Basic usage:
1457 ///
1458 /// ```
1459 /// use widestring::utf16str;
1460 ///
1461 /// let text = utf16str!("foo\r\nbar\n\nbaz\r");
1462 /// let mut lines = text.lines();
1463 ///
1464 /// assert_eq!(Some(utf16str!("foo")), lines.next());
1465 /// assert_eq!(Some(utf16str!("bar")), lines.next());
1466 /// assert_eq!(Some(utf16str!("")), lines.next());
1467 /// // Trailing carriage return is included in the last line
1468 /// assert_eq!(Some(utf16str!("baz\r")), lines.next());
1469 ///
1470 /// assert_eq!(None, lines.next());
1471 /// ```
1472 ///
1473 /// The final line does not require any ending:
1474 ///
1475 /// ```
1476 /// use widestring::utf16str;
1477 ///
1478 /// let text = utf16str!("foo\nbar\n\r\nbaz");
1479 /// let mut lines = text.lines();
1480 ///
1481 /// assert_eq!(Some(utf16str!("foo")), lines.next());
1482 /// assert_eq!(Some(utf16str!("bar")), lines.next());
1483 /// assert_eq!(Some(utf16str!("")), lines.next());
1484 /// assert_eq!(Some(utf16str!("baz")), lines.next());
1485 ///
1486 /// assert_eq!(None, lines.next());
1487 /// ```
1488 pub fn lines(&self) -> Lines<'_, Utf16Str, CharIndicesUtf16<'_>> {
1489 Lines::new(self, self.len(), self.char_indices())
1490 }
1491}
1492
1493impl Utf32Str {
1494 /// Converts a slice of UTF-32 data to a string slice.
1495 ///
1496 /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1497 /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1498 /// then does the conversion.
1499 ///
1500 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1501 /// the validity check, there is an unsafe version of this function,
1502 /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
1503 /// the check.
1504 ///
1505 /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1506 ///
1507 /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1508 /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1509 /// after converting from UTF-8 to UTF-32.
1510 ///
1511 /// # Errors
1512 ///
1513 /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1514 /// is not UTF-32.
1515 ///
1516 /// # Examples
1517 ///
1518 /// ```
1519 /// use widestring::Utf32Str;
1520 ///
1521 /// let sparkle_heart = vec![0x1f496];
1522 /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
1523 ///
1524 /// assert_eq!("💖", sparkle_heart);
1525 /// ```
1526 ///
1527 /// With incorrect values that return an error:
1528 ///
1529 /// ```
1530 /// use widestring::Utf32Str;
1531 ///
1532 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1533 ///
1534 /// assert!(Utf32Str::from_slice(&sparkle_heart).is_err());
1535 /// ```
1536 pub fn from_slice(s: &[u32]) -> Result<&Self, Utf32Error> {
1537 validate_utf32(s)?;
1538 // SAFETY: Just validated
1539 Ok(unsafe { Self::from_slice_unchecked(s) })
1540 }
1541
1542 /// Converts a mutable slice of UTF-32 data to a mutable string slice.
1543 ///
1544 /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1545 /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1546 /// then does the conversion.
1547 ///
1548 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1549 /// the validity check, there is an unsafe version of this function,
1550 /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
1551 /// but skips the check.
1552 ///
1553 /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1554 ///
1555 /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1556 /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1557 /// after converting from UTF-8 to UTF-32.
1558 ///
1559 /// # Errors
1560 ///
1561 /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1562 /// is not UTF-32.
1563 ///
1564 /// # Examples
1565 ///
1566 /// ```
1567 /// use widestring::Utf32Str;
1568 ///
1569 /// let mut sparkle_heart = vec![0x1f496];
1570 /// let sparkle_heart = Utf32Str::from_slice_mut(&mut sparkle_heart).unwrap();
1571 ///
1572 /// assert_eq!("💖", sparkle_heart);
1573 /// ```
1574 ///
1575 /// With incorrect values that return an error:
1576 ///
1577 /// ```
1578 /// use widestring::Utf32Str;
1579 ///
1580 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1581 ///
1582 /// assert!(Utf32Str::from_slice_mut(&mut sparkle_heart).is_err());
1583 /// ```
1584 pub fn from_slice_mut(s: &mut [u32]) -> Result<&mut Self, Utf32Error> {
1585 validate_utf32(s)?;
1586 // SAFETY: Just validated
1587 Ok(unsafe { Self::from_slice_unchecked_mut(s) })
1588 }
1589
1590 /// Converts a wide string slice of undefined encoding to a UTF-32 string slice without checking
1591 /// if the string slice is valid UTF-32.
1592 ///
1593 /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
1594 ///
1595 /// # Safety
1596 ///
1597 /// This function is unsafe because it does not check that the string slice passed to it is
1598 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1599 /// the [`Utf32Str`] is always valid UTF-32.
1600 ///
1601 /// # Examples
1602 ///
1603 /// ```
1604 /// use widestring::{Utf32Str, u32str};
1605 ///
1606 /// let sparkle_heart = u32str!("💖");
1607 /// let sparkle_heart = unsafe { Utf32Str::from_ustr_unchecked(sparkle_heart) };
1608 ///
1609 /// assert_eq!("💖", sparkle_heart);
1610 /// ```
1611 #[inline]
1612 #[must_use]
1613 pub const unsafe fn from_ustr_unchecked(s: &crate::U32Str) -> &Self {
1614 Self::from_slice_unchecked(s.as_slice())
1615 }
1616
1617 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice
1618 /// without checking if the string slice is valid UTF-32.
1619 ///
1620 /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
1621 ///
1622 /// # Safety
1623 ///
1624 /// This function is unsafe because it does not check that the string slice passed to it is
1625 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1626 /// the [`Utf32Str`] is always valid UTF-32.
1627 #[inline]
1628 #[must_use]
1629 pub unsafe fn from_ustr_unchecked_mut(s: &mut crate::U32Str) -> &mut Self {
1630 Self::from_slice_unchecked_mut(s.as_mut_slice())
1631 }
1632
1633 /// Converts a wide string slice of undefined encoding to a UTF-32 string slice.
1634 ///
1635 /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1636 /// [`U32Str`] does not contain valid UTF-32 data.
1637 ///
1638 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1639 /// the validity check, there is an unsafe version of this function,
1640 /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
1641 /// but skips the check.
1642 ///
1643 /// # Errors
1644 ///
1645 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1646 /// provided string slice is not UTF-32.
1647 ///
1648 /// # Examples
1649 ///
1650 /// ```
1651 /// use widestring::{Utf32Str, u32str};
1652 ///
1653 /// let sparkle_heart = u32str!("💖");
1654 /// let sparkle_heart = Utf32Str::from_ustr(sparkle_heart).unwrap();
1655 ///
1656 /// assert_eq!("💖", sparkle_heart);
1657 /// ```
1658 #[inline]
1659 pub fn from_ustr(s: &crate::U32Str) -> Result<&Self, Utf32Error> {
1660 Self::from_slice(s.as_slice())
1661 }
1662
1663 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice.
1664 ///
1665 /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1666 /// [`U32Str`] does not contain valid UTF-32 data.
1667 ///
1668 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1669 /// the validity check, there is an unsafe version of this function,
1670 /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1671 /// but skips the check.
1672 ///
1673 /// # Errors
1674 ///
1675 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1676 /// provided string slice is not UTF-32.
1677 #[inline]
1678 pub fn from_ustr_mut(s: &mut crate::U32Str) -> Result<&mut Self, Utf32Error> {
1679 Self::from_slice_mut(s.as_mut_slice())
1680 }
1681
1682 /// Converts a wide C string slice to a UTF-32 string slice without checking if the
1683 /// string slice is valid UTF-32.
1684 ///
1685 /// The resulting string slice does *not* contain the nul terminator.
1686 ///
1687 /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1688 ///
1689 /// # Safety
1690 ///
1691 /// This function is unsafe because it does not check that the string slice passed to it is
1692 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1693 /// the [`Utf32Str`] is always valid UTF-32.
1694 ///
1695 /// # Examples
1696 ///
1697 /// ```
1698 /// use widestring::{Utf32Str, u32cstr};
1699 ///
1700 /// let sparkle_heart = u32cstr!("💖");
1701 /// let sparkle_heart = unsafe { Utf32Str::from_ucstr_unchecked(sparkle_heart) };
1702 ///
1703 /// assert_eq!("💖", sparkle_heart);
1704 /// ```
1705 #[inline]
1706 #[must_use]
1707 pub unsafe fn from_ucstr_unchecked(s: &crate::U32CStr) -> &Self {
1708 Self::from_slice_unchecked(s.as_slice())
1709 }
1710
1711 /// Converts a mutable wide C string slice to a mutable UTF-32 string slice without
1712 /// checking if the string slice is valid UTF-32.
1713 ///
1714 /// The resulting string slice does *not* contain the nul terminator.
1715 ///
1716 /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1717 ///
1718 /// # Safety
1719 ///
1720 /// This function is unsafe because it does not check that the string slice passed to it is
1721 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1722 /// the [`Utf32Str`] is always valid UTF-32.
1723 #[inline]
1724 #[must_use]
1725 pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U32CStr) -> &mut Self {
1726 Self::from_slice_unchecked_mut(s.as_mut_slice())
1727 }
1728
1729 /// Converts a wide C string slice to a UTF-32 string slice.
1730 ///
1731 /// The resulting string slice does *not* contain the nul terminator.
1732 ///
1733 /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1734 /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1735 ///
1736 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1737 /// the validity check, there is an unsafe version of this function,
1738 /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1739 /// but skips the check.
1740 ///
1741 /// # Errors
1742 ///
1743 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1744 /// provided string slice is not UTF-32.
1745 ///
1746 /// # Examples
1747 ///
1748 /// ```
1749 /// use widestring::{Utf32Str, u32cstr};
1750 ///
1751 /// let sparkle_heart = u32cstr!("💖");
1752 /// let sparkle_heart = Utf32Str::from_ucstr(sparkle_heart).unwrap();
1753 ///
1754 /// assert_eq!("💖", sparkle_heart);
1755 /// ```
1756 #[inline]
1757 pub fn from_ucstr(s: &crate::U32CStr) -> Result<&Self, Utf32Error> {
1758 Self::from_slice(s.as_slice())
1759 }
1760
1761 /// Converts a mutable wide C string slice to a mutable UTF-32 string slice.
1762 ///
1763 /// The resulting string slice does *not* contain the nul terminator.
1764 ///
1765 /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1766 /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1767 ///
1768 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1769 /// the validity check, there is an unsafe version of this function,
1770 /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1771 /// but skips the check.
1772 ///
1773 /// # Safety
1774 ///
1775 /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1776 /// when mutating the slice (i.e. by adding interior nul values).
1777 ///
1778 /// # Errors
1779 ///
1780 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1781 /// provided string slice is not UTF-32.
1782 #[inline]
1783 pub unsafe fn from_ucstr_mut(s: &mut crate::U32CStr) -> Result<&mut Self, Utf32Error> {
1784 Self::from_slice_mut(s.as_mut_slice())
1785 }
1786
1787 /// Converts a slice of [`char`]s to a string slice.
1788 ///
1789 /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1790 ///
1791 /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1792 ///
1793 /// # Examples
1794 ///
1795 /// ```
1796 /// use widestring::Utf32Str;
1797 ///
1798 /// let sparkle_heart = ['💖'];
1799 /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
1800 ///
1801 /// assert_eq!("💖", sparkle_heart);
1802 /// ```
1803 #[allow(trivial_casts)]
1804 #[inline]
1805 #[must_use]
1806 pub const fn from_char_slice(s: &[char]) -> &Self {
1807 // SAFETY: char slice is always valid UTF-32
1808 unsafe { Self::from_slice_unchecked(&*(s as *const [char] as *const [u32])) }
1809 }
1810
1811 /// Converts a mutable slice of [`char`]s to a string slice.
1812 ///
1813 /// Since [`char`] slices are always valid UTF-32, this conversion always suceeds.
1814 ///
1815 /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1816 ///
1817 /// # Examples
1818 ///
1819 /// ```
1820 /// use widestring::Utf32Str;
1821 ///
1822 /// let mut sparkle_heart = ['💖'];
1823 /// let sparkle_heart = Utf32Str::from_char_slice_mut(&mut sparkle_heart);
1824 ///
1825 /// assert_eq!("💖", sparkle_heart);
1826 /// ```
1827 #[allow(trivial_casts)]
1828 #[inline]
1829 #[must_use]
1830 pub fn from_char_slice_mut(s: &mut [char]) -> &mut Self {
1831 // SAFETY: char slice is always valid UTF-32
1832 unsafe { Self::from_slice_unchecked_mut(&mut *(s as *mut [char] as *mut [u32])) }
1833 }
1834
1835 /// Converts a string slice into a slice of [`char`]s.
1836 #[allow(trivial_casts)]
1837 #[inline]
1838 #[must_use]
1839 pub const fn as_char_slice(&self) -> &[char] {
1840 // SAFETY: Self should be valid UTF-32 so chars will be in range
1841 unsafe { &*(self.as_slice() as *const [u32] as *const [char]) }
1842 }
1843
1844 /// Converts a mutable string slice into a mutable slice of [`char`]s.
1845 #[allow(trivial_casts)]
1846 #[inline]
1847 #[must_use]
1848 pub fn as_char_slice_mut(&mut self) -> &mut [char] {
1849 // SAFETY: Self should be valid UTF-32 so chars will be in range
1850 unsafe { &mut *(self.as_mut_slice() as *mut [u32] as *mut [char]) }
1851 }
1852
1853 /// Converts to a standard UTF-8 [`String`].
1854 ///
1855 /// Because this string is always valid UTF-32, the conversion is lossless and non-fallible.
1856 #[inline]
1857 #[allow(clippy::inherent_to_string_shadow_display)]
1858 #[cfg(feature = "alloc")]
1859 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1860 #[must_use]
1861 pub fn to_string(&self) -> String {
1862 let mut s = String::with_capacity(self.len());
1863 s.extend(self.as_char_slice());
1864 s
1865 }
1866
1867 /// Returns a subslice of this string.
1868 ///
1869 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1870 /// equivalent indexing operation would panic.
1871 ///
1872 /// # Examples
1873 ///
1874 /// ```
1875 /// # use widestring::{utf32str};
1876 /// let v = utf32str!("⚧️🏳️⚧️➡️s");
1877 ///
1878 /// assert_eq!(Some(utf32str!("⚧️")), v.get(..2));
1879 /// assert_eq!(Some(utf32str!("🏳️⚧️")), v.get(2..7));
1880 /// assert_eq!(Some(utf32str!("➡️")), v.get(7..9));
1881 /// assert_eq!(Some(utf32str!("s")), v.get(9..));
1882 /// ```
1883 #[inline]
1884 #[must_use]
1885 pub fn get<I>(&self, index: I) -> Option<&Self>
1886 where
1887 I: SliceIndex<[u32], Output = [u32]>,
1888 {
1889 // TODO: Use SliceIndex directly when it is stabilized
1890 // SAFETY: subslice has already been verified
1891 self.inner
1892 .get(index)
1893 .map(|s| unsafe { Self::from_slice_unchecked(s) })
1894 }
1895
1896 /// Returns a mutable subslice of this string.
1897 ///
1898 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1899 /// equivalent indexing operation would panic.
1900 ///
1901 /// # Examples
1902 ///
1903 /// ```
1904 /// # use widestring::{utf32str};
1905 /// # #[cfg(feature = "alloc")] {
1906 /// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
1907 ///
1908 /// assert_eq!(utf32str!("⚧️"), v.get_mut(..2).unwrap());
1909 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_mut(2..7).unwrap());
1910 /// assert_eq!(utf32str!("➡️"), v.get_mut(7..9).unwrap());
1911 /// assert_eq!(utf32str!("s"), v.get_mut(9..).unwrap());
1912 /// # }
1913 /// ```
1914 #[inline]
1915 #[must_use]
1916 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1917 where
1918 I: SliceIndex<[u32], Output = [u32]>,
1919 {
1920 // TODO: Use SliceIndex directly when it is stabilized
1921 // SAFETY: subslice has already been verified
1922 self.inner
1923 .get_mut(index)
1924 .map(|s| unsafe { Self::from_slice_unchecked_mut(s) })
1925 }
1926
1927 /// Divide one string slice into two at an index.
1928 ///
1929 /// The argument, `mid`, should be an offset from the start of the string.
1930 ///
1931 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1932 /// the end of the string slice.
1933 ///
1934 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1935 ///
1936 /// # Panics
1937 ///
1938 /// Panics if `mid` is past the end of the last code point of the string slice.
1939 ///
1940 /// # Examples
1941 ///
1942 /// ```
1943 /// # use widestring::utf32str;
1944 /// let s = utf32str!("Per Martin-Löf");
1945 ///
1946 /// let (first, last) = s.split_at(3);
1947 ///
1948 /// assert_eq!("Per", first);
1949 /// assert_eq!(" Martin-Löf", last);
1950 /// ```
1951 #[inline]
1952 #[must_use]
1953 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1954 let (a, b) = self.inner.split_at(mid);
1955 unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1956 }
1957
1958 /// Divide one mutable string slice into two at an index.
1959 ///
1960 /// The argument, `mid`, should be an offset from the start of the string.
1961 ///
1962 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1963 /// the end of the string slice.
1964 ///
1965 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1966 ///
1967 /// # Panics
1968 ///
1969 /// Panics if `mid` is past the end of the last code point of the string slice.
1970 ///
1971 /// # Examples
1972 ///
1973 /// ```
1974 /// # use widestring::utf32str;
1975 /// # #[cfg(feature = "alloc")] {
1976 /// let mut s = utf32str!("Per Martin-Löf").to_owned();
1977 ///
1978 /// let (first, last) = s.split_at_mut(3);
1979 ///
1980 /// assert_eq!("Per", first);
1981 /// assert_eq!(" Martin-Löf", last);
1982 /// # }
1983 /// ```
1984 #[inline]
1985 #[must_use]
1986 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1987 let (a, b) = self.inner.split_at_mut(mid);
1988 unsafe {
1989 (
1990 Self::from_slice_unchecked_mut(a),
1991 Self::from_slice_unchecked_mut(b),
1992 )
1993 }
1994 }
1995
1996 /// Returns an iterator over the [`char`]s of a string slice.
1997 ///
1998 /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
1999 /// [`char`]. This method returns such an iterator.
2000 ///
2001 /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
2002 /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
2003 /// actually want. This functionality is not provided by this crate.
2004 #[inline]
2005 #[must_use]
2006 pub fn chars(&self) -> CharsUtf32<'_> {
2007 CharsUtf32::new(self.as_slice())
2008 }
2009
2010 /// Returns an iterator over the [`char`]s of a string slice and their positions.
2011 ///
2012 /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
2013 /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
2014 ///
2015 /// The iterator yields tuples. The position is first, the [`char`] is second.
2016 #[inline]
2017 #[must_use]
2018 pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
2019 CharIndicesUtf32::new(self.as_slice())
2020 }
2021
2022 /// Returns an iterator of bytes over the string encoded as UTF-8.
2023 #[must_use]
2024 pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf32<'_>> {
2025 crate::encode_utf8(self.chars())
2026 }
2027
2028 /// Returns an iterator of [`u16`] over the sting encoded as UTF-16.
2029 #[must_use]
2030 pub fn encode_utf16(&self) -> EncodeUtf16<CharsUtf32<'_>> {
2031 crate::encode_utf16(self.chars())
2032 }
2033
2034 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
2035 #[inline]
2036 #[must_use]
2037 pub fn escape_debug(&self) -> EscapeDebug<CharsUtf32<'_>> {
2038 EscapeDebug::<CharsUtf32>::new(self.as_slice())
2039 }
2040
2041 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
2042 #[inline]
2043 #[must_use]
2044 pub fn escape_default(&self) -> EscapeDefault<CharsUtf32<'_>> {
2045 EscapeDefault::<CharsUtf32>::new(self.as_slice())
2046 }
2047
2048 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
2049 #[inline]
2050 #[must_use]
2051 pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf32<'_>> {
2052 EscapeUnicode::<CharsUtf32>::new(self.as_slice())
2053 }
2054
2055 /// Returns the lowercase equivalent of this string slice, as a new [`Utf32String`].
2056 ///
2057 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
2058 /// `Lowercase`.
2059 ///
2060 /// Since some characters can expand into multiple characters when changing the case, this
2061 /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2062 #[inline]
2063 #[cfg(feature = "alloc")]
2064 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2065 #[must_use]
2066 pub fn to_lowercase(&self) -> Utf32String {
2067 let mut s = Utf32String::with_capacity(self.len());
2068 for c in self.chars() {
2069 for lower in c.to_lowercase() {
2070 s.push(lower);
2071 }
2072 }
2073 s
2074 }
2075
2076 /// Returns the uppercase equivalent of this string slice, as a new [`Utf32String`].
2077 ///
2078 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
2079 /// `Uppercase`.
2080 ///
2081 /// Since some characters can expand into multiple characters when changing the case, this
2082 /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2083 #[inline]
2084 #[cfg(feature = "alloc")]
2085 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2086 #[must_use]
2087 pub fn to_uppercase(&self) -> Utf32String {
2088 let mut s = Utf32String::with_capacity(self.len());
2089 for c in self.chars() {
2090 for lower in c.to_uppercase() {
2091 s.push(lower);
2092 }
2093 }
2094 s
2095 }
2096
2097 /// Returns an iterator over the lines of a [`Utf32Str`], as string slices.
2098 ///
2099 /// Lines are split at line endings that are either newlines (`\n`) or
2100 /// sequences of a carriage return followed by a line feed (`\r\n`).
2101 ///
2102 /// Line terminators are not included in the lines returned by the iterator.
2103 ///
2104 /// Note that any carriage return (`\r`) not immediately followed by a
2105 /// line feed (`\n`) does not split a line. These carriage returns are
2106 /// thereby included in the produced lines.
2107 ///
2108 /// The final line ending is optional. A string that ends with a final line
2109 /// ending will return the same lines as an otherwise identical string
2110 /// without a final line ending.
2111 ///
2112 /// # Examples
2113 ///
2114 /// Basic usage:
2115 ///
2116 /// ```
2117 /// use widestring::utf32str;
2118 ///
2119 /// let text = utf32str!("foo\r\nbar\n\nbaz\r");
2120 /// let mut lines = text.lines();
2121 ///
2122 /// assert_eq!(Some(utf32str!("foo")), lines.next());
2123 /// assert_eq!(Some(utf32str!("bar")), lines.next());
2124 /// assert_eq!(Some(utf32str!("")), lines.next());
2125 /// // Trailing carriage return is included in the last line
2126 /// assert_eq!(Some(utf32str!("baz\r")), lines.next());
2127 ///
2128 /// assert_eq!(None, lines.next());
2129 /// ```
2130 ///
2131 /// The final line does not require any ending:
2132 ///
2133 /// ```
2134 /// use widestring::utf32str;
2135 ///
2136 /// let text = utf32str!("foo\nbar\n\r\nbaz");
2137 /// let mut lines = text.lines();
2138 ///
2139 /// assert_eq!(Some(utf32str!("foo")), lines.next());
2140 /// assert_eq!(Some(utf32str!("bar")), lines.next());
2141 /// assert_eq!(Some(utf32str!("")), lines.next());
2142 /// assert_eq!(Some(utf32str!("baz")), lines.next());
2143 ///
2144 /// assert_eq!(None, lines.next());
2145 /// ```
2146 pub fn lines(&self) -> Lines<'_, Utf32Str, CharIndicesUtf32<'_>> {
2147 Lines::new(self, self.len(), self.char_indices())
2148 }
2149}
2150
2151impl AsMut<[char]> for Utf32Str {
2152 #[inline]
2153 fn as_mut(&mut self) -> &mut [char] {
2154 self.as_char_slice_mut()
2155 }
2156}
2157
2158impl AsRef<[char]> for Utf32Str {
2159 #[inline]
2160 fn as_ref(&self) -> &[char] {
2161 self.as_char_slice()
2162 }
2163}
2164
2165impl<'a> From<&'a [char]> for &'a Utf32Str {
2166 #[inline]
2167 fn from(value: &'a [char]) -> Self {
2168 Utf32Str::from_char_slice(value)
2169 }
2170}
2171
2172impl<'a> From<&'a mut [char]> for &'a mut Utf32Str {
2173 #[inline]
2174 fn from(value: &'a mut [char]) -> Self {
2175 Utf32Str::from_char_slice_mut(value)
2176 }
2177}
2178
2179impl<'a> From<&'a Utf32Str> for &'a [char] {
2180 #[inline]
2181 fn from(value: &'a Utf32Str) -> Self {
2182 value.as_char_slice()
2183 }
2184}
2185
2186impl<'a> From<&'a mut Utf32Str> for &'a mut [char] {
2187 #[inline]
2188 fn from(value: &'a mut Utf32Str) -> Self {
2189 value.as_char_slice_mut()
2190 }
2191}
2192
2193impl<I> Index<I> for Utf16Str
2194where
2195 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2196{
2197 type Output = Utf16Str;
2198
2199 #[inline]
2200 fn index(&self, index: I) -> &Self::Output {
2201 self.get(index)
2202 .expect("index out of bounds or not on char boundary")
2203 }
2204}
2205
2206impl<I> Index<I> for Utf32Str
2207where
2208 I: SliceIndex<[u32], Output = [u32]>,
2209{
2210 type Output = Utf32Str;
2211
2212 #[inline]
2213 fn index(&self, index: I) -> &Self::Output {
2214 self.get(index).expect("index out of bounds")
2215 }
2216}
2217
2218impl<I> IndexMut<I> for Utf16Str
2219where
2220 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2221{
2222 #[inline]
2223 fn index_mut(&mut self, index: I) -> &mut Self::Output {
2224 self.get_mut(index)
2225 .expect("index out of bounds or not on char boundary")
2226 }
2227}
2228
2229impl<I> IndexMut<I> for Utf32Str
2230where
2231 I: SliceIndex<[u32], Output = [u32]>,
2232{
2233 #[inline]
2234 fn index_mut(&mut self, index: I) -> &mut Self::Output {
2235 self.get_mut(index).expect("index out of bounds")
2236 }
2237}
2238
2239impl PartialEq<[char]> for Utf32Str {
2240 #[inline]
2241 fn eq(&self, other: &[char]) -> bool {
2242 self.as_char_slice() == other
2243 }
2244}
2245
2246impl PartialEq<Utf32Str> for [char] {
2247 #[inline]
2248 fn eq(&self, other: &Utf32Str) -> bool {
2249 self == other.as_char_slice()
2250 }
2251}
2252
2253impl PartialEq<Utf16Str> for Utf32Str {
2254 #[inline]
2255 fn eq(&self, other: &Utf16Str) -> bool {
2256 self.chars().eq(other.chars())
2257 }
2258}
2259
2260impl PartialEq<Utf32Str> for Utf16Str {
2261 #[inline]
2262 fn eq(&self, other: &Utf32Str) -> bool {
2263 self.chars().eq(other.chars())
2264 }
2265}
2266
2267impl PartialEq<&Utf16Str> for Utf32Str {
2268 #[inline]
2269 fn eq(&self, other: &&Utf16Str) -> bool {
2270 self.chars().eq(other.chars())
2271 }
2272}
2273
2274impl PartialEq<&Utf32Str> for Utf16Str {
2275 #[inline]
2276 fn eq(&self, other: &&Utf32Str) -> bool {
2277 self.chars().eq(other.chars())
2278 }
2279}
2280
2281impl PartialEq<Utf16Str> for &Utf32Str {
2282 #[inline]
2283 fn eq(&self, other: &Utf16Str) -> bool {
2284 self.chars().eq(other.chars())
2285 }
2286}
2287
2288impl PartialEq<Utf32Str> for &Utf16Str {
2289 #[inline]
2290 fn eq(&self, other: &Utf32Str) -> bool {
2291 self.chars().eq(other.chars())
2292 }
2293}
2294
2295impl<'a> TryFrom<&'a [u16]> for &'a Utf16Str {
2296 type Error = Utf16Error;
2297
2298 #[inline]
2299 fn try_from(value: &'a [u16]) -> Result<Self, Self::Error> {
2300 Utf16Str::from_slice(value)
2301 }
2302}
2303
2304impl<'a> TryFrom<&'a mut [u16]> for &'a mut Utf16Str {
2305 type Error = Utf16Error;
2306
2307 #[inline]
2308 fn try_from(value: &'a mut [u16]) -> Result<Self, Self::Error> {
2309 Utf16Str::from_slice_mut(value)
2310 }
2311}
2312
2313impl<'a> TryFrom<&'a [u32]> for &'a Utf32Str {
2314 type Error = Utf32Error;
2315
2316 #[inline]
2317 fn try_from(value: &'a [u32]) -> Result<Self, Self::Error> {
2318 Utf32Str::from_slice(value)
2319 }
2320}
2321
2322impl<'a> TryFrom<&'a mut [u32]> for &'a mut Utf32Str {
2323 type Error = Utf32Error;
2324
2325 #[inline]
2326 fn try_from(value: &'a mut [u32]) -> Result<Self, Self::Error> {
2327 Utf32Str::from_slice_mut(value)
2328 }
2329}
2330
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on non-Windows targets, where the alias is [`Utf32Str`].
#[cfg(not(windows))]
pub type WideUtfStr = Utf32Str;
2335
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on Windows targets, where the alias is [`Utf16Str`].
#[cfg(windows)]
pub type WideUtfStr = Utf16Str;
2340
#[cfg(test)]
mod test {
    use crate::*;

    /// Exercises `trim_start` and `trim_end` on UTF-16 strings, including non-ASCII text and a
    /// character outside the Basic Multilingual Plane.
    #[test]
    fn utf16_trim() {
        let tabbed = utf16str!(" Hello\tworld\t");
        assert_eq!(utf16str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf16str!(" English ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf16str!(" עברית ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());

        let leading_heart = utf16str!(" 💕Heart ");
        assert_eq!(utf16str!("💕Heart "), leading_heart.trim_start());

        let trailing_heart = utf16str!(" Heart💕 ");
        assert_eq!(utf16str!(" Heart💕"), trailing_heart.trim_end());
    }

    /// Exercises `trim_start` and `trim_end` on UTF-32 strings with the same inputs as the
    /// UTF-16 test.
    #[test]
    fn utf32_trim() {
        let tabbed = utf32str!(" Hello\tworld\t");
        assert_eq!(utf32str!("Hello\tworld\t"), tabbed.trim_start());

        let english = utf32str!(" English ");
        assert_eq!(Some('E'), english.trim_start().chars().next());

        let hebrew = utf32str!(" עברית ");
        assert_eq!(Some('ע'), hebrew.trim_start().chars().next());

        let leading_heart = utf32str!(" 💕Heart ");
        assert_eq!(utf32str!("💕Heart "), leading_heart.trim_start());

        let trailing_heart = utf32str!(" Heart💕 ");
        assert_eq!(utf32str!(" Heart💕"), trailing_heart.trim_end());
    }
}