widestring/utfstr.rs
1//! UTF string slices.
2//!
3//! This module contains UTF string slices and related types.
4
5use crate::{
6 error::{Utf16Error, Utf32Error},
7 is_utf16_low_surrogate,
8 iter::{EncodeUtf16, EncodeUtf32, EncodeUtf8},
9 validate_utf16, validate_utf32, U16Str, U32Str,
10};
11#[cfg(feature = "alloc")]
12use crate::{Utf16String, Utf32String};
13#[cfg(feature = "alloc")]
14#[allow(unused_imports)]
15use alloc::{borrow::Cow, boxed::Box, string::String};
16#[allow(unused_imports)]
17use core::{
18 convert::{AsMut, AsRef, TryFrom},
19 fmt::Write,
20 ops::{Index, IndexMut, RangeBounds},
21 slice::SliceIndex,
22};
23
24mod iter;
25
26pub use iter::*;
27
// Generates a UTF string slice type together with the inherent methods and trait impls that are
// written identically for every encoding.
//
// Inputs:
// - `struct $utfstr([$uchar]);` names the generated slice type and its code unit type; the
//   attributes in front of it (normally the type-level docs) are placed on the struct.
// - `UtfString`, `UStr`, `UCStr` and `UtfError` name the related owned string, the slice of
//   undefined encoding, the C string slice and the validation error type.
// - Each `fn name() -> {}` entry only carries attributes (normally docs) for the generated method
//   of the same name, because that documentation differs between encodings.
//
// The generated code calls `chars`, `char_indices`, `escape_debug`, `from_ustr`, `from_ucstr` and
// range indexing on `$utfstr`; none of those are generated here, so they must be provided
// elsewhere for every type this macro is instantiated with.
macro_rules! utfstr_common_impl {
    {
        $(#[$utfstr_meta:meta])*
        struct $utfstr:ident([$uchar:ty]);
        type UtfString = $utfstring:ident;
        type UStr = $ustr:ident;
        type UCStr = $ucstr:ident;
        type UtfError = $utferror:ident;
        $(#[$from_slice_unchecked_meta:meta])*
        fn from_slice_unchecked() -> {}
        $(#[$from_slice_unchecked_mut_meta:meta])*
        fn from_slice_unchecked_mut() -> {}
        $(#[$from_boxed_slice_unchecked_meta:meta])*
        fn from_boxed_slice_unchecked() -> {}
        $(#[$get_unchecked_meta:meta])*
        fn get_unchecked() -> {}
        $(#[$get_unchecked_mut_meta:meta])*
        fn get_unchecked_mut() -> {}
        $(#[$len_meta:meta])*
        fn len() -> {}
    } => {
        $(#[$utfstr_meta])*
        // The pointer casts between `[$uchar]` and `Self` below are only guaranteed to be valid
        // when the wrapper has exactly the layout of its single field.
        #[repr(transparent)]
        #[allow(clippy::derive_hash_xor_eq)]
        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $utfstr {
            pub(crate) inner: [$uchar],
        }

        impl $utfstr {
            $(#[$from_slice_unchecked_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub const unsafe fn from_slice_unchecked(s: &[$uchar]) -> &Self {
                &*(s as *const [$uchar] as *const Self)
            }

            $(#[$from_slice_unchecked_mut_meta])*
            #[allow(trivial_casts)]
            #[inline]
            #[must_use]
            pub unsafe fn from_slice_unchecked_mut(s: &mut [$uchar]) -> &mut Self {
                &mut *(s as *mut [$uchar] as *mut Self)
            }

            $(#[$from_boxed_slice_unchecked_meta])*
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub unsafe fn from_boxed_slice_unchecked(s: Box<[$uchar]>) -> Box<Self> {
                Box::from_raw(Box::into_raw(s) as *mut Self)
            }

            $(#[$get_unchecked_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked<I>(&self, index: I) -> &Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                Self::from_slice_unchecked(self.inner.get_unchecked(index))
            }

            $(#[$get_unchecked_mut_meta])*
            #[inline]
            #[must_use]
            pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut Self
            where
                I: SliceIndex<[$uchar], Output = [$uchar]>,
            {
                Self::from_slice_unchecked_mut(self.inner.get_unchecked_mut(index))
            }

            $(#[$len_meta])*
            #[inline]
            #[must_use]
            pub const fn len(&self) -> usize {
                self.inner.len()
            }

            /// Returns `true` if the string has a length of zero.
            #[inline]
            #[must_use]
            pub const fn is_empty(&self) -> bool {
                self.inner.is_empty()
            }

            /// Converts a string to a slice of its underlying elements.
            ///
            /// To convert the slice back into a string slice, use the
            /// [`from_slice`][Self::from_slice] function.
            #[inline]
            #[must_use]
            pub const fn as_slice(&self) -> &[$uchar] {
                &self.inner
            }

            /// Converts a mutable string to a mutable slice of its underlying elements.
            ///
            /// # Safety
            ///
            /// This function is unsafe because you can violate the invariants of this type when
            /// mutating the slice. The caller must ensure that the contents of the slice is valid
            /// UTF before the borrow ends and the underlying string is used.
            ///
            /// Use of this string type whose contents have been mutated to invalid UTF is
            /// undefined behavior.
            #[inline]
            #[must_use]
            pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
                &mut self.inner
            }

            /// Converts a string slice to a raw pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            ///
            /// The caller must ensure that the returned pointer is never written to. If you need to
            /// mutate the contents of the string slice, use [`as_mut_ptr`][Self::as_mut_ptr].
            #[inline]
            #[must_use]
            pub const fn as_ptr(&self) -> *const $uchar {
                self.inner.as_ptr()
            }

            /// Converts a mutable string slice to a mutable pointer.
            ///
            /// This pointer will be pointing to the first element of the string slice.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
                self.inner.as_mut_ptr()
            }

            /// Returns this string as a wide string slice of undefined encoding.
            #[inline]
            #[must_use]
            pub const fn as_ustr(&self) -> &$ustr {
                $ustr::from_slice(self.as_slice())
            }

            /// Returns a string slice with leading and trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            #[must_use]
            pub fn trim(&self) -> &Self {
                self.trim_start().trim_end()
            }

            /// Returns a string slice with leading whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `start` in this context means the first position
            /// of that sequence; for a left-to-right language like English or Russian, this will be
            /// left side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// right side.
            #[must_use]
            pub fn trim_start(&self) -> &Self {
                if let Some((index, _)) = self.char_indices().find(|(_, c)| !c.is_whitespace()) {
                    &self[index..]
                } else {
                    <&Self as Default>::default()
                }
            }

            /// Returns a string slice with trailing whitespace removed.
            ///
            /// 'Whitespace' is defined according to the terms of the Unicode Derived Core Property
            /// `White_Space`.
            ///
            /// The returned slice ends directly after the last non-whitespace [`char`], so a final
            /// character that occupies more than one element is kept whole.
            ///
            /// # Text directionality
            ///
            /// A string is a sequence of elements. `end` in this context means the last position of
            /// that sequence; for a left-to-right language like English or Russian, this will be
            /// right side, and for right-to-left languages like Arabic or Hebrew, this will be the
            /// left side.
            #[must_use]
            pub fn trim_end(&self) -> &Self {
                // `end` is the index at which the run of trailing whitespace seen so far starts.
                // Slicing up to the *start* index of the last non-whitespace char (`..=index`)
                // would cut a char that spans several elements in half, so the exclusive end is
                // tracked instead.
                let mut end = self.len();
                for (index, c) in self.char_indices().rev() {
                    if !c.is_whitespace() {
                        return &self[..end];
                    }
                    end = index;
                }
                // Empty, or nothing but whitespace.
                <&Self as Default>::default()
            }

            /// Converts a boxed string into a boxed slice without copying or allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_boxed_slice(self: Box<Self>) -> Box<[$uchar]> {
                // SAFETY: from_raw pointer is from into_raw
                unsafe { Box::from_raw(Box::into_raw(self) as *mut [$uchar]) }
            }

            /// Converts a boxed string slice into an owned UTF string without copying or
            /// allocating.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn into_utfstring(self: Box<Self>) -> $utfstring {
                // SAFETY: The elements come unchanged from `self`, which is already valid UTF.
                unsafe { $utfstring::from_vec_unchecked(self.into_boxed_slice().into_vec()) }
            }

            /// Creates a new owned string by repeating this string `n` times.
            ///
            /// # Panics
            ///
            /// This function will panic if the capacity would overflow.
            #[inline]
            #[cfg(feature = "alloc")]
            #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
            #[must_use]
            pub fn repeat(&self, n: usize) -> $utfstring {
                // SAFETY: Concatenating whole copies of a valid UTF string yields valid UTF.
                unsafe { $utfstring::from_vec_unchecked(self.as_slice().repeat(n)) }
            }
        }

        impl AsMut<$utfstr> for $utfstr {
            #[inline]
            fn as_mut(&mut self) -> &mut $utfstr {
                self
            }
        }

        impl AsRef<$utfstr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$utfstr {
                self
            }
        }

        impl AsRef<[$uchar]> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &[$uchar] {
                self.as_slice()
            }
        }

        impl AsRef<$ustr> for $utfstr {
            #[inline]
            fn as_ref(&self) -> &$ustr {
                self.as_ustr()
            }
        }

        impl core::fmt::Debug for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.write_char('"')?;
                self.escape_debug().try_for_each(|c| f.write_char(c))?;
                f.write_char('"')
            }
        }

        impl Default for &$utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked(&[]) }
            }
        }

        impl Default for &mut $utfstr {
            #[inline]
            fn default() -> Self {
                // SAFETY: Empty slice is always valid
                unsafe { $utfstr::from_slice_unchecked_mut(&mut []) }
            }
        }

        impl core::fmt::Display for $utfstr {
            #[inline]
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                self.chars().try_for_each(|c| f.write_char(c))
            }
        }

        #[cfg(feature = "alloc")]
        impl From<Box<$utfstr>> for Box<[$uchar]> {
            #[inline]
            fn from(value: Box<$utfstr>) -> Self {
                value.into_boxed_slice()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a $ustr {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_ustr()
            }
        }

        impl<'a> From<&'a $utfstr> for &'a [$uchar] {
            #[inline]
            fn from(value: &'a $utfstr) -> Self {
                value.as_slice()
            }
        }

        #[cfg(feature = "std")]
        impl From<&$utfstr> for std::ffi::OsString {
            #[inline]
            fn from(value: &$utfstr) -> std::ffi::OsString {
                value.as_ustr().to_os_string()
            }
        }

        impl PartialEq<$utfstr> for &$utfstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, $utfstr>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, $utfstr>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, $utfstr> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, $utfstr> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        impl PartialEq<$ustr> for $utfstr {
            #[inline]
            fn eq(&self, other: &$ustr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for $ustr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<crate::$ucstr> for $utfstr {
            #[inline]
            fn eq(&self, other: &crate::$ucstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        impl PartialEq<$utfstr> for crate::$ucstr {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        // Comparisons with native strings go through `char`s because the element encodings
        // differ.
        impl PartialEq<str> for $utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<&str> for $utfstr {
            #[inline]
            fn eq(&self, other: &&str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<str> for &$utfstr {
            #[inline]
            fn eq(&self, other: &str) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        impl PartialEq<$utfstr> for &str {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.chars().eq(other.chars())
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b $utfstr {
            #[inline]
            fn eq(&self, other: &Cow<'a, str>) -> bool {
                self == other.as_ref()
            }
        }

        #[cfg(feature = "alloc")]
        impl PartialEq<$utfstr> for Cow<'_, str> {
            #[inline]
            fn eq(&self, other: &$utfstr) -> bool {
                self.as_ref() == other
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, 'b> PartialEq<&'a $utfstr> for Cow<'b, str> {
            #[inline]
            fn eq(&self, other: &&'a $utfstr) -> bool {
                self.as_ref() == *other
            }
        }

        impl<'a> TryFrom<&'a $ustr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a $ustr) -> Result<Self, Self::Error> {
                $utfstr::from_ustr(value)
            }
        }

        impl<'a> TryFrom<&'a crate::$ucstr> for &'a $utfstr {
            type Error = $utferror;

            #[inline]
            fn try_from(value: &'a crate::$ucstr) -> Result<Self, Self::Error> {
                $utfstr::from_ucstr(value)
            }
        }
    };
}
480
// Instantiates the shared string slice implementation for UTF-16 (`u16` elements).
481utfstr_common_impl! {
482 /// UTF-16 string slice for [`Utf16String`][crate::Utf16String].
483 ///
484 /// [`Utf16Str`] is to [`Utf16String`][crate::Utf16String] as [`str`] is to [`String`].
485 ///
486 /// [`Utf16Str`] slices are string slices that are always valid UTF-16 encoding. This is unlike
487 /// the [`U16Str`][crate::U16Str] string slices, which may not have valid encoding. In this way,
488 /// [`Utf16Str`] string slices most resemble native [`str`] slices of all the types in this
489 /// crate.
490 ///
491 /// # Examples
492 ///
493 /// The easiest way to use [`Utf16Str`] is with the [`utf16str!`][crate::utf16str] macro to
494 /// convert string literals into string slices at compile time:
495 ///
496 /// ```
497 /// use widestring::utf16str;
498 /// let hello = utf16str!("Hello, world!");
499 /// ```
500 ///
501 /// You can also convert a [`u16`] slice directly, provided it is valid UTF-16:
502 ///
503 /// ```
504 /// use widestring::Utf16Str;
505 ///
506 /// let sparkle_heart = [0xd83d, 0xdc96];
507 /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
508 ///
509 /// assert_eq!("💖", sparkle_heart);
510 /// ```
511 struct Utf16Str([u16]);
512
513 type UtfString = Utf16String;
514 type UStr = U16Str;
515 type UCStr = U16CStr;
516 type UtfError = Utf16Error;
517
518 /// Converts a slice to a string slice without checking that the string contains valid UTF-16.
519 ///
520 /// See the safe version, [`from_slice`][Self::from_slice], for more information.
521 ///
522 /// # Safety
523 ///
524 /// This function is unsafe because it does not check that the slice passed to it is valid
525 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
526 /// [`Utf16Str`] is always valid UTF-16.
527 ///
528 /// # Examples
529 ///
530 /// ```
531 /// use widestring::Utf16Str;
532 ///
533 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
534 /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked(&sparkle_heart) };
535 ///
536 /// assert_eq!("💖", sparkle_heart);
537 /// ```
538 fn from_slice_unchecked() -> {}
539
540 /// Converts a mutable slice to a mutable string slice without checking that the string contains
541 /// valid UTF-16.
542 ///
543 /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
544 ///
545 /// # Safety
546 ///
547 /// This function is unsafe because it does not check that the slice passed to it is valid
548 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
549 /// [`Utf16Str`] is always valid UTF-16.
550 ///
551 /// # Examples
552 ///
553 /// ```
554 /// use widestring::Utf16Str;
555 ///
556 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
557 /// let sparkle_heart = unsafe { Utf16Str::from_slice_unchecked_mut(&mut sparkle_heart) };
558 ///
559 /// assert_eq!("💖", sparkle_heart);
560 /// ```
561 fn from_slice_unchecked_mut() -> {}
562
563 /// Converts a boxed slice to a boxed string slice without checking that the string contains
564 /// valid UTF-16.
565 ///
566 /// # Safety
567 ///
568 /// This function is unsafe because it does not check if the string slice is valid UTF-16, and
569 /// [`Utf16Str`] must always be valid UTF-16.
570 fn from_boxed_slice_unchecked() -> {}
571
572 /// Returns an unchecked subslice of this string slice.
573 ///
574 /// This is the unchecked alternative to indexing the string slice.
575 ///
576 /// # Safety
577 ///
578 /// Callers of this function are responsible for ensuring that these preconditions are satisfied:
579 ///
580 /// - The starting index must not exceed the ending index;
581 /// - Indexes must be within bounds of the original slice;
582 /// - Indexes must lie on UTF-16 sequence boundaries.
583 ///
584 /// Failing that, the returned string slice may reference invalid memory or violate the
585 /// invariants communicated by the type.
586 ///
587 /// # Examples
588 ///
589 /// ```
590 /// # use widestring::{utf16str};
591 /// let v = utf16str!("⚧️🏳️⚧️➡️s");
592 /// unsafe {
593 /// assert_eq!(utf16str!("⚧️"), v.get_unchecked(..2));
594 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked(2..8));
595 /// assert_eq!(utf16str!("➡️"), v.get_unchecked(8..10));
596 /// assert_eq!(utf16str!("s"), v.get_unchecked(10..));
597 /// }
598 /// ```
599 fn get_unchecked() -> {}
600
601 /// Returns a mutable, unchecked subslice of this string slice.
602 ///
603 /// This is the unchecked alternative to indexing the string slice.
604 ///
605 /// # Safety
606 ///
607 /// Callers of this function are responsible for ensuring that these preconditions are satisfied:
608 ///
609 /// - The starting index must not exceed the ending index;
610 /// - Indexes must be within bounds of the original slice;
611 /// - Indexes must lie on UTF-16 sequence boundaries.
612 ///
613 /// Failing that, the returned string slice may reference invalid memory or violate the
614 /// invariants communicated by the type.
615 ///
616 /// # Examples
617 ///
618 /// ```
619 /// # use widestring::{utf16str};
620 /// # #[cfg(feature = "alloc")] {
621 /// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
622 /// unsafe {
623 /// assert_eq!(utf16str!("⚧️"), v.get_unchecked_mut(..2));
624 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_unchecked_mut(2..8));
625 /// assert_eq!(utf16str!("➡️"), v.get_unchecked_mut(8..10));
626 /// assert_eq!(utf16str!("s"), v.get_unchecked_mut(10..));
627 /// }
628 /// # }
629 /// ```
630 fn get_unchecked_mut() -> {}
631
632 /// Returns the length of `self`.
633 ///
634 /// This length is in `u16` values, not [`char`]s or graphemes. In other words, it may not be
635 /// what a human considers the length of the string.
636 ///
637 /// # Examples
638 ///
639 /// ```
640 /// # use widestring::utf16str;
641 /// assert_eq!(utf16str!("foo").len(), 3);
642 ///
643 /// let complex = utf16str!("⚧️🏳️⚧️➡️s");
644 /// assert_eq!(complex.len(), 11);
645 /// assert_eq!(complex.chars().count(), 10);
646 /// ```
647 fn len() -> {}
648}
649
// Instantiates the shared string slice implementation for UTF-32 (`u32` elements).
650utfstr_common_impl! {
651 /// UTF-32 string slice for [`Utf32String`][crate::Utf32String].
652 ///
653 /// [`Utf32Str`] is to [`Utf32String`][crate::Utf32String] as [`str`] is to [`String`].
654 ///
655 /// [`Utf32Str`] slices are string slices that are always valid UTF-32 encoding. This is unlike
656 /// the [`U32Str`][crate::U32Str] string slices, which may not have valid encoding. In this way,
657 /// [`Utf32Str`] string slices most resemble native [`str`] slices of all the types in this
658 /// crate.
659 ///
660 /// # Examples
661 ///
662 /// The easiest way to use [`Utf32Str`] is with the [`utf32str!`][crate::utf32str] macro to
663 /// convert string literals into string slices at compile time:
664 ///
665 /// ```
666 /// use widestring::utf32str;
667 /// let hello = utf32str!("Hello, world!");
668 /// ```
669 ///
670 /// You can also convert a [`u32`] slice directly, provided it is valid UTF-32:
671 ///
672 /// ```
673 /// use widestring::Utf32Str;
674 ///
675 /// let sparkle_heart = [0x1f496];
676 /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
677 ///
678 /// assert_eq!("💖", sparkle_heart);
679 /// ```
680 ///
681 /// Since [`char`] slices are valid UTF-32, a slice of [`char`]s can be easily converted to a
682 /// string slice:
683 ///
684 /// ```
685 /// use widestring::Utf32Str;
686 ///
687 /// let sparkle_heart = ['💖'; 3];
688 /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
689 ///
690 /// assert_eq!("💖💖💖", sparkle_heart);
691 /// ```
692 struct Utf32Str([u32]);
693
694 type UtfString = Utf32String;
695 type UStr = U32Str;
696 type UCStr = U32CStr;
697 type UtfError = Utf32Error;
698
699 /// Converts a slice to a string slice without checking that the string contains valid UTF-32.
700 ///
701 /// See the safe version, [`from_slice`][Self::from_slice], for more information.
702 ///
703 /// # Safety
704 ///
705 /// This function is unsafe because it does not check that the slice passed to it is valid
706 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
707 /// [`Utf32Str`] is always valid UTF-32.
708 ///
709 /// # Examples
710 ///
711 /// ```
712 /// use widestring::Utf32Str;
713 ///
714 /// let sparkle_heart = vec![0x1f496];
715 /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked(&sparkle_heart) };
716 ///
717 /// assert_eq!("💖", sparkle_heart);
718 /// ```
719 fn from_slice_unchecked() -> {}
720
721 /// Converts a mutable slice to a mutable string slice without checking that the string contains
722 /// valid UTF-32.
723 ///
724 /// See the safe version, [`from_slice_mut`][Self::from_slice_mut], for more information.
725 ///
726 /// # Safety
727 ///
728 /// This function is unsafe because it does not check that the slice passed to it is valid
729 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
730 /// [`Utf32Str`] is always valid UTF-32.
731 ///
732 /// # Examples
733 ///
734 /// ```
735 /// use widestring::Utf32Str;
736 ///
737 /// let mut sparkle_heart = vec![0x1f496];
738 /// let sparkle_heart = unsafe { Utf32Str::from_slice_unchecked_mut(&mut sparkle_heart) };
739 ///
740 /// assert_eq!("💖", sparkle_heart);
741 /// ```
742 fn from_slice_unchecked_mut() -> {}
743
744 /// Converts a boxed slice to a boxed string slice without checking that the string contains
745 /// valid UTF-32.
746 ///
747 /// # Safety
748 ///
749 /// This function is unsafe because it does not check if the string slice is valid UTF-32, and
750 /// [`Utf32Str`] must always be valid UTF-32.
751 fn from_boxed_slice_unchecked() -> {}
752
753 /// Returns an unchecked subslice of this string slice.
754 ///
755 /// This is the unchecked alternative to indexing the string slice.
756 ///
757 /// # Safety
758 ///
759 /// Callers of this function are responsible for ensuring that these preconditions are satisfied:
760 ///
761 /// - The starting index must not exceed the ending index;
762 /// - Indexes must be within bounds of the original slice.
763 ///
764 /// Failing that, the returned string slice may reference invalid memory or violate the
765 /// invariants communicated by the type.
766 ///
767 /// # Examples
768 ///
769 /// ```
770 /// # use widestring::utf32str;
771 /// let v = utf32str!("⚧️🏳️⚧️➡️s");
772 /// unsafe {
773 /// assert_eq!(utf32str!("⚧️"), v.get_unchecked(..2));
774 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked(2..7));
775 /// assert_eq!(utf32str!("➡️"), v.get_unchecked(7..9));
776 /// assert_eq!(utf32str!("s"), v.get_unchecked(9..))
777 /// }
778 /// ```
779 fn get_unchecked() -> {}
780
781 /// Returns a mutable, unchecked subslice of this string slice.
782 ///
783 /// This is the unchecked alternative to indexing the string slice.
784 ///
785 /// # Safety
786 ///
787 /// Callers of this function are responsible for ensuring that these preconditions are satisfied:
788 ///
789 /// - The starting index must not exceed the ending index;
790 /// - Indexes must be within bounds of the original slice.
791 ///
792 /// Failing that, the returned string slice may reference invalid memory or violate the
793 /// invariants communicated by the type.
794 ///
795 /// # Examples
796 ///
797 /// ```
798 /// # use widestring::utf32str;
799 /// # #[cfg(feature = "alloc")] {
800 /// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
801 /// unsafe {
802 /// assert_eq!(utf32str!("⚧️"), v.get_unchecked_mut(..2));
803 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_unchecked_mut(2..7));
804 /// assert_eq!(utf32str!("➡️"), v.get_unchecked_mut(7..9));
805 /// assert_eq!(utf32str!("s"), v.get_unchecked_mut(9..))
806 /// }
807 /// # }
808 /// ```
809 fn get_unchecked_mut() -> {}
810
811 /// Returns the length of `self`.
812 ///
813 /// This length is in the number of [`char`]s in the slice, not graphemes. In other words, it
814 /// may not be what a human considers the length of the string.
815 ///
816 /// # Examples
817 ///
818 /// ```
819 /// # use widestring::utf32str;
820 /// assert_eq!(utf32str!("foo").len(), 3);
821 ///
822 /// let complex = utf32str!("⚧️🏳️⚧️➡️s");
823 /// assert_eq!(complex.len(), 10);
824 /// assert_eq!(complex.chars().count(), 10);
825 /// ```
826 fn len() -> {}
827}
828
829impl Utf16Str {
830 /// Converts a slice of UTF-16 data to a string slice.
831 ///
832 /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
833 /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
834 /// then does the conversion.
835 ///
836 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
837 /// the validity check, there is an unsafe version of this function,
838 /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
839 /// the check.
840 ///
841 /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
842 ///
843 /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
844 /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
845 /// after converting from UTF-8 to UTF-16.
846 ///
847 /// # Errors
848 ///
849 /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
850 /// is not UTF-16.
851 ///
852 /// # Examples
853 ///
854 /// ```
855 /// use widestring::Utf16Str;
856 ///
857 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
858 /// let sparkle_heart = Utf16Str::from_slice(&sparkle_heart).unwrap();
859 ///
860 /// assert_eq!("💖", sparkle_heart);
861 /// ```
862 ///
863 /// With incorrect values that return an error:
864 ///
865 /// ```
866 /// use widestring::Utf16Str;
867 ///
868 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
869 ///
870 /// assert!(Utf16Str::from_slice(&sparkle_heart).is_err());
871 /// ```
872 pub fn from_slice(s: &[u16]) -> Result<&Self, Utf16Error> {
873 validate_utf16(s)?;
874 // SAFETY: Just validated
875 Ok(unsafe { Self::from_slice_unchecked(s) })
876 }
877
878 /// Converts a mutable slice of UTF-16 data to a mutable string slice.
879 ///
880 /// Not all slices of [`u16`] values are valid to convert, since [`Utf16Str`] requires that it
881 /// is always valid UTF-16. This function checks to ensure that the values are valid UTF-16, and
882 /// then does the conversion.
883 ///
884 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
885 /// the validity check, there is an unsafe version of this function,
886 /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
887 /// but skips the check.
888 ///
889 /// If you need an owned string, consider using [`Utf16String::from_vec`] instead.
890 ///
891 /// Because you can stack-allocate a `[u16; N]`, this function is one way to have a
892 /// stack-allocated string. Indeed, the [`utf16str!`][crate::utf16str] macro does exactly this
893 /// after converting from UTF-8 to UTF-16.
894 ///
895 /// # Errors
896 ///
897 /// Returns an error if the slice is not UTF-16 with a description as to why the provided slice
898 /// is not UTF-16.
899 ///
900 /// # Examples
901 ///
902 /// ```
903 /// use widestring::Utf16Str;
904 ///
905 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
906 /// let sparkle_heart = Utf16Str::from_slice_mut(&mut sparkle_heart).unwrap();
907 ///
908 /// assert_eq!("💖", sparkle_heart);
909 /// ```
910 ///
911 /// With incorrect values that return an error:
912 ///
913 /// ```
914 /// use widestring::Utf16Str;
915 ///
916 /// let mut sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
917 ///
918 /// assert!(Utf16Str::from_slice_mut(&mut sparkle_heart).is_err());
919 /// ```
920 pub fn from_slice_mut(s: &mut [u16]) -> Result<&mut Self, Utf16Error> {
921 validate_utf16(s)?;
922 // SAFETY: Just validated
923 Ok(unsafe { Self::from_slice_unchecked_mut(s) })
924 }
925
926 /// Converts a wide string slice of undefined encoding to a UTF-16 string slice without checking
927 /// if the string slice is valid UTF-16.
928 ///
929 /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
930 ///
931 /// # Safety
932 ///
933 /// This function is unsafe because it does not check that the string slice passed to it is
934 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
935 /// the [`Utf16Str`] is always valid UTF-16.
936 ///
937 /// # Examples
938 ///
939 /// ```
940 /// use widestring::{Utf16Str, u16str};
941 ///
942 /// let sparkle_heart = u16str!("💖");
943 /// let sparkle_heart = unsafe { Utf16Str::from_ustr_unchecked(sparkle_heart) };
944 ///
945 /// assert_eq!("💖", sparkle_heart);
946 /// ```
947 #[must_use]
948 pub const unsafe fn from_ustr_unchecked(s: &U16Str) -> &Self {
949 Self::from_slice_unchecked(s.as_slice())
950 }
951
952 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice
953 /// without checking if the string slice is valid UTF-16.
954 ///
955 /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
956 ///
957 /// # Safety
958 ///
959 /// This function is unsafe because it does not check that the string slice passed to it is
960 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
961 /// the [`Utf16Str`] is always valid UTF-16.
962 #[must_use]
963 pub unsafe fn from_ustr_unchecked_mut(s: &mut U16Str) -> &mut Self {
964 Self::from_slice_unchecked_mut(s.as_mut_slice())
965 }
966
967 /// Converts a wide string slice of undefined encoding to a UTF-16 string slice.
968 ///
969 /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
970 /// [`U16Str`] does not contain valid UTF-16 data.
971 ///
972 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
973 /// the validity check, there is an unsafe version of this function,
974 /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
975 /// but skips the check.
976 ///
977 /// # Errors
978 ///
979 /// Returns an error if the string slice is not UTF-16 with a description as to why the
980 /// provided string slice is not UTF-16.
981 ///
982 /// # Examples
983 ///
984 /// ```
985 /// use widestring::{Utf16Str, u16str};
986 ///
987 /// let sparkle_heart = u16str!("💖");
988 /// let sparkle_heart = Utf16Str::from_ustr(sparkle_heart).unwrap();
989 ///
990 /// assert_eq!("💖", sparkle_heart);
991 /// ```
992 #[inline]
993 pub fn from_ustr(s: &U16Str) -> Result<&Self, Utf16Error> {
994 Self::from_slice(s.as_slice())
995 }
996
997 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-16 string slice.
998 ///
999 /// Since [`U16Str`] does not have a specified encoding, this conversion may fail if the
1000 /// [`U16Str`] does not contain valid UTF-16 data.
1001 ///
1002 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1003 /// the validity check, there is an unsafe version of this function,
1004 /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1005 /// but skips the check.
1006 ///
1007 /// # Errors
1008 ///
1009 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1010 /// provided string slice is not UTF-16.
1011 #[inline]
1012 pub fn from_ustr_mut(s: &mut U16Str) -> Result<&mut Self, Utf16Error> {
1013 Self::from_slice_mut(s.as_mut_slice())
1014 }
1015
1016 /// Converts a wide C string slice to a UTF-16 string slice without checking if the
1017 /// string slice is valid UTF-16.
1018 ///
1019 /// The resulting string slice does *not* contain the nul terminator.
1020 ///
1021 /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1022 ///
1023 /// # Safety
1024 ///
1025 /// This function is unsafe because it does not check that the string slice passed to it is
1026 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1027 /// the [`Utf16Str`] is always valid UTF-16.
1028 ///
1029 /// # Examples
1030 ///
1031 /// ```
1032 /// use widestring::{Utf16Str, u16cstr};
1033 ///
1034 /// let sparkle_heart = u16cstr!("💖");
1035 /// let sparkle_heart = unsafe { Utf16Str::from_ucstr_unchecked(sparkle_heart) };
1036 ///
1037 /// assert_eq!("💖", sparkle_heart);
1038 /// ```
1039 #[inline]
1040 #[must_use]
1041 pub unsafe fn from_ucstr_unchecked(s: &crate::U16CStr) -> &Self {
1042 Self::from_slice_unchecked(s.as_slice())
1043 }
1044
1045 /// Converts a mutable wide C string slice to a mutable UTF-16 string slice without
1046 /// checking if the string slice is valid UTF-16.
1047 ///
1048 /// The resulting string slice does *not* contain the nul terminator.
1049 ///
1050 /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1051 ///
1052 /// # Safety
1053 ///
1054 /// This function is unsafe because it does not check that the string slice passed to it is
1055 /// valid UTF-16. If this constraint is violated, undefined behavior results as it is assumed
1056 /// the [`Utf16Str`] is always valid UTF-16.
1057 #[inline]
1058 #[must_use]
1059 pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U16CStr) -> &mut Self {
1060 Self::from_slice_unchecked_mut(s.as_mut_slice())
1061 }
1062
1063 /// Converts a wide C string slice to a UTF-16 string slice.
1064 ///
1065 /// The resulting string slice does *not* contain the nul terminator.
1066 ///
1067 /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1068 /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1069 ///
1070 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1071 /// the validity check, there is an unsafe version of this function,
1072 /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1073 /// but skips the check.
1074 ///
1075 /// # Errors
1076 ///
1077 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1078 /// provided string slice is not UTF-16.
1079 ///
1080 /// # Examples
1081 ///
1082 /// ```
1083 /// use widestring::{Utf16Str, u16cstr};
1084 ///
1085 /// let sparkle_heart = u16cstr!("💖");
1086 /// let sparkle_heart = Utf16Str::from_ucstr(sparkle_heart).unwrap();
1087 ///
1088 /// assert_eq!("💖", sparkle_heart);
1089 /// ```
1090 #[inline]
1091 pub fn from_ucstr(s: &crate::U16CStr) -> Result<&Self, Utf16Error> {
1092 Self::from_slice(s.as_slice())
1093 }
1094
1095 /// Converts a mutable wide C string slice to a mutable UTF-16 string slice.
1096 ///
1097 /// The resulting string slice does *not* contain the nul terminator.
1098 ///
1099 /// Since [`U16CStr`][crate::U16CStr] does not have a specified encoding, this conversion may
1100 /// fail if the [`U16CStr`][crate::U16CStr] does not contain valid UTF-16 data.
1101 ///
1102 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1103 /// the validity check, there is an unsafe version of this function,
1104 /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1105 /// but skips the check.
1106 ///
1107 /// # Safety
1108 ///
1109 /// This method is unsafe because you can violate the invariants of [`U16CStr`][crate::U16CStr]
1110 /// when mutating the slice (i.e. by adding interior nul values).
1111 ///
1112 /// # Errors
1113 ///
1114 /// Returns an error if the string slice is not UTF-16 with a description as to why the
1115 /// provided string slice is not UTF-16.
1116 #[inline]
1117 pub unsafe fn from_ucstr_mut(s: &mut crate::U16CStr) -> Result<&mut Self, Utf16Error> {
1118 Self::from_slice_mut(s.as_mut_slice())
1119 }
1120
1121 /// Converts to a standard UTF-8 [`String`].
1122 ///
1123 /// Because this string is always valid UTF-16, the conversion is lossless and non-fallible.
#[inline]
#[allow(clippy::inherent_to_string_shadow_display)]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_string(&self) -> String {
    // The type invariant guarantees well-formed UTF-16, so decoding can only fail if that
    // invariant was broken through unsafe code; say so in the panic message.
    String::from_utf16(self.as_slice())
        .expect("Utf16Str invariant violated: contents are not valid UTF-16")
}
1132
/// Checks that the `index`-th value is the first value in a UTF-16 code point sequence or the
/// end of the string.
1135 ///
1136 /// Returns `true` if the value at `index` is not a UTF-16 surrogate value, or if the value at
1137 /// `index` is the first value of a surrogate pair (the "high" surrogate). Returns `false` if
1138 /// the value at `index` is the second value of a surrogate pair (a.k.a the "low" surrogate).
1139 ///
1140 /// The start and end of the string (when `index == self.len()`) are considered to be
1141 /// boundaries.
1142 ///
/// Returns `false` if `index` is greater than `self.len()`.
1144 ///
1145 /// # Examples
1146 ///
1147 /// ```
1148 /// # use widestring::utf16str;
1149 /// let s = utf16str!("Sparkle 💖 Heart");
1150 /// assert!(s.is_char_boundary(0));
1151 ///
1152 /// // high surrogate of `💖`
1153 /// assert!(s.is_char_boundary(8));
1154 /// // low surrogate of `💖`
1155 /// assert!(!s.is_char_boundary(9));
1156 ///
1157 /// assert!(s.is_char_boundary(s.len()));
1158 /// ```
1159 #[inline]
1160 #[must_use]
1161 pub const fn is_char_boundary(&self, index: usize) -> bool {
1162 if index > self.len() {
1163 false
1164 } else if index == self.len() {
1165 true
1166 } else {
1167 !is_utf16_low_surrogate(self.inner[index])
1168 }
1169 }
1170
1171 /// Returns a subslice of this string.
1172 ///
1173 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1174 /// equivalent indexing operation would panic.
1175 ///
1176 /// # Examples
1177 ///
1178 /// ```
1179 /// # use widestring::{utf16str};
1180 /// let v = utf16str!("⚧️🏳️⚧️➡️s");
1181 ///
1182 /// assert_eq!(Some(utf16str!("⚧️")), v.get(..2));
1183 /// assert_eq!(Some(utf16str!("🏳️⚧️")), v.get(2..8));
1184 /// assert_eq!(Some(utf16str!("➡️")), v.get(8..10));
1185 /// assert_eq!(Some(utf16str!("s")), v.get(10..));
1186 ///
1187 /// assert!(v.get(3..4).is_none());
1188 /// ```
1189 #[inline]
1190 #[must_use]
1191 pub fn get<I>(&self, index: I) -> Option<&Self>
1192 where
1193 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1194 {
1195 // TODO: Use SliceIndex directly when it is stabilized
1196 let range = crate::range_check(index, ..self.len())?;
1197 if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1198 return None;
1199 }
1200
1201 // SAFETY: range_check verified bounds, and we just verified char boundaries
1202 Some(unsafe { self.get_unchecked(range) })
1203 }
1204
1205 /// Returns a mutable subslice of this string.
1206 ///
1207 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1208 /// equivalent indexing operation would panic.
1209 ///
1210 /// # Examples
1211 ///
1212 /// ```
1213 /// # use widestring::{utf16str};
1214 /// # #[cfg(feature = "alloc")] {
1215 /// let mut v = utf16str!("⚧️🏳️⚧️➡️s").to_owned();
1216 ///
1217 /// assert_eq!(utf16str!("⚧️"), v.get_mut(..2).unwrap());
1218 /// assert_eq!(utf16str!("🏳️⚧️"), v.get_mut(2..8).unwrap());
1219 /// assert_eq!(utf16str!("➡️"), v.get_mut(8..10).unwrap());
1220 /// assert_eq!(utf16str!("s"), v.get_mut(10..).unwrap());
1221 ///
1222 /// assert!(v.get_mut(3..4).is_none());
1223 /// # }
1224 /// ```
1225 #[inline]
1226 #[must_use]
1227 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1228 where
1229 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
1230 {
1231 // TODO: Use SliceIndex directly when it is stabilized
1232 let range = crate::range_check(index, ..self.len())?;
1233 if !self.is_char_boundary(range.start) || !self.is_char_boundary(range.end) {
1234 return None;
1235 }
1236
1237 // SAFETY: range_check verified bounds, and we just verified char boundaries
1238 Some(unsafe { self.get_unchecked_mut(range) })
1239 }
1240
1241 /// Divide one string slice into two at an index.
1242 ///
1243 /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1244 /// the boundary of a UTF-16 code point.
1245 ///
1246 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1247 /// the end of the string slice.
1248 ///
1249 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1250 ///
1251 /// # Panics
1252 ///
1253 /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1254 /// code point of the string slice.
1255 ///
1256 /// # Examples
1257 ///
1258 /// ```
1259 /// # use widestring::utf16str;
1260 /// let s = utf16str!("Per Martin-Löf");
1261 ///
1262 /// let (first, last) = s.split_at(3);
1263 ///
1264 /// assert_eq!("Per", first);
1265 /// assert_eq!(" Martin-Löf", last);
1266 /// ```
1267 #[inline]
1268 #[must_use]
1269 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1270 assert!(self.is_char_boundary(mid));
1271 let (a, b) = self.inner.split_at(mid);
1272 unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1273 }
1274
1275 /// Divide one mutable string slice into two at an index.
1276 ///
1277 /// The argument, `mid`, should be an offset from the start of the string. It must also be on
1278 /// the boundary of a UTF-16 code point.
1279 ///
1280 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1281 /// the end of the string slice.
1282 ///
1283 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1284 ///
1285 /// # Panics
1286 ///
1287 /// Panics if `mid` is not on a UTF-16 code point boundary, or if it is past the end of the last
1288 /// code point of the string slice.
1289 ///
1290 /// # Examples
1291 ///
1292 /// ```
1293 /// # use widestring::utf16str;
1294 /// # #[cfg(feature = "alloc")] {
1295 /// let mut s = utf16str!("Per Martin-Löf").to_owned();
1296 ///
1297 /// let (first, last) = s.split_at_mut(3);
1298 ///
1299 /// assert_eq!("Per", first);
1300 /// assert_eq!(" Martin-Löf", last);
1301 /// # }
1302 /// ```
1303 #[inline]
1304 #[must_use]
1305 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1306 assert!(self.is_char_boundary(mid));
1307 let (a, b) = self.inner.split_at_mut(mid);
1308 unsafe {
1309 (
1310 Self::from_slice_unchecked_mut(a),
1311 Self::from_slice_unchecked_mut(b),
1312 )
1313 }
1314 }
1315
1316 /// Returns an iterator over the [`char`]s of a string slice.
1317 ///
1318 /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1319 /// [`char`]. This method returns such an iterator.
1320 ///
1321 /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1322 /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1323 /// actually want. This functionality is not provided by this crate.
1324 #[inline]
1325 #[must_use]
1326 pub fn chars(&self) -> CharsUtf16<'_> {
1327 CharsUtf16::new(self.as_slice())
1328 }
1329
1330 /// Returns an iterator over the [`char`]s of a string slice and their positions.
1331 ///
1332 /// As this string slice consists of valid UTF-16, we can iterate through a string slice by
1333 /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
1334 ///
1335 /// The iterator yields tuples. The position is first, the [`char`] is second.
1336 #[inline]
1337 #[must_use]
1338 pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
1339 CharIndicesUtf16::new(self.as_slice())
1340 }
1341
1342 /// An iterator over the [`u16`] code units of a string slice.
1343 ///
1344 /// As a UTF-16 string slice consists of a sequence of [`u16`] code units, we can iterate
1345 /// through a string slice by each code unit. This method returns such an iterator.
1346 #[must_use]
1347 pub fn code_units(&self) -> CodeUnits<'_> {
1348 CodeUnits::new(self.as_slice())
1349 }
1350
1351 /// Returns an iterator of bytes over the string encoded as UTF-8.
1352 #[must_use]
1353 pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf16<'_>> {
1354 crate::encode_utf8(self.chars())
1355 }
1356
/// Returns an iterator of [`u32`] over the string encoded as UTF-32.
1358 #[must_use]
1359 pub fn encode_utf32(&self) -> EncodeUtf32<CharsUtf16<'_>> {
1360 crate::encode_utf32(self.chars())
1361 }
1362
1363 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
1364 #[inline]
1365 #[must_use]
1366 pub fn escape_debug(&self) -> EscapeDebug<CharsUtf16<'_>> {
1367 EscapeDebug::<CharsUtf16>::new(self.as_slice())
1368 }
1369
1370 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
1371 #[inline]
1372 #[must_use]
1373 pub fn escape_default(&self) -> EscapeDefault<CharsUtf16<'_>> {
1374 EscapeDefault::<CharsUtf16>::new(self.as_slice())
1375 }
1376
1377 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
1378 #[inline]
1379 #[must_use]
1380 pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf16<'_>> {
1381 EscapeUnicode::<CharsUtf16>::new(self.as_slice())
1382 }
1383
1384 /// Returns the lowercase equivalent of this string slice, as a new [`Utf16String`].
1385 ///
1386 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1387 /// `Lowercase`.
1388 ///
1389 /// Since some characters can expand into multiple characters when changing the case, this
1390 /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
#[inline]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_lowercase(&self) -> Utf16String {
    // Start with capacity for the input length; the mapping may still need more room.
    let mut lowered = Utf16String::with_capacity(self.len());
    // One `char` can lowercase to several `char`s, so flatten the per-char mappings.
    for mapped in self.chars().flat_map(char::to_lowercase) {
        lowered.push(mapped);
    }
    lowered
}
1404
1405 /// Returns the uppercase equivalent of this string slice, as a new [`Utf16String`].
1406 ///
1407 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1408 /// `Uppercase`.
1409 ///
1410 /// Since some characters can expand into multiple characters when changing the case, this
1411 /// function returns a [`Utf16String`] instead of modifying the parameter in-place.
#[inline]
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[must_use]
pub fn to_uppercase(&self) -> Utf16String {
    // Start with capacity for the input length; the mapping may still need more room.
    let mut raised = Utf16String::with_capacity(self.len());
    // One `char` can uppercase to several `char`s, so flatten the per-char mappings.
    for mapped in self.chars().flat_map(char::to_uppercase) {
        raised.push(mapped);
    }
    raised
}
1425
1426 /// Returns an iterator over the lines of a [`Utf16Str`], as string slices.
1427 ///
1428 /// Lines are split at line endings that are either newlines (`\n`) or
1429 /// sequences of a carriage return followed by a line feed (`\r\n`).
1430 ///
1431 /// Line terminators are not included in the lines returned by the iterator.
1432 ///
1433 /// Note that any carriage return (`\r`) not immediately followed by a
1434 /// line feed (`\n`) does not split a line. These carriage returns are
1435 /// thereby included in the produced lines.
1436 ///
1437 /// The final line ending is optional. A string that ends with a final line
1438 /// ending will return the same lines as an otherwise identical string
1439 /// without a final line ending.
1440 ///
1441 /// # Examples
1442 ///
1443 /// Basic usage:
1444 ///
1445 /// ```
1446 /// use widestring::utf16str;
1447 ///
1448 /// let text = utf16str!("foo\r\nbar\n\nbaz\r");
1449 /// let mut lines = text.lines();
1450 ///
1451 /// assert_eq!(Some(utf16str!("foo")), lines.next());
1452 /// assert_eq!(Some(utf16str!("bar")), lines.next());
1453 /// assert_eq!(Some(utf16str!("")), lines.next());
1454 /// // Trailing carriage return is included in the last line
1455 /// assert_eq!(Some(utf16str!("baz\r")), lines.next());
1456 ///
1457 /// assert_eq!(None, lines.next());
1458 /// ```
1459 ///
1460 /// The final line does not require any ending:
1461 ///
1462 /// ```
1463 /// use widestring::utf16str;
1464 ///
1465 /// let text = utf16str!("foo\nbar\n\r\nbaz");
1466 /// let mut lines = text.lines();
1467 ///
1468 /// assert_eq!(Some(utf16str!("foo")), lines.next());
1469 /// assert_eq!(Some(utf16str!("bar")), lines.next());
1470 /// assert_eq!(Some(utf16str!("")), lines.next());
1471 /// assert_eq!(Some(utf16str!("baz")), lines.next());
1472 ///
1473 /// assert_eq!(None, lines.next());
1474 /// ```
1475 pub fn lines(&self) -> Lines<'_, Utf16Str, CharIndicesUtf16<'_>> {
1476 Lines::new(self, self.len(), self.char_indices())
1477 }
1478}
1479
1480impl Utf32Str {
1481 /// Converts a slice of UTF-32 data to a string slice.
1482 ///
1483 /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1484 /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1485 /// then does the conversion.
1486 ///
1487 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1488 /// the validity check, there is an unsafe version of this function,
1489 /// [`from_slice_unchecked`][Self::from_slice_unchecked], which has the same behavior but skips
1490 /// the check.
1491 ///
1492 /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1493 ///
1494 /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1495 /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1496 /// after converting from UTF-8 to UTF-32.
1497 ///
1498 /// # Errors
1499 ///
1500 /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1501 /// is not UTF-32.
1502 ///
1503 /// # Examples
1504 ///
1505 /// ```
1506 /// use widestring::Utf32Str;
1507 ///
1508 /// let sparkle_heart = vec![0x1f496];
1509 /// let sparkle_heart = Utf32Str::from_slice(&sparkle_heart).unwrap();
1510 ///
1511 /// assert_eq!("💖", sparkle_heart);
1512 /// ```
1513 ///
1514 /// With incorrect values that return an error:
1515 ///
1516 /// ```
1517 /// use widestring::Utf32Str;
1518 ///
1519 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1520 ///
1521 /// assert!(Utf32Str::from_slice(&sparkle_heart).is_err());
1522 /// ```
1523 pub fn from_slice(s: &[u32]) -> Result<&Self, Utf32Error> {
1524 validate_utf32(s)?;
1525 // SAFETY: Just validated
1526 Ok(unsafe { Self::from_slice_unchecked(s) })
1527 }
1528
1529 /// Converts a mutable slice of UTF-32 data to a mutable string slice.
1530 ///
1531 /// Not all slices of [`u32`] values are valid to convert, since [`Utf32Str`] requires that it
1532 /// is always valid UTF-32. This function checks to ensure that the values are valid UTF-32, and
1533 /// then does the conversion.
1534 ///
1535 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1536 /// the validity check, there is an unsafe version of this function,
1537 /// [`from_slice_unchecked_mut`][Self::from_slice_unchecked_mut], which has the same behavior
1538 /// but skips the check.
1539 ///
1540 /// If you need an owned string, consider using [`Utf32String::from_vec`] instead.
1541 ///
1542 /// Because you can stack-allocate a `[u32; N]`, this function is one way to have a
1543 /// stack-allocated string. Indeed, the [`utf32str!`][crate::utf32str] macro does exactly this
1544 /// after converting from UTF-8 to UTF-32.
1545 ///
1546 /// # Errors
1547 ///
1548 /// Returns an error if the slice is not UTF-32 with a description as to why the provided slice
1549 /// is not UTF-32.
1550 ///
1551 /// # Examples
1552 ///
1553 /// ```
1554 /// use widestring::Utf32Str;
1555 ///
1556 /// let mut sparkle_heart = vec![0x1f496];
1557 /// let sparkle_heart = Utf32Str::from_slice_mut(&mut sparkle_heart).unwrap();
1558 ///
1559 /// assert_eq!("💖", sparkle_heart);
1560 /// ```
1561 ///
1562 /// With incorrect values that return an error:
1563 ///
1564 /// ```
1565 /// use widestring::Utf32Str;
1566 ///
1567 /// let mut sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1568 ///
1569 /// assert!(Utf32Str::from_slice_mut(&mut sparkle_heart).is_err());
1570 /// ```
1571 pub fn from_slice_mut(s: &mut [u32]) -> Result<&mut Self, Utf32Error> {
1572 validate_utf32(s)?;
1573 // SAFETY: Just validated
1574 Ok(unsafe { Self::from_slice_unchecked_mut(s) })
1575 }
1576
1577 /// Converts a wide string slice of undefined encoding to a UTF-32 string slice without checking
1578 /// if the string slice is valid UTF-32.
1579 ///
1580 /// See the safe version, [`from_ustr`][Self::from_ustr], for more information.
1581 ///
1582 /// # Safety
1583 ///
1584 /// This function is unsafe because it does not check that the string slice passed to it is
1585 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1586 /// the [`Utf32Str`] is always valid UTF-32.
1587 ///
1588 /// # Examples
1589 ///
1590 /// ```
1591 /// use widestring::{Utf32Str, u32str};
1592 ///
1593 /// let sparkle_heart = u32str!("💖");
1594 /// let sparkle_heart = unsafe { Utf32Str::from_ustr_unchecked(sparkle_heart) };
1595 ///
1596 /// assert_eq!("💖", sparkle_heart);
1597 /// ```
1598 #[inline]
1599 #[must_use]
1600 pub const unsafe fn from_ustr_unchecked(s: &crate::U32Str) -> &Self {
1601 Self::from_slice_unchecked(s.as_slice())
1602 }
1603
1604 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice
1605 /// without checking if the string slice is valid UTF-32.
1606 ///
1607 /// See the safe version, [`from_ustr_mut`][Self::from_ustr_mut], for more information.
1608 ///
1609 /// # Safety
1610 ///
1611 /// This function is unsafe because it does not check that the string slice passed to it is
1612 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1613 /// the [`Utf32Str`] is always valid UTF-32.
1614 #[inline]
1615 #[must_use]
1616 pub unsafe fn from_ustr_unchecked_mut(s: &mut crate::U32Str) -> &mut Self {
1617 Self::from_slice_unchecked_mut(s.as_mut_slice())
1618 }
1619
1620 /// Converts a wide string slice of undefined encoding to a UTF-32 string slice.
1621 ///
1622 /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1623 /// [`U32Str`] does not contain valid UTF-32 data.
1624 ///
1625 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1626 /// the validity check, there is an unsafe version of this function,
1627 /// [`from_ustr_unchecked`][Self::from_ustr_unchecked], which has the same behavior
1628 /// but skips the check.
1629 ///
1630 /// # Errors
1631 ///
1632 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1633 /// provided string slice is not UTF-32.
1634 ///
1635 /// # Examples
1636 ///
1637 /// ```
1638 /// use widestring::{Utf32Str, u32str};
1639 ///
1640 /// let sparkle_heart = u32str!("💖");
1641 /// let sparkle_heart = Utf32Str::from_ustr(sparkle_heart).unwrap();
1642 ///
1643 /// assert_eq!("💖", sparkle_heart);
1644 /// ```
1645 #[inline]
1646 pub fn from_ustr(s: &crate::U32Str) -> Result<&Self, Utf32Error> {
1647 Self::from_slice(s.as_slice())
1648 }
1649
1650 /// Converts a mutable wide string slice of undefined encoding to a mutable UTF-32 string slice.
1651 ///
1652 /// Since [`U32Str`] does not have a specified encoding, this conversion may fail if the
1653 /// [`U32Str`] does not contain valid UTF-32 data.
1654 ///
1655 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1656 /// the validity check, there is an unsafe version of this function,
1657 /// [`from_ustr_unchecked_mut`][Self::from_ustr_unchecked_mut], which has the same behavior
1658 /// but skips the check.
1659 ///
1660 /// # Errors
1661 ///
1662 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1663 /// provided string slice is not UTF-32.
1664 #[inline]
1665 pub fn from_ustr_mut(s: &mut crate::U32Str) -> Result<&mut Self, Utf32Error> {
1666 Self::from_slice_mut(s.as_mut_slice())
1667 }
1668
1669 /// Converts a wide C string slice to a UTF-32 string slice without checking if the
1670 /// string slice is valid UTF-32.
1671 ///
1672 /// The resulting string slice does *not* contain the nul terminator.
1673 ///
1674 /// See the safe version, [`from_ucstr`][Self::from_ucstr], for more information.
1675 ///
1676 /// # Safety
1677 ///
1678 /// This function is unsafe because it does not check that the string slice passed to it is
1679 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1680 /// the [`Utf32Str`] is always valid UTF-32.
1681 ///
1682 /// # Examples
1683 ///
1684 /// ```
1685 /// use widestring::{Utf32Str, u32cstr};
1686 ///
1687 /// let sparkle_heart = u32cstr!("💖");
1688 /// let sparkle_heart = unsafe { Utf32Str::from_ucstr_unchecked(sparkle_heart) };
1689 ///
1690 /// assert_eq!("💖", sparkle_heart);
1691 /// ```
1692 #[inline]
1693 #[must_use]
1694 pub unsafe fn from_ucstr_unchecked(s: &crate::U32CStr) -> &Self {
1695 Self::from_slice_unchecked(s.as_slice())
1696 }
1697
1698 /// Converts a mutable wide C string slice to a mutable UTF-32 string slice without
1699 /// checking if the string slice is valid UTF-32.
1700 ///
1701 /// The resulting string slice does *not* contain the nul terminator.
1702 ///
1703 /// See the safe version, [`from_ucstr_mut`][Self::from_ucstr_mut], for more information.
1704 ///
1705 /// # Safety
1706 ///
1707 /// This function is unsafe because it does not check that the string slice passed to it is
1708 /// valid UTF-32. If this constraint is violated, undefined behavior results as it is assumed
1709 /// the [`Utf32Str`] is always valid UTF-32.
1710 #[inline]
1711 #[must_use]
1712 pub unsafe fn from_ucstr_unchecked_mut(s: &mut crate::U32CStr) -> &mut Self {
1713 Self::from_slice_unchecked_mut(s.as_mut_slice())
1714 }
1715
1716 /// Converts a wide C string slice to a UTF-32 string slice.
1717 ///
1718 /// The resulting string slice does *not* contain the nul terminator.
1719 ///
1720 /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1721 /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1722 ///
1723 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1724 /// the validity check, there is an unsafe version of this function,
1725 /// [`from_ucstr_unchecked`][Self::from_ucstr_unchecked], which has the same behavior
1726 /// but skips the check.
1727 ///
1728 /// # Errors
1729 ///
1730 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1731 /// provided string slice is not UTF-32.
1732 ///
1733 /// # Examples
1734 ///
1735 /// ```
1736 /// use widestring::{Utf32Str, u32cstr};
1737 ///
1738 /// let sparkle_heart = u32cstr!("💖");
1739 /// let sparkle_heart = Utf32Str::from_ucstr(sparkle_heart).unwrap();
1740 ///
1741 /// assert_eq!("💖", sparkle_heart);
1742 /// ```
1743 #[inline]
1744 pub fn from_ucstr(s: &crate::U32CStr) -> Result<&Self, Utf32Error> {
1745 Self::from_slice(s.as_slice())
1746 }
1747
1748 /// Converts a mutable wide C string slice to a mutable UTF-32 string slice.
1749 ///
1750 /// The resulting string slice does *not* contain the nul terminator.
1751 ///
1752 /// Since [`U32CStr`][crate::U32CStr] does not have a specified encoding, this conversion may
1753 /// fail if the [`U32CStr`][crate::U32CStr] does not contain valid UTF-32 data.
1754 ///
1755 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1756 /// the validity check, there is an unsafe version of this function,
1757 /// [`from_ucstr_unchecked_mut`][Self::from_ucstr_unchecked_mut], which has the same behavior
1758 /// but skips the check.
1759 ///
1760 /// # Safety
1761 ///
/// This method is unsafe because you can violate the invariants of [`U32CStr`][crate::U32CStr]
1763 /// when mutating the slice (i.e. by adding interior nul values).
1764 ///
1765 /// # Errors
1766 ///
1767 /// Returns an error if the string slice is not UTF-32 with a description as to why the
1768 /// provided string slice is not UTF-32.
1769 #[inline]
1770 pub unsafe fn from_ucstr_mut(s: &mut crate::U32CStr) -> Result<&mut Self, Utf32Error> {
1771 Self::from_slice_mut(s.as_mut_slice())
1772 }
1773
1774 /// Converts a slice of [`char`]s to a string slice.
1775 ///
/// Since [`char`] slices are always valid UTF-32, this conversion always succeeds.
1777 ///
1778 /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1779 ///
1780 /// # Examples
1781 ///
1782 /// ```
1783 /// use widestring::Utf32Str;
1784 ///
1785 /// let sparkle_heart = ['💖'];
1786 /// let sparkle_heart = Utf32Str::from_char_slice(&sparkle_heart);
1787 ///
1788 /// assert_eq!("💖", sparkle_heart);
1789 /// ```
1790 #[allow(trivial_casts)]
1791 #[inline]
1792 #[must_use]
1793 pub const fn from_char_slice(s: &[char]) -> &Self {
1794 // SAFETY: char slice is always valid UTF-32
1795 unsafe { Self::from_slice_unchecked(&*(s as *const [char] as *const [u32])) }
1796 }
1797
1798 /// Converts a mutable slice of [`char`]s to a string slice.
1799 ///
/// Since [`char`] slices are always valid UTF-32, this conversion always succeeds.
1801 ///
1802 /// If you need an owned string, consider using [`Utf32String::from_chars`] instead.
1803 ///
1804 /// # Examples
1805 ///
1806 /// ```
1807 /// use widestring::Utf32Str;
1808 ///
1809 /// let mut sparkle_heart = ['💖'];
1810 /// let sparkle_heart = Utf32Str::from_char_slice_mut(&mut sparkle_heart);
1811 ///
1812 /// assert_eq!("💖", sparkle_heart);
1813 /// ```
1814 #[allow(trivial_casts)]
1815 #[inline]
1816 #[must_use]
1817 pub fn from_char_slice_mut(s: &mut [char]) -> &mut Self {
1818 // SAFETY: char slice is always valid UTF-32
1819 unsafe { Self::from_slice_unchecked_mut(&mut *(s as *mut [char] as *mut [u32])) }
1820 }
1821
1822 /// Converts a string slice into a slice of [`char`]s.
1823 #[allow(trivial_casts)]
1824 #[inline]
1825 #[must_use]
1826 pub const fn as_char_slice(&self) -> &[char] {
1827 // SAFETY: Self should be valid UTF-32 so chars will be in range
1828 unsafe { &*(self.as_slice() as *const [u32] as *const [char]) }
1829 }
1830
1831 /// Converts a mutable string slice into a mutable slice of [`char`]s.
1832 #[allow(trivial_casts)]
1833 #[inline]
1834 #[must_use]
1835 pub fn as_char_slice_mut(&mut self) -> &mut [char] {
1836 // SAFETY: Self should be valid UTF-32 so chars will be in range
1837 unsafe { &mut *(self.as_mut_slice() as *mut [u32] as *mut [char]) }
1838 }
1839
1840 /// Converts to a standard UTF-8 [`String`].
1841 ///
1842 /// Because this string is always valid UTF-32, the conversion is lossless and non-fallible.
1843 #[inline]
1844 #[allow(clippy::inherent_to_string_shadow_display)]
1845 #[cfg(feature = "alloc")]
1846 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1847 #[must_use]
1848 pub fn to_string(&self) -> String {
1849 let mut s = String::with_capacity(self.len());
1850 s.extend(self.as_char_slice());
1851 s
1852 }
1853
1854 /// Returns a subslice of this string.
1855 ///
1856 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1857 /// equivalent indexing operation would panic.
1858 ///
1859 /// # Examples
1860 ///
1861 /// ```
1862 /// # use widestring::{utf32str};
1863 /// let v = utf32str!("⚧️🏳️⚧️➡️s");
1864 ///
1865 /// assert_eq!(Some(utf32str!("⚧️")), v.get(..2));
1866 /// assert_eq!(Some(utf32str!("🏳️⚧️")), v.get(2..7));
1867 /// assert_eq!(Some(utf32str!("➡️")), v.get(7..9));
1868 /// assert_eq!(Some(utf32str!("s")), v.get(9..));
1869 /// ```
1870 #[inline]
1871 #[must_use]
1872 pub fn get<I>(&self, index: I) -> Option<&Self>
1873 where
1874 I: SliceIndex<[u32], Output = [u32]>,
1875 {
1876 // TODO: Use SliceIndex directly when it is stabilized
1877 // SAFETY: subslice has already been verified
1878 self.inner
1879 .get(index)
1880 .map(|s| unsafe { Self::from_slice_unchecked(s) })
1881 }
1882
1883 /// Returns a mutable subslice of this string.
1884 ///
1885 /// This is the non-panicking alternative to indexing the string. Returns [`None`] whenever
1886 /// equivalent indexing operation would panic.
1887 ///
1888 /// # Examples
1889 ///
1890 /// ```
1891 /// # use widestring::{utf32str};
1892 /// # #[cfg(feature = "alloc")] {
1893 /// let mut v = utf32str!("⚧️🏳️⚧️➡️s").to_owned();
1894 ///
1895 /// assert_eq!(utf32str!("⚧️"), v.get_mut(..2).unwrap());
1896 /// assert_eq!(utf32str!("🏳️⚧️"), v.get_mut(2..7).unwrap());
1897 /// assert_eq!(utf32str!("➡️"), v.get_mut(7..9).unwrap());
1898 /// assert_eq!(utf32str!("s"), v.get_mut(9..).unwrap());
1899 /// # }
1900 /// ```
1901 #[inline]
1902 #[must_use]
1903 pub fn get_mut<I>(&mut self, index: I) -> Option<&mut Self>
1904 where
1905 I: SliceIndex<[u32], Output = [u32]>,
1906 {
1907 // TODO: Use SliceIndex directly when it is stabilized
1908 // SAFETY: subslice has already been verified
1909 self.inner
1910 .get_mut(index)
1911 .map(|s| unsafe { Self::from_slice_unchecked_mut(s) })
1912 }
1913
1914 /// Divide one string slice into two at an index.
1915 ///
1916 /// The argument, `mid`, should be an offset from the start of the string.
1917 ///
1918 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1919 /// the end of the string slice.
1920 ///
1921 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut] method.
1922 ///
1923 /// # Panics
1924 ///
1925 /// Panics if `mid` is past the end of the last code point of the string slice.
1926 ///
1927 /// # Examples
1928 ///
1929 /// ```
1930 /// # use widestring::utf32str;
1931 /// let s = utf32str!("Per Martin-Löf");
1932 ///
1933 /// let (first, last) = s.split_at(3);
1934 ///
1935 /// assert_eq!("Per", first);
1936 /// assert_eq!(" Martin-Löf", last);
1937 /// ```
1938 #[inline]
1939 #[must_use]
1940 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
1941 let (a, b) = self.inner.split_at(mid);
1942 unsafe { (Self::from_slice_unchecked(a), Self::from_slice_unchecked(b)) }
1943 }
1944
1945 /// Divide one mutable string slice into two at an index.
1946 ///
1947 /// The argument, `mid`, should be an offset from the start of the string.
1948 ///
1949 /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to
1950 /// the end of the string slice.
1951 ///
1952 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
1953 ///
1954 /// # Panics
1955 ///
1956 /// Panics if `mid` is past the end of the last code point of the string slice.
1957 ///
1958 /// # Examples
1959 ///
1960 /// ```
1961 /// # use widestring::utf32str;
1962 /// # #[cfg(feature = "alloc")] {
1963 /// let mut s = utf32str!("Per Martin-Löf").to_owned();
1964 ///
1965 /// let (first, last) = s.split_at_mut(3);
1966 ///
1967 /// assert_eq!("Per", first);
1968 /// assert_eq!(" Martin-Löf", last);
1969 /// # }
1970 /// ```
1971 #[inline]
1972 #[must_use]
1973 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
1974 let (a, b) = self.inner.split_at_mut(mid);
1975 unsafe {
1976 (
1977 Self::from_slice_unchecked_mut(a),
1978 Self::from_slice_unchecked_mut(b),
1979 )
1980 }
1981 }
1982
1983 /// Returns an iterator over the [`char`]s of a string slice.
1984 ///
1985 /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
1986 /// [`char`]. This method returns such an iterator.
1987 ///
1988 /// It's important to remember that [`char`] represents a Unicode Scalar Value, and might not
1989 /// match your idea of what a 'character' is. Iteration over grapheme clusters may be what you
1990 /// actually want. This functionality is not provided by this crate.
1991 #[inline]
1992 #[must_use]
1993 pub fn chars(&self) -> CharsUtf32<'_> {
1994 CharsUtf32::new(self.as_slice())
1995 }
1996
1997 /// Returns an iterator over the [`char`]s of a string slice and their positions.
1998 ///
1999 /// As this string slice consists of valid UTF-32, we can iterate through a string slice by
2000 /// [`char`]. This method returns an iterator of both these [`char`]s as well as their offsets.
2001 ///
2002 /// The iterator yields tuples. The position is first, the [`char`] is second.
2003 #[inline]
2004 #[must_use]
2005 pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
2006 CharIndicesUtf32::new(self.as_slice())
2007 }
2008
2009 /// Returns an iterator of bytes over the string encoded as UTF-8.
2010 #[must_use]
2011 pub fn encode_utf8(&self) -> EncodeUtf8<CharsUtf32<'_>> {
2012 crate::encode_utf8(self.chars())
2013 }
2014
2015 /// Returns an iterator of [`u16`] over the sting encoded as UTF-16.
2016 #[must_use]
2017 pub fn encode_utf16(&self) -> EncodeUtf16<CharsUtf32<'_>> {
2018 crate::encode_utf16(self.chars())
2019 }
2020
2021 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_debug`].
2022 #[inline]
2023 #[must_use]
2024 pub fn escape_debug(&self) -> EscapeDebug<CharsUtf32<'_>> {
2025 EscapeDebug::<CharsUtf32>::new(self.as_slice())
2026 }
2027
2028 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_default`].
2029 #[inline]
2030 #[must_use]
2031 pub fn escape_default(&self) -> EscapeDefault<CharsUtf32<'_>> {
2032 EscapeDefault::<CharsUtf32>::new(self.as_slice())
2033 }
2034
2035 /// Returns an iterator that escapes each [`char`] in `self` with [`char::escape_unicode`].
2036 #[inline]
2037 #[must_use]
2038 pub fn escape_unicode(&self) -> EscapeUnicode<CharsUtf32<'_>> {
2039 EscapeUnicode::<CharsUtf32>::new(self.as_slice())
2040 }
2041
2042 /// Returns the lowercase equivalent of this string slice, as a new [`Utf32String`].
2043 ///
2044 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
2045 /// `Lowercase`.
2046 ///
2047 /// Since some characters can expand into multiple characters when changing the case, this
2048 /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2049 #[inline]
2050 #[cfg(feature = "alloc")]
2051 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2052 #[must_use]
2053 pub fn to_lowercase(&self) -> Utf32String {
2054 let mut s = Utf32String::with_capacity(self.len());
2055 for c in self.chars() {
2056 for lower in c.to_lowercase() {
2057 s.push(lower);
2058 }
2059 }
2060 s
2061 }
2062
2063 /// Returns the uppercase equivalent of this string slice, as a new [`Utf32String`].
2064 ///
2065 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
2066 /// `Uppercase`.
2067 ///
2068 /// Since some characters can expand into multiple characters when changing the case, this
2069 /// function returns a [`Utf32String`] instead of modifying the parameter in-place.
2070 #[inline]
2071 #[cfg(feature = "alloc")]
2072 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
2073 #[must_use]
2074 pub fn to_uppercase(&self) -> Utf32String {
2075 let mut s = Utf32String::with_capacity(self.len());
2076 for c in self.chars() {
2077 for lower in c.to_uppercase() {
2078 s.push(lower);
2079 }
2080 }
2081 s
2082 }
2083
2084 /// Returns an iterator over the lines of a [`Utf32Str`], as string slices.
2085 ///
2086 /// Lines are split at line endings that are either newlines (`\n`) or
2087 /// sequences of a carriage return followed by a line feed (`\r\n`).
2088 ///
2089 /// Line terminators are not included in the lines returned by the iterator.
2090 ///
2091 /// Note that any carriage return (`\r`) not immediately followed by a
2092 /// line feed (`\n`) does not split a line. These carriage returns are
2093 /// thereby included in the produced lines.
2094 ///
2095 /// The final line ending is optional. A string that ends with a final line
2096 /// ending will return the same lines as an otherwise identical string
2097 /// without a final line ending.
2098 ///
2099 /// # Examples
2100 ///
2101 /// Basic usage:
2102 ///
2103 /// ```
2104 /// use widestring::utf32str;
2105 ///
2106 /// let text = utf32str!("foo\r\nbar\n\nbaz\r");
2107 /// let mut lines = text.lines();
2108 ///
2109 /// assert_eq!(Some(utf32str!("foo")), lines.next());
2110 /// assert_eq!(Some(utf32str!("bar")), lines.next());
2111 /// assert_eq!(Some(utf32str!("")), lines.next());
2112 /// // Trailing carriage return is included in the last line
2113 /// assert_eq!(Some(utf32str!("baz\r")), lines.next());
2114 ///
2115 /// assert_eq!(None, lines.next());
2116 /// ```
2117 ///
2118 /// The final line does not require any ending:
2119 ///
2120 /// ```
2121 /// use widestring::utf32str;
2122 ///
2123 /// let text = utf32str!("foo\nbar\n\r\nbaz");
2124 /// let mut lines = text.lines();
2125 ///
2126 /// assert_eq!(Some(utf32str!("foo")), lines.next());
2127 /// assert_eq!(Some(utf32str!("bar")), lines.next());
2128 /// assert_eq!(Some(utf32str!("")), lines.next());
2129 /// assert_eq!(Some(utf32str!("baz")), lines.next());
2130 ///
2131 /// assert_eq!(None, lines.next());
2132 /// ```
2133 pub fn lines(&self) -> Lines<'_, Utf32Str, CharIndicesUtf32<'_>> {
2134 Lines::new(self, self.len(), self.char_indices())
2135 }
2136}
2137
2138impl AsMut<[char]> for Utf32Str {
2139 #[inline]
2140 fn as_mut(&mut self) -> &mut [char] {
2141 self.as_char_slice_mut()
2142 }
2143}
2144
2145impl AsRef<[char]> for Utf32Str {
2146 #[inline]
2147 fn as_ref(&self) -> &[char] {
2148 self.as_char_slice()
2149 }
2150}
2151
2152impl<'a> From<&'a [char]> for &'a Utf32Str {
2153 #[inline]
2154 fn from(value: &'a [char]) -> Self {
2155 Utf32Str::from_char_slice(value)
2156 }
2157}
2158
2159impl<'a> From<&'a mut [char]> for &'a mut Utf32Str {
2160 #[inline]
2161 fn from(value: &'a mut [char]) -> Self {
2162 Utf32Str::from_char_slice_mut(value)
2163 }
2164}
2165
2166impl<'a> From<&'a Utf32Str> for &'a [char] {
2167 #[inline]
2168 fn from(value: &'a Utf32Str) -> Self {
2169 value.as_char_slice()
2170 }
2171}
2172
2173impl<'a> From<&'a mut Utf32Str> for &'a mut [char] {
2174 #[inline]
2175 fn from(value: &'a mut Utf32Str) -> Self {
2176 value.as_char_slice_mut()
2177 }
2178}
2179
2180impl<I> Index<I> for Utf16Str
2181where
2182 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2183{
2184 type Output = Utf16Str;
2185
2186 #[inline]
2187 fn index(&self, index: I) -> &Self::Output {
2188 self.get(index)
2189 .expect("index out of bounds or not on char boundary")
2190 }
2191}
2192
2193impl<I> Index<I> for Utf32Str
2194where
2195 I: SliceIndex<[u32], Output = [u32]>,
2196{
2197 type Output = Utf32Str;
2198
2199 #[inline]
2200 fn index(&self, index: I) -> &Self::Output {
2201 self.get(index).expect("index out of bounds")
2202 }
2203}
2204
2205impl<I> IndexMut<I> for Utf16Str
2206where
2207 I: RangeBounds<usize> + SliceIndex<[u16], Output = [u16]>,
2208{
2209 #[inline]
2210 fn index_mut(&mut self, index: I) -> &mut Self::Output {
2211 self.get_mut(index)
2212 .expect("index out of bounds or not on char boundary")
2213 }
2214}
2215
2216impl<I> IndexMut<I> for Utf32Str
2217where
2218 I: SliceIndex<[u32], Output = [u32]>,
2219{
2220 #[inline]
2221 fn index_mut(&mut self, index: I) -> &mut Self::Output {
2222 self.get_mut(index).expect("index out of bounds")
2223 }
2224}
2225
2226impl PartialEq<[char]> for Utf32Str {
2227 #[inline]
2228 fn eq(&self, other: &[char]) -> bool {
2229 self.as_char_slice() == other
2230 }
2231}
2232
2233impl PartialEq<Utf32Str> for [char] {
2234 #[inline]
2235 fn eq(&self, other: &Utf32Str) -> bool {
2236 self == other.as_char_slice()
2237 }
2238}
2239
2240impl PartialEq<Utf16Str> for Utf32Str {
2241 #[inline]
2242 fn eq(&self, other: &Utf16Str) -> bool {
2243 self.chars().eq(other.chars())
2244 }
2245}
2246
2247impl PartialEq<Utf32Str> for Utf16Str {
2248 #[inline]
2249 fn eq(&self, other: &Utf32Str) -> bool {
2250 self.chars().eq(other.chars())
2251 }
2252}
2253
2254impl PartialEq<&Utf16Str> for Utf32Str {
2255 #[inline]
2256 fn eq(&self, other: &&Utf16Str) -> bool {
2257 self.chars().eq(other.chars())
2258 }
2259}
2260
2261impl PartialEq<&Utf32Str> for Utf16Str {
2262 #[inline]
2263 fn eq(&self, other: &&Utf32Str) -> bool {
2264 self.chars().eq(other.chars())
2265 }
2266}
2267
2268impl PartialEq<Utf16Str> for &Utf32Str {
2269 #[inline]
2270 fn eq(&self, other: &Utf16Str) -> bool {
2271 self.chars().eq(other.chars())
2272 }
2273}
2274
2275impl PartialEq<Utf32Str> for &Utf16Str {
2276 #[inline]
2277 fn eq(&self, other: &Utf32Str) -> bool {
2278 self.chars().eq(other.chars())
2279 }
2280}
2281
2282impl<'a> TryFrom<&'a [u16]> for &'a Utf16Str {
2283 type Error = Utf16Error;
2284
2285 #[inline]
2286 fn try_from(value: &'a [u16]) -> Result<Self, Self::Error> {
2287 Utf16Str::from_slice(value)
2288 }
2289}
2290
2291impl<'a> TryFrom<&'a mut [u16]> for &'a mut Utf16Str {
2292 type Error = Utf16Error;
2293
2294 #[inline]
2295 fn try_from(value: &'a mut [u16]) -> Result<Self, Self::Error> {
2296 Utf16Str::from_slice_mut(value)
2297 }
2298}
2299
2300impl<'a> TryFrom<&'a [u32]> for &'a Utf32Str {
2301 type Error = Utf32Error;
2302
2303 #[inline]
2304 fn try_from(value: &'a [u32]) -> Result<Self, Self::Error> {
2305 Utf32Str::from_slice(value)
2306 }
2307}
2308
2309impl<'a> TryFrom<&'a mut [u32]> for &'a mut Utf32Str {
2310 type Error = Utf32Error;
2311
2312 #[inline]
2313 fn try_from(value: &'a mut [u32]) -> Result<Self, Self::Error> {
2314 Utf32Str::from_slice_mut(value)
2315 }
2316}
2317
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition applies to non-Windows targets, where the alias is [`Utf32Str`].
#[cfg(not(windows))]
pub type WideUtfStr = Utf32Str;
2322
/// Alias for [`Utf16Str`] or [`Utf32Str`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition applies to Windows targets, where the alias is [`Utf16Str`].
#[cfg(windows)]
pub type WideUtfStr = Utf16Str;
2327
#[cfg(test)]
mod test {
    use crate::*;

    // `trim_start` on UTF-16 strings drops leading whitespace and keeps the rest.
    #[test]
    fn utf16_trim() {
        let s = utf16str!(" Hello\tworld\t");
        assert_eq!(utf16str!("Hello\tworld\t"), s.trim_start());

        let s = utf16str!(" English ");
        assert_eq!(Some('E'), s.trim_start().chars().next());

        let s = utf16str!(" עברית ");
        assert_eq!(Some('ע'), s.trim_start().chars().next());
    }

    // `trim_start` on UTF-32 strings drops leading whitespace and keeps the rest.
    #[test]
    fn utf32_trim() {
        let s = utf32str!(" Hello\tworld\t");
        assert_eq!(utf32str!("Hello\tworld\t"), s.trim_start());

        let s = utf32str!(" English ");
        assert_eq!(Some('E'), s.trim_start().chars().next());

        let s = utf32str!(" עברית ");
        assert_eq!(Some('ע'), s.trim_start().chars().next());
    }
}