widestring/
utfstring.rs

Help
1//! Owned, growable UTF strings.
2//!
3//! This module contains UTF strings and related types.
4
5use crate::{
6    decode_utf16_surrogate_pair,
7    error::{Utf16Error, Utf32Error},
8    is_utf16_low_surrogate, is_utf16_surrogate, validate_utf16, validate_utf16_vec, validate_utf32,
9    validate_utf32_vec, Utf16Str, Utf32Str,
10};
11#[allow(unused_imports)]
12use alloc::{
13    borrow::{Cow, ToOwned},
14    boxed::Box,
15    string::String,
16    vec::Vec,
17};
18#[allow(unused_imports)]
19use core::{
20    borrow::{Borrow, BorrowMut},
21    convert::{AsMut, AsRef, From, Infallible, TryFrom},
22    fmt::Write,
23    iter::FromIterator,
24    mem,
25    ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, RangeBounds},
26    ptr,
27    slice::SliceIndex,
28    str::FromStr,
29};
30
31mod iter;
32pub use iter::*;
33
34macro_rules! utfstring_common_impl {
35    {
36        $(#[$utfstring_meta:meta])*
37        struct $utfstring:ident([$uchar:ty]);
38        type UtfStr = $utfstr:ident;
39        type UStr = $ustr:ident;
40        type UCStr = $ucstr:ident;
41        type UString = $ustring:ident;
42        type UCString = $ucstring:ident;
43        type UtfError = $utferror:ident;
44        $(#[$from_vec_unchecked_meta:meta])*
45        fn from_vec_unchecked() -> {}
46        $(#[$from_str_meta:meta])*
47        fn from_str() -> {}
48        $(#[$push_utfstr_meta:meta])*
49        fn push_utfstr() -> {}
50        $(#[$as_mut_vec_meta:meta])*
51        fn as_mut_vec() -> {}
52    } => {
53        $(#[$utfstring_meta])*
54        #[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
55        #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
56        pub struct $utfstring {
57            inner: Vec<$uchar>,
58        }
59
60        impl $utfstring {
61            /// Creates a new empty string.
62            ///
63            /// Given that the string is empty, this will not allocate any initial buffer. While
64            /// that means this initial operation is very inexpensive, it may cause excessive
65            /// allocations later when you add data. If you have an idea of how much data the
66            /// string will hold, consider [`with_capacity`][Self::with_capacity] instead to
67            /// prevent excessive re-allocation.
68            #[inline]
69            #[must_use]
70            pub const fn new() -> Self {
71                Self { inner: Vec::new() }
72            }
73
74            /// Creates a new empty string with a particular capacity.
75            ///
76            /// This string has an internal buffer to hold its data. The capacity is the length of
77            /// that buffer, and can be queried with the [`capacity`][Self::capacity] method. This
78            /// method creates and empty string, but one with an initial buffer that can hold
79            /// `capacity` elements. This is useful when you may be appending a bunch of data to
80            /// the string, reducing the number of reallocations it needs to do.
81            ///
82            /// If the given capacity is `0`, no allocation will occur, and this method is identical
83            /// to the [`new`][Self::new] method.
84            #[inline]
85            #[must_use]
86            pub fn with_capacity(capacity: usize) -> Self {
87                Self {
88                    inner: Vec::with_capacity(capacity),
89                }
90            }
91
92            $(#[$from_vec_unchecked_meta])*
93            #[inline]
94            #[must_use]
95            pub unsafe fn from_vec_unchecked(v: impl Into<Vec<$uchar>>) -> Self {
96                Self { inner: v.into() }
97            }
98
99            $(#[$from_str_meta])*
100            #[inline]
101            #[allow(clippy::should_implement_trait)]
102            #[must_use]
103            pub fn from_str<S: AsRef<str> + ?Sized>(s: &S) -> Self {
104                let s = s.as_ref();
105                let mut string = Self::new();
106                string.extend(s.chars());
107                string
108            }
109
110            /// Converts a string into a string slice.
111            #[inline]
112            #[must_use]
113            pub fn as_utfstr(&self) -> &$utfstr {
114                unsafe { $utfstr::from_slice_unchecked(self.inner.as_slice()) }
115            }
116
117            /// Converts a string into a mutable string slice.
118            #[inline]
119            #[must_use]
120            pub fn as_mut_utfstr(&mut self) -> &mut $utfstr {
121                unsafe { $utfstr::from_slice_unchecked_mut(&mut self.inner) }
122            }
123
124            /// Converts this string into a wide string of undefined encoding.
125            #[inline]
126            #[must_use]
127            pub fn as_ustr(&self) -> &crate::$ustr {
128                crate::$ustr::from_slice(self.as_slice())
129            }
130
131            /// Converts a string into a vector of its elements.
132            ///
133            /// This consumes the string without copying its contents.
134            #[inline]
135            #[must_use]
136            pub fn into_vec(self) -> Vec<$uchar> {
137                self.inner
138            }
139
140            $(#[$push_utfstr_meta])*
141            #[inline]
142            pub fn push_utfstr<S: AsRef<$utfstr> + ?Sized>(&mut self, string: &S) {
143                self.inner.extend_from_slice(string.as_ref().as_slice())
144            }
145
146            /// Returns this string's capacity, in number of elements.
147            #[inline]
148            #[must_use]
149            pub fn capacity(&self) -> usize {
150                self.inner.capacity()
151            }
152
153            /// Ensures that this string's capacity is at least `additional` elements larger than
154            /// its length.
155            ///
156            /// The capacity may be increased by more than `additional` elements if it chooses, to
157            /// prevent frequent reallocations.
158            ///
159            /// If you do not want this "at least" behavior, see the
160            /// [`reserve_exact`][Self::reserve_exact] method.
161            ///
162            /// # Panics
163            ///
164            /// Panics if the new capacity overflows [`usize`].
165            #[inline]
166            pub fn reserve(&mut self, additional: usize) {
167                self.inner.reserve(additional)
168            }
169
170            /// Ensures that this string's capacity is `additional` elements larger than its length.
171            ///
172            /// Consider using the [`reserve`][Self::reserve] method unless you absolutely know
173            /// better than the allocator.
174            ///
175            /// # Panics
176            ///
177            /// Panics if the new capacity overflows [`usize`].
178            #[inline]
179            pub fn reserve_exact(&mut self, additional: usize) {
180                self.inner.reserve_exact(additional)
181            }
182
183            /// Shrinks the capacity of this string to match its length.
184            #[inline]
185            pub fn shrink_to_fit(&mut self) {
186                self.inner.shrink_to_fit()
187            }
188
189            /// Shrinks the capacity of this string with a lower bound.
190            ///
191            /// The capacity will remain at least as large as both the length and the supplied
192            /// value.
193            ///
194            /// If the current capacity is less than the lower limit, this is a no-op.
195            #[inline]
196            pub fn shrink_to(&mut self, min_capacity: usize) {
197                self.inner.shrink_to(min_capacity)
198            }
199
200            /// Returns a slice of this string's contents.
201            #[inline]
202            #[must_use]
203            pub fn as_slice(&self) -> &[$uchar] {
204                self.inner.as_slice()
205            }
206
207            unsafe fn insert_slice(&mut self, idx: usize, slice: &[$uchar]) {
208                let len = self.inner.len();
209                let amt = slice.len();
210                self.inner.reserve(amt);
211
212                ptr::copy(
213                    self.inner.as_ptr().add(idx),
214                    self.inner.as_mut_ptr().add(idx + amt),
215                    len - idx,
216                );
217                ptr::copy_nonoverlapping(slice.as_ptr(), self.inner.as_mut_ptr().add(idx), amt);
218                self.inner.set_len(len + amt);
219            }
220
221            $(#[$as_mut_vec_meta])*
222            #[inline]
223            #[must_use]
224            pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<$uchar> {
225                &mut self.inner
226            }
227
228            /// Returns the length of this string in number of elements, not [`char`]s or
229            /// graphemes.
230            ///
231            /// In other words, it might not be what a human considers the length of the string.
232            #[inline]
233            #[must_use]
234            pub fn len(&self) -> usize {
235                self.inner.len()
236            }
237
238            /// Returns `true` if this string has a length of zero, and `false` otherwise.
239            #[inline]
240            #[must_use]
241            pub fn is_empty(&self) -> bool {
242                self.inner.is_empty()
243            }
244
245            /// Truncates the string, removing all contents.
246            ///
247            /// While this means the string will have a length of zero, it does not touch its
248            /// capacity.
249            #[inline]
250            pub fn clear(&mut self) {
251                self.inner.clear()
252            }
253
254            /// Converts this string into a boxed string slice.
255            ///
256            /// This will drop excess capacity.
257            #[inline]
258            #[must_use]
259            pub fn into_boxed_utfstr(self) -> Box<$utfstr> {
260                let slice = self.inner.into_boxed_slice();
261                // SAFETY: Already valid UTF-16
262                unsafe { $utfstr::from_boxed_slice_unchecked(slice) }
263            }
264
265            /// Appends a given UTF-8 string slice onto the end of this string, converting it to
266            /// UTF-16.
267            #[inline]
268            pub fn push_str<S: AsRef<str> + ?Sized>(&mut self, string: &S) {
269                self.extend(string.as_ref().chars())
270            }
271        }
272
273        impl Add<&$utfstr> for $utfstring {
274            type Output = $utfstring;
275
276            #[inline]
277            fn add(mut self, rhs: &$utfstr) -> Self::Output {
278                self.push_utfstr(rhs);
279                self
280            }
281        }
282
283        impl Add<&str> for $utfstring {
284            type Output = $utfstring;
285
286            #[inline]
287            fn add(mut self, rhs: &str) -> Self::Output {
288                self.push_str(rhs);
289                self
290            }
291        }
292
293        impl AddAssign<&$utfstr> for $utfstring {
294            #[inline]
295            fn add_assign(&mut self, rhs: &$utfstr) {
296                self.push_utfstr(rhs)
297            }
298        }
299
300        impl AddAssign<&str> for $utfstring {
301            #[inline]
302            fn add_assign(&mut self, rhs: &str) {
303                self.push_str(rhs)
304            }
305        }
306
307        impl AsMut<$utfstr> for $utfstring {
308            #[inline]
309            fn as_mut(&mut self) -> &mut $utfstr {
310                self.as_mut_utfstr()
311            }
312        }
313
314        impl AsRef<$utfstr> for $utfstring {
315            #[inline]
316            fn as_ref(&self) -> &$utfstr {
317                self.as_utfstr()
318            }
319        }
320
321        impl AsRef<[$uchar]> for $utfstring {
322            #[inline]
323            fn as_ref(&self) -> &[$uchar] {
324                &self.inner
325            }
326        }
327
328        impl AsRef<crate::$ustr> for $utfstring {
329            #[inline]
330            fn as_ref(&self) -> &crate::$ustr {
331                self.as_ustr()
332            }
333        }
334
335        impl Borrow<$utfstr> for $utfstring {
336            #[inline]
337            fn borrow(&self) -> &$utfstr {
338                self.as_utfstr()
339            }
340        }
341
342        impl BorrowMut<$utfstr> for $utfstring {
343            #[inline]
344            fn borrow_mut(&mut self) -> &mut $utfstr {
345                self.as_mut_utfstr()
346            }
347        }
348
349        impl core::fmt::Debug for $utfstring {
350            #[inline]
351            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
352                core::fmt::Debug::fmt(self.as_utfstr(), f)
353            }
354        }
355
356        impl Deref for $utfstring {
357            type Target = $utfstr;
358
359            #[inline]
360            fn deref(&self) -> &Self::Target {
361                self.as_utfstr()
362            }
363        }
364
365        impl DerefMut for $utfstring {
366            #[inline]
367            fn deref_mut(&mut self) -> &mut Self::Target {
368                self.as_mut_utfstr()
369            }
370        }
371
372        impl core::fmt::Display for $utfstring {
373            #[inline]
374            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
375                core::fmt::Display::fmt(self.as_utfstr(), f)
376            }
377        }
378
379        impl Extend<char> for $utfstring {
380            #[inline]
381            fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
382                let iter = iter.into_iter();
383                let (lower_bound, _) = iter.size_hint();
384                self.reserve(lower_bound);
385                iter.for_each(|c| self.push(c));
386            }
387        }
388
389        impl<'a> Extend<&'a char> for $utfstring {
390            #[inline]
391            fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
392                self.extend(iter.into_iter().copied())
393            }
394        }
395
396        impl<'a> Extend<&'a $utfstr> for $utfstring {
397            #[inline]
398            fn extend<T: IntoIterator<Item = &'a $utfstr>>(&mut self, iter: T) {
399                iter.into_iter().for_each(|s| self.push_utfstr(s))
400            }
401        }
402
403        impl Extend<$utfstring> for $utfstring {
404            #[inline]
405            fn extend<T: IntoIterator<Item = $utfstring>>(&mut self, iter: T) {
406                iter.into_iter()
407                    .for_each(|s| self.push_utfstr(&s))
408            }
409        }
410
411        impl<'a> Extend<Cow<'a, $utfstr>> for $utfstring {
412            #[inline]
413            fn extend<T: IntoIterator<Item = Cow<'a, $utfstr>>>(&mut self, iter: T) {
414                iter.into_iter().for_each(|s| self.push_utfstr(&s))
415            }
416        }
417
418        impl Extend<Box<$utfstr>> for $utfstring {
419            #[inline]
420            fn extend<T: IntoIterator<Item = Box<$utfstr>>>(&mut self, iter: T) {
421                iter.into_iter().for_each(|s| self.push_utfstr(&s))
422            }
423        }
424
425        impl<'a> Extend<&'a str> for $utfstring {
426            #[inline]
427            fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
428                iter.into_iter().for_each(|s| self.push_str(s))
429            }
430        }
431
432        impl Extend<String> for $utfstring {
433            #[inline]
434            fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
435                iter.into_iter().for_each(|s| self.push_str(&s))
436            }
437        }
438
439        impl From<&mut $utfstr> for $utfstring {
440            #[inline]
441            fn from(value: &mut $utfstr) -> Self {
442                value.to_owned()
443            }
444        }
445
446        impl From<&$utfstr> for $utfstring {
447            #[inline]
448            fn from(value: &$utfstr) -> Self {
449                value.to_owned()
450            }
451        }
452
453        impl From<&$utfstring> for $utfstring {
454            #[inline]
455            fn from(value: &$utfstring) -> Self {
456                value.clone()
457            }
458        }
459
460        impl From<$utfstring> for Cow<'_, $utfstr> {
461            #[inline]
462            fn from(value: $utfstring) -> Self {
463                Cow::Owned(value)
464            }
465        }
466
467        impl<'a> From<&'a $utfstring> for Cow<'a, $utfstr> {
468            #[inline]
469            fn from(value: &'a $utfstring) -> Self {
470                Cow::Borrowed(value)
471            }
472        }
473
474        impl From<Cow<'_, $utfstr>> for $utfstring {
475            #[inline]
476            fn from(value: Cow<'_, $utfstr>) -> Self {
477                value.into_owned()
478            }
479        }
480
481        impl From<&str> for $utfstring {
482            #[inline]
483            fn from(value: &str) -> Self {
484                Self::from_str(value)
485            }
486        }
487
488        impl From<String> for $utfstring {
489            #[inline]
490            fn from(value: String) -> Self {
491                Self::from_str(&value)
492            }
493        }
494
495        impl From<$utfstring> for crate::$ustring {
496            #[inline]
497            fn from(value: $utfstring) -> Self {
498                crate::$ustring::from_vec(value.into_vec())
499            }
500        }
501
502        impl From<&$utfstr> for String {
503            #[inline]
504            fn from(value: &$utfstr) -> Self {
505                value.to_string()
506            }
507        }
508
509        impl From<$utfstring> for String {
510            #[inline]
511            fn from(value: $utfstring) -> Self {
512                value.to_string()
513            }
514        }
515
516        #[cfg(feature = "std")]
517        impl From<$utfstring> for std::ffi::OsString {
518            #[inline]
519            fn from(value: $utfstring) -> std::ffi::OsString {
520                value.as_ustr().to_os_string()
521            }
522        }
523
524        impl FromIterator<char> for $utfstring {
525            #[inline]
526            fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
527                let mut s = Self::new();
528                s.extend(iter);
529                s
530            }
531        }
532
533        impl<'a> FromIterator<&'a char> for $utfstring {
534            #[inline]
535            fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
536                let mut s = Self::new();
537                s.extend(iter);
538                s
539            }
540        }
541
542        impl<'a> FromIterator<&'a $utfstr> for $utfstring {
543            #[inline]
544            fn from_iter<T: IntoIterator<Item = &'a $utfstr>>(iter: T) -> Self {
545                let mut s = Self::new();
546                s.extend(iter);
547                s
548            }
549        }
550
551        impl FromIterator<$utfstring> for $utfstring {
552            fn from_iter<T: IntoIterator<Item = $utfstring>>(iter: T) -> Self {
553                let mut iterator = iter.into_iter();
554
555                // Because we're iterating over `String`s, we can avoid at least
556                // one allocation by getting the first string from the iterator
557                // and appending to it all the subsequent strings.
558                match iterator.next() {
559                    None => Self::new(),
560                    Some(mut buf) => {
561                        buf.extend(iterator);
562                        buf
563                    }
564                }
565            }
566        }
567
568        impl FromIterator<Box<$utfstr>> for $utfstring {
569            #[inline]
570            fn from_iter<T: IntoIterator<Item = Box<$utfstr>>>(iter: T) -> Self {
571                let mut s = Self::new();
572                s.extend(iter);
573                s
574            }
575        }
576
577        impl<'a> FromIterator<Cow<'a, $utfstr>> for $utfstring {
578            #[inline]
579            fn from_iter<T: IntoIterator<Item = Cow<'a, $utfstr>>>(iter: T) -> Self {
580                let mut s = Self::new();
581                s.extend(iter);
582                s
583            }
584        }
585
586        impl<'a> FromIterator<&'a str> for $utfstring {
587            #[inline]
588            fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
589                let mut s = Self::new();
590                s.extend(iter);
591                s
592            }
593        }
594
595        impl FromIterator<String> for $utfstring {
596            #[inline]
597            fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
598                let mut s = Self::new();
599                s.extend(iter);
600                s
601            }
602        }
603
604        impl FromStr for $utfstring {
605            type Err = Infallible;
606
607            #[inline]
608            fn from_str(s: &str) -> Result<Self, Self::Err> {
609                Ok($utfstring::from_str(s))
610            }
611        }
612
613        impl<I> Index<I> for $utfstring
614        where
615            I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
616        {
617            type Output = $utfstr;
618
619            #[inline]
620            fn index(&self, index: I) -> &Self::Output {
621                &self.deref()[index]
622            }
623        }
624
625        impl<I> IndexMut<I> for $utfstring
626        where
627            I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
628        {
629            #[inline]
630            fn index_mut(&mut self, index: I) -> &mut Self::Output {
631                &mut self.deref_mut()[index]
632            }
633        }
634
635        impl PartialEq<$utfstr> for $utfstring {
636            #[inline]
637            fn eq(&self, other: &$utfstr) -> bool {
638                self.as_slice() == other.as_slice()
639            }
640        }
641
642        impl PartialEq<&$utfstr> for $utfstring {
643            #[inline]
644            fn eq(&self, other: &&$utfstr) -> bool {
645                self.as_slice() == other.as_slice()
646            }
647        }
648
649        impl PartialEq<Cow<'_, $utfstr>> for $utfstring {
650            #[inline]
651            fn eq(&self, other: &Cow<'_, $utfstr>) -> bool {
652                self == other.as_ref()
653            }
654        }
655
656        impl PartialEq<$utfstring> for Cow<'_, $utfstr> {
657            #[inline]
658            fn eq(&self, other: &$utfstring) -> bool {
659                self.as_ref() == other
660            }
661        }
662
663        impl PartialEq<$utfstring> for $utfstr {
664            #[inline]
665            fn eq(&self, other: &$utfstring) -> bool {
666                self.as_slice() == other.as_slice()
667            }
668        }
669
670        impl PartialEq<$utfstring> for &$utfstr {
671            #[inline]
672            fn eq(&self, other: &$utfstring) -> bool {
673                self.as_slice() == other.as_slice()
674            }
675        }
676
677        impl PartialEq<str> for $utfstring {
678            #[inline]
679            fn eq(&self, other: &str) -> bool {
680                self.chars().eq(other.chars())
681            }
682        }
683
684        impl PartialEq<&str> for $utfstring {
685            #[inline]
686            fn eq(&self, other: &&str) -> bool {
687                self.chars().eq(other.chars())
688            }
689        }
690
691        impl PartialEq<$utfstring> for str {
692            #[inline]
693            fn eq(&self, other: &$utfstring) -> bool {
694                self.chars().eq(other.chars())
695            }
696        }
697
698        impl PartialEq<$utfstring> for &str {
699            #[inline]
700            fn eq(&self, other: &$utfstring) -> bool {
701                self.chars().eq(other.chars())
702            }
703        }
704
705        impl PartialEq<String> for $utfstring {
706            #[inline]
707            fn eq(&self, other: &String) -> bool {
708                self.chars().eq(other.chars())
709            }
710        }
711
712        impl PartialEq<$utfstring> for String {
713            #[inline]
714            fn eq(&self, other: &$utfstring) -> bool {
715                self.chars().eq(other.chars())
716            }
717        }
718
719        impl PartialEq<String> for $utfstr {
720            #[inline]
721            fn eq(&self, other: &String) -> bool {
722                self.chars().eq(other.chars())
723            }
724        }
725
726        impl PartialEq<$utfstr> for String {
727            #[inline]
728            fn eq(&self, other: &$utfstr) -> bool {
729                self.chars().eq(other.chars())
730            }
731        }
732
733        impl PartialEq<Cow<'_, str>> for $utfstring {
734            #[inline]
735            fn eq(&self, other: &Cow<'_, str>) -> bool {
736                self == other.as_ref()
737            }
738        }
739
740        impl PartialEq<$utfstring> for Cow<'_, str> {
741            #[inline]
742            fn eq(&self, other: &$utfstring) -> bool {
743                self.as_ref() == other
744            }
745        }
746
747        impl PartialEq<crate::$ustr> for $utfstring {
748            #[inline]
749            fn eq(&self, other: &crate::$ustr) -> bool {
750                self.as_slice() == other.as_slice()
751            }
752        }
753
754        impl PartialEq<$utfstring> for crate::$ustr {
755            #[inline]
756            fn eq(&self, other: &$utfstring) -> bool {
757                self.as_slice() == other.as_slice()
758            }
759        }
760
761        impl PartialEq<crate::$ustring> for $utfstring {
762            #[inline]
763            fn eq(&self, other: &crate::$ustring) -> bool {
764                self.as_slice() == other.as_slice()
765            }
766        }
767
768        impl PartialEq<$utfstring> for crate::$ustring {
769            #[inline]
770            fn eq(&self, other: &$utfstring) -> bool {
771                self.as_slice() == other.as_slice()
772            }
773        }
774
775        impl PartialEq<crate::$ustring> for $utfstr {
776            #[inline]
777            fn eq(&self, other: &crate::$ustring) -> bool {
778                self.as_slice() == other.as_slice()
779            }
780        }
781
782        impl PartialEq<$utfstr> for crate::$ustring {
783            #[inline]
784            fn eq(&self, other: &$utfstr) -> bool {
785                self.as_slice() == other.as_slice()
786            }
787        }
788
789        impl PartialEq<crate::$ucstr> for $utfstring {
790            #[inline]
791            fn eq(&self, other: &crate::$ucstr) -> bool {
792                self.as_slice() == other.as_slice()
793            }
794        }
795
796        impl PartialEq<$utfstring> for crate::$ucstr {
797            #[inline]
798            fn eq(&self, other: &$utfstring) -> bool {
799                self.as_slice() == other.as_slice()
800            }
801        }
802
803        impl PartialEq<crate::$ucstring> for $utfstring {
804            #[inline]
805            fn eq(&self, other: &crate::$ucstring) -> bool {
806                self.as_slice() == other.as_slice()
807            }
808        }
809
810        impl PartialEq<$utfstring> for crate::$ucstring {
811            #[inline]
812            fn eq(&self, other: &$utfstring) -> bool {
813                self.as_slice() == other.as_slice()
814            }
815        }
816
817        impl PartialEq<crate::$ucstring> for $utfstr {
818            #[inline]
819            fn eq(&self, other: &crate::$ucstring) -> bool {
820                self.as_slice() == other.as_slice()
821            }
822        }
823
824        impl PartialEq<$utfstr> for crate::$ucstring {
825            #[inline]
826            fn eq(&self, other: &$utfstr) -> bool {
827                self.as_slice() == other.as_slice()
828            }
829        }
830
831        impl ToOwned for $utfstr {
832            type Owned = $utfstring;
833
834            #[inline]
835            fn to_owned(&self) -> Self::Owned {
836                unsafe { $utfstring::from_vec_unchecked(&self.inner) }
837            }
838        }
839
840        impl TryFrom<crate::$ustring> for $utfstring {
841            type Error = $utferror;
842
843            #[inline]
844            fn try_from(value: crate::$ustring) -> Result<Self, Self::Error> {
845                $utfstring::from_ustring(value)
846            }
847        }
848
849        impl TryFrom<crate::$ucstring> for $utfstring {
850            type Error = $utferror;
851
852            #[inline]
853            fn try_from(value: crate::$ucstring) -> Result<Self, Self::Error> {
854                $utfstring::from_ustring(value)
855            }
856        }
857
858        impl TryFrom<&crate::$ustr> for $utfstring {
859            type Error = $utferror;
860
861            #[inline]
862            fn try_from(value: &crate::$ustr) -> Result<Self, Self::Error> {
863                $utfstring::from_ustring(value)
864            }
865        }
866
867        impl TryFrom<&crate::$ucstr> for $utfstring {
868            type Error = $utferror;
869
870            #[inline]
871            fn try_from(value: &crate::$ucstr) -> Result<Self, Self::Error> {
872                $utfstring::from_ustring(value)
873            }
874        }
875
876        impl Write for $utfstring {
877            #[inline]
878            fn write_str(&mut self, s: &str) -> core::fmt::Result {
879                self.push_str(s);
880                Ok(())
881            }
882
883            #[inline]
884            fn write_char(&mut self, c: char) -> core::fmt::Result {
885                self.push(c);
886                Ok(())
887            }
888        }
889    };
890}
891
892utfstring_common_impl! {
893    /// A UTF-16 encoded, growable owned string.
894    ///
895    /// [`Utf16String`] is a version of [`String`] that uses UTF-16 encoding instead of UTF-8
896    /// encoding. The equivalent of [`str`] for [`Utf16String`] is [`Utf16Str`].
897    ///
898    /// Unlike [`U16String`][crate::U16String] which does not specify a coding, [`Utf16String`] is
899    /// always valid UTF-16 encoding. Using unsafe methods to construct a [`Utf16String`] with
900    /// invalid UTF-16 encoding results in undefined behavior.
901    ///
902    /// # UTF-16
903    ///
904    /// [`Utf16String`] is always UTF-16. This means if you need non-UTF-16 wide strings, you should
905    /// use [`U16String`][crate::U16String] instead. It is similar, but does not constrain the
906    /// encoding.
907    ///
908    /// This also means you cannot directly index a single element of the string, as UTF-16 encoding
909    /// may be a single `u16` value or a pair of `u16` surrogates. Instead, you can index subslices
910    /// of the string, or use the [`chars`][Utf16Str::chars] iterator instead.
911    ///
912    /// # Examples
913    ///
914    /// The easiest way to use [`Utf16String`] is with the [`utf16str!`][crate::utf16str] macro to
915    /// convert string literals into UTF-16 string slices at compile time:
916    ///
917    /// ```
918    /// use widestring::{Utf16String, utf16str};
919    /// let hello = Utf16String::from(utf16str!("Hello, world!"));
920    /// ```
921    ///
922    /// Because this string is always valid UTF-16, it is a non-fallible, lossless conversion to and
923    /// from standard Rust strings:
924    ///
925    /// ```
926    /// use widestring::Utf16String;
927    /// // Unlike the utf16str macro, this will do conversion at runtime instead of compile time
928    /// let hello = Utf16String::from_str("Hello, world!");
929    /// let hello_string: String = hello.to_string();
930    /// assert_eq!(hello, hello_string); // Can easily compare between string types
931    /// ```
932    struct Utf16String([u16]);
933
934    type UtfStr = Utf16Str;
935    type UStr = U16Str;
936    type UCStr = U16CStr;
937    type UString = U16String;
938    type UCString = U16CString;
939    type UtfError = Utf16Error;
940
941    /// Converts a [`u16`] vector to a string without checking that the string contains valid
942    /// UTF-16.
943    ///
944    /// See the safe version, [`from_vec`][Self::from_vec], for more information.
945    ///
946    /// # Safety
947    ///
948    /// This function is unsafe because it does not check that the vector passed to it is valid
949    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
950    /// [`Utf16String`] is always valid UTF-16.
951    ///
952    /// # Examples
953    ///
954    /// ```
955    /// use widestring::Utf16String;
956    ///
957    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
958    /// let sparkle_heart = unsafe { Utf16String::from_vec_unchecked(sparkle_heart) };
959    ///
960    /// assert_eq!("💖", sparkle_heart);
961    /// ```
962    fn from_vec_unchecked() -> {}
963
964    /// Re-encodes a UTF-8--encoded string slice into a UTF-16--encoded string.
965    ///
966    /// This operation is lossless and infallible, but requires a memory allocation.
967    ///
968    /// # Examples
969    ///
970    /// ```
971    /// # use widestring::utf16str;
972    /// use widestring::Utf16String;
973    /// let music = Utf16String::from_str("𝄞music");
974    /// assert_eq!(utf16str!("𝄞music"), music);
975    /// ```
976    fn from_str() -> {}
977
978    /// Appends a given string slice onto the end of this string.
979    ///
980    /// # Examples
981    ///
982    /// ```
983    /// # use widestring::utf16str;
984    /// use widestring::Utf16String;
985    /// let mut s = Utf16String::from_str("foo");
986    /// s.push_utfstr(utf16str!("bar"));
987    /// assert_eq!(utf16str!("foobar"), s);
988    /// ```
989    fn push_utfstr() -> {}
990
991    /// Returns a mutable reference to the contents of this string.
992    ///
993    /// # Safety
994    ///
995    /// This function is unsafe because it does not check that the values in the vector are valid
996    /// UTF-16. If this constraint is violated, it may cause undefined behavior with future
997    /// users of the string, as it is assumed that this string is always valid UTF-16.
998    fn as_mut_vec() -> {}
999}
1000
1001utfstring_common_impl! {
1002    /// A UTF-32 encoded, growable owned string.
1003    ///
1004    /// [`Utf32String`] is a version of [`String`] that uses UTF-32 encoding instead of UTF-8
1005    /// encoding. The equivalent of [`str`] for [`Utf32String`] is [`Utf32Str`].
1006    ///
1007    /// Unlike [`U32String`][crate::U32String] which does not specify a coding, [`Utf32String`] is
1008    /// always valid UTF-32 encoding. Using unsafe methods to construct a [`Utf32String`] with
1009    /// invalid UTF-32 encoding results in undefined behavior.
1010    ///
1011    /// # UTF-32
1012    ///
1013    /// [`Utf32String`] is always UTF-32. This means if you need non-UTF-32 wide strings, you should
1014    /// use [`U32String`][crate::U32String] instead. It is similar, but does not constrain the
1015    /// encoding.
1016    ///
1017    /// Unlike UTF-16 or UTF-8 strings, you may index single elements of UTF-32 strings in addition
1018    /// to subslicing. This is due to it being a fixed-length encoding for [`char`]s. This also
1019    /// means that [`Utf32String`] is the same representation as a `Vec<char>`; indeed conversions
1020    /// between the two exist and are simple typecasts.
1021    ///
1022    /// # Examples
1023    ///
1024    /// The easiest way to use [`Utf32String`] is with the [`utf32str!`][crate::utf32str] macro to
1025    /// convert string literals into UTF-32 string slices at compile time:
1026    ///
1027    /// ```
1028    /// use widestring::{Utf32String, utf32str};
1029    /// let hello = Utf32String::from(utf32str!("Hello, world!"));
1030    /// ```
1031    ///
1032    /// Because this string is always valid UTF-32, it is a non-fallible, lossless conversion to and
1033    /// from standard Rust strings:
1034    ///
1035    /// ```
1036    /// use widestring::Utf32String;
1037    /// // Unlike the utf32str macro, this will do conversion at runtime instead of compile time
1038    /// let hello = Utf32String::from_str("Hello, world!");
1039    /// let hello_string: String = hello.to_string();
1040    /// assert_eq!(hello, hello_string); // Can easily compare between string types
1041    /// ```
1042    struct Utf32String([u32]);
1043
1044    type UtfStr = Utf32Str;
1045    type UStr = U32Str;
1046    type UCStr = U32CStr;
1047    type UString = U32String;
1048    type UCString = U32CString;
1049    type UtfError = Utf32Error;
1050
1051    /// Converts a [`u32`] vector to a string without checking that the string contains valid
1052    /// UTF-32.
1053    ///
1054    /// See the safe version, [`from_vec`][Self::from_vec], for more information.
1055    ///
1056    /// # Safety
1057    ///
1058    /// This function is unsafe because it does not check that the vector passed to it is valid
1059    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
1060    /// [`Utf32String`] is always valid UTF-32.
1061    ///
1062    /// # Examples
1063    ///
1064    /// ```
1065    /// use widestring::Utf32String;
1066    ///
1067    /// let sparkle_heart = vec![0x1f496];
1068    /// let sparkle_heart = unsafe { Utf32String::from_vec_unchecked(sparkle_heart) };
1069    ///
1070    /// assert_eq!("💖", sparkle_heart);
1071    /// ```
1072    fn from_vec_unchecked() -> {}
1073
1074    /// Re-encodes a UTF-8--encoded string slice into a UTF-32--encoded string.
1075    ///
1076    /// This operation is lossless and infallible, but requires a memory allocation.
1077    ///
1078    /// # Examples
1079    ///
1080    /// ```
1081    /// # use widestring::utf32str;
1082    /// use widestring::Utf32String;
1083    /// let music = Utf32String::from_str("𝄞music");
1084    /// assert_eq!(utf32str!("𝄞music"), music);
1085    /// ```
1086    fn from_str() -> {}
1087
1088    /// Appends a given string slice onto the end of this string.
1089    ///
1090    /// # Examples
1091    ///
1092    /// ```
1093    /// # use widestring::utf32str;
1094    /// use widestring::Utf32String;
1095    /// let mut s = Utf32String::from_str("foo");
1096    /// s.push_utfstr(utf32str!("bar"));
1097    /// assert_eq!(utf32str!("foobar"), s);
1098    /// ```
1099    fn push_utfstr() -> {}
1100
1101    /// Returns a mutable reference to the contents of this string.
1102    ///
1103    /// # Safety
1104    ///
1105    /// This function is unsafe because it does not check that the values in the vector are valid
1106    /// UTF-32. If this constraint is violated, it may cause undefined behavior with future
1107    /// users of the string, as it is assumed that this string is always valid UTF-32.
1108    fn as_mut_vec() -> {}
1109}
1110
1111impl Utf16String {
1112    /// Converts a [`u16`] vector of UTF-16 data to a string.
1113    ///
1114    /// Not all slices of [`u16`] values are valid to convert, since [`Utf16String`] requires that
1115    /// it is always valid UTF-16. This function checks to ensure that the values are valid UTF-16,
1116    /// and then does the conversion. This does not do any copying.
1117    ///
1118    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1119    /// the validity check, there is an unsafe version of this function,
1120    /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1121    /// the check.
1122    ///
1123    /// If you need a string slice, consider using [`Utf16Str::from_slice`] instead.
1124    ///
1125    /// The inverse of this method is [`into_vec`][Self::into_vec].
1126    ///
1127    /// # Errors
1128    ///
1129    /// Returns an error if the vector is not UTF-16 with a description as to why the provided
1130    /// vector is not UTF-16. The error will contain the original [`Vec`] that can be reclaimed with
1131    /// [`into_vec`][Utf16Error::into_vec].
1132    ///
1133    /// # Examples
1134    ///
1135    /// ```
1136    /// use widestring::Utf16String;
1137    ///
1138    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1139    /// let sparkle_heart = Utf16String::from_vec(sparkle_heart).unwrap();
1140    ///
1141    /// assert_eq!("💖", sparkle_heart);
1142    /// ```
1143    ///
1144    /// With incorrect values that return an error:
1145    ///
1146    /// ```
1147    /// use widestring::Utf16String;
1148    ///
1149    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1150    ///
1151    /// assert!(Utf16String::from_vec(sparkle_heart).is_err());
1152    /// ```
1153    pub fn from_vec(v: impl Into<Vec<u16>>) -> Result<Self, Utf16Error> {
1154        let v = validate_utf16_vec(v.into())?;
1155        Ok(unsafe { Self::from_vec_unchecked(v) })
1156    }
1157
1158    /// Converts a slice of [`u16`] data to a string, including invalid characters.
1159    ///
1160    /// Since the given [`u16`] slice may not be valid UTF-16, and [`Utf16String`] requires that
1161    /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1162    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1163    /// looks like this: �
1164    ///
1165    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1166    /// the conversion, there is an unsafe version of this function,
1167    /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1168    /// the checks.
1169    ///
1170    /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1171    /// invalid UTF-16, then we need to substitute our replacement characters, which means modifying
1172    /// the data, and hence, requires an owned [`Utf16String`]. But if it's already valid
1173    /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1174    ///
1175    /// # Examples
1176    ///
1177    /// ```
1178    /// # use widestring::utf16str;
1179    /// use widestring::Utf16String;
1180    ///
1181    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1182    /// let sparkle_heart = Utf16String::from_slice_lossy(&sparkle_heart);
1183    ///
1184    /// assert_eq!(utf16str!("💖"), sparkle_heart);
1185    /// ```
1186    ///
1187    /// With invalid values, which are replaced rather than rejected:
1188    ///
1189    /// ```
1190    /// # use widestring::utf16str;
1191    /// use widestring::Utf16String;
1192    ///
1193    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1194    /// let sparkle_heart = Utf16String::from_slice_lossy(&sparkle_heart);
1195    ///
1196    /// assert_eq!(utf16str!("\u{fffd}\u{0000}"), sparkle_heart);
1197    /// ```
1198    #[must_use]
1199    pub fn from_slice_lossy(s: &[u16]) -> Cow<'_, Utf16Str> {
1200        match validate_utf16(s) {
1201            // SAFETY: validated as UTF-16
1202            Ok(()) => Cow::Borrowed(unsafe { Utf16Str::from_slice_unchecked(s) }),
1203            Err(e) => {
1204                let mut v = Vec::with_capacity(s.len());
1205                // Valid up until index
1206                v.extend_from_slice(&s[..e.index()]);
1207                let mut index = e.index();
1208                let mut replacement_char = [0; 2];
1209                let replacement_char =
1210                    char::REPLACEMENT_CHARACTER.encode_utf16(&mut replacement_char);
1211                while index < s.len() {
1212                    let u = s[index];
1213                    if is_utf16_surrogate(u) {
1214                        if is_utf16_low_surrogate(u) || index + 1 >= s.len() {
1215                            v.extend_from_slice(replacement_char);
1216                        } else {
1217                            let low = s[index + 1];
1218                            if is_utf16_low_surrogate(low) {
1219                                // Valid surrogate pair
1220                                v.push(u);
1221                                v.push(low);
1222                                index += 1;
1223                            } else {
1224                                v.extend_from_slice(replacement_char);
1225                            }
1226                        }
1227                    } else {
1228                        v.push(u);
1229                    }
1230                    index += 1;
1231                }
1232                // SATEFY: Is now valid UTF-16 with replacement chars
1233                Cow::Owned(unsafe { Self::from_vec_unchecked(v) })
1234            }
1235        }
1236    }
1237
1238    /// Converts a wide string of undefined encoding to a UTF-16 string without checking that the
1239    /// string contains valid UTF-16.
1240    ///
1241    /// See the safe version, [`from_ustring`][Self::from_ustring], for more information.
1242    ///
1243    /// # Safety
1244    ///
1245    /// This function is unsafe because it does not check that the string passed to it is valid
1246    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
1247    /// [`Utf16String`] is always valid UTF-16.
1248    ///
1249    /// # Examples
1250    ///
1251    /// ```
1252    /// use widestring::{U16String, Utf16String};
1253    ///
1254    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1255    /// let sparkle_heart = U16String::from_vec(sparkle_heart);
1256    /// let sparkle_heart = unsafe { Utf16String::from_ustring_unchecked(sparkle_heart) };
1257    ///
1258    /// assert_eq!("💖", sparkle_heart);
1259    /// ```
1260    #[inline]
1261    #[must_use]
1262    pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U16String>) -> Self {
1263        Self::from_vec_unchecked(s.into().into_vec())
1264    }
1265
1266    /// Converts a wide string of undefined encoding into a UTF-16 string.
1267    ///
1268    /// Not all strings with undefined encoding are valid to convert, since [`Utf16String`] requires
1269    /// that it is always valid UTF-16. This function checks to ensure that the string is valid
1270    /// UTF-16, and then does the conversion. This does not do any copying.
1271    ///
1272    /// If you are sure that the string is valid UTF-16, and you don't want to incur the overhead of
1273    /// the validity check, there is an unsafe version of this function,
1274    /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1275    /// skips the check.
1276    ///
1277    /// If you need a string slice, consider using [`Utf16Str::from_ustr`] instead.
1278    ///
1279    /// # Errors
1280    ///
1281    /// Returns an error if the string is not UTF-16 with a description as to why the provided
1282    /// string is not UTF-16.
1283    ///
1284    /// # Examples
1285    ///
1286    /// ```
1287    /// use widestring::{U16String, Utf16String};
1288    ///
1289    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1290    /// let sparkle_heart = U16String::from_vec(sparkle_heart);
1291    /// let sparkle_heart = Utf16String::from_ustring(sparkle_heart).unwrap();
1292    ///
1293    /// assert_eq!("💖", sparkle_heart);
1294    /// ```
1295    ///
1296    /// With incorrect values that return an error:
1297    ///
1298    /// ```
1299    /// use widestring::{U16String, Utf16String};
1300    ///
1301    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1302    /// let sparkle_heart = U16String::from_vec(sparkle_heart); // Valid for a U16String
1303    ///
1304    /// assert!(Utf16String::from_ustring(sparkle_heart).is_err()); // But not for a Utf16String
1305    /// ```
1306    #[inline]
1307    pub fn from_ustring(s: impl Into<crate::U16String>) -> Result<Self, Utf16Error> {
1308        Self::from_vec(s.into().into_vec())
1309    }
1310
1311    /// Converts a wide string slice of undefined encoding to a UTF-16 string, including invalid
1312    /// characters.
1313    ///
1314    /// Since the given string slice may not be valid UTF-16, and [`Utf16String`] requires that
1315    /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1316    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1317    /// looks like this: �
1318    ///
1319    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1320    /// the conversion, there is an unsafe version of this function,
1321    /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1322    /// skips the checks.
1323    ///
1324    /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1325    /// invalid UTF-16, then we need to substitute our replacement characters, which means modifying
1326    /// the data, and hence, requires an owned [`Utf16String`]. But if it's already valid
1327    /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1328    ///
1329    /// # Examples
1330    ///
1331    /// ```
1332    /// # use widestring::utf16str;
1333    /// use widestring::{U16Str, Utf16String};
1334    ///
1335    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1336    /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
1337    /// let sparkle_heart = Utf16String::from_ustr_lossy(sparkle_heart);
1338    ///
1339    /// assert_eq!(utf16str!("💖"), sparkle_heart);
1340    /// ```
1341    ///
1342    /// With invalid values, which are replaced rather than rejected:
1343    ///
1344    /// ```
1345    /// # use widestring::utf16str;
1346    /// use widestring::{U16Str, Utf16String};
1347    ///
1348    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1349    /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
1350    /// let sparkle_heart = Utf16String::from_ustr_lossy(sparkle_heart);
1351    ///
1352    /// assert_eq!(utf16str!("\u{fffd}\u{0000}"), sparkle_heart);
1353    /// ```
1354    #[inline]
1355    #[must_use]
1356    pub fn from_ustr_lossy(s: &crate::U16Str) -> Cow<'_, Utf16Str> {
1357        Self::from_slice_lossy(s.as_slice())
1358    }
1359
1360    /// Converts a wide C string to a UTF-16 string without checking that the string contains
1361    /// valid UTF-16.
1362    ///
1363    /// The resulting string does *not* contain the nul terminator.
1364    ///
1365    /// See the safe version, [`from_ucstring`][Self::from_ucstring], for more information.
1366    ///
1367    /// # Safety
1368    ///
1369    /// This function is unsafe because it does not check that the string passed to it is valid
1370    /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
1371    /// [`Utf16String`] is always valid UTF-16.
1372    ///
1373    /// # Examples
1374    ///
1375    /// ```
1376    /// use widestring::{U16CString, Utf16String};
1377    ///
1378    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1379    /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap();
1380    /// let sparkle_heart = unsafe { Utf16String::from_ucstring_unchecked(sparkle_heart) };
1381    ///
1382    /// assert_eq!("💖", sparkle_heart);
1383    /// ```
1384    #[inline]
1385    #[must_use]
1386    pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U16CString>) -> Self {
1387        Self::from_vec_unchecked(s.into().into_vec())
1388    }
1389
1390    /// Converts a wide C string into a UTF-16 string.
1391    ///
1392    /// The resulting string does *not* contain the nul terminator.
1393    ///
1394    /// Not all wide C strings are valid to convert, since [`Utf16String`] requires that
1395    /// it is always valid UTF-16. This function checks to ensure that the string is valid UTF-16,
1396    /// and then does the conversion. This does not do any copying.
1397    ///
1398    /// If you are sure that the string is valid UTF-16, and you don't want to incur the overhead of
1399    /// the validity check, there is an unsafe version of this function,
1400    /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
1401    /// skips the check.
1402    ///
1403    /// If you need a string slice, consider using [`Utf16Str::from_ucstr`] instead.
1404    ///
1405    /// # Errors
1406    ///
1407    /// Returns an error if the string is not UTF-16 with a description as to why the provided
1408    /// string is not UTF-16.
1409    ///
1410    /// # Examples
1411    ///
1412    /// ```
1413    /// use widestring::{U16CString, Utf16String};
1414    ///
1415    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1416    /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap();
1417    /// let sparkle_heart = Utf16String::from_ucstring(sparkle_heart).unwrap();
1418    ///
1419    /// assert_eq!("💖", sparkle_heart);
1420    /// ```
1421    ///
1422    /// With incorrect values that return an error:
1423    ///
1424    /// ```
1425    /// use widestring::{U16CString, Utf16String};
1426    ///
1427    /// let sparkle_heart = vec![0xd83d]; // This is an invalid unpaired surrogate
1428    /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap(); // Valid for a U16CString
1429    ///
1430    /// assert!(Utf16String::from_ucstring(sparkle_heart).is_err()); // But not for a Utf16String
1431    /// ```
1432    #[inline]
1433    pub fn from_ucstring(s: impl Into<crate::U16CString>) -> Result<Self, Utf16Error> {
1434        Self::from_vec(s.into().into_vec())
1435    }
1436
1437    /// Converts a wide C string slice to a UTF-16 string, including invalid characters.
1438    ///
1439    /// The resulting string does *not* contain the nul terminator.
1440    ///
1441    /// Since the given string slice may not be valid UTF-16, and [`Utf16String`] requires that
1442    /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1443    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1444    /// looks like this: �
1445    ///
1446    /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1447    /// the conversion, there is an unsafe version of this function,
1448    /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
1449    /// skips the checks.
1450    ///
1451    /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1452    /// invalid UTF-16, then we need to substitute our replacement characters, which means modifying
1453    /// the data, and hence, requires an owned [`Utf16String`]. But if it's already valid
1454    /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1455    ///
1456    /// # Examples
1457    ///
1458    /// ```
1459    /// # use widestring::utf16str;
1460    /// use widestring::{U16CStr, Utf16String};
1461    ///
1462    /// let sparkle_heart = vec![0xd83d, 0xdc96, 0x0]; // Raw surrogate pair
1463    /// let sparkle_heart = U16CStr::from_slice(&sparkle_heart).unwrap();
1464    /// let sparkle_heart = Utf16String::from_ucstr_lossy(sparkle_heart);
1465    ///
1466    /// assert_eq!(utf16str!("💖"), sparkle_heart);
1467    /// ```
1468    ///
1469    /// With invalid values, which are replaced rather than rejected:
1470    ///
1471    /// ```
1472    /// # use widestring::utf16str;
1473    /// use widestring::{U16CStr, Utf16String};
1474    ///
1475    /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1476    /// let sparkle_heart = U16CStr::from_slice(&sparkle_heart).unwrap();
1477    /// let sparkle_heart = Utf16String::from_ucstr_lossy(sparkle_heart);
1478    ///
1479    /// assert_eq!(utf16str!("\u{fffd}"), sparkle_heart);
1480    /// ```
1481    #[inline]
1482    #[must_use]
1483    pub fn from_ucstr_lossy(s: &crate::U16CStr) -> Cow<'_, Utf16Str> {
1484        Self::from_slice_lossy(s.as_slice())
1485    }
1486
1487    /// Appends the given [`char`] to the end of this string.
1488    ///
1489    /// # Examples
1490    ///
1491    /// ```
1492    /// use widestring::Utf16String;
1493    /// let mut s = Utf16String::from_str("abc");
1494    ///
1495    /// s.push('1');
1496    /// s.push('2');
1497    /// s.push('3');
1498    ///
1499    /// assert_eq!("abc123", s);
1500    /// ```
1501    #[inline]
1502    pub fn push(&mut self, ch: char) {
1503        let mut buf = [0; 2];
1504        self.inner.extend_from_slice(ch.encode_utf16(&mut buf))
1505    }
1506
1507    /// Shortens this string to the specified length.
1508    ///
1509    /// If `new_len` is greater than the string's current length, this has no effect.
1510    ///
1511    /// Note that this method has no effect on the allocated capacity of the string.
1512    ///
1513    /// # Panics
1514    ///
1515    /// Panics if `new_len` does not lie on a [`char`] boundary.
1516    ///
1517    /// # Examples
1518    ///
1519    /// ```
1520    /// use widestring::Utf16String;
1521    /// let mut s = Utf16String::from_str("hello");
1522    /// s.truncate(2);
1523    /// assert_eq!("he", s);
1524    /// ```
1525    #[inline]
1526    pub fn truncate(&mut self, new_len: usize) {
1527        if new_len <= self.len() {
1528            assert!(self.is_char_boundary(new_len));
1529            self.inner.truncate(new_len)
1530        }
1531    }
1532
1533    /// Removes the last character from the string buffer and returns it.
1534    ///
1535    /// Returns [`None`] if this string is empty.
1536    ///
1537    /// # Examples
1538    ///
1539    /// ```
1540    /// use widestring::Utf16String;
1541    /// let mut s = Utf16String::from_str("foo𝄞");
1542    ///
1543    /// assert_eq!(s.pop(), Some('𝄞'));
1544    /// assert_eq!(s.pop(), Some('o'));
1545    /// assert_eq!(s.pop(), Some('o'));
1546    /// assert_eq!(s.pop(), Some('f'));
1547    ///
1548    /// assert_eq!(s.pop(), None);
1549    /// ```
1550    pub fn pop(&mut self) -> Option<char> {
1551        let c = self.inner.pop();
1552        if let Some(c) = c {
1553            if is_utf16_low_surrogate(c) {
1554                let high = self.inner.pop().unwrap();
1555                // SAFETY: string is always valid UTF-16, so pair is valid
1556                Some(unsafe { decode_utf16_surrogate_pair(high, c) })
1557            } else {
1558                // SAFETY: not a surrogate
1559                Some(unsafe { char::from_u32_unchecked(c as u32) })
1560            }
1561        } else {
1562            None
1563        }
1564    }
1565
1566    /// Removes a [`char`] from this string at an offset and returns it.
1567    ///
1568    /// This is an _O(n)_ operation, as it requires copying every element in the buffer.
1569    ///
1570    /// # Panics
1571    ///
1572    /// Panics if `idx` is larger than or equal to the string's length, or if it does not lie on a
1573    /// [`char`] boundary.
1574    ///
1575    /// # Examples
1576    ///
1577    /// ```
1578    /// use widestring::Utf16String;
1579    /// let mut s = Utf16String::from_str("𝄞foo");
1580    ///
1581    /// assert_eq!(s.remove(0), '𝄞');
1582    /// assert_eq!(s.remove(1), 'o');
1583    /// assert_eq!(s.remove(0), 'f');
1584    /// assert_eq!(s.remove(0), 'o');
1585    /// ```
1586    #[inline]
1587    pub fn remove(&mut self, idx: usize) -> char {
1588        let c = self[idx..].chars().next().unwrap();
1589        let next = idx + c.len_utf16();
1590        let len = self.len();
1591        unsafe {
1592            ptr::copy(
1593                self.inner.as_ptr().add(next),
1594                self.inner.as_mut_ptr().add(idx),
1595                len - next,
1596            );
1597            self.inner.set_len(len - (next - idx));
1598        }
1599        c
1600    }
1601
1602    /// Retains only the characters specified by the predicate.
1603    ///
1604    /// In other words, remove all characters `c` such that `f(c)` returns `false`. This method
1605    /// operates in place, visiting each character exactly once in the original order, and preserves
1606    /// the order of the retained characters.
1607    ///
1608    /// # Examples
1609    ///
1610    /// ```
1611    /// use widestring::Utf16String;
1612    /// let mut s = Utf16String::from_str("f_o_ob_ar");
1613    ///
1614    /// s.retain(|c| c != '_');
1615    ///
1616    /// assert_eq!(s, "foobar");
1617    /// ```
1618    ///
1619    /// Because the elements are visited exactly once in the original order, external state may be
1620    /// used to decide which elements to keep.
1621    ///
1622    /// ```
1623    /// use widestring::Utf16String;
1624    /// let mut s = Utf16String::from_str("abcde");
1625    /// let keep = [false, true, true, false, true];
1626    /// let mut iter = keep.iter();
1627    /// s.retain(|_| *iter.next().unwrap());
1628    /// assert_eq!(s, "bce");
1629    /// ```
1630    pub fn retain<F>(&mut self, mut f: F)
1631    where
1632        F: FnMut(char) -> bool,
1633    {
1634        let mut index = 0;
1635        while index < self.len() {
1636            // SAFETY: always in bounds and incremented by len_utf16 only
1637            let c = unsafe { self.get_unchecked(index..) }
1638                .chars()
1639                .next()
1640                .unwrap();
1641            if !f(c) {
1642                self.inner.drain(index..index + c.len_utf16());
1643            } else {
1644                index += c.len_utf16();
1645            }
1646        }
1647    }
1648
1649    /// Inserts a character into this string at an offset.
1650    ///
1651    /// This is an _O(n)_ operation as it requires copying every element in the buffer.
1652    ///
1653    /// # Panics
1654    ///
1655    /// Panics if `idx` is larger than the string's length, or if it does not lie on a [`char`]
1656    /// boundary.
1657    ///
1658    /// # Examples
1659    ///
1660    /// ```
1661    /// use widestring::Utf16String;
1662    /// let mut s = Utf16String::with_capacity(5);
1663    ///
1664    /// s.insert(0, '𝄞');
1665    /// s.insert(0, 'f');
1666    /// s.insert(1, 'o');
1667    /// s.insert(4, 'o');
1668    ///
1669    /// assert_eq!("fo𝄞o", s);
1670    /// ```
1671    #[inline]
1672    pub fn insert(&mut self, idx: usize, ch: char) {
1673        assert!(self.is_char_boundary(idx));
1674        let mut bits = [0; 2];
1675        let bits = ch.encode_utf16(&mut bits);
1676
1677        unsafe {
1678            self.insert_slice(idx, bits);
1679        }
1680    }
1681
1682    /// Inserts a UTF-16 string slice into this string at an offset.
1683    ///
1684    /// This is an _O(n)_ operation as it requires copying every element in the buffer.
1685    ///
1686    /// # Panics
1687    ///
1688    /// Panics if `idx` is larger than the string's length, or if it does not lie on a [`char`]
1689    /// boundary.
1690    ///
1691    /// # Examples
1692    ///
1693    /// ```
1694    /// # use widestring::utf16str;
1695    /// use widestring::Utf16String;
1696    /// let mut s = Utf16String::from_str("bar");
1697    ///
1698    /// s.insert_utfstr(0, utf16str!("foo"));
1699    ///
1700    /// assert_eq!("foobar", s);
1701    /// ```
1702    #[inline]
1703    pub fn insert_utfstr(&mut self, idx: usize, string: &Utf16Str) {
1704        assert!(self.is_char_boundary(idx));
1705
1706        unsafe {
1707            self.insert_slice(idx, string.as_slice());
1708        }
1709    }
1710
1711    /// Splits the string into two at the given index.
1712    ///
1713    /// Returns a newly allocated string. `self` contains elements [0, at), and the returned string
1714    /// contains elements [at, len). `at` must be on the boundary of a UTF-16 code point.
1715    ///
1716    /// Note that the capacity of `self` does not change.
1717    ///
1718    /// # Panics
1719    ///
1720    /// Panics if `at` is not on a UTF-16 code point boundary, or if it is beyond the last code
1721    /// point of the string.
1722    ///
1723    /// # Examples
1724    ///
1725    /// ```
1726    /// use widestring::Utf16String;
1727    /// let mut hello = Utf16String::from_str("Hello, World!");
1728    /// let world = hello.split_off(7);
1729    /// assert_eq!(hello, "Hello, ");
1730    /// assert_eq!(world, "World!");
1731    /// ```
1732    #[inline]
1733    #[must_use]
1734    pub fn split_off(&mut self, at: usize) -> Self {
1735        assert!(self.is_char_boundary(at));
1736        unsafe { Self::from_vec_unchecked(self.inner.split_off(at)) }
1737    }
1738
1739    /// Creates a draining iterator that removes the specified range in the string and yields the
1740    /// removed [`char`]s.
1741    ///
1742    /// Note: The element range is removed even if the iterator is not consumed until the end.
1743    ///
1744    /// # Panics
1745    ///
1746    /// Panics if the starting point or end point do not lie on a [`char`] boundary, or if they're
1747    /// out of bounds.
1748    ///
1749    /// # Examples
1750    ///
1751    /// Basic usage:
1752    ///
1753    /// ```
1754    /// use widestring::Utf16String;
1755    /// let mut s = Utf16String::from_str("α is alpha, β is beta");
1756    /// let beta_offset = 12;
1757    ///
1758    /// // Remove the range up until the β from the string
1759    /// let t: Utf16String = s.drain(..beta_offset).collect();
1760    /// assert_eq!(t, "α is alpha, ");
1761    /// assert_eq!(s, "β is beta");
1762    ///
1763    /// // A full range clears the string
1764    /// s.drain(..);
1765    /// assert_eq!(s, "");
1766    /// ```
1767    pub fn drain<R>(&mut self, range: R) -> DrainUtf16<'_>
1768    where
1769        R: RangeBounds<usize>,
1770    {
1771        // WARNING: Using range again would be unsound
1772        // TODO: replace with core::slice::range when it is stabilized
1773        let core::ops::Range { start, end } = crate::range(range, ..self.len());
1774        assert!(self.is_char_boundary(start));
1775        assert!(self.is_char_boundary(end));
1776
1777        // Take out two simultaneous borrows. The self_ptr won't be accessed
1778        // until iteration is over, in Drop.
1779        let self_ptr: *mut _ = self;
1780        // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks.
1781        let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
1782
1783        DrainUtf16 {
1784            start,
1785            end,
1786            iter: chars_iter,
1787            string: self_ptr,
1788        }
1789    }
1790
1791    /// Removes the specified range in the string, and replaces it with the given string.
1792    ///
1793    /// The given string doesn't need to be the same length as the range.
1794    ///
1795    /// # Panics
1796    ///
1797    /// Panics if the starting point or end point do not lie on a [`char`] boundary, or if they're
1798    /// out of bounds.
1799    ///
1800    /// # Examples
1801    ///
1802    /// Basic usage:
1803    ///
1804    /// ```
1805    /// use widestring::{utf16str, Utf16String};
1806    /// let mut s = Utf16String::from_str("α is alpha, β is beta");
1807    /// let beta_offset = 12;
1808    ///
1809    /// // Replace the range up until the β from the string
1810    /// s.replace_range(..beta_offset, utf16str!("Α is capital alpha; "));
1811    /// assert_eq!(s, "Α is capital alpha; β is beta");
1812    /// ```
1813    pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf16Str)
1814    where
1815        R: RangeBounds<usize>,
1816    {
1817        use core::ops::Bound::*;
1818
1819        // WARNING: Using range again would be unsound
1820        let start = range.start_bound();
1821        match start {
1822            Included(&n) => assert!(self.is_char_boundary(n)),
1823            Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1824            Unbounded => {}
1825        };
1826        // WARNING: Inlining this variable would be unsound
1827        let end = range.end_bound();
1828        match end {
1829            Included(&n) => assert!(self.is_char_boundary(n + 1)),
1830            Excluded(&n) => assert!(self.is_char_boundary(n)),
1831            Unbounded => {}
1832        };
1833
1834        // Using `range` again would be unsound
1835        // We assume the bounds reported by `range` remain the same, but
1836        // an adversarial implementation could change between calls
1837        self.inner
1838            .splice((start, end), replace_with.as_slice().iter().copied());
1839    }
1840}
1841
1842impl Utf32String {
1843    /// Converts a [`u32`] vector of UTF-32 data to a string.
1844    ///
1845    /// Not all slices of [`u32`] values are valid to convert, since [`Utf32String`] requires that
1846    /// it is always valid UTF-32. This function checks to ensure that the values are valid UTF-32,
1847    /// and then does the conversion. This does not do any copying.
1848    ///
1849    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1850    /// the validity check, there is an unsafe version of this function,
1851    /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1852    /// the check.
1853    ///
1854    /// If you need a string slice, consider using [`Utf32Str::from_slice`] instead.
1855    ///
1856    /// The inverse of this method is [`into_vec`][Self::into_vec].
1857    ///
1858    /// # Errors
1859    ///
1860    /// Returns an error if the vector is not UTF-32 with a description as to why the provided
1861    /// vector is not UTF-32. The error will contain the original [`Vec`] that can be reclaimed with
1862    /// [`into_vec`][Utf32Error::into_vec].
1863    ///
1864    /// # Examples
1865    ///
1866    /// ```
1867    /// use widestring::Utf32String;
1868    ///
1869    /// let sparkle_heart = vec![0x1f496];
1870    /// let sparkle_heart = Utf32String::from_vec(sparkle_heart).unwrap();
1871    ///
1872    /// assert_eq!("💖", sparkle_heart);
1873    /// ```
1874    ///
1875    /// With incorrect values that return an error:
1876    ///
1877    /// ```
1878    /// use widestring::Utf32String;
1879    ///
1880    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1881    ///
1882    /// assert!(Utf32String::from_vec(sparkle_heart).is_err());
1883    /// ```
1884    pub fn from_vec(v: impl Into<Vec<u32>>) -> Result<Self, Utf32Error> {
1885        let v = validate_utf32_vec(v.into())?;
1886        Ok(unsafe { Self::from_vec_unchecked(v) })
1887    }
1888
1889    /// Converts a slice of [`u32`] data to a string, including invalid characters.
1890    ///
1891    /// Since the given [`u32`] slice may not be valid UTF-32, and [`Utf32String`] requires that
1892    /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
1893    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1894    /// looks like this: �
1895    ///
1896    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1897    /// the conversion, there is an unsafe version of this function,
1898    /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1899    /// the checks.
1900    ///
1901    /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
1902    /// invalid UTF-32, then we need to insert our replacement characters which will change the size
1903    /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
1904    /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
1905    ///
1906    /// # Examples
1907    ///
1908    /// ```
1909    /// # use widestring::utf32str;
1910    /// use widestring::Utf32String;
1911    ///
1912    /// let sparkle_heart = vec![0x1f496];
1913    /// let sparkle_heart = Utf32String::from_slice_lossy(&sparkle_heart);
1914    ///
1915    /// assert_eq!(utf32str!("💖"), sparkle_heart);
1916    /// ```
1917    ///
    /// With invalid values, which are replaced rather than rejected:
1919    ///
1920    /// ```
1921    /// # use widestring::utf32str;
1922    /// use widestring::Utf32String;
1923    ///
1924    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1925    /// let sparkle_heart = Utf32String::from_slice_lossy(&sparkle_heart);
1926    ///
1927    /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
1928    /// ```
1929    #[must_use]
1930    pub fn from_slice_lossy(s: &[u32]) -> Cow<'_, Utf32Str> {
1931        match validate_utf32(s) {
1932            // SAFETY: validated as UTF-32
1933            Ok(()) => Cow::Borrowed(unsafe { Utf32Str::from_slice_unchecked(s) }),
1934            Err(e) => {
1935                let mut v = Vec::with_capacity(s.len());
1936                // Valid up until index
1937                v.extend_from_slice(&s[..e.index()]);
1938                for u in s[e.index()..].iter().copied() {
1939                    if char::from_u32(u).is_some() {
1940                        v.push(u);
1941                    } else {
1942                        v.push(char::REPLACEMENT_CHARACTER as u32);
1943                    }
1944                }
1945                // SATEFY: Is now valid UTF-32 with replacement chars
1946                Cow::Owned(unsafe { Self::from_vec_unchecked(v) })
1947            }
1948        }
1949    }
1950
1951    /// Converts a wide string of undefined encoding to a UTF-32 string without checking that the
1952    /// string contains valid UTF-32.
1953    ///
1954    /// See the safe version, [`from_ustring`][Self::from_ustring], for more information.
1955    ///
1956    /// # Safety
1957    ///
1958    /// This function is unsafe because it does not check that the string passed to it is valid
1959    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
1960    /// [`Utf32String`] is always valid UTF-32.
1961    ///
1962    /// # Examples
1963    ///
1964    /// ```
1965    /// use widestring::{U32String, Utf32String};
1966    ///
1967    /// let sparkle_heart = vec![0x1f496];
1968    /// let sparkle_heart = U32String::from_vec(sparkle_heart);
1969    /// let sparkle_heart = unsafe { Utf32String::from_ustring_unchecked(sparkle_heart) };
1970    ///
1971    /// assert_eq!("💖", sparkle_heart);
1972    /// ```
1973    #[inline]
1974    #[must_use]
1975    pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U32String>) -> Self {
1976        Self::from_vec_unchecked(s.into().into_vec())
1977    }
1978
    /// Converts a wide string of undefined encoding into a UTF-32 string.
1980    ///
1981    /// Not all strings of undefined encoding are valid to convert, since [`Utf32String`] requires
1982    /// that it is always valid UTF-32. This function checks to ensure that the string is valid
1983    /// UTF-32, and then does the conversion. This does not do any copying.
1984    ///
1985    /// If you are sure that the string is valid UTF-32, and you don't want to incur the overhead of
1986    /// the validity check, there is an unsafe version of this function,
1987    /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1988    /// skips the check.
1989    ///
1990    /// If you need a string slice, consider using [`Utf32Str::from_ustr`] instead.
1991    ///
1992    /// # Errors
1993    ///
1994    /// Returns an error if the string is not UTF-32 with a description as to why the provided
1995    /// string is not UTF-32.
1996    ///
1997    /// # Examples
1998    ///
1999    /// ```
2000    /// use widestring::{U32String, Utf32String};
2001    ///
2002    /// let sparkle_heart = vec![0x1f496];
2003    /// let sparkle_heart = U32String::from_vec(sparkle_heart);
2004    /// let sparkle_heart = Utf32String::from_ustring(sparkle_heart).unwrap();
2005    ///
2006    /// assert_eq!("💖", sparkle_heart);
2007    /// ```
2008    ///
2009    /// With incorrect values that return an error:
2010    ///
2011    /// ```
2012    /// use widestring::{U32String, Utf32String};
2013    ///
2014    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2015    /// let sparkle_heart = U32String::from_vec(sparkle_heart); // Valid for a U32String
2016    ///
2017    /// assert!(Utf32String::from_ustring(sparkle_heart).is_err()); // But not for a Utf32String
2018    /// ```
2019    #[inline]
2020    pub fn from_ustring(s: impl Into<crate::U32String>) -> Result<Self, Utf32Error> {
2021        Self::from_vec(s.into().into_vec())
2022    }
2023
2024    /// Converts a wide string slice of undefined encoding to a UTF-32 string, including invalid
2025    /// characters.
2026    ///
2027    /// Since the given string slice may not be valid UTF-32, and [`Utf32String`] requires that
2028    /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
2029    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
2030    /// looks like this: �
2031    ///
2032    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
2033    /// the conversion, there is an unsafe version of this function,
2034    /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
2035    /// skips the checks.
2036    ///
2037    /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
2038    /// invalid UTF-32, then we need to insert our replacement characters which will change the size
2039    /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
2040    /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
2041    ///
2042    /// # Examples
2043    ///
2044    /// ```
2045    /// # use widestring::utf32str;
2046    /// use widestring::{U32Str, Utf32String};
2047    ///
2048    /// let sparkle_heart = vec![0x1f496];
2049    /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
2050    /// let sparkle_heart = Utf32String::from_ustr_lossy(sparkle_heart);
2051    ///
2052    /// assert_eq!(utf32str!("💖"), sparkle_heart);
2053    /// ```
2054    ///
    /// With invalid values, which are replaced rather than rejected:
2056    ///
2057    /// ```
2058    /// # use widestring::utf32str;
2059    /// use widestring::{U32Str, Utf32String};
2060    ///
2061    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2062    /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
2063    /// let sparkle_heart = Utf32String::from_ustr_lossy(sparkle_heart);
2064    ///
2065    /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
2066    /// ```
2067    #[inline]
2068    #[must_use]
2069    pub fn from_ustr_lossy(s: &crate::U32Str) -> Cow<'_, Utf32Str> {
2070        Self::from_slice_lossy(s.as_slice())
2071    }
2072
2073    /// Converts a wide C string to a UTF-32 string without checking that the string contains
2074    /// valid UTF-32.
2075    ///
2076    /// The resulting string does *not* contain the nul terminator.
2077    ///
2078    /// See the safe version, [`from_ucstring`][Self::from_ucstring], for more information.
2079    ///
2080    /// # Safety
2081    ///
2082    /// This function is unsafe because it does not check that the string passed to it is valid
2083    /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
2084    /// [`Utf32String`] is always valid UTF-32.
2085    ///
2086    /// # Examples
2087    ///
2088    /// ```
2089    /// use widestring::{U32CString, Utf32String};
2090    ///
2091    /// let sparkle_heart = vec![0x1f496];
2092    /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap();
2093    /// let sparkle_heart = unsafe { Utf32String::from_ucstring_unchecked(sparkle_heart) };
2094    ///
2095    /// assert_eq!("💖", sparkle_heart);
2096    /// ```
2097    #[inline]
2098    #[must_use]
2099    pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U32CString>) -> Self {
2100        Self::from_vec_unchecked(s.into().into_vec())
2101    }
2102
2103    /// Converts a wide C string into a UTF-32 string.
2104    ///
2105    /// The resulting string does *not* contain the nul terminator.
2106    ///
2107    /// Not all wide C strings are valid to convert, since [`Utf32String`] requires that
2108    /// it is always valid UTF-32. This function checks to ensure that the string is valid UTF-32,
2109    /// and then does the conversion. This does not do any copying.
2110    ///
2111    /// If you are sure that the string is valid UTF-32, and you don't want to incur the overhead of
2112    /// the validity check, there is an unsafe version of this function,
2113    /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
2114    /// skips the check.
2115    ///
2116    /// If you need a string slice, consider using [`Utf32Str::from_ucstr`] instead.
2117    ///
2118    /// # Errors
2119    ///
2120    /// Returns an error if the string is not UTF-32 with a description as to why the provided
2121    /// string is not UTF-32.
2122    ///
2123    /// # Examples
2124    ///
2125    /// ```
2126    /// use widestring::{U32CString, Utf32String};
2127    ///
2128    /// let sparkle_heart = vec![0x1f496];
2129    /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap();
2130    /// let sparkle_heart = Utf32String::from_ucstring(sparkle_heart).unwrap();
2131    ///
2132    /// assert_eq!("💖", sparkle_heart);
2133    /// ```
2134    ///
2135    /// With incorrect values that return an error:
2136    ///
2137    /// ```
2138    /// use widestring::{U32CString, Utf32String};
2139    ///
2140    /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2141    /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap(); // Valid for a U32CString
2142    ///
2143    /// assert!(Utf32String::from_ucstring(sparkle_heart).is_err()); // But not for a Utf32String
2144    /// ```
2145    #[inline]
2146    pub fn from_ucstring(s: impl Into<crate::U32CString>) -> Result<Self, Utf32Error> {
2147        Self::from_vec(s.into().into_vec())
2148    }
2149
    /// Converts a wide C string slice to a UTF-32 string, including invalid characters.
2151    ///
2152    /// The resulting string does *not* contain the nul terminator.
2153    ///
2154    /// Since the given string slice may not be valid UTF-32, and [`Utf32String`] requires that
2155    /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
2156    /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
2157    /// looks like this: �
2158    ///
2159    /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
2160    /// the conversion, there is an unsafe version of this function,
2161    /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
2162    /// skips the checks.
2163    ///
2164    /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
2165    /// invalid UTF-32, then we need to insert our replacement characters which will change the size
2166    /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
2167    /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
2168    ///
2169    /// # Examples
2170    ///
2171    /// ```
2172    /// # use widestring::utf32str;
2173    /// use widestring::{U32CStr, Utf32String};
2174    ///
2175    /// let sparkle_heart = vec![0x1f496, 0x0];
2176    /// let sparkle_heart = U32CStr::from_slice(&sparkle_heart).unwrap();
2177    /// let sparkle_heart = Utf32String::from_ucstr_lossy(sparkle_heart);
2178    ///
2179    /// assert_eq!(utf32str!("💖"), sparkle_heart);
2180    /// ```
2181    ///
    /// With invalid values, which are replaced rather than rejected:
2183    ///
2184    /// ```
2185    /// # use widestring::utf32str;
2186    /// use widestring::{U32CStr, Utf32String};
2187    ///
2188    /// let sparkle_heart = vec![0xd83d, 0xdc96, 0x0]; // UTF-16 surrogates are invalid
2189    /// let sparkle_heart = U32CStr::from_slice(&sparkle_heart).unwrap();
2190    /// let sparkle_heart = Utf32String::from_ucstr_lossy(sparkle_heart);
2191    ///
2192    /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
2193    /// ```
2194    #[inline]
2195    #[must_use]
2196    pub fn from_ucstr_lossy(s: &crate::U32CStr) -> Cow<'_, Utf32Str> {
2197        Self::from_slice_lossy(s.as_slice())
2198    }
2199
2200    /// Converts a vector of [`char`]s into a UTF-32 string.
2201    ///
2202    /// Since [`char`]s are always valid UTF-32, this is infallible and efficient.
2203    ///
2204    /// If you need a string slice, consider using [`Utf32Str::from_char_slice`] instead.
2205    ///
2206    /// # Examples
2207    ///
2208    /// ```
2209    /// use widestring::{U32CString, Utf32String};
2210    ///
2211    /// let sparkle_heart = vec!['💖'];
2212    /// let sparkle_heart = Utf32String::from_chars(sparkle_heart);
2213    ///
2214    /// assert_eq!("💖", sparkle_heart);
2215    /// ```
2216    #[inline]
2217    #[must_use]
2218    pub fn from_chars(s: impl Into<Vec<char>>) -> Self {
2219        // SAFETY: Char slices are always valid UTF-32
2220        // TODO: replace mem:transmute when Vec::into_raw_parts is stabilized
2221        // Clippy reports this is unsound due to different sized types; but the sizes are the same
2222        // size. Still best to swap to Vec::into_raw_parts asap.
2223        #[allow(clippy::unsound_collection_transmute)]
2224        unsafe {
2225            let vec: Vec<u32> = mem::transmute(s.into());
2226            Self::from_vec_unchecked(vec)
2227        }
2228    }
2229
2230    /// Appends the given [`char`] to the end of this string.
2231    ///
2232    /// # Examples
2233    ///
2234    /// ```
2235    /// use widestring::Utf32String;
2236    /// let mut s = Utf32String::from_str("abc");
2237    ///
2238    /// s.push('1');
2239    /// s.push('2');
2240    /// s.push('3');
2241    ///
2242    /// assert_eq!("abc123", s);
2243    /// ```
2244    #[inline]
2245    pub fn push(&mut self, ch: char) {
2246        self.inner.push(ch.into())
2247    }
2248
2249    /// Shortens this string to the specified length.
2250    ///
2251    /// If `new_len` is greater than the string's current length, this has no effect.
2252    ///
2253    /// Note that this method has no effect on the allocated capacity of the string.
2254    ///
2255    /// # Examples
2256    ///
2257    /// ```
2258    /// use widestring::Utf32String;
2259    /// let mut s = Utf32String::from_str("hello");
2260    /// s.truncate(2);
2261    /// assert_eq!("he", s);
2262    /// ```
2263    #[inline]
2264    pub fn truncate(&mut self, new_len: usize) {
2265        self.inner.truncate(new_len)
2266    }
2267
2268    /// Removes the last character from the string buffer and returns it.
2269    ///
2270    /// Returns [`None`] if this string is empty.
2271    ///
2272    /// # Examples
2273    ///
2274    /// ```
2275    /// use widestring::Utf32String;
2276    /// let mut s = Utf32String::from_str("foo");
2277    ///
2278    /// assert_eq!(s.pop(), Some('o'));
2279    /// assert_eq!(s.pop(), Some('o'));
2280    /// assert_eq!(s.pop(), Some('f'));
2281    ///
2282    /// assert_eq!(s.pop(), None);
2283    /// ```
2284    #[inline]
2285    pub fn pop(&mut self) -> Option<char> {
2286        // SAFETY: String is already valid UTF-32
2287        self.inner
2288            .pop()
2289            .map(|c| unsafe { core::char::from_u32_unchecked(c) })
2290    }
2291
2292    /// Removes a [`char`] from this string at an offset and returns it.
2293    ///
2294    /// This is an _O(n)_ operation, as it requires copying every element in the buffer.
2295    ///
2296    /// # Panics
2297    ///
2298    /// Panics if `idx` is larger than or equal to the string's length.
2299    ///
2300    /// # Examples
2301    ///
2302    /// ```
2303    /// use widestring::Utf32String;
2304    /// let mut s = Utf32String::from_str("foo");
2305    ///
2306    /// assert_eq!(s.remove(1), 'o');
2307    /// assert_eq!(s.remove(0), 'f');
2308    /// assert_eq!(s.remove(0), 'o');
2309    /// ```
2310    #[inline]
2311    pub fn remove(&mut self, idx: usize) -> char {
2312        let next = idx + 1;
2313        let len = self.len();
2314        unsafe {
2315            let c = core::char::from_u32_unchecked(self.inner[idx]);
2316            ptr::copy(
2317                self.inner.as_ptr().add(next),
2318                self.inner.as_mut_ptr().add(idx),
2319                len - next,
2320            );
2321            self.inner.set_len(len - (next - idx));
2322            c
2323        }
2324    }
2325
2326    /// Retains only the characters specified by the predicate.
2327    ///
2328    /// In other words, remove all characters `c` such that `f(c)` returns `false`. This method
2329    /// operates in place, visiting each character exactly once in the original order, and preserves
2330    /// the order of the retained characters.
2331    ///
2332    /// # Examples
2333    ///
2334    /// ```
2335    /// use widestring::Utf32String;
2336    /// let mut s = Utf32String::from_str("f_o_ob_ar");
2337    ///
2338    /// s.retain(|c| c != '_');
2339    ///
2340    /// assert_eq!(s, "foobar");
2341    /// ```
2342    ///
2343    /// Because the elements are visited exactly once in the original order, external state may be
2344    /// used to decide which elements to keep.
2345    ///
2346    /// ```
2347    /// use widestring::Utf32String;
2348    /// let mut s = Utf32String::from_str("abcde");
2349    /// let keep = [false, true, true, false, true];
2350    /// let mut iter = keep.iter();
2351    /// s.retain(|_| *iter.next().unwrap());
2352    /// assert_eq!(s, "bce");
2353    /// ```
2354    pub fn retain<F>(&mut self, mut f: F)
2355    where
2356        F: FnMut(char) -> bool,
2357    {
2358        let mut index = 0;
2359        while index < self.len() {
2360            // SAFETY: always in bounds
2361            let c = unsafe { self.get_unchecked(index..) }
2362                .chars()
2363                .next()
2364                .unwrap();
2365            if !f(c) {
2366                self.inner.remove(index);
2367            } else {
2368                index += 1;
2369            }
2370        }
2371    }
2372
2373    /// Inserts a character into this string at an offset.
2374    ///
2375    /// This is an _O(n)_ operation as it requires copying every element in the buffer.
2376    ///
2377    /// # Panics
2378    ///
2379    /// Panics if `idx` is larger than the string's length.
2380    ///
2381    /// # Examples
2382    ///
2383    /// ```
2384    /// use widestring::Utf32String;
2385    /// let mut s = Utf32String::with_capacity(3);
2386    ///
2387    /// s.insert(0, 'f');
2388    /// s.insert(1, 'o');
2389    /// s.insert(1, 'o');
2390    ///
2391    /// assert_eq!("foo", s);
2392    /// ```
2393    #[inline]
2394    pub fn insert(&mut self, idx: usize, ch: char) {
2395        unsafe {
2396            self.insert_slice(idx, &[ch as u32]);
2397        }
2398    }
2399
2400    /// Inserts a UTF-32 string slice into this string at an offset.
2401    ///
2402    /// This is an _O(n)_ operation as it requires copying every element in the buffer.
2403    ///
2404    /// # Panics
2405    ///
2406    /// Panics if `idx` is larger than the string's length.
2407    ///
2408    /// # Examples
2409    ///
2410    /// ```
2411    /// # use widestring::utf32str;
2412    /// use widestring::Utf32String;
2413    /// let mut s = Utf32String::from_str("bar");
2414    ///
2415    /// s.insert_utfstr(0, utf32str!("foo"));
2416    ///
2417    /// assert_eq!("foobar", s);
2418    /// ```
2419    #[inline]
2420    pub fn insert_utfstr(&mut self, idx: usize, string: &Utf32Str) {
2421        unsafe {
2422            self.insert_slice(idx, string.as_slice());
2423        }
2424    }
2425
2426    /// Splits the string into two at the given index.
2427    ///
2428    /// Returns a newly allocated string. `self` contains elements [0, at), and the returned string
2429    /// contains elements [at, len).
2430    ///
2431    /// Note that the capacity of `self` does not change.
2432    ///
2433    /// # Panics
2434    ///
    /// Panics if `at` is beyond the last code point of the string.
2436    ///
2437    /// # Examples
2438    ///
2439    /// ```
2440    /// use widestring::Utf32String;
2441    /// let mut hello = Utf32String::from_str("Hello, World!");
2442    /// let world = hello.split_off(7);
2443    /// assert_eq!(hello, "Hello, ");
2444    /// assert_eq!(world, "World!");
2445    /// ```
2446    #[inline]
2447    #[must_use]
2448    pub fn split_off(&mut self, at: usize) -> Self {
2449        unsafe { Self::from_vec_unchecked(self.inner.split_off(at)) }
2450    }
2451
2452    /// Creates a draining iterator that removes the specified range in the string and yields the
2453    /// removed [`char`]s.
2454    ///
2455    /// Note: The element range is removed even if the iterator is not consumed until the end.
2456    ///
2457    /// # Panics
2458    ///
2459    /// Panics if the starting point or end point are out of bounds.
2460    ///
2461    /// # Examples
2462    ///
2463    /// Basic usage:
2464    ///
2465    /// ```
2466    /// use widestring::Utf32String;
2467    /// let mut s = Utf32String::from_str("α is alpha, β is beta");
2468    /// let beta_offset = 12;
2469    ///
2470    /// // Remove the range up until the β from the string
2471    /// let t: Utf32String = s.drain(..beta_offset).collect();
2472    /// assert_eq!(t, "α is alpha, ");
2473    /// assert_eq!(s, "β is beta");
2474    ///
2475    /// // A full range clears the string
2476    /// s.drain(..);
2477    /// assert_eq!(s, "");
2478    /// ```
2479    pub fn drain<R>(&mut self, range: R) -> DrainUtf32<'_>
2480    where
2481        R: RangeBounds<usize>,
2482    {
2483        // WARNING: Using range again would be unsound
2484        // TODO: replace with core::slice::range when it is stabilized
2485        let core::ops::Range { start, end } = crate::range(range, ..self.len());
2486
2487        // Take out two simultaneous borrows. The self_ptr won't be accessed
2488        // until iteration is over, in Drop.
2489        let self_ptr: *mut _ = self;
2490        // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks.
2491        let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
2492
2493        DrainUtf32 {
2494            start,
2495            end,
2496            iter: chars_iter,
2497            string: self_ptr,
2498        }
2499    }
2500
2501    /// Removes the specified range in the string, and replaces it with the given string.
2502    ///
2503    /// The given string doesn't need to be the same length as the range.
2504    ///
2505    /// # Panics
2506    ///
2507    /// Panics if the starting point or end point are out of bounds.
2508    ///
2509    /// # Examples
2510    ///
2511    /// Basic usage:
2512    ///
2513    /// ```
2514    /// use widestring::{utf32str, Utf32String};
2515    /// let mut s = Utf32String::from_str("α is alpha, β is beta");
2516    /// let beta_offset = 12;
2517    ///
2518    /// // Replace the range up until the β from the string
2519    /// s.replace_range(..beta_offset, utf32str!("Α is capital alpha; "));
2520    /// assert_eq!(s, "Α is capital alpha; β is beta");
2521    /// ```
2522    #[inline]
2523    pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf32Str)
2524    where
2525        R: RangeBounds<usize>,
2526    {
2527        self.inner
2528            .splice(range, replace_with.as_slice().iter().copied());
2529    }
2530
2531    /// Converts string into a [`Vec`] of [`char`]s.
2532    ///
2533    /// This consumes the string without copying its contents.
2534    #[allow(trivial_casts)]
2535    #[inline]
2536    #[must_use]
2537    pub fn into_char_vec(self) -> Vec<char> {
2538        let mut v = mem::ManuallyDrop::new(self.into_vec());
2539        let (ptr, len, cap) = (v.as_mut_ptr(), v.len(), v.capacity());
2540        // SAFETY: Self should be valid UTF-32 so chars will be in range
2541        unsafe { Vec::from_raw_parts(ptr as *mut char, len, cap) }
2542    }
2543}
2544
2545impl AsMut<[char]> for Utf32String {
2546    #[inline]
2547    fn as_mut(&mut self) -> &mut [char] {
2548        self.as_char_slice_mut()
2549    }
2550}
2551
2552impl AsRef<[char]> for Utf32String {
2553    #[inline]
2554    fn as_ref(&self) -> &[char] {
2555        self.as_char_slice()
2556    }
2557}
2558
2559impl From<Vec<char>> for Utf32String {
2560    #[inline]
2561    fn from(value: Vec<char>) -> Self {
2562        Utf32String::from_chars(value)
2563    }
2564}
2565
2566impl From<&[char]> for Utf32String {
2567    #[inline]
2568    fn from(value: &[char]) -> Self {
2569        Utf32String::from_chars(value)
2570    }
2571}
2572
2573impl From<Utf32String> for Vec<char> {
2574    #[inline]
2575    fn from(value: Utf32String) -> Self {
2576        value.into_char_vec()
2577    }
2578}
2579
2580impl PartialEq<[char]> for Utf32String {
2581    #[inline]
2582    fn eq(&self, other: &[char]) -> bool {
2583        self.as_char_slice() == other
2584    }
2585}
2586
2587impl PartialEq<Utf16String> for Utf32String {
2588    #[inline]
2589    fn eq(&self, other: &Utf16String) -> bool {
2590        self.chars().eq(other.chars())
2591    }
2592}
2593
2594impl PartialEq<Utf32String> for Utf16String {
2595    #[inline]
2596    fn eq(&self, other: &Utf32String) -> bool {
2597        self.chars().eq(other.chars())
2598    }
2599}
2600
2601impl PartialEq<&Utf16Str> for Utf32String {
2602    #[inline]
2603    fn eq(&self, other: &&Utf16Str) -> bool {
2604        self.chars().eq(other.chars())
2605    }
2606}
2607
2608impl PartialEq<&Utf32Str> for Utf16String {
2609    #[inline]
2610    fn eq(&self, other: &&Utf32Str) -> bool {
2611        self.chars().eq(other.chars())
2612    }
2613}
2614
2615impl PartialEq<Utf32String> for &Utf16Str {
2616    #[inline]
2617    fn eq(&self, other: &Utf32String) -> bool {
2618        self.chars().eq(other.chars())
2619    }
2620}
2621
2622impl PartialEq<Utf16String> for &Utf32Str {
2623    #[inline]
2624    fn eq(&self, other: &Utf16String) -> bool {
2625        self.chars().eq(other.chars())
2626    }
2627}
2628
2629impl TryFrom<Vec<u16>> for Utf16String {
2630    type Error = Utf16Error;
2631
2632    #[inline]
2633    fn try_from(value: Vec<u16>) -> Result<Self, Self::Error> {
2634        Utf16String::from_vec(value)
2635    }
2636}
2637
2638impl TryFrom<Vec<u32>> for Utf32String {
2639    type Error = Utf32Error;
2640
2641    #[inline]
2642    fn try_from(value: Vec<u32>) -> Result<Self, Self::Error> {
2643        Utf32String::from_vec(value)
2644    }
2645}
2646
2647impl TryFrom<&[u16]> for Utf16String {
2648    type Error = Utf16Error;
2649
2650    #[inline]
2651    fn try_from(value: &[u16]) -> Result<Self, Self::Error> {
2652        Utf16String::from_vec(value)
2653    }
2654}
2655
2656impl TryFrom<&[u32]> for Utf32String {
2657    type Error = Utf32Error;
2658
2659    #[inline]
2660    fn try_from(value: &[u32]) -> Result<Self, Self::Error> {
2661        Utf32String::from_vec(value)
2662    }
2663}
2664
/// Alias for [`Utf16String`] or [`Utf32String`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on non-Windows targets, where the alias resolves to
/// [`Utf32String`].
#[cfg(not(windows))]
pub type WideUtfString = Utf32String;

/// Alias for [`Utf16String`] or [`Utf32String`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on Windows targets, where the alias resolves to [`Utf16String`].
#[cfg(windows)]
pub type WideUtfString = Utf16String;
2674
#[cfg(test)]
mod test {
    use crate::*;

    /// Regression test for bug #39: truncating to a length at or beyond the current length
    /// must leave the string unchanged, while a shorter length shortens it.
    #[test]
    fn uft16_truncate() {
        let mut s: Utf16String = utf16str!("trunc").into();
        // Each pair is (requested length, expected resulting length); the steps run in order
        // on the same string.
        for &(new_len, expected_len) in &[(6, 5), (5, 5), (2, 2)] {
            s.truncate(new_len);
            assert_eq!(s.len(), expected_len);
        }
    }
}
widestring/utfstring.rs

widestring/
utfstring.rs