widestring/utfstring.rs
1//! Owned, growable UTF strings.
2//!
3//! This module contains UTF strings and related types.
4
5use crate::{
6 decode_utf16_surrogate_pair,
7 error::{Utf16Error, Utf32Error},
8 is_utf16_low_surrogate, is_utf16_surrogate, validate_utf16, validate_utf16_vec, validate_utf32,
9 validate_utf32_vec, Utf16Str, Utf32Str,
10};
11#[allow(unused_imports)]
12use alloc::{
13 borrow::{Cow, ToOwned},
14 boxed::Box,
15 string::String,
16 vec::Vec,
17};
18#[allow(unused_imports)]
19use core::{
20 borrow::{Borrow, BorrowMut},
21 convert::{AsMut, AsRef, From, Infallible, TryFrom},
22 fmt::Write,
23 iter::FromIterator,
24 mem,
25 ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, RangeBounds},
26 ptr,
27 slice::SliceIndex,
28 str::FromStr,
29};
30
31mod iter;
32pub use iter::*;
33
34macro_rules! utfstring_common_impl {
35 {
36 $(#[$utfstring_meta:meta])*
37 struct $utfstring:ident([$uchar:ty]);
38 type UtfStr = $utfstr:ident;
39 type UStr = $ustr:ident;
40 type UCStr = $ucstr:ident;
41 type UString = $ustring:ident;
42 type UCString = $ucstring:ident;
43 type UtfError = $utferror:ident;
44 $(#[$from_vec_unchecked_meta:meta])*
45 fn from_vec_unchecked() -> {}
46 $(#[$from_str_meta:meta])*
47 fn from_str() -> {}
48 $(#[$push_utfstr_meta:meta])*
49 fn push_utfstr() -> {}
50 $(#[$as_mut_vec_meta:meta])*
51 fn as_mut_vec() -> {}
52 } => {
$(#[$utfstring_meta])*
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub struct $utfstring {
    // Backing buffer of code units. The unsafe constructors and accessors of this type
    // document that this buffer is assumed to always hold validly encoded data.
    inner: Vec<$uchar>,
}
59
60 impl $utfstring {
61 /// Creates a new empty string.
62 ///
63 /// Given that the string is empty, this will not allocate any initial buffer. While
64 /// that means this initial operation is very inexpensive, it may cause excessive
65 /// allocations later when you add data. If you have an idea of how much data the
66 /// string will hold, consider [`with_capacity`][Self::with_capacity] instead to
67 /// prevent excessive re-allocation.
68 #[inline]
69 #[must_use]
70 pub const fn new() -> Self {
71 Self { inner: Vec::new() }
72 }
73
74 /// Creates a new empty string with a particular capacity.
75 ///
76 /// This string has an internal buffer to hold its data. The capacity is the length of
77 /// that buffer, and can be queried with the [`capacity`][Self::capacity] method. This
78 /// method creates and empty string, but one with an initial buffer that can hold
79 /// `capacity` elements. This is useful when you may be appending a bunch of data to
80 /// the string, reducing the number of reallocations it needs to do.
81 ///
82 /// If the given capacity is `0`, no allocation will occur, and this method is identical
83 /// to the [`new`][Self::new] method.
84 #[inline]
85 #[must_use]
86 pub fn with_capacity(capacity: usize) -> Self {
87 Self {
88 inner: Vec::with_capacity(capacity),
89 }
90 }
91
92 $(#[$from_vec_unchecked_meta])*
93 #[inline]
94 #[must_use]
95 pub unsafe fn from_vec_unchecked(v: impl Into<Vec<$uchar>>) -> Self {
96 Self { inner: v.into() }
97 }
98
99 $(#[$from_str_meta])*
100 #[inline]
101 #[allow(clippy::should_implement_trait)]
102 #[must_use]
103 pub fn from_str<S: AsRef<str> + ?Sized>(s: &S) -> Self {
104 let s = s.as_ref();
105 let mut string = Self::new();
106 string.extend(s.chars());
107 string
108 }
109
110 /// Converts a string into a string slice.
111 #[inline]
112 #[must_use]
113 pub fn as_utfstr(&self) -> &$utfstr {
114 unsafe { $utfstr::from_slice_unchecked(self.inner.as_slice()) }
115 }
116
117 /// Converts a string into a mutable string slice.
118 #[inline]
119 #[must_use]
120 pub fn as_mut_utfstr(&mut self) -> &mut $utfstr {
121 unsafe { $utfstr::from_slice_unchecked_mut(&mut self.inner) }
122 }
123
124 /// Converts this string into a wide string of undefined encoding.
125 #[inline]
126 #[must_use]
127 pub fn as_ustr(&self) -> &crate::$ustr {
128 crate::$ustr::from_slice(self.as_slice())
129 }
130
131 /// Converts a string into a vector of its elements.
132 ///
133 /// This consumes the string without copying its contents.
134 #[inline]
135 #[must_use]
136 pub fn into_vec(self) -> Vec<$uchar> {
137 self.inner
138 }
139
140 $(#[$push_utfstr_meta])*
141 #[inline]
142 pub fn push_utfstr<S: AsRef<$utfstr> + ?Sized>(&mut self, string: &S) {
143 self.inner.extend_from_slice(string.as_ref().as_slice())
144 }
145
146 /// Returns this string's capacity, in number of elements.
147 #[inline]
148 #[must_use]
149 pub fn capacity(&self) -> usize {
150 self.inner.capacity()
151 }
152
153 /// Ensures that this string's capacity is at least `additional` elements larger than
154 /// its length.
155 ///
156 /// The capacity may be increased by more than `additional` elements if it chooses, to
157 /// prevent frequent reallocations.
158 ///
159 /// If you do not want this "at least" behavior, see the
160 /// [`reserve_exact`][Self::reserve_exact] method.
161 ///
162 /// # Panics
163 ///
164 /// Panics if the new capacity overflows [`usize`].
165 #[inline]
166 pub fn reserve(&mut self, additional: usize) {
167 self.inner.reserve(additional)
168 }
169
170 /// Ensures that this string's capacity is `additional` elements larger than its length.
171 ///
172 /// Consider using the [`reserve`][Self::reserve] method unless you absolutely know
173 /// better than the allocator.
174 ///
175 /// # Panics
176 ///
177 /// Panics if the new capacity overflows [`usize`].
178 #[inline]
179 pub fn reserve_exact(&mut self, additional: usize) {
180 self.inner.reserve_exact(additional)
181 }
182
183 /// Shrinks the capacity of this string to match its length.
184 #[inline]
185 pub fn shrink_to_fit(&mut self) {
186 self.inner.shrink_to_fit()
187 }
188
189 /// Shrinks the capacity of this string with a lower bound.
190 ///
191 /// The capacity will remain at least as large as both the length and the supplied
192 /// value.
193 ///
194 /// If the current capacity is less than the lower limit, this is a no-op.
195 #[inline]
196 pub fn shrink_to(&mut self, min_capacity: usize) {
197 self.inner.shrink_to(min_capacity)
198 }
199
200 /// Returns a slice of this string's contents.
201 #[inline]
202 #[must_use]
203 pub fn as_slice(&self) -> &[$uchar] {
204 self.inner.as_slice()
205 }
206
207 unsafe fn insert_slice(&mut self, idx: usize, slice: &[$uchar]) {
208 let len = self.inner.len();
209 let amt = slice.len();
210 self.inner.reserve(amt);
211
212 ptr::copy(
213 self.inner.as_ptr().add(idx),
214 self.inner.as_mut_ptr().add(idx + amt),
215 len - idx,
216 );
217 ptr::copy_nonoverlapping(slice.as_ptr(), self.inner.as_mut_ptr().add(idx), amt);
218 self.inner.set_len(len + amt);
219 }
220
221 $(#[$as_mut_vec_meta])*
222 #[inline]
223 #[must_use]
224 pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<$uchar> {
225 &mut self.inner
226 }
227
228 /// Returns the length of this string in number of elements, not [`char`]s or
229 /// graphemes.
230 ///
231 /// In other words, it might not be what a human considers the length of the string.
232 #[inline]
233 #[must_use]
234 pub fn len(&self) -> usize {
235 self.inner.len()
236 }
237
238 /// Returns `true` if this string has a length of zero, and `false` otherwise.
239 #[inline]
240 #[must_use]
241 pub fn is_empty(&self) -> bool {
242 self.inner.is_empty()
243 }
244
245 /// Truncates the string, removing all contents.
246 ///
247 /// While this means the string will have a length of zero, it does not touch its
248 /// capacity.
249 #[inline]
250 pub fn clear(&mut self) {
251 self.inner.clear()
252 }
253
254 /// Converts this string into a boxed string slice.
255 ///
256 /// This will drop excess capacity.
257 #[inline]
258 #[must_use]
259 pub fn into_boxed_utfstr(self) -> Box<$utfstr> {
260 let slice = self.inner.into_boxed_slice();
261 // SAFETY: Already valid UTF-16
262 unsafe { $utfstr::from_boxed_slice_unchecked(slice) }
263 }
264
265 /// Appends a given UTF-8 string slice onto the end of this string, converting it to
266 /// UTF-16.
267 #[inline]
268 pub fn push_str<S: AsRef<str> + ?Sized>(&mut self, string: &S) {
269 self.extend(string.as_ref().chars())
270 }
271 }
272
273 impl Add<&$utfstr> for $utfstring {
274 type Output = $utfstring;
275
276 #[inline]
277 fn add(mut self, rhs: &$utfstr) -> Self::Output {
278 self.push_utfstr(rhs);
279 self
280 }
281 }
282
283 impl Add<&str> for $utfstring {
284 type Output = $utfstring;
285
286 #[inline]
287 fn add(mut self, rhs: &str) -> Self::Output {
288 self.push_str(rhs);
289 self
290 }
291 }
292
293 impl AddAssign<&$utfstr> for $utfstring {
294 #[inline]
295 fn add_assign(&mut self, rhs: &$utfstr) {
296 self.push_utfstr(rhs)
297 }
298 }
299
300 impl AddAssign<&str> for $utfstring {
301 #[inline]
302 fn add_assign(&mut self, rhs: &str) {
303 self.push_str(rhs)
304 }
305 }
306
307 impl AsMut<$utfstr> for $utfstring {
308 #[inline]
309 fn as_mut(&mut self) -> &mut $utfstr {
310 self.as_mut_utfstr()
311 }
312 }
313
314 impl AsRef<$utfstr> for $utfstring {
315 #[inline]
316 fn as_ref(&self) -> &$utfstr {
317 self.as_utfstr()
318 }
319 }
320
321 impl AsRef<[$uchar]> for $utfstring {
322 #[inline]
323 fn as_ref(&self) -> &[$uchar] {
324 &self.inner
325 }
326 }
327
328 impl AsRef<crate::$ustr> for $utfstring {
329 #[inline]
330 fn as_ref(&self) -> &crate::$ustr {
331 self.as_ustr()
332 }
333 }
334
335 impl Borrow<$utfstr> for $utfstring {
336 #[inline]
337 fn borrow(&self) -> &$utfstr {
338 self.as_utfstr()
339 }
340 }
341
342 impl BorrowMut<$utfstr> for $utfstring {
343 #[inline]
344 fn borrow_mut(&mut self) -> &mut $utfstr {
345 self.as_mut_utfstr()
346 }
347 }
348
349 impl core::fmt::Debug for $utfstring {
350 #[inline]
351 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
352 core::fmt::Debug::fmt(self.as_utfstr(), f)
353 }
354 }
355
356 impl Deref for $utfstring {
357 type Target = $utfstr;
358
359 #[inline]
360 fn deref(&self) -> &Self::Target {
361 self.as_utfstr()
362 }
363 }
364
365 impl DerefMut for $utfstring {
366 #[inline]
367 fn deref_mut(&mut self) -> &mut Self::Target {
368 self.as_mut_utfstr()
369 }
370 }
371
372 impl core::fmt::Display for $utfstring {
373 #[inline]
374 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
375 core::fmt::Display::fmt(self.as_utfstr(), f)
376 }
377 }
378
379 impl Extend<char> for $utfstring {
380 #[inline]
381 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
382 let iter = iter.into_iter();
383 let (lower_bound, _) = iter.size_hint();
384 self.reserve(lower_bound);
385 iter.for_each(|c| self.push(c));
386 }
387 }
388
389 impl<'a> Extend<&'a char> for $utfstring {
390 #[inline]
391 fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
392 self.extend(iter.into_iter().copied())
393 }
394 }
395
396 impl<'a> Extend<&'a $utfstr> for $utfstring {
397 #[inline]
398 fn extend<T: IntoIterator<Item = &'a $utfstr>>(&mut self, iter: T) {
399 iter.into_iter().for_each(|s| self.push_utfstr(s))
400 }
401 }
402
403 impl Extend<$utfstring> for $utfstring {
404 #[inline]
405 fn extend<T: IntoIterator<Item = $utfstring>>(&mut self, iter: T) {
406 iter.into_iter()
407 .for_each(|s| self.push_utfstr(&s))
408 }
409 }
410
411 impl<'a> Extend<Cow<'a, $utfstr>> for $utfstring {
412 #[inline]
413 fn extend<T: IntoIterator<Item = Cow<'a, $utfstr>>>(&mut self, iter: T) {
414 iter.into_iter().for_each(|s| self.push_utfstr(&s))
415 }
416 }
417
418 impl Extend<Box<$utfstr>> for $utfstring {
419 #[inline]
420 fn extend<T: IntoIterator<Item = Box<$utfstr>>>(&mut self, iter: T) {
421 iter.into_iter().for_each(|s| self.push_utfstr(&s))
422 }
423 }
424
425 impl<'a> Extend<&'a str> for $utfstring {
426 #[inline]
427 fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
428 iter.into_iter().for_each(|s| self.push_str(s))
429 }
430 }
431
432 impl Extend<String> for $utfstring {
433 #[inline]
434 fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
435 iter.into_iter().for_each(|s| self.push_str(&s))
436 }
437 }
438
439 impl From<&mut $utfstr> for $utfstring {
440 #[inline]
441 fn from(value: &mut $utfstr) -> Self {
442 value.to_owned()
443 }
444 }
445
446 impl From<&$utfstr> for $utfstring {
447 #[inline]
448 fn from(value: &$utfstr) -> Self {
449 value.to_owned()
450 }
451 }
452
453 impl From<&$utfstring> for $utfstring {
454 #[inline]
455 fn from(value: &$utfstring) -> Self {
456 value.clone()
457 }
458 }
459
460 impl From<$utfstring> for Cow<'_, $utfstr> {
461 #[inline]
462 fn from(value: $utfstring) -> Self {
463 Cow::Owned(value)
464 }
465 }
466
467 impl<'a> From<&'a $utfstring> for Cow<'a, $utfstr> {
468 #[inline]
469 fn from(value: &'a $utfstring) -> Self {
470 Cow::Borrowed(value)
471 }
472 }
473
474 impl From<Cow<'_, $utfstr>> for $utfstring {
475 #[inline]
476 fn from(value: Cow<'_, $utfstr>) -> Self {
477 value.into_owned()
478 }
479 }
480
481 impl From<&str> for $utfstring {
482 #[inline]
483 fn from(value: &str) -> Self {
484 Self::from_str(value)
485 }
486 }
487
488 impl From<String> for $utfstring {
489 #[inline]
490 fn from(value: String) -> Self {
491 Self::from_str(&value)
492 }
493 }
494
495 impl From<$utfstring> for crate::$ustring {
496 #[inline]
497 fn from(value: $utfstring) -> Self {
498 crate::$ustring::from_vec(value.into_vec())
499 }
500 }
501
impl From<&$utfstr> for String {
    /// Produces a standard [`String`] from the string slice via its `to_string` method.
    #[inline]
    fn from(value: &$utfstr) -> Self {
        value.to_string()
    }
}
508
impl From<$utfstring> for String {
    /// Produces a standard [`String`] from the owned string via its `to_string` method.
    #[inline]
    fn from(value: $utfstring) -> Self {
        value.to_string()
    }
}
515
516 #[cfg(feature = "std")]
517 impl From<$utfstring> for std::ffi::OsString {
518 #[inline]
519 fn from(value: $utfstring) -> std::ffi::OsString {
520 value.as_ustr().to_os_string()
521 }
522 }
523
524 impl FromIterator<char> for $utfstring {
525 #[inline]
526 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
527 let mut s = Self::new();
528 s.extend(iter);
529 s
530 }
531 }
532
533 impl<'a> FromIterator<&'a char> for $utfstring {
534 #[inline]
535 fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
536 let mut s = Self::new();
537 s.extend(iter);
538 s
539 }
540 }
541
542 impl<'a> FromIterator<&'a $utfstr> for $utfstring {
543 #[inline]
544 fn from_iter<T: IntoIterator<Item = &'a $utfstr>>(iter: T) -> Self {
545 let mut s = Self::new();
546 s.extend(iter);
547 s
548 }
549 }
550
551 impl FromIterator<$utfstring> for $utfstring {
552 fn from_iter<T: IntoIterator<Item = $utfstring>>(iter: T) -> Self {
553 let mut iterator = iter.into_iter();
554
555 // Because we're iterating over `String`s, we can avoid at least
556 // one allocation by getting the first string from the iterator
557 // and appending to it all the subsequent strings.
558 match iterator.next() {
559 None => Self::new(),
560 Some(mut buf) => {
561 buf.extend(iterator);
562 buf
563 }
564 }
565 }
566 }
567
568 impl FromIterator<Box<$utfstr>> for $utfstring {
569 #[inline]
570 fn from_iter<T: IntoIterator<Item = Box<$utfstr>>>(iter: T) -> Self {
571 let mut s = Self::new();
572 s.extend(iter);
573 s
574 }
575 }
576
577 impl<'a> FromIterator<Cow<'a, $utfstr>> for $utfstring {
578 #[inline]
579 fn from_iter<T: IntoIterator<Item = Cow<'a, $utfstr>>>(iter: T) -> Self {
580 let mut s = Self::new();
581 s.extend(iter);
582 s
583 }
584 }
585
586 impl<'a> FromIterator<&'a str> for $utfstring {
587 #[inline]
588 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
589 let mut s = Self::new();
590 s.extend(iter);
591 s
592 }
593 }
594
595 impl FromIterator<String> for $utfstring {
596 #[inline]
597 fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
598 let mut s = Self::new();
599 s.extend(iter);
600 s
601 }
602 }
603
604 impl FromStr for $utfstring {
605 type Err = Infallible;
606
607 #[inline]
608 fn from_str(s: &str) -> Result<Self, Self::Err> {
609 Ok($utfstring::from_str(s))
610 }
611 }
612
613 impl<I> Index<I> for $utfstring
614 where
615 I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
616 {
617 type Output = $utfstr;
618
619 #[inline]
620 fn index(&self, index: I) -> &Self::Output {
621 &self.deref()[index]
622 }
623 }
624
625 impl<I> IndexMut<I> for $utfstring
626 where
627 I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
628 {
629 #[inline]
630 fn index_mut(&mut self, index: I) -> &mut Self::Output {
631 &mut self.deref_mut()[index]
632 }
633 }
634
635 impl PartialEq<$utfstr> for $utfstring {
636 #[inline]
637 fn eq(&self, other: &$utfstr) -> bool {
638 self.as_slice() == other.as_slice()
639 }
640 }
641
642 impl PartialEq<&$utfstr> for $utfstring {
643 #[inline]
644 fn eq(&self, other: &&$utfstr) -> bool {
645 self.as_slice() == other.as_slice()
646 }
647 }
648
649 impl PartialEq<Cow<'_, $utfstr>> for $utfstring {
650 #[inline]
651 fn eq(&self, other: &Cow<'_, $utfstr>) -> bool {
652 self == other.as_ref()
653 }
654 }
655
656 impl PartialEq<$utfstring> for Cow<'_, $utfstr> {
657 #[inline]
658 fn eq(&self, other: &$utfstring) -> bool {
659 self.as_ref() == other
660 }
661 }
662
663 impl PartialEq<$utfstring> for $utfstr {
664 #[inline]
665 fn eq(&self, other: &$utfstring) -> bool {
666 self.as_slice() == other.as_slice()
667 }
668 }
669
670 impl PartialEq<$utfstring> for &$utfstr {
671 #[inline]
672 fn eq(&self, other: &$utfstring) -> bool {
673 self.as_slice() == other.as_slice()
674 }
675 }
676
677 impl PartialEq<str> for $utfstring {
678 #[inline]
679 fn eq(&self, other: &str) -> bool {
680 self.chars().eq(other.chars())
681 }
682 }
683
684 impl PartialEq<&str> for $utfstring {
685 #[inline]
686 fn eq(&self, other: &&str) -> bool {
687 self.chars().eq(other.chars())
688 }
689 }
690
691 impl PartialEq<$utfstring> for str {
692 #[inline]
693 fn eq(&self, other: &$utfstring) -> bool {
694 self.chars().eq(other.chars())
695 }
696 }
697
698 impl PartialEq<$utfstring> for &str {
699 #[inline]
700 fn eq(&self, other: &$utfstring) -> bool {
701 self.chars().eq(other.chars())
702 }
703 }
704
705 impl PartialEq<String> for $utfstring {
706 #[inline]
707 fn eq(&self, other: &String) -> bool {
708 self.chars().eq(other.chars())
709 }
710 }
711
712 impl PartialEq<$utfstring> for String {
713 #[inline]
714 fn eq(&self, other: &$utfstring) -> bool {
715 self.chars().eq(other.chars())
716 }
717 }
718
719 impl PartialEq<String> for $utfstr {
720 #[inline]
721 fn eq(&self, other: &String) -> bool {
722 self.chars().eq(other.chars())
723 }
724 }
725
726 impl PartialEq<$utfstr> for String {
727 #[inline]
728 fn eq(&self, other: &$utfstr) -> bool {
729 self.chars().eq(other.chars())
730 }
731 }
732
733 impl PartialEq<Cow<'_, str>> for $utfstring {
734 #[inline]
735 fn eq(&self, other: &Cow<'_, str>) -> bool {
736 self == other.as_ref()
737 }
738 }
739
740 impl PartialEq<$utfstring> for Cow<'_, str> {
741 #[inline]
742 fn eq(&self, other: &$utfstring) -> bool {
743 self.as_ref() == other
744 }
745 }
746
747 impl PartialEq<crate::$ustr> for $utfstring {
748 #[inline]
749 fn eq(&self, other: &crate::$ustr) -> bool {
750 self.as_slice() == other.as_slice()
751 }
752 }
753
754 impl PartialEq<$utfstring> for crate::$ustr {
755 #[inline]
756 fn eq(&self, other: &$utfstring) -> bool {
757 self.as_slice() == other.as_slice()
758 }
759 }
760
761 impl PartialEq<crate::$ustring> for $utfstring {
762 #[inline]
763 fn eq(&self, other: &crate::$ustring) -> bool {
764 self.as_slice() == other.as_slice()
765 }
766 }
767
768 impl PartialEq<$utfstring> for crate::$ustring {
769 #[inline]
770 fn eq(&self, other: &$utfstring) -> bool {
771 self.as_slice() == other.as_slice()
772 }
773 }
774
775 impl PartialEq<crate::$ustring> for $utfstr {
776 #[inline]
777 fn eq(&self, other: &crate::$ustring) -> bool {
778 self.as_slice() == other.as_slice()
779 }
780 }
781
782 impl PartialEq<$utfstr> for crate::$ustring {
783 #[inline]
784 fn eq(&self, other: &$utfstr) -> bool {
785 self.as_slice() == other.as_slice()
786 }
787 }
788
789 impl PartialEq<crate::$ucstr> for $utfstring {
790 #[inline]
791 fn eq(&self, other: &crate::$ucstr) -> bool {
792 self.as_slice() == other.as_slice()
793 }
794 }
795
796 impl PartialEq<$utfstring> for crate::$ucstr {
797 #[inline]
798 fn eq(&self, other: &$utfstring) -> bool {
799 self.as_slice() == other.as_slice()
800 }
801 }
802
803 impl PartialEq<crate::$ucstring> for $utfstring {
804 #[inline]
805 fn eq(&self, other: &crate::$ucstring) -> bool {
806 self.as_slice() == other.as_slice()
807 }
808 }
809
810 impl PartialEq<$utfstring> for crate::$ucstring {
811 #[inline]
812 fn eq(&self, other: &$utfstring) -> bool {
813 self.as_slice() == other.as_slice()
814 }
815 }
816
817 impl PartialEq<crate::$ucstring> for $utfstr {
818 #[inline]
819 fn eq(&self, other: &crate::$ucstring) -> bool {
820 self.as_slice() == other.as_slice()
821 }
822 }
823
824 impl PartialEq<$utfstr> for crate::$ucstring {
825 #[inline]
826 fn eq(&self, other: &$utfstr) -> bool {
827 self.as_slice() == other.as_slice()
828 }
829 }
830
831 impl ToOwned for $utfstr {
832 type Owned = $utfstring;
833
834 #[inline]
835 fn to_owned(&self) -> Self::Owned {
836 unsafe { $utfstring::from_vec_unchecked(&self.inner) }
837 }
838 }
839
840 impl TryFrom<crate::$ustring> for $utfstring {
841 type Error = $utferror;
842
843 #[inline]
844 fn try_from(value: crate::$ustring) -> Result<Self, Self::Error> {
845 $utfstring::from_ustring(value)
846 }
847 }
848
849 impl TryFrom<crate::$ucstring> for $utfstring {
850 type Error = $utferror;
851
852 #[inline]
853 fn try_from(value: crate::$ucstring) -> Result<Self, Self::Error> {
854 $utfstring::from_ustring(value)
855 }
856 }
857
858 impl TryFrom<&crate::$ustr> for $utfstring {
859 type Error = $utferror;
860
861 #[inline]
862 fn try_from(value: &crate::$ustr) -> Result<Self, Self::Error> {
863 $utfstring::from_ustring(value)
864 }
865 }
866
867 impl TryFrom<&crate::$ucstr> for $utfstring {
868 type Error = $utferror;
869
870 #[inline]
871 fn try_from(value: &crate::$ucstr) -> Result<Self, Self::Error> {
872 $utfstring::from_ustring(value)
873 }
874 }
875
876 impl Write for $utfstring {
877 #[inline]
878 fn write_str(&mut self, s: &str) -> core::fmt::Result {
879 self.push_str(s);
880 Ok(())
881 }
882
883 #[inline]
884 fn write_char(&mut self, c: char) -> core::fmt::Result {
885 self.push(c);
886 Ok(())
887 }
888 }
889 };
890}
891
892utfstring_common_impl! {
893 /// A UTF-16 encoded, growable owned string.
894 ///
895 /// [`Utf16String`] is a version of [`String`] that uses UTF-16 encoding instead of UTF-8
896 /// encoding. The equivalent of [`str`] for [`Utf16String`] is [`Utf16Str`].
897 ///
898 /// Unlike [`U16String`][crate::U16String] which does not specify a coding, [`Utf16String`] is
899 /// always valid UTF-16 encoding. Using unsafe methods to construct a [`Utf16String`] with
900 /// invalid UTF-16 encoding results in undefined behavior.
901 ///
902 /// # UTF-16
903 ///
904 /// [`Utf16String`] is always UTF-16. This means if you need non-UTF-16 wide strings, you should
905 /// use [`U16String`][crate::U16String] instead. It is similar, but does not constrain the
906 /// encoding.
907 ///
908 /// This also means you cannot directly index a single element of the string, as UTF-16 encoding
909 /// may be a single `u16` value or a pair of `u16` surrogates. Instead, you can index subslices
910 /// of the string, or use the [`chars`][Utf16Str::chars] iterator instead.
911 ///
912 /// # Examples
913 ///
914 /// The easiest way to use [`Utf16String`] is with the [`utf16str!`][crate::utf16str] macro to
915 /// convert string literals into UTF-16 string slices at compile time:
916 ///
917 /// ```
918 /// use widestring::{Utf16String, utf16str};
919 /// let hello = Utf16String::from(utf16str!("Hello, world!"));
920 /// ```
921 ///
922 /// Because this string is always valid UTF-16, it is a non-fallible, lossless conversion to and
923 /// from standard Rust strings:
924 ///
925 /// ```
926 /// use widestring::Utf16String;
927 /// // Unlike the utf16str macro, this will do conversion at runtime instead of compile time
928 /// let hello = Utf16String::from_str("Hello, world!");
929 /// let hello_string: String = hello.to_string();
930 /// assert_eq!(hello, hello_string); // Can easily compare between string types
931 /// ```
932 struct Utf16String([u16]);
933
934 type UtfStr = Utf16Str;
935 type UStr = U16Str;
936 type UCStr = U16CStr;
937 type UString = U16String;
938 type UCString = U16CString;
939 type UtfError = Utf16Error;
940
941 /// Converts a [`u16`] vector to a string without checking that the string contains valid
942 /// UTF-16.
943 ///
944 /// See the safe version, [`from_vec`][Self::from_vec], for more information.
945 ///
946 /// # Safety
947 ///
948 /// This function is unsafe because it does not check that the vector passed to it is valid
949 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
950 /// [`Utf16String`] is always valid UTF-16.
951 ///
952 /// # Examples
953 ///
954 /// ```
955 /// use widestring::Utf16String;
956 ///
957 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
958 /// let sparkle_heart = unsafe { Utf16String::from_vec_unchecked(sparkle_heart) };
959 ///
960 /// assert_eq!("💖", sparkle_heart);
961 /// ```
962 fn from_vec_unchecked() -> {}
963
964 /// Re-encodes a UTF-8--encoded string slice into a UTF-16--encoded string.
965 ///
966 /// This operation is lossless and infallible, but requires a memory allocation.
967 ///
968 /// # Examples
969 ///
970 /// ```
971 /// # use widestring::utf16str;
972 /// use widestring::Utf16String;
973 /// let music = Utf16String::from_str("𝄞music");
974 /// assert_eq!(utf16str!("𝄞music"), music);
975 /// ```
976 fn from_str() -> {}
977
978 /// Appends a given string slice onto the end of this string.
979 ///
980 /// # Examples
981 ///
982 /// ```
983 /// # use widestring::utf16str;
984 /// use widestring::Utf16String;
985 /// let mut s = Utf16String::from_str("foo");
986 /// s.push_utfstr(utf16str!("bar"));
987 /// assert_eq!(utf16str!("foobar"), s);
988 /// ```
989 fn push_utfstr() -> {}
990
991 /// Returns a mutable reference to the contents of this string.
992 ///
993 /// # Safety
994 ///
995 /// This function is unsafe because it does not check that the values in the vector are valid
/// UTF-16. If this constraint is violated, it may cause undefined behavior with future
997 /// users of the string, as it is assumed that this string is always valid UTF-16.
998 fn as_mut_vec() -> {}
999}
1000
1001utfstring_common_impl! {
1002 /// A UTF-32 encoded, growable owned string.
1003 ///
1004 /// [`Utf32String`] is a version of [`String`] that uses UTF-32 encoding instead of UTF-8
1005 /// encoding. The equivalent of [`str`] for [`Utf32String`] is [`Utf32Str`].
1006 ///
1007 /// Unlike [`U32String`][crate::U32String] which does not specify a coding, [`Utf32String`] is
1008 /// always valid UTF-32 encoding. Using unsafe methods to construct a [`Utf32String`] with
1009 /// invalid UTF-32 encoding results in undefined behavior.
1010 ///
1011 /// # UTF-32
1012 ///
1013 /// [`Utf32String`] is always UTF-32. This means if you need non-UTF-32 wide strings, you should
1014 /// use [`U32String`][crate::U32String] instead. It is similar, but does not constrain the
1015 /// encoding.
1016 ///
1017 /// Unlike UTF-16 or UTF-8 strings, you may index single elements of UTF-32 strings in addition
1018 /// to subslicing. This is due to it being a fixed-length encoding for [`char`]s. This also
1019 /// means that [`Utf32String`] is the same representation as a `Vec<char>`; indeed conversions
1020 /// between the two exist and are simple typecasts.
1021 ///
1022 /// # Examples
1023 ///
1024 /// The easiest way to use [`Utf32String`] is with the [`utf32str!`][crate::utf32str] macro to
1025 /// convert string literals into UTF-32 string slices at compile time:
1026 ///
1027 /// ```
1028 /// use widestring::{Utf32String, utf32str};
1029 /// let hello = Utf32String::from(utf32str!("Hello, world!"));
1030 /// ```
1031 ///
1032 /// Because this string is always valid UTF-32, it is a non-fallible, lossless conversion to and
1033 /// from standard Rust strings:
1034 ///
1035 /// ```
1036 /// use widestring::Utf32String;
1037 /// // Unlike the utf32str macro, this will do conversion at runtime instead of compile time
1038 /// let hello = Utf32String::from_str("Hello, world!");
1039 /// let hello_string: String = hello.to_string();
1040 /// assert_eq!(hello, hello_string); // Can easily compare between string types
1041 /// ```
1042 struct Utf32String([u32]);
1043
1044 type UtfStr = Utf32Str;
1045 type UStr = U32Str;
1046 type UCStr = U32CStr;
1047 type UString = U32String;
1048 type UCString = U32CString;
1049 type UtfError = Utf32Error;
1050
1051 /// Converts a [`u32`] vector to a string without checking that the string contains valid
1052 /// UTF-32.
1053 ///
1054 /// See the safe version, [`from_vec`][Self::from_vec], for more information.
1055 ///
1056 /// # Safety
1057 ///
1058 /// This function is unsafe because it does not check that the vector passed to it is valid
1059 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
1060 /// [`Utf32String`] is always valid UTF-32.
1061 ///
1062 /// # Examples
1063 ///
1064 /// ```
1065 /// use widestring::Utf32String;
1066 ///
1067 /// let sparkle_heart = vec![0x1f496];
1068 /// let sparkle_heart = unsafe { Utf32String::from_vec_unchecked(sparkle_heart) };
1069 ///
1070 /// assert_eq!("💖", sparkle_heart);
1071 /// ```
1072 fn from_vec_unchecked() -> {}
1073
1074 /// Re-encodes a UTF-8--encoded string slice into a UTF-32--encoded string.
1075 ///
1076 /// This operation is lossless and infallible, but requires a memory allocation.
1077 ///
1078 /// # Examples
1079 ///
1080 /// ```
1081 /// # use widestring::utf32str;
1082 /// use widestring::Utf32String;
1083 /// let music = Utf32String::from_str("𝄞music");
1084 /// assert_eq!(utf32str!("𝄞music"), music);
1085 /// ```
1086 fn from_str() -> {}
1087
1088 /// Appends a given string slice onto the end of this string.
1089 ///
1090 /// # Examples
1091 ///
1092 /// ```
1093 /// # use widestring::utf32str;
1094 /// use widestring::Utf32String;
1095 /// let mut s = Utf32String::from_str("foo");
1096 /// s.push_utfstr(utf32str!("bar"));
1097 /// assert_eq!(utf32str!("foobar"), s);
1098 /// ```
1099 fn push_utfstr() -> {}
1100
1101 /// Returns a mutable reference to the contents of this string.
1102 ///
1103 /// # Safety
1104 ///
1105 /// This function is unsafe because it does not check that the values in the vector are valid
/// UTF-32. If this constraint is violated, it may cause undefined behavior with future
/// users of the string, as it is assumed that this string is always valid UTF-32.
1108 fn as_mut_vec() -> {}
1109}
1110
1111impl Utf16String {
1112 /// Converts a [`u16`] vector of UTF-16 data to a string.
1113 ///
1114 /// Not all slices of [`u16`] values are valid to convert, since [`Utf16String`] requires that
1115 /// it is always valid UTF-16. This function checks to ensure that the values are valid UTF-16,
1116 /// and then does the conversion. This does not do any copying.
1117 ///
1118 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1119 /// the validity check, there is an unsafe version of this function,
1120 /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1121 /// the check.
1122 ///
1123 /// If you need a string slice, consider using [`Utf16Str::from_slice`] instead.
1124 ///
1125 /// The inverse of this method is [`into_vec`][Self::into_vec].
1126 ///
1127 /// # Errors
1128 ///
1129 /// Returns an error if the vector is not UTF-16 with a description as to why the provided
1130 /// vector is not UTF-16. The error will contain the original [`Vec`] that can be reclaimed with
1131 /// [`into_vec`][Utf16Error::into_vec].
1132 ///
1133 /// # Examples
1134 ///
1135 /// ```
1136 /// use widestring::Utf16String;
1137 ///
1138 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1139 /// let sparkle_heart = Utf16String::from_vec(sparkle_heart).unwrap();
1140 ///
1141 /// assert_eq!("💖", sparkle_heart);
1142 /// ```
1143 ///
1144 /// With incorrect values that return an error:
1145 ///
1146 /// ```
1147 /// use widestring::Utf16String;
1148 ///
1149 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1150 ///
1151 /// assert!(Utf16String::from_vec(sparkle_heart).is_err());
1152 /// ```
1153 pub fn from_vec(v: impl Into<Vec<u16>>) -> Result<Self, Utf16Error> {
1154 let v = validate_utf16_vec(v.into())?;
1155 Ok(unsafe { Self::from_vec_unchecked(v) })
1156 }
1157
1158 /// Converts a slice of [`u16`] data to a string, including invalid characters.
1159 ///
1160 /// Since the given [`u16`] slice may not be valid UTF-16, and [`Utf16String`] requires that
1161 /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1162 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1163 /// looks like this: �
1164 ///
1165 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1166 /// the conversion, there is an unsafe version of this function,
1167 /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1168 /// the checks.
1169 ///
1170 /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1171 /// invalid UTF-16, then we need to insert our replacement characters which will change the size
1172 /// of the string, and hence, require an owned [`Utf16String`]. But if it's already valid
1173 /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1174 ///
1175 /// # Examples
1176 ///
1177 /// ```
1178 /// # use widestring::utf16str;
1179 /// use widestring::Utf16String;
1180 ///
1181 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1182 /// let sparkle_heart = Utf16String::from_slice_lossy(&sparkle_heart);
1183 ///
1184 /// assert_eq!(utf16str!("💖"), sparkle_heart);
1185 /// ```
1186 ///
/// With invalid values that get replaced:
1188 ///
1189 /// ```
1190 /// # use widestring::utf16str;
1191 /// use widestring::Utf16String;
1192 ///
1193 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1194 /// let sparkle_heart = Utf16String::from_slice_lossy(&sparkle_heart);
1195 ///
1196 /// assert_eq!(utf16str!("\u{fffd}\u{0000}"), sparkle_heart);
1197 /// ```
1198 #[must_use]
1199 pub fn from_slice_lossy(s: &[u16]) -> Cow<'_, Utf16Str> {
1200 match validate_utf16(s) {
1201 // SAFETY: validated as UTF-16
1202 Ok(()) => Cow::Borrowed(unsafe { Utf16Str::from_slice_unchecked(s) }),
1203 Err(e) => {
1204 let mut v = Vec::with_capacity(s.len());
1205 // Valid up until index
1206 v.extend_from_slice(&s[..e.index()]);
1207 let mut index = e.index();
1208 let mut replacement_char = [0; 2];
1209 let replacement_char =
1210 char::REPLACEMENT_CHARACTER.encode_utf16(&mut replacement_char);
1211 while index < s.len() {
1212 let u = s[index];
1213 if is_utf16_surrogate(u) {
1214 if is_utf16_low_surrogate(u) || index + 1 >= s.len() {
1215 v.extend_from_slice(replacement_char);
1216 } else {
1217 let low = s[index + 1];
1218 if is_utf16_low_surrogate(low) {
1219 // Valid surrogate pair
1220 v.push(u);
1221 v.push(low);
1222 index += 1;
1223 } else {
1224 v.extend_from_slice(replacement_char);
1225 }
1226 }
1227 } else {
1228 v.push(u);
1229 }
1230 index += 1;
1231 }
1232 // SATEFY: Is now valid UTF-16 with replacement chars
1233 Cow::Owned(unsafe { Self::from_vec_unchecked(v) })
1234 }
1235 }
1236 }
1237
1238 /// Converts a wide string of undefined encoding to a UTF-16 string without checking that the
1239 /// string contains valid UTF-16.
1240 ///
1241 /// See the safe version, [`from_ustring`][Self::from_ustring], for more information.
1242 ///
1243 /// # Safety
1244 ///
1245 /// This function is unsafe because it does not check that the string passed to it is valid
1246 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
1247 /// [`Utf16String`] is always valid UTF-16.
1248 ///
1249 /// # Examples
1250 ///
1251 /// ```
1252 /// use widestring::{U16String, Utf16String};
1253 ///
1254 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1255 /// let sparkle_heart = U16String::from_vec(sparkle_heart);
1256 /// let sparkle_heart = unsafe { Utf16String::from_ustring_unchecked(sparkle_heart) };
1257 ///
1258 /// assert_eq!("💖", sparkle_heart);
1259 /// ```
1260 #[inline]
1261 #[must_use]
1262 pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U16String>) -> Self {
1263 Self::from_vec_unchecked(s.into().into_vec())
1264 }
1265
1266 /// Converts a wide string of undefined encoding into a UTF-16 string.
1267 ///
1268 /// Not all strings with undefined encoding are valid to convert, since [`Utf16String`] requires
1269 /// that it is always valid UTF-16. This function checks to ensure that the string is valid
1270 /// UTF-16, and then does the conversion. This does not do any copying.
1271 ///
1272 /// If you are sure that the string is valid UTF-16, and you don't want to incur the overhead of
1273 /// the validity check, there is an unsafe version of this function,
1274 /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1275 /// skips the check.
1276 ///
1277 /// If you need a string slice, consider using [`Utf16Str::from_ustr`] instead.
1278 ///
1279 /// # Errors
1280 ///
1281 /// Returns an error if the string is not UTF-16 with a description as to why the provided
1282 /// string is not UTF-16.
1283 ///
1284 /// # Examples
1285 ///
1286 /// ```
1287 /// use widestring::{U16String, Utf16String};
1288 ///
1289 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1290 /// let sparkle_heart = U16String::from_vec(sparkle_heart);
1291 /// let sparkle_heart = Utf16String::from_ustring(sparkle_heart).unwrap();
1292 ///
1293 /// assert_eq!("💖", sparkle_heart);
1294 /// ```
1295 ///
1296 /// With incorrect values that return an error:
1297 ///
1298 /// ```
1299 /// use widestring::{U16String, Utf16String};
1300 ///
1301 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1302 /// let sparkle_heart = U16String::from_vec(sparkle_heart); // Valid for a U16String
1303 ///
1304 /// assert!(Utf16String::from_ustring(sparkle_heart).is_err()); // But not for a Utf16String
1305 /// ```
1306 #[inline]
1307 pub fn from_ustring(s: impl Into<crate::U16String>) -> Result<Self, Utf16Error> {
1308 Self::from_vec(s.into().into_vec())
1309 }
1310
/// Converts a wide string slice of undefined encoding to a UTF-16 string, including invalid
/// characters.
1313 ///
1314 /// Since the given string slice may not be valid UTF-16, and [`Utf16String`] requires that
1315 /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1316 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1317 /// looks like this: �
1318 ///
1319 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1320 /// the conversion, there is an unsafe version of this function,
1321 /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1322 /// skips the checks.
1323 ///
1324 /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1325 /// invalid UTF-16, then we need to insert our replacement characters which will change the size
1326 /// of the string, and hence, require an owned [`Utf16String`]. But if it's already valid
1327 /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1328 ///
1329 /// # Examples
1330 ///
1331 /// ```
1332 /// # use widestring::utf16str;
1333 /// use widestring::{U16Str, Utf16String};
1334 ///
1335 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1336 /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
1337 /// let sparkle_heart = Utf16String::from_ustr_lossy(sparkle_heart);
1338 ///
1339 /// assert_eq!(utf16str!("💖"), sparkle_heart);
1340 /// ```
1341 ///
/// With invalid values that get replaced:
1343 ///
1344 /// ```
1345 /// # use widestring::utf16str;
1346 /// use widestring::{U16Str, Utf16String};
1347 ///
1348 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1349 /// let sparkle_heart = U16Str::from_slice(&sparkle_heart);
1350 /// let sparkle_heart = Utf16String::from_ustr_lossy(sparkle_heart);
1351 ///
1352 /// assert_eq!(utf16str!("\u{fffd}\u{0000}"), sparkle_heart);
1353 /// ```
1354 #[inline]
1355 #[must_use]
1356 pub fn from_ustr_lossy(s: &crate::U16Str) -> Cow<'_, Utf16Str> {
1357 Self::from_slice_lossy(s.as_slice())
1358 }
1359
1360 /// Converts a wide C string to a UTF-16 string without checking that the string contains
1361 /// valid UTF-16.
1362 ///
1363 /// The resulting string does *not* contain the nul terminator.
1364 ///
1365 /// See the safe version, [`from_ucstring`][Self::from_ucstring], for more information.
1366 ///
1367 /// # Safety
1368 ///
1369 /// This function is unsafe because it does not check that the string passed to it is valid
1370 /// UTF-16. If this constraint is violated, undefined behavior results as it is assumed the
1371 /// [`Utf16String`] is always valid UTF-16.
1372 ///
1373 /// # Examples
1374 ///
1375 /// ```
1376 /// use widestring::{U16CString, Utf16String};
1377 ///
1378 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1379 /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap();
1380 /// let sparkle_heart = unsafe { Utf16String::from_ucstring_unchecked(sparkle_heart) };
1381 ///
1382 /// assert_eq!("💖", sparkle_heart);
1383 /// ```
1384 #[inline]
1385 #[must_use]
1386 pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U16CString>) -> Self {
1387 Self::from_vec_unchecked(s.into().into_vec())
1388 }
1389
1390 /// Converts a wide C string into a UTF-16 string.
1391 ///
1392 /// The resulting string does *not* contain the nul terminator.
1393 ///
1394 /// Not all wide C strings are valid to convert, since [`Utf16String`] requires that
1395 /// it is always valid UTF-16. This function checks to ensure that the string is valid UTF-16,
1396 /// and then does the conversion. This does not do any copying.
1397 ///
1398 /// If you are sure that the string is valid UTF-16, and you don't want to incur the overhead of
1399 /// the validity check, there is an unsafe version of this function,
1400 /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
1401 /// skips the check.
1402 ///
1403 /// If you need a string slice, consider using [`Utf16Str::from_ucstr`] instead.
1404 ///
1405 /// # Errors
1406 ///
1407 /// Returns an error if the string is not UTF-16 with a description as to why the provided
1408 /// string is not UTF-16.
1409 ///
1410 /// # Examples
1411 ///
1412 /// ```
1413 /// use widestring::{U16CString, Utf16String};
1414 ///
1415 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // Raw surrogate pair
1416 /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap();
1417 /// let sparkle_heart = Utf16String::from_ucstring(sparkle_heart).unwrap();
1418 ///
1419 /// assert_eq!("💖", sparkle_heart);
1420 /// ```
1421 ///
1422 /// With incorrect values that return an error:
1423 ///
1424 /// ```
1425 /// use widestring::{U16CString, Utf16String};
1426 ///
1427 /// let sparkle_heart = vec![0xd83d]; // This is an invalid unpaired surrogate
1428 /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap(); // Valid for a U16CString
1429 ///
1430 /// assert!(Utf16String::from_ucstring(sparkle_heart).is_err()); // But not for a Utf16String
1431 /// ```
1432 #[inline]
1433 pub fn from_ucstring(s: impl Into<crate::U16CString>) -> Result<Self, Utf16Error> {
1434 Self::from_vec(s.into().into_vec())
1435 }
1436
/// Converts a wide C string slice to a UTF-16 string, including invalid characters.
1438 ///
1439 /// The resulting string does *not* contain the nul terminator.
1440 ///
1441 /// Since the given string slice may not be valid UTF-16, and [`Utf16String`] requires that
1442 /// it is always valid UTF-16, during the conversion this function replaces any invalid UTF-16
1443 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1444 /// looks like this: �
1445 ///
1446 /// If you are sure that the slice is valid UTF-16, and you don't want to incur the overhead of
1447 /// the conversion, there is an unsafe version of this function,
1448 /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
1449 /// skips the checks.
1450 ///
1451 /// This function returns a [`Cow<'_, Utf16Str>`][std::borrow::Cow]. If the given slice is
1452 /// invalid UTF-16, then we need to insert our replacement characters which will change the size
1453 /// of the string, and hence, require an owned [`Utf16String`]. But if it's already valid
1454 /// UTF-16, we don't need a new allocation. This return type allows us to handle both cases.
1455 ///
1456 /// # Examples
1457 ///
1458 /// ```
1459 /// # use widestring::utf16str;
1460 /// use widestring::{U16CStr, Utf16String};
1461 ///
1462 /// let sparkle_heart = vec![0xd83d, 0xdc96, 0x0]; // Raw surrogate pair
1463 /// let sparkle_heart = U16CStr::from_slice(&sparkle_heart).unwrap();
1464 /// let sparkle_heart = Utf16String::from_ucstr_lossy(sparkle_heart);
1465 ///
1466 /// assert_eq!(utf16str!("💖"), sparkle_heart);
1467 /// ```
1468 ///
/// With invalid values that get replaced:
1470 ///
1471 /// ```
1472 /// # use widestring::utf16str;
1473 /// use widestring::{U16CStr, Utf16String};
1474 ///
1475 /// let sparkle_heart = vec![0xd83d, 0x0]; // This is an invalid unpaired surrogate
1476 /// let sparkle_heart = U16CStr::from_slice(&sparkle_heart).unwrap();
1477 /// let sparkle_heart = Utf16String::from_ucstr_lossy(sparkle_heart);
1478 ///
1479 /// assert_eq!(utf16str!("\u{fffd}"), sparkle_heart);
1480 /// ```
1481 #[inline]
1482 #[must_use]
1483 pub fn from_ucstr_lossy(s: &crate::U16CStr) -> Cow<'_, Utf16Str> {
1484 Self::from_slice_lossy(s.as_slice())
1485 }
1486
1487 /// Appends the given [`char`] to the end of this string.
1488 ///
1489 /// # Examples
1490 ///
1491 /// ```
1492 /// use widestring::Utf16String;
1493 /// let mut s = Utf16String::from_str("abc");
1494 ///
1495 /// s.push('1');
1496 /// s.push('2');
1497 /// s.push('3');
1498 ///
1499 /// assert_eq!("abc123", s);
1500 /// ```
1501 #[inline]
1502 pub fn push(&mut self, ch: char) {
1503 let mut buf = [0; 2];
1504 self.inner.extend_from_slice(ch.encode_utf16(&mut buf))
1505 }
1506
1507 /// Shortens this string to the specified length.
1508 ///
1509 /// If `new_len` is greater than the string's current length, this has no effect.
1510 ///
1511 /// Note that this method has no effect on the allocated capacity of the string.
1512 ///
1513 /// # Panics
1514 ///
1515 /// Panics if `new_len` does not lie on a [`char`] boundary.
1516 ///
1517 /// # Examples
1518 ///
1519 /// ```
1520 /// use widestring::Utf16String;
1521 /// let mut s = Utf16String::from_str("hello");
1522 /// s.truncate(2);
1523 /// assert_eq!("he", s);
1524 /// ```
1525 #[inline]
1526 pub fn truncate(&mut self, new_len: usize) {
1527 if new_len <= self.len() {
1528 assert!(self.is_char_boundary(new_len));
1529 self.inner.truncate(new_len)
1530 }
1531 }
1532
1533 /// Removes the last character from the string buffer and returns it.
1534 ///
1535 /// Returns [`None`] if this string is empty.
1536 ///
1537 /// # Examples
1538 ///
1539 /// ```
1540 /// use widestring::Utf16String;
1541 /// let mut s = Utf16String::from_str("foo𝄞");
1542 ///
1543 /// assert_eq!(s.pop(), Some('𝄞'));
1544 /// assert_eq!(s.pop(), Some('o'));
1545 /// assert_eq!(s.pop(), Some('o'));
1546 /// assert_eq!(s.pop(), Some('f'));
1547 ///
1548 /// assert_eq!(s.pop(), None);
1549 /// ```
1550 pub fn pop(&mut self) -> Option<char> {
1551 let c = self.inner.pop();
1552 if let Some(c) = c {
1553 if is_utf16_low_surrogate(c) {
1554 let high = self.inner.pop().unwrap();
1555 // SAFETY: string is always valid UTF-16, so pair is valid
1556 Some(unsafe { decode_utf16_surrogate_pair(high, c) })
1557 } else {
1558 // SAFETY: not a surrogate
1559 Some(unsafe { char::from_u32_unchecked(c as u32) })
1560 }
1561 } else {
1562 None
1563 }
1564 }
1565
1566 /// Removes a [`char`] from this string at an offset and returns it.
1567 ///
1568 /// This is an _O(n)_ operation, as it requires copying every element in the buffer.
1569 ///
1570 /// # Panics
1571 ///
1572 /// Panics if `idx` is larger than or equal to the string's length, or if it does not lie on a
1573 /// [`char`] boundary.
1574 ///
1575 /// # Examples
1576 ///
1577 /// ```
1578 /// use widestring::Utf16String;
1579 /// let mut s = Utf16String::from_str("𝄞foo");
1580 ///
1581 /// assert_eq!(s.remove(0), '𝄞');
1582 /// assert_eq!(s.remove(1), 'o');
1583 /// assert_eq!(s.remove(0), 'f');
1584 /// assert_eq!(s.remove(0), 'o');
1585 /// ```
1586 #[inline]
1587 pub fn remove(&mut self, idx: usize) -> char {
1588 let c = self[idx..].chars().next().unwrap();
1589 let next = idx + c.len_utf16();
1590 let len = self.len();
1591 unsafe {
1592 ptr::copy(
1593 self.inner.as_ptr().add(next),
1594 self.inner.as_mut_ptr().add(idx),
1595 len - next,
1596 );
1597 self.inner.set_len(len - (next - idx));
1598 }
1599 c
1600 }
1601
1602 /// Retains only the characters specified by the predicate.
1603 ///
1604 /// In other words, remove all characters `c` such that `f(c)` returns `false`. This method
1605 /// operates in place, visiting each character exactly once in the original order, and preserves
1606 /// the order of the retained characters.
1607 ///
1608 /// # Examples
1609 ///
1610 /// ```
1611 /// use widestring::Utf16String;
1612 /// let mut s = Utf16String::from_str("f_o_ob_ar");
1613 ///
1614 /// s.retain(|c| c != '_');
1615 ///
1616 /// assert_eq!(s, "foobar");
1617 /// ```
1618 ///
1619 /// Because the elements are visited exactly once in the original order, external state may be
1620 /// used to decide which elements to keep.
1621 ///
1622 /// ```
1623 /// use widestring::Utf16String;
1624 /// let mut s = Utf16String::from_str("abcde");
1625 /// let keep = [false, true, true, false, true];
1626 /// let mut iter = keep.iter();
1627 /// s.retain(|_| *iter.next().unwrap());
1628 /// assert_eq!(s, "bce");
1629 /// ```
1630 pub fn retain<F>(&mut self, mut f: F)
1631 where
1632 F: FnMut(char) -> bool,
1633 {
1634 let mut index = 0;
1635 while index < self.len() {
1636 // SAFETY: always in bounds and incremented by len_utf16 only
1637 let c = unsafe { self.get_unchecked(index..) }
1638 .chars()
1639 .next()
1640 .unwrap();
1641 if !f(c) {
1642 self.inner.drain(index..index + c.len_utf16());
1643 } else {
1644 index += c.len_utf16();
1645 }
1646 }
1647 }
1648
1649 /// Inserts a character into this string at an offset.
1650 ///
1651 /// This is an _O(n)_ operation as it requires copying every element in the buffer.
1652 ///
1653 /// # Panics
1654 ///
1655 /// Panics if `idx` is larger than the string's length, or if it does not lie on a [`char`]
1656 /// boundary.
1657 ///
1658 /// # Examples
1659 ///
1660 /// ```
1661 /// use widestring::Utf16String;
1662 /// let mut s = Utf16String::with_capacity(5);
1663 ///
1664 /// s.insert(0, '𝄞');
1665 /// s.insert(0, 'f');
1666 /// s.insert(1, 'o');
1667 /// s.insert(4, 'o');
1668 ///
1669 /// assert_eq!("fo𝄞o", s);
1670 /// ```
1671 #[inline]
1672 pub fn insert(&mut self, idx: usize, ch: char) {
1673 assert!(self.is_char_boundary(idx));
1674 let mut bits = [0; 2];
1675 let bits = ch.encode_utf16(&mut bits);
1676
1677 unsafe {
1678 self.insert_slice(idx, bits);
1679 }
1680 }
1681
1682 /// Inserts a UTF-16 string slice into this string at an offset.
1683 ///
1684 /// This is an _O(n)_ operation as it requires copying every element in the buffer.
1685 ///
1686 /// # Panics
1687 ///
1688 /// Panics if `idx` is larger than the string's length, or if it does not lie on a [`char`]
1689 /// boundary.
1690 ///
1691 /// # Examples
1692 ///
1693 /// ```
1694 /// # use widestring::utf16str;
1695 /// use widestring::Utf16String;
1696 /// let mut s = Utf16String::from_str("bar");
1697 ///
1698 /// s.insert_utfstr(0, utf16str!("foo"));
1699 ///
1700 /// assert_eq!("foobar", s);
1701 /// ```
1702 #[inline]
1703 pub fn insert_utfstr(&mut self, idx: usize, string: &Utf16Str) {
1704 assert!(self.is_char_boundary(idx));
1705
1706 unsafe {
1707 self.insert_slice(idx, string.as_slice());
1708 }
1709 }
1710
1711 /// Splits the string into two at the given index.
1712 ///
1713 /// Returns a newly allocated string. `self` contains elements [0, at), and the returned string
1714 /// contains elements [at, len). `at` must be on the boundary of a UTF-16 code point.
1715 ///
1716 /// Note that the capacity of `self` does not change.
1717 ///
1718 /// # Panics
1719 ///
1720 /// Panics if `at` is not on a UTF-16 code point boundary, or if it is beyond the last code
1721 /// point of the string.
1722 ///
1723 /// # Examples
1724 ///
1725 /// ```
1726 /// use widestring::Utf16String;
1727 /// let mut hello = Utf16String::from_str("Hello, World!");
1728 /// let world = hello.split_off(7);
1729 /// assert_eq!(hello, "Hello, ");
1730 /// assert_eq!(world, "World!");
1731 /// ```
1732 #[inline]
1733 #[must_use]
1734 pub fn split_off(&mut self, at: usize) -> Self {
1735 assert!(self.is_char_boundary(at));
1736 unsafe { Self::from_vec_unchecked(self.inner.split_off(at)) }
1737 }
1738
1739 /// Creates a draining iterator that removes the specified range in the string and yields the
1740 /// removed [`char`]s.
1741 ///
1742 /// Note: The element range is removed even if the iterator is not consumed until the end.
1743 ///
1744 /// # Panics
1745 ///
1746 /// Panics if the starting point or end point do not lie on a [`char`] boundary, or if they're
1747 /// out of bounds.
1748 ///
1749 /// # Examples
1750 ///
1751 /// Basic usage:
1752 ///
1753 /// ```
1754 /// use widestring::Utf16String;
1755 /// let mut s = Utf16String::from_str("α is alpha, β is beta");
1756 /// let beta_offset = 12;
1757 ///
1758 /// // Remove the range up until the β from the string
1759 /// let t: Utf16String = s.drain(..beta_offset).collect();
1760 /// assert_eq!(t, "α is alpha, ");
1761 /// assert_eq!(s, "β is beta");
1762 ///
1763 /// // A full range clears the string
1764 /// s.drain(..);
1765 /// assert_eq!(s, "");
1766 /// ```
1767 pub fn drain<R>(&mut self, range: R) -> DrainUtf16<'_>
1768 where
1769 R: RangeBounds<usize>,
1770 {
1771 // WARNING: Using range again would be unsound
1772 // TODO: replace with core::slice::range when it is stabilized
1773 let core::ops::Range { start, end } = crate::range(range, ..self.len());
1774 assert!(self.is_char_boundary(start));
1775 assert!(self.is_char_boundary(end));
1776
1777 // Take out two simultaneous borrows. The self_ptr won't be accessed
1778 // until iteration is over, in Drop.
1779 let self_ptr: *mut _ = self;
1780 // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks.
1781 let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
1782
1783 DrainUtf16 {
1784 start,
1785 end,
1786 iter: chars_iter,
1787 string: self_ptr,
1788 }
1789 }
1790
1791 /// Removes the specified range in the string, and replaces it with the given string.
1792 ///
1793 /// The given string doesn't need to be the same length as the range.
1794 ///
1795 /// # Panics
1796 ///
1797 /// Panics if the starting point or end point do not lie on a [`char`] boundary, or if they're
1798 /// out of bounds.
1799 ///
1800 /// # Examples
1801 ///
1802 /// Basic usage:
1803 ///
1804 /// ```
1805 /// use widestring::{utf16str, Utf16String};
1806 /// let mut s = Utf16String::from_str("α is alpha, β is beta");
1807 /// let beta_offset = 12;
1808 ///
1809 /// // Replace the range up until the β from the string
1810 /// s.replace_range(..beta_offset, utf16str!("Α is capital alpha; "));
1811 /// assert_eq!(s, "Α is capital alpha; β is beta");
1812 /// ```
1813 pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf16Str)
1814 where
1815 R: RangeBounds<usize>,
1816 {
1817 use core::ops::Bound::*;
1818
1819 // WARNING: Using range again would be unsound
1820 let start = range.start_bound();
1821 match start {
1822 Included(&n) => assert!(self.is_char_boundary(n)),
1823 Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
1824 Unbounded => {}
1825 };
1826 // WARNING: Inlining this variable would be unsound
1827 let end = range.end_bound();
1828 match end {
1829 Included(&n) => assert!(self.is_char_boundary(n + 1)),
1830 Excluded(&n) => assert!(self.is_char_boundary(n)),
1831 Unbounded => {}
1832 };
1833
1834 // Using `range` again would be unsound
1835 // We assume the bounds reported by `range` remain the same, but
1836 // an adversarial implementation could change between calls
1837 self.inner
1838 .splice((start, end), replace_with.as_slice().iter().copied());
1839 }
1840}
1841
1842impl Utf32String {
1843 /// Converts a [`u32`] vector of UTF-32 data to a string.
1844 ///
1845 /// Not all slices of [`u32`] values are valid to convert, since [`Utf32String`] requires that
1846 /// it is always valid UTF-32. This function checks to ensure that the values are valid UTF-32,
1847 /// and then does the conversion. This does not do any copying.
1848 ///
1849 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1850 /// the validity check, there is an unsafe version of this function,
1851 /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1852 /// the check.
1853 ///
1854 /// If you need a string slice, consider using [`Utf32Str::from_slice`] instead.
1855 ///
1856 /// The inverse of this method is [`into_vec`][Self::into_vec].
1857 ///
1858 /// # Errors
1859 ///
1860 /// Returns an error if the vector is not UTF-32 with a description as to why the provided
1861 /// vector is not UTF-32. The error will contain the original [`Vec`] that can be reclaimed with
1862 /// [`into_vec`][Utf32Error::into_vec].
1863 ///
1864 /// # Examples
1865 ///
1866 /// ```
1867 /// use widestring::Utf32String;
1868 ///
1869 /// let sparkle_heart = vec![0x1f496];
1870 /// let sparkle_heart = Utf32String::from_vec(sparkle_heart).unwrap();
1871 ///
1872 /// assert_eq!("💖", sparkle_heart);
1873 /// ```
1874 ///
1875 /// With incorrect values that return an error:
1876 ///
1877 /// ```
1878 /// use widestring::Utf32String;
1879 ///
1880 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1881 ///
1882 /// assert!(Utf32String::from_vec(sparkle_heart).is_err());
1883 /// ```
1884 pub fn from_vec(v: impl Into<Vec<u32>>) -> Result<Self, Utf32Error> {
1885 let v = validate_utf32_vec(v.into())?;
1886 Ok(unsafe { Self::from_vec_unchecked(v) })
1887 }
1888
1889 /// Converts a slice of [`u32`] data to a string, including invalid characters.
1890 ///
1891 /// Since the given [`u32`] slice may not be valid UTF-32, and [`Utf32String`] requires that
1892 /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
1893 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
1894 /// looks like this: �
1895 ///
1896 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
1897 /// the conversion, there is an unsafe version of this function,
1898 /// [`from_vec_unchecked`][Self::from_vec_unchecked], which has the same behavior but skips
1899 /// the checks.
1900 ///
1901 /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
1902 /// invalid UTF-32, then we need to insert our replacement characters which will change the size
1903 /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
1904 /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
1905 ///
1906 /// # Examples
1907 ///
1908 /// ```
1909 /// # use widestring::utf32str;
1910 /// use widestring::Utf32String;
1911 ///
1912 /// let sparkle_heart = vec![0x1f496];
1913 /// let sparkle_heart = Utf32String::from_slice_lossy(&sparkle_heart);
1914 ///
1915 /// assert_eq!(utf32str!("💖"), sparkle_heart);
1916 /// ```
1917 ///
/// With invalid values that get replaced:
1919 ///
1920 /// ```
1921 /// # use widestring::utf32str;
1922 /// use widestring::Utf32String;
1923 ///
1924 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
1925 /// let sparkle_heart = Utf32String::from_slice_lossy(&sparkle_heart);
1926 ///
1927 /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
1928 /// ```
1929 #[must_use]
1930 pub fn from_slice_lossy(s: &[u32]) -> Cow<'_, Utf32Str> {
1931 match validate_utf32(s) {
1932 // SAFETY: validated as UTF-32
1933 Ok(()) => Cow::Borrowed(unsafe { Utf32Str::from_slice_unchecked(s) }),
1934 Err(e) => {
1935 let mut v = Vec::with_capacity(s.len());
1936 // Valid up until index
1937 v.extend_from_slice(&s[..e.index()]);
1938 for u in s[e.index()..].iter().copied() {
1939 if char::from_u32(u).is_some() {
1940 v.push(u);
1941 } else {
1942 v.push(char::REPLACEMENT_CHARACTER as u32);
1943 }
1944 }
1945 // SATEFY: Is now valid UTF-32 with replacement chars
1946 Cow::Owned(unsafe { Self::from_vec_unchecked(v) })
1947 }
1948 }
1949 }
1950
1951 /// Converts a wide string of undefined encoding to a UTF-32 string without checking that the
1952 /// string contains valid UTF-32.
1953 ///
1954 /// See the safe version, [`from_ustring`][Self::from_ustring], for more information.
1955 ///
1956 /// # Safety
1957 ///
1958 /// This function is unsafe because it does not check that the string passed to it is valid
1959 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
1960 /// [`Utf32String`] is always valid UTF-32.
1961 ///
1962 /// # Examples
1963 ///
1964 /// ```
1965 /// use widestring::{U32String, Utf32String};
1966 ///
1967 /// let sparkle_heart = vec![0x1f496];
1968 /// let sparkle_heart = U32String::from_vec(sparkle_heart);
1969 /// let sparkle_heart = unsafe { Utf32String::from_ustring_unchecked(sparkle_heart) };
1970 ///
1971 /// assert_eq!("💖", sparkle_heart);
1972 /// ```
1973 #[inline]
1974 #[must_use]
1975 pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U32String>) -> Self {
1976 Self::from_vec_unchecked(s.into().into_vec())
1977 }
1978
/// Converts a wide string of undefined encoding into a UTF-32 string.
1980 ///
1981 /// Not all strings of undefined encoding are valid to convert, since [`Utf32String`] requires
1982 /// that it is always valid UTF-32. This function checks to ensure that the string is valid
1983 /// UTF-32, and then does the conversion. This does not do any copying.
1984 ///
1985 /// If you are sure that the string is valid UTF-32, and you don't want to incur the overhead of
1986 /// the validity check, there is an unsafe version of this function,
1987 /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
1988 /// skips the check.
1989 ///
1990 /// If you need a string slice, consider using [`Utf32Str::from_ustr`] instead.
1991 ///
1992 /// # Errors
1993 ///
1994 /// Returns an error if the string is not UTF-32 with a description as to why the provided
1995 /// string is not UTF-32.
1996 ///
1997 /// # Examples
1998 ///
1999 /// ```
2000 /// use widestring::{U32String, Utf32String};
2001 ///
2002 /// let sparkle_heart = vec![0x1f496];
2003 /// let sparkle_heart = U32String::from_vec(sparkle_heart);
2004 /// let sparkle_heart = Utf32String::from_ustring(sparkle_heart).unwrap();
2005 ///
2006 /// assert_eq!("💖", sparkle_heart);
2007 /// ```
2008 ///
2009 /// With incorrect values that return an error:
2010 ///
2011 /// ```
2012 /// use widestring::{U32String, Utf32String};
2013 ///
2014 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2015 /// let sparkle_heart = U32String::from_vec(sparkle_heart); // Valid for a U32String
2016 ///
2017 /// assert!(Utf32String::from_ustring(sparkle_heart).is_err()); // But not for a Utf32String
2018 /// ```
2019 #[inline]
2020 pub fn from_ustring(s: impl Into<crate::U32String>) -> Result<Self, Utf32Error> {
2021 Self::from_vec(s.into().into_vec())
2022 }
2023
2024 /// Converts a wide string slice of undefined encoding to a UTF-32 string, including invalid
2025 /// characters.
2026 ///
2027 /// Since the given string slice may not be valid UTF-32, and [`Utf32String`] requires that
2028 /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
2029 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
2030 /// looks like this: �
2031 ///
2032 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
2033 /// the conversion, there is an unsafe version of this function,
2034 /// [`from_ustring_unchecked`][Self::from_ustring_unchecked], which has the same behavior but
2035 /// skips the checks.
2036 ///
2037 /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
2038 /// invalid UTF-32, then we need to insert our replacement characters which will change the size
2039 /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
2040 /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
2041 ///
2042 /// # Examples
2043 ///
2044 /// ```
2045 /// # use widestring::utf32str;
2046 /// use widestring::{U32Str, Utf32String};
2047 ///
2048 /// let sparkle_heart = vec![0x1f496];
2049 /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
2050 /// let sparkle_heart = Utf32String::from_ustr_lossy(sparkle_heart);
2051 ///
2052 /// assert_eq!(utf32str!("💖"), sparkle_heart);
2053 /// ```
2054 ///
2055 /// With incorrect values that return an error:
2056 ///
2057 /// ```
2058 /// # use widestring::utf32str;
2059 /// use widestring::{U32Str, Utf32String};
2060 ///
2061 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2062 /// let sparkle_heart = U32Str::from_slice(&sparkle_heart);
2063 /// let sparkle_heart = Utf32String::from_ustr_lossy(sparkle_heart);
2064 ///
2065 /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
2066 /// ```
2067 #[inline]
2068 #[must_use]
2069 pub fn from_ustr_lossy(s: &crate::U32Str) -> Cow<'_, Utf32Str> {
2070 Self::from_slice_lossy(s.as_slice())
2071 }
2072
2073 /// Converts a wide C string to a UTF-32 string without checking that the string contains
2074 /// valid UTF-32.
2075 ///
2076 /// The resulting string does *not* contain the nul terminator.
2077 ///
2078 /// See the safe version, [`from_ucstring`][Self::from_ucstring], for more information.
2079 ///
2080 /// # Safety
2081 ///
2082 /// This function is unsafe because it does not check that the string passed to it is valid
2083 /// UTF-32. If this constraint is violated, undefined behavior results as it is assumed the
2084 /// [`Utf32String`] is always valid UTF-32.
2085 ///
2086 /// # Examples
2087 ///
2088 /// ```
2089 /// use widestring::{U32CString, Utf32String};
2090 ///
2091 /// let sparkle_heart = vec![0x1f496];
2092 /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap();
2093 /// let sparkle_heart = unsafe { Utf32String::from_ucstring_unchecked(sparkle_heart) };
2094 ///
2095 /// assert_eq!("💖", sparkle_heart);
2096 /// ```
2097 #[inline]
2098 #[must_use]
2099 pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U32CString>) -> Self {
2100 Self::from_vec_unchecked(s.into().into_vec())
2101 }
2102
2103 /// Converts a wide C string into a UTF-32 string.
2104 ///
2105 /// The resulting string does *not* contain the nul terminator.
2106 ///
2107 /// Not all wide C strings are valid to convert, since [`Utf32String`] requires that
2108 /// it is always valid UTF-32. This function checks to ensure that the string is valid UTF-32,
2109 /// and then does the conversion. This does not do any copying.
2110 ///
2111 /// If you are sure that the string is valid UTF-32, and you don't want to incur the overhead of
2112 /// the validity check, there is an unsafe version of this function,
2113 /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
2114 /// skips the check.
2115 ///
2116 /// If you need a string slice, consider using [`Utf32Str::from_ucstr`] instead.
2117 ///
2118 /// # Errors
2119 ///
2120 /// Returns an error if the string is not UTF-32 with a description as to why the provided
2121 /// string is not UTF-32.
2122 ///
2123 /// # Examples
2124 ///
2125 /// ```
2126 /// use widestring::{U32CString, Utf32String};
2127 ///
2128 /// let sparkle_heart = vec![0x1f496];
2129 /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap();
2130 /// let sparkle_heart = Utf32String::from_ucstring(sparkle_heart).unwrap();
2131 ///
2132 /// assert_eq!("💖", sparkle_heart);
2133 /// ```
2134 ///
2135 /// With incorrect values that return an error:
2136 ///
2137 /// ```
2138 /// use widestring::{U32CString, Utf32String};
2139 ///
2140 /// let sparkle_heart = vec![0xd83d, 0xdc96]; // UTF-16 surrogates are invalid
2141 /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap(); // Valid for a U32CString
2142 ///
2143 /// assert!(Utf32String::from_ucstring(sparkle_heart).is_err()); // But not for a Utf32String
2144 /// ```
2145 #[inline]
2146 pub fn from_ucstring(s: impl Into<crate::U32CString>) -> Result<Self, Utf32Error> {
2147 Self::from_vec(s.into().into_vec())
2148 }
2149
2150 /// Converts a wide C string slice of to a UTF-32 string, including invalid characters.
2151 ///
2152 /// The resulting string does *not* contain the nul terminator.
2153 ///
2154 /// Since the given string slice may not be valid UTF-32, and [`Utf32String`] requires that
2155 /// it is always valid UTF-32, during the conversion this function replaces any invalid UTF-32
2156 /// sequences with [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which
2157 /// looks like this: �
2158 ///
2159 /// If you are sure that the slice is valid UTF-32, and you don't want to incur the overhead of
2160 /// the conversion, there is an unsafe version of this function,
2161 /// [`from_ucstring_unchecked`][Self::from_ucstring_unchecked], which has the same behavior but
2162 /// skips the checks.
2163 ///
2164 /// This function returns a [`Cow<'_, Utf32Str>`][std::borrow::Cow]. If the given slice is
2165 /// invalid UTF-32, then we need to insert our replacement characters which will change the size
2166 /// of the string, and hence, require an owned [`Utf32String`]. But if it's already valid
2167 /// UTF-32, we don't need a new allocation. This return type allows us to handle both cases.
2168 ///
2169 /// # Examples
2170 ///
2171 /// ```
2172 /// # use widestring::utf32str;
2173 /// use widestring::{U32CStr, Utf32String};
2174 ///
2175 /// let sparkle_heart = vec![0x1f496, 0x0];
2176 /// let sparkle_heart = U32CStr::from_slice(&sparkle_heart).unwrap();
2177 /// let sparkle_heart = Utf32String::from_ucstr_lossy(sparkle_heart);
2178 ///
2179 /// assert_eq!(utf32str!("💖"), sparkle_heart);
2180 /// ```
2181 ///
2182 /// With incorrect values that return an error:
2183 ///
2184 /// ```
2185 /// # use widestring::utf32str;
2186 /// use widestring::{U32CStr, Utf32String};
2187 ///
2188 /// let sparkle_heart = vec![0xd83d, 0xdc96, 0x0]; // UTF-16 surrogates are invalid
2189 /// let sparkle_heart = U32CStr::from_slice(&sparkle_heart).unwrap();
2190 /// let sparkle_heart = Utf32String::from_ucstr_lossy(sparkle_heart);
2191 ///
2192 /// assert_eq!(utf32str!("\u{fffd}\u{fffd}"), sparkle_heart);
2193 /// ```
2194 #[inline]
2195 #[must_use]
2196 pub fn from_ucstr_lossy(s: &crate::U32CStr) -> Cow<'_, Utf32Str> {
2197 Self::from_slice_lossy(s.as_slice())
2198 }
2199
2200 /// Converts a vector of [`char`]s into a UTF-32 string.
2201 ///
2202 /// Since [`char`]s are always valid UTF-32, this is infallible and efficient.
2203 ///
2204 /// If you need a string slice, consider using [`Utf32Str::from_char_slice`] instead.
2205 ///
2206 /// # Examples
2207 ///
2208 /// ```
2209 /// use widestring::{U32CString, Utf32String};
2210 ///
2211 /// let sparkle_heart = vec!['💖'];
2212 /// let sparkle_heart = Utf32String::from_chars(sparkle_heart);
2213 ///
2214 /// assert_eq!("💖", sparkle_heart);
2215 /// ```
2216 #[inline]
2217 #[must_use]
2218 pub fn from_chars(s: impl Into<Vec<char>>) -> Self {
2219 // SAFETY: Char slices are always valid UTF-32
2220 // TODO: replace mem:transmute when Vec::into_raw_parts is stabilized
2221 // Clippy reports this is unsound due to different sized types; but the sizes are the same
2222 // size. Still best to swap to Vec::into_raw_parts asap.
2223 #[allow(clippy::unsound_collection_transmute)]
2224 unsafe {
2225 let vec: Vec<u32> = mem::transmute(s.into());
2226 Self::from_vec_unchecked(vec)
2227 }
2228 }
2229
2230 /// Appends the given [`char`] to the end of this string.
2231 ///
2232 /// # Examples
2233 ///
2234 /// ```
2235 /// use widestring::Utf32String;
2236 /// let mut s = Utf32String::from_str("abc");
2237 ///
2238 /// s.push('1');
2239 /// s.push('2');
2240 /// s.push('3');
2241 ///
2242 /// assert_eq!("abc123", s);
2243 /// ```
2244 #[inline]
2245 pub fn push(&mut self, ch: char) {
2246 self.inner.push(ch.into())
2247 }
2248
2249 /// Shortens this string to the specified length.
2250 ///
2251 /// If `new_len` is greater than the string's current length, this has no effect.
2252 ///
2253 /// Note that this method has no effect on the allocated capacity of the string.
2254 ///
2255 /// # Examples
2256 ///
2257 /// ```
2258 /// use widestring::Utf32String;
2259 /// let mut s = Utf32String::from_str("hello");
2260 /// s.truncate(2);
2261 /// assert_eq!("he", s);
2262 /// ```
2263 #[inline]
2264 pub fn truncate(&mut self, new_len: usize) {
2265 self.inner.truncate(new_len)
2266 }
2267
2268 /// Removes the last character from the string buffer and returns it.
2269 ///
2270 /// Returns [`None`] if this string is empty.
2271 ///
2272 /// # Examples
2273 ///
2274 /// ```
2275 /// use widestring::Utf32String;
2276 /// let mut s = Utf32String::from_str("foo");
2277 ///
2278 /// assert_eq!(s.pop(), Some('o'));
2279 /// assert_eq!(s.pop(), Some('o'));
2280 /// assert_eq!(s.pop(), Some('f'));
2281 ///
2282 /// assert_eq!(s.pop(), None);
2283 /// ```
2284 #[inline]
2285 pub fn pop(&mut self) -> Option<char> {
2286 // SAFETY: String is already valid UTF-32
2287 self.inner
2288 .pop()
2289 .map(|c| unsafe { core::char::from_u32_unchecked(c) })
2290 }
2291
2292 /// Removes a [`char`] from this string at an offset and returns it.
2293 ///
2294 /// This is an _O(n)_ operation, as it requires copying every element in the buffer.
2295 ///
2296 /// # Panics
2297 ///
2298 /// Panics if `idx` is larger than or equal to the string's length.
2299 ///
2300 /// # Examples
2301 ///
2302 /// ```
2303 /// use widestring::Utf32String;
2304 /// let mut s = Utf32String::from_str("foo");
2305 ///
2306 /// assert_eq!(s.remove(1), 'o');
2307 /// assert_eq!(s.remove(0), 'f');
2308 /// assert_eq!(s.remove(0), 'o');
2309 /// ```
2310 #[inline]
2311 pub fn remove(&mut self, idx: usize) -> char {
2312 let next = idx + 1;
2313 let len = self.len();
2314 unsafe {
2315 let c = core::char::from_u32_unchecked(self.inner[idx]);
2316 ptr::copy(
2317 self.inner.as_ptr().add(next),
2318 self.inner.as_mut_ptr().add(idx),
2319 len - next,
2320 );
2321 self.inner.set_len(len - (next - idx));
2322 c
2323 }
2324 }
2325
2326 /// Retains only the characters specified by the predicate.
2327 ///
2328 /// In other words, remove all characters `c` such that `f(c)` returns `false`. This method
2329 /// operates in place, visiting each character exactly once in the original order, and preserves
2330 /// the order of the retained characters.
2331 ///
2332 /// # Examples
2333 ///
2334 /// ```
2335 /// use widestring::Utf32String;
2336 /// let mut s = Utf32String::from_str("f_o_ob_ar");
2337 ///
2338 /// s.retain(|c| c != '_');
2339 ///
2340 /// assert_eq!(s, "foobar");
2341 /// ```
2342 ///
2343 /// Because the elements are visited exactly once in the original order, external state may be
2344 /// used to decide which elements to keep.
2345 ///
2346 /// ```
2347 /// use widestring::Utf32String;
2348 /// let mut s = Utf32String::from_str("abcde");
2349 /// let keep = [false, true, true, false, true];
2350 /// let mut iter = keep.iter();
2351 /// s.retain(|_| *iter.next().unwrap());
2352 /// assert_eq!(s, "bce");
2353 /// ```
2354 pub fn retain<F>(&mut self, mut f: F)
2355 where
2356 F: FnMut(char) -> bool,
2357 {
2358 let mut index = 0;
2359 while index < self.len() {
2360 // SAFETY: always in bounds
2361 let c = unsafe { self.get_unchecked(index..) }
2362 .chars()
2363 .next()
2364 .unwrap();
2365 if !f(c) {
2366 self.inner.remove(index);
2367 } else {
2368 index += 1;
2369 }
2370 }
2371 }
2372
2373 /// Inserts a character into this string at an offset.
2374 ///
2375 /// This is an _O(n)_ operation as it requires copying every element in the buffer.
2376 ///
2377 /// # Panics
2378 ///
2379 /// Panics if `idx` is larger than the string's length.
2380 ///
2381 /// # Examples
2382 ///
2383 /// ```
2384 /// use widestring::Utf32String;
2385 /// let mut s = Utf32String::with_capacity(3);
2386 ///
2387 /// s.insert(0, 'f');
2388 /// s.insert(1, 'o');
2389 /// s.insert(1, 'o');
2390 ///
2391 /// assert_eq!("foo", s);
2392 /// ```
2393 #[inline]
2394 pub fn insert(&mut self, idx: usize, ch: char) {
2395 unsafe {
2396 self.insert_slice(idx, &[ch as u32]);
2397 }
2398 }
2399
2400 /// Inserts a UTF-32 string slice into this string at an offset.
2401 ///
2402 /// This is an _O(n)_ operation as it requires copying every element in the buffer.
2403 ///
2404 /// # Panics
2405 ///
2406 /// Panics if `idx` is larger than the string's length.
2407 ///
2408 /// # Examples
2409 ///
2410 /// ```
2411 /// # use widestring::utf32str;
2412 /// use widestring::Utf32String;
2413 /// let mut s = Utf32String::from_str("bar");
2414 ///
2415 /// s.insert_utfstr(0, utf32str!("foo"));
2416 ///
2417 /// assert_eq!("foobar", s);
2418 /// ```
2419 #[inline]
2420 pub fn insert_utfstr(&mut self, idx: usize, string: &Utf32Str) {
2421 unsafe {
2422 self.insert_slice(idx, string.as_slice());
2423 }
2424 }
2425
2426 /// Splits the string into two at the given index.
2427 ///
2428 /// Returns a newly allocated string. `self` contains elements [0, at), and the returned string
2429 /// contains elements [at, len).
2430 ///
2431 /// Note that the capacity of `self` does not change.
2432 ///
2433 /// # Panics
2434 ///
2435 /// Panics if `at`it is beyond the last code point of the string.
2436 ///
2437 /// # Examples
2438 ///
2439 /// ```
2440 /// use widestring::Utf32String;
2441 /// let mut hello = Utf32String::from_str("Hello, World!");
2442 /// let world = hello.split_off(7);
2443 /// assert_eq!(hello, "Hello, ");
2444 /// assert_eq!(world, "World!");
2445 /// ```
2446 #[inline]
2447 #[must_use]
2448 pub fn split_off(&mut self, at: usize) -> Self {
2449 unsafe { Self::from_vec_unchecked(self.inner.split_off(at)) }
2450 }
2451
    /// Creates a draining iterator that removes the specified range in the string and yields the
    /// removed [`char`]s.
    ///
    /// Note: The element range is removed even if the iterator is not consumed until the end.
    ///
    /// # Panics
    ///
    /// Panics if the starting point or end point are out of bounds.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use widestring::Utf32String;
    /// let mut s = Utf32String::from_str("α is alpha, β is beta");
    /// let beta_offset = 12;
    ///
    /// // Remove the range up until the β from the string
    /// let t: Utf32String = s.drain(..beta_offset).collect();
    /// assert_eq!(t, "α is alpha, ");
    /// assert_eq!(s, "β is beta");
    ///
    /// // A full range clears the string
    /// s.drain(..);
    /// assert_eq!(s, "");
    /// ```
    pub fn drain<R>(&mut self, range: R) -> DrainUtf32<'_>
    where
        R: RangeBounds<usize>,
    {
        // WARNING: Using range again would be unsound. The bounds are resolved exactly once into
        // concrete `start`/`end` values, and only those are used from here on.
        // TODO: replace with core::slice::range when it is stabilized
        let core::ops::Range { start, end } = crate::range(range, ..self.len());

        // Take out two simultaneous borrows. The self_ptr won't be accessed
        // until iteration is over, in Drop.
        let self_ptr: *mut _ = self;
        // SAFETY: `crate::range` (standing in for `core::slice::range`, per the TODO above) is
        // relied upon to have bounds-checked `start..end` against the string length.
        // NOTE(review): no `is_char_boundary` check is made in this function; presumably none is
        // needed because each element is handled as one whole `char` elsewhere in this type.
        let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();

        DrainUtf32 {
            start,
            end,
            iter: chars_iter,
            string: self_ptr,
        }
    }
2500
2501 /// Removes the specified range in the string, and replaces it with the given string.
2502 ///
2503 /// The given string doesn't need to be the same length as the range.
2504 ///
2505 /// # Panics
2506 ///
2507 /// Panics if the starting point or end point are out of bounds.
2508 ///
2509 /// # Examples
2510 ///
2511 /// Basic usage:
2512 ///
2513 /// ```
2514 /// use widestring::{utf32str, Utf32String};
2515 /// let mut s = Utf32String::from_str("α is alpha, β is beta");
2516 /// let beta_offset = 12;
2517 ///
2518 /// // Replace the range up until the β from the string
2519 /// s.replace_range(..beta_offset, utf32str!("Α is capital alpha; "));
2520 /// assert_eq!(s, "Α is capital alpha; β is beta");
2521 /// ```
2522 #[inline]
2523 pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf32Str)
2524 where
2525 R: RangeBounds<usize>,
2526 {
2527 self.inner
2528 .splice(range, replace_with.as_slice().iter().copied());
2529 }
2530
2531 /// Converts string into a [`Vec`] of [`char`]s.
2532 ///
2533 /// This consumes the string without copying its contents.
2534 #[allow(trivial_casts)]
2535 #[inline]
2536 #[must_use]
2537 pub fn into_char_vec(self) -> Vec<char> {
2538 let mut v = mem::ManuallyDrop::new(self.into_vec());
2539 let (ptr, len, cap) = (v.as_mut_ptr(), v.len(), v.capacity());
2540 // SAFETY: Self should be valid UTF-32 so chars will be in range
2541 unsafe { Vec::from_raw_parts(ptr as *mut char, len, cap) }
2542 }
2543}
2544
/// Allows a [`Utf32String`] to be borrowed as a mutable slice of [`char`]s.
impl AsMut<[char]> for Utf32String {
    /// Returns the string's contents as a mutable [`char`] slice, via `as_char_slice_mut`.
    #[inline]
    fn as_mut(&mut self) -> &mut [char] {
        self.as_char_slice_mut()
    }
}
2551
/// Allows a [`Utf32String`] to be borrowed as a slice of [`char`]s.
impl AsRef<[char]> for Utf32String {
    /// Returns the string's contents as a [`char`] slice, via `as_char_slice`.
    #[inline]
    fn as_ref(&self) -> &[char] {
        self.as_char_slice()
    }
}
2558
2559impl From<Vec<char>> for Utf32String {
2560 #[inline]
2561 fn from(value: Vec<char>) -> Self {
2562 Utf32String::from_chars(value)
2563 }
2564}
2565
2566impl From<&[char]> for Utf32String {
2567 #[inline]
2568 fn from(value: &[char]) -> Self {
2569 Utf32String::from_chars(value)
2570 }
2571}
2572
2573impl From<Utf32String> for Vec<char> {
2574 #[inline]
2575 fn from(value: Utf32String) -> Self {
2576 value.into_char_vec()
2577 }
2578}
2579
2580impl PartialEq<[char]> for Utf32String {
2581 #[inline]
2582 fn eq(&self, other: &[char]) -> bool {
2583 self.as_char_slice() == other
2584 }
2585}
2586
2587impl PartialEq<Utf16String> for Utf32String {
2588 #[inline]
2589 fn eq(&self, other: &Utf16String) -> bool {
2590 self.chars().eq(other.chars())
2591 }
2592}
2593
2594impl PartialEq<Utf32String> for Utf16String {
2595 #[inline]
2596 fn eq(&self, other: &Utf32String) -> bool {
2597 self.chars().eq(other.chars())
2598 }
2599}
2600
2601impl PartialEq<&Utf16Str> for Utf32String {
2602 #[inline]
2603 fn eq(&self, other: &&Utf16Str) -> bool {
2604 self.chars().eq(other.chars())
2605 }
2606}
2607
2608impl PartialEq<&Utf32Str> for Utf16String {
2609 #[inline]
2610 fn eq(&self, other: &&Utf32Str) -> bool {
2611 self.chars().eq(other.chars())
2612 }
2613}
2614
2615impl PartialEq<Utf32String> for &Utf16Str {
2616 #[inline]
2617 fn eq(&self, other: &Utf32String) -> bool {
2618 self.chars().eq(other.chars())
2619 }
2620}
2621
2622impl PartialEq<Utf16String> for &Utf32Str {
2623 #[inline]
2624 fn eq(&self, other: &Utf16String) -> bool {
2625 self.chars().eq(other.chars())
2626 }
2627}
2628
2629impl TryFrom<Vec<u16>> for Utf16String {
2630 type Error = Utf16Error;
2631
2632 #[inline]
2633 fn try_from(value: Vec<u16>) -> Result<Self, Self::Error> {
2634 Utf16String::from_vec(value)
2635 }
2636}
2637
2638impl TryFrom<Vec<u32>> for Utf32String {
2639 type Error = Utf32Error;
2640
2641 #[inline]
2642 fn try_from(value: Vec<u32>) -> Result<Self, Self::Error> {
2643 Utf32String::from_vec(value)
2644 }
2645}
2646
2647impl TryFrom<&[u16]> for Utf16String {
2648 type Error = Utf16Error;
2649
2650 #[inline]
2651 fn try_from(value: &[u16]) -> Result<Self, Self::Error> {
2652 Utf16String::from_vec(value)
2653 }
2654}
2655
2656impl TryFrom<&[u32]> for Utf32String {
2657 type Error = Utf32Error;
2658
2659 #[inline]
2660 fn try_from(value: &[u32]) -> Result<Self, Self::Error> {
2661 Utf32String::from_vec(value)
2662 }
2663}
2664
/// Alias for [`Utf16String`] or [`Utf32String`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition applies to non-Windows targets, where the alias is [`Utf32String`].
#[cfg(not(windows))]
pub type WideUtfString = Utf32String;
2669
/// Alias for [`Utf16String`] or [`Utf32String`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition applies to Windows targets, where the alias is [`Utf16String`].
#[cfg(windows)]
pub type WideUtfString = Utf16String;
2674
#[cfg(test)]
mod test {
    use crate::*;

    /// Regression test for bug #39: truncating a UTF-16 string to a length beyond, equal to, and
    /// below its current length.
    #[test]
    fn uft16_truncate() {
        let source = utf16str!("trunc");
        let mut s: Utf16String = source.into();
        // Each pair is (requested length, expected resulting length).
        let cases: [(usize, usize); 3] = [(6, 5), (5, 5), (2, 2)];
        for &(new_len, expected) in cases.iter() {
            s.truncate(new_len);
            assert_eq!(s.len(), expected);
        }
    }
}