widestring/ucstring.rs
1//! C-style owned, growable wide strings.
2//!
3//! This module contains wide C strings and related types.
4
5use crate::{error::ContainsNul, U16CStr, U16Str, U16String, U32CStr, U32Str, U32String};
6#[allow(unused_imports)]
7use alloc::{
8 borrow::{Cow, ToOwned},
9 boxed::Box,
10 vec::Vec,
11};
12use core::{
13 borrow::{Borrow, BorrowMut},
14 cmp,
15 mem::{self, ManuallyDrop},
16 ops::{Deref, DerefMut, Index},
17 ptr,
18 slice::{self, SliceIndex},
19};
20
21macro_rules! ucstring_common_impl {
22 {
23 $(#[$ucstring_meta:meta])*
24 struct $ucstring:ident([$uchar:ty]);
25 type UCStr = $ucstr:ident;
26 type UString = $ustring:ident;
27 type UStr = $ustr:ident;
28 $(#[$from_vec_meta:meta])*
29 fn from_vec() -> {}
30 $(#[$from_vec_truncate_meta:meta])*
31 fn from_vec_truncate() -> {}
32 $(#[$into_boxed_ucstr_meta:meta])*
33 fn into_boxed_ucstr() -> {}
34 } => {
35 $(#[$ucstring_meta])*
36 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
37 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
38 pub struct $ucstring {
39 pub(crate) inner: Box<[$uchar]>,
40 }
41
42 impl $ucstring {
43 /// The nul terminator character value.
44 pub const NUL_TERMINATOR: $uchar = 0;
45
46 /// Constructs a new empty wide C string.
47 #[inline]
48 #[must_use]
49 pub fn new() -> Self {
50 unsafe { Self::from_vec_unchecked(Vec::new()) }
51 }
52
53 $(#[$from_vec_meta])*
54 pub fn from_vec(v: impl Into<Vec<$uchar>>) -> Result<Self, ContainsNul<$uchar>> {
55 let v = v.into();
56 // Check for nul vals, ignoring nul terminator
57 match v.iter().position(|&val| val == Self::NUL_TERMINATOR) {
58 None => Ok(unsafe { Self::from_vec_unchecked(v) }),
59 Some(pos) if pos == v.len() - 1 => Ok(unsafe { Self::from_vec_unchecked(v) }),
60 Some(pos) => Err(ContainsNul::new(pos, v)),
61 }
62 }
63
64 $(#[$from_vec_truncate_meta])*
65 #[must_use]
66 pub fn from_vec_truncate(v: impl Into<Vec<$uchar>>) -> Self {
67 let mut v = v.into();
68 // Check for nul vals
69 if let Some(pos) = v.iter().position(|&val| val == Self::NUL_TERMINATOR) {
70 v.truncate(pos + 1);
71 }
72 unsafe { Self::from_vec_unchecked(v) }
73 }
74
75 /// Constructs a wide C string from a vector without checking for interior nul values.
76 ///
77 /// A terminating nul value will be appended if the vector does not already have a
78 /// terminating nul.
79 ///
80 /// # Safety
81 ///
82 /// This method is equivalent to [`from_vec`][Self::from_vec] except that no runtime
83 /// assertion is made that `v` contains no interior nul values. Providing a vector with
84 /// any nul values that are not the last value in the vector will result in an invalid
85 /// C string.
86 #[must_use]
87 pub unsafe fn from_vec_unchecked(v: impl Into<Vec<$uchar>>) -> Self {
88 let mut v = v.into();
89 match v.last() {
90 None => v.push(Self::NUL_TERMINATOR),
91 Some(&c) if c != Self::NUL_TERMINATOR => v.push(Self::NUL_TERMINATOR),
92 Some(_) => (),
93 }
94 Self {
95 inner: v.into_boxed_slice(),
96 }
97 }
98
99 /// Constructs a wide C string from anything that can be converted to a wide string
100 /// slice.
101 ///
102 /// The string will be scanned for invalid interior nul values.
103 ///
104 /// # Errors
105 ///
106 /// This function will return an error if the data contains a nul value that is not the
107 /// terminating nul.
108 /// The returned error will contain a [`Vec`] as well as the position of the nul value.
109 #[inline]
110 pub fn from_ustr(s: impl AsRef<$ustr>) -> Result<Self, ContainsNul<$uchar>> {
111 Self::from_vec(s.as_ref().as_slice())
112 }
113
114 /// Constructs a wide C string from anything that can be converted to a wide string
115 /// slice, truncating at the first nul terminator.
116 ///
117 /// The string will be truncated at the first nul value in the string.
118 #[inline]
119 #[must_use]
120 pub fn from_ustr_truncate(s: impl AsRef<$ustr>) -> Self {
121 Self::from_vec_truncate(s.as_ref().as_slice())
122 }
123
124 /// Constructs a wide C string from anything that can be converted to a wide string
125 /// slice, without scanning for invalid nul values.
126 ///
127 /// # Safety
128 ///
129 /// This method is equivalent to [`from_ustr`][Self::from_ustr] except that no runtime
130 /// assertion is made that `v` contains no interior nul values. Providing a string with
131 /// any nul values that are not the last value in the vector will result in an invalid
132 /// C string.
133 #[inline]
134 #[must_use]
135 pub unsafe fn from_ustr_unchecked(s: impl AsRef<$ustr>) -> Self {
136 Self::from_vec_unchecked(s.as_ref().as_slice())
137 }
138
139 /// Constructs a new wide C string copied from a nul-terminated string pointer.
140 ///
141 /// This will scan for nul values beginning with `p`. The first nul value will be used
142 /// as the nul terminator for the string, similar to how libc string functions such as
143 /// `strlen` work.
144 ///
145 /// If you wish to avoid copying the string pointer, use [`U16CStr::from_ptr_str`] or
146 /// [`U32CStr::from_ptr_str`] instead.
147 ///
148 /// # Safety
149 ///
150 /// This function is unsafe as there is no guarantee that the given pointer is valid or
151 /// has a nul terminator, and the function could scan past the underlying buffer.
152 ///
153 /// In addition, the data must meet the safety conditions of
154 /// [std::slice::from_raw_parts].
155 ///
156 /// # Panics
157 ///
158 /// This function panics if `p` is null.
159 ///
160 /// # Caveat
161 ///
162 /// The lifetime for the returned string is inferred from its usage. To prevent
163 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
164 /// is safe in the context, such as by providing a helper function taking the lifetime
165 /// of a host value for the string, or by explicit annotation.
166 #[inline]
167 #[must_use]
168 pub unsafe fn from_ptr_str(p: *const $uchar) -> Self {
169 $ucstr::from_ptr_str(p).to_ucstring()
170 }
171
172 /// Constructs a wide C string copied from a pointer and a length, checking for invalid
173 /// interior nul values.
174 ///
175 /// The `len` argument is the number of elements, **not** the number of bytes, and does
176 /// **not** include the nul terminator of the string. If `len` is `0`, `p` is allowed to
177 /// be a null pointer.
178 ///
179 /// The resulting string will always be nul-terminated even if the pointer data is not.
180 ///
181 /// # Errors
182 ///
183 /// This will scan the pointer string for an interior nul value and error if one is
184 /// found. To avoid scanning for interior nuls,
185 /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] may be used instead.
186 /// The returned error will contain a [`Vec`] as well as the position of the nul value.
187 ///
188 /// # Safety
189 ///
190 /// This function is unsafe as there is no guarantee that the given pointer is valid for
191 /// `len` elements.
192 ///
193 /// In addition, the data must meet the safety conditions of
194 /// [std::slice::from_raw_parts].
195 ///
196 /// # Panics
197 ///
198 /// Panics if `len` is greater than 0 but `p` is a null pointer.
199 pub unsafe fn from_ptr(
200 p: *const $uchar,
201 len: usize,
202 ) -> Result<Self, ContainsNul<$uchar>> {
203 if len == 0 {
204 return Ok(Self::default());
205 }
206 assert!(!p.is_null());
207 let slice = slice::from_raw_parts(p, len);
208 Self::from_vec(slice)
209 }
210
211 /// Constructs a wide C string copied from a pointer and a length, truncating at the
212 /// first nul terminator.
213 ///
214 /// The `len` argument is the number of elements, **not** the number of bytes. This will
215 /// scan for nul values beginning with `p` until offset `len`. The first nul value will
216 /// be used as the nul terminator for the string, ignoring any remaining values left
217 /// before `len`. If no nul value is found, the whole string of length `len` is used,
218 /// and a new nul-terminator will be added to the resulting string. If `len` is `0`, `p`
219 /// is allowed to be a null pointer.
220 ///
221 /// # Safety
222 ///
223 /// This function is unsafe as there is no guarantee that the given pointer is valid for
224 /// `len` elements.
225 ///
226 /// In addition, the data must meet the safety conditions of
227 /// [std::slice::from_raw_parts].
228 ///
229 /// # Panics
230 ///
231 /// Panics if `len` is greater than 0 but `p` is a null pointer.
232 #[must_use]
233 pub unsafe fn from_ptr_truncate(p: *const $uchar, len: usize) -> Self {
234 if len == 0 {
235 return Self::default();
236 }
237 assert!(!p.is_null());
238 let slice = slice::from_raw_parts(p, len);
239 Self::from_vec_truncate(slice)
240 }
241
242 /// Constructs a wide C string copied from a pointer and a length without checking for
243 /// any nul values.
244 ///
245 /// The `len` argument is the number of elements, **not** the number of bytes, and does
246 /// **not** include the nul terminator of the string. If `len` is `0`, `p` is allowed to
247 /// be a null pointer.
248 ///
249 /// The resulting string will always be nul-terminated even if the pointer data is not.
250 ///
251 /// # Safety
252 ///
253 /// This function is unsafe as there is no guarantee that the given pointer is valid for
254 /// `len` elements.
255 ///
256 /// In addition, the data must meet the safety conditions of
257 /// [std::slice::from_raw_parts].
258 ///
259 /// The interior values of the pointer are not scanned for nul. Any interior nul values
260 /// or will result in an invalid C string.
261 ///
262 /// # Panics
263 ///
264 /// Panics if `len` is greater than 0 but `p` is a null pointer.
265 #[must_use]
266 pub unsafe fn from_ptr_unchecked(p: *const $uchar, len: usize) -> Self {
267 if len == 0 {
268 return Self::default();
269 }
270 assert!(!p.is_null());
271 let slice = slice::from_raw_parts(p, len);
272 Self::from_vec_unchecked(slice)
273 }
274
275 /// Converts to a wide C string slice.
///
/// The returned slice is a borrowed view produced from this string's internal buffer.
276 #[inline]
277 #[must_use]
278 pub fn as_ucstr(&self) -> &$ucstr {
279 $ucstr::from_inner(&self.inner)
280 }
281
282 /// Converts to a mutable wide C string slice.
///
/// The returned slice is a mutable borrowed view produced from this string's internal
/// buffer.
283 #[inline]
284 #[must_use]
285 pub fn as_mut_ucstr(&mut self) -> &mut $ucstr {
286 $ucstr::from_inner_mut(&mut self.inner)
287 }
288
289 /// Converts this string into a wide string without a nul terminator.
290 ///
291 /// The resulting string will **not** contain a nul-terminator, and will contain no
292 /// other nul values.
293 #[inline]
294 #[must_use]
295 pub fn into_ustring(self) -> $ustring {
296 $ustring::from_vec(self.into_vec())
297 }
298
299 /// Converts this string into a wide string with a nul terminator.
300 ///
301 /// The resulting vector will contain a nul-terminator and no interior nul values.
302 #[inline]
303 #[must_use]
304 pub fn into_ustring_with_nul(self) -> $ustring {
305 $ustring::from_vec(self.into_vec_with_nul())
306 }
307
308 /// Converts the string into a [`Vec`] without a nul terminator, consuming the string in
309 /// the process.
310 ///
311 /// The resulting vector will **not** contain a nul-terminator, and will contain no
312 /// other nul values.
313 #[inline]
314 #[must_use]
315 pub fn into_vec(self) -> Vec<$uchar> {
316 let mut v = self.into_inner().into_vec();
317 v.pop();
318 v
319 }
320
321 /// Converts the string into a [`Vec`], consuming the string in the process.
322 ///
323 /// The resulting vector will contain a nul-terminator and no interior nul values.
324 #[inline]
325 #[must_use]
326 pub fn into_vec_with_nul(self) -> Vec<$uchar> {
327 self.into_inner().into_vec()
328 }
329
330 /// Transfers ownership of the string to a C caller.
331 ///
332 /// # Safety
333 ///
334 /// The pointer _must_ be returned to Rust and reconstituted using
335 /// [`from_raw`][Self::from_raw] to be properly deallocated. Specifically, one should
336 /// _not_ use the standard C `free` function to deallocate this string. Failure to call
337 /// [`from_raw`][Self::from_raw] will lead to a memory leak.
338 #[inline]
339 #[must_use]
340 pub fn into_raw(self) -> *mut $uchar {
341 Box::into_raw(self.into_inner()) as *mut $uchar
342 }
343
344 /// Retakes ownership of a wide C string that was transferred to C.
345 ///
346 /// This should only be used in combination with [`into_raw`][Self::into_raw]. To
347 /// construct a new wide C string from a pointer, use
348 /// [`from_ptr_str`][Self::from_ptr_str].
349 ///
350 /// # Safety
351 ///
352 /// This should only ever be called with a pointer that was earlier obtained by calling
353 /// [`into_raw`][Self::into_raw]. Additionally, the length of the string will be
354 /// recalculated from the pointer by scanning for the nul-terminator.
355 ///
356 /// # Panics
357 ///
358 /// Panics if `p` is a null pointer.
359 #[must_use]
360 pub unsafe fn from_raw(p: *mut $uchar) -> Self {
361 assert!(!p.is_null());
362 let mut i: isize = 0;
363 while *p.offset(i) != Self::NUL_TERMINATOR {
364 i += 1;
365 }
366 let slice = slice::from_raw_parts_mut(p, i as usize + 1);
367 Self {
368 inner: Box::from_raw(slice),
369 }
370 }
371
372 $(#[$into_boxed_ucstr_meta])*
373 #[inline]
374 #[must_use]
375 pub fn into_boxed_ucstr(self) -> Box<$ucstr> {
376 unsafe { Box::from_raw(Box::into_raw(self.into_inner()) as *mut $ucstr) }
377 }
378
379 /// Bypass "move out of struct which implements [`Drop`] trait" restriction.
380 fn into_inner(self) -> Box<[$uchar]> {
381 let v = ManuallyDrop::new(self);
382 unsafe {
383 ptr::read(&v.inner)
384 }
385 }
386 }
387
388 impl AsMut<$ucstr> for $ucstring {
389 fn as_mut(&mut self) -> &mut $ucstr {
390 self.as_mut_ucstr()
391 }
392 }
393
394 impl AsRef<$ucstr> for $ucstring {
395 #[inline]
396 fn as_ref(&self) -> &$ucstr {
397 self.as_ucstr()
398 }
399 }
400
401 impl AsRef<[$uchar]> for $ucstring {
402 #[inline]
403 fn as_ref(&self) -> &[$uchar] {
404 self.as_slice()
405 }
406 }
407
408 impl AsRef<$ustr> for $ucstring {
409 #[inline]
410 fn as_ref(&self) -> &$ustr {
411 self.as_ustr()
412 }
413 }
414
415 impl Borrow<$ucstr> for $ucstring {
416 #[inline]
417 fn borrow(&self) -> &$ucstr {
418 self.as_ucstr()
419 }
420 }
421
422 impl BorrowMut<$ucstr> for $ucstring {
423 #[inline]
424 fn borrow_mut(&mut self) -> &mut $ucstr {
425 self.as_mut_ucstr()
426 }
427 }
428
429 impl Default for $ucstring {
430 #[inline]
431 fn default() -> Self {
432 unsafe { Self::from_vec_unchecked(Vec::new()) }
433 }
434 }
435
436 impl Deref for $ucstring {
437 type Target = $ucstr;
438
439 #[inline]
440 fn deref(&self) -> &$ucstr {
441 self.as_ucstr()
442 }
443 }
444
445 impl DerefMut for $ucstring {
446 #[inline]
447 fn deref_mut(&mut self) -> &mut Self::Target {
448 self.as_mut_ucstr()
449 }
450 }
451
452 // Turns this `UCString` into an empty string to prevent
453 // memory unsafe code from working by accident. Inline
454 // to prevent LLVM from optimizing it away in debug builds.
455 impl Drop for $ucstring {
456 #[inline]
457 fn drop(&mut self) {
// SAFETY: index 0 is in bounds as long as `inner` is non-empty. The constructors in
// this macro (`from_vec_unchecked`, `from_raw`) always produce at least the
// terminator element. NOTE(review): `inner` is `pub(crate)`, so confirm no other
// module can store an empty buffer.
458 unsafe {
459 *self.inner.get_unchecked_mut(0) = Self::NUL_TERMINATOR;
460 }
461 }
462 }
463
464 impl From<$ucstring> for Vec<$uchar> {
465 #[inline]
466 fn from(value: $ucstring) -> Self {
467 value.into_vec()
468 }
469 }
470
471 impl<'a> From<$ucstring> for Cow<'a, $ucstr> {
472 #[inline]
473 fn from(s: $ucstring) -> Cow<'a, $ucstr> {
474 Cow::Owned(s)
475 }
476 }
477
478 #[cfg(feature = "std")]
479 impl From<$ucstring> for std::ffi::OsString {
480 #[inline]
481 fn from(s: $ucstring) -> std::ffi::OsString {
482 s.to_os_string()
483 }
484 }
485
486 impl From<$ucstring> for $ustring {
487 #[inline]
488 fn from(s: $ucstring) -> Self {
489 s.to_ustring()
490 }
491 }
492
493 impl<'a, T: ?Sized + AsRef<$ucstr>> From<&'a T> for $ucstring {
494 #[inline]
495 fn from(s: &'a T) -> Self {
496 s.as_ref().to_ucstring()
497 }
498 }
499
500 impl<'a> From<&'a $ucstr> for Cow<'a, $ucstr> {
501 #[inline]
502 fn from(s: &'a $ucstr) -> Cow<'a, $ucstr> {
503 Cow::Borrowed(s)
504 }
505 }
506
507 impl From<Box<$ucstr>> for $ucstring {
508 #[inline]
509 fn from(s: Box<$ucstr>) -> Self {
510 s.into_ucstring()
511 }
512 }
513
514 impl From<$ucstring> for Box<$ucstr> {
515 #[inline]
516 fn from(s: $ucstring) -> Box<$ucstr> {
517 s.into_boxed_ucstr()
518 }
519 }
520
521 impl<I> Index<I> for $ucstring
522 where
523 I: SliceIndex<[$uchar], Output = [$uchar]>,
524 {
525 type Output = $ustr;
526
527 #[inline]
528 fn index(&self, index: I) -> &Self::Output {
529 &self.as_ucstr()[index]
530 }
531 }
532
533 impl PartialEq<$ustr> for $ucstring {
534 #[inline]
535 fn eq(&self, other: &$ustr) -> bool {
536 self.as_ucstr() == other
537 }
538 }
539
540 impl PartialEq<$ucstr> for $ucstring {
541 #[inline]
542 fn eq(&self, other: &$ucstr) -> bool {
543 self.as_ucstr() == other
544 }
545 }
546
547 impl<'a> PartialEq<&'a $ustr> for $ucstring {
548 #[inline]
549 fn eq(&self, other: &&'a $ustr) -> bool {
550 self.as_ucstr() == *other
551 }
552 }
553
554 impl<'a> PartialEq<&'a $ucstr> for $ucstring {
555 #[inline]
556 fn eq(&self, other: &&'a $ucstr) -> bool {
557 self.as_ucstr() == *other
558 }
559 }
560
561 impl<'a> PartialEq<Cow<'a, $ustr>> for $ucstring {
562 #[inline]
563 fn eq(&self, other: &Cow<'a, $ustr>) -> bool {
564 self.as_ucstr() == other.as_ref()
565 }
566 }
567
568 impl<'a> PartialEq<Cow<'a, $ucstr>> for $ucstring {
569 #[inline]
570 fn eq(&self, other: &Cow<'a, $ucstr>) -> bool {
571 self.as_ucstr() == other.as_ref()
572 }
573 }
574
575 impl PartialEq<$ustring> for $ucstring {
576 #[inline]
577 fn eq(&self, other: &$ustring) -> bool {
578 self.as_ustr() == other.as_ustr()
579 }
580 }
581
582 impl PartialEq<$ucstring> for $ustr {
583 #[inline]
584 fn eq(&self, other: &$ucstring) -> bool {
585 self == other.as_ustr()
586 }
587 }
588
589 impl PartialEq<$ucstring> for $ucstr {
590 #[inline]
591 fn eq(&self, other: &$ucstring) -> bool {
592 self == other.as_ucstr()
593 }
594 }
595
596 impl PartialEq<$ucstring> for &$ucstr {
597 #[inline]
598 fn eq(&self, other: &$ucstring) -> bool {
599 self == other.as_ucstr()
600 }
601 }
602
603 impl PartialEq<$ucstring> for &$ustr {
604 #[inline]
605 fn eq(&self, other: &$ucstring) -> bool {
606 self == other.as_ucstr()
607 }
608 }
609
610 impl PartialOrd<$ustr> for $ucstring {
611 #[inline]
612 fn partial_cmp(&self, other: &$ustr) -> Option<cmp::Ordering> {
613 self.as_ucstr().partial_cmp(other)
614 }
615 }
616
617 impl PartialOrd<$ucstr> for $ucstring {
618 #[inline]
619 fn partial_cmp(&self, other: &$ucstr) -> Option<cmp::Ordering> {
620 self.as_ucstr().partial_cmp(other)
621 }
622 }
623
624 impl<'a> PartialOrd<&'a $ustr> for $ucstring {
625 #[inline]
626 fn partial_cmp(&self, other: &&'a $ustr) -> Option<cmp::Ordering> {
627 self.as_ucstr().partial_cmp(*other)
628 }
629 }
630
631 impl<'a> PartialOrd<&'a $ucstr> for $ucstring {
632 #[inline]
633 fn partial_cmp(&self, other: &&'a $ucstr) -> Option<cmp::Ordering> {
634 self.as_ucstr().partial_cmp(*other)
635 }
636 }
637
638 impl<'a> PartialOrd<Cow<'a, $ustr>> for $ucstring {
639 #[inline]
640 fn partial_cmp(&self, other: &Cow<'a, $ustr>) -> Option<cmp::Ordering> {
641 self.as_ucstr().partial_cmp(other.as_ref())
642 }
643 }
644
645 impl<'a> PartialOrd<Cow<'a, $ucstr>> for $ucstring {
646 #[inline]
647 fn partial_cmp(&self, other: &Cow<'a, $ucstr>) -> Option<cmp::Ordering> {
648 self.as_ucstr().partial_cmp(other.as_ref())
649 }
650 }
651
652 impl PartialOrd<$ustring> for $ucstring {
653 #[inline]
654 fn partial_cmp(&self, other: &$ustring) -> Option<cmp::Ordering> {
655 self.as_ustr().partial_cmp(other.as_ustr())
656 }
657 }
658
659 impl ToOwned for $ucstr {
660 type Owned = $ucstring;
661
662 #[inline]
663 fn to_owned(&self) -> $ucstring {
664 self.to_ucstring()
665 }
666 }
667 };
668}
669
670ucstring_common_impl! {
671 /// An owned, mutable C-style 16-bit wide string for FFI that is nul-aware and nul-terminated.
672 ///
673 /// The string slice of a [`U16CString`] is [`U16CStr`].
674 ///
675 /// [`U16CString`] strings do not have a defined encoding. While it is sometimes
676 /// assumed that they contain possibly invalid or ill-formed UTF-16 data, they may be used for
677 /// any wide encoded string.
678 ///
679 /// # Nul termination
680 ///
681 /// [`U16CString`] is aware of nul (`0`) values. Unless unchecked conversions are used, all
682 /// [`U16CString`] strings end with a nul-terminator in the underlying buffer and contain no
683 /// internal nul values. These strings are intended to be used with FFI functions that require
684 /// nul-terminated strings.
685 ///
686 /// Because of the nul termination requirement, multiple classes of methods are provided for
687 /// constructing a [`U16CString`] under various scenarios. By default, methods such as
688 /// [`from_ptr`][Self::from_ptr] and [`from_vec`][Self::from_vec] return an error if the input
689 /// contains any interior nul values before the terminator. For these methods, the input does not
690 /// need to contain the terminating nul; it is added if it does not exist.
691 ///
692 /// `_truncate` methods on the other hand, such as
693 /// [`from_ptr_truncate`][Self::from_ptr_truncate] and
694 /// [`from_vec_truncate`][Self::from_vec_truncate], construct a string that terminates with
695 /// the first nul value encountered in the string, and do not return an error. They
696 /// automatically ensure the string is terminated in a nul value even if it was not originally.
697 ///
698 /// Finally, unsafe `_unchecked` variants of these methods, such as
699 /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] and
700 /// [`from_vec_unchecked`][Self::from_vec_unchecked] allow bypassing any checks for nul
701 /// values, when the input has already been ensured to have no interior nul values. Again, any
702 /// missing nul terminator is automatically added if necessary.
703 ///
704 /// # Examples
705 ///
706 /// The easiest way to use [`U16CString`] outside of FFI is with the
707 /// [`u16cstr!`][crate::u16cstr] macro to convert string literals into nul-terminated UTF-16
708 /// strings at compile time:
709 ///
710 /// ```
711 /// use widestring::{u16cstr, U16CString};
712 /// let hello = U16CString::from(u16cstr!("Hello, world!"));
713 /// ```
714 ///
715 /// You can also convert any [`u16`] slice or vector directly:
716 ///
717 /// ```
718 /// use widestring::{u16cstr, U16CString};
719 ///
720 /// let sparkle_heart = vec![0xd83d, 0xdc96];
721 /// let sparkle_heart = U16CString::from_vec(sparkle_heart).unwrap();
722 /// // The string will add the missing nul terminator
723 ///
724 /// assert_eq!(u16cstr!("💖"), sparkle_heart);
725 ///
726 /// // This unpaired UTF-16 surrogate is invalid UTF-16, but is perfectly valid in U16CString
727 /// let malformed_utf16 = vec![0xd83d, 0x0];
728 /// let s = U16CString::from_vec(malformed_utf16).unwrap();
729 ///
730 /// assert_eq!(s.len(), 1); // Note the terminating nul is not counted in the length
731 /// ```
732 ///
733 /// When working with a FFI, it is useful to create a [`U16CString`] from a pointer:
734 ///
735 /// ```
736 /// use widestring::{u16cstr, U16CString};
737 ///
738 /// let sparkle_heart = [0xd83d, 0xdc96, 0x0];
739 /// let s = unsafe {
740 /// // Note the string and pointer length does not include the nul terminator
741 /// U16CString::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len() - 1).unwrap()
742 /// };
743 /// assert_eq!(u16cstr!("💖"), s);
744 ///
745 /// // Alternatively, if the length of the pointer is unknown but definitely terminates in nul,
746 /// // a C-style string version can be used
747 /// let s = unsafe { U16CString::from_ptr_str(sparkle_heart.as_ptr()) };
748 ///
749 /// assert_eq!(u16cstr!("💖"), s);
750 /// ```
751 struct U16CString([u16]);
752
753 type UCStr = U16CStr;
754 type UString = U16String;
755 type UStr = U16Str;
756
757 /// Constructs a wide C string from a container of wide character data.
758 ///
759 /// This method will consume the provided data and use the underlying elements to
760 /// construct a new string. The data will be scanned for invalid interior nul values.
761 ///
762 /// # Errors
763 ///
764 /// This function will return an error if the data contains a nul value that is not the
765 /// terminating nul.
766 /// The returned error will contain the original [`Vec`] as well as the position of the
767 /// nul value.
768 ///
769 /// # Examples
770 ///
771 /// ```rust
772 /// use widestring::U16CString;
773 /// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e'
774 /// # let cloned = v.clone();
775 /// // Create a wide string from the vector
776 /// let wcstr = U16CString::from_vec(v).unwrap();
777 /// # assert_eq!(wcstr.into_vec(), cloned);
778 /// ```
779 ///
780 /// Empty vectors are valid and will return an empty string with a nul terminator:
781 ///
782 /// ```
783 /// use widestring::U16CString;
784 /// let wcstr = U16CString::from_vec(vec![]).unwrap();
785 /// assert_eq!(wcstr, U16CString::default());
786 /// ```
787 ///
788 /// The following example demonstrates errors from nul values in a vector.
789 ///
790 /// ```rust
791 /// use widestring::U16CString;
792 /// let v = vec![84u16, 0u16, 104u16, 101u16]; // 'T' NUL 'h' 'e'
793 /// // Create a wide string from the vector
794 /// let res = U16CString::from_vec(v);
795 /// assert!(res.is_err());
796 /// assert_eq!(res.err().unwrap().nul_position(), 1);
797 /// ```
798 fn from_vec() -> {}
799
800 /// Constructs a wide C string from a container of wide character data, truncating at
801 /// the first nul terminator.
802 ///
803 /// The string will be truncated at the first nul value in the data.
804 ///
805 /// # Examples
806 ///
807 /// ```rust
808 /// use widestring::U16CString;
809 /// let v = vec![84u16, 104u16, 101u16, 0u16]; // 'T' 'h' 'e' NUL
810 /// # let cloned = v[..3].to_owned();
811 /// // Create a wide string from the vector
812 /// let wcstr = U16CString::from_vec_truncate(v);
813 /// # assert_eq!(wcstr.into_vec(), cloned);
814 /// ```
815 fn from_vec_truncate() -> {}
816
817 /// Converts this wide C string into a boxed wide C string slice.
818 ///
819 /// # Examples
820 ///
821 /// ```
822 /// use widestring::{U16CString, U16CStr};
823 ///
824 /// let mut v = vec![102u16, 111u16, 111u16]; // "foo"
825 /// let c_string = U16CString::from_vec(v.clone()).unwrap();
826 /// let boxed = c_string.into_boxed_ucstr();
827 /// v.push(0);
828 /// assert_eq!(&*boxed, U16CStr::from_slice(&v).unwrap());
829 /// ```
830 fn into_boxed_ucstr() -> {}
831}
832ucstring_common_impl! {
833 /// An owned, mutable C-style 32-bit wide string for FFI that is nul-aware and nul-terminated.
834 ///
835 /// The string slice of a [`U32CString`] is [`U32CStr`].
836 ///
837 /// [`U32CString`] strings do not have a defined encoding. While it is sometimes
838 /// assumed that they contain possibly invalid or ill-formed UTF-32 data, they may be used for
839 /// any wide encoded string.
840 ///
841 /// # Nul termination
842 ///
843 /// [`U32CString`] is aware of nul (`0`) values. Unless unchecked conversions are used, all
844 /// [`U32CString`] strings end with a nul-terminator in the underlying buffer and contain no
845 /// internal nul values. These strings are intended to be used with FFI functions that require
846 /// nul-terminated strings.
847 ///
848 /// Because of the nul termination requirement, multiple classes of methods are provided for
849 /// constructing a [`U32CString`] under various scenarios. By default, methods such as
850 /// [`from_ptr`][Self::from_ptr] and [`from_vec`][Self::from_vec] return an error if the input
851 /// contains any interior nul values before the terminator. For these methods, the input does not
852 /// need to contain the terminating nul; it is added if it does not exist.
853 ///
854 /// `_truncate` methods on the other hand, such as
855 /// [`from_ptr_truncate`][Self::from_ptr_truncate] and
856 /// [`from_vec_truncate`][Self::from_vec_truncate], construct a string that terminates with
857 /// the first nul value encountered in the string, and do not return an error. They
858 /// automatically ensure the string is terminated in a nul value even if it was not originally.
859 ///
860 /// Finally, unsafe `_unchecked` variants of these methods, such as
861 /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] and
862 /// [`from_vec_unchecked`][Self::from_vec_unchecked] allow bypassing any checks for nul
863 /// values, when the input has already been ensured to have no interior nul values. Again, any
864 /// missing nul terminator is automatically added if necessary.
865 ///
866 /// # Examples
867 ///
868 /// The easiest way to use [`U32CString`] outside of FFI is with the
869 /// [`u32cstr!`][crate::u32cstr] macro to convert string literals into nul-terminated UTF-32
870 /// strings at compile time:
871 ///
872 /// ```
873 /// use widestring::{u32cstr, U32CString};
874 /// let hello = U32CString::from(u32cstr!("Hello, world!"));
875 /// ```
876 ///
877 /// You can also convert any [`u32`] slice or vector directly:
878 ///
879 /// ```
880 /// use widestring::{u32cstr, U32CString};
881 ///
882 /// let sparkle_heart = vec![0x1f496];
883 /// let sparkle_heart = U32CString::from_vec(sparkle_heart).unwrap();
884 /// // The string will add the missing nul terminator
885 ///
886 /// assert_eq!(u32cstr!("💖"), sparkle_heart);
887 ///
888 /// // This UTF-16 surrogate value is invalid UTF-32, but is perfectly valid in U32CString
889 /// let malformed_utf32 = vec![0xd83d, 0x0];
890 /// let s = U32CString::from_vec(malformed_utf32).unwrap();
891 ///
892 /// assert_eq!(s.len(), 1); // Note the terminating nul is not counted in the length
893 /// ```
894 ///
895 /// When working with a FFI, it is useful to create a [`U32CString`] from a pointer:
896 ///
897 /// ```
898 /// use widestring::{u32cstr, U32CString};
899 ///
900 /// let sparkle_heart = [0x1f496, 0x0];
901 /// let s = unsafe {
902 /// // Note the string and pointer length does not include the nul terminator
903 /// U32CString::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len() - 1).unwrap()
904 /// };
905 /// assert_eq!(u32cstr!("💖"), s);
906 ///
907 /// // Alternatively, if the length of the pointer is unknown but definitely terminates in nul,
908 /// // a C-style string version can be used
909 /// let s = unsafe { U32CString::from_ptr_str(sparkle_heart.as_ptr()) };
910 ///
911 /// assert_eq!(u32cstr!("💖"), s);
912 /// ```
913 struct U32CString([u32]);
914
915 type UCStr = U32CStr;
916 type UString = U32String;
917 type UStr = U32Str;
918
919 /// Constructs a wide C string from a container of wide character data.
920 ///
921 /// This method will consume the provided data and use the underlying elements to
922 /// construct a new string. The data will be scanned for invalid interior nul values.
923 ///
924 /// # Errors
925 ///
926 /// This function will return an error if the data contains a nul value that is not the
927 /// terminating nul.
928 /// The returned error will contain the original [`Vec`] as well as the position of the
929 /// nul value.
930 ///
931 /// # Examples
932 ///
933 /// ```rust
934 /// use widestring::U32CString;
935 /// let v = vec![84u32, 104u32, 101u32]; // 'T' 'h' 'e'
936 /// # let cloned = v.clone();
937 /// // Create a wide string from the vector
938 /// let wcstr = U32CString::from_vec(v).unwrap();
939 /// # assert_eq!(wcstr.into_vec(), cloned);
940 /// ```
941 ///
942 /// Empty vectors are valid and will return an empty string with a nul terminator:
943 ///
944 /// ```
945 /// use widestring::U32CString;
946 /// let wcstr = U32CString::from_vec(vec![]).unwrap();
947 /// assert_eq!(wcstr, U32CString::default());
948 /// ```
949 ///
950 /// The following example demonstrates errors from nul values in a vector.
951 ///
952 /// ```rust
953 /// use widestring::U32CString;
954 /// let v = vec![84u32, 0u32, 104u32, 101u32]; // 'T' NUL 'h' 'e'
955 /// // Create a wide string from the vector
956 /// let res = U32CString::from_vec(v);
957 /// assert!(res.is_err());
958 /// assert_eq!(res.err().unwrap().nul_position(), 1);
959 /// ```
960 fn from_vec() -> {}
961
962 /// Constructs a wide C string from a container of wide character data, truncating at
963 /// the first nul terminator.
964 ///
965 /// The string will be truncated at the first nul value in the data.
966 ///
967 /// # Examples
968 ///
969 /// ```rust
970 /// use widestring::U32CString;
971 /// let v = vec![84u32, 104u32, 101u32, 0u32]; // 'T' 'h' 'e' NUL
972 /// # let cloned = v[..3].to_owned();
973 /// // Create a wide string from the vector
974 /// let wcstr = U32CString::from_vec_truncate(v);
975 /// # assert_eq!(wcstr.into_vec(), cloned);
976 /// ```
977 fn from_vec_truncate() -> {}
978
979 /// Converts this wide C string into a boxed wide C string slice.
980 ///
981 /// # Examples
982 ///
983 /// ```
984 /// use widestring::{U32CString, U32CStr};
985 ///
986 /// let mut v = vec![102u32, 111u32, 111u32]; // "foo"
987 /// let c_string = U32CString::from_vec(v.clone()).unwrap();
988 /// let boxed = c_string.into_boxed_ucstr();
989 /// v.push(0);
990 /// assert_eq!(&*boxed, U32CStr::from_slice(&v).unwrap());
991 /// ```
992 fn into_boxed_ucstr() -> {}
993}
994
995impl U16CString {
996 /// Constructs a [`U16CString`] copy from a [`str`], encoding it as UTF-16.
997 ///
998 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
999 /// resulting [`U16CString`] will also be valid UTF-16.
1000 ///
1001 /// The string will be scanned for nul values, which are invalid anywhere except the final
1002 /// character.
1003 ///
1004 /// The resulting string will always be nul-terminated even if the original string is not.
1005 ///
1006 /// # Errors
1007 ///
1008 /// This function will return an error if the data contains a nul value anywhere except the
1009 /// final position.
1010 /// The returned error will contain a [`Vec<u16>`] as well as the position of the nul value.
1011 ///
1012 /// # Examples
1013 ///
1014 /// ```rust
1015 /// use widestring::U16CString;
1016 /// let s = "MyString";
1017 /// // Create a wide string from the string
1018 /// let wcstr = U16CString::from_str(s).unwrap();
1019 /// # assert_eq!(wcstr.to_string_lossy(), s);
1020 /// ```
1021 ///
1022 /// The following example demonstrates errors from nul values in a string.
1023 ///
1024 /// ```rust
1025 /// use widestring::U16CString;
1026 /// let s = "My\u{0}String";
1027 /// // Create a wide string from the string
1028 /// let res = U16CString::from_str(s);
1029 /// assert!(res.is_err());
1030 /// assert_eq!(res.err().unwrap().nul_position(), 2);
1031 /// ```
1032 #[allow(clippy::should_implement_trait)]
1033 #[inline]
1034 pub fn from_str(s: impl AsRef<str>) -> Result<Self, ContainsNul<u16>> {
1035 let v: Vec<u16> = s.as_ref().encode_utf16().collect();
1036 Self::from_vec(v)
1037 }
1038
1039 /// Constructs a [`U16CString`] copy from a [`str`], encoding it as UTF-16, without checking for
1040 /// interior nul values.
1041 ///
1042 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
1043 /// resulting [`U16CString`] will also be valid UTF-16.
1044 ///
1045 /// The resulting string will always be nul-terminated even if the original string is not.
1046 ///
1047 /// # Safety
1048 ///
1049 /// This method is equivalent to [`from_str`][Self::from_str] except that no runtime assertion
1050 /// is made that `s` contains no interior nul values. Providing a string with nul values that
1051 /// are not the last character will result in an invalid [`U16CString`].
1052 ///
1053 /// # Examples
1054 ///
1055 /// ```rust
1056 /// use widestring::U16CString;
1057 /// let s = "MyString";
1058 /// // Create a wide string from the string
1059 /// let wcstr = unsafe { U16CString::from_str_unchecked(s) };
1060 /// # assert_eq!(wcstr.to_string_lossy(), s);
1061 /// ```
1062 #[inline]
1063 #[must_use]
1064 pub unsafe fn from_str_unchecked(s: impl AsRef<str>) -> Self {
1065 let v: Vec<u16> = s.as_ref().encode_utf16().collect();
1066 Self::from_vec_unchecked(v)
1067 }
1068
1069 /// Constructs a [`U16CString`] copy from a [`str`], encoding it as UTF-16, truncating at the
1070 /// first nul terminator.
1071 ///
1072 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
1073 /// resulting [`U16CString`] will also be valid UTF-16.
1074 ///
1075 /// The string will be truncated at the first nul value in the string.
1076 /// The resulting string will always be nul-terminated even if the original string is not.
1077 ///
1078 /// # Examples
1079 ///
1080 /// ```rust
1081 /// use widestring::U16CString;
1082 /// let s = "My\u{0}String";
1083 /// // Create a wide string from the string
1084 /// let wcstr = U16CString::from_str_truncate(s);
1085 /// assert_eq!(wcstr.to_string_lossy(), "My");
1086 /// ```
1087 #[inline]
1088 #[must_use]
1089 pub fn from_str_truncate(s: impl AsRef<str>) -> Self {
1090 let v: Vec<u16> = s.as_ref().encode_utf16().collect();
1091 Self::from_vec_truncate(v)
1092 }
1093
1094 /// Constructs a [`U16CString`] copy from an [`OsStr`][std::ffi::OsStr].
1095 ///
1096 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1097 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1098 /// [`U16CString`] will be valid UTF-16.
1099 ///
1100 /// The string will be scanned for nul values, which are invalid anywhere except the final
1101 /// character.
1102 /// The resulting string will always be nul-terminated even if the original string is not.
1103 ///
1104 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1105 /// some platforms this may make an encoding conversions, while on other platforms (such as
1106 /// windows) no changes to the string will be made.
1107 ///
1108 /// # Errors
1109 ///
1110 /// This function will return an error if the data contains a nul value anywhere except the
1111 /// last character.
1112 /// The returned error will contain a [`Vec<u16>`] as well as the position of the nul value.
1113 ///
1114 /// # Examples
1115 ///
1116 /// ```rust
1117 /// use widestring::U16CString;
1118 /// let s = "MyString";
1119 /// // Create a wide string from the string
1120 /// let wcstr = U16CString::from_os_str(s).unwrap();
1121 /// # assert_eq!(wcstr.to_string_lossy(), s);
1122 /// ```
1123 ///
1124 /// The following example demonstrates errors from nul values in the string.
1125 ///
1126 /// ```rust
1127 /// use widestring::U16CString;
1128 /// let s = "My\u{0}String";
1129 /// // Create a wide string from the string
1130 /// let res = U16CString::from_os_str(s);
1131 /// assert!(res.is_err());
1132 /// assert_eq!(res.err().unwrap().nul_position(), 2);
1133 /// ```
1134 #[inline]
1135 #[cfg(feature = "std")]
1136 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1137 pub fn from_os_str(s: impl AsRef<std::ffi::OsStr>) -> Result<Self, ContainsNul<u16>> {
1138 let v = crate::platform::os_to_wide(s.as_ref());
1139 Self::from_vec(v)
1140 }
1141
1142 /// Constructs a [`U16CString`] copy from an [`OsStr`][std::ffi::OsStr], without checking for nul
1143 /// values.
1144 ///
1145 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1146 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1147 /// [`U16CString`] will be valid UTF-16.
1148 ///
1149 /// The resulting string will always be nul-terminated even if the original string is not.
1150 ///
1151 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1152 /// some platforms this may make an encoding conversions, while on other platforms (such as
1153 /// windows) no changes to the string will be made.
1154 ///
1155 /// # Safety
1156 ///
1157 /// This method is equivalent to [`from_os_str`][Self::from_os_str] except that no runtime
1158 /// assertion is made that `s` contains no interior nul values. Providing a string with nul
1159 /// values anywhere but the last character will result in an invalid [`U16CString`].
1160 ///
1161 /// # Examples
1162 ///
1163 /// ```rust
1164 /// use widestring::U16CString;
1165 /// let s = "MyString";
1166 /// // Create a wide string from the string
1167 /// let wcstr = unsafe { U16CString::from_os_str_unchecked(s) };
1168 /// # assert_eq!(wcstr.to_string_lossy(), s);
1169 /// ```
1170 #[cfg(feature = "std")]
1171 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1172 #[must_use]
1173 pub unsafe fn from_os_str_unchecked(s: impl AsRef<std::ffi::OsStr>) -> Self {
1174 let v = crate::platform::os_to_wide(s.as_ref());
1175 Self::from_vec_unchecked(v)
1176 }
1177
1178 /// Constructs a [`U16CString`] copy from an [`OsStr`][std::ffi::OsStr], truncating at the first
1179 /// nul terminator.
1180 ///
1181 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1182 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1183 /// [`U16CString`] will be valid UTF-16.
1184 ///
1185 /// The string will be truncated at the first nul value in the string.
1186 /// The resulting string will always be nul-terminated even if the original string is not.
1187 ///
1188 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1189 /// some platforms this may make an encoding conversions, while on other platforms (such as
1190 /// windows) no changes to the string will be made.
1191 ///
1192 /// # Examples
1193 ///
1194 /// ```rust
1195 /// use widestring::U16CString;
1196 /// let s = "My\u{0}String";
1197 /// // Create a wide string from the string
1198 /// let wcstr = U16CString::from_os_str_truncate(s);
1199 /// assert_eq!(wcstr.to_string_lossy(), "My");
1200 /// ```
1201 #[inline]
1202 #[cfg(feature = "std")]
1203 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1204 #[must_use]
1205 pub fn from_os_str_truncate(s: impl AsRef<std::ffi::OsStr>) -> Self {
1206 let v = crate::platform::os_to_wide(s.as_ref());
1207 Self::from_vec_truncate(v)
1208 }
1209}
1210
1211impl U32CString {
1212 /// Constructs a [`U32CString`] from a container of character data, checking for invalid nul
1213 /// values.
1214 ///
1215 /// This method will consume the provided data and use the underlying elements to construct a
1216 /// new string. The data will be scanned for invalid nul values anywhere except the last
1217 /// character.
1218 /// The resulting string will always be nul-terminated even if the original string is not.
1219 ///
1220 /// # Errors
1221 ///
1222 /// This function will return an error if the data contains a nul value anywhere except the
1223 /// last character.
1224 /// The returned error will contain the [`Vec<u32>`] as well as the position of the nul value.
1225 ///
1226 /// # Examples
1227 ///
1228 /// ```rust
1229 /// use widestring::U32CString;
1230 /// let v: Vec<char> = "Test".chars().collect();
1231 /// # let cloned: Vec<u32> = v.iter().map(|&c| c as u32).collect();
1232 /// // Create a wide string from the vector
1233 /// let wcstr = U32CString::from_chars(v).unwrap();
1234 /// # assert_eq!(wcstr.into_vec(), cloned);
1235 /// ```
1236 ///
1237 /// The following example demonstrates errors from nul values in a vector.
1238 ///
1239 /// ```rust
1240 /// use widestring::U32CString;
1241 /// let v: Vec<char> = "T\u{0}est".chars().collect();
1242 /// // Create a wide string from the vector
1243 /// let res = U32CString::from_chars(v);
1244 /// assert!(res.is_err());
1245 /// assert_eq!(res.err().unwrap().nul_position(), 1);
1246 /// ```
1247 pub fn from_chars(v: impl Into<Vec<char>>) -> Result<Self, ContainsNul<u32>> {
1248 let mut chars = v.into();
1249 let v: Vec<u32> = unsafe {
1250 let ptr = chars.as_mut_ptr() as *mut u32;
1251 let len = chars.len();
1252 let cap = chars.capacity();
1253 mem::forget(chars);
1254 Vec::from_raw_parts(ptr, len, cap)
1255 };
1256 Self::from_vec(v)
1257 }
1258
1259 /// Constructs a [`U32CString`] from a container of character data, truncating at the first nul
1260 /// value.
1261 ///
1262 /// This method will consume the provided data and use the underlying elements to construct a
1263 /// new string. The string will be truncated at the first nul value in the string.
1264 /// The resulting string will always be nul-terminated even if the original string is not.
1265 ///
1266 /// # Examples
1267 ///
1268 /// ```rust
1269 /// use widestring::U32CString;
1270 /// let v: Vec<char> = "Test\u{0}".chars().collect();
1271 /// # let cloned: Vec<u32> = v[..4].iter().map(|&c| c as u32).collect();
1272 /// // Create a wide string from the vector
1273 /// let wcstr = U32CString::from_chars_truncate(v);
1274 /// # assert_eq!(wcstr.into_vec(), cloned);
1275 /// ```
1276 #[must_use]
1277 pub fn from_chars_truncate(v: impl Into<Vec<char>>) -> Self {
1278 let mut chars = v.into();
1279 let v: Vec<u32> = unsafe {
1280 let ptr = chars.as_mut_ptr() as *mut u32;
1281 let len = chars.len();
1282 let cap = chars.capacity();
1283 mem::forget(chars);
1284 Vec::from_raw_parts(ptr, len, cap)
1285 };
1286 Self::from_vec_truncate(v)
1287 }
1288
1289 /// Constructs a [`U32CString`] from character data without checking for nul values.
1290 ///
1291 /// A terminating nul value will be appended if the vector does not already have a terminating
1292 /// nul.
1293 ///
1294 /// # Safety
1295 ///
1296 /// This method is equivalent to [`from_chars`][Self::from_chars] except that no runtime
1297 /// assertion is made that `v` contains no interior nul values. Providing a vector with nul
1298 /// values anywhere but the last character will result in an invalid [`U32CString`].
1299 #[must_use]
1300 pub unsafe fn from_chars_unchecked(v: impl Into<Vec<char>>) -> Self {
1301 let mut chars = v.into();
1302 let v: Vec<u32> = {
1303 let ptr = chars.as_mut_ptr() as *mut u32;
1304 let len = chars.len();
1305 let cap = chars.capacity();
1306 mem::forget(chars);
1307 Vec::from_raw_parts(ptr, len, cap)
1308 };
1309 Self::from_vec_unchecked(v)
1310 }
1311
1312 /// Constructs a [`U32CString`] copy from a [`str`], encoding it as UTF-32 and checking for
1313 /// invalid interior nul values.
1314 ///
1315 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
1316 /// resulting [`U32CString`] will also be valid UTF-32.
1317 ///
1318 /// The string will be scanned for nul values, which are invalid anywhere except the last
1319 /// character.
1320 /// The resulting string will always be nul-terminated even if the original string is not.
1321 ///
1322 /// # Errors
1323 ///
1324 /// This function will return an error if the data contains a nul value anywhere except the
1325 /// last character.
1326 /// The returned error will contain a [`Vec<u32>`] as well as the position of the nul value.
1327 ///
1328 /// # Examples
1329 ///
1330 /// ```rust
1331 /// use widestring::U32CString;
1332 /// let s = "MyString";
1333 /// // Create a wide string from the string
1334 /// let wcstr = U32CString::from_str(s).unwrap();
1335 /// # assert_eq!(wcstr.to_string_lossy(), s);
1336 /// ```
1337 ///
1338 /// The following example demonstrates errors from nul values in a string.
1339 ///
1340 /// ```rust
1341 /// use widestring::U32CString;
1342 /// let s = "My\u{0}String";
1343 /// // Create a wide string from the string
1344 /// let res = U32CString::from_str(s);
1345 /// assert!(res.is_err());
1346 /// assert_eq!(res.err().unwrap().nul_position(), 2);
1347 /// ```
1348 #[allow(clippy::should_implement_trait)]
1349 #[inline]
1350 pub fn from_str(s: impl AsRef<str>) -> Result<Self, ContainsNul<u32>> {
1351 let v: Vec<char> = s.as_ref().chars().collect();
1352 Self::from_chars(v)
1353 }
1354
1355 /// Constructs a [`U32CString`] copy from a [`str`], encoding it as UTF-32, without checking for
1356 /// nul values.
1357 ///
1358 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
1359 /// resulting [`U32CString`] will also be valid UTF-32.
1360 ///
1361 /// The resulting string will always be nul-terminated even if the original string is not.
1362 ///
1363 /// # Safety
1364 ///
1365 /// This method is equivalent to [`from_str`][Self::from_str] except that no runtime assertion
1366 /// is made that `s` contains invalid nul values. Providing a string with nul values anywhere
1367 /// except the last character will result in an invalid [`U32CString`].
1368 ///
1369 /// # Examples
1370 ///
1371 /// ```rust
1372 /// use widestring::U32CString;
1373 /// let s = "MyString";
1374 /// // Create a wide string from the string
1375 /// let wcstr = unsafe { U32CString::from_str_unchecked(s) };
1376 /// # assert_eq!(wcstr.to_string_lossy(), s);
1377 /// ```
1378 #[inline]
1379 #[must_use]
1380 pub unsafe fn from_str_unchecked(s: impl AsRef<str>) -> Self {
1381 let v: Vec<char> = s.as_ref().chars().collect();
1382 Self::from_chars_unchecked(v)
1383 }
1384
1385 /// Constructs a [`U32CString`] copy from a [`str`], encoding it as UTF-32, truncating at the
1386 /// first nul terminator.
1387 ///
1388 /// This makes a string copy of the [`str`]. Since [`str`] will always be valid UTF-8, the
1389 /// resulting [`U32CString`] will also be valid UTF-32.
1390 ///
1391 /// The string will be truncated at the first nul value in the string.
1392 /// The resulting string will always be nul-terminated even if the original string is not.
1393 ///
1394 /// # Examples
1395 ///
1396 /// ```rust
1397 /// use widestring::U32CString;
1398 /// let s = "My\u{0}String";
1399 /// // Create a wide string from the string
1400 /// let wcstr = U32CString::from_str_truncate(s);
1401 /// assert_eq!(wcstr.to_string_lossy(), "My");
1402 /// ```
1403 #[inline]
1404 #[must_use]
1405 pub fn from_str_truncate(s: impl AsRef<str>) -> Self {
1406 let v: Vec<char> = s.as_ref().chars().collect();
1407 Self::from_chars_truncate(v)
1408 }
1409
1410 /// Constructs a new wide C string copied from a nul-terminated [`char`] string pointer.
1411 ///
1412 /// This will scan for nul values beginning with `p`. The first nul value will be used as the
1413 /// nul terminator for the string, similar to how libc string functions such as `strlen` work.
1414 ///
1415 /// If you wish to avoid copying the string pointer, use [`U32CStr::from_char_ptr_str`] instead.
1416 ///
1417 /// # Safety
1418 ///
1419 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
1420 /// nul terminator, and the function could scan past the underlying buffer.
1421 ///
1422 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1423 ///
1424 /// # Panics
1425 ///
1426 /// This function panics if `p` is null.
1427 ///
1428 /// # Caveat
1429 ///
1430 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1431 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1432 /// context, such as by providing a helper function taking the lifetime of a host value for the
1433 /// string, or by explicit annotation.
1434 #[inline]
1435 #[must_use]
1436 pub unsafe fn from_char_ptr_str(p: *const char) -> Self {
1437 Self::from_ptr_str(p as *const u32)
1438 }
1439
1440 /// Constructs a wide C string copied from a [`char`] pointer and a length, checking for invalid
1441 /// interior nul values.
1442 ///
1443 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1444 /// **not** include the nul terminator of the string. If `len` is `0`, `p` is allowed to be a
1445 /// null pointer.
1446 ///
1447 /// The resulting string will always be nul-terminated even if the pointer data is not.
1448 ///
1449 /// # Errors
1450 ///
1451 /// This will scan the pointer string for an interior nul value and error if one is found. To
1452 /// avoid scanning for interior nuls, [`from_ptr_unchecked`][Self::from_ptr_unchecked] may be
1453 /// used instead.
1454 /// The returned error will contain a [`Vec`] as well as the position of the nul value.
1455 ///
1456 /// # Safety
1457 ///
1458 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
1459 /// elements.
1460 ///
1461 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1462 ///
1463 /// # Panics
1464 ///
1465 /// Panics if `len` is greater than 0 but `p` is a null pointer.
1466 #[inline]
1467 pub unsafe fn from_char_ptr(p: *const char, len: usize) -> Result<Self, ContainsNul<u32>> {
1468 Self::from_ptr(p as *const u32, len)
1469 }
1470
1471 /// Constructs a wide C string copied from a [`char`] pointer and a length, truncating at the
1472 /// first nul terminator.
1473 ///
1474 /// The `len` argument is the number of elements, **not** the number of bytes. This will scan
1475 /// for nul values beginning with `p` until offset `len`. The first nul value will be used as
1476 /// the nul terminator for the string, ignoring any remaining values left before `len`. If no
1477 /// nul value is found, the whole string of length `len` is used, and a new nul-terminator
1478 /// will be added to the resulting string. If `len` is `0`, `p` is allowed to be a null pointer.
1479 ///
1480 /// # Safety
1481 ///
1482 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
1483 /// elements.
1484 ///
1485 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1486 ///
1487 /// # Panics
1488 ///
1489 /// Panics if `len` is greater than 0 but `p` is a null pointer.
1490 #[inline]
1491 #[must_use]
1492 pub unsafe fn from_char_ptr_truncate(p: *const char, len: usize) -> Self {
1493 Self::from_ptr_truncate(p as *const u32, len)
1494 }
1495
1496 /// Constructs a wide C string copied from a [`char`] pointer and a length without checking for
1497 /// any nul values.
1498 ///
1499 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1500 /// **not** include the nul terminator of the string. If `len` is `0`, `p` is allowed to be a
1501 /// null pointer.
1502 ///
1503 /// The resulting string will always be nul-terminated even if the pointer data is not.
1504 ///
1505 /// # Safety
1506 ///
1507 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
1508 /// elements.
1509 ///
1510 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1511 ///
1512 /// The interior values of the pointer are not scanned for nul. Any interior nul values or
1513 /// will result in an invalid C string.
1514 ///
1515 /// # Panics
1516 ///
1517 /// Panics if `len` is greater than 0 but `p` is a null pointer.
1518 #[must_use]
1519 pub unsafe fn from_char_ptr_unchecked(p: *const char, len: usize) -> Self {
1520 Self::from_ptr_unchecked(p as *const u32, len)
1521 }
1522
1523 /// Constructs a [`U32CString`] copy from an [`OsStr`][std::ffi::OsStr], checking for invalid
1524 /// nul values.
1525 ///
1526 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1527 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1528 /// [`U32CString`] will be valid UTF-32.
1529 ///
1530 /// The string will be scanned for nul values, which are invlaid anywhere except the last
1531 /// character.
1532 /// The resulting string will always be nul-terminated even if the string is not.
1533 ///
1534 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1535 /// some platforms this may make an encoding conversions, while on other platforms no changes to
1536 /// the string will be made.
1537 ///
1538 /// # Errors
1539 ///
1540 /// This function will return an error if the data contains a nul value anywhere except the
1541 /// last character.
1542 /// The returned error will contain a [`Vec<u32>`] as well as the position of the nul value.
1543 ///
1544 /// # Examples
1545 ///
1546 /// ```rust
1547 /// use widestring::U32CString;
1548 /// let s = "MyString";
1549 /// // Create a wide string from the string
1550 /// let wcstr = U32CString::from_os_str(s).unwrap();
1551 /// # assert_eq!(wcstr.to_string_lossy(), s);
1552 /// ```
1553 ///
1554 /// The following example demonstrates errors from nul values in a string.
1555 ///
1556 /// ```rust
1557 /// use widestring::U32CString;
1558 /// let s = "My\u{0}String";
1559 /// // Create a wide string from the string
1560 /// let res = U32CString::from_os_str(s);
1561 /// assert!(res.is_err());
1562 /// assert_eq!(res.err().unwrap().nul_position(), 2);
1563 /// ```
1564 #[cfg(feature = "std")]
1565 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1566 #[inline]
1567 pub fn from_os_str(s: impl AsRef<std::ffi::OsStr>) -> Result<Self, ContainsNul<u32>> {
1568 let v: Vec<char> = s.as_ref().to_string_lossy().chars().collect();
1569 Self::from_chars(v)
1570 }
1571
1572 /// Constructs a [`U32CString`] copy from an [`OsStr`][std::ffi::OsStr], without checking for
1573 /// nul values.
1574 ///
1575 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1576 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1577 /// [`U32CString`] will be valid UTF-32.
1578 ///
1579 /// The resulting string will always be nul-terminated even if the string is not.
1580 ///
1581 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1582 /// some platforms this may make an encoding conversions, while on other platforms no changes to
1583 /// the string will be made.
1584 ///
1585 /// # Safety
1586 ///
1587 /// This method is equivalent to [`from_os_str`][Self::from_os_str] except that no runtime
1588 /// assertion is made that `s` contains invalid nul values. Providing a string with nul values
1589 /// anywhere except the last character will result in an invalid [`U32CString`].
1590 ///
1591 /// # Examples
1592 ///
1593 /// ```rust
1594 /// use widestring::U32CString;
1595 /// let s = "MyString";
1596 /// // Create a wide string from the string
1597 /// let wcstr = unsafe { U32CString::from_os_str_unchecked(s) };
1598 /// # assert_eq!(wcstr.to_string_lossy(), s);
1599 /// ```
1600 #[cfg(feature = "std")]
1601 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1602 #[inline]
1603 #[must_use]
1604 pub unsafe fn from_os_str_unchecked(s: impl AsRef<std::ffi::OsStr>) -> Self {
1605 let v: Vec<char> = s.as_ref().to_string_lossy().chars().collect();
1606 Self::from_chars_unchecked(v)
1607 }
1608
1609 /// Constructs a [`U32CString`] copy from an [`OsStr`][std::ffi::OsStr], truncating at the first
1610 /// nul terminator.
1611 ///
1612 /// This makes a string copy of the [`OsStr`][std::ffi::OsStr]. Since [`OsStr`][std::ffi::OsStr]
1613 /// makes no guarantees that it is valid data, there is no guarantee that the resulting
1614 /// [`U32CString`] will be valid UTF-32.
1615 ///
1616 /// The string will be truncated at the first nul value in the string.
1617 /// The resulting string will always be nul-terminated even if the string is not.
1618 ///
1619 /// Note that the encoding of [`OsStr`][std::ffi::OsStr] is platform-dependent, so on
1620 /// some platforms this may make an encoding conversions, while on other platforms no changes to
1621 /// the string will be made.
1622 ///
1623 /// # Examples
1624 ///
1625 /// ```rust
1626 /// use widestring::U32CString;
1627 /// let s = "My\u{0}String";
1628 /// // Create a wide string from the string
1629 /// let wcstr = U32CString::from_os_str_truncate(s);
1630 /// assert_eq!(wcstr.to_string_lossy(), "My");
1631 /// ```
1632 #[cfg(feature = "std")]
1633 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1634 #[inline]
1635 #[must_use]
1636 pub fn from_os_str_truncate(s: impl AsRef<std::ffi::OsStr>) -> Self {
1637 let v: Vec<char> = s.as_ref().to_string_lossy().chars().collect();
1638 Self::from_chars_truncate(v)
1639 }
1640}
1641
1642impl core::fmt::Debug for U16CString {
1643 #[inline]
1644 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1645 crate::debug_fmt_u16(self.as_slice_with_nul(), f)
1646 }
1647}
1648
1649impl core::fmt::Debug for U32CString {
1650 #[inline]
1651 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1652 crate::debug_fmt_u32(self.as_slice_with_nul(), f)
1653 }
1654}
1655
/// Alias for [`U16CString`] or [`U32CString`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// On non-Windows targets this is [`U32CString`].
#[cfg(not(windows))]
pub type WideCString = U32CString;
1660
/// Alias for [`U16CString`] or [`U32CString`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// On Windows targets this is [`U16CString`].
#[cfg(windows)]
pub type WideCString = U16CString;