widestring/ucstr.rs
1//! C-style wide string slices.
2//!
3//! This module contains wide C string slices and related types.
4
5use crate::{
6 error::{ContainsNul, MissingNulTerminator, NulError},
7 U16Str, U32Str,
8};
9#[cfg(feature = "alloc")]
10#[allow(unused_imports)]
11use alloc::{borrow::ToOwned, boxed::Box, string::String};
12use core::{
13 fmt::Write,
14 ops::{Index, Range},
15 slice::{self, SliceIndex},
16};
17
18#[doc(inline)]
19pub use crate::ustr::{
20 CharIndicesLossyUtf16, CharIndicesLossyUtf32, CharIndicesUtf16, CharIndicesUtf32,
21 CharsLossyUtf16, CharsLossyUtf32, CharsUtf16, CharsUtf32,
22};
23
24macro_rules! ucstr_common_impl {
25 {
26 $(#[$ucstr_meta:meta])*
27 struct $ucstr:ident([$uchar:ty]);
28 type UCString = $ucstring:ident;
29 type UStr = $ustr:ident;
30 type UString = $ustring:ident;
31 $(#[$to_ustring_meta:meta])*
32 fn to_ustring() -> {}
33 $(#[$into_ucstring_meta:meta])*
34 fn into_ucstring() -> {}
35 $(#[$display_meta:meta])*
36 fn display() -> {}
37 } => {
        $(#[$ucstr_meta])*
        // `Hash` is derived together with the comparison traits and the clippy lint is silenced.
        // NOTE(review): presumably the lint fires because of the additional hand-written
        // `PartialEq` impls generated further down in this macro — confirm.
        #[allow(clippy::derive_hash_xor_eq)]
        #[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct $ucstr {
            // Raw elements of the string *including* the trailing nul terminator
            // (`len()` is computed as `inner.len() - 1`).
            inner: [$uchar],
        }
44
45 impl $ucstr {
            /// The nul terminator character value.
            ///
            /// The checked constructors of this type compare elements against this value when
            /// looking for the end of a string or for interior nul values.
            pub const NUL_TERMINATOR: $uchar = 0;
48
49 /// Coerces a value into a wide C string slice.
50 #[inline]
51 #[must_use]
52 pub fn new<S: AsRef<$ucstr> + ?Sized>(s: &S) -> &Self {
53 s.as_ref()
54 }
55
56 /// Constructs a wide C string slice from a nul-terminated string pointer.
57 ///
58 /// This will scan for nul values beginning with `p`. The first nul value will be used
59 /// as the nul terminator for the string, similar to how libc string functions such as
60 /// `strlen` work.
61 ///
62 /// # Safety
63 ///
64 /// This function is unsafe as there is no guarantee that the given pointer is valid or
65 /// has a nul terminator, and the function could scan past the underlying buffer.
66 ///
67 /// In addition, the data must meet the safety conditions of
68 /// [std::slice::from_raw_parts]. In particular, the returned string reference *must not
69 /// be mutated* for the duration of lifetime `'a`, except inside an
70 /// [`UnsafeCell`][std::cell::UnsafeCell].
71 ///
72 /// # Panics
73 ///
74 /// This function panics if `p` is null.
75 ///
76 /// # Caveat
77 ///
78 /// The lifetime for the returned string is inferred from its usage. To prevent
79 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
80 /// is safe in the context, such as by providing a helper function taking the lifetime
81 /// of a host value for the string, or by explicit annotation.
82 #[must_use]
83 pub unsafe fn from_ptr_str<'a>(p: *const $uchar) -> &'a Self {
84 assert!(!p.is_null());
85 let mut i = 0;
86 while *p.add(i) != Self::NUL_TERMINATOR {
87 i += 1;
88 }
89 Self::from_ptr_unchecked(p, i)
90 }
91
92 /// Constructs a mutable wide C string slice from a mutable nul-terminated string
93 /// pointer.
94 ///
95 /// This will scan for nul values beginning with `p`. The first nul value will be used
96 /// as the nul terminator for the string, similar to how libc string functions such as
97 /// `strlen` work.
98 ///
99 /// # Safety
100 ///
101 /// This function is unsafe as there is no guarantee that the given pointer is valid or
102 /// has a nul terminator, and the function could scan past the underlying buffer.
103 ///
104 /// In addition, the data must meet the safety conditions of
105 /// [std::slice::from_raw_parts_mut].
106 ///
107 /// # Panics
108 ///
109 /// This function panics if `p` is null.
110 ///
111 /// # Caveat
112 ///
113 /// The lifetime for the returned string is inferred from its usage. To prevent
114 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
115 /// is safe in the context, such as by providing a helper function taking the lifetime
116 /// of a host value for the string, or by explicit annotation.
117 #[must_use]
118 pub unsafe fn from_ptr_str_mut<'a>(p: *mut $uchar) -> &'a mut Self {
119 assert!(!p.is_null());
120 let mut i = 0;
121 while *p.add(i) != Self::NUL_TERMINATOR {
122 i += 1;
123 }
124 Self::from_ptr_unchecked_mut(p, i)
125 }
126
127 /// Constructs a wide C string slice from a pointer and a length.
128 ///
129 /// The `len` argument is the number of elements, **not** the number of bytes, and does
130 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and
131 /// means that `p` is a pointer directly to the nul terminator of the string.
132 ///
133 /// # Errors
134 ///
135 /// This will scan the pointer string for an interior nul value and error if one is
136 /// found before the nul terminator at `len` offset. To avoid scanning for interior
137 /// nuls, [`from_ptr_unchecked`][Self::from_ptr_unchecked] may be used instead.
138 ///
139 /// An error is returned if the value at `len` offset is not a nul terminator.
140 ///
141 /// # Safety
142 ///
143 /// This function is unsafe as there is no guarantee that the given pointer is valid for
144 /// `len + 1` elements.
145 ///
146 /// In addition, the data must meet the safety conditions of
147 /// [std::slice::from_raw_parts]. In particular, the returned string reference *must not
148 /// be mutated* for the duration of lifetime `'a`, except inside an
149 /// [`UnsafeCell`][std::cell::UnsafeCell].
150 ///
151 /// # Panics
152 ///
153 /// This function panics if `p` is null.
154 ///
155 /// # Caveat
156 ///
157 /// The lifetime for the returned string is inferred from its usage. To prevent
158 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
159 /// is safe in the context, such as by providing a helper function taking the lifetime
160 /// of a host value for the string, or by explicit annotation.
161 pub unsafe fn from_ptr<'a>(
162 p: *const $uchar,
163 len: usize,
164 ) -> Result<&'a Self, NulError<$uchar>> {
165 assert!(!p.is_null());
166 if *p.add(len) != Self::NUL_TERMINATOR {
167 return Err(MissingNulTerminator::new().into());
168 }
169 for i in 0..len {
170 if *p.add(i) == Self::NUL_TERMINATOR {
171 return Err(ContainsNul::empty(i).into());
172 }
173 }
174 Ok(Self::from_ptr_unchecked(p, len))
175 }
176
177 /// Constructs a mutable wide C string slice from a mutable pointer and a length.
178 ///
179 /// The `len` argument is the number of elements, **not** the number of bytes, and does
180 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and
181 /// means that `p` is a pointer directly to the nul terminator of the string.
182 ///
183 /// # Errors
184 ///
185 /// This will scan the pointer string for an interior nul value and error if one is
186 /// found before the nul terminator at `len` offset. To avoid scanning for interior
187 /// nuls, [`from_ptr_unchecked_mut`][Self::from_ptr_unchecked_mut] may be used instead.
188 ///
189 /// An error is returned if the value at `len` offset is not a nul terminator.
190 ///
191 /// # Safety
192 ///
193 /// This function is unsafe as there is no guarantee that the given pointer is valid for
194 /// `len + 1` elements.
195 ///
196 /// In addition, the data must meet the safety conditions of
197 /// [std::slice::from_raw_parts_mut].
198 ///
199 /// # Panics
200 ///
201 /// This function panics if `p` is null.
202 ///
203 /// # Caveat
204 ///
205 /// The lifetime for the returned string is inferred from its usage. To prevent
206 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
207 /// is safe in the context, such as by providing a helper function taking the lifetime
208 /// of a host value for the string, or by explicit annotation.
209 pub unsafe fn from_ptr_mut<'a>(
210 p: *mut $uchar,
211 len: usize,
212 ) -> Result<&'a mut Self, NulError<$uchar>> {
213 assert!(!p.is_null());
214 if *p.add(len) != Self::NUL_TERMINATOR {
215 return Err(MissingNulTerminator::new().into());
216 }
217 for i in 0..len {
218 if *p.add(i) == Self::NUL_TERMINATOR {
219 return Err(ContainsNul::empty(i).into());
220 }
221 }
222 Ok(Self::from_ptr_unchecked_mut(p, len))
223 }
224
225 /// Constructs a wide C string slice from a pointer and a length, truncating at the
226 /// first nul terminator.
227 ///
228 /// The `len` argument is the number of elements, **not** the number of bytes. This will
229 /// scan for nul values beginning with `p` until offset `len`. The first nul value will
230 /// be used as the nul terminator for the string, ignoring any remaining values left
231 /// before `len`.
232 ///
233 /// # Errors
234 ///
235 /// If no nul terminator is found after `len` + 1 elements, an error is returned.
236 ///
237 /// # Safety
238 ///
239 /// This function is unsafe as there is no guarantee that the given pointer is valid or
240 /// has a nul terminator, and the function could scan past the underlying buffer.
241 ///
242 /// In addition, the data must meet the safety conditions of
243 /// [std::slice::from_raw_parts]. In particular, the returned string reference *must not
244 /// be mutated* for the duration of lifetime `'a`, except inside an
245 /// [`UnsafeCell`][std::cell::UnsafeCell].
246 ///
247 /// # Panics
248 ///
249 /// This function panics if `p` is null.
250 ///
251 /// # Caveat
252 ///
253 /// The lifetime for the returned string is inferred from its usage. To prevent
254 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
255 /// is safe in the context, such as by providing a helper function taking the lifetime
256 /// of a host value for thev string, or by explicit annotation.
257 pub unsafe fn from_ptr_truncate<'a>(
258 p: *const $uchar,
259 len: usize,
260 ) -> Result<&'a Self, MissingNulTerminator> {
261 assert!(!p.is_null());
262 for i in 0..=len {
263 if *p.add(i) == Self::NUL_TERMINATOR {
264 return Ok(Self::from_ptr_unchecked(p, i));
265 }
266 }
267 Err(MissingNulTerminator::new())
268 }
269
270 /// Constructs a mutable wide C string slice from a mutable pointer and a length,
271 /// truncating at the first nul terminator.
272 ///
273 /// The `len` argument is the number of elements, **not** the number of bytes. This will
274 /// scan for nul values beginning with `p` until offset `len`. The first nul value will
275 /// be used as the nul terminator for the string, ignoring any remaining values left
276 /// before `len`.
277 ///
278 /// # Errors
279 ///
280 /// If no nul terminator is found after `len` + 1 elements, an error is returned.
281 ///
282 /// # Safety
283 ///
284 /// This function is unsafe as there is no guarantee that the given pointer is valid or
285 /// has a nul terminator, and the function could scan past the underlying buffer.
286 ///
287 /// In addition, the data must meet the safety conditions of
288 /// [std::slice::from_raw_parts_mut].
289 ///
290 /// # Panics
291 ///
292 /// This function panics if `p` is null.
293 ///
294 /// # Caveat
295 ///
296 /// The lifetime for the returned string is inferred from its usage. To prevent
297 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
298 /// is safe in the context, such as by providing a helper function taking the lifetime
299 /// of a host value for the string, or by explicit annotation.
300 pub unsafe fn from_ptr_truncate_mut<'a>(
301 p: *mut $uchar,
302 len: usize,
303 ) -> Result<&'a mut Self, MissingNulTerminator> {
304 assert!(!p.is_null());
305 for i in 0..=len {
306 if *p.add(i) == Self::NUL_TERMINATOR {
307 return Ok(Self::from_ptr_unchecked_mut(p, i));
308 }
309 }
310 Err(MissingNulTerminator::new())
311 }
312
313 /// Constructs a wide C string slice from a pointer and a length without checking for
314 /// any nul values.
315 ///
316 /// The `len` argument is the number of elements, **not** the number of bytes, and does
317 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and
318 /// means that `p` is a pointer directly to the nul terminator of the string.
319 ///
320 /// # Safety
321 ///
322 /// This function is unsafe as there is no guarantee that the given pointer is valid for
323 /// `len + 1` elements, nor that it has a terminating nul value.
324 ///
325 /// In addition, the data must meet the safety conditions of
326 /// [std::slice::from_raw_parts]. In particular, the returned string reference *must not
327 /// be mutated* for the duration of lifetime `'a`, except inside an
328 /// [`UnsafeCell`][std::cell::UnsafeCell].
329 ///
330 /// The interior values of the pointer are not scanned for nul. Any interior nul values
331 /// or a missing nul terminator at pointer offset `len` + 1 will result in an invalid
332 /// string slice.
333 ///
334 /// # Panics
335 ///
336 /// This function panics if `p` is null.
337 ///
338 /// # Caveat
339 ///
340 /// The lifetime for the returned string is inferred from its usage. To prevent
341 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
342 /// is safe in the context, such as by providing a helper function taking the lifetime
343 /// of a host value for the string, or by explicit annotation.
344 #[must_use]
345 pub unsafe fn from_ptr_unchecked<'a>(p: *const $uchar, len: usize) -> &'a Self {
346 assert!(!p.is_null());
347 let ptr: *const [$uchar] = slice::from_raw_parts(p, len + 1);
348 &*(ptr as *const Self)
349 }
350
351 /// Constructs a mutable wide C string slice from a mutable pointer and a length without
352 /// checking for any nul values.
353 ///
354 /// The `len` argument is the number of elements, **not** the number of bytes, and does
355 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and
356 /// means that `p` is a pointer directly to the nul terminator of the string.
357 ///
358 /// # Safety
359 ///
360 /// This function is unsafe as there is no guarantee that the given pointer is valid for
361 /// `len + 1` elements, nor that is has a terminating nul value.
362 ///
363 /// In addition, the data must meet the safety conditions of
364 /// [std::slice::from_raw_parts_mut].
365 ///
366 /// The interior values of the pointer are not scanned for nul. Any interior nul values
367 /// or a missing nul terminator at pointer offset `len` + 1 will result in an invalid
368 /// string slice.
369 ///
370 /// # Panics
371 ///
372 /// This function panics if `p` is null.
373 ///
374 /// # Caveat
375 ///
376 /// The lifetime for the returned string is inferred from its usage. To prevent
377 /// accidental misuse, it's suggested to tie the lifetime to whichever source lifetime
378 /// is safe in the context, such as by providing a helper function taking the lifetime
379 /// of a host value for the string, or by explicit annotation.
380 #[must_use]
381 pub unsafe fn from_ptr_unchecked_mut<'a>(p: *mut $uchar, len: usize) -> &'a mut Self {
382 assert!(!p.is_null());
383 let ptr: *mut [$uchar] = slice::from_raw_parts_mut(p, len + 1);
384 &mut *(ptr as *mut Self)
385 }
386
387 /// Constructs a wide C string slice from a slice of values with a terminating nul,
388 /// checking for invalid interior nul values.
389 ///
390 /// The slice must have at least one item, the nul terminator, even for an empty string.
391 ///
392 /// # Errors
393 ///
394 /// If there are nul values in the slice except for the last value, an error is
395 /// returned.
396 ///
397 /// An error is also returned if the last value of the slice is not a nul terminator.
398 pub fn from_slice(slice: &[$uchar]) -> Result<&Self, NulError<$uchar>> {
399 if slice.last() != Some(&Self::NUL_TERMINATOR) {
400 return Err(MissingNulTerminator::new().into());
401 }
402 match slice[..slice.len() - 1]
403 .iter()
404 .position(|x| *x == Self::NUL_TERMINATOR)
405 {
406 None => Ok(unsafe { Self::from_slice_unchecked(slice) }),
407 Some(i) => Err(ContainsNul::empty(i).into()),
408 }
409 }
410
411 /// Constructs a mutable wide C string slice from a mutable slice of values with a
412 /// terminating nul, checking for invalid interior nul values.
413 ///
414 /// The slice must have at least one item, the nul terminator, even for an empty string.
415 ///
416 /// # Errors
417 ///
418 /// If there are nul values in the slice except for the last value, an error is
419 /// returned.
420 ///
421 /// An error is also returned if the last value of the slice is not a nul terminator.
422 pub fn from_slice_mut(slice: &mut [$uchar]) -> Result<&mut Self, NulError<$uchar>> {
423 if slice.last() != Some(&Self::NUL_TERMINATOR) {
424 return Err(MissingNulTerminator::new().into());
425 }
426 match slice[..slice.len() - 1]
427 .iter()
428 .position(|x| *x == Self::NUL_TERMINATOR)
429 {
430 None => Ok(unsafe { Self::from_slice_unchecked_mut(slice) }),
431 Some(i) => Err(ContainsNul::empty(i).into()),
432 }
433 }
434
435 /// Constructs a wide C string slice from a slice of values, truncating at the first nul
436 /// terminator.
437 ///
438 /// The slice will be scanned for nul values. When a nul value is found, it is treated
439 /// as the terminator for the string, and the string slice will be truncated to that
440 /// nul.
441 ///
442 /// # Errors
443 ///
444 /// If there are no nul values in the slice, an error is returned.
445 pub fn from_slice_truncate(slice: &[$uchar]) -> Result<&Self, MissingNulTerminator> {
446 match slice.iter().position(|x| *x == Self::NUL_TERMINATOR) {
447 None => Err(MissingNulTerminator::new()),
448 Some(i) => Ok(unsafe { Self::from_slice_unchecked(&slice[..i + 1]) }),
449 }
450 }
451
452 /// Constructs a mutable wide C string slice from a mutable slice of values, truncating
453 /// at the first nul terminator.
454 ///
455 /// The slice will be scanned for nul values. When a nul value is found, it is treated
456 /// as the terminator for the string, and the string slice will be truncated to that
457 /// nul.
458 ///
459 /// # Errors
460 ///
461 /// If there are no nul values in the slice, an error is returned.
462 pub fn from_slice_truncate_mut(
463 slice: &mut [$uchar],
464 ) -> Result<&mut Self, MissingNulTerminator> {
465 match slice.iter().position(|x| *x == Self::NUL_TERMINATOR) {
466 None => Err(MissingNulTerminator::new()),
467 Some(i) => Ok(unsafe { Self::from_slice_unchecked_mut(&mut slice[..i + 1]) }),
468 }
469 }
470
471 /// Constructs a wide C string slice from a slice of values without checking for a
472 /// terminating or interior nul values.
473 ///
474 /// # Safety
475 ///
476 /// This function is unsafe because it can lead to invalid string slice values when the
477 /// slice is missing a terminating nul value or there are non-terminating interior nul
478 /// values in the slice. In particular, an empty slice will result in an invalid
479 /// string slice.
480 #[must_use]
481 pub const unsafe fn from_slice_unchecked(slice: &[$uchar]) -> &Self {
482 let ptr: *const [$uchar] = slice;
483 &*(ptr as *const Self)
484 }
485
486 /// Constructs a mutable wide C string slice from a mutable slice of values without
487 /// checking for a terminating or interior nul values.
488 ///
489 /// # Safety
490 ///
491 /// This function is unsafe because it can lead to invalid string slice values when the
492 /// slice is missing a terminating nul value or there are non-terminating interior nul
493 /// values in the slice. In particular, an empty slice will result in an invalid
494 /// string slice.
495 #[must_use]
496 pub unsafe fn from_slice_unchecked_mut(slice: &mut [$uchar]) -> &mut Self {
497 let ptr: *mut [$uchar] = slice;
498 &mut *(ptr as *mut Self)
499 }
500
501 /// Copies the string reference to a new owned wide C string.
502 #[inline]
503 #[cfg(feature = "alloc")]
504 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
505 #[must_use]
506 pub fn to_ucstring(&self) -> crate::$ucstring {
507 unsafe { crate::$ucstring::from_vec_unchecked(self.inner.to_owned()) }
508 }
509
510 $(#[$to_ustring_meta])*
511 #[inline]
512 #[cfg(feature = "alloc")]
513 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
514 #[must_use]
515 pub fn to_ustring(&self) -> crate::$ustring {
516 crate::$ustring::from_vec(self.as_slice())
517 }
518
519 /// Converts to a slice of the underlying elements.
520 ///
521 /// The slice will **not** include the nul terminator.
522 #[inline]
523 #[must_use]
524 pub fn as_slice(&self) -> &[$uchar] {
525 &self.inner[..self.len()]
526 }
527
528 /// Converts to a mutable slice of the underlying elements.
529 ///
530 /// The slice will **not** include the nul terminator.
531 ///
532 /// # Safety
533 ///
534 /// This method is unsafe because you can violate the invariants of this type when
535 /// mutating the slice (i.e. by adding interior nul values).
536 #[inline]
537 #[must_use]
538 pub unsafe fn as_mut_slice(&mut self) -> &mut [$uchar] {
539 let len = self.len();
540 &mut self.inner[..len]
541 }
542
            /// Converts to a slice of the underlying elements, including the nul terminator.
            ///
            /// The returned slice is the whole underlying buffer, so it is one element longer
            /// than the value reported by `len()`.
            #[inline]
            #[must_use]
            pub const fn as_slice_with_nul(&self) -> &[$uchar] {
                &self.inner
            }
549
            /// Returns a raw pointer to the string.
            ///
            /// The pointer addresses the first element of the underlying buffer, which includes
            /// the nul terminator.
            ///
            /// The caller must ensure that the string outlives the pointer this function returns,
            /// or else it will end up pointing to garbage.
            ///
            /// The caller must also ensure that the memory the pointer (non-transitively) points to
            /// is never written to (except inside an `UnsafeCell`) using this pointer or any
            /// pointer derived from it. If you need to mutate the contents of the string, use
            /// [`as_mut_ptr`][Self::as_mut_ptr].
            ///
            /// Modifying the container referenced by this string may cause its buffer to be
            /// reallocated, which would also make any pointers to it invalid.
            #[inline]
            #[must_use]
            pub const fn as_ptr(&self) -> *const $uchar {
                self.inner.as_ptr()
            }
567
            /// Returns a mutable raw pointer to the string.
            ///
            /// The pointer addresses the first element of the underlying buffer, which includes
            /// the nul terminator.
            ///
            /// The caller must ensure that the string outlives the pointer this function returns,
            /// or else it will end up pointing to garbage.
            ///
            /// Modifying the container referenced by this string may cause its buffer to be
            /// reallocated, which would also make any pointers to it invalid.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr(&mut self) -> *mut $uchar {
                self.inner.as_mut_ptr()
            }
580
            /// Returns the two raw pointers spanning the underlying buffer of the string.
            ///
            /// The returned range is half-open, which means that the end pointer points one past
            /// the last element of the buffer. The buffer includes the nul terminator, so the
            /// range covers `len() + 1` elements and the end pointer points one past the nul
            /// terminator; even an empty string yields a range containing one element.
            ///
            /// See [`as_ptr`][Self::as_ptr] for warnings on using these pointers. The end pointer
            /// requires extra caution, as it does not point to a valid element in the slice.
            ///
            /// This function is useful for interacting with foreign interfaces which use two
            /// pointers to refer to a range of elements in memory, as is common in C++.
            #[inline]
            #[must_use]
            pub fn as_ptr_range(&self) -> Range<*const $uchar> {
                self.inner.as_ptr_range()
            }
598
            /// Returns the two unsafe mutable pointers spanning the underlying buffer of the
            /// string.
            ///
            /// The returned range is half-open, which means that the end pointer points one past
            /// the last element of the buffer. The buffer includes the nul terminator, so the
            /// range covers `len() + 1` elements and the end pointer points one past the nul
            /// terminator; even an empty string yields a range containing one element.
            ///
            /// See [`as_mut_ptr`][Self::as_mut_ptr] for warnings on using these pointers. The end
            /// pointer requires extra caution, as it does not point to a valid element in the
            /// slice.
            ///
            /// This function is useful for interacting with foreign interfaces which use two
            /// pointers to refer to a range of elements in memory, as is common in C++.
            #[inline]
            #[must_use]
            pub fn as_mut_ptr_range(&mut self) -> Range<*mut $uchar> {
                self.inner.as_mut_ptr_range()
            }
617
618 /// Returns the length of the string as number of elements (**not** number of bytes)
619 /// **not** including nul terminator.
620 #[inline]
621 #[must_use]
622 pub const fn len(&self) -> usize {
623 self.inner.len() - 1
624 }
625
626 /// Returns whether this string contains no data (i.e. is only the nul terminator).
627 #[inline]
628 #[must_use]
629 pub const fn is_empty(&self) -> bool {
630 self.len() == 0
631 }
632
633 $(#[$into_ucstring_meta])*
634 #[cfg(feature = "alloc")]
635 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
636 #[must_use]
637 pub fn into_ucstring(self: Box<Self>) -> crate::$ucstring {
638 let raw = Box::into_raw(self) as *mut [$uchar];
639 crate::$ucstring {
640 inner: unsafe { Box::from_raw(raw) },
641 }
642 }
643
644 /// Returns a wide string slice to this wide C string slice.
645 ///
646 /// The wide string slice will *not* include the nul-terminator.
647 #[inline]
648 #[must_use]
649 pub fn as_ustr(&self) -> &$ustr {
650 $ustr::from_slice(self.as_slice())
651 }
652
653 /// Returns a wide string slice to this wide C string slice.
654 ///
655 /// The wide string slice will include the nul-terminator.
656 #[inline]
657 #[must_use]
658 pub fn as_ustr_with_nul(&self) -> &$ustr {
659 $ustr::from_slice(self.as_slice_with_nul())
660 }
661
662 /// Returns a mutable wide string slice to this wide C string slice.
663 ///
664 /// The wide string slice will *not* include the nul-terminator.
665 ///
666 /// # Safety
667 ///
668 /// This method is unsafe because you can violate the invariants of this type when
669 /// mutating the string (i.e. by adding interior nul values).
670 #[inline]
671 #[must_use]
672 pub unsafe fn as_mut_ustr(&mut self) -> &mut $ustr {
673 $ustr::from_slice_mut(self.as_mut_slice())
674 }
675
676 #[cfg(feature = "alloc")]
677 pub(crate) fn from_inner(slice: &[$uchar]) -> &$ucstr {
678 let ptr: *const [$uchar] = slice;
679 unsafe { &*(ptr as *const $ucstr) }
680 }
681
682 #[cfg(feature = "alloc")]
683 pub(crate) fn from_inner_mut(slice: &mut [$uchar]) -> &mut $ucstr {
684 let ptr: *mut [$uchar] = slice;
685 unsafe { &mut *(ptr as *mut $ucstr) }
686 }
687
            // The public documentation for this method is supplied by the macro caller through
            // `$display_meta`.
            $(#[$display_meta])*
            #[inline]
            #[must_use]
            pub fn display(&self) -> Display<'_, $ucstr> {
                // Wrap a borrow of `self` in the `Display` adapter; no data is copied here.
                Display { str: self }
            }
694
695 /// Returns a subslice of the string.
696 ///
697 /// This is the non-panicking alternative to indexing the string. Returns [`None`]
698 /// whenever equivalent indexing operation would panic.
699 #[inline]
700 #[must_use]
701 pub fn get<I>(&self, i: I) -> Option<&$ustr>
702 where
703 I: SliceIndex<[$uchar], Output = [$uchar]>,
704 {
705 self.as_slice().get(i).map($ustr::from_slice)
706 }
707
708 /// Returns a mutable subslice of the string.
709 ///
710 /// This is the non-panicking alternative to indexing the string. Returns [`None`]
711 /// whenever equivalent indexing operation would panic.
712 ///
713 /// # Safety
714 ///
715 /// This method is unsafe because you can violate the invariants of this type when
716 /// mutating the memory the pointer points to (i.e. by adding interior nul values).
717 #[inline]
718 #[must_use]
719 pub unsafe fn get_mut<I>(&mut self, i: I) -> Option<&mut $ustr>
720 where
721 I: SliceIndex<[$uchar], Output = [$uchar]>,
722 {
723 self.as_mut_slice().get_mut(i).map($ustr::from_slice_mut)
724 }
725
726 /// Returns an unchecked subslice of the string.
727 ///
728 /// This is the unchecked alternative to indexing the string.
729 ///
730 /// # Safety
731 ///
732 /// Callers of this function are responsible that these preconditions are satisfied:
733 ///
734 /// - The starting index must not exceed the ending index;
735 /// - Indexes must be within bounds of the original slice.
736 ///
737 /// Failing that, the returned string slice may reference invalid memory.
738 #[inline]
739 #[must_use]
740 pub unsafe fn get_unchecked<I>(&self, i: I) -> &$ustr
741 where
742 I: SliceIndex<[$uchar], Output = [$uchar]>,
743 {
744 $ustr::from_slice(self.as_slice().get_unchecked(i))
745 }
746
747 /// Returns aa mutable, unchecked subslice of the string.
748 ///
749 /// This is the unchecked alternative to indexing the string.
750 ///
751 /// # Safety
752 ///
753 /// Callers of this function are responsible that these preconditions are satisfied:
754 ///
755 /// - The starting index must not exceed the ending index;
756 /// - Indexes must be within bounds of the original slice.
757 ///
758 /// Failing that, the returned string slice may reference invalid memory.
759 ///
760 /// This method is unsafe because you can violate the invariants of this type when
761 /// mutating the memory the pointer points to (i.e. by adding interior nul values).
762 #[inline]
763 #[must_use]
764 pub unsafe fn get_unchecked_mut<I>(&mut self, i: I) -> &mut $ustr
765 where
766 I: SliceIndex<[$uchar], Output = [$uchar]>,
767 {
768 $ustr::from_slice_mut(self.as_mut_slice().get_unchecked_mut(i))
769 }
770
771 /// Divide one string slice into two at an index.
772 ///
773 /// The argument, `mid`, should be an offset from the start of the string.
774 ///
775 /// The two slices returned go from the start of the string slice to `mid`, and from
776 /// `mid` to the end of the string slice.
777 ///
778 /// To get mutable string slices instead, see the [`split_at_mut`][Self::split_at_mut]
779 /// method.
780 #[inline]
781 #[must_use]
782 pub fn split_at(&self, mid: usize) -> (&$ustr, &$ustr) {
783 let split = self.as_slice().split_at(mid);
784 ($ustr::from_slice(split.0), $ustr::from_slice(split.1))
785 }
786
787 /// Divide one mutable string slice into two at an index.
788 ///
789 /// The argument, `mid`, should be an offset from the start of the string.
790 ///
791 /// The two slices returned go from the start of the string slice to `mid`, and from
792 /// `mid` to the end of the string slice.
793 ///
794 /// To get immutable string slices instead, see the [`split_at`][Self::split_at] method.
795 ///
796 /// # Safety
797 ///
798 /// This method is unsafe because you can violate the invariants of this type when
799 /// mutating the memory the pointer points to (i.e. by adding interior nul values).
800 #[inline]
801 #[must_use]
802 pub unsafe fn split_at_mut(&mut self, mid: usize) -> (&mut $ustr, &mut $ustr) {
803 let split = self.as_mut_slice().split_at_mut(mid);
804 ($ustr::from_slice_mut(split.0), $ustr::from_slice_mut(split.1))
805 }
806
807 /// Creates a new owned string by repeating this string `n` times.
808 ///
809 /// # Panics
810 ///
811 /// This function will panic if the capacity would overflow.
812 #[inline]
813 #[cfg(feature = "alloc")]
814 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
815 #[must_use]
816 pub fn repeat(&self, n: usize) -> crate::$ucstring {
817 unsafe { crate::$ucstring::from_vec_unchecked(self.as_slice().repeat(n)) }
818 }
819 }
820
        // Identity conversion: a mutable wide C string slice is usable wherever
        // `AsMut` of the same type is expected.
        impl AsMut<$ucstr> for $ucstr {
            #[inline]
            fn as_mut(&mut self) -> &mut $ucstr {
                self
            }
        }
827
        // Identity conversion; this is what lets a `&$ucstr` be passed to `$ucstr::new`.
        impl AsRef<$ucstr> for $ucstr {
            #[inline]
            fn as_ref(&self) -> &Self {
                self
            }
        }
834
        // Element-slice view of the string; delegates to `as_slice`, so the nul terminator
        // is not included.
        impl AsRef<[$uchar]> for $ucstr {
            #[inline]
            fn as_ref(&self) -> &[$uchar] {
                self.as_slice()
            }
        }
841
        // Wide string slice view of the string; delegates to `as_ustr`, so the nul
        // terminator is not included.
        impl AsRef<$ustr> for $ucstr {
            #[inline]
            fn as_ref(&self) -> &$ustr {
                self.as_ustr()
            }
        }
848
849 impl<'a> Default for &'a $ucstr {
850 #[inline]
851 fn default() -> Self {
852 const SLICE: &[$uchar] = &[$ucstr::NUL_TERMINATOR];
853 unsafe { $ucstr::from_slice_unchecked(SLICE) }
854 }
855 }
856
857 #[cfg(feature = "alloc")]
858 impl Default for Box<$ucstr> {
859 #[inline]
860 fn default() -> Box<$ucstr> {
861 let boxed: Box<[$uchar]> = Box::from([$ucstr::NUL_TERMINATOR]);
862 unsafe { Box::from_raw(Box::into_raw(boxed) as *mut $ucstr) }
863 }
864 }
865
866 #[cfg(feature = "alloc")]
867 impl<'a> From<&'a $ucstr> for Box<$ucstr> {
868 #[inline]
869 fn from(s: &'a $ucstr) -> Box<$ucstr> {
870 let boxed: Box<[$uchar]> = Box::from(s.as_slice_with_nul());
871 unsafe { Box::from_raw(Box::into_raw(boxed) as *mut $ucstr) }
872 }
873 }
874
        // Conversion into an owned `OsString`, available with the `std` feature only.
        // Delegates to the `to_os_string` method of the string type.
        #[cfg(feature = "std")]
        impl From<&$ucstr> for std::ffi::OsString {
            #[inline]
            fn from(s: &$ucstr) -> std::ffi::OsString {
                s.to_os_string()
            }
        }
882
883 impl<I> Index<I> for $ucstr
884 where
885 I: SliceIndex<[$uchar], Output = [$uchar]>,
886 {
887 type Output = $ustr;
888
889 #[inline]
890 fn index(&self, index: I) -> &Self::Output {
891 $ustr::from_slice(&self.as_slice()[index])
892 }
893 }
894
895 impl PartialEq<$ucstr> for &$ucstr {
896 #[inline]
897 fn eq(&self, other: &$ucstr) -> bool {
898 self.as_slice() == other.as_slice()
899 }
900 }
901
902 impl PartialEq<&$ucstr> for $ucstr {
903 #[inline]
904 fn eq(&self, other: &&$ucstr) -> bool {
905 self.as_slice() == other.as_slice()
906 }
907 }
908
909 impl PartialEq<$ustr> for $ucstr {
910 #[inline]
911 fn eq(&self, other: &$ustr) -> bool {
912 self.as_slice() == other.as_slice()
913 }
914 }
915
916 impl PartialEq<$ustr> for &$ucstr {
917 #[inline]
918 fn eq(&self, other: &$ustr) -> bool {
919 self.as_slice() == other.as_slice()
920 }
921 }
922
923 impl PartialEq<&$ustr> for $ucstr {
924 #[inline]
925 fn eq(&self, other: &&$ustr) -> bool {
926 self.as_slice() == other.as_slice()
927 }
928 }
929
930 impl PartialOrd<$ustr> for $ucstr {
931 #[inline]
932 fn partial_cmp(&self, other: &$ustr) -> Option<core::cmp::Ordering> {
933 self.as_ustr().partial_cmp(other)
934 }
935 }
936 };
937}
938
// Instantiates the shared wide C string slice implementation for 16-bit elements.
ucstr_common_impl! {
    /// C-style 16-bit wide string slice for [`U16CString`][crate::U16CString].
    ///
    /// [`U16CStr`] is to [`U16CString`][crate::U16CString] as [`CStr`][std::ffi::CStr] is to
    /// [`CString`][std::ffi::CString].
    ///
    /// [`U16CStr`] are string slices that do not have a defined encoding. While it is sometimes
    /// assumed that they contain possibly invalid or ill-formed UTF-16 data, they may be used for
    /// any wide encoded string.
    ///
    /// # Nul termination
    ///
    /// [`U16CStr`] is aware of nul (`0`) values. Unless unchecked conversions are used, all
    /// [`U16CStr`] strings end with a nul-terminator in the underlying buffer and contain no
    /// internal nul values. These strings are intended to be used with C FFI functions that
    /// require nul-terminated strings.
    ///
    /// Because of the nul termination requirement, multiple classes of methods are provided for
    /// constructing a [`U16CStr`] under various scenarios. By default, methods such as
    /// [`from_ptr`][Self::from_ptr] and [`from_slice`][Self::from_slice] return an error if the
    /// input does not terminate with a nul value, or if it contains any interior nul values before
    /// the terminator.
    ///
    /// `_truncate` methods on the other hand, such as
    /// [`from_ptr_truncate`][Self::from_ptr_truncate] and
    /// [`from_slice_truncate`][Self::from_slice_truncate], construct a slice that terminates with
    /// the first nul value encountered in the string, only returning an error if the slice contains
    /// no nul values at all. Use this to mimic the behavior of C functions such as `strlen` when
    /// you don't know if the input is clean of interior nuls.
    ///
    /// Finally, unsafe `_unchecked` variants of these methods, such as
    /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] and
    /// [`from_slice_unchecked`][Self::from_slice_unchecked] allow bypassing any checks for nul
    /// values, when the input has already been ensured to have a nul terminator and no interior
    /// nul values.
    ///
    /// # Examples
    ///
    /// The easiest way to use [`U16CStr`] outside of FFI is with the [`u16cstr!`][crate::u16cstr]
    /// macro to convert string literals into nul-terminated UTF-16 string slices at compile time:
    ///
    /// ```
    /// use widestring::u16cstr;
    /// let hello = u16cstr!("Hello, world!");
    /// ```
    ///
    /// You can also convert any [`u16`] slice directly, as long as it has a nul terminator:
    ///
    /// ```
    /// use widestring::{u16cstr, U16CStr};
    ///
    /// let sparkle_heart = [0xd83d, 0xdc96, 0x0];
    /// let sparkle_heart = U16CStr::from_slice(&sparkle_heart).unwrap();
    ///
    /// assert_eq!(u16cstr!("💖"), sparkle_heart);
    ///
    /// // This unpaired UTF-16 surrogate is invalid UTF-16, but is perfectly valid in U16CStr
    /// let malformed_utf16 = [0xd83d, 0x0];
    /// let s = U16CStr::from_slice(&malformed_utf16).unwrap();
    ///
    /// assert_eq!(s.len(), 1);
    /// ```
    ///
    /// When working with a FFI, it is useful to create a [`U16CStr`] from a pointer:
    ///
    /// ```
    /// use widestring::{u16cstr, U16CStr};
    ///
    /// let sparkle_heart = [0xd83d, 0xdc96, 0x0];
    /// let s = unsafe {
    ///     // Note the string and pointer length does not include the nul terminator
    ///     U16CStr::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len() - 1).unwrap()
    /// };
    /// assert_eq!(u16cstr!("💖"), s);
    ///
    /// // Alternatively, if the length of the pointer is unknown but definitely terminates in nul,
    /// // a C-style string version can be used
    /// let s = unsafe { U16CStr::from_ptr_str(sparkle_heart.as_ptr()) };
    ///
    /// assert_eq!(u16cstr!("💖"), s);
    /// ```
    struct U16CStr([u16]);

    // Companion types that the generated code refers to.
    type UCString = U16CString;
    type UStr = U16Str;
    type UString = U16String;

    /// Copies the string reference to a new owned wide string.
    ///
    /// The resulting wide string will **not** have a nul terminator.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16CString;
    /// let wcstr = U16CString::from_str("MyString").unwrap();
    /// // Convert U16CString to a U16String
    /// let wstr = wcstr.to_ustring();
    ///
    /// // U16CString will have a terminating nul
    /// let wcvec = wcstr.into_vec_with_nul();
    /// assert_eq!(wcvec[wcvec.len()-1], 0);
    /// // The resulting U16String will not have the terminating nul
    /// let wvec = wstr.into_vec();
    /// assert_ne!(wvec[wvec.len()-1], 0);
    /// ```
    fn to_ustring() -> {}

    /// Converts a boxed wide C string slice into an owned wide C string without copying or
    /// allocating.
    ///
    /// # Examples
    ///
    /// ```
    /// use widestring::U16CString;
    ///
    /// let v = vec![102u16, 111u16, 111u16]; // "foo"
    /// let c_string = U16CString::from_vec(v.clone()).unwrap();
    /// let boxed = c_string.into_boxed_ucstr();
    /// assert_eq!(boxed.into_ucstring(), U16CString::from_vec(v).unwrap());
    /// ```
    fn into_ucstring() -> {}

    /// Returns an object that implements [`Display`][std::fmt::Display] for printing
    /// strings that may contain non-Unicode data.
    ///
    /// A wide C string might contain data of any encoding. This function assumes the string is
    /// encoded in UTF-16, and returns a struct that implements the
    /// [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy but
    /// no heap allocations are performed, unlike with
    /// [`to_string_lossy`][Self::to_string_lossy].
    ///
    /// By default, invalid Unicode data is replaced with
    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
    /// to simply skip any invalid Unicode data and forego the replacement, you may use the
    /// [alternate formatting][std::fmt#sign0] with `{:#}`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use widestring::U16CStr;
    ///
    /// // 𝄞mus<invalid>ic<invalid>
    /// let s = U16CStr::from_slice(&[
    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, 0x0000,
    /// ]).unwrap();
    ///
    /// assert_eq!(format!("{}", s.display()),
    /// "𝄞mus�ic�"
    /// );
    /// ```
    ///
    /// Using alternate formatting style to skip invalid values entirely:
    ///
    /// ```
    /// use widestring::U16CStr;
    ///
    /// // 𝄞mus<invalid>ic<invalid>
    /// let s = U16CStr::from_slice(&[
    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, 0x0000,
    /// ]).unwrap();
    ///
    /// assert_eq!(format!("{:#}", s.display()),
    /// "𝄞music"
    /// );
    /// ```
    fn display() -> {}
}
1109
// Instantiates the shared wide C string slice implementation for 32-bit elements.
ucstr_common_impl! {
    /// C-style 32-bit wide string slice for [`U32CString`][crate::U32CString].
    ///
    /// [`U32CStr`] is to [`U32CString`][crate::U32CString] as [`CStr`][std::ffi::CStr] is to
    /// [`CString`][std::ffi::CString].
    ///
    /// [`U32CStr`] are string slices that do not have a defined encoding. While it is sometimes
    /// assumed that they contain possibly invalid or ill-formed UTF-32 data, they may be used for
    /// any wide encoded string.
    ///
    /// # Nul termination
    ///
    /// [`U32CStr`] is aware of nul (`0`) values. Unless unchecked conversions are used, all
    /// [`U32CStr`] strings end with a nul-terminator in the underlying buffer and contain no
    /// internal nul values. These strings are intended to be used with C FFI functions that
    /// require nul-terminated strings.
    ///
    /// Because of the nul termination requirement, multiple classes of methods are provided for
    /// constructing a [`U32CStr`] under various scenarios. By default, methods such as
    /// [`from_ptr`][Self::from_ptr] and [`from_slice`][Self::from_slice] return an error if the
    /// input does not terminate with a nul value, or if it contains any interior nul values before
    /// the terminator.
    ///
    /// `_truncate` methods on the other hand, such as
    /// [`from_ptr_truncate`][Self::from_ptr_truncate] and
    /// [`from_slice_truncate`][Self::from_slice_truncate], construct a slice that terminates with
    /// the first nul value encountered in the string, only returning an error if the slice contains
    /// no nul values at all. Use this to mimic the behavior of C functions such as `strlen` when
    /// you don't know if the input is clean of interior nuls.
    ///
    /// Finally, unsafe `_unchecked` variants of these methods, such as
    /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] and
    /// [`from_slice_unchecked`][Self::from_slice_unchecked] allow bypassing any checks for nul
    /// values, when the input has already been ensured to have a nul terminator and no interior
    /// nul values.
    ///
    /// # Examples
    ///
    /// The easiest way to use [`U32CStr`] outside of FFI is with the [`u32cstr!`][crate::u32cstr]
    /// macro to convert string literals into nul-terminated UTF-32 string slices at compile time:
    ///
    /// ```
    /// use widestring::u32cstr;
    /// let hello = u32cstr!("Hello, world!");
    /// ```
    ///
    /// You can also convert any [`u32`] slice directly, as long as it has a nul terminator:
    ///
    /// ```
    /// use widestring::{u32cstr, U32CStr};
    ///
    /// let sparkle_heart = [0x1f496, 0x0];
    /// let sparkle_heart = U32CStr::from_slice(&sparkle_heart).unwrap();
    ///
    /// assert_eq!(u32cstr!("💖"), sparkle_heart);
    ///
    /// // This UTF-16 surrogate is invalid UTF-32, but is perfectly valid in U32CStr
    /// let malformed_utf32 = [0xd83d, 0x0];
    /// let s = U32CStr::from_slice(&malformed_utf32).unwrap();
    ///
    /// assert_eq!(s.len(), 1);
    /// ```
    ///
    /// When working with a FFI, it is useful to create a [`U32CStr`] from a pointer:
    ///
    /// ```
    /// use widestring::{u32cstr, U32CStr};
    ///
    /// let sparkle_heart = [0x1f496, 0x0];
    /// let s = unsafe {
    ///     // Note the string and pointer length does not include the nul terminator
    ///     U32CStr::from_ptr(sparkle_heart.as_ptr(), sparkle_heart.len() - 1).unwrap()
    /// };
    /// assert_eq!(u32cstr!("💖"), s);
    ///
    /// // Alternatively, if the length of the pointer is unknown but definitely terminates in nul,
    /// // a C-style string version can be used
    /// let s = unsafe { U32CStr::from_ptr_str(sparkle_heart.as_ptr()) };
    ///
    /// assert_eq!(u32cstr!("💖"), s);
    /// ```
    struct U32CStr([u32]);

    // Companion types that the generated code refers to.
    type UCString = U32CString;
    type UStr = U32Str;
    type UString = U32String;

    /// Copies the string reference to a new owned wide string.
    ///
    /// The resulting wide string will **not** have a nul terminator.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let wcstr = U32CString::from_str("MyString").unwrap();
    /// // Convert U32CString to a U32String
    /// let wstr = wcstr.to_ustring();
    ///
    /// // U32CString will have a terminating nul
    /// let wcvec = wcstr.into_vec_with_nul();
    /// assert_eq!(wcvec[wcvec.len()-1], 0);
    /// // The resulting U32String will not have the terminating nul
    /// let wvec = wstr.into_vec();
    /// assert_ne!(wvec[wvec.len()-1], 0);
    /// ```
    fn to_ustring() -> {}

    /// Converts a boxed wide C string slice into an owned wide C string without copying or
    /// allocating.
    ///
    /// # Examples
    ///
    /// ```
    /// use widestring::U32CString;
    ///
    /// let v = vec![102u32, 111u32, 111u32]; // "foo"
    /// let c_string = U32CString::from_vec(v.clone()).unwrap();
    /// let boxed = c_string.into_boxed_ucstr();
    /// assert_eq!(boxed.into_ucstring(), U32CString::from_vec(v).unwrap());
    /// ```
    fn into_ucstring() -> {}

    /// Returns an object that implements [`Display`][std::fmt::Display] for printing
    /// strings that may contain non-Unicode data.
    ///
    /// A wide C string might contain data of any encoding. This function assumes the string is
    /// encoded in UTF-32, and returns a struct that implements the
    /// [`Display`][std::fmt::Display] trait in a way that decoding the string is lossy but
    /// no heap allocations are performed, unlike with
    /// [`to_string_lossy`][Self::to_string_lossy].
    ///
    /// By default, invalid Unicode data is replaced with
    /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). If you wish
    /// to simply skip any invalid Unicode data and forego the replacement, you may use the
    /// [alternate formatting][std::fmt#sign0] with `{:#}`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use widestring::U32CStr;
    ///
    /// // 𝄞mus<invalid>ic<invalid>
    /// let s = U32CStr::from_slice(&[
    ///     0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, 0x0000,
    /// ]).unwrap();
    ///
    /// assert_eq!(format!("{}", s.display()),
    /// "𝄞mus�ic�"
    /// );
    /// ```
    ///
    /// Using alternate formatting style to skip invalid values entirely:
    ///
    /// ```
    /// use widestring::U32CStr;
    ///
    /// // 𝄞mus<invalid>ic<invalid>
    /// let s = U32CStr::from_slice(&[
    ///     0x1d11e, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834, 0x0000,
    /// ]).unwrap();
    ///
    /// assert_eq!(format!("{:#}", s.display()),
    /// "𝄞music"
    /// );
    /// ```
    fn display() -> {}
}
1280
1281impl U16CStr {
1282 /// Copys a string to an owned [`OsString`][std::ffi::OsString].
1283 ///
1284 /// This makes a string copy of the [`U16CStr`]. Since [`U16CStr`] makes no guarantees that it
1285 /// is valid UTF-16, there is no guarantee that the resulting [`OsString`][std::ffi::OsString]
1286 /// will be valid data. The [`OsString`][std::ffi::OsString] will **not** have a nul
1287 /// terminator.
1288 ///
1289 /// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
1290 /// some platforms this may make an encoding conversions, while on other platforms (such as
1291 /// windows) no changes to the string will be made.
1292 ///
1293 /// # Examples
1294 ///
1295 /// ```rust
1296 /// use widestring::U16CString;
1297 /// use std::ffi::OsString;
1298 /// let s = "MyString";
1299 /// // Create a wide string from the string
1300 /// let wstr = U16CString::from_str(s).unwrap();
1301 /// // Create an OsString from the wide string
1302 /// let osstr = wstr.to_os_string();
1303 ///
1304 /// assert_eq!(osstr, OsString::from(s));
1305 /// ```
1306 #[inline]
1307 #[cfg(feature = "std")]
1308 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1309 #[must_use]
1310 pub fn to_os_string(&self) -> std::ffi::OsString {
1311 crate::platform::os_from_wide(self.as_slice())
1312 }
1313
1314 /// Copies the string to a [`String`] if it contains valid UTF-16 data.
1315 ///
1316 /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. It
1317 /// will **not* have a nul terminator.
1318 ///
1319 /// # Errors
1320 ///
1321 /// Returns an error if the string contains any invalid UTF-16 data.
1322 ///
1323 /// # Examples
1324 ///
1325 /// ```rust
1326 /// use widestring::U16CString;
1327 /// let s = "MyString";
1328 /// // Create a wide string from the string
1329 /// let wstr = U16CString::from_str(s).unwrap();
1330 /// // Create a regular string from the wide string
1331 /// let s2 = wstr.to_string().unwrap();
1332 ///
1333 /// assert_eq!(s2, s);
1334 /// ```
1335 #[inline]
1336 #[cfg(feature = "alloc")]
1337 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1338 pub fn to_string(&self) -> Result<String, crate::error::Utf16Error> {
1339 self.as_ustr().to_string()
1340 }
1341
1342 /// Decodes the string reference to a [`String`] even if it is invalid UTF-16 data.
1343 ///
1344 /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. Any
1345 /// invalid sequences are replaced with
1346 /// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
1347 /// �. It will **not* have a nul terminator.
1348 ///
1349 /// # Examples
1350 ///
1351 /// ```rust
1352 /// use widestring::U16CString;
1353 /// let s = "MyString";
1354 /// // Create a wide string from the string
1355 /// let wstr = U16CString::from_str(s).unwrap();
1356 /// // Create a regular string from the wide string
1357 /// let s2 = wstr.to_string_lossy();
1358 ///
1359 /// assert_eq!(s2, s);
1360 /// ```
1361 #[inline]
1362 #[cfg(feature = "alloc")]
1363 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1364 #[must_use]
1365 pub fn to_string_lossy(&self) -> String {
1366 String::from_utf16_lossy(self.as_slice())
1367 }
1368
1369 /// Returns an iterator over the [`char`][prim@char]s of a string slice.
1370 ///
1371 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
1372 /// may consist of invalid UTF-16, the iterator returned by this method
1373 /// is an iterator over `Result<char, DecodeUtf16Error>` instead of [`char`][prim@char]s
1374 /// directly. If you would like a lossy iterator over [`chars`][prim@char]s directly, instead
1375 /// use [`chars_lossy`][Self::chars_lossy].
1376 ///
1377 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1378 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1379 /// what you actually want. That functionality is not provided by by this crate.
1380 #[inline]
1381 #[must_use]
1382 pub fn chars(&self) -> CharsUtf16<'_> {
1383 CharsUtf16::new(self.as_slice())
1384 }
1385
1386 /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
1387 ///
1388 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
1389 /// may consist of invalid UTF-16, the iterator returned by this method will replace unpaired
1390 /// surrogates with
1391 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
1392 /// version of [`chars`][Self::chars].
1393 ///
1394 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1395 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1396 /// what you actually want. That functionality is not provided by by this crate.
1397 #[inline]
1398 #[must_use]
1399 pub fn chars_lossy(&self) -> CharsLossyUtf16<'_> {
1400 CharsLossyUtf16::new(self.as_slice())
1401 }
1402
1403 /// Returns an iterator over the chars of a string slice, and their positions.
1404 ///
1405 /// As this string has no defined encoding, this method assumes the string is UTF-16. Since it
1406 /// may consist of invalid UTF-16, the iterator returned by this method is an iterator over
1407 /// is an iterator over `Result<char, DecodeUtf16Error>` as well as their positions, instead of
1408 /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
1409 /// [`chars`][prim@char]s directly, instead use
1410 /// [`char_indices_lossy`][Self::char_indices_lossy].
1411 ///
1412 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1413 #[inline]
1414 #[must_use]
1415 pub fn char_indices(&self) -> CharIndicesUtf16<'_> {
1416 CharIndicesUtf16::new(self.as_slice())
1417 }
1418
1419 /// Returns a lossy iterator over the chars of a string slice, and their positions.
1420 ///
1421 /// As this string slice may consist of invalid UTF-16, the iterator returned by this method
1422 /// will replace unpaired surrogates with
1423 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
1424 /// positions of all characters. This is a lossy version of
1425 /// [`char_indices`][Self::char_indices].
1426 ///
1427 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1428 #[inline]
1429 #[must_use]
1430 pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf16<'_> {
1431 CharIndicesLossyUtf16::new(self.as_slice())
1432 }
1433}
1434
1435impl U32CStr {
1436 /// Constructs a string reference from a [`char`] nul-terminated string pointer.
1437 ///
1438 /// This will scan for nul values beginning with `p`. The first nul value will be used as the
1439 /// nul terminator for the string, similar to how libc string functions such as `strlen` work.
1440 ///
1441 /// # Safety
1442 ///
1443 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
1444 /// nul terminator, and the function could scan past the underlying buffer.
1445 ///
1446 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1447 /// In particular, the returned string reference *must not be mutated* for the duration of
1448 /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
1449 ///
1450 /// # Panics
1451 ///
1452 /// This function panics if `p` is null.
1453 ///
1454 /// # Caveat
1455 ///
1456 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1457 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1458 /// context, such as by providing a helper function taking the lifetime of a host value for the
1459 /// string, or by explicit annotation.
1460 #[inline]
1461 #[must_use]
1462 pub unsafe fn from_char_ptr_str<'a>(p: *const char) -> &'a Self {
1463 Self::from_ptr_str(p as *const u32)
1464 }
1465
1466 /// Constructs a mutable string reference from a mutable [`char`] nul-terminated string pointer.
1467 ///
1468 /// This will scan for nul values beginning with `p`. The first nul value will be used as the
1469 /// nul terminator for the string, similar to how libc string functions such as `strlen` work.
1470 ///
1471 /// # Safety
1472 ///
1473 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
1474 /// nul terminator, and the function could scan past the underlying buffer.
1475 ///
1476 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1477 ///
1478 /// # Panics
1479 ///
1480 /// This function panics if `p` is null.
1481 ///
1482 /// # Caveat
1483 ///
1484 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1485 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1486 /// context, such as by providing a helper function taking the lifetime of a host value for the
1487 /// string, or by explicit annotation.
1488 #[inline]
1489 #[must_use]
1490 pub unsafe fn from_char_ptr_str_mut<'a>(p: *mut char) -> &'a mut Self {
1491 Self::from_ptr_str_mut(p as *mut u32)
1492 }
1493
1494 /// Constructs a string reference from a [`char`] pointer and a length.
1495 ///
1496 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1497 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means
1498 /// that `p` is a pointer directly to the nul terminator of the string.
1499 ///
1500 /// # Errors
1501 ///
1502 /// This will scan the pointer string for an interior nul value and error if one is found
1503 /// before the nul terminator at `len` offset. To avoid scanning for interior nuls,
1504 /// [`from_ptr_unchecked`][Self::from_ptr_unchecked] may be used instead.
1505 ///
1506 /// An error is returned if the value at `len` offset is not a nul terminator.
1507 ///
1508 /// # Safety
1509 ///
1510 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len +
1511 /// 1` elements.
1512 ///
1513 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1514 /// In particular, the returned string reference *must not be mutated* for the duration of
1515 /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
1516 ///
1517 /// # Panics
1518 ///
1519 /// This function panics if `p` is null.
1520 ///
1521 /// # Caveat
1522 ///
1523 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1524 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1525 /// context, such as by providing a helper function taking the lifetime of a host value for the
1526 /// string, or by explicit annotation.
1527 pub unsafe fn from_char_ptr<'a>(p: *const char, len: usize) -> Result<&'a Self, NulError<u32>> {
1528 Self::from_ptr(p as *const u32, len)
1529 }
1530
1531 /// Constructs a mutable string reference from a mutable [`char`] pointer and a length.
1532 ///
1533 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1534 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means
1535 /// that `p` is a pointer directly to the nul terminator of the string.
1536 ///
1537 /// # Errors
1538 ///
1539 /// This will scan the pointer string for an interior nul value and error if one is found
1540 /// before the nul terminator at `len` offset. To avoid scanning for interior nuls,
1541 /// [`from_ptr_unchecked_mut`][Self::from_ptr_unchecked_mut] may be used instead.
1542 ///
1543 /// An error is returned if the value at `len` offset is not a nul terminator.
1544 ///
1545 /// # Safety
1546 ///
1547 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len +
1548 /// 1` elements.
1549 ///
1550 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1551 ///
1552 /// # Panics
1553 ///
1554 /// This function panics if `p` is null.
1555 ///
1556 /// # Caveat
1557 ///
1558 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1559 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1560 /// context, such as by providing a helper function taking the lifetime of a host value for the
1561 /// string, or by explicit annotation.
1562 pub unsafe fn from_char_ptr_mut<'a>(
1563 p: *mut char,
1564 len: usize,
1565 ) -> Result<&'a mut Self, NulError<u32>> {
1566 Self::from_ptr_mut(p as *mut u32, len)
1567 }
1568
1569 /// Constructs a string reference from a [`char`] pointer and a length, truncating at the first
1570 /// nul terminator.
1571 ///
1572 /// The `len` argument is the number of elements, **not** the number of bytes. This will scan
1573 /// for nul values beginning with `p` until offset `len`. The first nul value will be used as
1574 /// the nul terminator for the string, ignoring any remaining values left before `len`.
1575 ///
1576 /// # Errors
1577 ///
1578 /// If no nul terminator is found after `len` + 1 elements, an error is returned.
1579 ///
1580 /// # Safety
1581 ///
1582 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
1583 /// nul terminator, and the function could scan past the underlying buffer.
1584 ///
1585 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1586 /// In particular, the returned string reference *must not be mutated* for the duration of
1587 /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
1588 ///
1589 /// # Panics
1590 ///
1591 /// This function panics if `p` is null.
1592 ///
1593 /// # Caveat
1594 ///
1595 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1596 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1597 /// context, such as by providing a helper function taking the lifetime of a host value for the
1598 /// string, or by explicit annotation.
1599 pub unsafe fn from_char_ptr_truncate<'a>(
1600 p: *const char,
1601 len: usize,
1602 ) -> Result<&'a Self, MissingNulTerminator> {
1603 Self::from_ptr_truncate(p as *const u32, len)
1604 }
1605
1606 /// Constructs a mutable string reference from a mutable [`char`] pointer and a length,
1607 /// truncating at the first nul terminator.
1608 ///
1609 /// The `len` argument is the number of elements, **not** the number of bytes. This will scan
1610 /// for nul values beginning with `p` until offset `len`. The first nul value will be used as
1611 /// the nul terminator for the string, ignoring any remaining values left before `len`.
1612 ///
1613 /// # Errors
1614 ///
1615 /// If no nul terminator is found after `len` + 1 elements, an error is returned.
1616 ///
1617 /// # Safety
1618 ///
1619 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
1620 /// nul terminator, and the function could scan past the underlying buffer.
1621 ///
1622 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1623 ///
1624 /// # Panics
1625 ///
1626 /// This function panics if `p` is null.
1627 ///
1628 /// # Caveat
1629 ///
1630 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1631 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1632 /// context, such as by providing a helper function taking the lifetime of a host value for the
1633 /// string, or by explicit annotation.
1634 pub unsafe fn from_char_ptr_truncate_mut<'a>(
1635 p: *mut char,
1636 len: usize,
1637 ) -> Result<&'a mut Self, MissingNulTerminator> {
1638 Self::from_ptr_truncate_mut(p as *mut u32, len)
1639 }
1640
1641 /// Constructs a string reference from a [`char`] pointer and a length without checking for any
1642 /// nul values.
1643 ///
1644 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1645 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means
1646 /// that `p` is a pointer directly to the nul terminator of the string.
1647 ///
1648 /// # Safety
1649 ///
1650 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len +
1651 /// 1` elements, nor that is has a terminating nul value.
1652 ///
1653 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts].
1654 /// In particular, the returned string reference *must not be mutated* for the duration of
1655 /// lifetime `'a`, except inside an [`UnsafeCell`][std::cell::UnsafeCell].
1656 ///
1657 /// The interior values of the pointer are not scanned for nul. Any interior nul values or
1658 /// a missing nul terminator at pointer offset `len` + 1 will result in an invalid string slice.
1659 ///
1660 /// # Panics
1661 ///
1662 /// This function panics if `p` is null.
1663 ///
1664 /// # Caveat
1665 ///
1666 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1667 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1668 /// context, such as by providing a helper function taking the lifetime of a host value for the
1669 /// string, or by explicit annotation.
1670 #[inline]
1671 #[must_use]
1672 pub unsafe fn from_char_ptr_unchecked<'a>(p: *const char, len: usize) -> &'a Self {
1673 Self::from_ptr_unchecked(p as *const u32, len)
1674 }
1675
1676 /// Constructs a mutable string reference from a mutable [`char`] pointer and a length without
1677 /// checking for any nul values.
1678 ///
1679 /// The `len` argument is the number of elements, **not** the number of bytes, and does
1680 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means
1681 /// that `p` is a pointer directly to the nul terminator of the string.
1682 ///
1683 /// # Safety
1684 ///
1685 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len +
1686 /// 1` elements, nor that is has a terminating nul value.
1687 ///
1688 /// In addition, the data must meet the safety conditions of [std::slice::from_raw_parts_mut].
1689 ///
1690 /// The interior values of the pointer are not scanned for nul. Any interior nul values or
1691 /// a missing nul terminator at pointer offset `len` + 1 will result in an invalid string slice.
1692 ///
1693 /// # Panics
1694 ///
1695 /// This function panics if `p` is null.
1696 ///
1697 /// # Caveat
1698 ///
1699 /// The lifetime for the returned string is inferred from its usage. To prevent accidental
1700 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
1701 /// context, such as by providing a helper function taking the lifetime of a host value for the
1702 /// string, or by explicit annotation.
1703 #[inline]
1704 #[must_use]
1705 pub unsafe fn from_char_ptr_unchecked_mut<'a>(p: *mut char, len: usize) -> &'a mut Self {
1706 Self::from_ptr_unchecked_mut(p as *mut u32, len)
1707 }
1708
1709 /// Constructs a string reference from a [`char`] slice with a terminating nul, checking for
1710 /// invalid interior nul values.
1711 ///
1712 /// The slice must have at least one item, the nul terminator, even for an empty string.
1713 ///
1714 /// # Errors
1715 ///
1716 /// If there are nul values in the slice except for the last value, an error is returned.
1717 ///
1718 /// An error is also returned if the last value of the slice is not a nul terminator.
1719 pub fn from_char_slice(slice: &[char]) -> Result<&Self, NulError<u32>> {
1720 let ptr: *const [char] = slice;
1721 Self::from_slice(unsafe { &*(ptr as *const [u32]) })
1722 }
1723
1724 /// Constructs a mutable string reference from a mutable [`char`] slice with a terminating nul,
1725 /// checking for invalid interior nul values.
1726 ///
1727 /// The slice must have at least one item, the nul terminator, even for an empty string.
1728 ///
1729 /// # Errors
1730 ///
1731 /// If there are nul values in the slice except for the last value, an error is returned.
1732 ///
1733 /// An error is also returned if the last value of the slice is not a nul terminator.
1734 pub fn from_char_slice_mut(slice: &mut [char]) -> Result<&mut Self, NulError<u32>> {
1735 let ptr: *mut [char] = slice;
1736 Self::from_slice_mut(unsafe { &mut *(ptr as *mut [u32]) })
1737 }
1738
1739 /// Constructs a string reference from a slice of [`char`] values, truncating at the first nul
1740 /// terminator.
1741 ///
1742 /// The slice will be scanned for nul values. When a nul value is found, it is treated as the
1743 /// terminator for the string, and the string slice will be truncated to that nul.
1744 ///
1745 /// # Errors
1746 ///
1747 /// If there are no nul values in the slice, an error is returned.
1748 #[inline]
1749 pub fn from_char_slice_truncate(slice: &[char]) -> Result<&Self, MissingNulTerminator> {
1750 let ptr: *const [char] = slice;
1751 Self::from_slice_truncate(unsafe { &*(ptr as *const [u32]) })
1752 }
1753
1754 /// Constructs a mutable string reference from a mutable slice of [`char`] values, truncating at
1755 /// the first nul terminator.
1756 ///
1757 /// The slice will be scanned for nul values. When a nul value is found, it is treated as the
1758 /// terminator for the string, and the string slice will be truncated to that nul.
1759 ///
1760 /// # Errors
1761 ///
1762 /// If there are no nul values in the slice, an error is returned.
1763 #[inline]
1764 pub fn from_char_slice_truncate_mut(
1765 slice: &mut [char],
1766 ) -> Result<&mut Self, MissingNulTerminator> {
1767 let ptr: *mut [char] = slice;
1768 Self::from_slice_truncate_mut(unsafe { &mut *(ptr as *mut [u32]) })
1769 }
1770
1771 /// Constructs a string reference from a [`char`] slice without checking for a terminating or
1772 /// interior nul values.
1773 ///
1774 /// # Safety
1775 ///
1776 /// This function is unsafe because it can lead to invalid C string slice values when the slice
1777 /// is missing a terminating nul value or there are non-terminating interior nul values
1778 /// in the slice. In particular, an empty slice will result in an invalid string slice.
1779 #[inline]
1780 #[must_use]
1781 pub unsafe fn from_char_slice_unchecked(slice: &[char]) -> &Self {
1782 let ptr: *const [char] = slice;
1783 Self::from_slice_unchecked(&*(ptr as *const [u32]))
1784 }
1785
1786 /// Constructs a mutable string reference from a mutable [`char`] slice without checking for a
1787 /// terminating or interior nul values.
1788 ///
1789 /// # Safety
1790 ///
1791 /// This function is unsafe because it can lead to invalid C string slice values when the slice
1792 /// is missing a terminating nul value or there are non-terminating interior nul values
1793 /// in the slice. In particular, an empty slice will result in an invalid string slice.
1794 #[inline]
1795 #[must_use]
1796 pub unsafe fn from_char_slice_unchecked_mut(slice: &mut [char]) -> &mut Self {
1797 let ptr: *mut [char] = slice;
1798 Self::from_slice_unchecked_mut(&mut *(ptr as *mut [u32]))
1799 }
1800
1801 /// Decodes a string reference to an owned [`OsString`][std::ffi::OsString].
1802 ///
1803 /// This makes a string copy of this reference. Since [`U32CStr`] makes no guarantees that it
1804 /// is valid UTF-32, there is no guarantee that the resulting [`OsString`][std::ffi::OsString]
1805 /// will be valid data. The [`OsString`][std::ffi::OsString] will **not** have a nul
1806 /// terminator.
1807 ///
1808 /// Note that the encoding of [`OsString`][std::ffi::OsString] is platform-dependent, so on
1809 /// some platforms this may make an encoding conversions, while on other platforms no changes to
1810 /// the string will be made.
1811 ///
1812 /// # Examples
1813 ///
1814 /// ```rust
1815 /// use widestring::U32CString;
1816 /// use std::ffi::OsString;
1817 /// let s = "MyString";
1818 /// // Create a wide string from the string
1819 /// let wstr = U32CString::from_str(s).unwrap();
1820 /// // Create an OsString from the wide string
1821 /// let osstr = wstr.to_os_string();
1822 ///
1823 /// assert_eq!(osstr, OsString::from(s));
1824 /// ```
1825 #[inline]
1826 #[cfg(feature = "std")]
1827 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
1828 #[must_use]
1829 pub fn to_os_string(&self) -> std::ffi::OsString {
1830 self.as_ustr().to_os_string()
1831 }
1832
1833 /// Decodes the string reference to a [`String`] if it contains valid UTF-32 data.
1834 ///
1835 /// This method assumes this string is encoded as UTF-32 and attempts to decode it as such. It
1836 /// will **not* have a nul terminator.
1837 ///
1838 /// # Errors
1839 ///
1840 /// Returns an error if the string contains any invalid UTF-32 data.
1841 ///
1842 /// # Examples
1843 ///
1844 /// ```rust
1845 /// use widestring::U32CString;
1846 /// let s = "MyString";
1847 /// // Create a wide string from the string
1848 /// let wstr = U32CString::from_str(s).unwrap();
1849 /// // Create a regular string from the wide string
1850 /// let s2 = wstr.to_string().unwrap();
1851 ///
1852 /// assert_eq!(s2, s);
1853 /// ```
1854 #[inline]
1855 #[cfg(feature = "alloc")]
1856 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1857 pub fn to_string(&self) -> Result<String, crate::error::Utf32Error> {
1858 self.as_ustr().to_string()
1859 }
1860
1861 /// Decodes the string reference to a [`String`] even if it is invalid UTF-32 data.
1862 ///
1863 /// This method assumes this string is encoded as UTF-16 and attempts to decode it as such. Any
1864 /// invalid sequences are replaced with
1865 /// [`U+FFFD REPLACEMENT CHARACTER`][core::char::REPLACEMENT_CHARACTER], which looks like this:
1866 /// �. It will **not* have a nul terminator.
1867 ///
1868 /// # Examples
1869 ///
1870 /// ```rust
1871 /// use widestring::U32CString;
1872 /// let s = "MyString";
1873 /// // Create a wide string from the string
1874 /// let wstr = U32CString::from_str(s).unwrap();
1875 /// // Create a regular string from the wide string
1876 /// let s2 = wstr.to_string_lossy();
1877 ///
1878 /// assert_eq!(s2, s);
1879 /// ```
1880 #[inline]
1881 #[cfg(feature = "alloc")]
1882 #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
1883 #[must_use]
1884 pub fn to_string_lossy(&self) -> String {
1885 self.as_ustr().to_string_lossy()
1886 }
1887
1888 /// Returns an iterator over the [`char`][prim@char]s of a string slice.
1889 ///
1890 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1891 /// may consist of invalid UTF-32, the iterator returned by this method
1892 /// is an iterator over `Result<char, DecodeUtf32Error>` instead of [`char`][prim@char]s
1893 /// directly. If you would like a lossy iterator over [`chars`][prim@char]s directly, instead
1894 /// use [`chars_lossy`][Self::chars_lossy].
1895 ///
1896 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1897 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1898 /// what you actually want. That functionality is not provided by by this crate.
1899 #[inline]
1900 #[must_use]
1901 pub fn chars(&self) -> CharsUtf32<'_> {
1902 CharsUtf32::new(self.as_slice())
1903 }
1904
1905 /// Returns a lossy iterator over the [`char`][prim@char]s of a string slice.
1906 ///
1907 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1908 /// may consist of invalid UTF-32, the iterator returned by this method will replace invalid
1909 /// data with
1910 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). This is a lossy
1911 /// version of [`chars`][Self::chars].
1912 ///
1913 /// It's important to remember that [`char`][prim@char] represents a Unicode Scalar Value, and
1914 /// may not match your idea of what a 'character' is. Iteration over grapheme clusters may be
1915 /// what you actually want. That functionality is not provided by by this crate.
1916 #[inline]
1917 #[must_use]
1918 pub fn chars_lossy(&self) -> CharsLossyUtf32<'_> {
1919 CharsLossyUtf32::new(self.as_slice())
1920 }
1921
1922 /// Returns an iterator over the chars of a string slice, and their positions.
1923 ///
1924 /// As this string has no defined encoding, this method assumes the string is UTF-32. Since it
1925 /// may consist of invalid UTF-32, the iterator returned by this method is an iterator over
1926 /// `Result<char, DecodeUtf32Error>` as well as their positions, instead of
1927 /// [`char`][prim@char]s directly. If you would like a lossy indices iterator over
1928 /// [`chars`][prim@char]s directly, instead use
1929 /// [`char_indices_lossy`][Self::char_indices_lossy].
1930 ///
1931 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1932 #[inline]
1933 #[must_use]
1934 pub fn char_indices(&self) -> CharIndicesUtf32<'_> {
1935 CharIndicesUtf32::new(self.as_slice())
1936 }
1937
1938 /// Returns a lossy iterator over the chars of a string slice, and their positions.
1939 ///
1940 /// As this string slice may consist of invalid UTF-32, the iterator returned by this method
1941 /// will replace invalid values with
1942 /// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�), as well as the
1943 /// positions of all characters. This is a lossy version of
1944 /// [`char_indices`][Self::char_indices].
1945 ///
1946 /// The iterator yields tuples. The position is first, the [`char`][prim@char] is second.
1947 #[inline]
1948 #[must_use]
1949 pub fn char_indices_lossy(&self) -> CharIndicesLossyUtf32<'_> {
1950 CharIndicesLossyUtf32::new(self.as_slice())
1951 }
1952}
1953
1954impl core::fmt::Debug for U16CStr {
1955 #[inline]
1956 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1957 crate::debug_fmt_u16(self.as_slice_with_nul(), f)
1958 }
1959}
1960
1961impl core::fmt::Debug for U32CStr {
1962 #[inline]
1963 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1964 crate::debug_fmt_u32(self.as_slice_with_nul(), f)
1965 }
1966}
1967
/// Alias for [`U16CStr`] or [`U32CStr`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on non-Windows targets, where the alias resolves to [`U32CStr`].
#[cfg(not(windows))]
pub type WideCStr = U32CStr;
1972
/// Alias for [`U16CStr`] or [`U32CStr`] depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// This definition is compiled on Windows targets, where the alias resolves to [`U16CStr`].
#[cfg(windows)]
pub type WideCStr = U16CStr;
1977
/// Helper struct for printing wide C string values with [`format!`] and `{}`.
///
/// A wide C string may contain ill-formed UTF encoding. This struct implements the
/// [`Display`][std::fmt::Display] trait so that the string is decoded lossily without performing
/// any heap allocations, unlike [`to_string_lossy`][U16CStr::to_string_lossy]. It is created by
/// the [`display`][U16CStr::display] method on [`U16CStr`] and [`U32CStr`].
///
/// By default, invalid Unicode data is replaced with
/// [`U+FFFD REPLACEMENT CHARACTER`][std::char::REPLACEMENT_CHARACTER] (�). To skip invalid
/// Unicode data entirely and forego the replacement, use the
/// [alternate formatting][std::fmt#sign0] with `{:#}`.
pub struct Display<'a, S>
where
    S: ?Sized,
{
    // The borrowed wide C string being displayed.
    str: &'a S,
}
1992
1993impl core::fmt::Debug for Display<'_, U16CStr> {
1994 #[inline]
1995 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1996 core::fmt::Debug::fmt(&self.str, f)
1997 }
1998}
1999
2000impl core::fmt::Debug for Display<'_, U32CStr> {
2001 #[inline]
2002 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2003 core::fmt::Debug::fmt(&self.str, f)
2004 }
2005}
2006
2007impl core::fmt::Display for Display<'_, U16CStr> {
2008 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2009 for c in crate::decode_utf16_lossy(self.str.as_slice().iter().copied()) {
2010 // Allow alternate {:#} format which skips replacment chars entirely
2011 if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
2012 f.write_char(c)?;
2013 }
2014 }
2015 Ok(())
2016 }
2017}
2018
2019impl core::fmt::Display for Display<'_, U32CStr> {
2020 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2021 for c in crate::decode_utf32_lossy(self.str.as_slice().iter().copied()) {
2022 // Allow alternate {:#} format which skips replacment chars entirely
2023 if c != core::char::REPLACEMENT_CHARACTER || !f.alternate() {
2024 f.write_char(c)?;
2025 }
2026 }
2027 Ok(())
2028 }
2029}