os_str_bytes/raw_str.rs
1#![cfg_attr(feature = "nightly", allow(unreachable_code))]
2
3use std::borrow::Borrow;
4use std::borrow::Cow;
5use std::borrow::ToOwned;
6use std::ffi::OsStr;
7use std::ffi::OsString;
8use std::fmt;
9use std::fmt::Debug;
10use std::fmt::Display;
11use std::fmt::Formatter;
12use std::mem;
13use std::ops::Deref;
14use std::ops::Index;
15use std::ops::Range;
16use std::ops::RangeFrom;
17use std::ops::RangeFull;
18use std::ops::RangeInclusive;
19use std::ops::RangeTo;
20use std::ops::RangeToInclusive;
21use std::result;
22use std::str;
23
24#[cfg(feature = "memchr")]
25use memchr::memmem::find;
26#[cfg(feature = "memchr")]
27use memchr::memmem::rfind;
28
29use super::imp;
30use super::imp::raw;
31use super::iter::RawSplit;
32use super::pattern::Encoded as EncodedPattern;
33use super::private;
34use super::Pattern;
35
36if_checked_conversions! {
37 use super::EncodingError;
38 use super::Result;
39}
40
41if_nightly! {
42 use super::util;
43}
44
45#[cfg(not(feature = "memchr"))]
/// Fallback forward substring search, used when the "memchr" feature is
/// disabled.
///
/// Returns the offset of the first occurrence of `pat` within `string`, or
/// `None` if there is none. An empty `pat` matches at offset 0.
fn find(string: &[u8], pat: &[u8]) -> Option<usize> {
    // `windows` panics for a window size of zero, so the empty pattern is
    // answered up front.
    if pat.is_empty() {
        return Some(0);
    }
    string.windows(pat.len()).position(|window| window == pat)
}
50
51#[cfg(not(feature = "memchr"))]
/// Fallback backward substring search, used when the "memchr" feature is
/// disabled.
///
/// Returns the offset of the last occurrence of `pat` within `string`, or
/// `None` if there is none. An empty `pat` matches at the end of `string`.
fn rfind(string: &[u8], pat: &[u8]) -> Option<usize> {
    // `windows` panics for a window size of zero, so the empty pattern is
    // answered up front.
    if pat.is_empty() {
        return Some(string.len());
    }
    string.windows(pat.len()).rposition(|window| window == pat)
}
57
58#[allow(clippy::missing_safety_doc)]
/// Marker for types whose boxes may be reinterpreted as boxes of one another.
///
/// # Safety
///
/// `transmute_box` converts between any two implementors without checks, so
/// every implementor must be safe to reinterpret as every other one.
unsafe trait TransmuteBox {
    /// Reinterprets this box as a box of another implementor, reusing the
    /// same allocation.
    fn transmute_box<R>(self: Box<Self>) -> Box<R>
    where
        R: ?Sized + TransmuteBox,
    {
        let pointer = Box::into_raw(self);
        // SAFETY: This trait is only implemented for types that can be
        // transmuted.
        unsafe {
            Box::from_raw(mem::transmute_copy::<*mut Self, *mut R>(&pointer))
        }
    }
}
70
71// SAFETY: This struct has a layout that makes this operation safe.
72unsafe impl TransmuteBox for RawOsStr {}
73unsafe impl TransmuteBox for [u8] {}
74
75#[inline]
76fn leak_cow(string: Cow<'_, RawOsStr>) -> &'_ RawOsStr {
77 match string {
78 Cow::Borrowed(string) => string,
79 #[cfg_attr(not(feature = "nightly"), allow(unused_variables))]
80 Cow::Owned(string) => {
81 if_nightly_return! {{
82 Box::leak(string.into_box())
83 }}
84 unreachable!();
85 }
86 }
87}
88
89/// A container for borrowed byte strings converted by this crate.
90///
91/// This wrapper is intended to prevent violating the invariants of the
92/// [unspecified encoding] used by this crate and minimize encoding
93/// conversions.
94///
95/// # Indices
96///
97/// Methods of this struct that accept indices require that the index lie on a
98/// UTF-8 boundary. Although it is possible to manipulate platform strings
99/// based on other indices, this crate currently does not support them for
100/// slicing methods. They would add significant complication to the
101/// implementation and are generally not necessary. However, all indices
102/// returned by this struct can be used for slicing.
103///
104/// On Unix, all indices are permitted, to avoid false positives. However,
105/// relying on this implementation detail is discouraged. Platform-specific
106/// indices are error-prone.
107///
108/// # Complexity
109///
110/// All searching methods have worst-case multiplicative time complexity (i.e.,
111/// `O(self.raw_len() * pat.len())`). Enabling the "memchr" feature allows
112/// these methods to instead run in linear time in the worst case (documented
113/// for [`memchr::memmem::find`][memchr complexity]).
114///
115/// # Safety
116///
117/// Although this type is annotated with `#[repr(transparent)]`, the inner
118/// representation is not stable. Transmuting between this type and any other
119/// causes immediate undefined behavior.
120///
121/// # Nightly Notes
122///
123/// Indices are validated on all platforms.
124///
125/// [memchr complexity]: memchr::memmem::find#complexity
126/// [unspecified encoding]: super#encoding
#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
// `from_inner` transmutes `&[u8]` into `&RawOsStr`, which relies on this
// struct being a transparent wrapper around the byte slice.
#[repr(transparent)]
pub struct RawOsStr([u8]);
131
132impl RawOsStr {
133 const fn from_inner(string: &[u8]) -> &Self {
134 // SAFETY: This struct has a layout that makes this operation safe.
135 unsafe { mem::transmute(string) }
136 }
137
138 /// Converts a platform-native string into a representation that can be
139 /// more easily manipulated.
140 ///
141 /// This method performs the necessary conversion immediately, so it can be
142 /// expensive to call. It is recommended to continue using the returned
143 /// instance as long as possible (instead of the original [`OsStr`]), to
144 /// avoid repeated conversions.
145 ///
146 /// # Nightly Notes
147 ///
148 /// This method is deprecated. Use [`from_os_str`] instead.
149 ///
150 /// # Examples
151 ///
152 /// ```
153 /// use std::env;
154 /// # use std::io;
155 ///
156 /// use os_str_bytes::RawOsStr;
157 ///
158 /// let os_string = env::current_exe()?.into_os_string();
159 /// println!("{:?}", RawOsStr::new(&os_string));
160 /// #
161 /// # Ok::<_, io::Error>(())
162 /// ```
163 ///
164 /// [`from_os_str`]: Self::from_os_str
165 #[cfg_attr(
166 all(not(os_str_bytes_docs_rs), feature = "nightly"),
167 deprecated(since = "6.6.0", note = "use `from_os_str` instead")
168 )]
169 #[inline]
170 #[must_use]
171 pub fn new(string: &OsStr) -> Cow<'_, Self> {
172 if_nightly_return! {{
173 Cow::Borrowed(Self::from_os_str(string))
174 }}
175 match imp::os_str_to_bytes(string) {
176 Cow::Borrowed(string) => Cow::Borrowed(Self::from_inner(string)),
177 Cow::Owned(string) => Cow::Owned(RawOsString(string)),
178 }
179 }
180
181 if_nightly! {
182 /// Wraps a platform-native string, without copying or encoding
183 /// conversion.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// use std::env;
189 /// # use std::io;
190 ///
191 /// use os_str_bytes::RawOsStr;
192 ///
193 /// let os_string = env::current_exe()?.into_os_string();
194 /// println!("{:?}", RawOsStr::from_os_str(&os_string));
195 /// #
196 /// # Ok::<_, io::Error>(())
197 /// ```
198 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
199 #[inline]
200 #[must_use]
201 pub fn from_os_str(string: &OsStr) -> &Self {
202 Self::from_inner(string.as_encoded_bytes())
203 }
204 }
205
206 /// Wraps a string, without copying or encoding conversion.
207 ///
208 /// This method is much more efficient than [`RawOsStr::new`], since the
209 /// [encoding] used by this crate is compatible with UTF-8.
210 ///
211 /// # Examples
212 ///
213 /// ```
214 /// use os_str_bytes::RawOsStr;
215 ///
216 /// let string = "foobar";
217 /// let raw = RawOsStr::from_str(string);
218 /// assert_eq!(string, raw);
219 /// ```
220 ///
221 /// [encoding]: super#encoding
222 #[allow(clippy::should_implement_trait)]
223 #[inline]
224 #[must_use]
225 pub fn from_str(string: &str) -> &Self {
226 Self::from_inner(string.as_bytes())
227 }
228
229 if_nightly! {
/// Equivalent to [`OsStr::from_encoded_bytes_unchecked`].
///
/// # Safety
///
/// The string must satisfy the safety requirements of
/// [`OsStr::from_encoded_bytes_unchecked`]. It is stored without validation,
/// and [`as_os_str`] later passes the stored bytes to that function
/// unchecked.
///
/// # Examples
///
/// ```
/// use std::env;
/// # use std::io;
///
/// use os_str_bytes::RawOsStr;
///
/// let os_string = env::current_exe()?.into_os_string();
/// let raw = RawOsStr::from_os_str(&os_string);
/// let raw_bytes = raw.as_encoded_bytes();
/// assert_eq!(raw, unsafe {
///     RawOsStr::from_encoded_bytes_unchecked(raw_bytes)
/// });
/// #
/// # Ok::<_, io::Error>(())
/// ```
///
/// [`as_os_str`]: Self::as_os_str
/// [unspecified encoding]: super#encoding
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
#[inline]
#[must_use]
pub unsafe fn from_encoded_bytes_unchecked(string: &[u8]) -> &Self {
    Self::from_inner(string)
}
257 }
258
259 fn cow_from_raw_bytes_checked(
260 string: &[u8],
261 ) -> imp::Result<Cow<'_, Self>> {
262 if_nightly_return! {
263 {
264 imp::os_str_from_bytes(string).map(RawOsStrCow::from_os_str)
265 }
266 raw::validate_bytes(string)
267 .map(|()| Cow::Borrowed(Self::from_inner(string)))
268 }
269 }
270
271 deprecated_conversions! {
272 /// Wraps a byte string, without copying or encoding conversion.
273 ///
274 /// # Panics
275 ///
276 /// Panics if the string is not valid for the [unspecified encoding]
277 /// used by this crate.
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// use std::env;
283 /// # use std::io;
284 ///
285 /// use os_str_bytes::RawOsStr;
286 ///
287 /// let os_string = env::current_exe()?.into_os_string();
288 /// let raw = RawOsStr::new(&os_string);
289 /// let raw_bytes = raw.to_raw_bytes();
290 /// assert_eq!(&*raw, RawOsStr::assert_from_raw_bytes(&raw_bytes));
291 /// #
292 /// # Ok::<_, io::Error>(())
293 /// ```
294 ///
295 /// [unspecified encoding]: super#encoding
296 #[cfg_attr(
297 feature = "conversions",
298 deprecated(
299 since = "6.6.0",
300 note = "use `assert_cow_from_raw_bytes` instead"
301 )
302 )]
303 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
304 #[inline]
305 #[must_use = "method should not be used for validation"]
306 #[track_caller]
307 pub fn assert_from_raw_bytes(string: &[u8]) -> &Self {
308 leak_cow(Self::assert_cow_from_raw_bytes(string))
309 }
310
311 /// Converts and wraps a byte string.
312 ///
313 /// This method should be avoided if other safe methods can be used.
314 ///
315 /// # Panics
316 ///
317 /// Panics if the string is not valid for the [unspecified encoding]
318 /// used by this crate.
319 ///
320 /// # Examples
321 ///
322 /// ```
323 /// use std::env;
324 /// # use std::io;
325 ///
326 /// use os_str_bytes::RawOsStr;
327 ///
328 /// let os_string = env::current_exe()?.into_os_string();
329 /// let raw = RawOsStr::new(&os_string);
330 /// let raw_bytes = raw.to_raw_bytes();
331 /// assert_eq!(raw, RawOsStr::assert_cow_from_raw_bytes(&raw_bytes));
332 /// #
333 /// # Ok::<_, io::Error>(())
334 /// ```
335 ///
336 /// [unspecified encoding]: super#encoding
337 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
338 #[inline]
339 #[must_use = "method should not be used for validation"]
340 #[track_caller]
341 pub fn assert_cow_from_raw_bytes(string: &[u8]) -> Cow<'_, Self> {
342 expect_encoded!(Self::cow_from_raw_bytes_checked(string))
343 }
344 }
345
346 if_checked_conversions! {
347 /// Wraps a byte string, without copying or encoding conversion.
348 ///
349 /// [`assert_from_raw_bytes`] should almost always be used instead. For
350 /// more information, see [`EncodingError`].
351 ///
352 /// # Errors
353 ///
354 /// See documentation for [`EncodingError`].
355 ///
356 /// # Examples
357 ///
358 /// ```
359 /// use std::env;
360 /// # use std::io;
361 ///
362 /// use os_str_bytes::RawOsStr;
363 ///
364 /// let os_string = env::current_exe()?.into_os_string();
365 /// let raw = RawOsStr::new(&os_string);
366 /// let raw_bytes = raw.to_raw_bytes();
367 /// assert_eq!(Ok(&*raw), RawOsStr::from_raw_bytes(&raw_bytes));
368 /// #
369 /// # Ok::<_, io::Error>(())
370 /// ```
371 ///
372 /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
373 #[deprecated(
374 since = "6.6.0",
375 note = "use `cow_from_raw_bytes` instead",
376 )]
377 #[cfg_attr(
378 os_str_bytes_docs_rs,
379 doc(cfg(feature = "checked_conversions"))
380 )]
381 #[inline]
382 pub fn from_raw_bytes(string: &[u8]) -> Result<&Self> {
383 Self::cow_from_raw_bytes(string).map(leak_cow)
384 }
385
386 /// Converts and wraps a byte string.
387 ///
388 /// [`assert_cow_from_raw_bytes`] should almost always be used instead.
389 /// For more information, see [`EncodingError`].
390 ///
391 /// # Errors
392 ///
393 /// See documentation for [`EncodingError`].
394 ///
395 /// # Examples
396 ///
397 /// ```
398 /// use std::env;
399 /// # use std::io;
400 ///
401 /// use os_str_bytes::RawOsStr;
402 ///
403 /// let os_string = env::current_exe()?.into_os_string();
404 /// let raw = RawOsStr::new(&os_string);
405 /// let raw_bytes = raw.to_raw_bytes();
406 /// assert_eq!(
407 /// Ok(&raw),
408 /// RawOsStr::cow_from_raw_bytes(&raw_bytes).as_ref(),
409 /// );
410 /// #
411 /// # Ok::<_, io::Error>(())
412 /// ```
413 ///
414 /// [`assert_cow_from_raw_bytes`]: Self::assert_cow_from_raw_bytes
415 #[cfg_attr(
416 os_str_bytes_docs_rs,
417 doc(cfg(feature = "checked_conversions"))
418 )]
419 #[inline]
420 pub fn cow_from_raw_bytes(string: &[u8]) -> Result<Cow<'_, Self>> {
421 Self::cow_from_raw_bytes_checked(string).map_err(EncodingError)
422 }
423 }
424
425 deprecated_conversions! {
426 /// Wraps a byte string, without copying or encoding conversion.
427 ///
428 /// # Safety
429 ///
430 /// The string must be valid for the [unspecified encoding] used by
431 /// this crate.
432 ///
433 /// # Examples
434 ///
435 /// ```
436 /// use std::env;
437 /// # use std::io;
438 ///
439 /// use os_str_bytes::RawOsStr;
440 ///
441 /// let os_string = env::current_exe()?.into_os_string();
442 /// let raw = RawOsStr::new(&os_string);
443 /// let raw_bytes = raw.to_raw_bytes();
444 /// assert_eq!(&*raw, unsafe {
445 /// RawOsStr::from_raw_bytes_unchecked(&raw_bytes)
446 /// });
447 /// #
448 /// # Ok::<_, io::Error>(())
449 /// ```
450 ///
451 /// [unspecified encoding]: super#encoding
452 #[cfg_attr(feature = "nightly", allow(deprecated))]
453 #[cfg_attr(
454 feature = "conversions",
455 deprecated(
456 since = "6.6.0",
457 note = "use `cow_from_raw_bytes_unchecked` instead"
458 )
459 )]
460 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
461 #[inline]
462 #[must_use]
463 #[track_caller]
464 pub unsafe fn from_raw_bytes_unchecked(string: &[u8]) -> &Self {
465 // SAFETY: This method has equivalent safety requirements.
466 leak_cow(unsafe { Self::cow_from_raw_bytes_unchecked(string) })
467 }
468
469 /// Converts and wraps a byte string.
470 ///
471 /// # Safety
472 ///
473 /// The string must be valid for the [unspecified encoding] used by
474 /// this crate.
475 ///
476 /// # Nightly Notes
477 ///
478 /// This method is deprecated. Use [`assert_cow_from_raw_bytes`] or
479 /// [`from_encoded_bytes_unchecked`] instead.
480 ///
481 /// # Examples
482 ///
483 /// ```
484 /// use std::env;
485 /// # use std::io;
486 ///
487 /// use os_str_bytes::RawOsStr;
488 ///
489 /// let os_string = env::current_exe()?.into_os_string();
490 /// let raw = RawOsStr::new(&os_string);
491 /// let raw_bytes = raw.to_raw_bytes();
492 /// assert_eq!(raw, unsafe {
493 /// RawOsStr::cow_from_raw_bytes_unchecked(&raw_bytes)
494 /// });
495 /// #
496 /// # Ok::<_, io::Error>(())
497 /// ```
498 ///
499 /// [`assert_cow_from_raw_bytes`]: Self::assert_cow_from_raw_bytes
500 /// [`from_encoded_bytes_unchecked`]: Self::from_encoded_bytes_unchecked
501 /// [unspecified encoding]: super#encoding
502 #[cfg_attr(feature = "nightly", allow(deprecated))]
503 #[cfg_attr(
504 all(
505 not(os_str_bytes_docs_rs),
506 feature = "conversions",
507 feature = "nightly",
508 ),
509 deprecated(
510 since = "6.6.0",
511 note = "use `assert_cow_from_raw_bytes` or
512 `from_encoded_bytes_unchecked` instead",
513 )
514 )]
515 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
516 #[inline]
517 #[must_use]
518 #[track_caller]
519 pub unsafe fn cow_from_raw_bytes_unchecked(
520 string: &[u8],
521 ) -> Cow<'_, Self> {
522 if_nightly_return! {
523 {
524 Self::assert_cow_from_raw_bytes(string)
525 }
526 if cfg!(debug_assertions) {
527 expect_encoded!(raw::validate_bytes(string));
528 }
529 }
530 Cow::Borrowed(Self::from_inner(string))
531 }
532 }
533
534 if_nightly! {
535 /// Equivalent to [`OsStr::as_encoded_bytes`].
536 ///
537 /// The returned string will not use the [unspecified encoding]. It can
538 /// only be passed to methods accepting the encoding from the standard
539 /// library, such as [`from_encoded_bytes_unchecked`].
540 ///
541 /// # Examples
542 ///
543 /// ```
544 /// use os_str_bytes::RawOsStr;
545 ///
546 /// let string = "foobar";
547 /// let raw = RawOsStr::from_str(string);
548 /// assert_eq!(string.as_bytes(), raw.as_encoded_bytes());
549 /// ```
550 ///
551 /// [`from_encoded_bytes_unchecked`]: Self::from_encoded_bytes_unchecked
552 /// [unspecified encoding]: super#encoding
553 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
554 #[inline]
555 #[must_use]
556 pub fn as_encoded_bytes(&self) -> &[u8] {
557 &self.0
558 }
559 }
560
561 deprecated_conversions! {
562 /// Returns the byte string stored by this container.
563 ///
564 /// The returned string will use an [unspecified encoding].
565 ///
566 /// # Examples
567 ///
568 /// ```
569 /// use os_str_bytes::RawOsStr;
570 ///
571 /// let string = "foobar";
572 /// let raw = RawOsStr::from_str(string);
573 /// assert_eq!(string.as_bytes(), raw.as_raw_bytes());
574 /// ```
575 ///
576 /// [unspecified encoding]: super#encoding
577 #[cfg_attr(
578 feature = "conversions",
579 deprecated(since = "6.6.0", note = "use `to_raw_bytes` instead")
580 )]
581 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
582 #[inline]
583 #[must_use]
584 pub fn as_raw_bytes(&self) -> &[u8] {
585 match self.to_raw_bytes() {
586 Cow::Borrowed(string) => string,
587 #[cfg_attr(not(feature = "nightly"), allow(unused_variables))]
588 Cow::Owned(string) => {
589 if_nightly_return! {{
590 string.leak()
591 }}
592 unreachable!();
593 }
594 }
595 }
596 }
597
598 if_nightly! {
599 /// Converts this representation back to a platform-native string,
600 /// without copying or encoding conversion.
601 ///
602 /// # Examples
603 ///
604 /// ```
605 /// use std::env;
606 /// # use std::io;
607 ///
608 /// use os_str_bytes::RawOsStr;
609 ///
610 /// let os_string = env::current_exe()?.into_os_string();
611 /// let raw = RawOsStr::from_os_str(&os_string);
612 /// assert_eq!(os_string, raw.as_os_str());
613 /// #
614 /// # Ok::<_, io::Error>(())
615 /// ```
616 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
617 #[inline]
618 #[must_use]
619 pub fn as_os_str(&self) -> &OsStr {
620 // SAFETY: This wrapper prevents violating the invariants of the
621 // encoding used by the standard library.
622 unsafe { OsStr::from_encoded_bytes_unchecked(&self.0) }
623 }
624 }
625
626 /// Equivalent to [`str::contains`].
627 ///
628 /// # Examples
629 ///
630 /// ```
631 /// use os_str_bytes::RawOsStr;
632 ///
633 /// let raw = RawOsStr::from_str("foobar");
634 /// assert!(raw.contains("oo"));
635 /// assert!(!raw.contains("of"));
636 /// ```
637 #[inline]
638 #[must_use]
639 pub fn contains<P>(&self, pat: P) -> bool
640 where
641 P: Pattern,
642 {
643 self.find(pat).is_some()
644 }
645
646 /// Equivalent to [`str::ends_with`].
647 ///
648 /// # Examples
649 ///
650 /// ```
651 /// use os_str_bytes::RawOsStr;
652 ///
653 /// let raw = RawOsStr::from_str("foobar");
654 /// assert!(raw.ends_with("bar"));
655 /// assert!(!raw.ends_with("foo"));
656 /// ```
657 #[inline]
658 #[must_use]
659 pub fn ends_with<P>(&self, pat: P) -> bool
660 where
661 P: Pattern,
662 {
663 let pat = pat.__encode();
664 let pat = pat.__get();
665
666 self.0.ends_with(pat)
667 }
668
669 deprecated_conversions! {
670 /// Equivalent to [`str::ends_with`] but accepts this type for the
671 /// pattern.
672 ///
673 /// # Examples
674 ///
675 /// ```
676 /// use os_str_bytes::RawOsStr;
677 ///
678 /// let raw = RawOsStr::from_str("foobar");
679 /// assert!(raw.ends_with_os(RawOsStr::from_str("bar")));
680 /// assert!(!raw.ends_with_os(RawOsStr::from_str("foo")));
681 /// ```
682 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
683 #[inline]
684 #[must_use]
685 pub fn ends_with_os(&self, pat: &Self) -> bool {
686 raw::ends_with(&self.to_raw_bytes(), &pat.to_raw_bytes())
687 }
688 }
689
690 /// Equivalent to [`str::find`].
691 ///
692 /// # Examples
693 ///
694 /// ```
695 /// use os_str_bytes::RawOsStr;
696 ///
697 /// let raw = RawOsStr::from_str("foobar");
698 /// assert_eq!(Some(1), raw.find("o"));
699 /// assert_eq!(None, raw.find("of"));
700 /// ```
701 #[inline]
702 #[must_use]
703 pub fn find<P>(&self, pat: P) -> Option<usize>
704 where
705 P: Pattern,
706 {
707 let pat = pat.__encode();
708 let pat = pat.__get();
709
710 find(&self.0, pat)
711 }
712
713 /// Equivalent to [`str::is_empty`].
714 ///
715 /// # Examples
716 ///
717 /// ```
718 /// use os_str_bytes::RawOsStr;
719 ///
720 /// assert!(RawOsStr::from_str("").is_empty());
721 /// assert!(!RawOsStr::from_str("foobar").is_empty());
722 /// ```
723 #[inline]
724 #[must_use]
725 pub fn is_empty(&self) -> bool {
726 self.0.is_empty()
727 }
728
729 deprecated_conversions! {
730 /// Returns the length of the byte string stored by this container.
731 ///
732 /// Only the following assumptions can be made about the result:
733 /// - The length of any Unicode character is the length of its UTF-8
734 /// representation (i.e., [`char::len_utf8`]).
735 /// - Splitting a string at a UTF-8 boundary will return two strings
736 /// with lengths that sum to the length of the original string.
737 ///
/// This method may return a different result than would [`OsStr::len`]
/// when called on the same string, since [`OsStr`] uses an unspecified
/// encoding.
741 ///
742 /// # Examples
743 ///
744 /// ```
745 /// use os_str_bytes::RawOsStr;
746 ///
747 /// assert_eq!(6, RawOsStr::from_str("foobar").raw_len());
748 /// assert_eq!(0, RawOsStr::from_str("").raw_len());
749 /// ```
750 #[cfg_attr(
751 feature = "conversions",
752 deprecated(
753 since = "6.6.0",
754 note = "use `as_encoded_bytes` or `to_raw_bytes` instead",
755 )
756 )]
757 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
758 #[inline]
759 #[must_use]
760 pub fn raw_len(&self) -> usize {
761 self.to_raw_bytes().len()
762 }
763 }
764
765 /// Equivalent to [`str::rfind`].
766 ///
767 /// # Examples
768 ///
769 /// ```
770 /// use os_str_bytes::RawOsStr;
771 ///
772 /// let raw = RawOsStr::from_str("foobar");
773 /// assert_eq!(Some(2), raw.rfind("o"));
774 /// assert_eq!(None, raw.rfind("of"));
775 /// ```
776 #[inline]
777 #[must_use]
778 pub fn rfind<P>(&self, pat: P) -> Option<usize>
779 where
780 P: Pattern,
781 {
782 let pat = pat.__encode();
783 let pat = pat.__get();
784
785 rfind(&self.0, pat)
786 }
787
788 fn split_once_raw_with<P, F>(
789 &self,
790 pat: &P,
791 find_fn: F,
792 ) -> Option<(&Self, &Self)>
793 where
794 F: FnOnce(&[u8], &[u8]) -> Option<usize>,
795 P: EncodedPattern,
796 {
797 let pat = pat.__get();
798
799 let index = find_fn(&self.0, pat)?;
800 let prefix = &self.0[..index];
801 let suffix = &self.0[index + pat.len()..];
802 Some((Self::from_inner(prefix), Self::from_inner(suffix)))
803 }
804
/// Splits this string around the match of an already-encoded pattern that
/// `rfind` locates.
///
/// Returns `None` when the pattern is not found; otherwise, returns the parts
/// before and after the match, excluding the match itself.
pub(super) fn rsplit_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
    P: EncodedPattern,
{
    self.split_once_raw_with(pat, rfind)
}
811
812 /// Equivalent to [`str::rsplit_once`].
813 ///
814 /// # Examples
815 ///
816 /// ```
817 /// use os_str_bytes::RawOsStr;
818 ///
819 /// let raw = RawOsStr::from_str("foobar");
820 /// assert_eq!(
821 /// Some((RawOsStr::from_str("fo"), RawOsStr::from_str("bar"))),
822 /// raw.rsplit_once("o"),
823 /// );
824 /// assert_eq!(None, raw.rsplit_once("of"));
825 /// ```
826 #[inline]
827 #[must_use]
828 pub fn rsplit_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
829 where
830 P: Pattern,
831 {
832 self.rsplit_once_raw(&pat.__encode())
833 }
834
835 fn is_boundary(&self, index: usize) -> bool {
836 debug_assert!(index < self.0.len());
837
838 if_nightly_return! {
839 {
840 const MAX_LENGTH: usize = 4;
841
842 if index == 0 {
843 return true;
844 }
845 let byte = self.0[index];
846 if byte.is_ascii() {
847 return true;
848 }
849
850 if !util::is_continuation(byte) {
851 let bytes = &self.0[index..];
852 let valid =
853 str::from_utf8(&bytes[..bytes.len().min(MAX_LENGTH)])
854 .err()
855 .map(|x| x.valid_up_to() != 0)
856 .unwrap_or(true);
857 if valid {
858 return true;
859 }
860 }
861 let mut start = index;
862 for _ in 0..MAX_LENGTH {
863 if let Some(index) = start.checked_sub(1) {
864 start = index;
865 } else {
866 return false;
867 }
868 if !util::is_continuation(self.0[start]) {
869 break;
870 }
871 }
872 str::from_utf8(&self.0[start..index]).is_ok()
873 }
874 !raw::is_continuation(self.0[index])
875 }
876 }
877
878 #[cfg_attr(feature = "nightly", allow(clippy::diverging_sub_expression))]
879 #[cold]
880 #[inline(never)]
881 #[track_caller]
882 fn index_boundary_error(&self, index: usize) -> ! {
883 debug_assert!(!self.is_boundary(index));
884
885 if_nightly_return! {
886 {
887 panic!("byte index {} is not a valid boundary", index);
888 }
889 let start = expect_encoded!(self.0[..index]
890 .iter()
891 .rposition(|&x| !raw::is_continuation(x)));
892 let mut end = index + 1;
893 end += self.0[end..]
894 .iter()
895 .take_while(|&&x| raw::is_continuation(x))
896 .count();
897
898 let code_point = raw::decode_code_point(&self.0[start..end]);
899 panic!(
900 "byte index {} is not a valid boundary; it is inside U+{:04X} \
901 (bytes {}..{})",
902 index, code_point, start, end,
903 );
904 }
905 }
906
907 #[track_caller]
908 fn check_bound(&self, index: usize) {
909 if index < self.0.len() && !self.is_boundary(index) {
910 self.index_boundary_error(index);
911 }
912 }
913
914 /// Equivalent to [`str::split`], but empty patterns are not accepted.
915 ///
916 /// # Panics
917 ///
918 /// Panics if the pattern is empty.
919 ///
920 /// # Examples
921 ///
922 /// ```
923 /// use os_str_bytes::RawOsStr;
924 ///
925 /// let raw = RawOsStr::from_str("foobar");
926 /// assert!(raw.split("o").eq(["f", "", "bar"]));
927 /// ```
928 #[inline]
929 #[must_use]
930 #[track_caller]
pub fn split<P>(&self, pat: P) -> RawSplit<'_, P>
where
    P: Pattern,
{
    // NOTE(review): the empty-pattern panic documented for this method is
    // not raised here; presumably `RawSplit::new` performs that check -
    // confirm against the iterator implementation.
    RawSplit::new(self, pat)
}
937
938 /// Equivalent to [`str::split_at`].
939 ///
940 /// # Panics
941 ///
942 /// Panics if the index is not a [valid boundary].
943 ///
944 /// # Examples
945 ///
946 /// ```
947 /// use os_str_bytes::RawOsStr;
948 ///
949 /// let raw = RawOsStr::from_str("foobar");
950 /// assert_eq!(
951 /// ((RawOsStr::from_str("fo"), RawOsStr::from_str("obar"))),
952 /// raw.split_at(2),
953 /// );
954 /// ```
955 ///
956 /// [valid boundary]: #indices
957 #[inline]
958 #[must_use]
959 #[track_caller]
960 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
961 self.check_bound(mid);
962
963 let (prefix, suffix) = self.0.split_at(mid);
964 (Self::from_inner(prefix), Self::from_inner(suffix))
965 }
966
/// Splits this string around the match of an already-encoded pattern that
/// `find` locates.
///
/// Returns `None` when the pattern is not found; otherwise, returns the parts
/// before and after the match, excluding the match itself.
pub(super) fn split_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
    P: EncodedPattern,
{
    self.split_once_raw_with(pat, find)
}
973
974 /// Equivalent to [`str::split_once`].
975 ///
976 /// # Examples
977 ///
978 /// ```
979 /// use os_str_bytes::RawOsStr;
980 ///
981 /// let raw = RawOsStr::from_str("foobar");
982 /// assert_eq!(
983 /// Some((RawOsStr::from_str("f"), RawOsStr::from_str("obar"))),
984 /// raw.split_once("o"),
985 /// );
986 /// assert_eq!(None, raw.split_once("of"));
987 /// ```
988 #[inline]
989 #[must_use]
990 pub fn split_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
991 where
992 P: Pattern,
993 {
994 self.split_once_raw(&pat.__encode())
995 }
996
997 /// Equivalent to [`str::starts_with`].
998 ///
999 /// # Examples
1000 ///
1001 /// ```
1002 /// use os_str_bytes::RawOsStr;
1003 ///
1004 /// let raw = RawOsStr::from_str("foobar");
1005 /// assert!(raw.starts_with("foo"));
1006 /// assert!(!raw.starts_with("bar"));
1007 /// ```
1008 #[inline]
1009 #[must_use]
1010 pub fn starts_with<P>(&self, pat: P) -> bool
1011 where
1012 P: Pattern,
1013 {
1014 let pat = pat.__encode();
1015 let pat = pat.__get();
1016
1017 self.0.starts_with(pat)
1018 }
1019
1020 deprecated_conversions! {
1021 /// Equivalent to [`str::starts_with`] but accepts this type for the
1022 /// pattern.
1023 ///
1024 /// # Examples
1025 ///
1026 /// ```
1027 /// use os_str_bytes::RawOsStr;
1028 ///
1029 /// let raw = RawOsStr::from_str("foobar");
1030 /// assert!(raw.starts_with_os(RawOsStr::from_str("foo")));
1031 /// assert!(!raw.starts_with_os(RawOsStr::from_str("bar")));
1032 /// ```
1033 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
1034 #[inline]
1035 #[must_use]
1036 pub fn starts_with_os(&self, pat: &Self) -> bool {
1037 raw::starts_with(&self.to_raw_bytes(), &pat.to_raw_bytes())
1038 }
1039 }
1040
1041 /// Equivalent to [`str::strip_prefix`].
1042 ///
1043 /// # Examples
1044 ///
1045 /// ```
1046 /// use os_str_bytes::RawOsStr;
1047 ///
1048 /// let raw = RawOsStr::from_str("111foo1bar111");
1049 /// assert_eq!(
1050 /// Some(RawOsStr::from_str("11foo1bar111")),
1051 /// raw.strip_prefix("1"),
1052 /// );
1053 /// assert_eq!(None, raw.strip_prefix("o"));
1054 /// ```
1055 #[inline]
1056 #[must_use]
1057 pub fn strip_prefix<P>(&self, pat: P) -> Option<&Self>
1058 where
1059 P: Pattern,
1060 {
1061 let pat = pat.__encode();
1062 let pat = pat.__get();
1063
1064 self.0.strip_prefix(pat).map(Self::from_inner)
1065 }
1066
1067 /// Equivalent to [`str::strip_suffix`].
1068 ///
1069 /// # Examples
1070 ///
1071 /// ```
1072 /// use os_str_bytes::RawOsStr;
1073 ///
1074 /// let raw = RawOsStr::from_str("111foo1bar111");
1075 /// assert_eq!(
1076 /// Some(RawOsStr::from_str("111foo1bar11")),
1077 /// raw.strip_suffix("1"),
1078 /// );
1079 /// assert_eq!(None, raw.strip_suffix("o"));
1080 /// ```
1081 #[inline]
1082 #[must_use]
1083 pub fn strip_suffix<P>(&self, pat: P) -> Option<&Self>
1084 where
1085 P: Pattern,
1086 {
1087 let pat = pat.__encode();
1088 let pat = pat.__get();
1089
1090 self.0.strip_suffix(pat).map(Self::from_inner)
1091 }
1092
1093 /// Converts this representation back to a platform-native string.
1094 ///
1095 /// When possible, use [`RawOsStrCow::into_os_str`] for a more efficient
1096 /// conversion on some platforms.
1097 ///
1098 /// # Nightly Notes
1099 ///
1100 /// This method is deprecated. Use [`as_os_str`] instead.
1101 ///
1102 /// # Examples
1103 ///
1104 /// ```
1105 /// use std::env;
1106 /// # use std::io;
1107 ///
1108 /// use os_str_bytes::RawOsStr;
1109 ///
1110 /// let os_string = env::current_exe()?.into_os_string();
1111 /// let raw = RawOsStr::new(&os_string);
1112 /// assert_eq!(os_string, raw.to_os_str());
1113 /// #
1114 /// # Ok::<_, io::Error>(())
1115 /// ```
1116 ///
1117 /// [`as_os_str`]: Self::as_os_str
1118 #[cfg_attr(
1119 all(not(os_str_bytes_docs_rs), feature = "nightly"),
1120 deprecated(since = "6.6.0", note = "use `as_os_str` instead")
1121 )]
1122 #[inline]
1123 #[must_use]
1124 pub fn to_os_str(&self) -> Cow<'_, OsStr> {
1125 if_nightly_return! {{
1126 Cow::Borrowed(self.as_os_str())
1127 }}
1128 expect_encoded!(imp::os_str_from_bytes(&self.0))
1129 }
1130
1131 deprecated_conversions! {
1132 /// Converts and returns the byte string stored by this container.
1133 ///
1134 /// The returned string will use an [unspecified encoding].
1135 ///
1136 /// # Examples
1137 ///
1138 /// ```
1139 /// use os_str_bytes::RawOsStr;
1140 ///
1141 /// let string = "foobar";
1142 /// let raw = RawOsStr::from_str(string);
1143 /// assert_eq!(string.as_bytes(), &*raw.to_raw_bytes());
1144 /// ```
1145 ///
1146 /// [unspecified encoding]: super#encoding
1147 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
1148 #[inline]
1149 #[must_use]
1150 pub fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
1151 if_nightly_return! {{
1152 imp::os_str_to_bytes(self.as_os_str())
1153 }}
1154 Cow::Borrowed(&self.0)
1155 }
1156 }
1157
1158 /// Equivalent to [`OsStr::to_str`].
1159 ///
1160 /// # Examples
1161 ///
1162 /// ```
1163 /// use os_str_bytes::RawOsStr;
1164 ///
1165 /// let string = "foobar";
1166 /// let raw = RawOsStr::from_str(string);
1167 /// assert_eq!(Some(string), raw.to_str());
1168 /// ```
1169 #[inline]
1170 #[must_use]
1171 pub fn to_str(&self) -> Option<&str> {
1172 str::from_utf8(&self.0).ok()
1173 }
1174
1175 /// Converts this string to the best UTF-8 representation possible.
1176 ///
1177 /// Invalid sequences will be replaced with
1178 /// [`char::REPLACEMENT_CHARACTER`].
1179 ///
/// This method may return a different result than would
/// [`OsStr::to_string_lossy`] when called on the same string, since
/// [`OsStr`] uses an unspecified encoding.
1183 ///
1184 /// # Examples
1185 ///
1186 /// ```
1187 /// use std::env;
1188 /// # use std::io;
1189 ///
1190 /// use os_str_bytes::RawOsStr;
1191 ///
1192 /// let os_string = env::current_exe()?.into_os_string();
1193 /// let raw = RawOsStr::new(&os_string);
1194 /// println!("{}", raw.to_str_lossy());
1195 /// #
1196 /// # Ok::<_, io::Error>(())
1197 /// ```
1198 #[inline]
1199 #[must_use]
1200 pub fn to_str_lossy(&self) -> Cow<'_, str> {
1201 String::from_utf8_lossy(&self.0)
1202 }
1203
1204 fn trim_matches_raw_with<P, F>(&self, pat: &P, strip_fn: F) -> &Self
1205 where
1206 F: for<'a> Fn(&'a [u8], &[u8]) -> Option<&'a [u8]>,
1207 P: EncodedPattern,
1208 {
1209 let pat = pat.__get();
1210 if pat.is_empty() {
1211 return self;
1212 }
1213
1214 let mut string = &self.0;
1215 while let Some(substring) = strip_fn(string, pat) {
1216 string = substring;
1217 }
1218 Self::from_inner(string)
1219 }
1220
1221 fn trim_end_matches_raw<P>(&self, pat: &P) -> &Self
1222 where
1223 P: EncodedPattern,
1224 {
1225 self.trim_matches_raw_with(pat, <[_]>::strip_suffix)
1226 }
1227
1228 /// Equivalent to [`str::trim_end_matches`].
1229 ///
1230 /// # Examples
1231 ///
1232 /// ```
1233 /// use os_str_bytes::RawOsStr;
1234 ///
1235 /// let raw = RawOsStr::from_str("111foo1bar111");
1236 /// assert_eq!("111foo1bar", raw.trim_end_matches("1"));
1237 /// assert_eq!("111foo1bar111", raw.trim_end_matches("o"));
1238 /// ```
1239 #[inline]
1240 #[must_use]
1241 pub fn trim_end_matches<P>(&self, pat: P) -> &Self
1242 where
1243 P: Pattern,
1244 {
1245 self.trim_end_matches_raw(&pat.__encode())
1246 }
1247
1248 /// Equivalent to [`str::trim_matches`].
1249 ///
1250 /// # Examples
1251 ///
1252 /// ```
1253 /// use os_str_bytes::RawOsStr;
1254 ///
1255 /// let raw = RawOsStr::from_str("111foo1bar111");
1256 /// assert_eq!("foo1bar", raw.trim_matches("1"));
1257 /// assert_eq!("111foo1bar111", raw.trim_matches("o"));
1258 /// ```
1259 #[inline]
1260 #[must_use]
1261 pub fn trim_matches<P>(&self, pat: P) -> &Self
1262 where
1263 P: Pattern,
1264 {
1265 let pat = pat.__encode();
1266 self.trim_start_matches_raw(&pat).trim_end_matches_raw(&pat)
1267 }
1268
1269 fn trim_start_matches_raw<P>(&self, pat: &P) -> &Self
1270 where
1271 P: EncodedPattern,
1272 {
1273 self.trim_matches_raw_with(pat, <[_]>::strip_prefix)
1274 }
1275
1276 /// Equivalent to [`str::trim_start_matches`].
1277 ///
1278 /// # Examples
1279 ///
1280 /// ```
1281 /// use os_str_bytes::RawOsStr;
1282 ///
1283 /// let raw = RawOsStr::from_str("111foo1bar111");
1284 /// assert_eq!("foo1bar111", raw.trim_start_matches("1"));
1285 /// assert_eq!("111foo1bar111", raw.trim_start_matches("o"));
1286 /// ```
1287 #[inline]
1288 #[must_use]
1289 pub fn trim_start_matches<P>(&self, pat: P) -> &Self
1290 where
1291 P: Pattern,
1292 {
1293 self.trim_start_matches_raw(&pat.__encode())
1294 }
1295}
1296
// Identity conversion, so that `RawOsStr` satisfies `AsRef<RawOsStr>` bounds.
impl AsRef<Self> for RawOsStr {
    #[inline]
    fn as_ref(&self) -> &Self {
        self
    }
}
1303
// NOTE(review): `if_nightly!` is defined outside this view; it presumably
// emits these items only when the "nightly" feature is enabled.
if_nightly! {
    // Borrows this string as a platform-native string via `as_os_str`.
    #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
    impl AsRef<OsStr> for RawOsStr {
        #[inline]
        fn as_ref(&self) -> &OsStr {
            self.as_os_str()
        }
    }

    // Borrows a platform-native string as a `RawOsStr` via `from_os_str`.
    #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
    impl AsRef<RawOsStr> for OsStr {
        #[inline]
        fn as_ref(&self) -> &RawOsStr {
            RawOsStr::from_os_str(self)
        }
    }
}
1321
1322if_nightly! {
1323 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
1324 impl AsRef<RawOsStr> for OsString {
1325 #[inline]
1326 fn as_ref(&self) -> &RawOsStr {
1327 (**self).as_ref()
1328 }
1329 }
1330}
1331
// Borrows a UTF-8 string as a `RawOsStr` via `RawOsStr::from_str`.
impl AsRef<RawOsStr> for str {
    #[inline]
    fn as_ref(&self) -> &RawOsStr {
        RawOsStr::from_str(self)
    }
}
1338
1339impl AsRef<RawOsStr> for String {
1340 #[inline]
1341 fn as_ref(&self) -> &RawOsStr {
1342 (**self).as_ref()
1343 }
1344}
1345
// The default borrowed value is the empty string.
impl Default for &RawOsStr {
    #[inline]
    fn default() -> Self {
        RawOsStr::from_str("")
    }
}
1352
// Wraps a borrowed string in `Cow::Borrowed`, without copying.
impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> {
    #[inline]
    fn from(value: &'a RawOsStr) -> Self {
        Cow::Borrowed(value)
    }
}
1359
1360impl From<Box<str>> for Box<RawOsStr> {
1361 #[inline]
1362 fn from(value: Box<str>) -> Self {
1363 value.into_boxed_bytes().transmute_box()
1364 }
1365}
1366
1367impl ToOwned for RawOsStr {
1368 type Owned = RawOsString;
1369
1370 #[inline]
1371 fn to_owned(&self) -> Self::Owned {
1372 RawOsString(self.0.to_owned())
1373 }
1374}
1375
/// Extensions to [`Cow<RawOsStr>`] for additional conversions.
///
/// The methods mirror the conversions available on [`RawOsStr`] and
/// [`RawOsString`], but preserve whether the string is borrowed or owned.
///
/// This trait requires `private::Sealed`, so it is presumably not meant to be
/// implemented outside of this crate.
///
/// [`Cow<RawOsStr>`]: Cow
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
pub trait RawOsStrCow<'a>: private::Sealed {
    /// Converts a platform-native string back to this representation.
    ///
    /// # Nightly Notes
    ///
    /// This method does not require copying or encoding conversion.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::borrow::Cow;
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::RawOsStr;
    /// use os_str_bytes::RawOsStrCow;
    ///
    /// let os_string = Cow::Owned(env::current_exe()?.into_os_string());
    /// println!("{:?}", Cow::from_os_str(os_string));
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    #[must_use]
    fn from_os_str(string: Cow<'a, OsStr>) -> Self;

    /// Converts this representation back to a platform-native string.
    ///
    /// # Nightly Notes
    ///
    /// This method does not require copying or encoding conversion.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::RawOsStr;
    /// use os_str_bytes::RawOsStrCow;
    ///
    /// let os_string = env::current_exe()?.into_os_string();
    /// let raw = RawOsStr::new(&os_string);
    /// assert_eq!(os_string, raw.into_os_str());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    #[must_use]
    fn into_os_str(self) -> Cow<'a, OsStr>;

    deprecated_conversions! {
        /// Returns the byte string stored by this container.
        ///
        /// The returned string will use an [unspecified encoding].
        ///
        /// # Examples
        ///
        /// ```
        /// use std::borrow::Cow;
        ///
        /// use os_str_bytes::RawOsStr;
        /// use os_str_bytes::RawOsStrCow;
        ///
        /// let string = "foobar";
        /// let raw = Cow::Borrowed(RawOsStr::from_str(string));
        /// assert_eq!(string.as_bytes(), &*raw.into_raw_bytes());
        /// ```
        ///
        /// [unspecified encoding]: super#encoding
        #[cfg_attr(
            feature = "conversions",
            deprecated(
                since = "6.6.0",
                note = "removal planned due to low usage",
            )
        )]
        #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
        #[must_use]
        fn into_raw_bytes(self) -> Cow<'a, [u8]>;
    }
}
1460
1461impl<'a> RawOsStrCow<'a> for Cow<'a, RawOsStr> {
1462 #[cfg_attr(feature = "nightly", allow(deprecated))]
1463 #[inline]
1464 fn from_os_str(string: Cow<'a, OsStr>) -> Self {
1465 match string {
1466 Cow::Borrowed(string) => RawOsStr::new(string),
1467 Cow::Owned(string) => Cow::Owned(RawOsString::new(string)),
1468 }
1469 }
1470
1471 #[cfg_attr(feature = "nightly", allow(deprecated))]
1472 #[inline]
1473 fn into_os_str(self) -> Cow<'a, OsStr> {
1474 match self {
1475 Cow::Borrowed(string) => string.to_os_str(),
1476 Cow::Owned(string) => Cow::Owned(string.into_os_string()),
1477 }
1478 }
1479
1480 #[inline]
1481 fn into_raw_bytes(self) -> Cow<'a, [u8]> {
1482 match self {
1483 Cow::Borrowed(string) => string.to_raw_bytes(),
1484 Cow::Owned(string) => Cow::Owned(string.into_raw_vec()),
1485 }
1486 }
1487}
1488
/// A container for owned byte strings converted by this crate.
///
/// This is the owned counterpart of [`RawOsStr`]: it stores its bytes in a
/// [`Vec<u8>`] and dereferences to [`RawOsStr`].
///
/// For more information, see [`RawOsStr`].
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
pub struct RawOsString(Vec<u8>);
1495
1496impl RawOsString {
    /// Converts a platform-native string into a representation that can be
    /// more easily manipulated.
    ///
    /// For more information, see [`RawOsStr::new`].
    ///
    /// # Nightly Notes
    ///
    /// This method does not require copying or encoding conversion.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::RawOsString;
    ///
    /// let os_string = env::current_exe()?.into_os_string();
    /// println!("{:?}", RawOsString::new(os_string));
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn new(string: OsString) -> Self {
        // NOTE(review): presumably only taken with the "nightly" feature,
        // where the standard library's encoded bytes are stored directly.
        if_nightly_return! {{
            Self(string.into_encoded_bytes())
        }}
        // Otherwise the string is converted by the platform implementation.
        Self(imp::os_string_into_vec(string))
    }
1527
1528 /// Wraps a string, without copying or encoding conversion.
1529 ///
1530 /// This method is much more efficient than [`RawOsString::new`], since the
1531 /// [encoding] used by this crate is compatible with UTF-8.
1532 ///
1533 /// # Examples
1534 ///
1535 /// ```
1536 /// use os_str_bytes::RawOsString;
1537 ///
1538 /// let string = "foobar".to_owned();
1539 /// let raw = RawOsString::from_string(string.clone());
1540 /// assert_eq!(string, raw);
1541 /// ```
1542 ///
1543 /// [encoding]: super#encoding
1544 #[inline]
1545 #[must_use]
1546 pub fn from_string(string: String) -> Self {
1547 Self(string.into_bytes())
1548 }
1549
    if_nightly! {
        /// Equivalent to [`OsString::from_encoded_bytes_unchecked`].
        ///
        /// The bytes are stored without any validation.
        ///
        /// # Safety
        ///
        /// The bytes must satisfy the requirements documented for
        /// [`OsString::from_encoded_bytes_unchecked`], since
        /// [`into_os_string`] may later pass them to that function without
        /// validation.
        ///
        /// # Examples
        ///
        /// ```
        /// use std::env;
        /// # use std::io;
        ///
        /// use os_str_bytes::RawOsString;
        ///
        /// let os_string = env::current_exe()?.into_os_string();
        /// let raw = RawOsString::new(os_string);
        /// let raw_bytes = raw.clone().into_encoded_vec();
        /// assert_eq!(raw, unsafe {
        ///     RawOsString::from_encoded_vec_unchecked(raw_bytes)
        /// });
        /// #
        /// # Ok::<_, io::Error>(())
        /// ```
        ///
        /// [`into_os_string`]: Self::into_os_string
        #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
        #[inline]
        #[must_use]
        pub unsafe fn from_encoded_vec_unchecked(string: Vec<u8>) -> Self {
            Self(string)
        }
    }
1577
    /// Validates a byte string and wraps it, returning the implementation's
    /// error type on failure.
    ///
    /// This is the shared backend of the public `assert_from_raw_vec` and
    /// `from_raw_vec` constructors.
    fn from_raw_vec_checked(string: Vec<u8>) -> imp::Result<Self> {
        // NOTE(review): `if_nightly_return!` presumably returns the first
        // block with the "nightly" feature (converting through `OsString`)
        // and otherwise evaluates the trailing expression, which validates
        // the bytes and stores them unchanged.
        if_nightly_return! {
            {
                imp::os_string_from_vec(string).map(Self::new)
            }
            raw::validate_bytes(&string).map(|()| Self(string))
        }
    }
1586
    deprecated_conversions! {
        /// Wraps a byte string, without copying or encoding conversion.
        ///
        /// # Panics
        ///
        /// Panics if the string is not valid for the [unspecified encoding]
        /// used by this crate.
        ///
        /// # Examples
        ///
        /// ```
        /// use std::env;
        /// # use std::io;
        ///
        /// use os_str_bytes::RawOsString;
        ///
        /// let os_string = env::current_exe()?.into_os_string();
        /// let raw = RawOsString::new(os_string);
        /// let raw_bytes = raw.clone().into_raw_vec();
        /// assert_eq!(raw, RawOsString::assert_from_raw_vec(raw_bytes));
        /// #
        /// # Ok::<_, io::Error>(())
        /// ```
        ///
        /// [unspecified encoding]: super#encoding
        #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
        #[inline]
        #[must_use = "method should not be used for validation"]
        #[track_caller]
        pub fn assert_from_raw_vec(string: Vec<u8>) -> Self {
            // `expect_encoded!` presumably unwraps the result and panics on
            // an encoding error, as described under "Panics" - confirm.
            expect_encoded!(Self::from_raw_vec_checked(string))
        }
    }
1620
1621 if_checked_conversions! {
1622 /// Wraps a byte string, without copying or encoding conversion.
1623 ///
1624 /// [`assert_from_raw_vec`] should almost always be used instead. For
1625 /// more information, see [`EncodingError`].
1626 ///
1627 /// # Errors
1628 ///
1629 /// See documentation for [`EncodingError`].
1630 ///
1631 /// # Examples
1632 ///
1633 /// ```
1634 /// use std::env;
1635 /// # use std::io;
1636 ///
1637 /// use os_str_bytes::RawOsString;
1638 ///
1639 /// let os_string = env::current_exe()?.into_os_string();
1640 /// let raw = RawOsString::new(os_string);
1641 /// let raw_bytes = raw.clone().into_raw_vec();
1642 /// assert_eq!(Ok(raw), RawOsString::from_raw_vec(raw_bytes));
1643 /// #
1644 /// # Ok::<_, io::Error>(())
1645 /// ```
1646 ///
1647 /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
1648 #[cfg_attr(
1649 os_str_bytes_docs_rs,
1650 doc(cfg(feature = "checked_conversions"))
1651 )]
1652 #[inline]
1653 pub fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
1654 Self::from_raw_vec_checked(string).map_err(EncodingError)
1655 }
1656 }
1657
1658 deprecated_conversions! {
1659 /// Wraps a byte string, without copying or encoding conversion.
1660 ///
1661 /// # Safety
1662 ///
1663 /// The string must be valid for the [unspecified encoding] used by
1664 /// this crate.
1665 ///
1666 /// # Nightly Notes
1667 ///
1668 /// This method is deprecated. Use [`assert_from_raw_vec`] or
1669 /// [`from_encoded_vec_unchecked`] instead.
1670 ///
1671 /// # Examples
1672 ///
1673 /// ```
1674 /// use std::env;
1675 /// # use std::io;
1676 ///
1677 /// use os_str_bytes::RawOsString;
1678 ///
1679 /// let os_string = env::current_exe()?.into_os_string();
1680 /// let raw = RawOsString::new(os_string);
1681 /// let raw_bytes = raw.clone().into_raw_vec();
1682 /// assert_eq!(raw, unsafe {
1683 /// RawOsString::from_raw_vec_unchecked(raw_bytes)
1684 /// });
1685 /// #
1686 /// # Ok::<_, io::Error>(())
1687 /// ```
1688 ///
1689 /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
1690 /// [`from_encoded_vec_unchecked`]: Self::from_encoded_vec_unchecked
1691 /// [unspecified encoding]: super#encoding
1692 #[cfg_attr(
1693 all(
1694 not(os_str_bytes_docs_rs),
1695 feature = "conversions",
1696 feature = "nightly",
1697 ),
1698 deprecated(
1699 since = "6.6.0",
1700 note = "use `assert_from_raw_vec` or
1701 `from_encoded_vec_unchecked` instead",
1702 )
1703 )]
1704 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
1705 #[inline]
1706 #[must_use]
1707 #[track_caller]
1708 pub unsafe fn from_raw_vec_unchecked(string: Vec<u8>) -> Self {
1709 if_nightly_return! {
1710 {
1711 Self::assert_from_raw_vec(string)
1712 }
1713 if cfg!(debug_assertions) {
1714 expect_encoded!(raw::validate_bytes(&string));
1715 }
1716 }
1717
1718 Self(string)
1719 }
1720 }
1721
    /// Equivalent to [`String::clear`].
    ///
    /// Removes all bytes from this string by clearing the underlying vector.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::RawOsString;
    ///
    /// let os_string = env::current_exe()?.into_os_string();
    /// let mut raw = RawOsString::new(os_string);
    /// raw.clear();
    /// assert!(raw.is_empty());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear();
    }
1743
1744 /// Equivalent to [`String::into_boxed_str`].
1745 ///
1746 /// # Examples
1747 ///
1748 /// ```
1749 /// use os_str_bytes::RawOsString;
1750 ///
1751 /// let string = "foobar".to_owned();
1752 /// let raw = RawOsString::from_string(string.clone());
1753 /// assert_eq!(string, *raw.into_box());
1754 /// ```
1755 #[inline]
1756 #[must_use]
1757 pub fn into_box(self) -> Box<RawOsStr> {
1758 self.0.into_boxed_slice().transmute_box()
1759 }
1760
1761 if_nightly! {
1762 /// Equivalent to [`OsString::into_encoded_bytes`].
1763 ///
1764 /// The returned string will not use the [unspecified encoding]. It can
1765 /// only be passed to methods accepting the encoding from the standard
1766 /// library, such as [`from_encoded_vec_unchecked`].
1767 ///
1768 /// # Examples
1769 ///
1770 /// ```
1771 /// use os_str_bytes::RawOsString;
1772 ///
1773 /// let string = "foobar".to_owned();
1774 /// let raw = RawOsString::from_string(string.clone());
1775 /// assert_eq!(string.into_bytes(), raw.into_encoded_vec());
1776 /// ```
1777 ///
1778 /// [`from_encoded_vec_unchecked`]: Self::from_encoded_vec_unchecked
1779 /// [unspecified encoding]: super#encoding
1780 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
1781 #[inline]
1782 #[must_use]
1783 pub fn into_encoded_vec(self) -> Vec<u8> {
1784 self.0
1785 }
1786 }
1787
    /// Converts this representation back to a platform-native string.
    ///
    /// # Nightly Notes
    ///
    /// This method does not require copying or encoding conversion.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::RawOsString;
    ///
    /// let os_string = env::current_exe()?.into_os_string();
    /// let raw = RawOsString::new(os_string.clone());
    /// assert_eq!(os_string, raw.into_os_string());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn into_os_string(self) -> OsString {
        // NOTE(review): presumably only taken with the "nightly" feature,
        // where the stored bytes already use the standard library's encoding.
        if_nightly_return! {{
            // SAFETY: This wrapper prevents violating the invariants of the
            // encoding used by the standard library.
            unsafe { OsString::from_encoded_bytes_unchecked(self.0) }
        }}
        // Otherwise the bytes are converted by the platform implementation;
        // `expect_encoded!` presumably panics on an encoding error - confirm.
        expect_encoded!(imp::os_string_from_vec(self.0))
    }
1818
    deprecated_conversions! {
        /// Returns the byte string stored by this container.
        ///
        /// The returned string will use an [unspecified encoding].
        ///
        /// # Examples
        ///
        /// ```
        /// use os_str_bytes::RawOsString;
        ///
        /// let string = "foobar".to_owned();
        /// let raw = RawOsString::from_string(string.clone());
        /// assert_eq!(string.into_bytes(), raw.into_raw_vec());
        /// ```
        ///
        /// [unspecified encoding]: super#encoding
        #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "conversions")))]
        #[inline]
        #[must_use]
        pub fn into_raw_vec(self) -> Vec<u8> {
            // NOTE(review): presumably only taken with the "nightly" feature,
            // where the bytes are re-encoded from the platform string.
            if_nightly_return! {{
                imp::os_string_into_vec(self.into_os_string())
            }}
            // Otherwise the stored vector is returned unchanged.
            self.0
        }
    }
1845
1846 /// Equivalent to [`OsString::into_string`].
1847 ///
1848 /// # Examples
1849 ///
1850 /// ```
1851 /// use os_str_bytes::RawOsString;
1852 ///
1853 /// let string = "foobar".to_owned();
1854 /// let raw = RawOsString::from_string(string.clone());
1855 /// assert_eq!(Ok(string), raw.into_string());
1856 /// ```
1857 #[inline]
1858 pub fn into_string(self) -> result::Result<String, Self> {
1859 String::from_utf8(self.0).map_err(|x| Self(x.into_bytes()))
1860 }
1861
    /// Equivalent to [`String::shrink_to_fit`].
    ///
    /// Delegates to [`Vec::shrink_to_fit`] on the underlying byte vector; the
    /// contents of the string are not changed.
    ///
    /// # Examples
    ///
    /// ```
    /// use os_str_bytes::RawOsString;
    ///
    /// let string = "foobar".to_owned();
    /// let mut raw = RawOsString::from_string(string.clone());
    /// raw.shrink_to_fit();
    /// assert_eq!(string, raw);
    /// ```
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        self.0.shrink_to_fit();
    }
1878
1879 /// Equivalent to [`String::split_off`].
1880 ///
1881 /// # Panics
1882 ///
1883 /// Panics if the index is not a [valid boundary].
1884 ///
1885 /// # Examples
1886 ///
1887 /// ```
1888 /// use os_str_bytes::RawOsString;
1889 ///
1890 /// let mut raw = RawOsString::from_string("foobar".to_owned());
1891 /// assert_eq!("bar", raw.split_off(3));
1892 /// assert_eq!("foo", raw);
1893 /// ```
1894 ///
1895 /// [valid boundary]: RawOsStr#indices
1896 #[inline]
1897 #[must_use]
1898 #[track_caller]
1899 pub fn split_off(&mut self, at: usize) -> Self {
1900 self.check_bound(at);
1901
1902 Self(self.0.split_off(at))
1903 }
1904
    /// Equivalent to [`String::truncate`].
    ///
    /// # Panics
    ///
    /// Panics if the index is not a [valid boundary].
    ///
    /// # Examples
    ///
    /// ```
    /// use os_str_bytes::RawOsString;
    ///
    /// let mut raw = RawOsString::from_string("foobar".to_owned());
    /// raw.truncate(3);
    /// assert_eq!("foo", raw);
    /// ```
    ///
    /// [valid boundary]: RawOsStr#indices
    #[inline]
    #[track_caller]
    pub fn truncate(&mut self, new_len: usize) {
        // The index is checked before the vector is modified.
        self.check_bound(new_len);

        self.0.truncate(new_len);
    }
1929}
1930
1931if_nightly! {
1932 #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
1933 impl AsRef<OsStr> for RawOsString {
1934 #[inline]
1935 fn as_ref(&self) -> &OsStr {
1936 (**self).as_ref()
1937 }
1938 }
1939}
1940
// Borrows the owned string as a `RawOsStr`; the conversion relies on deref
// coercion through the `Deref` implementation.
impl AsRef<RawOsStr> for RawOsString {
    #[inline]
    fn as_ref(&self) -> &RawOsStr {
        self
    }
}
1947
// Pairs with `ToOwned for RawOsStr`; the conversion relies on deref coercion
// through the `Deref` implementation.
impl Borrow<RawOsStr> for RawOsString {
    #[inline]
    fn borrow(&self) -> &RawOsStr {
        self
    }
}
1954
1955impl Deref for RawOsString {
1956 type Target = RawOsStr;
1957
1958 #[inline]
1959 fn deref(&self) -> &Self::Target {
1960 RawOsStr::from_inner(&self.0)
1961 }
1962}
1963
// Converts an owned string into a boxed one via `RawOsString::into_box`.
impl From<RawOsString> for Box<RawOsStr> {
    #[inline]
    fn from(value: RawOsString) -> Self {
        value.into_box()
    }
}
1970
1971impl From<Box<RawOsStr>> for RawOsString {
1972 #[inline]
1973 fn from(value: Box<RawOsStr>) -> Self {
1974 Self(value.transmute_box::<[_]>().into_vec())
1975 }
1976}
1977
// Wraps an owned string in `Cow::Owned`, without copying.
impl From<RawOsString> for Cow<'_, RawOsStr> {
    #[inline]
    fn from(value: RawOsString) -> Self {
        Cow::Owned(value)
    }
}
1984
// NOTE(review): `if_nightly!` is defined outside this view; it presumably
// emits these items only when the "nightly" feature is enabled.
if_nightly! {
    // Converts a platform-native string via `RawOsString::new`.
    #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
    impl From<OsString> for RawOsString {
        #[inline]
        fn from(value: OsString) -> Self {
            Self::new(value)
        }
    }

    // Converts back to a platform-native string via
    // `RawOsString::into_os_string`.
    #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "nightly")))]
    impl From<RawOsString> for OsString {
        #[inline]
        fn from(value: RawOsString) -> Self {
            value.into_os_string()
        }
    }
}
2002
// Converts a UTF-8 string via `RawOsString::from_string`.
impl From<String> for RawOsString {
    #[inline]
    fn from(value: String) -> Self {
        Self::from_string(value)
    }
}
2009
/// Formatting helper that writes a byte string as a double-quoted literal.
///
/// Runs of valid UTF-8 are written using [`str::escape_debug`]. Bytes that
/// are not valid UTF-8 are passed to `raw::debug` to be formatted.
struct DebugBuffer<'a>(&'a [u8]);

impl Debug for DebugBuffer<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("\"")?;

        // `string` is the part that has not been written yet. Its first
        // `invalid_length` bytes are already known to be invalid UTF-8.
        let mut string = self.0;
        let mut invalid_length = 0;
        while !string.is_empty() {
            let (invalid, substring) = string.split_at(invalid_length);

            let valid = match str::from_utf8(substring) {
                Ok(valid) => {
                    // Everything remaining is valid, so the loop ends after
                    // this iteration.
                    string = b"";
                    valid
                }
                Err(error) => {
                    let (valid, substring) =
                        substring.split_at(error.valid_up_to());

                    // `error_len` returns `None` when the input ends in the
                    // middle of a sequence; the whole tail is then invalid.
                    let invalid_char_length =
                        error.error_len().unwrap_or_else(|| substring.len());
                    if valid.is_empty() {
                        // Consecutive invalid sequences are accumulated, so
                        // that they are written together later.
                        invalid_length += invalid_char_length;
                        continue;
                    }
                    // The next iteration starts at the invalid sequence that
                    // follows the valid run written below.
                    string = substring;
                    invalid_length = invalid_char_length;

                    // SAFETY: This slice was validated to be UTF-8.
                    unsafe { str::from_utf8_unchecked(valid) }
                }
            };

            // Write the pending invalid bytes, then the valid run after them.
            raw::debug(RawOsStr::from_inner(invalid), f)?;
            Display::fmt(&valid.escape_debug(), f)?;
        }

        f.write_str("\"")
    }
}
2051
// Implements `Debug` for a string type, formatting it as a tuple struct named
// after the type, with the bytes rendered by `DebugBuffer`.
macro_rules! r#impl {
    ( $type:ty ) => {
        impl Debug for $type {
            #[inline]
            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
                f.debug_tuple(stringify!($type))
                    .field(&DebugBuffer(&self.0))
                    .finish()
            }
        }
    };
}
r#impl!(RawOsStr);
r#impl!(RawOsString);
2066
// Implements `Index` for `RawOsStr` and `RawOsString` with one range type.
//
// The optional arguments name a variable bound to a reference to the index,
// followed by one or more expressions using that variable. Each expression is
// passed to `check_bound` before the bytes are sliced.
macro_rules! r#impl {
    ( $index_type:ty $(, $index_var:ident , $($bound:expr),+)? ) => {
        impl Index<$index_type> for RawOsStr {
            type Output = Self;

            #[inline]
            fn index(&self, idx: $index_type) -> &Self::Output {
                $(
                    let $index_var = &idx;
                    $(self.check_bound($bound);)+
                )?

                Self::from_inner(&self.0[idx])
            }
        }

        // The owned type delegates to the borrowed implementation.
        impl Index<$index_type> for RawOsString {
            type Output = RawOsStr;

            #[inline]
            fn index(&self, idx: $index_type) -> &Self::Output {
                &(**self)[idx]
            }
        }
    };
}
r#impl!(Range<usize>, x, x.start, x.end);
r#impl!(RangeFrom<usize>, x, x.start);
r#impl!(RangeFull);
// [usize::MAX] will always be a valid inclusive end index.
#[rustfmt::skip]
r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1));
r#impl!(RangeTo<usize>, x, x.end);
r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1));
2101
// Implements `PartialEq` in both directions between two string types.
//
// Any attributes given before the types are applied to both implementations.
macro_rules! r#impl {
    ( $(#[$attr:meta])* $type:ty , $other_type:ty ) => {
        $(#[$attr])*
        impl PartialEq<$other_type> for $type {
            #[inline]
            fn eq(&self, other: &$other_type) -> bool {
                // Both sides are compared as `RawOsStr`: `self` is coerced
                // and `other` is converted using `AsRef`.
                let raw: &RawOsStr = self;
                let other: &RawOsStr = other.as_ref();
                raw == other
            }
        }

        // The reverse direction reuses the implementation above.
        $(#[$attr])*
        impl PartialEq<$type> for $other_type {
            #[inline]
            fn eq(&self, other: &$type) -> bool {
                other == self
            }
        }
    };
}
r#impl!(RawOsStr, RawOsString);
r#impl!(RawOsStr, str);
r#impl!(RawOsStr, String);
r#impl!(&RawOsStr, RawOsString);
r#impl!(&RawOsStr, String);
r#impl!(RawOsString, str);
r#impl!(RawOsString, &str);
r#impl!(RawOsString, String);
2131
// Comparisons with the platform-native string types.
//
// NOTE(review): `if_nightly!` is defined outside this view; it presumably
// emits these items only when the "nightly" feature is enabled.
if_nightly! {
    // Forwards to the `PartialEq` macro above, adding the attribute that
    // marks the implementations as requiring the "nightly" feature in the
    // documentation.
    macro_rules! impl_nightly {
        ( $type:ty , $other_type:ty ) => {
            r#impl! {
                #[cfg_attr(
                    os_str_bytes_docs_rs,
                    doc(cfg(feature = "nightly"))
                )]
                $type, $other_type
            }
        };
    }
    impl_nightly!(RawOsStr, OsStr);
    impl_nightly!(RawOsStr, OsString);
    impl_nightly!(&RawOsStr, OsString);
    impl_nightly!(RawOsString, OsStr);
    impl_nightly!(RawOsString, &OsStr);
    impl_nightly!(RawOsString, OsString);
}
2151
// Integration with the `print_bytes` crate, compiled only when the
// "print_bytes" feature is enabled.
#[cfg(feature = "print_bytes")]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "print_bytes")))]
mod print_bytes {
    use print_bytes::ByteStr;
    use print_bytes::ToBytes;
    #[cfg(windows)]
    use print_bytes::WideStr;

    #[cfg(windows)]
    use crate::imp::raw;

    use super::RawOsStr;
    use super::RawOsString;

    impl ToBytes for RawOsStr {
        // Delegates to the implementation for the stored byte slice.
        #[inline]
        fn to_bytes(&self) -> ByteStr<'_> {
            self.0.to_bytes()
        }

        // On Windows, the string is also provided as wide characters,
        // collected from `raw::encode_wide`.
        #[cfg(windows)]
        #[inline]
        fn to_wide(&self) -> Option<WideStr> {
            Some(WideStr::new(raw::encode_wide(self).collect()))
        }
    }

    // The owned type delegates to the borrowed implementation.
    impl ToBytes for RawOsString {
        #[inline]
        fn to_bytes(&self) -> ByteStr<'_> {
            (**self).to_bytes()
        }

        #[cfg(windows)]
        #[inline]
        fn to_wide(&self) -> Option<WideStr> {
            (**self).to_wide()
        }
    }
}
2192
// Integration with the `uniquote` crate, compiled only when the "uniquote"
// feature is enabled.
#[cfg(feature = "uniquote")]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "uniquote")))]
mod uniquote {
    use uniquote::Formatter;
    use uniquote::Quote;
    use uniquote::Result;

    use crate::imp::raw;

    use super::RawOsStr;
    use super::RawOsString;

    impl Quote for RawOsStr {
        // Escaping is delegated to the platform-specific implementation.
        #[inline]
        fn escape(&self, f: &mut Formatter<'_>) -> Result {
            raw::uniquote::escape(self, f)
        }
    }

    // The owned type delegates to the borrowed implementation.
    impl Quote for RawOsString {
        #[inline]
        fn escape(&self, f: &mut Formatter<'_>) -> Result {
            (**self).escape(f)
        }
    }
}