tinystr/
ascii.rs

Help
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::asciibyte::AsciiByte;
6use crate::int_ops::{Aligned4, Aligned8};
7use crate::ParseError;
8use core::borrow::Borrow;
9use core::fmt;
10use core::ops::Deref;
11use core::str::{self, FromStr};
12
/// A fixed-capacity ASCII string stored inline in exactly `N` bytes.
///
/// The constructors in this file start from a zero-filled buffer, so strings
/// shorter than `N` are padded with trailing NUL bytes; the logical length is
/// recovered by looking for that padding (see `len`).
#[repr(transparent)]
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
pub struct TinyAsciiStr<const N: usize> {
    // The stored characters, followed by NUL padding up to `N` bytes.
    bytes: [AsciiByte; N],
}
18
19impl<const N: usize> TinyAsciiStr<N> {
20    #[inline]
21    pub const fn try_from_str(s: &str) -> Result<Self, ParseError> {
22        Self::try_from_utf8(s.as_bytes())
23    }
24
25    /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
26    /// `code_units` may contain at most `N` non-null ASCII code points.
27    #[inline]
28    pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
29        Self::try_from_utf8_inner(code_units, false)
30    }
31
32    /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
33    /// `code_units` may contain at most `N` non-null ASCII code points.
34    #[inline]
35    pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> {
36        Self::try_from_utf16_inner(code_units, 0, code_units.len(), false)
37    }
38
39    /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
40    ///
41    /// Invalid code units, as well as null or non-ASCII code points
42    /// (i.e. those outside the range U+0001..=U+007F`)
43    /// will be replaced with the replacement byte.
44    ///
45    /// The input slice will be truncated if its length exceeds `N`.
46    pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self {
47        let mut out = [0; N];
48        let mut i = 0;
49        // Ord is not available in const, so no `.min(N)`
50        let len = if code_units.len() > N {
51            N
52        } else {
53            code_units.len()
54        };
55
56        // Indexing is protected by the len check above
57        #[allow(clippy::indexing_slicing)]
58        while i < len {
59            let b = code_units[i];
60            if b > 0 && b < 0x80 {
61                out[i] = b;
62            } else {
63                out[i] = replacement;
64            }
65            i += 1;
66        }
67
68        Self {
69            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
70            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
71        }
72    }
73
74    /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
75    ///
76    /// Invalid code units, as well as null or non-ASCII code points
77    /// (i.e. those outside the range U+0001..=U+007F`)
78    /// will be replaced with the replacement byte.
79    ///
80    /// The input slice will be truncated if its length exceeds `N`.
81    pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self {
82        let mut out = [0; N];
83        let mut i = 0;
84        // Ord is not available in const, so no `.min(N)`
85        let len = if code_units.len() > N {
86            N
87        } else {
88            code_units.len()
89        };
90
91        // Indexing is protected by the len check above
92        #[allow(clippy::indexing_slicing)]
93        while i < len {
94            let b = code_units[i];
95            if b > 0 && b < 0x80 {
96                out[i] = b as u8;
97            } else {
98                out[i] = replacement;
99            }
100            i += 1;
101        }
102
103        Self {
104            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
105            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
106        }
107    }
108
109    /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
110    ///
111    /// The byte array may contain trailing NUL bytes.
112    ///
113    /// # Example
114    ///
115    /// ```
116    /// use tinystr::tinystr;
117    /// use tinystr::TinyAsciiStr;
118    ///
119    /// assert_eq!(
120    ///     TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
121    ///     Ok(tinystr!(3, "GB"))
122    /// );
123    /// assert_eq!(
124    ///     TinyAsciiStr::<3>::try_from_raw(*b"USD"),
125    ///     Ok(tinystr!(3, "USD"))
126    /// );
127    /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
128    /// ```
129    pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> {
130        Self::try_from_utf8_inner(&raw, true)
131    }
132
133    pub(crate) const fn try_from_utf8_inner(
134        code_units: &[u8],
135        allow_trailing_null: bool,
136    ) -> Result<Self, ParseError> {
137        if code_units.len() > N {
138            return Err(ParseError::TooLong {
139                max: N,
140                len: code_units.len(),
141            });
142        }
143
144        let mut out = [0; N];
145        let mut i = 0;
146        let mut found_null = false;
147        // Indexing is protected by TinyStrError::TooLarge
148        #[allow(clippy::indexing_slicing)]
149        while i < code_units.len() {
150            let b = code_units[i];
151
152            if b == 0 {
153                found_null = true;
154            } else if b >= 0x80 {
155                return Err(ParseError::NonAscii);
156            } else if found_null {
157                // Error if there are contentful bytes after null
158                return Err(ParseError::ContainsNull);
159            }
160            out[i] = b;
161
162            i += 1;
163        }
164
165        if !allow_trailing_null && found_null {
166            // We found some trailing nulls, error
167            return Err(ParseError::ContainsNull);
168        }
169
170        Ok(Self {
171            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
172            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
173        })
174    }
175
176    pub(crate) const fn try_from_utf16_inner(
177        code_units: &[u16],
178        start: usize,
179        end: usize,
180        allow_trailing_null: bool,
181    ) -> Result<Self, ParseError> {
182        let len = end - start;
183        if len > N {
184            return Err(ParseError::TooLong { max: N, len });
185        }
186
187        let mut out = [0; N];
188        let mut i = 0;
189        let mut found_null = false;
190        // Indexing is protected by TinyStrError::TooLarge
191        #[allow(clippy::indexing_slicing)]
192        while i < len {
193            let b = code_units[start + i];
194
195            if b == 0 {
196                found_null = true;
197            } else if b >= 0x80 {
198                return Err(ParseError::NonAscii);
199            } else if found_null {
200                // Error if there are contentful bytes after null
201                return Err(ParseError::ContainsNull);
202            }
203            out[i] = b as u8;
204
205            i += 1;
206        }
207
208        if !allow_trailing_null && found_null {
209            // We found some trailing nulls, error
210            return Err(ParseError::ContainsNull);
211        }
212
213        Ok(Self {
214            // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
215            bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
216        })
217    }
218
219    #[inline]
220    pub const fn as_str(&self) -> &str {
221        // as_utf8 is valid utf8
222        unsafe { str::from_utf8_unchecked(self.as_utf8()) }
223    }
224
225    #[inline]
226    #[must_use]
227    pub const fn len(&self) -> usize {
228        if N <= 4 {
229            Aligned4::from_ascii_bytes(&self.bytes).len()
230        } else if N <= 8 {
231            Aligned8::from_ascii_bytes(&self.bytes).len()
232        } else {
233            let mut i = 0;
234            #[allow(clippy::indexing_slicing)] // < N is safe
235            while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
236                i += 1
237            }
238            i
239        }
240    }
241
242    #[inline]
243    #[must_use]
244    pub const fn is_empty(&self) -> bool {
245        self.bytes[0] as u8 == AsciiByte::B0 as u8
246    }
247
248    #[inline]
249    #[must_use]
250    pub const fn as_utf8(&self) -> &[u8] {
251        // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
252        // and changing the length of that slice to self.len() < N is safe.
253        unsafe {
254            core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
255        }
256    }
257
258    #[inline]
259    #[must_use]
260    pub const fn all_bytes(&self) -> &[u8; N] {
261        // SAFETY: `self.bytes` has same size as [u8; N]
262        unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
263    }
264
265    #[inline]
266    #[must_use]
267    /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
268    ///
269    /// If `M < len()` the string gets truncated, otherwise only the
270    /// memory representation changes.
271    pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
272        let mut bytes = [0; M];
273        let mut i = 0;
274        // Indexing is protected by the loop guard
275        #[allow(clippy::indexing_slicing)]
276        while i < M && i < N {
277            bytes[i] = self.bytes[i] as u8;
278            i += 1;
279        }
280        // `self.bytes` only contains ASCII bytes, with no null bytes between
281        // ASCII characters, so this also holds for `bytes`.
282        unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
283    }
284
285    #[inline]
286    #[must_use]
287    /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string,
288    /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`.
289    ///
290    /// If `Q < N + M`, the string gets truncated.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// use tinystr::tinystr;
296    /// use tinystr::TinyAsciiStr;
297    ///
298    /// let abc = tinystr!(6, "abc");
299    /// let defg = tinystr!(6, "defg");
300    ///
301    /// // The concatenation is successful if Q is large enough...
302    /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg"));
303    /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg"));
304    /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg"));
305    /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg"));
306    ///
307    /// /// ...but it truncates of Q is too small.
308    /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef"));
309    /// assert_eq!(abc.concat(defg), tinystr!(2, "ab"));
310    /// ```
311    pub const fn concat<const M: usize, const Q: usize>(
312        self,
313        other: TinyAsciiStr<M>,
314    ) -> TinyAsciiStr<Q> {
315        let mut result = self.resize::<Q>();
316        let mut i = self.len();
317        let mut j = 0;
318        // Indexing is protected by the loop guard
319        #[allow(clippy::indexing_slicing)]
320        while i < Q && j < M {
321            result.bytes[i] = other.bytes[j];
322            i += 1;
323            j += 1;
324        }
325        result
326    }
327
328    /// # Safety
329    /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
330    /// between ASCII characters
331    #[must_use]
332    pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self {
333        Self {
334            bytes: AsciiByte::to_ascii_byte_array(&code_units),
335        }
336    }
337}
338
// Expands to the body of a `const fn is_*(&self) -> bool` predicate.
//
// For `N <= 4` and `N <= 8` the check is delegated to the method `$check_int` of
// `Aligned4` / `Aligned8`. For larger `N` the bytes are scanned one by one up to
// the first NUL byte using `u8` predicates. The expansion contains `return`
// statements, so it must be used as the tail expression of the predicate.
//
// Arms:
// 1. `(self, int_check, u8_check)`: every byte must satisfy `u8_check`.
// 2. `(self, int_check, !first, !later)`: the first byte must NOT satisfy
//    `first`, and every later byte must NOT satisfy `later`.
// 3. `(self, int_check, first, later)`: the first byte must satisfy `first`,
//    and every later byte must satisfy `later` (despite the `_inv` suffix of
//    the metavariable names, nothing is inverted in this arm).
macro_rules! check_is {
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above, so the scan starts at index 1
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            // NOTE(review): for an empty string the first byte is NUL, which fails
            // the positive check, so this arm returns false for empty strings when
            // N > 8. Confirm that the `Aligned4`/`Aligned8` paths agree.
            if !($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            // The first byte was handled above, so the scan starts at index 1
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
403
404impl<const N: usize> TinyAsciiStr<N> {
    /// Checks if the value is composed of ASCII alphabetic characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic());
    /// assert!(!s2.is_ascii_alphabetic());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic(&self) -> bool {
        // N <= 8: integer-based check on `Aligned4`/`Aligned8`.
        // N > 8: every byte before the NUL padding must be an ASCII letter.
        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
    }
426
    /// Checks if the value is composed of ASCII alphanumeric characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z', or
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphanumeric());
    /// assert!(!s2.is_ascii_alphanumeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphanumeric(&self) -> bool {
        // N <= 8: integer-based check on `Aligned4`/`Aligned8`.
        // N > 8: every byte before the NUL padding must be an ASCII letter or digit.
        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
    }
449
    /// Checks if the value is composed of ASCII decimal digits:
    ///
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_numeric());
    /// assert!(!s2.is_ascii_numeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_numeric(&self) -> bool {
        // N <= 8: integer-based check on `Aligned4`/`Aligned8`.
        // N > 8: every byte before the NUL padding must be an ASCII digit
        // (note the per-byte predicate is named `is_ascii_digit`).
        check_is!(self, is_ascii_numeric, is_ascii_digit)
    }
470
    /// Checks if the value is in ASCII lower case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_lowercase());
    /// assert!(s2.is_ascii_lowercase());
    /// assert!(s3.is_ascii_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_lowercase(&self) -> bool {
        // Negated form: for N > 8 the check fails as soon as any byte is an ASCII
        // uppercase letter, which is why non-letters are ignored.
        check_is!(
            self,
            is_ascii_lowercase,
            !is_ascii_uppercase,
            !is_ascii_uppercase
        )
    }
498
    /// Checks if the value is in ASCII title case.
    ///
    /// This verifies that the first character is not an ASCII lowercase letter and
    /// that no other character is an ASCII uppercase letter.
    /// Non-letter characters are ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_titlecase());
    /// assert!(s2.is_ascii_titlecase());
    /// assert!(s3.is_ascii_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_titlecase(&self) -> bool {
        // Negated form: for N > 8 the first byte must not be lowercase and the
        // remaining bytes must not be uppercase.
        check_is!(
            self,
            is_ascii_titlecase,
            !is_ascii_lowercase,
            !is_ascii_uppercase
        )
    }
527
    /// Checks if the value is in ASCII upper case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_uppercase());
    /// assert!(s2.is_ascii_uppercase());
    /// assert!(!s3.is_ascii_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_uppercase(&self) -> bool {
        // Negated form: for N > 8 the check fails as soon as any byte is an ASCII
        // lowercase letter, which is why non-letters are ignored.
        check_is!(
            self,
            is_ascii_uppercase,
            !is_ascii_lowercase,
            !is_ascii_lowercase
        )
    }
555
    /// Checks if the value is composed of ASCII alphabetic lower case characters:
    ///
    ///  * U+0061 'a' ..= U+007A 'z',
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_lowercase());
    /// assert!(!s2.is_ascii_alphabetic_lowercase());
    /// assert!(!s3.is_ascii_alphabetic_lowercase());
    /// assert!(s4.is_ascii_alphabetic_lowercase());
    /// assert!(!s5.is_ascii_alphabetic_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
        // Positive form: for N > 8 the first byte and every later byte before the
        // NUL padding must be an ASCII lowercase letter.
        // NOTE(review): with N > 8 an empty string fails the first-byte check and
        // yields false; confirm the N <= 8 integer path behaves the same.
        check_is!(
            self,
            is_ascii_alphabetic_lowercase,
            is_ascii_lowercase,
            is_ascii_lowercase
        )
    }
587
    /// Checks if the value is composed of ASCII alphabetic characters, with the first
    /// character being ASCII uppercase, and all others ASCII lowercase.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic_titlecase());
    /// assert!(!s2.is_ascii_alphabetic_titlecase());
    /// assert!(!s3.is_ascii_alphabetic_titlecase());
    /// assert!(!s4.is_ascii_alphabetic_titlecase());
    /// assert!(!s5.is_ascii_alphabetic_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
        // Positive form: for N > 8 the first byte must be an uppercase letter and
        // every later byte before the NUL padding a lowercase letter.
        // NOTE(review): with N > 8 an empty string fails the first-byte check and
        // yields false; confirm the N <= 8 integer path behaves the same.
        check_is!(
            self,
            is_ascii_alphabetic_titlecase,
            is_ascii_uppercase,
            is_ascii_lowercase
        )
    }
617
    /// Checks if the value is composed of ASCII alphabetic upper case characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z',
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_uppercase());
    /// assert!(!s2.is_ascii_alphabetic_uppercase());
    /// assert!(!s3.is_ascii_alphabetic_uppercase());
    /// assert!(s4.is_ascii_alphabetic_uppercase());
    /// assert!(!s5.is_ascii_alphabetic_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
        // Positive form: for N > 8 the first byte and every later byte before the
        // NUL padding must be an ASCII uppercase letter.
        // NOTE(review): with N > 8 an empty string fails the first-byte check and
        // yields false; confirm the N <= 8 integer path behaves the same.
        check_is!(
            self,
            is_ascii_alphabetic_uppercase,
            is_ascii_uppercase,
            is_ascii_uppercase
        )
    }
649}
650
// Expands to the body of a `const fn to_*(mut self) -> Self` case conversion.
//
// * `$to` is the conversion method called on `Aligned4` / `Aligned8` when N <= 8.
// * `$later_char_to` is the `u8` conversion applied to every byte before the NUL
//   padding when N > 8.
// * `$first_char_to` (optional) is a `u8` conversion applied to byte 0 afterwards
//   when N > 8, overriding what `$later_char_to` did to the first byte.
//
// The expansion mutates `$self.bytes` and evaluates to `$self`.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        let mut i = 0;
        if N <= 4 {
            let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else if N <= 8 {
            let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            // Won't panic because self.bytes has length N and aligned has length >= N
            #[allow(clippy::indexing_slicing)]
            while i < N {
                $self.bytes[i] = aligned[i];
                i += 1;
            }
        } else {
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                // (the value transmuted is the case-converted form of a stored byte)
                unsafe {
                    $self.bytes[i] = core::mem::transmute::<u8, AsciiByte>(
                        ($self.bytes[i] as u8).$later_char_to()
                    );
                }
                i += 1;
            }
            // SAFETY: AsciiByte is repr(u8) and has same size as u8
            // Indexing byte 0 is fine here because this branch only runs for N > 8.
            $(
                $self.bytes[0] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(($self.bytes[0] as u8).$first_char_to())
                };
            )?
        }
        $self
    }};
}
692
693impl<const N: usize> TinyAsciiStr<N> {
    /// Returns this string converted to its ASCII lower case equivalent.
    ///
    /// The method takes `self` by value and returns the converted string; since the
    /// type is `Copy`, the caller's original value is left untouched.
    ///
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_lowercase(mut self) -> Self {
        // No first-character override: every byte is lowercased the same way.
        to!(self, to_ascii_lowercase, to_ascii_lowercase)
    }
712
    /// Returns this string converted to its ASCII title case equivalent.
    ///
    /// The method takes `self` by value and returns the converted string; since the
    /// type is `Copy`, the caller's original value is left untouched.
    ///
    /// The first character is converted to ASCII uppercase; the remaining characters
    /// are converted to ASCII lowercase.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_titlecase(mut self) -> Self {
        // For N > 8 all bytes are lowercased first, then byte 0 is uppercased.
        to!(
            self,
            to_ascii_titlecase,
            to_ascii_lowercase,
            to_ascii_uppercase
        )
    }
737
    /// Returns this string converted to its ASCII upper case equivalent.
    ///
    /// The method takes `self` by value and returns the converted string; since the
    /// type is `Copy`, the caller's original value is left untouched.
    ///
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_uppercase(mut self) -> Self {
        // No first-character override: every byte is uppercased the same way.
        to!(self, to_ascii_uppercase, to_ascii_uppercase)
    }
756}
757
758impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
759    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
760        fmt::Debug::fmt(self.as_str(), f)
761    }
762}
763
764impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
765    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
766        fmt::Display::fmt(self.as_str(), f)
767    }
768}
769
770impl<const N: usize> Deref for TinyAsciiStr<N> {
771    type Target = str;
772    #[inline]
773    fn deref(&self) -> &str {
774        self.as_str()
775    }
776}
777
778impl<const N: usize> Borrow<str> for TinyAsciiStr<N> {
779    #[inline]
780    fn borrow(&self) -> &str {
781        self.as_str()
782    }
783}
784
785impl<const N: usize> FromStr for TinyAsciiStr<N> {
786    type Err = ParseError;
787    #[inline]
788    fn from_str(s: &str) -> Result<Self, Self::Err> {
789        Self::try_from_str(s)
790    }
791}
792
793impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
794    fn eq(&self, other: &str) -> bool {
795        self.deref() == other
796    }
797}
798
799impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
800    fn eq(&self, other: &&str) -> bool {
801        self.deref() == *other
802    }
803}
804
/// Compares the string contents (without NUL padding) against an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        self.as_str() == other.as_str()
    }
}
811
/// Symmetric counterpart: compares an owned `String` against a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        self.as_str() == other.as_str()
    }
}
818
819#[cfg(test)]
820mod test {
821    use super::*;
822    use rand::distributions::Distribution;
823    use rand::distributions::Standard;
824    use rand::rngs::SmallRng;
825    use rand::seq::SliceRandom;
826    use rand::SeedableRng;
827
    // Hand-picked inputs shared by the tests below. They mix lengths from 2 to 16
    // characters (so they exercise several `TinyAsciiStr` capacities), different
    // letter cases, digits, and punctuation next to the letter ranges ("@@[`{").
    const STRINGS: [&str; 26] = [
        "Latn",
        "laTn",
        "windows",
        "AR",
        "Hans",
        "macos",
        "AT",
        "infiniband",
        "FR",
        "en",
        "Cyrl",
        "FromIntegral",
        "NO",
        "419",
        "MacintoshOSX2019",
        "a3z",
        "A3z",
        "A3Z",
        "a3Z",
        "3A",
        "3Z",
        "3a",
        "3z",
        "@@[`{",
        "UK",
        "E12",
    ];
856
857    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
858        let mut rng = SmallRng::seed_from_u64(2022);
859        // Need to do this in 2 steps since the RNG is needed twice
860        let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap())
861            .take(num_strings)
862            .collect::<Vec<usize>>();
863        string_lengths
864            .iter()
865            .map(|len| {
866                Standard
867                    .sample_iter(&mut rng)
868                    .filter(|b: &u8| *b > 0 && *b < 0x80)
869                    .take(*len)
870                    .collect::<Vec<u8>>()
871            })
872            .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII"))
873            .collect()
874    }
875
876    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
877    where
878        F1: Fn(&str) -> T,
879        F2: Fn(TinyAsciiStr<N>) -> T,
880        T: core::fmt::Debug + core::cmp::PartialEq,
881    {
882        for s in STRINGS
883            .into_iter()
884            .map(str::to_owned)
885            .chain(gen_strings(100, &[3, 4, 5, 8, 12]))
886        {
887            let t = match TinyAsciiStr::<N>::from_str(&s) {
888                Ok(t) => t,
889                Err(ParseError::TooLong { .. }) => continue,
890                Err(e) => panic!("{}", e),
891            };
892            let expected = reference_f(&s);
893            let actual = tinystr_f(t);
894            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
895
896            let s_utf16: Vec<u16> = s.encode_utf16().collect();
897            let t = match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) {
898                Ok(t) => t,
899                Err(ParseError::TooLong { .. }) => continue,
900                Err(e) => panic!("{}", e),
901            };
902            let expected = reference_f(&s);
903            let actual = tinystr_f(t);
904            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
905        }
906    }
907
908    #[test]
909    fn test_is_ascii_alphabetic() {
910        fn check<const N: usize>() {
911            check_operation(
912                |s| s.chars().all(|c| c.is_ascii_alphabetic()),
913                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t),
914            )
915        }
916        check::<2>();
917        check::<3>();
918        check::<4>();
919        check::<5>();
920        check::<8>();
921        check::<16>();
922    }
923
924    #[test]
925    fn test_is_ascii_alphanumeric() {
926        fn check<const N: usize>() {
927            check_operation(
928                |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
929                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t),
930            )
931        }
932        check::<2>();
933        check::<3>();
934        check::<4>();
935        check::<5>();
936        check::<8>();
937        check::<16>();
938    }
939
940    #[test]
941    fn test_is_ascii_numeric() {
942        fn check<const N: usize>() {
943            check_operation(
944                |s| s.chars().all(|c| c.is_ascii_digit()),
945                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t),
946            )
947        }
948        check::<2>();
949        check::<3>();
950        check::<4>();
951        check::<5>();
952        check::<8>();
953        check::<16>();
954    }
955
956    #[test]
957    fn test_is_ascii_lowercase() {
958        fn check<const N: usize>() {
959            check_operation(
960                |s| {
961                    s == TinyAsciiStr::<16>::try_from_str(s)
962                        .unwrap()
963                        .to_ascii_lowercase()
964                        .as_str()
965                },
966                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t),
967            )
968        }
969        check::<2>();
970        check::<3>();
971        check::<4>();
972        check::<5>();
973        check::<8>();
974        check::<16>();
975    }
976
977    #[test]
978    fn test_is_ascii_titlecase() {
979        fn check<const N: usize>() {
980            check_operation(
981                |s| {
982                    s == TinyAsciiStr::<16>::try_from_str(s)
983                        .unwrap()
984                        .to_ascii_titlecase()
985                        .as_str()
986                },
987                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t),
988            )
989        }
990        check::<2>();
991        check::<3>();
992        check::<4>();
993        check::<5>();
994        check::<8>();
995        check::<16>();
996    }
997
998    #[test]
999    fn test_is_ascii_uppercase() {
1000        fn check<const N: usize>() {
1001            check_operation(
1002                |s| {
1003                    s == TinyAsciiStr::<16>::try_from_str(s)
1004                        .unwrap()
1005                        .to_ascii_uppercase()
1006                        .as_str()
1007                },
1008                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t),
1009            )
1010        }
1011        check::<2>();
1012        check::<3>();
1013        check::<4>();
1014        check::<5>();
1015        check::<8>();
1016        check::<16>();
1017    }
1018
1019    #[test]
1020    fn test_is_ascii_alphabetic_lowercase() {
1021        fn check<const N: usize>() {
1022            check_operation(
1023                |s| {
1024                    // Check alphabetic
1025                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
1026                    // Check lowercase
1027                    s == TinyAsciiStr::<16>::try_from_str(s)
1028                        .unwrap()
1029                        .to_ascii_lowercase()
1030                        .as_str()
1031                },
1032                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t),
1033            )
1034        }
1035        check::<2>();
1036        check::<3>();
1037        check::<4>();
1038        check::<5>();
1039        check::<8>();
1040        check::<16>();
1041    }
1042
1043    #[test]
1044    fn test_is_ascii_alphabetic_titlecase() {
1045        fn check<const N: usize>() {
1046            check_operation(
1047                |s| {
1048                    // Check alphabetic
1049                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
1050                    // Check titlecase
1051                    s == TinyAsciiStr::<16>::try_from_str(s)
1052                        .unwrap()
1053                        .to_ascii_titlecase()
1054                        .as_str()
1055                },
1056                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t),
1057            )
1058        }
1059        check::<2>();
1060        check::<3>();
1061        check::<4>();
1062        check::<5>();
1063        check::<8>();
1064        check::<16>();
1065    }
1066
1067    #[test]
1068    fn test_is_ascii_alphabetic_uppercase() {
1069        fn check<const N: usize>() {
1070            check_operation(
1071                |s| {
1072                    // Check alphabetic
1073                    s.chars().all(|c| c.is_ascii_alphabetic()) &&
1074                    // Check uppercase
1075                    s == TinyAsciiStr::<16>::try_from_str(s)
1076                        .unwrap()
1077                        .to_ascii_uppercase()
1078                        .as_str()
1079                },
1080                |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t),
1081            )
1082        }
1083        check::<2>();
1084        check::<3>();
1085        check::<4>();
1086        check::<5>();
1087        check::<8>();
1088        check::<16>();
1089    }
1090
1091    #[test]
1092    fn test_to_ascii_lowercase() {
1093        fn check<const N: usize>() {
1094            check_operation(
1095                |s| {
1096                    s.chars()
1097                        .map(|c| c.to_ascii_lowercase())
1098                        .collect::<String>()
1099                },
1100                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(),
1101            )
1102        }
1103        check::<2>();
1104        check::<3>();
1105        check::<4>();
1106        check::<5>();
1107        check::<8>();
1108        check::<16>();
1109    }
1110
1111    #[test]
1112    fn test_to_ascii_titlecase() {
1113        fn check<const N: usize>() {
1114            check_operation(
1115                |s| {
1116                    let mut r = s
1117                        .chars()
1118                        .map(|c| c.to_ascii_lowercase())
1119                        .collect::<String>();
1120                    // Safe because the string is nonempty and an ASCII string
1121                    unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() };
1122                    r
1123                },
1124                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(),
1125            )
1126        }
1127        check::<2>();
1128        check::<3>();
1129        check::<4>();
1130        check::<5>();
1131        check::<8>();
1132        check::<16>();
1133    }
1134
1135    #[test]
1136    fn test_to_ascii_uppercase() {
1137        fn check<const N: usize>() {
1138            check_operation(
1139                |s| {
1140                    s.chars()
1141                        .map(|c| c.to_ascii_uppercase())
1142                        .collect::<String>()
1143                },
1144                |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(),
1145            )
1146        }
1147        check::<2>();
1148        check::<3>();
1149        check::<4>();
1150        check::<5>();
1151        check::<8>();
1152        check::<16>();
1153    }
1154
1155    #[test]
1156    fn lossy_constructor() {
1157        assert_eq!(TinyAsciiStr::<4>::from_utf8_lossy(b"", b'?').as_str(), "");
1158        assert_eq!(
1159            TinyAsciiStr::<4>::from_utf8_lossy(b"oh\0o", b'?').as_str(),
1160            "oh?o"
1161        );
1162        assert_eq!(
1163            TinyAsciiStr::<4>::from_utf8_lossy(b"\0", b'?').as_str(),
1164            "?"
1165        );
1166        assert_eq!(
1167            TinyAsciiStr::<4>::from_utf8_lossy(b"toolong", b'?').as_str(),
1168            "tool"
1169        );
1170        assert_eq!(
1171            TinyAsciiStr::<4>::from_utf8_lossy(&[b'a', 0x80, 0xFF, b'1'], b'?').as_str(),
1172            "a??1"
1173        );
1174    }
1175}
tinystr/ascii.rs

tinystr/
ascii.rs