tinystr/ascii.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::asciibyte::AsciiByte;
6use crate::int_ops::{Aligned4, Aligned8};
7use crate::ParseError;
8use core::borrow::Borrow;
9use core::fmt;
10use core::ops::Deref;
11use core::str::{self, FromStr};
12
/// A fixed-capacity ASCII string stored inline in an `N`-byte array.
///
/// The constructors in this file only store non-NUL ASCII content and leave every
/// unused trailing position as NUL, so the logical length is the number of bytes
/// that precede the first NUL.
///
/// The derived comparison and hashing traits operate on the whole `N`-byte array,
/// NUL padding included.
#[repr(transparent)]
#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)]
pub struct TinyAsciiStr<const N: usize> {
    /// Content bytes followed by NUL padding up to `N`.
    bytes: [AsciiByte; N],
}
18
19impl<const N: usize> TinyAsciiStr<N> {
20 #[inline]
21 pub const fn try_from_str(s: &str) -> Result<Self, ParseError> {
22 Self::try_from_utf8(s.as_bytes())
23 }
24
25 /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice.
26 /// `code_units` may contain at most `N` non-null ASCII code points.
27 #[inline]
28 pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
29 Self::try_from_utf8_inner(code_units, false)
30 }
31
32 /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
33 /// `code_units` may contain at most `N` non-null ASCII code points.
34 #[inline]
35 pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> {
36 Self::try_from_utf16_inner(code_units, 0, code_units.len(), false)
37 }
38
39 /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units.
40 ///
41 /// Invalid code units, as well as null or non-ASCII code points
42 /// (i.e. those outside the range U+0001..=U+007F`)
43 /// will be replaced with the replacement byte.
44 ///
45 /// The input slice will be truncated if its length exceeds `N`.
46 pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self {
47 let mut out = [0; N];
48 let mut i = 0;
49 // Ord is not available in const, so no `.min(N)`
50 let len = if code_units.len() > N {
51 N
52 } else {
53 code_units.len()
54 };
55
56 // Indexing is protected by the len check above
57 #[allow(clippy::indexing_slicing)]
58 while i < len {
59 let b = code_units[i];
60 if b > 0 && b < 0x80 {
61 out[i] = b;
62 } else {
63 out[i] = replacement;
64 }
65 i += 1;
66 }
67
68 Self {
69 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
70 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
71 }
72 }
73
74 /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units.
75 ///
76 /// Invalid code units, as well as null or non-ASCII code points
77 /// (i.e. those outside the range U+0001..=U+007F`)
78 /// will be replaced with the replacement byte.
79 ///
80 /// The input slice will be truncated if its length exceeds `N`.
81 pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self {
82 let mut out = [0; N];
83 let mut i = 0;
84 // Ord is not available in const, so no `.min(N)`
85 let len = if code_units.len() > N {
86 N
87 } else {
88 code_units.len()
89 };
90
91 // Indexing is protected by the len check above
92 #[allow(clippy::indexing_slicing)]
93 while i < len {
94 let b = code_units[i];
95 if b > 0 && b < 0x80 {
96 out[i] = b as u8;
97 } else {
98 out[i] = replacement;
99 }
100 i += 1;
101 }
102
103 Self {
104 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
105 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
106 }
107 }
108
109 /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`.
110 ///
111 /// The byte array may contain trailing NUL bytes.
112 ///
113 /// # Example
114 ///
115 /// ```
116 /// use tinystr::tinystr;
117 /// use tinystr::TinyAsciiStr;
118 ///
119 /// assert_eq!(
120 /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0"),
121 /// Ok(tinystr!(3, "GB"))
122 /// );
123 /// assert_eq!(
124 /// TinyAsciiStr::<3>::try_from_raw(*b"USD"),
125 /// Ok(tinystr!(3, "USD"))
126 /// );
127 /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
128 /// ```
129 pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> {
130 Self::try_from_utf8_inner(&raw, true)
131 }
132
133 pub(crate) const fn try_from_utf8_inner(
134 code_units: &[u8],
135 allow_trailing_null: bool,
136 ) -> Result<Self, ParseError> {
137 if code_units.len() > N {
138 return Err(ParseError::TooLong {
139 max: N,
140 len: code_units.len(),
141 });
142 }
143
144 let mut out = [0; N];
145 let mut i = 0;
146 let mut found_null = false;
147 // Indexing is protected by TinyStrError::TooLarge
148 #[allow(clippy::indexing_slicing)]
149 while i < code_units.len() {
150 let b = code_units[i];
151
152 if b == 0 {
153 found_null = true;
154 } else if b >= 0x80 {
155 return Err(ParseError::NonAscii);
156 } else if found_null {
157 // Error if there are contentful bytes after null
158 return Err(ParseError::ContainsNull);
159 }
160 out[i] = b;
161
162 i += 1;
163 }
164
165 if !allow_trailing_null && found_null {
166 // We found some trailing nulls, error
167 return Err(ParseError::ContainsNull);
168 }
169
170 Ok(Self {
171 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
172 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
173 })
174 }
175
176 pub(crate) const fn try_from_utf16_inner(
177 code_units: &[u16],
178 start: usize,
179 end: usize,
180 allow_trailing_null: bool,
181 ) -> Result<Self, ParseError> {
182 let len = end - start;
183 if len > N {
184 return Err(ParseError::TooLong { max: N, len });
185 }
186
187 let mut out = [0; N];
188 let mut i = 0;
189 let mut found_null = false;
190 // Indexing is protected by TinyStrError::TooLarge
191 #[allow(clippy::indexing_slicing)]
192 while i < len {
193 let b = code_units[start + i];
194
195 if b == 0 {
196 found_null = true;
197 } else if b >= 0x80 {
198 return Err(ParseError::NonAscii);
199 } else if found_null {
200 // Error if there are contentful bytes after null
201 return Err(ParseError::ContainsNull);
202 }
203 out[i] = b as u8;
204
205 i += 1;
206 }
207
208 if !allow_trailing_null && found_null {
209 // We found some trailing nulls, error
210 return Err(ParseError::ContainsNull);
211 }
212
213 Ok(Self {
214 // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes`
215 bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) },
216 })
217 }
218
219 #[inline]
220 pub const fn as_str(&self) -> &str {
221 // as_utf8 is valid utf8
222 unsafe { str::from_utf8_unchecked(self.as_utf8()) }
223 }
224
225 #[inline]
226 #[must_use]
227 pub const fn len(&self) -> usize {
228 if N <= 4 {
229 Aligned4::from_ascii_bytes(&self.bytes).len()
230 } else if N <= 8 {
231 Aligned8::from_ascii_bytes(&self.bytes).len()
232 } else {
233 let mut i = 0;
234 #[allow(clippy::indexing_slicing)] // < N is safe
235 while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 {
236 i += 1
237 }
238 i
239 }
240 }
241
242 #[inline]
243 #[must_use]
244 pub const fn is_empty(&self) -> bool {
245 self.bytes[0] as u8 == AsciiByte::B0 as u8
246 }
247
248 #[inline]
249 #[must_use]
250 pub const fn as_utf8(&self) -> &[u8] {
251 // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`,
252 // and changing the length of that slice to self.len() < N is safe.
253 unsafe {
254 core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len())
255 }
256 }
257
258 #[inline]
259 #[must_use]
260 pub const fn all_bytes(&self) -> &[u8; N] {
261 // SAFETY: `self.bytes` has same size as [u8; N]
262 unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) }
263 }
264
265 #[inline]
266 #[must_use]
267 /// Resizes a `TinyAsciiStr<N>` to a `TinyAsciiStr<M>`.
268 ///
269 /// If `M < len()` the string gets truncated, otherwise only the
270 /// memory representation changes.
271 pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> {
272 let mut bytes = [0; M];
273 let mut i = 0;
274 // Indexing is protected by the loop guard
275 #[allow(clippy::indexing_slicing)]
276 while i < M && i < N {
277 bytes[i] = self.bytes[i] as u8;
278 i += 1;
279 }
280 // `self.bytes` only contains ASCII bytes, with no null bytes between
281 // ASCII characters, so this also holds for `bytes`.
282 unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }
283 }
284
285 #[inline]
286 #[must_use]
287 /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string,
288 /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`.
289 ///
290 /// If `Q < N + M`, the string gets truncated.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// use tinystr::tinystr;
296 /// use tinystr::TinyAsciiStr;
297 ///
298 /// let abc = tinystr!(6, "abc");
299 /// let defg = tinystr!(6, "defg");
300 ///
301 /// // The concatenation is successful if Q is large enough...
302 /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg"));
303 /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg"));
304 /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg"));
305 /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg"));
306 ///
307 /// /// ...but it truncates of Q is too small.
308 /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef"));
309 /// assert_eq!(abc.concat(defg), tinystr!(2, "ab"));
310 /// ```
311 pub const fn concat<const M: usize, const Q: usize>(
312 self,
313 other: TinyAsciiStr<M>,
314 ) -> TinyAsciiStr<Q> {
315 let mut result = self.resize::<Q>();
316 let mut i = self.len();
317 let mut j = 0;
318 // Indexing is protected by the loop guard
319 #[allow(clippy::indexing_slicing)]
320 while i < Q && j < M {
321 result.bytes[i] = other.bytes[j];
322 i += 1;
323 j += 1;
324 }
325 result
326 }
327
328 /// # Safety
329 /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes
330 /// between ASCII characters
331 #[must_use]
332 pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self {
333 Self {
334 bytes: AsciiByte::to_ascii_byte_array(&code_units),
335 }
336 }
337}
338
// Expands to the body of a `TinyAsciiStr<N>` predicate method.
//
// For `N <= 4` / `N <= 8` the check is delegated to the `$check_int` method of
// `Aligned4` / `Aligned8`. For larger `N` the bytes are scanned up to the first
// NUL using `u8` predicates:
//
// * arm 1: every byte must satisfy `$check_u8`;
// * arm 2 (`!a, !b`): the first byte must NOT satisfy `a`, later bytes must NOT satisfy `b`;
// * arm 3 (`a, b`): the first byte must satisfy `a`, later bytes must satisfy `b`.
//
// On the scanning path an empty string passes every arm, since there is no
// byte that could violate the predicate.
macro_rules! check_is {
    ($self:ident, $check_int:ident, $check_u8:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            let mut i = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            if ($self.bytes[0] as u8).$check_u8_0_inv() {
                return false;
            }
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if ($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
    ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => {
        if N <= 4 {
            Aligned4::from_ascii_bytes(&$self.bytes).$check_int()
        } else if N <= 8 {
            Aligned8::from_ascii_bytes(&$self.bytes).$check_int()
        } else {
            // Won't panic because N is > 8
            let first = $self.bytes[0] as u8;
            // A NUL first byte means the string is empty; the padding must not be
            // run through the predicate, otherwise an empty string would be rejected
            // here while the single-predicate arm accepts it.
            if first != AsciiByte::B0 as u8 && !first.$check_u8_0_inv() {
                return false;
            }
            let mut i = 1;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 {
                if !($self.bytes[i] as u8).$check_u8_1_inv() {
                    return false;
                }
                i += 1;
            }
            true
        }
    };
}
403
// Predicates over the string content. Each method expands `check_is!`, which
// delegates to `Aligned4` / `Aligned8` for `N <= 8` and otherwise scans the
// bytes up to the first NUL.
impl<const N: usize> TinyAsciiStr<N> {
    /// Checks if the value is composed of ASCII alphabetic characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic());
    /// assert!(!s2.is_ascii_alphabetic());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic(&self) -> bool {
        check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic)
    }

    /// Checks if the value is composed of ASCII alphanumeric characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z', or
    ///  * U+0061 'a' ..= U+007A 'z', or
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphanumeric());
    /// assert!(!s2.is_ascii_alphanumeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphanumeric(&self) -> bool {
        check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric)
    }

    /// Checks if the value is composed of ASCII decimal digits:
    ///
    ///  * U+0030 '0' ..= U+0039 '9'.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_numeric());
    /// assert!(!s2.is_ascii_numeric());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_numeric(&self) -> bool {
        check_is!(self, is_ascii_numeric, is_ascii_digit)
    }

    /// Checks if the value is in ASCII lower case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored.
    /// On the byte-scanning path (`N > 8`) this means the value contains no ASCII
    /// uppercase letter.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_lowercase());
    /// assert!(s2.is_ascii_lowercase());
    /// assert!(s3.is_ascii_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_lowercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_lowercase,
            !is_ascii_uppercase,
            !is_ascii_uppercase
        )
    }

    /// Checks if the value is in ASCII title case.
    ///
    /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase.
    /// Non-letter characters are ignored.
    /// On the byte-scanning path (`N > 8`) this means the first character is not an
    /// ASCII lowercase letter and no later character is an ASCII uppercase letter.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_titlecase());
    /// assert!(s2.is_ascii_titlecase());
    /// assert!(s3.is_ascii_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_titlecase(&self) -> bool {
        check_is!(
            self,
            is_ascii_titlecase,
            !is_ascii_lowercase,
            !is_ascii_uppercase
        )
    }

    /// Checks if the value is in ASCII upper case.
    ///
    /// All letter characters are checked for case. Non-letter characters are ignored.
    /// On the byte-scanning path (`N > 8`) this means the value contains no ASCII
    /// lowercase letter.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_uppercase());
    /// assert!(s2.is_ascii_uppercase());
    /// assert!(!s3.is_ascii_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_uppercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_uppercase,
            !is_ascii_lowercase,
            !is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic lower case characters:
    ///
    ///  * U+0061 'a' ..= U+007A 'z',
    ///
    /// Unlike [`Self::is_ascii_lowercase`], non-letter characters are rejected
    /// rather than ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_lowercase());
    /// assert!(!s2.is_ascii_alphabetic_lowercase());
    /// assert!(!s3.is_ascii_alphabetic_lowercase());
    /// assert!(s4.is_ascii_alphabetic_lowercase());
    /// assert!(!s5.is_ascii_alphabetic_lowercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_lowercase,
            is_ascii_lowercase,
            is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase.
    ///
    /// Unlike [`Self::is_ascii_titlecase`], non-letter characters are rejected
    /// rather than ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(s1.is_ascii_alphabetic_titlecase());
    /// assert!(!s2.is_ascii_alphabetic_titlecase());
    /// assert!(!s3.is_ascii_alphabetic_titlecase());
    /// assert!(!s4.is_ascii_alphabetic_titlecase());
    /// assert!(!s5.is_ascii_alphabetic_titlecase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_titlecase,
            is_ascii_uppercase,
            is_ascii_lowercase
        )
    }

    /// Checks if the value is composed of ASCII alphabetic upper case characters:
    ///
    ///  * U+0041 'A' ..= U+005A 'Z',
    ///
    /// Unlike [`Self::is_ascii_uppercase`], non-letter characters are rejected
    /// rather than ignored.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse.");
    /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse.");
    /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse.");
    /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse.");
    ///
    /// assert!(!s1.is_ascii_alphabetic_uppercase());
    /// assert!(!s2.is_ascii_alphabetic_uppercase());
    /// assert!(!s3.is_ascii_alphabetic_uppercase());
    /// assert!(s4.is_ascii_alphabetic_uppercase());
    /// assert!(!s5.is_ascii_alphabetic_uppercase());
    /// ```
    #[inline]
    #[must_use]
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
        check_is!(
            self,
            is_ascii_alphabetic_uppercase,
            is_ascii_uppercase,
            is_ascii_uppercase
        )
    }
}
650
// Expands to the body of a `TinyAsciiStr<N>` case-conversion method and
// evaluates to the converted `$self`.
//
// * `N <= 4` / `N <= 8`: the whole value is converted through the `$to` method
//   of `Aligned4` / `Aligned8` and the first `N` result bytes are copied back.
// * larger `N`: every content byte (up to the first NUL) is mapped with the `u8`
//   method `$later_char_to`; when `$first_char_to` is supplied, the first byte is
//   mapped with it afterwards.
macro_rules! to {
    ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{
        if N <= 4 {
            let converted = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut pos = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else if N <= 8 {
            let converted = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes();
            let mut pos = 0;
            // Won't panic because self.bytes has length N and converted has length >= N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                $self.bytes[pos] = converted[pos];
                pos += 1;
            }
        } else {
            let mut pos = 0;
            // Won't panic because self.bytes has length N
            #[allow(clippy::indexing_slicing)]
            while pos < N {
                let current = $self.bytes[pos] as u8;
                if current == AsciiByte::B0 as u8 {
                    // Reached the NUL padding: nothing left to convert.
                    break;
                }
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[pos] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(current.$later_char_to())
                };
                pos += 1;
            }
            $(
                let head = $self.bytes[0] as u8;
                // SAFETY: AsciiByte is repr(u8) and has same size as u8
                $self.bytes[0] = unsafe {
                    core::mem::transmute::<u8, AsciiByte>(head.$first_char_to())
                };
            )?
        }
        $self
    }};
}
692
// Case conversions. Each method takes `self` by value and returns the converted
// string; the conversion itself is the expansion of the `to!` macro.
impl<const N: usize> TinyAsciiStr<N> {
    /// Returns this string converted to its ASCII lower case equivalent.
    ///
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_lowercase(mut self) -> Self {
        to!(self, to_ascii_lowercase, to_ascii_lowercase)
    }

    /// Returns this string converted to its ASCII title case equivalent.
    ///
    /// The first character is converted to ASCII uppercase; the remaining characters
    /// are converted to ASCII lowercase.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_titlecase(), "Test");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_titlecase(mut self) -> Self {
        to!(
            self,
            to_ascii_titlecase,
            to_ascii_lowercase,
            to_ascii_uppercase
        )
    }

    /// Returns this string converted to its ASCII upper case equivalent.
    ///
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use tinystr::TinyAsciiStr;
    ///
    /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse.");
    ///
    /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3");
    /// ```
    #[inline]
    #[must_use]
    pub const fn to_ascii_uppercase(mut self) -> Self {
        to!(self, to_ascii_uppercase, to_ascii_uppercase)
    }
}
757
758impl<const N: usize> fmt::Debug for TinyAsciiStr<N> {
759 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
760 fmt::Debug::fmt(self.as_str(), f)
761 }
762}
763
764impl<const N: usize> fmt::Display for TinyAsciiStr<N> {
765 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
766 fmt::Display::fmt(self.as_str(), f)
767 }
768}
769
770impl<const N: usize> Deref for TinyAsciiStr<N> {
771 type Target = str;
772 #[inline]
773 fn deref(&self) -> &str {
774 self.as_str()
775 }
776}
777
778impl<const N: usize> Borrow<str> for TinyAsciiStr<N> {
779 #[inline]
780 fn borrow(&self) -> &str {
781 self.as_str()
782 }
783}
784
785impl<const N: usize> FromStr for TinyAsciiStr<N> {
786 type Err = ParseError;
787 #[inline]
788 fn from_str(s: &str) -> Result<Self, Self::Err> {
789 Self::try_from_str(s)
790 }
791}
792
793impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> {
794 fn eq(&self, other: &str) -> bool {
795 self.deref() == other
796 }
797}
798
799impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> {
800 fn eq(&self, other: &&str) -> bool {
801 self.deref() == *other
802 }
803}
804
/// Compares the content (without NUL padding) against an owned `String`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> {
    fn eq(&self, other: &alloc::string::String) -> bool {
        let content: &str = self.as_str();
        content == other.as_str()
    }
}
811
/// Compares an owned `String` against the content (without NUL padding) of a `TinyAsciiStr`.
#[cfg(feature = "alloc")]
impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String {
    fn eq(&self, other: &TinyAsciiStr<N>) -> bool {
        let content: &str = other.as_str();
        self.as_str() == content
    }
}
818
#[cfg(test)]
mod test {
    use super::*;
    use rand::distributions::Distribution;
    use rand::distributions::Standard;
    use rand::rngs::SmallRng;
    use rand::seq::SliceRandom;
    use rand::SeedableRng;

    /// Hand-picked inputs covering mixed case, digits and punctuation that sits
    /// right next to the letter ranges.
    const STRINGS: [&str; 26] = [
        "Latn",
        "laTn",
        "windows",
        "AR",
        "Hans",
        "macos",
        "AT",
        "infiniband",
        "FR",
        "en",
        "Cyrl",
        "FromIntegral",
        "NO",
        "419",
        "MacintoshOSX2019",
        "a3z",
        "A3z",
        "A3Z",
        "a3Z",
        "3A",
        "3Z",
        "3a",
        "3z",
        "@@[`{",
        "UK",
        "E12",
    ];

    /// Runs a const-generic `check::<N>()` helper for every capacity under test.
    macro_rules! check_all_sizes {
        ($check:ident) => {{
            $check::<2>();
            $check::<3>();
            $check::<4>();
            $check::<5>();
            $check::<8>();
            $check::<16>();
        }};
    }

    /// Generates `num_strings` pseudo-random non-null ASCII strings whose lengths
    /// are drawn from `allowed_lengths`, using a fixed seed.
    fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> {
        let mut rng = SmallRng::seed_from_u64(2022);

        // All lengths are drawn before any content, so the RNG is consumed in
        // two separate phases.
        let mut lengths: Vec<usize> = Vec::with_capacity(num_strings);
        for _ in 0..num_strings {
            lengths.push(*allowed_lengths.choose(&mut rng).unwrap());
        }

        let mut strings = Vec::with_capacity(num_strings);
        for len in lengths {
            let byte_vec: Vec<u8> = Standard
                .sample_iter(&mut rng)
                .filter(|b: &u8| *b > 0 && *b < 0x80)
                .take(len)
                .collect();
            strings.push(String::from_utf8(byte_vec).expect("All ASCII"));
        }
        strings
    }

    /// Compares `tinystr_f` against `reference_f` for every fixed and generated
    /// string that fits into `TinyAsciiStr<N>`, constructing the `TinyAsciiStr`
    /// from both UTF-8 and UTF-16.
    fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2)
    where
        F1: Fn(&str) -> T,
        F2: Fn(TinyAsciiStr<N>) -> T,
        T: core::fmt::Debug + core::cmp::PartialEq,
    {
        let compare = |t: TinyAsciiStr<N>, s: &str| {
            let expected = reference_f(s);
            let actual = tinystr_f(t);
            assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}");
        };

        let generated = gen_strings(100, &[3, 4, 5, 8, 12]);
        for s in STRINGS.into_iter().map(str::to_owned).chain(generated) {
            // Strings that do not fit into N bytes are skipped entirely.
            match TinyAsciiStr::<N>::from_str(&s) {
                Ok(t) => compare(t, &s),
                Err(ParseError::TooLong { .. }) => continue,
                Err(e) => panic!("{}", e),
            }

            let s_utf16: Vec<u16> = s.encode_utf16().collect();
            match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) {
                Ok(t) => compare(t, &s),
                Err(ParseError::TooLong { .. }) => continue,
                Err(e) => panic!("{}", e),
            }
        }
    }

    #[test]
    fn test_is_ascii_alphabetic() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphabetic()),
                |t: TinyAsciiStr<N>| t.is_ascii_alphabetic(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphanumeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_alphanumeric()),
                |t: TinyAsciiStr<N>| t.is_ascii_alphanumeric(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_numeric() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.chars().all(|c| c.is_ascii_digit()),
                |t: TinyAsciiStr<N>| t.is_ascii_numeric(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let lowered = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_lowercase();
                    s == lowered.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_lowercase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let titled = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_titlecase();
                    s == titled.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_titlecase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    let raised = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_uppercase();
                    s == raised.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_uppercase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // Check lowercase
                    let lowered = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_lowercase();
                    s == lowered.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_lowercase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // Check titlecase
                    let titled = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_titlecase();
                    s == titled.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_titlecase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_is_ascii_alphabetic_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Check alphabetic
                    if !s.chars().all(|c| c.is_ascii_alphabetic()) {
                        return false;
                    }
                    // Check uppercase
                    let raised = TinyAsciiStr::<16>::try_from_str(s)
                        .unwrap()
                        .to_ascii_uppercase();
                    s == raised.as_str()
                },
                |t: TinyAsciiStr<N>| t.is_ascii_alphabetic_uppercase(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_lowercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_lowercase(),
                |t: TinyAsciiStr<N>| t.to_ascii_lowercase().as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_titlecase() {
        fn check<const N: usize>() {
            check_operation(
                |s| {
                    // Lowercase everything, then uppercase the first byte. The
                    // input is a nonempty ASCII string, so the bytes stay valid UTF-8.
                    let mut bytes = s.to_ascii_lowercase().into_bytes();
                    bytes[0].make_ascii_uppercase();
                    String::from_utf8(bytes).unwrap()
                },
                |t: TinyAsciiStr<N>| t.to_ascii_titlecase().as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn test_to_ascii_uppercase() {
        fn check<const N: usize>() {
            check_operation(
                |s| s.to_ascii_uppercase(),
                |t: TinyAsciiStr<N>| t.to_ascii_uppercase().as_str().to_owned(),
            )
        }
        check_all_sizes!(check);
    }

    #[test]
    fn lossy_constructor() {
        /// Builds a 4-byte string from `input`, replacing invalid bytes with `?`.
        fn lossy(input: &[u8]) -> TinyAsciiStr<4> {
            TinyAsciiStr::<4>::from_utf8_lossy(input, b'?')
        }

        assert_eq!(lossy(b"").as_str(), "");
        assert_eq!(lossy(b"oh\0o").as_str(), "oh?o");
        assert_eq!(lossy(b"\0").as_str(), "?");
        assert_eq!(lossy(b"toolong").as_str(), "tool");
        assert_eq!(lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(), "a??1");
    }
}
1175}