uuid/
parser.rs

1// Copyright 2013-2014 The Rust Project Developers.
2// Copyright 2018 The Uuid Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! [`Uuid`] parsing constructs and utilities.
13//!
14//! [`Uuid`]: ../struct.Uuid.html
15
16use crate::{
17    error::*,
18    std::{convert::TryFrom, str},
19    Uuid,
20};
21
22#[cfg(feature = "std")]
23use crate::std::string::String;
24
25impl str::FromStr for Uuid {
26    type Err = Error;
27
28    fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
29        Uuid::parse_str(uuid_str)
30    }
31}
32
33impl TryFrom<&'_ str> for Uuid {
34    type Error = Error;
35
36    fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
37        Uuid::parse_str(uuid_str)
38    }
39}
40
/// Fallible conversion from an owned string, using the same parser as
/// `Uuid::parse_str`. The string is only borrowed for the duration of the
/// parse.
#[cfg(feature = "std")]
impl TryFrom<String> for Uuid {
    type Error = Error;

    /// Parses the contents of `uuid_str` exactly as `Uuid::parse_str` would.
    fn try_from(uuid_str: String) -> Result<Self, Self::Error> {
        // `as_str` names the borrowed type explicitly, so no inference over
        // `AsRef` targets is involved.
        Self::parse_str(uuid_str.as_str())
    }
}
49
50impl Uuid {
51    /// Parses a `Uuid` from a string of hexadecimal digits with optional
52    /// hyphens.
53    ///
54    /// Any of the formats generated by this module (simple, hyphenated, urn,
55    /// Microsoft GUID) are supported by this parsing function.
56    ///
57    /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
58    /// This method will be eventually deprecated in favor of `try_parse`.
59    ///
60    /// # Examples
61    ///
62    /// Parse a hyphenated UUID:
63    ///
64    /// ```
65    /// # use uuid::{Uuid, Version, Variant};
66    /// # fn main() -> Result<(), uuid::Error> {
67    /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
68    ///
69    /// assert_eq!(Some(Version::Random), uuid.get_version());
70    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
71    /// # Ok(())
72    /// # }
73    /// ```
74    ///
75    /// [`try_parse`]: #method.try_parse
76    pub fn parse_str(input: &str) -> Result<Uuid, Error> {
77        try_parse(input.as_bytes())
78            .map(Uuid::from_bytes)
79            .map_err(InvalidUuid::into_err)
80    }
81
82    /// Parses a `Uuid` from a string of hexadecimal digits with optional
83    /// hyphens.
84    ///
85    /// This function is similar to [`parse_str`], in fact `parse_str` shares
86    /// the same underlying parser. The difference is that if `try_parse`
87    /// fails, it won't generate very useful error messages. The `parse_str`
88    /// function will eventually be deprecated in favor of `try_parse`.
89    ///
90    /// To parse a UUID from a byte stream instead of a UTF8 string, see
91    /// [`try_parse_ascii`].
92    ///
93    /// # Examples
94    ///
95    /// Parse a hyphenated UUID:
96    ///
97    /// ```
98    /// # use uuid::{Uuid, Version, Variant};
99    /// # fn main() -> Result<(), uuid::Error> {
100    /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
101    ///
102    /// assert_eq!(Some(Version::Random), uuid.get_version());
103    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
104    /// # Ok(())
105    /// # }
106    /// ```
107    ///
108    /// [`parse_str`]: #method.parse_str
109    /// [`try_parse_ascii`]: #method.try_parse_ascii
110    pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
111        Self::try_parse_ascii(input.as_bytes())
112    }
113
114    /// Parses a `Uuid` from a string of hexadecimal digits with optional
115    /// hyphens.
116    ///
117    /// The input is expected to be a string of ASCII characters. This method
118    /// can be more convenient than [`try_parse`] if the UUID is being
119    /// parsed from a byte stream instead of from a UTF8 string.
120    ///
121    /// # Examples
122    ///
123    /// Parse a hyphenated UUID:
124    ///
125    /// ```
126    /// # use uuid::{Uuid, Version, Variant};
127    /// # fn main() -> Result<(), uuid::Error> {
128    /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
129    ///
130    /// assert_eq!(Some(Version::Random), uuid.get_version());
131    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
132    /// # Ok(())
133    /// # }
134    /// ```
135    ///
136    /// [`try_parse`]: #method.try_parse
137    pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
138        match try_parse(input) {
139            Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
140            // If parsing fails then we don't know exactly what went wrong
141            // In this case, we just return a generic error
142            Err(_) => Err(Error(ErrorKind::Other)),
143        }
144    }
145}
146
147const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
148    match (input.len(), input) {
149        // Inputs of 32 bytes must be a non-hyphenated UUID
150        (32, s) => parse_simple(s),
151        // Hyphenated UUIDs may be wrapped in various ways:
152        // - `{UUID}` for braced UUIDs
153        // - `urn:uuid:UUID` for URNs
154        // - `UUID` for a regular hyphenated UUID
155        (36, s)
156        | (38, [b'{', s @ .., b'}'])
157        | (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) => {
158            parse_hyphenated(s)
159        }
160        // Any other shaped input is immediately invalid
161        _ => Err(InvalidUuid(input)),
162    }
163}
164
165#[inline]
166#[allow(dead_code)]
167pub(crate) const fn parse_braced(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
168    if let (38, [b'{', s @ .., b'}']) = (input.len(), input) {
169        parse_hyphenated(s)
170    } else {
171        Err(InvalidUuid(input))
172    }
173}
174
175#[inline]
176#[allow(dead_code)]
177pub(crate) const fn parse_urn(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
178    if let (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) =
179        (input.len(), input)
180    {
181        parse_hyphenated(s)
182    } else {
183        Err(InvalidUuid(input))
184    }
185}
186
187#[inline]
188pub(crate) const fn parse_simple(s: &[u8]) -> Result<[u8; 16], InvalidUuid> {
189    // This length check here removes all other bounds
190    // checks in this function
191    if s.len() != 32 {
192        return Err(InvalidUuid(s));
193    }
194
195    let mut buf: [u8; 16] = [0; 16];
196    let mut i = 0;
197
198    while i < 16 {
199        // Convert a two-char hex value (like `A8`)
200        // into a byte (like `10101000`)
201        let h1 = HEX_TABLE[s[i * 2] as usize];
202        let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
203
204        // We use `0xff` as a sentinel value to indicate
205        // an invalid hex character sequence (like the letter `G`)
206        if h1 | h2 == 0xff {
207            return Err(InvalidUuid(s));
208        }
209
210        // The upper nibble needs to be shifted into position
211        // to produce the final byte value
212        buf[i] = SHL4_TABLE[h1 as usize] | h2;
213        i += 1;
214    }
215
216    Ok(buf)
217}
218
219#[inline]
220pub(crate) const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], InvalidUuid> {
221    // This length check here removes all other bounds
222    // checks in this function
223    if s.len() != 36 {
224        return Err(InvalidUuid(s));
225    }
226
227    // We look at two hex-encoded values (4 chars) at a time because
228    // that's the size of the smallest group in a hyphenated UUID.
229    // The indexes we're interested in are:
230    //
231    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
232    //            |   |   ||   ||   ||   ||   |   |
233    // hyphens  : |   |   8|  13|  18|  23|   |   |
234    // positions: 0   4    9   14   19   24  28  32
235
236    // First, ensure the hyphens appear in the right places
237    match [s[8], s[13], s[18], s[23]] {
238        [b'-', b'-', b'-', b'-'] => {}
239        _ => return Err(InvalidUuid(s)),
240    }
241
242    let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
243    let mut buf: [u8; 16] = [0; 16];
244    let mut j = 0;
245
246    while j < 8 {
247        let i = positions[j];
248
249        // The decoding here is the same as the simple case
250        // We're just dealing with two values instead of one
251        let h1 = HEX_TABLE[s[i as usize] as usize];
252        let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
253        let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
254        let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
255
256        if h1 | h2 | h3 | h4 == 0xff {
257            return Err(InvalidUuid(s));
258        }
259
260        buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
261        buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
262        j += 1;
263    }
264
265    Ok(buf)
266}
267
/// Maps every possible byte to the value of the hex digit it represents.
///
/// ASCII `0`-`9`, `a`-`f` and `A`-`F` map to `0..=15`. Every other byte
/// maps to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Start with every byte marked invalid, then fill in the hex digits
    let mut table = [0xff; 256];

    let mut digit: u8 = 0;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // Upper and lower case letters share the same values
    let mut letter: u8 = 0;
    while letter < 6 {
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
287
/// Maps every possible byte to itself shifted left by four bits, with the
/// bits shifted out of the byte discarded.
///
/// The parsers use it to move a decoded hex digit into the upper nibble of
/// an output byte.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut idx = 0usize;

    while idx < 256 {
        table[idx] = (idx as u8) << 4;
        idx += 1;
    }

    table
};
302
/// Tests for the string parsers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{std::string::ToString, tests::new};

    /// Every supported format parses, and all formats of the same UUID
    /// produce the same value.
    #[test]
    fn test_parse_uuid_v4_valid() {
        let from_hyphenated = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_simple = Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let from_urn = Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_guid = Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(from_hyphenated, from_simple);
        assert_eq!(from_hyphenated, from_urn);
        assert_eq!(from_hyphenated, from_guid);

        assert!(Uuid::parse_str("00000000000000000000000000000000").is_ok());
        assert!(Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4").is_ok());
        assert!(Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("01020304-1112-2122-3132-414243444546").is_ok());
        assert!(Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").is_ok());
        assert!(Uuid::parse_str("{6d93bade-bd9f-4e13-8914-9474e1e3567b}").is_ok());

        // Nil
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    /// Malformed inputs are rejected by `parse_str` with the exact error
    /// kind and payload pinned below.
    #[test]
    fn test_parse_uuid_v4_invalid() {
        // Empty input
        assert_eq!(
            Uuid::parse_str(""),
            Err(Error(ErrorKind::SimpleLength { len: 0 }))
        );

        // A single non-hex character
        assert_eq!(
            Uuid::parse_str("!"),
            Err(Error(ErrorKind::Char {
                character: '!',
                index: 1,
            }))
        );

        // Last group one character too long
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45"),
            Err(Error(ErrorKind::GroupLength {
                group: 4,
                len: 13,
                index: 25,
            }))
        );

        // Fourth group one character too short
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BBF-329BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 3,
                len: 3,
                index: 20,
            }))
        );

        // Non-hex letter inside a group
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4"),
            Err(Error(ErrorKind::Char {
                character: 'G',
                index: 21,
            }))
        );

        // Hyphens replaced by hex digits, leaving too few groups
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 2 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::GroupCount { count: 4 }))
        );

        // Truncated after the third group
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        // Hyphen replaced by a non-hex letter
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4"),
            Err(Error(ErrorKind::Char {
                character: 'X',
                index: 19,
            }))
        );

        // Opening brace without a closing brace
        assert_eq!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41"),
            Err(Error(ErrorKind::Char {
                character: '{',
                index: 1,
            }))
        );

        // Braced, but with too few groups between the braces
        assert_eq!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}"),
            Err(Error(ErrorKind::GroupCount { count: 3 }))
        );

        // Hyphens in the wrong positions
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 1,
                len: 3,
                index: 10,
            }))
        );

        // Last group too short
        assert_eq!(
            Uuid::parse_str("01020304-1112-2122-3132-41424344"),
            Err(Error(ErrorKind::GroupLength {
                group: 4,
                len: 8,
                index: 25,
            }))
        );

        // Simple format, one character too short
        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c"),
            Err(Error(ErrorKind::SimpleLength { len: 31 }))
        );

        // Simple format, one character too long
        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c88"),
            Err(Error(ErrorKind::SimpleLength { len: 33 }))
        );

        // Non-hex character in an input without hyphens
        assert_eq!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0cg8"),
            Err(Error(ErrorKind::Char {
                character: 'g',
                index: 32,
            }))
        );

        assert_eq!(
            Uuid::parse_str("67e5504410b1426%9247bb680e5fe0c8"),
            Err(Error(ErrorKind::Char {
                character: '%',
                index: 16,
            }))
        );

        // 36 hex digits without any hyphens
        assert_eq!(
            Uuid::parse_str("231231212212423424324323477343246663"),
            Err(Error(ErrorKind::SimpleLength { len: 36 }))
        );

        // Braces around a simple UUID
        assert_eq!(
            Uuid::parse_str("{00000000000000000000000000000000}"),
            Err(Error(ErrorKind::GroupCount { count: 1 }))
        );

        assert_eq!(
            Uuid::parse_str("67e550X410b1426f9247bb680e5fe0cd"),
            Err(Error(ErrorKind::Char {
                character: 'X',
                index: 7,
            }))
        );

        // A single stray hyphen in an otherwise simple UUID
        assert_eq!(
            Uuid::parse_str("67e550-4105b1426f9247bb680e5fe0c"),
            Err(Error(ErrorKind::GroupCount { count: 2 }))
        );

        // Fourth group one character too long
        assert_eq!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4"),
            Err(Error(ErrorKind::GroupLength {
                group: 3,
                len: 5,
                index: 20,
            }))
        );

        // A single multi-byte character
        assert_eq!(
            Uuid::parse_str("\u{bcf3c}"),
            Err(Error(ErrorKind::Char {
                character: '\u{bcf3c}',
                index: 1
            }))
        );
    }

    /// `try_parse` and `try_parse_ascii` accept the same formats as
    /// `parse_str` and produce the same value.
    #[test]
    fn test_try_parse_valid() {
        let expected = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();

        assert_eq!(
            Uuid::try_parse("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(),
            expected
        );
        assert_eq!(
            Uuid::try_parse("67e5504410b1426f9247bb680e5fe0c8").unwrap(),
            expected
        );
        assert_eq!(
            Uuid::try_parse("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(),
            expected
        );
        assert_eq!(
            Uuid::try_parse("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap(),
            expected
        );

        assert_eq!(
            Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(),
            expected
        );
        assert_eq!(
            Uuid::try_parse_ascii(b"67e5504410b1426f9247bb680e5fe0c8").unwrap(),
            expected
        );
    }

    /// `try_parse` and `try_parse_ascii` report every failure with the
    /// generic `ErrorKind::Other`.
    #[test]
    fn test_try_parse_invalid() {
        assert_eq!(Uuid::try_parse(""), Err(Error(ErrorKind::Other)));

        assert_eq!(
            Uuid::try_parse("67e5504410b1426f9247bb680e5fe0cg"),
            Err(Error(ErrorKind::Other))
        );

        assert_eq!(
            Uuid::try_parse("F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4"),
            Err(Error(ErrorKind::Other))
        );

        assert_eq!(
            Uuid::try_parse_ascii(b"{67e55044-10b1-426f-9247-bb680e5fe0c8"),
            Err(Error(ErrorKind::Other))
        );
    }

    #[test]
    fn test_roundtrip_default() {
        let uuid_orig = new();
        let orig_str = uuid_orig.to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        let uuid_orig = new();
        let orig_str = uuid_orig.hyphenated().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_simple() {
        let uuid_orig = new();
        let orig_str = uuid_orig.simple().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_urn() {
        let uuid_orig = new();
        let orig_str = uuid_orig.urn().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_braced() {
        let uuid_orig = new();
        let orig_str = uuid_orig.braced().to_string();
        let uuid_out = Uuid::parse_str(&orig_str).unwrap();
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_parse_urn() {
        let uuid_orig = new();
        let orig_str = uuid_orig.urn().to_string();
        let uuid_out = Uuid::from_bytes(parse_urn(orig_str.as_bytes()).unwrap());
        assert_eq!(uuid_orig, uuid_out);
    }

    #[test]
    fn test_roundtrip_parse_braced() {
        let uuid_orig = new();
        let orig_str = uuid_orig.braced().to_string();
        let uuid_out = Uuid::from_bytes(parse_braced(orig_str.as_bytes()).unwrap());
        assert_eq!(uuid_orig, uuid_out);
    }

    /// The format-specific helpers only accept their own format.
    #[test]
    fn test_parse_braced_and_urn_reject_other_formats() {
        assert!(parse_braced(b"67e55044-10b1-426f-9247-bb680e5fe0c8").is_err());
        assert!(parse_braced(b"67e5504410b1426f9247bb680e5fe0c8").is_err());
        assert!(parse_braced(b"urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").is_err());
        assert!(parse_braced(b"{67e55044-10b1-426f-9247-bb680e5fe0c8]").is_err());

        assert!(parse_urn(b"67e55044-10b1-426f-9247-bb680e5fe0c8").is_err());
        assert!(parse_urn(b"67e5504410b1426f9247bb680e5fe0c8").is_err());
        assert!(parse_urn(b"{67e55044-10b1-426f-9247-bb680e5fe0c8}").is_err());
        assert!(parse_urn(b"URN:UUID:67e55044-10b1-426f-9247-bb680e5fe0c8").is_err());
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // An embedded NUL byte is not a hex digit
        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8").is_err());

        // Neither is `0xff`, which additionally can never appear in valid UTF8
        assert!(Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\xffe0c8").is_err());
    }
}