litrs/
lib.rs

1//! Parsing and inspecting Rust literal tokens.
2//!
3//! This library offers functionality to parse Rust literals, i.e. tokens in the
4//! Rust programming language that represent fixed values. The grammar for
5//! those is defined [here][ref].
6//!
7//! This kind of functionality already exists in the crate `syn`. However, as
8//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
9//! built. This crate also offers a bit more flexibility compared to `syn`
10//! (only regarding literals, of course).
11//!
12//!
13//! # Quick start
14//!
15//! | **`StringLit::try_from(tt)?.value()`** |
16//! | - |
17//!
18//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
19//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
20//! Calling `value()` returns the value that is represented by the literal.
21//!
22//! **Mini Example**
23//!
24//! ```ignore
25//! use proc_macro::TokenStream;
26//!
27//! #[proc_macro]
28//! pub fn foo(input: TokenStream) -> TokenStream {
29//!      let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
30//!      let string_value = match litrs::StringLit::try_from(first_token) {
31//!          Ok(string_lit) => string_lit.value(),
32//!          Err(e) => return e.to_compile_error(),
33//!      };
34//!
35//!      // `string_value` is the string value with all escapes resolved.
36//!      todo!()
37//! }
38//! ```
39//!
40//! # Overview
41//!
42//! The main types of this library are [`Literal`], representing any kind of
43//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
44//! specific kind of literal.
45//!
46//! There are different ways to obtain such a literal type:
47//!
48//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,
49//!     ParseError>`. For example: [`Literal::parse`] and
50//!     [`IntegerLit::parse`].
51//!
52//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
53//!     the `proc_macro` crate into a `Literal` from this crate.
54//!
55//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
56//!     `proc_macro::Literal` into a specific literal type of this crate. If
57//!     the input is a literal of a different kind, `Err(InvalidToken)` is
58//!     returned.
59//!
60//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a
61//!     literal type of this crate. An error is returned if the token tree is
62//!     not a literal, or if you are trying to turn it into a specific kind of
63//!     literal and the token tree is a different kind of literal.
64//!
65//! All of the `From` and `TryFrom` conversions also work for references to
66//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
67//! enabled, all these `From` and `TryFrom` impls also exist for the
68//! corresponding `proc_macro2` types.
69//!
70//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
71//! The `TryFrom<TokenTree>` impls check for those two special idents and
72//! return a [`BoolLit`] appropriately. For that reason, there is also no
73//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
74//! simply cannot represent bool literals.
75//!
76//!
77//! # Examples
78//!
79//! In a proc-macro:
80//!
81//! ```ignore
82//! use std::convert::TryFrom;
83//! use proc_macro::TokenStream;
84//! use litrs::FloatLit;
85//!
86//! #[proc_macro]
87//! pub fn foo(input: TokenStream) -> TokenStream {
88//!      let mut input = input.into_iter().collect::<Vec<_>>();
89//!      if input.len() != 1 {
90//!          // Please do proper error handling in your real code!
91//!          panic!("expected exactly one token as input");
92//!      }
93//!      let token = input.remove(0);
94//!
95//!      match FloatLit::try_from(token) {
96//!          Ok(float_lit) => { /* do something */ }
97//!          Err(e) => return e.to_compile_error(),
98//!      }
99//!
100//!      // Dummy output
101//!      TokenStream::new()
102//! }
103//! ```
104//!
105//! Parsing from string:
106//!
107//! ```
108//! use litrs::{FloatLit, Literal};
109//!
110//! // Parse a specific kind of literal (float in this case):
111//! let float_lit = FloatLit::parse("3.14f32");
112//! assert!(float_lit.is_ok());
113//! assert_eq!(float_lit.unwrap().suffix(), "f32");
114//! assert!(FloatLit::parse("'c'").is_err());
115//!
116//! // Parse any kind of literal. After parsing, you can inspect the literal
117//! // and decide what to do in each case.
118//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
119//! match lit {
120//!     Literal::Integer(lit) => { /* ... */ }
121//!     Literal::Float(lit) => { /* ... */ }
122//!     Literal::Bool(lit) => { /* ... */ }
123//!     Literal::Char(lit) => { /* ... */ }
124//!     Literal::String(lit) => { /* ... */ }
125//!     Literal::Byte(lit) => { /* ... */ }
126//!     Literal::ByteString(lit) => { /* ... */ }
127//!     Literal::CString(lit) => { /* ... */ }
128//!     _ => { /* ... */ }
129//! }
130//! ```
131//!
132//! # SemVer/Versioning guarantees
133//!
134//! Some technically breaking changes might be released as a minor/patch version
135//! in some situations, for example:
136//! - Bugs in this library (e.g. behavior different from rustc)
137//! - Rust making breaking changes, likely via new edition
138//!
139//! In all cases, releasing these changes as a minor/patch version is only done
140//! if it is expected that breakage is minimal or non-existent.
141//!
142//!
143//! # Crate features
144//!
145//! - `proc-macro2`: adds the dependency `proc_macro2`, a bunch of `From` and
146//!   `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
147//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
148//!   literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
149//!   only an approximate check (only in ASCII range) is done. If you are
150//!   writing a proc macro, you don't need to enable this as the suffix is
151//!   already checked by the compiler.
152//!
153//!
154//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
155//!
156
157#![deny(missing_debug_implementations)]
158
159extern crate proc_macro;
160
161#[cfg(test)]
162#[macro_use]
163mod test_util;
164
165#[cfg(test)]
166mod tests;
167
168mod bool;
169mod byte;
170mod bytestr;
171mod char;
172mod cstr;
173mod err;
174mod escape;
175mod float;
176mod impls;
177mod integer;
178mod parse;
179mod string;
180
181
182use std::{
183    borrow::{Borrow, Cow},
184    fmt,
185    ops::{Deref, Range},
186};
187
188pub use self::{
189    bool::BoolLit,
190    byte::ByteLit,
191    bytestr::ByteStringLit,
192    char::CharLit,
193    cstr::CStringLit,
194    err::{InvalidToken, ParseError},
195    float::{FloatLit, FloatType},
196    integer::{FromIntegerLiteral, IntegerBase, IntegerLit, IntegerType},
197    string::StringLit,
198};
199
200
201// ==============================================================================================
202// ===== `Literal` and type defs
203// ==============================================================================================
204
205/// A literal. This is the main type of this library.
206///
207/// This type is generic over the underlying buffer `B`, which can be `&str` or
208/// `String`.
209///
210/// To create this type, you have to either call [`Literal::parse`] with an
211/// input string or use the `From<_>` impls of this type. The impls are only
212/// available of the corresponding crate features are enabled (they are enabled
213/// by default).
214#[derive(Debug, Clone, PartialEq, Eq)]
215#[non_exhaustive]
216pub enum Literal<B: Buffer> {
217    Bool(BoolLit),
218    Integer(IntegerLit<B>),
219    Float(FloatLit<B>),
220    Char(CharLit<B>),
221    String(StringLit<B>),
222    Byte(ByteLit<B>),
223    ByteString(ByteStringLit<B>),
224    CString(CStringLit<B>),
225}
226
227impl<B: Buffer> Literal<B> {
228    /// Parses the given input as a Rust literal.
229    pub fn parse(input: B) -> Result<Self, ParseError> {
230        parse::parse(input)
231    }
232
233    /// Returns the suffix of this literal or `""` if it doesn't have one.
234    ///
235    /// Rust token grammar actually allows suffixes for all kinds of tokens.
236    /// Most Rust programmer only know the type suffixes for integer and
237    /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an
238    /// error. But it is possible to pass literals with arbitrary suffixes to
239    /// proc macros, for example:
240    ///
241    /// ```ignore
242    /// some_macro!(3.14f33  16px  '🦊'good_boy  "toph"beifong);
243    /// ```
244    ///
245    /// Boolean literals, not actually being literals, but idents, cannot have
246    /// suffixes and this method always returns `""` for those.
247    ///
248    /// There are some edge cases to be aware of:
249    /// - Integer suffixes must not start with `e` or `E` as that conflicts with
250    ///   the exponent grammar for floats. `0e1` is a float; `0eel` is also
251    ///   parsed as a float and results in an error.
252    /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a
253    ///   suffix von `gh`.
254    /// - Suffixes can contain and start with `_`, but for integer and number
255    ///   literals, `_` is eagerly parsed as part of the number, so `1_x` has
256    ///   the suffix `x`.
257    /// - The input `55f32` is regarded as integer literal with suffix `f32`.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use litrs::Literal;
263    ///
264    /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");
265    /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");
266    /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");
267    /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");
268    /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");
269    /// ```
270    pub fn suffix(&self) -> &str {
271        match self {
272            Literal::Bool(_) => "",
273            Literal::Integer(l) => l.suffix(),
274            Literal::Float(l) => l.suffix(),
275            Literal::Char(l) => l.suffix(),
276            Literal::String(l) => l.suffix(),
277            Literal::Byte(l) => l.suffix(),
278            Literal::ByteString(l) => l.suffix(),
279            Literal::CString(l) => l.suffix(),
280        }
281    }
282
283    /// Returns the raw input that was passed to `parse`.
284    ///
285    /// This can be used to compare literals with different `Buffer` types.
286    /// Note: this does not necessarily point to the same string buffer, in
287    /// particular, bool literals just return a `&'static str`.
288    pub fn raw_input(&self) -> &str {
289        match self {
290            Literal::Bool(l) => l.as_str(),
291            Literal::Integer(l) => l.raw_input(),
292            Literal::Float(l) => l.raw_input(),
293            Literal::Char(l) => l.raw_input(),
294            Literal::String(l) => l.raw_input(),
295            Literal::Byte(l) => l.raw_input(),
296            Literal::ByteString(l) => l.raw_input(),
297            Literal::CString(l) => l.raw_input(),
298        }
299    }
300}
301
302impl Literal<&str> {
303    /// Makes a copy of the underlying buffer and returns the owned version of
304    /// `Self`.
305    pub fn into_owned(self) -> Literal<String> {
306        match self {
307            Literal::Bool(l) => Literal::Bool(l.to_owned()),
308            Literal::Integer(l) => Literal::Integer(l.to_owned()),
309            Literal::Float(l) => Literal::Float(l.to_owned()),
310            Literal::Char(l) => Literal::Char(l.to_owned()),
311            Literal::String(l) => Literal::String(l.into_owned()),
312            Literal::Byte(l) => Literal::Byte(l.to_owned()),
313            Literal::ByteString(l) => Literal::ByteString(l.into_owned()),
314            Literal::CString(l) => Literal::CString(l.into_owned()),
315        }
316    }
317}
318
319impl<B: Buffer> fmt::Display for Literal<B> {
320    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321        match self {
322            Literal::Bool(l) => l.fmt(f),
323            Literal::Integer(l) => l.fmt(f),
324            Literal::Float(l) => l.fmt(f),
325            Literal::Char(l) => l.fmt(f),
326            Literal::String(l) => l.fmt(f),
327            Literal::Byte(l) => l.fmt(f),
328            Literal::ByteString(l) => l.fmt(f),
329            Literal::CString(l) => l.fmt(f),
330        }
331    }
332}
333
334
335// ==============================================================================================
336// ===== Buffer
337// ==============================================================================================
338
339/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
340///
341/// This is trait is implementation detail of this library, cannot be
342/// implemented in other crates and is not subject to semantic versioning.
343/// `litrs` only guarantees that this trait is implemented for `String` and
344/// `for<'a> &'a str`.
345pub trait Buffer: sealed::Sealed + Deref<Target = str> {
346    /// This is `String` for `String`, and `Cow<'a, str>` for `&'a str`.
347    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;
348
349    #[doc(hidden)]
350    fn into_cow(self) -> Self::Cow;
351
352    /// This is `Vec<u8>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
353    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;
354
355    #[doc(hidden)]
356    fn into_byte_cow(self) -> Self::ByteCow;
357
358    /// Cuts away some characters at the beginning and some at the end. Given
359    /// range has to be in bounds.
360    #[doc(hidden)]
361    fn cut(self, range: Range<usize>) -> Self;
362}
363
// Private module that holds the `Sealed` supertrait of `Buffer`. The trait is
// `pub`, but since this module is not, other crates cannot name `Sealed` and
// therefore cannot implement `Buffer` themselves.
mod sealed {
    /// Marker supertrait of `Buffer`; it has no items of its own.
    pub trait Sealed {}
}
367
368impl sealed::Sealed for &'_ str {}
369impl<'a> Buffer for &'a str {
370    #[doc(hidden)]
371    fn cut(self, range: Range<usize>) -> Self {
372        &self[range]
373    }
374
375    type Cow = Cow<'a, str>;
376    #[doc(hidden)]
377    fn into_cow(self) -> Self::Cow {
378        self.into()
379    }
380    type ByteCow = Cow<'a, [u8]>;
381    #[doc(hidden)]
382    fn into_byte_cow(self) -> Self::ByteCow {
383        self.as_bytes().into()
384    }
385}
386
387impl sealed::Sealed for String {}
388impl Buffer for String {
389    #[doc(hidden)]
390    fn cut(mut self, range: Range<usize>) -> Self {
391        // This is not the most efficient way, but it works. First we cut the
392        // end, then the beginning. Note that `drain` also removes the range if
393        // the iterator is not consumed.
394        self.truncate(range.end);
395        self.drain(..range.start);
396        self
397    }
398
399    type Cow = String;
400    #[doc(hidden)]
401    fn into_cow(self) -> Self::Cow {
402        self
403    }
404
405    type ByteCow = Vec<u8>;
406    #[doc(hidden)]
407    fn into_byte_cow(self) -> Self::ByteCow {
408        self.into_bytes()
409    }
410}