litrs/cstr/
mod.rs

1use std::{
2    ffi::{CStr, CString},
3    fmt,
4    ops::Range,
5};
6
7use crate::{
8    err::{perr, ParseErrorKind::*},
9    escape::{scan_raw_string, unescape_string},
10    Buffer, ParseError,
11};
12
13
14/// A C string or raw C string literal, e.g. `c"hello"` or `cr#"abc"def"#`.
15///
16/// See [the reference][ref] for more information.
17///
18/// [ref]: https://doc.rust-lang.org/reference/tokens.html#c-string-and-raw-c-string-literals
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct CStringLit<B: Buffer> {
21    /// The raw input.
22    raw: B,
23
24    /// The string value (with all escaped unescaped) as CString. This is not an
25    /// `Option` as we always have to add the trailing zero byte.
26    value: CString,
27
28    /// The number of hash signs in case of a raw string literal, or `None` if
29    /// it's not a raw string literal.
30    num_hashes: Option<u8>,
31
32    /// Start index of the suffix or `raw.len()` if there is no suffix.
33    start_suffix: usize,
34}
35
36impl<B: Buffer> CStringLit<B> {
37    /// Parses the input as a (raw) byte string literal. Returns an error if the
38    /// input is invalid or represents a different kind of literal.
39    pub fn parse(input: B) -> Result<Self, ParseError> {
40        if input.is_empty() {
41            return Err(perr(None, Empty));
42        }
43        if !input.starts_with(r#"c""#) && !input.starts_with("cr") {
44            return Err(perr(None, InvalidCStringLiteralStart));
45        }
46
47        let (value, num_hashes, start_suffix) = parse_impl(&input)?;
48        Ok(Self { raw: input, value, num_hashes, start_suffix })
49    }
50
51    /// Returns the string value this literal represents (where all escapes have
52    /// been turned into their respective values).
53    pub fn value(&self) -> &CStr {
54        &self.value
55    }
56
57    /// Like `value` but returns an owned version of the value.
58    pub fn into_value(self) -> CString {
59        self.value
60    }
61
62    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
63    pub fn suffix(&self) -> &str {
64        &(*self.raw)[self.start_suffix..]
65    }
66
67    /// Returns whether this literal is a raw string literal (starting with
68    /// `cr`).
69    pub fn is_raw_c_string(&self) -> bool {
70        self.num_hashes.is_some()
71    }
72
73    /// Returns the raw input that was passed to `parse`.
74    pub fn raw_input(&self) -> &str {
75        &self.raw
76    }
77
78    /// Returns the raw input that was passed to `parse`, potentially owned.
79    pub fn into_raw_input(self) -> B {
80        self.raw
81    }
82}
83
/// Computes the range within the raw literal that covers only its content,
/// i.e. without the leading `c"` / `cr#…#"`, the closing quote, any closing
/// hash signs, and the suffix.
///
/// `num_hashes` is `None` for a non-raw literal and `start_suffix` is the index
/// at which the suffix starts.
fn inner_range(num_hashes: Option<u8>, start_suffix: usize) -> Range<usize> {
    let (opening_len, closing_len) = match num_hashes.map(usize::from) {
        // `c"` in front, `"` at the end.
        None => (2, 1),
        // `cr`, the hashes and `"` in front; `"` and the hashes at the end.
        Some(hashes) => (2 + hashes + 1, hashes + 1),
    };

    opening_len..start_suffix - closing_len
}
91
92impl CStringLit<&str> {
93    /// Makes a copy of the underlying buffer and returns the owned version of
94    /// `Self`.
95    pub fn into_owned(self) -> CStringLit<String> {
96        CStringLit {
97            raw: self.raw.to_owned(),
98            value: self.value,
99            num_hashes: self.num_hashes,
100            start_suffix: self.start_suffix,
101        }
102    }
103}
104
105impl<B: Buffer> fmt::Display for CStringLit<B> {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        f.pad(&self.raw)
108    }
109}
110
111
112/// Precondition: input has to start with either `b"` or `br`.
113#[inline(never)]
114fn parse_impl(input: &str) -> Result<(CString, Option<u8>, usize), ParseError> {
115    let (vec, num_hashes, start_suffix) = if input.starts_with("cr") {
116        scan_raw_string(input, 2, true, false)
117            .map(|(num, start_suffix)| (None, Some(num), start_suffix))?
118    } else {
119        unescape_string::<Vec<u8>>(input, 2, true, true, false)
120            .map(|(v, start_suffix)| (v, None, start_suffix))?
121    };
122
123
124    let inner_range = inner_range(num_hashes, start_suffix);
125    let vec = vec.unwrap_or_else(|| input[inner_range].as_bytes().to_vec());
126    let value = CString::new(vec).unwrap(); // we already checked for nul bytes
127
128    Ok((value, num_hashes, start_suffix))
129}
130
131
132#[cfg(test)]
133mod tests;