1use crate::{
2 err::{perr, ParseErrorKind::*},
3 parse::{check_suffix, hex_digit_value},
4 ParseError,
5};
6
7
8pub(crate) fn unescape(
22 input: &str,
23 unicode: bool,
24 byte_escapes: bool,
25 allow_nul: bool,
26) -> Result<(Unescape, usize), ParseError> {
27 let first = input.as_bytes().get(1).ok_or(perr(0, UnterminatedEscape))?;
28 let out = match first {
29 b'\'' => (Unescape::Byte(b'\''), 2),
31 b'"' => (Unescape::Byte(b'"'), 2),
32
33 b'n' => (Unescape::Byte(b'\n'), 2),
35 b'r' => (Unescape::Byte(b'\r'), 2),
36 b't' => (Unescape::Byte(b'\t'), 2),
37 b'\\' => (Unescape::Byte(b'\\'), 2),
38 b'0' => if allow_nul {
39 (Unescape::Byte(b'\0'), 2)
40 } else {
41 return Err(perr(0..2, DisallowedNulEscape))
42 },
43 b'x' => {
44 let hex_string = input.get(2..4)
45 .ok_or(perr(0..input.len(), UnterminatedEscape))?
46 .as_bytes();
47 let first = hex_digit_value(hex_string[0]).ok_or(perr(0..4, InvalidXEscape))?;
48 let second = hex_digit_value(hex_string[1]).ok_or(perr(0..4, InvalidXEscape))?;
49 let value = second + 16 * first;
50
51 if !byte_escapes && value > 0x7F {
52 return Err(perr(0..4, NonAsciiXEscape));
53 }
54
55 if !allow_nul && value == 0 {
56 return Err(perr(0..4, DisallowedNulEscape));
57 }
58
59 (Unescape::Byte(value), 4)
60 }
61
62 b'u' => {
64 if !unicode {
65 return Err(perr(0..2, UnicodeEscapeInByteLiteral));
66 }
67
68 if input.as_bytes().get(2) != Some(&b'{') {
69 return Err(perr(0..2, UnicodeEscapeWithoutBrace));
70 }
71
72 let closing_pos = input.bytes().position(|b| b == b'}')
73 .ok_or(perr(0..input.len(), UnterminatedUnicodeEscape))?;
74
75 let inner = &input[3..closing_pos];
76 if inner.as_bytes().first() == Some(&b'_') {
77 return Err(perr(3, InvalidStartOfUnicodeEscape));
78 }
79
80 let mut v: u32 = 0;
81 let mut digit_count = 0;
82 for (i, b) in inner.bytes().enumerate() {
83 if b == b'_' {
84 continue;
85 }
86
87 let digit = hex_digit_value(b).ok_or(perr(3 + i, NonHexDigitInUnicodeEscape))?;
88
89 if digit_count == 6 {
90 return Err(perr(3 + i, TooManyDigitInUnicodeEscape));
91 }
92 digit_count += 1;
93 v = 16 * v + digit as u32;
94 }
95
96 if !allow_nul && v == 0 {
97 return Err(perr(0..closing_pos + 1, DisallowedNulEscape));
98 }
99
100 let c = std::char::from_u32(v)
101 .ok_or(perr(0..closing_pos + 1, InvalidUnicodeEscapeChar))?;
102
103 (Unescape::Unicode(c), closing_pos + 1)
104 }
105
106 _ => return Err(perr(0..2, UnknownEscape)),
107 };
108
109 Ok(out)
110}
111
/// The value produced by decoding one escape sequence.
pub(crate) enum Unescape {
    /// A single raw byte.
    Byte(u8),
    /// A full Unicode scalar value.
    Unicode(char),
}

impl Unescape {
    /// Converts the value into a `char`.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Byte` holding a value above `0x7F`.
    pub(crate) fn unwrap_char(self) -> char {
        let byte = match self {
            Self::Unicode(ch) => return ch,
            Self::Byte(byte) => byte,
        };
        assert!(byte.is_ascii(), "non ASCII byte");
        char::from(byte)
    }

    /// Converts the value into a byte.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Unicode` value.
    pub(crate) fn unwrap_byte(self) -> u8 {
        if let Self::Byte(byte) = self {
            byte
        } else {
            panic!("unexpected unicode escape value")
        }
    }
}
138
139pub(crate) trait EscapeContainer {
140 fn new() -> Self;
141 fn is_empty(&self) -> bool;
142 fn push(&mut self, v: Unescape);
143 fn push_str(&mut self, s: &str);
144}
145
146impl EscapeContainer for Vec<u8> {
147 fn new() -> Self {
148 Self::new()
149 }
150 fn is_empty(&self) -> bool {
151 self.is_empty()
152 }
153 fn push_str(&mut self, s: &str) {
154 self.extend_from_slice(s.as_bytes());
155 }
156 fn push(&mut self, v: Unescape) {
157 match v {
158 Unescape::Byte(b) => self.push(b),
159 Unescape::Unicode(c) => {
160 let start = self.len();
161 self.resize(self.len() + c.len_utf8(), 0);
162 c.encode_utf8(&mut self[start..]);
163 }
164 }
165 }
166}
167
168impl EscapeContainer for String {
169 fn new() -> Self {
170 Self::new()
171 }
172 fn is_empty(&self) -> bool {
173 self.is_empty()
174 }
175 fn push_str(&mut self, s: &str) {
176 self.push_str(s);
177 }
178 fn push(&mut self, v: Unescape) {
179 self.push(v.unwrap_char());
180 }
181}
182
183
/// Returns `true` for the bytes that are skipped after a string continuation
/// (a backslash immediately followed by a newline): space, tab and newline.
///
/// Carriage return is deliberately not part of this set here.
fn is_string_continue_skipable_whitespace(b: u8) -> bool {
    match b {
        b' ' | b'\t' | b'\n' => true,
        _ => false,
    }
}
189
190#[inline(never)]
192pub(crate) fn unescape_string<C: EscapeContainer>(
193 input: &str,
194 offset: usize,
195 unicode: bool,
196 byte_escapes: bool,
197 allow_nul: bool,
198) -> Result<(Option<C>, usize), ParseError> {
199 let mut closing_quote_pos = None;
200 let mut i = offset;
201 let mut end_last_escape = offset;
202 let mut value = C::new();
203 while i < input.len() {
204 match input.as_bytes()[i] {
205 b'\\' if input.as_bytes().get(i + 1) == Some(&b'\n') => {
207 value.push_str(&input[end_last_escape..i]);
208
209 let end_escape = input[i + 2..].bytes()
211 .position(|b| !is_string_continue_skipable_whitespace(b))
212 .ok_or(perr(None, UnterminatedString))?;
213
214 i += 2 + end_escape;
215 end_last_escape = i;
216 }
217 b'\\' => {
218 let rest = &input[i..input.len() - 1];
219 let (c, len) = unescape(rest, unicode, byte_escapes, allow_nul)
220 .map_err(|e| e.offset_span(i))?;
221 value.push_str(&input[end_last_escape..i]);
222 value.push(c);
223 i += len;
224 end_last_escape = i;
225 }
226 b'\r' => return Err(perr(i, CarriageReturn)),
227 b'"' => {
228 closing_quote_pos = Some(i);
229 break;
230 }
231 b'\0' if !allow_nul => return Err(perr(i, NulByte)),
232 b if !unicode && !b.is_ascii() => return Err(perr(i, NonAsciiInByteLiteral)),
233 _ => i += 1,
234 }
235 }
236
237 let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedString))?;
238
239 let start_suffix = closing_quote_pos + 1;
240 let suffix = &input[start_suffix..];
241 check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
242
243 let value = if value.is_empty() {
247 None
248 } else {
249 value.push_str(&input[end_last_escape..closing_quote_pos]);
252 Some(value)
253 };
254
255 Ok((value, start_suffix))
256}
257
258#[inline(never)]
261pub(crate) fn scan_raw_string(
262 input: &str,
263 offset: usize,
264 unicode: bool,
265 allow_nul: bool,
266) -> Result<(u8, usize), ParseError> {
267 let num_hashes = input[offset..].bytes().position(|b| b != b'#')
269 .ok_or(perr(None, InvalidLiteral))?;
270
271 if num_hashes > 256 {
272 return Err(perr(offset..offset + num_hashes, TooManyHashes));
273 }
274
275 if input.as_bytes().get(offset + num_hashes) != Some(&b'"') {
276 return Err(perr(None, InvalidLiteral));
277 }
278 let start_inner = offset + num_hashes + 1;
279 let hashes = &input[offset..num_hashes + offset];
280
281 let mut closing_quote_pos = None;
282 let mut i = start_inner;
283 while i < input.len() {
284 let b = input.as_bytes()[i];
285 if b == b'"' && input[i + 1..].starts_with(hashes) {
286 closing_quote_pos = Some(i);
287 break;
288 }
289
290 if b == b'\r' {
294 return Err(perr(i, CarriageReturn));
295 }
296
297 if b == b'\0' && !allow_nul {
298 return Err(perr(i, NulByte));
299 }
300
301 if !unicode {
302 if !b.is_ascii() {
303 return Err(perr(i, NonAsciiInByteLiteral));
304 }
305 }
306
307 i += 1;
308 }
309
310 let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedRawString))?;
311
312 let start_suffix = closing_quote_pos + num_hashes + 1;
313 let suffix = &input[start_suffix..];
314 check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
315
316 Ok((num_hashes as u8, start_suffix))
317}