ureq/
header.rs

1use crate::error::{Error, ErrorKind};
2use std::fmt;
3use std::str::{from_utf8, FromStr};
4
/// Raw bytes of a status line or header line.
///
/// Such a line may contain non-UTF-8 characters, which is why the backing
/// store is a `Vec<u8>` rather than a `String`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct HeaderLine(Vec<u8>);
9
10impl From<String> for HeaderLine {
11    fn from(s: String) -> Self {
12        HeaderLine(s.into_bytes())
13    }
14}
15
16impl From<Vec<u8>> for HeaderLine {
17    fn from(b: Vec<u8>) -> Self {
18        HeaderLine(b)
19    }
20}
21
22impl HeaderLine {
23    pub fn into_string_lossy(self) -> String {
24        // Try to avoid an extra allcation.
25        String::from_utf8(self.0)
26            .unwrap_or_else(|e| String::from_utf8_lossy(&e.into_bytes()).to_string())
27    }
28
29    pub fn is_empty(&self) -> bool {
30        self.0.is_empty()
31    }
32
33    fn as_bytes(&self) -> &[u8] {
34        &self.0
35    }
36
37    pub fn into_header(self) -> Result<Header, Error> {
38        // The header name should always be ascii, we can read anything up to the
39        // ':' delimiter byte-by-byte.
40        let mut index = 0;
41
42        for c in self.as_bytes() {
43            if *c == b':' {
44                break;
45            }
46            if !is_tchar(c) {
47                return Err(Error::new(
48                    ErrorKind::BadHeader,
49                    Some(format!("Invalid char ({:0x?}) while looking for ':'", *c)),
50                ));
51            }
52            index += 1;
53        }
54
55        Ok(Header { line: self, index })
56    }
57}
58
59impl fmt::Display for HeaderLine {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        write!(f, "{}", String::from_utf8_lossy(&self.0))
62    }
63}
64
65#[derive(Clone, PartialEq, Eq)]
66/// Wrapper type for a header field.
67/// <https://tools.ietf.org/html/rfc7230#section-3.2>
68pub(crate) struct Header {
69    // Line contains the unmodified bytes of single header field.
70    // It does not contain the final CRLF.
71    line: HeaderLine,
72    // Index is the position of the colon within the header field.
73    // Invariant: index > 0
74    // Invariant: index + 1 < line.len()
75    index: usize,
76}
77
78impl fmt::Debug for Header {
79    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
80        write!(f, "{}", self.line)
81    }
82}
83
84impl Header {
85    pub fn new(name: &str, value: &str) -> Self {
86        let line = format!("{}: {}", name, value).into();
87        let index = name.len();
88        Header { line, index }
89    }
90
91    /// The header name.
92    pub fn name(&self) -> &str {
93        let bytes = &self.line.as_bytes()[0..self.index];
94        // Since we validate the header name in HeaderLine::into_header, we
95        // are guaranteed it is valid utf-8 at this point.
96        from_utf8(bytes).expect("Legal chars in header name")
97    }
98
99    /// The header value.
100    ///
101    /// For non-utf8 headers this returns [`None`] (use [`Header::value_raw()`]).
102    pub fn value(&self) -> Option<&str> {
103        let bytes = &self.line.as_bytes()[self.index + 1..];
104        from_utf8(bytes)
105            .map(|s| s.trim())
106            .ok()
107            // ensure all bytes are valid field name.
108            .filter(|s| s.as_bytes().iter().all(is_field_vchar_or_obs_fold))
109    }
110
111    /// The header value as a byte slice.
112    ///
113    /// For legacy reasons, the HTTP spec allows headers to be non-ascii characters.
114    /// Typically such headers are encoded in a non-utf8 encoding (such as iso-8859-1).
115    ///
116    /// ureq can't know what encoding the header is in, but this function provides
117    /// an escape hatch for users that need to handle such headers.
118    #[allow(unused)]
119    pub fn value_raw(&self) -> &[u8] {
120        let mut bytes = &self.line.as_bytes()[self.index + 1..];
121
122        if !bytes.is_empty() {
123            // trim front
124            while !bytes.is_empty() && bytes[0].is_ascii_whitespace() {
125                bytes = &bytes[1..];
126            }
127            // trim back
128            while !bytes.is_empty() && bytes[bytes.len() - 1].is_ascii_whitespace() {
129                bytes = &bytes[..(bytes.len() - 1)];
130            }
131        }
132
133        bytes
134    }
135
136    /// Compares the given str to the header name ignoring case.
137    pub fn is_name(&self, other: &str) -> bool {
138        self.name().eq_ignore_ascii_case(other)
139    }
140
141    pub(crate) fn validate(&self) -> Result<(), Error> {
142        let bytes = self.line.as_bytes();
143        let name_raw = &bytes[0..self.index];
144        let value_raw = &bytes[self.index + 1..];
145
146        if !valid_name(name_raw) || !valid_value(value_raw) {
147            Err(ErrorKind::BadHeader.msg(format!("invalid header '{}'", self.line)))
148        } else {
149            Ok(())
150        }
151    }
152}
153
154/// For non-utf8 headers this returns [`None`] (use [`get_header_raw()`]).
155pub(crate) fn get_header<'h>(headers: &'h [Header], name: &str) -> Option<&'h str> {
156    headers
157        .iter()
158        .find(|h| h.is_name(name))
159        .and_then(|h| h.value())
160}
161
162#[allow(unused)]
163pub(crate) fn get_header_raw<'h>(headers: &'h [Header], name: &str) -> Option<&'h [u8]> {
164    headers
165        .iter()
166        .find(|h| h.is_name(name))
167        .map(|h| h.value_raw())
168}
169
170pub(crate) fn get_all_headers<'h>(headers: &'h [Header], name: &str) -> Vec<&'h str> {
171    headers
172        .iter()
173        .filter(|h| h.is_name(name))
174        .filter_map(|h| h.value())
175        .collect()
176}
177
178pub(crate) fn has_header(headers: &[Header], name: &str) -> bool {
179    get_header(headers, name).is_some()
180}
181
182pub(crate) fn add_header(headers: &mut Vec<Header>, header: Header) {
183    let name = header.name();
184    if !name.starts_with("x-") && !name.starts_with("X-") {
185        headers.retain(|h| h.name() != name);
186    }
187    headers.push(header);
188}
189
190// https://tools.ietf.org/html/rfc7230#section-3.2
191// Each header field consists of a case-insensitive field name followed
192// by a colon (":"), optional leading whitespace, the field value, and
193// optional trailing whitespace.
194// field-name     = token
195// token = 1*tchar
196// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
197// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
198fn valid_name(name: &[u8]) -> bool {
199    !name.is_empty() && name.iter().all(is_tchar)
200}
201
/// Returns `true` if `b` is a `tchar`: an ASCII letter or digit, or one of
/// the punctuation bytes ``!#$%&'*+-.^_`|~``.
#[inline]
pub(crate) fn is_tchar(b: &u8) -> bool {
    const SYMBOLS: &[u8] = b"!#$%&'*+-.^_`|~";
    b.is_ascii_alphanumeric() || SYMBOLS.contains(b)
}
212
213// https://tools.ietf.org/html/rfc7230#section-3.2
214// Note that field-content has an errata:
215// https://www.rfc-editor.org/errata/eid4189
216// field-value    = *( field-content / obs-fold )
217// field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
218// field-vchar    = VCHAR / obs-text
219//
220// obs-fold       = CRLF 1*( SP / HTAB )
221//               ; obsolete line folding
222//               ; see Section 3.2.4
223// https://tools.ietf.org/html/rfc5234#appendix-B.1
224// VCHAR          =  %x21-7E
225//                        ; visible (printing) characters
226fn valid_value(value: &[u8]) -> bool {
227    value.iter().all(is_field_vchar_or_obs_fold)
228}
229
/// Returns `true` for the bytes accepted inside a header value: SP, HTAB and
/// the visible ASCII range 0x21-0x7E.
///
/// Everything else is rejected, including CR, LF, DEL (0x7F) and all bytes
/// >= 0x80.
#[inline]
fn is_field_vchar_or_obs_fold(b: &u8) -> bool {
    matches!(*b, b'\t' | b' '..=b'~')
}
238
239impl FromStr for Header {
240    type Err = Error;
241    fn from_str(s: &str) -> Result<Self, Self::Err> {
242        //
243        let line: HeaderLine = s.to_string().into();
244
245        let header = line.into_header()?;
246
247        header.validate()?;
248        Ok(header)
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    // `valid_name` accepts non-empty tokens only.
    #[test]
    fn test_valid_name() {
        assert!(valid_name(b"example"));
        assert!(valid_name(b"Content-Type"));
        assert!(valid_name(b"h-123456789"));
        assert!(!valid_name(b"Content-Type:"));
        assert!(!valid_name(b"Content-Type "));
        assert!(!valid_name(b" some-header"));
        assert!(!valid_name(b"\"invalid\""));
        assert!(!valid_name(b"G\xf6del"));
    }

    // `valid_value` accepts SP, HTAB and visible ASCII only.
    #[test]
    fn test_valid_value() {
        assert!(valid_value(b"example"));
        assert!(valid_value(b"foo bar"));
        assert!(valid_value(b" foobar "));
        assert!(valid_value(b" foo\tbar "));
        assert!(valid_value(b" foo~"));
        assert!(valid_value(b" !bar"));
        assert!(valid_value(b" "));
        assert!(!valid_value(b" \nfoo"));
        assert!(!valid_value(b"foo\x7F"));
    }

    // Parsing must fail with BadHeader for bad names as well as bad values.
    #[test]
    fn test_parse_invalid_name() {
        let cases = vec![
            "Content-Type  :",
            " Content-Type: foo",
            "Content-Type foo",
            "\"some-header\": foo",
            "Gödel: Escher, Bach",
            "Foo: \n",
            "Foo: \nbar",
            "Foo: \x7F bar",
        ];
        for c in cases {
            let result = c.parse::<Header>();
            assert!(
                matches!(result, Err(ref e) if e.kind() == ErrorKind::BadHeader),
                "'{}'.parse(): expected BadHeader, got {:?}",
                c,
                result
            );
        }
    }

    #[test]
    #[cfg(feature = "charset")]
    fn test_parse_non_utf8_value() {
        let (cow, _, _) = encoding_rs::WINDOWS_1252.encode("x-geo-stuff: älvsjö ");
        let bytes = cow.to_vec();
        let line: HeaderLine = bytes.into();
        let header = line.into_header().unwrap();
        assert_eq!(header.name(), "x-geo-stuff");
        assert_eq!(header.value(), None);
        assert_eq!(header.value_raw(), [228, 108, 118, 115, 106, 246]);
    }

    #[test]
    fn empty_value() {
        let h = "foo:".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some(""));
    }

    #[test]
    fn value_with_whitespace() {
        let h = "foo:      bar    ".parse::<Header>().unwrap();
        assert_eq!(h.value(), Some("bar"));
    }

    #[test]
    fn name_and_value() {
        let header: Header = "X-Forwarded-For: 127.0.0.1".parse().unwrap();
        assert_eq!("X-Forwarded-For", header.name());
        assert_eq!(header.value(), Some("127.0.0.1"));
        assert!(header.is_name("X-Forwarded-For"));
        assert!(header.is_name("x-forwarded-for"));
        assert!(header.is_name("X-FORWARDED-FOR"));
    }

    #[test]
    fn test_iso8859_utf8_mixup() {
        // C2 A5 is ¥ in UTF-8 and Â¥ in ISO-8859-1
        //
        // The bytes must be written as a byte string: in a `str` literal,
        // "\0xc2" is a NUL character followed by the text "xc2", not byte 0xC2.
        let b = b"header: \xc2\xa5".to_vec();
        let l: HeaderLine = b.into();
        let h = l.into_header().unwrap();
        // Valid UTF-8, but not an allowed field-value byte sequence.
        assert_eq!(h.value(), None);
        assert_eq!(h.value_raw(), [0xc2, 0xa5]);
    }
}