ureq/
unit.rs

1use std::fmt::{self, Display};
2use std::io::{self, Write};
3use std::ops::Range;
4use std::time;
5
6use base64::{prelude::BASE64_STANDARD, Engine};
7use log::debug;
8use url::Url;
9
10#[cfg(feature = "cookies")]
11use cookie::Cookie;
12
13use crate::agent::RedirectAuthHeaders;
14use crate::body::{self, BodySize, Payload, SizedReader};
15use crate::error::{Error, ErrorKind};
16use crate::header;
17use crate::header::{get_header, Header};
18use crate::proxy::Proto;
19use crate::resolve::ArcResolver;
20use crate::response::Response;
21use crate::stream::{self, connect_test, Stream};
22use crate::Agent;
23
/// A Unit is a fully-built Request, ready to execute.
///
/// It bundles everything needed to write one request to the wire: the agent
/// (configuration and shared state), the method, the URL and the final header list.
///
/// *Internal API*
#[derive(Clone)]
pub(crate) struct Unit {
    /// Clone of the agent the request runs on; its config and state are read from here.
    pub agent: Agent,
    /// HTTP method, written as-is on the request line.
    pub method: String,
    /// Target URL of this request.
    pub url: Url,
    /// Whether the body is sent chunked; decided in `Unit::new` and handed to
    /// `body::send_body`.
    is_chunked: bool,
    /// Caller-supplied headers followed by the ones `Unit::new` derives.
    headers: Vec<Header>,
    /// Deadline as passed to `Unit::new`; it is only carried along in this file.
    /// Presumably the overall request timeout - confirm against where it is read.
    pub deadline: Option<time::Instant>,
}
36
37impl Unit {
38    //
39
40    pub(crate) fn new(
41        agent: &Agent,
42        method: &str,
43        url: &Url,
44        mut headers: Vec<Header>,
45        body: &SizedReader,
46        deadline: Option<time::Instant>,
47    ) -> Self {
48        //
49
50        let (is_transfer_encoding_set, mut is_chunked) = get_header(&headers, "transfer-encoding")
51            // if the user has set an encoding header, obey that.
52            .map(|enc| {
53                let is_transfer_encoding_set = !enc.is_empty();
54                let last_encoding = enc.split(',').last();
55                let is_chunked = last_encoding
56                    .map(|last_enc| last_enc.trim() == "chunked")
57                    .unwrap_or(false);
58                (is_transfer_encoding_set, is_chunked)
59            })
60            // otherwise, no chunking.
61            .unwrap_or((false, false));
62
63        let mut extra_headers = {
64            let mut extra = vec![];
65
66            // chunking and Content-Length headers are mutually exclusive
67            // also don't write this if the user has set it themselves
68            if !is_chunked && get_header(&headers, "content-length").is_none() {
69                // if the payload is of known size (everything beside an unsized reader), set
70                // Content-Length,
71                // otherwise, use the chunked Transfer-Encoding (only if no other Transfer-Encoding
72                // has been set
73                match body.size {
74                    BodySize::Known(size) => {
75                        extra.push(Header::new("Content-Length", &format!("{}", size)))
76                    }
77                    BodySize::Unknown => {
78                        if !is_transfer_encoding_set {
79                            extra.push(Header::new("Transfer-Encoding", "chunked"));
80                            is_chunked = true;
81                        }
82                    }
83                    BodySize::Empty => {}
84                }
85            }
86
87            let username = url.username();
88            let password = url.password().unwrap_or("");
89            if (!username.is_empty() || !password.is_empty())
90                && get_header(&headers, "authorization").is_none()
91            {
92                let encoded = BASE64_STANDARD.encode(format!("{}:{}", username, password));
93                extra.push(Header::new("Authorization", &format!("Basic {}", encoded)));
94            }
95
96            #[cfg(feature = "cookies")]
97            extra.extend(extract_cookies(agent, url).into_iter());
98
99            extra
100        };
101
102        headers.append(&mut extra_headers);
103
104        Unit {
105            agent: agent.clone(),
106            method: method.to_string(),
107            url: url.clone(),
108            is_chunked,
109            headers,
110            deadline,
111        }
112    }
113
114    pub fn resolver(&self) -> ArcResolver {
115        self.agent.state.resolver.clone()
116    }
117
118    #[cfg(test)]
119    pub fn header(&self, name: &str) -> Option<&str> {
120        header::get_header(&self.headers, name)
121    }
122    #[cfg(test)]
123    pub fn has(&self, name: &str) -> bool {
124        header::has_header(&self.headers, name)
125    }
126    #[cfg(test)]
127    pub fn all(&self, name: &str) -> Vec<&str> {
128        header::get_all_headers(&self.headers, name)
129    }
130
131    // Returns true if this request, with the provided body, is retryable.
132    pub(crate) fn is_retryable(&self, body: &SizedReader) -> bool {
133        // Per https://tools.ietf.org/html/rfc7231#section-8.1.3
134        // these methods are idempotent.
135        let idempotent = match self.method.as_str() {
136            "DELETE" | "GET" | "HEAD" | "OPTIONS" | "PUT" | "TRACE" => true,
137            _ => false,
138        };
139        // Unsized bodies aren't retryable because we can't rewind the reader.
140        // Sized bodies are retryable only if they are zero-length because of
141        // coincidences of the current implementation - the function responsible
142        // for retries doesn't have a way to replay a Payload.
143        let retryable_body = match body.size {
144            BodySize::Unknown => false,
145            BodySize::Known(0) => true,
146            BodySize::Known(_) => false,
147            BodySize::Empty => true,
148        };
149
150        idempotent && retryable_body
151    }
152}
153
/// Perform a connection. Follows redirects.
///
/// Sends `unit` with `body` and, while the response is a redirect that may be
/// followed, rebuilds the unit for the `Location` target and tries again.
/// Redirected requests are always sent with an empty body.
///
/// Returns the last response received, with `history` set to the URLs that were
/// redirected away from. Fails with `TooManyRedirects` when the configured limit is
/// reached, with `InvalidUrl` when `Location` cannot be joined onto the current URL,
/// and with whatever error `connect_inner` reports.
pub(crate) fn connect(
    mut unit: Unit,
    use_pooled: bool,
    mut body: SizedReader,
) -> Result<Response, Error> {
    // URLs we have been redirected away from, oldest first.
    let mut history = vec![];
    let mut resp = loop {
        let resp = connect_inner(&unit, use_pooled, body, &history)?;

        // handle redirects
        // anything outside 300..=398 (the range is half-open) is handed back as-is,
        // and so is every response when redirects are disabled (limit of 0).
        if !(300..399).contains(&resp.status()) || unit.agent.config.redirects == 0 {
            break resp;
        }
        // following this redirect would reach the configured maximum.
        if history.len() + 1 >= unit.agent.config.redirects as usize {
            return Err(ErrorKind::TooManyRedirects.msg(format!(
                "reached max redirects ({})",
                unit.agent.config.redirects
            )));
        }
        // the location header
        // without one there is nowhere to go: return the 3xx response itself.
        let location = match resp.header("location") {
            Some(l) => l,
            None => break resp,
        };

        let url = &unit.url;
        let method = &unit.method;
        // join location header to current url in case it is relative
        let new_url = url.join(location).map_err(|e| {
            ErrorKind::InvalidUrl
                .msg(format!("Bad redirection: {}", location))
                .src(e)
        })?;

        // perform the redirect differently depending on 3xx code.
        // `unit.method` is moved out in the arms below; `unit` is rebuilt before the
        // next iteration.
        let new_method = match resp.status() {
            // this is to follow how curl does it. POST, PUT etc change
            // to GET on a redirect.
            301 | 302 | 303 => match &method[..] {
                "GET" | "HEAD" => unit.method,
                _ => "GET".into(),
            },
            // never change the method for 307/308
            // only resend the request if it cannot have a body
            // NOTE: DELETE is intentionally excluded: https://stackoverflow.com/questions/299628
            307 | 308 if ["GET", "HEAD", "OPTIONS", "TRACE"].contains(&method.as_str()) => {
                unit.method
            }
            // any other 3xx, or a 307/308 for a method that may carry a body, is
            // returned to the caller instead of being followed.
            _ => break resp,
        };

        let keep_auth_header = can_propagate_authorization_on_redirect(
            &unit.agent.config.redirect_auth_headers,
            url,
            &new_url,
        );

        debug!("redirect {} {} -> {}", resp.status(), url, new_url);
        history.push(unit.url);
        // the redirected request never carries the original body.
        body = Payload::Empty.into_read();

        // reuse the previous header vec on redirects.
        let mut headers = unit.headers;

        // on redirects we don't want to keep "content-length". we also might want to
        // strip away "authorization" and "cookie" to ensure credentials are not leaked.
        headers.retain(|h| {
            !h.is_name("content-length")
                && !h.is_name("cookie")
                && (!h.is_name("authorization") || keep_auth_header)
        });

        // recreate the unit to get a new hostname and cookies for the new host.
        unit = Unit::new(
            &unit.agent,
            &new_method,
            &new_url,
            headers,
            &body,
            unit.deadline,
        );
    };
    resp.history = history;
    Ok(resp)
}
240
241/// Perform a connection. Does not follow redirects.
242fn connect_inner(
243    unit: &Unit,
244    use_pooled: bool,
245    body: SizedReader,
246    history: &[Url],
247) -> Result<Response, Error> {
248    let host = unit
249        .url
250        .host_str()
251        // This unwrap is ok because Request::parse_url() ensure there is always a host present.
252        .unwrap();
253    let url = &unit.url;
254    let method = &unit.method;
255    // open socket
256    let (mut stream, is_recycled) = connect_socket(unit, host, use_pooled)?;
257
258    if is_recycled {
259        debug!("sending request (reused connection) {} {}", method, url);
260    } else {
261        debug!("sending request {} {}", method, url);
262    }
263
264    let send_result = send_prelude(unit, &mut stream);
265
266    if let Err(err) = send_result {
267        if is_recycled {
268            debug!("retrying request early {} {}: {}", method, url, err);
269            // we try open a new connection, this time there will be
270            // no connection in the pool. don't use it.
271            // NOTE: this recurses at most once because `use_pooled` is `false`.
272            return connect_inner(unit, false, body, history);
273        } else {
274            // not a pooled connection, propagate the error.
275            return Err(err.into());
276        }
277    }
278    let retryable = unit.is_retryable(&body);
279
280    // send the body (which can be empty now depending on redirects)
281    body::send_body(body, unit.is_chunked, &mut stream)?;
282
283    // start reading the response to process cookies and redirects.
284    // TODO: this unit.clone() bothers me. At this stage, we're not
285    // going to use the unit (much) anymore, and it should be possible
286    // to have ownership of it and pass it into the Response.
287    let result = Response::do_from_stream(stream, unit.clone());
288
289    // https://tools.ietf.org/html/rfc7230#section-6.3.1
290    // When an inbound connection is closed prematurely, a client MAY
291    // open a new connection and automatically retransmit an aborted
292    // sequence of requests if all of those requests have idempotent
293    // methods.
294    //
295    // We choose to retry only requests that used a recycled connection
296    // from the ConnectionPool, since those are most likely to have
297    // reached a server-side timeout. Note that this means we may do
298    // up to N+1 total tries, where N is max_idle_connections_per_host.
299    let resp = match result {
300        Err(err) if err.connection_closed() && retryable && is_recycled => {
301            debug!("retrying request {} {}: {}", method, url, err);
302            let empty = Payload::Empty.into_read();
303            // NOTE: this recurses at most once because `use_pooled` is `false`.
304            return connect_inner(unit, false, empty, history);
305        }
306        Err(e) => return Err(e),
307        Ok(resp) => resp,
308    };
309
310    // squirrel away cookies
311    #[cfg(feature = "cookies")]
312    save_cookies(unit, &resp);
313
314    debug!("response {} to {} {}", resp.status(), method, url);
315
316    // release the response
317    Ok(resp)
318}
319
/// Builds the `Cookie` request header for `url` from the agent's cookie store.
///
/// Returns `None` when there is nothing to send.
#[cfg(feature = "cookies")]
fn extract_cookies(agent: &Agent, url: &Url) -> Option<Header> {
    let stored = agent.state.cookie_tin.get_request_cookies(url);

    let mut pairs = Vec::new();
    for cookie in stored.iter() {
        // This guards against sending rfc non-compliant cookies, even if the user has
        // "prepped" their local cookie store with such cookies.
        if is_cookie_rfc_compliant(cookie) {
            pairs.push(cookie.to_string());
        } else {
            debug!("do not send non compliant cookie: {:?}", cookie);
        }
    }

    let header_value = pairs.join(";");
    if header_value.is_empty() {
        None
    } else {
        Some(Header::new("Cookie", &header_value))
    }
}
344
345/// Connect the socket, either by using the pool or grab a new one.
346fn connect_socket(unit: &Unit, hostname: &str, use_pooled: bool) -> Result<(Stream, bool), Error> {
347    match unit.url.scheme() {
348        "http" | "https" | "test" => (),
349        scheme => return Err(ErrorKind::UnknownScheme.msg(format!("unknown scheme '{}'", scheme))),
350    };
351    if unit.url.scheme() != "https" && unit.agent.config.https_only {
352        return Err(ErrorKind::InsecureRequestHttpsOnly
353            .msg("can't perform non https request with https_only set"));
354    }
355    if use_pooled {
356        let pool = &unit.agent.state.pool;
357        let proxy = &unit.agent.config.proxy;
358        // The connection may have been closed by the server
359        // due to idle timeout while it was sitting in the pool.
360        // Loop until we find one that is still good or run out of connections.
361        while let Some(stream) = pool.try_get_connection(&unit.url, proxy.clone()) {
362            let server_closed = stream.server_closed()?;
363            if !server_closed {
364                return Ok((stream, true));
365            }
366            debug!("dropping stream from pool; closed by server: {:?}", stream);
367        }
368    }
369    let stream = match unit.url.scheme() {
370        "http" => stream::connect_http(unit, hostname),
371        "https" => stream::connect_https(unit, hostname),
372        "test" => connect_test(unit),
373        scheme => Err(ErrorKind::UnknownScheme.msg(format!("unknown scheme {}", scheme))),
374    };
375    Ok((stream?, false))
376}
377
378fn can_propagate_authorization_on_redirect(
379    redirect_auth_headers: &RedirectAuthHeaders,
380    prev_url: &Url,
381    url: &Url,
382) -> bool {
383    fn scheme_is_https(url: &Url) -> bool {
384        url.scheme() == "https" || (cfg!(test) && url.scheme() == "test")
385    }
386
387    match redirect_auth_headers {
388        RedirectAuthHeaders::Never => false,
389        RedirectAuthHeaders::SameHost => {
390            let host = url.host_str();
391            let is_https = scheme_is_https(url);
392
393            let prev_host = prev_url.host_str();
394            let prev_is_https = scheme_is_https(prev_url);
395
396            let same_scheme_or_more_secure =
397                is_https == prev_is_https || (!prev_is_https && is_https);
398
399            host == prev_host && same_scheme_or_more_secure
400        }
401    }
402}
403
404/// Send request line + headers (all up until the body).
405#[allow(clippy::write_with_newline)]
406fn send_prelude(unit: &Unit, stream: &mut Stream) -> io::Result<()> {
407    // build into a buffer and send in one go.
408    let mut prelude = PreludeBuilder::new();
409
410    let path = if let Some(proxy) = &unit.agent.config.proxy {
411        // HTTP proxies require the path to be in absolute URI form
412        // https://www.rfc-editor.org/rfc/rfc7230#section-5.3.2
413        match proxy.proto {
414            Proto::HTTP => match unit.url.port() {
415                Some(port) => format!(
416                    "{}://{}:{}{}",
417                    unit.url.scheme(),
418                    unit.url.host().unwrap(),
419                    port,
420                    unit.url.path()
421                ),
422                None => format!(
423                    "{}://{}{}",
424                    unit.url.scheme(),
425                    unit.url.host().unwrap(),
426                    unit.url.path()
427                ),
428            },
429            _ => unit.url.path().into(),
430        }
431    } else {
432        unit.url.path().into()
433    };
434
435    // request line
436    prelude.write_request_line(&unit.method, &path, unit.url.query().unwrap_or_default())?;
437
438    // host header if not set by user.
439    if !header::has_header(&unit.headers, "host") {
440        let host = unit.url.host().unwrap();
441        match unit.url.port() {
442            Some(port) => {
443                let scheme_default: u16 = match unit.url.scheme() {
444                    "http" => 80,
445                    "https" => 443,
446                    _ => 0,
447                };
448                if scheme_default != 0 && scheme_default == port {
449                    prelude.write_header("Host", host)?;
450                } else {
451                    prelude.write_header("Host", format_args!("{}:{}", host, port))?;
452                }
453            }
454            None => {
455                prelude.write_header("Host", host)?;
456            }
457        }
458    }
459    if !header::has_header(&unit.headers, "user-agent") {
460        prelude.write_header("User-Agent", &unit.agent.config.user_agent)?;
461    }
462    if !header::has_header(&unit.headers, "accept") {
463        prelude.write_header("Accept", "*/*")?;
464    }
465
466    // other headers
467    for header in &unit.headers {
468        if let Some(v) = header.value() {
469            if is_header_sensitive(header) {
470                prelude.write_sensitive_header(header.name(), v)?;
471            } else {
472                prelude.write_header(header.name(), v)?;
473            }
474        }
475    }
476
477    // finish
478    prelude.finish()?;
479
480    debug!("writing prelude: {}", prelude);
481    // write all to the wire
482    stream.write_all(prelude.as_slice())?;
483
484    Ok(())
485}
486
487fn is_header_sensitive(header: &Header) -> bool {
488    header.is_name("Authorization") || header.is_name("Cookie")
489}
490
/// In-memory buffer for the request line and headers.
///
/// Remembers which byte ranges hold sensitive header values so that the
/// `Display` output can mask them.
struct PreludeBuilder {
    prelude: Vec<u8>,
    // Sensitive information to be omitted in debug logging
    sensitive_spans: Vec<Range<usize>>,
}

impl PreludeBuilder {
    /// Creates an empty builder with a small pre-allocated buffer.
    fn new() -> Self {
        Self {
            prelude: Vec::with_capacity(256),
            sensitive_spans: Vec::new(),
        }
    }

    /// Writes `METHOD path[?query] HTTP/1.1`; the `?` is left out for an empty query.
    fn write_request_line(&mut self, method: &str, path: &str, query: &str) -> io::Result<()> {
        write!(self.prelude, "{} {}", method, path)?;
        if !query.is_empty() {
            write!(self.prelude, "?{}", query)?;
        }
        self.prelude.write_all(b" HTTP/1.1\r\n")
    }

    /// Writes one `name: value` header line.
    fn write_header(&mut self, name: &str, value: impl Display) -> io::Result<()> {
        write!(self.prelude, "{}: {}\r\n", name, value)
    }

    /// Writes one header line and records where its value sits in the buffer.
    fn write_sensitive_header(&mut self, name: &str, value: impl Display) -> io::Result<()> {
        write!(self.prelude, "{}: ", name)?;
        let value_start = self.prelude.len();
        write!(self.prelude, "{}", value)?;
        let value_end = self.prelude.len();
        self.sensitive_spans.push(value_start..value_end);
        self.prelude.write_all(b"\r\n")
    }

    /// Terminates the header section with an empty line.
    fn finish(&mut self) -> io::Result<()> {
        self.prelude.write_all(b"\r\n")
    }

    /// The raw bytes to put on the wire.
    fn as_slice(&self) -> &[u8] {
        &self.prelude
    }
}

impl fmt::Display for PreludeBuilder {
    /// Renders the prelude for logging: sensitive values are replaced by `***`
    /// and trailing whitespace is trimmed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut cursor = 0;
        for span in &self.sensitive_spans {
            let visible = String::from_utf8_lossy(&self.prelude[cursor..span.start]);
            write!(f, "{}***", visible)?;
            cursor = span.end;
        }
        let tail = String::from_utf8_lossy(&self.prelude[cursor..]);
        f.write_str(tail.trim_end())
    }
}
557
/// Investigate a response for "Set-Cookie" headers.
///
/// Every `set-cookie` value that parses and is RFC compliant is handed to the
/// agent's cookie store for `unit.url`; the rest are skipped with a debug log.
#[cfg(feature = "cookies")]
fn save_cookies(unit: &Unit, resp: &Response) {
    let headers = resp.all("set-cookie");
    // Avoid locking if there are no cookie headers
    if headers.is_empty() {
        return;
    }
    // Lazy on purpose: parsing and logging happen while the store consumes the iterator.
    let cookies = headers.into_iter().filter_map(|header_value| {
        debug!(
            "received 'set-cookie: {}' from {} {}",
            header_value, unit.method, unit.url
        );
        match Cookie::parse(header_value.to_string()) {
            Err(err) => {
                debug!("ignore unparsable cookie: {}", err);
                None
            }
            // This guards against accepting rfc non-compliant cookies from a host.
            Ok(c) if is_cookie_rfc_compliant(&c) => Some(c),
            Ok(c) => {
                debug!("ignore incoming non compliant cookie: {:?}", c);
                None
            }
        }
    });
    // The store only needs a borrowed URL, so there is no reason to clone it.
    unit.agent
        .state
        .cookie_tin
        .store_response_cookies(cookies, &unit.url);
}
591
/// Checks a cookie's name and value against the RFC 6265 / RFC 2616 grammar.
///
/// Returns `false` (after a trace log) as soon as the name or the value contains a
/// byte that is not allowed.
#[cfg(feature = "cookies")]
fn is_cookie_rfc_compliant(cookie: &Cookie) -> bool {
    // https://tools.ietf.org/html/rfc6265#page-9
    // set-cookie-header = "Set-Cookie:" SP set-cookie-string
    // set-cookie-string = cookie-pair *( ";" SP cookie-av )
    // cookie-pair       = cookie-name "=" cookie-value
    // cookie-name       = token
    // cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
    // cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
    //                       ; US-ASCII characters excluding CTLs,
    //                       ; whitespace DQUOTE, comma, semicolon,
    //                       ; and backslash
    // token             = <token, defined in [RFC2616], Section 2.2>

    // https://tools.ietf.org/html/rfc2616#page-17
    // CHAR           = <any US-ASCII character (octets 0 - 127)>
    // ...
    //        CTL            = <any US-ASCII control character
    //                         (octets 0 - 31) and DEL (127)>
    // ...
    //        token          = 1*<any CHAR except CTLs or separators>
    //        separators     = "(" | ")" | "<" | ">" | "@"
    //                       | "," | ";" | ":" | "\" | <">
    //                       | "/" | "[" | "]" | "?" | "="
    //                       | "{" | "}" | SP | HT

    // cookie-octet: printable ASCII (0x21-0x7E, i.e. no controls and no whitespace)
    // minus DQUOTE, comma, semicolon and backslash.
    fn is_cookie_octet(b: u8) -> bool {
        b.is_ascii_graphic() && !matches!(b, b'"' | b',' | b';' | b'\\')
    }

    let name = cookie.name();
    if !name.bytes().all(|b| header::is_tchar(&b)) {
        log::trace!("cookie name is not valid: {:?}", name);
        return false;
    }

    let value = cookie.value();
    if !value.bytes().all(is_cookie_octet) {
        log::trace!("cookie value is not valid: {:?}", value);
        return false;
    }

    true
}
652
#[cfg(test)]
#[cfg(feature = "cookies")]
mod tests {
    use cookie::Cookie;
    use cookie_store::CookieStore;

    use super::*;

    use crate::Agent;
    ///////////////////// COOKIE TESTS //////////////////////////////

    /// Two stored cookies must be folded into a single `Cookie` header.
    #[test]
    fn match_cookies_returns_one_header() {
        let agent = Agent::new();
        let url: Url = "https://crates.io/".parse().unwrap();
        let stored: Vec<Cookie> = vec![
            "cookie1=value1; Domain=crates.io; Path=/".parse().unwrap(),
            "cookie2=value2; Domain=crates.io; Path=/".parse().unwrap(),
        ];
        agent
            .state
            .cookie_tin
            .store_response_cookies(stored.into_iter(), &url);

        // There's no guarantee to the order in which cookies are defined.
        // Ensure that they're either in one order or the other.
        let result = extract_cookies(&agent, &url);
        let accepted = [
            "cookie1=value1;cookie2=value2",
            "cookie2=value2;cookie1=value1",
        ];

        assert!(accepted
            .iter()
            .any(|&order| result == Some(Header::new("Cookie", order))));
    }

    /// A non-compliant cookie planted in the store must never be sent.
    #[test]
    fn not_send_illegal_cookies() {
        // This prepares a cookie store with a cookie that isn't legal
        // according to the relevant rfcs. ureq should not send this.
        #[allow(deprecated)]
        let mut store = CookieStore::load_json(&b""[..]).unwrap();
        let url = Url::parse("https://mydomain.com").unwrap();
        store
            .insert_raw(&Cookie::new("borked///", "illegal<>//"), &url)
            .unwrap();

        let agent = crate::builder().cookie_store(store).build();
        assert_eq!(extract_cookies(&agent, &url), None);
    }

    #[test]
    fn check_cookie_crate_allows_illegal() {
        // This test is there to see whether the cookie crate enforces
        // https://tools.ietf.org/html/rfc6265#page-9
        // https://tools.ietf.org/html/rfc2616#page-17
        // for cookie name or cookie value.
        // As long as it doesn't, we do additional filtering in ureq
        // to not let non-compliant cookies through.
        let parsed = Cookie::parse("borked///=illegal\\,").unwrap();
        // these should not be allowed according to the RFCs.
        assert_eq!(
            (parsed.name(), parsed.value()),
            ("borked///", "illegal\\,")
        );
    }

    #[test]
    fn illegal_cookie_name() {
        let parsed = Cookie::parse("borked/=value").unwrap();
        assert!(!is_cookie_rfc_compliant(&parsed));
    }

    #[test]
    fn illegal_cookie_value() {
        let parsed = Cookie::parse("name=borked,").unwrap();
        assert!(!is_cookie_rfc_compliant(&parsed));
    }

    #[test]
    fn legal_cookie_name_value() {
        let parsed = Cookie::parse("name=value").unwrap();
        assert!(is_cookie_rfc_compliant(&parsed));
    }
}