multipart/server/
boundary.rs

1// Copyright 2016 `multipart` Crate Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Boundary parsing for `multipart` requests.
9
10use ::safemem;
11
12use super::buf_redux::BufReader;
13use super::buf_redux::policy::MinBuffered;
14use super::twoway;
15
16use std::cmp;
17use std::borrow::Borrow;
18
19use std::io;
20use std::io::prelude::*;
21
22use self::State::*;
23
/// Default threshold passed to the `MinBuffered` policy when a `BoundaryReader` is created.
pub const MIN_BUF_SIZE: usize = 1024;
25
/// Where the reader currently stands relative to the next boundary.
#[derive(Debug, PartialEq, Eq)]
enum State {
    /// No boundary has been located in the buffered data yet.
    Searching,
    /// A boundary has been located in the buffered data but not consumed yet.
    BoundaryRead,
    /// A boundary followed by `--` (the closing delimiter) has been consumed.
    AtEnd,
}
32
33/// A struct implementing `Read` and `BufRead` that will yield bytes until it sees a given sequence.
34#[derive(Debug)]
35pub struct BoundaryReader<R> {
36    source: BufReader<R, MinBuffered>,
37    boundary: Vec<u8>,
38    search_idx: usize,
39    state: State,
40}
41
42impl<R> BoundaryReader<R> where R: Read {
43    /// Internal API
44    pub fn from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R> {
45        let mut boundary = boundary.into();
46        safemem::prepend(b"--", &mut boundary);
47        let source = BufReader::new(reader).set_policy(MinBuffered(MIN_BUF_SIZE));
48
49        BoundaryReader {
50            source,
51            boundary,
52            search_idx: 0,
53            state: Searching,
54        }
55    }
56
57    fn read_to_boundary(&mut self) -> io::Result<&[u8]> {
58        let buf = self.source.fill_buf()?;
59
60        trace!("Buf: {:?}", String::from_utf8_lossy(buf));
61
62        debug!("Before search Buf len: {} Search idx: {} State: {:?}",
63               buf.len(), self.search_idx, self.state);
64
65        if self.state == BoundaryRead || self.state == AtEnd {
66            return Ok(&buf[..self.search_idx])
67        }
68
69        if self.state == Searching && self.search_idx < buf.len() {
70            let lookahead = &buf[self.search_idx..];
71
72            // Look for the boundary, or if it isn't found, stop near the end.
73            match find_boundary(lookahead, &self.boundary) {
74                Ok(found_idx) => {
75                    self.search_idx += found_idx;
76                    self.state = BoundaryRead;
77                },
78                Err(yield_len) => {
79                    self.search_idx += yield_len;
80                }
81            }
82        }        
83        
84        debug!("After search Buf len: {} Search idx: {} State: {:?}",
85               buf.len(), self.search_idx, self.state);
86
87        // back up the cursor to before the boundary's preceding CRLF if we haven't already
88        if self.search_idx >= 2 && !buf[self.search_idx..].starts_with(b"\r\n") {
89            let two_bytes_before = &buf[self.search_idx - 2 .. self.search_idx];
90
91            trace!("Two bytes before: {:?} ({:?}) (\"\\r\\n\": {:?})",
92                   String::from_utf8_lossy(two_bytes_before), two_bytes_before, b"\r\n");
93
94            if two_bytes_before == *b"\r\n" {
95                debug!("Subtract two!");
96                self.search_idx -= 2;
97            }
98        }
99
100        let ret_buf = &buf[..self.search_idx];
101
102        trace!("Returning buf: {:?}", String::from_utf8_lossy(ret_buf));
103
104        Ok(ret_buf)
105    }
106
107    pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
108        // ensure the minimum buf size is at least enough to find a boundary with some extra
109        let min_buf_size = cmp::max(self.boundary.len() * 2, min_buf_size);
110
111        self.source.policy_mut().0 = min_buf_size;
112    }
113
114    pub fn consume_boundary(&mut self) -> io::Result<bool> {
115        if self.state == AtEnd {
116            return Ok(false);
117        }
118
119        while self.state == Searching {
120            debug!("Boundary not found yet");
121
122            let buf_len = self.read_to_boundary()?.len();
123
124            if buf_len == 0 && self.state == Searching {
125                return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
126                                          "unexpected end of request body"));
127            }
128
129            debug!("Discarding {} bytes", buf_len);
130
131            self.consume(buf_len);
132        }
133
134        let consume_amt = {
135            let buf = self.source.fill_buf()?;
136
137            // if the boundary is found we should have at least this much in-buffer
138            let mut consume_amt = self.search_idx + self.boundary.len();
139
140            // we don't care about data before the cursor
141            let bnd_segment = &buf[self.search_idx..];
142
143            if bnd_segment.starts_with(b"\r\n") {
144                // preceding CRLF needs to be consumed as well
145                consume_amt += 2;
146
147                // assert that we've found the boundary after the CRLF
148                debug_assert_eq!(*self.boundary, bnd_segment[2 .. self.boundary.len() + 2]);
149            } else {
150                // assert that we've found the boundary
151                debug_assert_eq!(*self.boundary, bnd_segment[..self.boundary.len()]);
152            }
153
154            // include the trailing CRLF or --
155            consume_amt += 2;
156
157            if buf.len() < consume_amt {
158                return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
159                                          "not enough bytes to verify boundary"));
160            }
161
162            // we have enough bytes to verify
163            self.state = Searching;
164
165            let last_two = &buf[consume_amt - 2 .. consume_amt];
166
167            match last_two {
168                b"\r\n" => self.state = Searching,
169                b"--" => self.state = AtEnd,
170                _ => return Err(io::Error::new(
171                    io::ErrorKind::InvalidData,
172                    format!("unexpected bytes following multipart boundary: {:X} {:X}",
173                            last_two[0], last_two[1])
174                )),
175            }
176
177            consume_amt
178        };
179
180        trace!("Consuming {} bytes, remaining buf: {:?}",
181               consume_amt,
182               String::from_utf8_lossy(self.source.buffer()));
183
184        self.source.consume(consume_amt);
185
186        if cfg!(debug_assertions) {
187
188        }
189
190        self.search_idx = 0;
191
192        trace!("Consumed boundary (state: {:?}), remaining buf: {:?}", self.state,
193               String::from_utf8_lossy(self.source.buffer()));
194
195        Ok(self.state != AtEnd)
196    }
197}
198
199/// Find the boundary occurrence or the highest length to safely yield
200fn find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize> {
201    if let Some(idx) = twoway::find_bytes(buf, boundary) {
202        return Ok(idx);
203    }
204
205    let search_start = buf.len().saturating_sub(boundary.len());
206
207    // search for just the boundary fragment
208    for i in search_start .. buf.len() {
209        if boundary.starts_with(&buf[i..]) {
210            return Err(i);
211        }
212    }
213
214    Err(buf.len())
215}
216
/// Benchmark-only helpers for driving a `BoundaryReader` over an in-memory byte slice.
#[cfg(feature = "bench")]
impl<'a> BoundaryReader<io::Cursor<&'a [u8]>> {
    /// Builds a reader over `bytes` that searches for `boundary`.
    fn new_with_bytes(bytes: &'a [u8], boundary: &str) -> Self {
        Self::from_reader(io::Cursor::new(bytes), boundary)
    }

    /// Rewinds the reader to the start of its input so the same bytes can be parsed again.
    fn reset(&mut self) {
        // Dump buffer and reset cursor. The result is checked rather than silently dropped;
        // rewinding an in-memory cursor to offset 0 is not expected to fail.
        self.source.seek(io::SeekFrom::Start(0))
            .expect("seeking an in-memory cursor to its start should not fail");
        self.state = Searching;
        self.search_idx = 0;
    }
}
230
231impl<R> Borrow<R> for BoundaryReader<R> {
232    fn borrow(&self) -> &R {
233        self.source.get_ref()
234    }
235}
236
237impl<R> Read for BoundaryReader<R> where R: Read {
238    fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
239        let read = {
240            let mut buf = self.read_to_boundary()?;
241            // This shouldn't ever be an error so unwrapping is fine.
242            buf.read(out).unwrap()
243        };
244
245        self.consume(read);
246        Ok(read)
247    }
248}
249
250impl<R> BufRead for BoundaryReader<R> where R: Read {
251    fn fill_buf(&mut self) -> io::Result<&[u8]> {
252        self.read_to_boundary()
253    }
254
255    fn consume(&mut self, amt: usize) {
256        let true_amt = cmp::min(amt, self.search_idx);
257
258        debug!("Consume! amt: {} true amt: {}", amt, true_amt);
259
260        self.source.consume(true_amt);
261        self.search_idx -= true_amt;
262    }
263}
264
265#[cfg(test)]
266mod test {
267    use super::BoundaryReader;
268
269    use std::io;
270    use std::io::prelude::*;
271
272    const BOUNDARY: &'static str = "boundary";
273    const TEST_VAL: &'static str = "--boundary\r\n\
274                                    dashed-value-1\r\n\
275                                    --boundary\r\n\
276                                    dashed-value-2\r\n\
277                                    --boundary--";
278        
279    #[test]
280    fn test_boundary() {
281        ::init_log();
282
283        debug!("Testing boundary (no split)");
284
285        let src = &mut TEST_VAL.as_bytes();
286        let mut reader = BoundaryReader::from_reader(src, BOUNDARY);
287
288        let mut buf = String::new();
289        
290        test_boundary_reader(&mut reader, &mut buf);
291    }
292
293    struct SplitReader<'a> {
294        left: &'a [u8],
295        right: &'a [u8],
296    }
297
298    impl<'a> SplitReader<'a> {
299        fn split(data: &'a [u8], at: usize) -> SplitReader<'a> {
300            let (left, right) = data.split_at(at);
301
302            SplitReader { 
303                left: left,
304                right: right,
305            }
306        }
307    }
308
309    impl<'a> Read for SplitReader<'a> {
310        fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
311            fn copy_bytes_partial(src: &mut &[u8], dst: &mut [u8]) -> usize {
312                src.read(dst).unwrap()
313            }
314
315            let mut copy_amt = copy_bytes_partial(&mut self.left, dst);
316
317            if copy_amt == 0 {
318                copy_amt = copy_bytes_partial(&mut self.right, dst)
319            };
320
321            Ok(copy_amt)
322        }
323    }
324
325    #[test]
326    fn test_split_boundary() {
327        ::init_log();
328
329        debug!("Testing boundary (split)");
330
331        let mut buf = String::new();
332        
333        // Substitute for `.step_by()` being unstable.
334        for split_at in 0 .. TEST_VAL.len(){
335            debug!("Testing split at: {}", split_at);
336
337            let src = SplitReader::split(TEST_VAL.as_bytes(), split_at);
338            let mut reader = BoundaryReader::from_reader(src, BOUNDARY);
339            test_boundary_reader(&mut reader, &mut buf);
340        }
341    }
342
343    fn test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String) {
344        buf.clear();
345
346        debug!("Read 1");
347        let _ = reader.read_to_string(buf).unwrap();
348        assert!(buf.is_empty(), "Buffer not empty: {:?}", buf);
349        buf.clear();
350
351        debug!("Consume 1");
352        reader.consume_boundary().unwrap();
353
354        debug!("Read 2");
355        let _ = reader.read_to_string(buf).unwrap();
356        assert_eq!(buf, "dashed-value-1");
357        buf.clear();
358
359        debug!("Consume 2");
360        reader.consume_boundary().unwrap();
361
362        debug!("Read 3");
363        let _ = reader.read_to_string(buf).unwrap();
364        assert_eq!(buf, "dashed-value-2");
365        buf.clear();
366
367        debug!("Consume 3");
368        reader.consume_boundary().unwrap();
369
370        debug!("Read 4");
371        let _ = reader.read_to_string(buf).unwrap();
372        assert_eq!(buf, "");
373    }
374
375    #[test]
376    fn test_empty_body() {
377        ::init_log();
378
379        // empty body contains closing boundary only
380        let mut body: &[u8] = b"--boundary--";
381
382        let ref mut buf = String::new();
383        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
384
385        debug!("Consume 1");
386        assert_eq!(reader.consume_boundary().unwrap(), false);
387
388        debug!("Read 1");
389        let _ = reader.read_to_string(buf).unwrap();
390        assert_eq!(buf, "");
391        buf.clear();
392
393        debug!("Consume 2");
394        assert_eq!(reader.consume_boundary().unwrap(), false);
395    }
396
397    #[test]
398    fn test_leading_crlf() {
399        ::init_log();
400
401        let mut body: &[u8] = b"\r\n\r\n--boundary\r\n\
402                         asdf1234\
403                         \r\n\r\n--boundary--";
404
405        let ref mut buf = String::new();
406        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
407
408
409        debug!("Consume 1");
410        assert_eq!(reader.consume_boundary().unwrap(), true);
411
412        debug!("Read 1");
413        let _ = reader.read_to_string(buf).unwrap();
414        assert_eq!(buf, "asdf1234\r\n");
415        buf.clear();
416
417        debug!("Consume 2");
418        assert_eq!(reader.consume_boundary().unwrap(), false);
419
420        debug!("Read 2 (empty)");
421        let _ = reader.read_to_string(buf).unwrap();
422        assert_eq!(buf, "");
423    }
424
425    #[test]
426    fn test_trailing_crlf() {
427        ::init_log();
428
429        let mut body: &[u8] = b"--boundary\r\n\
430                         asdf1234\
431                         \r\n\r\n--boundary\r\n\
432                         hjkl5678\r\n--boundary--";
433
434        let ref mut buf = String::new();
435        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
436
437        debug!("Consume 1");
438        assert_eq!(reader.consume_boundary().unwrap(), true);
439
440        debug!("Read 1");
441
442        // Repro for https://github.com/abonander/multipart/issues/93
443        // These two reads should produce the same buffer
444        let buf1 = reader.read_to_boundary().unwrap().to_owned();
445        let buf2 = reader.read_to_boundary().unwrap().to_owned();
446        assert_eq!(buf1, buf2);
447
448        let _ = reader.read_to_string(buf).unwrap();
449        assert_eq!(buf, "asdf1234\r\n");
450        buf.clear();
451
452        debug!("Consume 2");
453        assert_eq!(reader.consume_boundary().unwrap(), true);
454
455        debug!("Read 2");
456        let _ = reader.read_to_string(buf).unwrap();
457        assert_eq!(buf, "hjkl5678");
458        buf.clear();
459
460        debug!("Consume 3");
461        assert_eq!(reader.consume_boundary().unwrap(), false);
462
463        debug!("Read 3 (empty)");
464        let _ = reader.read_to_string(buf).unwrap();
465        assert_eq!(buf, "");
466    }
467
468    // https://github.com/abonander/multipart/issues/93#issuecomment-343610587
469    #[test]
470    fn test_trailing_lflf() {
471        ::init_log();
472
473        let mut body: &[u8] = b"--boundary\r\n\
474                         asdf1234\
475                         \n\n\r\n--boundary\r\n\
476                         hjkl5678\r\n--boundary--";
477
478        let ref mut buf = String::new();
479        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
480
481        debug!("Consume 1");
482        assert_eq!(reader.consume_boundary().unwrap(), true);
483
484        debug!("Read 1");
485
486        // same as above
487        let buf1 = reader.read_to_boundary().unwrap().to_owned();
488        let buf2 = reader.read_to_boundary().unwrap().to_owned();
489        assert_eq!(buf1, buf2);
490
491        let _ = reader.read_to_string(buf).unwrap();
492        assert_eq!(buf, "asdf1234\n\n");
493        buf.clear();
494
495        debug!("Consume 2");
496        assert_eq!(reader.consume_boundary().unwrap(), true);
497
498        debug!("Read 2");
499        let _ = reader.read_to_string(buf).unwrap();
500        assert_eq!(buf, "hjkl5678");
501        buf.clear();
502
503        debug!("Consume 3");
504        assert_eq!(reader.consume_boundary().unwrap(), false);
505
506        debug!("Read 3 (empty)");
507        let _ = reader.read_to_string(buf).unwrap();
508        assert_eq!(buf, "");
509    }
510
511    // https://github.com/abonander/multipart/issues/104
512    #[test]
513    fn test_unterminated_body() {
514        ::init_log();
515
516        let mut body: &[u8] = b"--boundary\r\n\
517                         asdf1234\
518                         \n\n\r\n--boundary\r\n\
519                         hjkl5678  ";
520
521        let ref mut buf = String::new();
522        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
523
524        debug!("Consume 1");
525        assert_eq!(reader.consume_boundary().unwrap(), true);
526
527        debug!("Read 1");
528
529        // same as above
530        let buf1 = reader.read_to_boundary().unwrap().to_owned();
531        let buf2 = reader.read_to_boundary().unwrap().to_owned();
532        assert_eq!(buf1, buf2);
533
534        let _ = reader.read_to_string(buf).unwrap();
535        assert_eq!(buf, "asdf1234\n\n");
536        buf.clear();
537
538        debug!("Consume 2");
539        assert_eq!(reader.consume_boundary().unwrap(), true);
540
541        debug!("Read 2");
542        let _ = reader.read_to_string(buf).unwrap();
543        assert_eq!(buf, "hjkl5678  ");
544        buf.clear();
545
546        debug!("Consume 3 - expecting error");
547        reader.consume_boundary().unwrap_err();
548    }
549
550    #[test]
551    fn test_lone_boundary() {
552        let mut body: &[u8] = b"--boundary";
553        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
554        reader.consume_boundary().unwrap_err();
555    }
556
557    #[test]
558    fn test_invalid_boundary() {
559        let mut body: &[u8] = b"--boundary\x00\x00";
560        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
561        reader.consume_boundary().unwrap_err();
562    }
563
564    #[test]
565    fn test_skip_field() {
566        let mut body: &[u8] = b"--boundary\r\nfield1\r\n--boundary\r\nfield2\r\n--boundary--";
567        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
568
569        assert_eq!(reader.consume_boundary().unwrap(), true);
570        // skip `field1`
571        assert_eq!(reader.consume_boundary().unwrap(), true);
572
573        let mut buf = String::new();
574        reader.read_to_string(&mut buf).unwrap();
575        assert_eq!(buf, "field2");
576
577        assert_eq!(reader.consume_boundary().unwrap(), false);
578    }
579
580    #[cfg(feature = "bench")]
581    mod bench {
582        extern crate test;
583        use self::test::Bencher;
584
585        use super::*;
586
587        #[bench]
588        fn bench_boundary_reader(b: &mut Bencher) {
589            let mut reader = BoundaryReader::new_with_bytes(TEST_VAL.as_bytes(), BOUNDARY);
590            let mut buf = String::with_capacity(256);
591
592            b.iter(|| {
593                reader.reset();
594                test_boundary_reader(&mut reader, &mut buf);
595            });
596        }
597    }
598}