deflate/
writer.rs

1use std::io::Write;
2use std::{io, thread};
3
4use crate::checksum::{Adler32Checksum, RollingChecksum};
5use crate::compress::compress_data_dynamic_n;
6use crate::compress::Flush;
7use crate::compression_options::CompressionOptions;
8use crate::deflate_state::DeflateState;
9use crate::zlib::{write_zlib_header, CompressionLevel};
10
/// Panic message used when the wrapped writer has unexpectedly been removed from an encoder.
// A trailing `\` in a string literal swallows the newline *and* the leading whitespace of the
// next line, so the space separating the two sentences has to be placed before the backslash.
const ERR_STR: &str = "Error! The wrapped writer is missing. \
                       This is a bug, please file an issue.";
13
14/// Keep compressing until all the input has been compressed and output or the writer returns `Err`.
15pub fn compress_until_done<W: Write>(
16    mut input: &[u8],
17    deflate_state: &mut DeflateState<W>,
18    flush_mode: Flush,
19) -> io::Result<()> {
20    // This should only be used for flushing.
21    assert!(flush_mode != Flush::None);
22    loop {
23        match compress_data_dynamic_n(input, deflate_state, flush_mode) {
24            Ok(0) => {
25                if deflate_state.output_buf().is_empty() {
26                    break;
27                } else {
28                    // If the output buffer isn't empty, keep going until it is, as there is still
29                    // data to be flushed.
30                    input = &[];
31                }
32            }
33            Ok(n) => {
34                if n < input.len() {
35                    input = &input[n..]
36                } else {
37                    input = &[];
38                }
39            }
40            Err(e) => {
41                match e.kind() {
42                    // This error means that there may still be data to flush.
43                    // This could possibly get stuck if the underlying writer keeps returning this
44                    // error.
45                    io::ErrorKind::Interrupted => (),
46                    _ => return Err(e),
47                }
48            }
49        }
50    }
51
52    debug_assert_eq!(
53        deflate_state.bytes_written,
54        deflate_state.bytes_written_control.get()
55    );
56
57    Ok(())
58}
59
60/// A DEFLATE encoder/compressor.
61///
62/// A struct implementing a [`Write`] interface that takes arbitrary data and compresses it to
63/// the provided writer using DEFLATE compression.
64///
65/// # Examples
66///
67/// ```rust
68/// # use std::io;
69/// #
70/// # fn try_main() -> io::Result<Vec<u8>> {
71/// #
72/// use std::io::Write;
73///
74/// use deflate::Compression;
75/// use deflate::write::DeflateEncoder;
76///
77/// let data = b"This is some test data";
78/// let mut encoder = DeflateEncoder::new(Vec::new(), Compression::Default);
79/// encoder.write_all(data)?;
80/// let compressed_data = encoder.finish()?;
81/// # Ok(compressed_data)
82/// #
83/// # }
84/// # fn main() {
85/// #     try_main().unwrap();
86/// # }
87/// ```
88/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
89pub struct DeflateEncoder<W: Write> {
90    deflate_state: DeflateState<W>,
91}
92
93impl<W: Write> DeflateEncoder<W> {
94    /// Creates a new encoder using the provided compression options.
95    pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> DeflateEncoder<W> {
96        DeflateEncoder {
97            deflate_state: DeflateState::new(options.into(), writer),
98        }
99    }
100
101    /// Encode all pending data to the contained writer, consume this `DeflateEncoder`,
102    /// and return the contained writer if writing succeeds.
103    pub fn finish(mut self) -> io::Result<W> {
104        self.output_all()?;
105        // We have to move the inner writer out of the encoder, and replace it with `None`
106        // to let the `DeflateEncoder` drop safely.
107        Ok(self.deflate_state.inner.take().expect(ERR_STR))
108    }
109
110    /// Resets the encoder (except the compression options), replacing the current writer
111    /// with a new one, returning the old one.
112    pub fn reset(&mut self, w: W) -> io::Result<W> {
113        self.output_all()?;
114        self.deflate_state.reset(w)
115    }
116
117    /// Output all pending data as if encoding is done, but without resetting anything
118    fn output_all(&mut self) -> io::Result<()> {
119        compress_until_done(&[], &mut self.deflate_state, Flush::Finish)
120    }
121}
122
123impl<W: Write> io::Write for DeflateEncoder<W> {
124    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
125        let flush_mode = self.deflate_state.flush_mode;
126        compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode)
127    }
128
129    /// Flush the encoder.
130    ///
131    /// This will flush the encoder, emulating the Sync flush method from Zlib.
132    /// This essentially finishes the current block, and sends an additional empty stored block to
133    /// the writer.
134    fn flush(&mut self) -> io::Result<()> {
135        compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
136    }
137}
138
139impl<W: Write> Drop for DeflateEncoder<W> {
140    /// When the encoder is dropped, output the rest of the data.
141    ///
142    /// WARNING: This may silently fail if writing fails, so using this to finish encoding
143    /// for writers where writing might fail is not recommended, for that call
144    /// [`finish()`](#method.finish) instead.
145    fn drop(&mut self) {
146        // Not sure if implementing drop is a good idea or not, but we follow flate2 for now.
147        // We only do this if we are not panicking, to avoid a double panic.
148        if self.deflate_state.inner.is_some() && !thread::panicking() {
149            let _ = self.output_all();
150        }
151    }
152}
153
154/// A Zlib encoder/compressor.
155///
156/// A struct implementing a [`Write`] interface that takes arbitrary data and compresses it to
157/// the provided writer using DEFLATE compression with Zlib headers and trailers.
158///
159/// # Examples
160///
161/// ```rust
162/// # use std::io;
163/// #
164/// # fn try_main() -> io::Result<Vec<u8>> {
165/// #
166/// use std::io::Write;
167///
168/// use deflate::Compression;
169/// use deflate::write::ZlibEncoder;
170///
171/// let data = b"This is some test data";
172/// let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
173/// encoder.write_all(data)?;
174/// let compressed_data = encoder.finish()?;
175/// # Ok(compressed_data)
176/// #
177/// # }
178/// # fn main() {
179/// #     try_main().unwrap();
180/// # }
181/// ```
182/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
183pub struct ZlibEncoder<W: Write> {
184    deflate_state: DeflateState<W>,
185    checksum: Adler32Checksum,
186    header_written: bool,
187}
188
189impl<W: Write> ZlibEncoder<W> {
190    /// Create a new `ZlibEncoder` using the provided compression options.
191    pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> ZlibEncoder<W> {
192        ZlibEncoder {
193            deflate_state: DeflateState::new(options.into(), writer),
194            checksum: Adler32Checksum::new(),
195            header_written: false,
196        }
197    }
198
199    /// Output all pending data ,including the trailer(checksum) as if encoding is done,
200    /// but without resetting anything.
201    fn output_all(&mut self) -> io::Result<()> {
202        self.check_write_header()?;
203        compress_until_done(&[], &mut self.deflate_state, Flush::Finish)?;
204        self.write_trailer()
205    }
206
207    /// Encode all pending data to the contained writer, consume this `ZlibEncoder`,
208    /// and return the contained writer if writing succeeds.
209    pub fn finish(mut self) -> io::Result<W> {
210        self.output_all()?;
211        // We have to move the inner writer out of the encoder, and replace it with `None`
212        // to let the `DeflateEncoder` drop safely.
213        Ok(self.deflate_state.inner.take().expect(ERR_STR))
214    }
215
216    /// Resets the encoder (except the compression options), replacing the current writer
217    /// with a new one, returning the old one.
218    pub fn reset(&mut self, writer: W) -> io::Result<W> {
219        self.output_all()?;
220        self.header_written = false;
221        self.checksum = Adler32Checksum::new();
222        self.deflate_state.reset(writer)
223    }
224
225    /// Check if a zlib header should be written.
226    fn check_write_header(&mut self) -> io::Result<()> {
227        if !self.header_written {
228            write_zlib_header(self.deflate_state.output_buf(), CompressionLevel::Default)?;
229            self.header_written = true;
230        }
231        Ok(())
232    }
233
234    /// Write the trailer, which for zlib is the Adler32 checksum.
235    fn write_trailer(&mut self) -> io::Result<()> {
236        let hash = self.checksum.current_hash();
237
238        self.deflate_state
239            .inner
240            .as_mut()
241            .expect(ERR_STR)
242            .write_all(&hash.to_be_bytes())?;
243
244        Ok(())
245    }
246
247    /// Return the adler32 checksum of the currently consumed data.
248    pub fn checksum(&self) -> u32 {
249        self.checksum.current_hash()
250    }
251}
252
253impl<W: Write> io::Write for ZlibEncoder<W> {
254    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
255        self.check_write_header()?;
256        let flush_mode = self.deflate_state.flush_mode;
257        let res = compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode);
258        match res {
259            // If this is returned, the whole buffer was consumed
260            Ok(0) => self.checksum.update_from_slice(buf),
261            // Otherwise, only part of it was consumed, so only that part
262            // added to the checksum.
263            Ok(n) => self.checksum.update_from_slice(&buf[0..n]),
264            _ => (),
265        };
266        res
267    }
268
269    /// Flush the encoder.
270    ///
271    /// This will flush the encoder, emulating the Sync flush method from Zlib.
272    /// This essentially finishes the current block, and sends an additional empty stored block to
273    /// the writer.
274    fn flush(&mut self) -> io::Result<()> {
275        compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
276    }
277}
278
279impl<W: Write> Drop for ZlibEncoder<W> {
280    /// When the encoder is dropped, output the rest of the data.
281    ///
282    /// WARNING: This may silently fail if writing fails, so using this to finish encoding
283    /// for writers where writing might fail is not recommended, for that call
284    /// [`finish()`](#method.finish) instead.
285    fn drop(&mut self) {
286        if self.deflate_state.inner.is_some() && !thread::panicking() {
287            let _ = self.output_all();
288        }
289    }
290}
291
292#[cfg(feature = "gzip")]
293pub mod gzip {
294
295    use std::io::{Cursor, Write};
296    use std::{io, thread};
297
298    use super::*;
299
300    use gzip_header::{Crc, GzBuilder};
301
302    /// A Gzip encoder/compressor.
303    ///
304    /// A struct implementing a [`Write`] interface that takes arbitrary data and compresses it to
305    /// the provided writer using DEFLATE compression with Gzip headers and trailers.
306    ///
307    /// # Examples
308    ///
309    /// ```rust
310    /// # use std::io;
311    /// #
312    /// # fn try_main() -> io::Result<Vec<u8>> {
313    /// #
314    /// use std::io::Write;
315    ///
316    /// use deflate::Compression;
317    /// use deflate::write::GzEncoder;
318    ///
319    /// let data = b"This is some test data";
320    /// let mut encoder = GzEncoder::new(Vec::new(), Compression::Default);
321    /// encoder.write_all(data)?;
322    /// let compressed_data = encoder.finish()?;
323    /// # Ok(compressed_data)
324    /// #
325    /// # }
326    /// # fn main() {
327    /// #     try_main().unwrap();
328    /// # }
329    /// ```
330    /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
331    pub struct GzEncoder<W: Write> {
332        inner: DeflateEncoder<W>,
333        checksum: Crc,
334        header: Vec<u8>,
335    }
336
337    impl<W: Write> GzEncoder<W> {
338        /// Create a new `GzEncoder` writing deflate-compressed data to the underlying writer when
339        /// written to, wrapped in a gzip header and trailer. The header details will be blank.
340        pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> GzEncoder<W> {
341            GzEncoder::from_builder(GzBuilder::new(), writer, options)
342        }
343
344        /// Create a new GzEncoder from the provided `GzBuilder`. This allows customising
345        /// the details of the header, such as the filename and comment fields.
346        pub fn from_builder<O: Into<CompressionOptions>>(
347            builder: GzBuilder,
348            writer: W,
349            options: O,
350        ) -> GzEncoder<W> {
351            GzEncoder {
352                inner: DeflateEncoder::new(writer, options),
353                checksum: Crc::new(),
354                header: builder.into_header(),
355            }
356        }
357
358        /// Write header to the output buffer if it hasn't been done yet.
359        fn check_write_header(&mut self) {
360            if !self.header.is_empty() {
361                self.inner
362                    .deflate_state
363                    .output_buf()
364                    .extend_from_slice(&self.header);
365                self.header.clear();
366            }
367        }
368
369        /// Output all pending data ,including the trailer(checksum + count) as if encoding is done.
370        /// but without resetting anything.
371        fn output_all(&mut self) -> io::Result<()> {
372            self.check_write_header();
373            self.inner.output_all()?;
374            self.write_trailer()
375        }
376
377        /// Encode all pending data to the contained writer, consume this `GzEncoder`,
378        /// and return the contained writer if writing succeeds.
379        pub fn finish(mut self) -> io::Result<W> {
380            self.output_all()?;
381            // We have to move the inner writer out of the encoder, and replace it with `None`
382            // to let the `DeflateEncoder` drop safely.
383            Ok(self.inner.deflate_state.inner.take().expect(ERR_STR))
384        }
385
386        fn reset_no_header(&mut self, writer: W) -> io::Result<W> {
387            self.output_all()?;
388            self.checksum = Crc::new();
389            self.inner.deflate_state.reset(writer)
390        }
391
392        /// Resets the encoder (except the compression options), replacing the current writer
393        /// with a new one, returning the old one. (Using a blank header).
394        pub fn reset(&mut self, writer: W) -> io::Result<W> {
395            let w = self.reset_no_header(writer);
396            self.header = GzBuilder::new().into_header();
397            w
398        }
399
400        /// Resets the encoder (except the compression options), replacing the current writer
401        /// with a new one, returning the old one, and using the provided `GzBuilder` to
402        /// create the header.
403        pub fn reset_with_builder(&mut self, writer: W, builder: GzBuilder) -> io::Result<W> {
404            let w = self.reset_no_header(writer);
405            self.header = builder.into_header();
406            w
407        }
408
409        /// Write the checksum and number of bytes mod 2^32 to the output writer.
410        fn write_trailer(&mut self) -> io::Result<()> {
411            let crc = self.checksum.sum();
412            let amount = self.checksum.amt_as_u32();
413
414            // We use a buffer here to make sure we don't end up writing only half the header if
415            // writing fails.
416            let mut buf = [0u8; 8];
417            let mut temp = Cursor::new(&mut buf[..]);
418            temp.write_all(&crc.to_le_bytes()).unwrap();
419            temp.write_all(&amount.to_le_bytes()).unwrap();
420            self.inner
421                .deflate_state
422                .inner
423                .as_mut()
424                .expect(ERR_STR)
425                .write_all(temp.into_inner())
426        }
427
428        /// Get the crc32 checksum of the data consumed so far.
429        pub fn checksum(&self) -> u32 {
430            self.checksum.sum()
431        }
432    }
433
434    impl<W: Write> io::Write for GzEncoder<W> {
435        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
436            self.check_write_header();
437            let res = self.inner.write(buf);
438            match res {
439                Ok(0) => self.checksum.update(buf),
440                Ok(n) => self.checksum.update(&buf[0..n]),
441                _ => (),
442            };
443            res
444        }
445
446        /// Flush the encoder.
447        ///
448        /// This will flush the encoder, emulating the Sync flush method from Zlib.
449        /// This essentially finishes the current block, and sends an additional empty stored
450        /// block to the writer.
451        fn flush(&mut self) -> io::Result<()> {
452            self.inner.flush()
453        }
454    }
455
456    impl<W: Write> Drop for GzEncoder<W> {
457        /// When the encoder is dropped, output the rest of the data.
458        ///
459        /// WARNING: This may silently fail if writing fails, so using this to finish encoding
460        /// for writers where writing might fail is not recommended, for that call
461        /// [`finish()`](#method.finish) instead.
462        fn drop(&mut self) {
463            if self.inner.deflate_state.inner.is_some() && !thread::panicking() {
464                let _ = self.output_all();
465            }
466        }
467    }
468
469    #[cfg(test)]
470    mod test {
471        use super::*;
472        use crate::test_utils::{decompress_gzip, get_test_data};
473        #[test]
474        fn gzip_writer() {
475            let data = get_test_data();
476            let comment = b"Comment";
477            let compressed = {
478                let mut compressor = GzEncoder::from_builder(
479                    GzBuilder::new().comment(&comment[..]),
480                    Vec::with_capacity(data.len() / 3),
481                    CompressionOptions::default(),
482                );
483                compressor.write_all(&data[0..data.len() / 2]).unwrap();
484                compressor.write_all(&data[data.len() / 2..]).unwrap();
485                compressor.finish().unwrap()
486            };
487
488            let (dec, res) = decompress_gzip(&compressed);
489            assert_eq!(dec.comment().unwrap(), comment);
490            assert!(res == data);
491        }
492    }
493}
494
#[cfg(test)]
mod test {
    use super::*;
    use crate::compression_options::CompressionOptions;
    use crate::test_utils::{decompress_to_end, decompress_zlib, get_test_data};
    use std::io::Write;

    /// Asserts that `output` ends with the sync marker.
    ///
    /// Only the last four bytes are checked (excluding the header as it might not line up with
    /// the byte boundary).
    fn assert_ends_with_sync_marker(output: &[u8]) {
        assert_eq!(output[output.len() - 4..], [0, 0, 255, 255]);
    }

    /// Round-trips the test data through a `DeflateEncoder`.
    #[test]
    fn deflate_writer() {
        let data = get_test_data();
        // Write in multiple steps to see if this works as it's supposed to.
        let (first_half, second_half) = data.split_at(data.len() / 2);

        let mut compressor = DeflateEncoder::new(
            Vec::with_capacity(data.len() / 3),
            CompressionOptions::high(),
        );
        compressor.write_all(first_half).unwrap();
        compressor.write_all(second_half).unwrap();
        let compressed = compressor.finish().unwrap();

        let res = decompress_to_end(&compressed);
        assert!(res == data);
    }

    /// Round-trips the test data through a `ZlibEncoder`.
    #[test]
    fn zlib_writer() {
        let data = get_test_data();
        let (first_half, second_half) = data.split_at(data.len() / 2);

        let mut compressor = ZlibEncoder::new(
            Vec::with_capacity(data.len() / 3),
            CompressionOptions::high(),
        );
        compressor.write_all(first_half).unwrap();
        compressor.write_all(second_half).unwrap();
        let compressed = compressor.finish().unwrap();

        let res = decompress_zlib(&compressed);
        assert!(res == data);
    }

    /// Check if the result of compressing after resetting is the same as before.
    #[test]
    fn writer_reset() {
        let data = get_test_data();
        let new_buffer = || -> Vec<u8> { Vec::with_capacity(data.len() / 3) };

        let mut compressor = DeflateEncoder::new(new_buffer(), CompressionOptions::default());
        compressor.write_all(&data).unwrap();
        let res1 = compressor.reset(new_buffer()).unwrap();
        compressor.write_all(&data).unwrap();
        let res2 = compressor.finish().unwrap();

        assert!(res1 == res2);
    }

    /// Same as `writer_reset`, but for the `ZlibEncoder`.
    #[test]
    fn writer_reset_zlib() {
        let data = get_test_data();
        let new_buffer = || -> Vec<u8> { Vec::with_capacity(data.len() / 3) };

        let mut compressor = ZlibEncoder::new(new_buffer(), CompressionOptions::default());
        compressor.write_all(&data).unwrap();
        let res1 = compressor.reset(new_buffer()).unwrap();
        compressor.write_all(&data).unwrap();
        let res2 = compressor.finish().unwrap();

        assert!(res1 == res2);
    }

    /// Flushes halfway through the input and checks that the sync marker was emitted and that
    /// the complete stream still decompresses to the input.
    #[test]
    fn writer_sync() {
        let data = get_test_data();
        let split = data.len() / 2;

        let mut compressor = DeflateEncoder::new(
            Vec::with_capacity(data.len() / 3),
            CompressionOptions::default(),
        );
        compressor.write_all(&data[..split]).unwrap();
        compressor.flush().unwrap();
        assert_ends_with_sync_marker(compressor.deflate_state.inner.as_ref().unwrap());
        compressor.write_all(&data[split..]).unwrap();
        let compressed = compressor.finish().unwrap();

        let decompressed = decompress_to_end(&compressed);

        assert!(decompressed == data);
    }

    /// Make sure compression works with the writer when the input is between 1 and 2 window sizes.
    #[test]
    fn issue_18() {
        use crate::compression_options::Compression;

        let data = vec![0u8; 61000];

        let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default);
        compressor.write_all(&data[..]).unwrap();
        let compressed = compressor.finish().unwrap();

        let decompressed = decompress_zlib(&compressed);
        assert!(decompressed == data);
    }

    /// Exercises repeated flushes: back to back, separated by only a couple of bytes, and on
    /// an encoder that has not been given any data yet.
    #[test]
    fn writer_sync_multiple() {
        use std::cmp;

        let data = get_test_data();
        let split = data.len() / 2;
        let second_split = cmp::min(split + 2, data.len());

        let mut compressor = DeflateEncoder::new(
            Vec::with_capacity(data.len() / 3),
            CompressionOptions::default(),
        );
        compressor.write_all(&data[..split]).unwrap();
        compressor.flush().unwrap();
        compressor.flush().unwrap();
        assert_ends_with_sync_marker(compressor.deflate_state.inner.as_ref().unwrap());
        compressor.write_all(&data[split..second_split]).unwrap();
        compressor.flush().unwrap();
        compressor.write_all(&data[second_split..]).unwrap();
        let compressed = compressor.finish().unwrap();

        let decompressed = decompress_to_end(&compressed);

        assert!(decompressed == data);

        let mut compressor = DeflateEncoder::new(
            Vec::with_capacity(data.len() / 3),
            CompressionOptions::default(),
        );

        compressor.flush().unwrap();
        compressor.write_all(&[1, 2]).unwrap();
        compressor.flush().unwrap();
        compressor.write_all(&[3]).unwrap();
        compressor.flush().unwrap();
        let compressed = compressor.finish().unwrap();

        let decompressed = decompress_to_end(&compressed);

        assert_eq!(decompressed, [1, 2, 3]);
    }
}