deflate/
lib.rs

//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
2//! compression algorithm in pure Rust.
3//!
4//! This library provides functions to compress data using the DEFLATE algorithm,
5//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7//! The current implementation is still a bit lacking speed-wise compared to C-libraries
8//! like zlib and miniz.
9//!
10//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! by e.g. HTTP compression, the `.png` image format, the Unix `gzip` program and commonly in `.zip`
12//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
13//! some extra metadata and a checksum to validate the integrity of the raw data.
14//!
15//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17//! the `DEFLATE` format (with or without wrappers) is not required.
18//!
19//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
20//! but can be enabled with the `gzip` feature.
21//!
22//! As this library is still in development, the compression output may change slightly
23//! between versions.
24//!
25//!
26//! # Examples:
27//! ## Simple compression function:
28//! ``` rust
29//! use deflate::deflate_bytes;
30//!
31//! let data = b"Some data";
32//! let compressed = deflate_bytes(data);
33//! # let _ = compressed;
34//! ```
35//!
36//! ## Using a writer:
37//! ``` rust
38//! use std::io::Write;
39//!
40//! use deflate::Compression;
41//! use deflate::write::ZlibEncoder;
42//!
43//! let data = b"This is some test data";
44//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45//! encoder.write_all(data).expect("Write error!");
46//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47//! # let _ = compressed_data;
48//! ```
49
50#![forbid(unsafe_code)]
51#![cfg_attr(all(feature = "benchmarks", test), feature(test))]
52
53#[cfg(all(test, feature = "benchmarks"))]
54extern crate test as test_std;
55
56#[cfg(test)]
57extern crate miniz_oxide;
58
59extern crate adler32;
60#[cfg(feature = "gzip")]
61extern crate gzip_header;
62
63mod bit_reverse;
64mod bitstream;
65mod chained_hash_table;
66mod checksum;
67mod compress;
68mod compression_options;
69mod deflate_state;
70mod encoder_state;
71mod huffman_lengths;
72mod huffman_table;
73mod input_buffer;
74mod length_encode;
75mod lz77;
76mod lzvalue;
77mod matching;
78mod output_writer;
79mod rle;
80mod stored_block;
81#[cfg(test)]
82mod test_utils;
83mod writer;
84mod zlib;
85
86use std::io;
87use std::io::Write;
88
89#[cfg(feature = "gzip")]
90use gzip_header::Crc;
91#[cfg(feature = "gzip")]
92use gzip_header::GzBuilder;
93
94use crate::checksum::RollingChecksum;
95use crate::deflate_state::DeflateState;
96
97use crate::compress::Flush;
98pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
99pub use lz77::MatchingType;
100
101use crate::writer::compress_until_done;
102
103/// Encoders implementing a `Write` interface.
104pub mod write {
105    #[cfg(feature = "gzip")]
106    pub use crate::writer::gzip::GzEncoder;
107    pub use crate::writer::{DeflateEncoder, ZlibEncoder};
108}
109
110fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
111    input: &[u8],
112    writer: &mut W,
113    mut checksum: RC,
114    compression_options: CompressionOptions,
115) -> io::Result<()> {
116    checksum.update_from_slice(input);
117    // We use a box here to avoid putting the buffers on the stack
118    // It's done here rather than in the structs themselves for now to
119    // keep the data close in memory.
120    let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
121    compress_until_done(input, &mut deflate_state, Flush::Finish)
122}
123
124/// Compress the given slice of bytes with DEFLATE compression.
125///
126/// Returns a `Vec<u8>` of the compressed data.
127///
128/// # Examples
129///
130/// ```
131/// use deflate::{deflate_bytes_conf, Compression};
132///
133/// let data = b"This is some test data";
134/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
135/// # let _ = compressed_data;
136/// ```
137pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
138    let mut writer = Vec::with_capacity(input.len() / 3);
139    compress_data_dynamic(
140        input,
141        &mut writer,
142        checksum::NoChecksum::new(),
143        options.into(),
144    )
145    .expect("Write error!");
146    writer
147}
148
149/// Compress the given slice of bytes with DEFLATE compression using the default compression
150/// level.
151///
152/// Returns a `Vec<u8>` of the compressed data.
153///
154/// # Examples
155///
156/// ```
157/// use deflate::deflate_bytes;
158///
159/// let data = b"This is some test data";
160/// let compressed_data = deflate_bytes(data);
161/// # let _ = compressed_data;
162/// ```
163pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
164    deflate_bytes_conf(input, Compression::Default)
165}
166
167/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
168///
169/// Returns a `Vec<u8>` of the compressed data.
170///
171/// Zlib dictionaries are not yet suppored.
172///
173/// # Examples
174///
175/// ```
176/// use deflate::{deflate_bytes_zlib_conf, Compression};
177///
178/// let data = b"This is some test data";
179/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
180/// # let _ = compressed_data;
181/// ```
182pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
183    let mut writer = Vec::with_capacity(input.len() / 3);
184    // Write header
185    zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
186        .expect("Write error when writing zlib header!");
187
188    let mut checksum = checksum::Adler32Checksum::new();
189    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
190        .expect("Write error when writing compressed data!");
191
192    let hash = checksum.current_hash();
193
194    writer
195        .write_all(&hash.to_be_bytes())
196        .expect("Write error when writing checksum!");
197    writer
198}
199
200/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
201/// using the default compression level.
202///
203/// Returns a Vec<u8> of the compressed data.
204///
205/// Zlib dictionaries are not yet suppored.
206///
207/// # Examples
208///
209/// ```
210/// use deflate::deflate_bytes_zlib;
211///
212/// let data = b"This is some test data";
213/// let compressed_data = deflate_bytes_zlib(data);
214/// # let _ = compressed_data;
215/// ```
216pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
217    deflate_bytes_zlib_conf(input, Compression::Default)
218}
219
/// Compress a byte slice with DEFLATE and wrap the result in a gzip header and trailer.
///
/// The header is produced from `gzip_header`, and `options` may be anything convertible
/// into [`CompressionOptions`]. The compressed data is returned as a `Vec<u8>`.
///
/// # Panics
///
/// Panics if writing the header, the compressed data or the trailer reports an error.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    let mut output = Vec::with_capacity(input.len() / 3);

    // Header first.
    let header = gzip_header.into_header();
    output
        .write_all(&header)
        .expect("Write error when writing header!");

    // The compressor gets a no-op checksum; the CRC for the trailer is computed below.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    let mut crc = Crc::new();
    crc.update(input);

    // Trailer: CRC sum followed by the amount value, both little-endian.
    let sum_bytes = crc.sum().to_le_bytes();
    output
        .write_all(&sum_bytes)
        .expect("Write error when writing checksum!");
    let amount_bytes = crc.amt_as_u32().to_le_bytes();
    output
        .write_all(&amount_bytes)
        .expect("Write error when writing amt!");
    output
}
268
/// Compress a byte slice with DEFLATE at the default compression level, wrapped in a gzip
/// header and trailer.
///
/// This is a shorthand for [`deflate_bytes_gzip_conf`] with `Compression::Default` and a
/// freshly created `GzBuilder`. The compressed data is returned as a `Vec<u8>`.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, header)
}
287
288#[cfg(test)]
289mod test {
290    use super::*;
291    use std::io::Write;
292
293    #[cfg(feature = "gzip")]
294    use test_utils::decompress_gzip;
295    use test_utils::{decompress_to_end, decompress_zlib, get_test_data};
296
297    type CO = CompressionOptions;
298
299    /// Write data to the writer in chunks of chunk_size.
300    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
301        for chunk in data.chunks(chunk_size) {
302            writer.write_all(&chunk).unwrap();
303        }
304    }
305
306    #[test]
307    fn dynamic_string_mem() {
308        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
309        let compressed = deflate_bytes(&test_data);
310
311        assert!(compressed.len() < test_data.len());
312
313        let result = decompress_to_end(&compressed);
314        assert_eq!(test_data, result);
315    }
316
317    #[test]
318    fn dynamic_string_file() {
319        let input = get_test_data();
320        let compressed = deflate_bytes(&input);
321
322        let result = decompress_to_end(&compressed);
323        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
324            if a != b {
325                println!("First difference at {}, input: {}, output: {}", n, a, b);
326                println!(
327                    "input: {:?}, output: {:?}",
328                    &input[n - 3..n + 3],
329                    &result[n - 3..n + 3]
330                );
331                break;
332            }
333        }
334        // Not using assert_eq here deliberately to avoid massive amounts of output spam
335        assert!(input == result);
336        // Check that we actually managed to compress the input
337        assert!(compressed.len() < input.len());
338    }
339
340    #[test]
341    fn file_rle() {
342        let input = get_test_data();
343        let compressed = deflate_bytes_conf(&input, CO::rle());
344
345        let result = decompress_to_end(&compressed);
346        assert!(input == result);
347    }
348
349    #[test]
350    fn file_zlib() {
351        let test_data = get_test_data();
352
353        let compressed = deflate_bytes_zlib(&test_data);
354        // {
355        //     use std::fs::File;
356        //     use std::io::Write;
357        //     let mut f = File::create("out.zlib").unwrap();
358        //     f.write_all(&compressed).unwrap();
359        // }
360
361        println!("file_zlib compressed(default) length: {}", compressed.len());
362
363        let result = decompress_zlib(&compressed);
364
365        assert!(&test_data == &result);
366        assert!(compressed.len() < test_data.len());
367    }
368
369    #[test]
370    fn zlib_short() {
371        let test_data = [10, 10, 10, 10, 10, 55];
372        roundtrip_zlib(&test_data, CO::default());
373    }
374
375    #[test]
376    fn zlib_last_block() {
377        let mut test_data = vec![22; 32768];
378        test_data.extend(&[5, 2, 55, 11, 12]);
379        roundtrip_zlib(&test_data, CO::default());
380    }
381
382    #[test]
383    fn deflate_short() {
384        let test_data = [10, 10, 10, 10, 10, 55];
385        let compressed = deflate_bytes(&test_data);
386
387        let result = decompress_to_end(&compressed);
388        assert_eq!(&test_data, result.as_slice());
389        // If block type and compression is selected correctly, this should only take 5 bytes.
390        assert_eq!(compressed.len(), 5);
391    }
392
393    #[cfg(feature = "gzip")]
394    #[test]
395    fn gzip() {
396        let data = get_test_data();
397        let comment = b"Test";
398        let compressed = deflate_bytes_gzip_conf(
399            &data,
400            Compression::Default,
401            GzBuilder::new().comment(&comment[..]),
402        );
403        let (dec, decompressed) = decompress_gzip(&compressed);
404        assert_eq!(dec.comment().unwrap(), comment);
405        assert!(data == decompressed);
406    }
407
408    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
409        let mut compressed = Vec::with_capacity(32000);
410        let data = get_test_data();
411        {
412            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
413            chunked_write(&mut compressor, &data, chunk_size);
414            compressor.finish().unwrap();
415        }
416        let compressed2 = deflate_bytes_zlib_conf(&data, level);
417        let res = decompress_zlib(&compressed);
418        assert!(res == data);
419        assert_eq!(compressed.len(), compressed2.len());
420        assert!(compressed == compressed2);
421    }
422
423    fn writer_chunks_level(level: CompressionOptions) {
424        use input_buffer::BUFFER_SIZE;
425        let ct = |n| chunk_test(n, level);
426        ct(1);
427        ct(50);
428        ct(400);
429        ct(32768);
430        ct(BUFFER_SIZE);
431        ct(50000);
432        ct((32768 * 2) + 258);
433    }
434
435    #[ignore]
436    #[test]
437    /// Test the writer by inputing data in one chunk at the time.
438    fn zlib_writer_chunks() {
439        writer_chunks_level(CompressionOptions::default());
440        writer_chunks_level(CompressionOptions::fast());
441        writer_chunks_level(CompressionOptions::rle());
442    }
443
444    /// Check that the frequency values don't overflow.
445    #[test]
446    fn frequency_overflow() {
447        let _ = deflate_bytes_conf(
448            &vec![5; 100000],
449            compression_options::CompressionOptions::default(),
450        );
451    }
452
453    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
454        let compressed = deflate_bytes_zlib_conf(data, level);
455        let res = decompress_zlib(&compressed);
456        if data.len() <= 32 {
457            assert_eq!(res, data, "Failed with level: {:?}", level);
458        } else {
459            assert!(res == data, "Failed with level: {:?}", level);
460        }
461    }
462
463    fn check_zero(level: CompressionOptions) {
464        roundtrip_zlib(&[], level);
465    }
466
467    /// Compress with an empty slice.
468    #[test]
469    fn empty_input() {
470        check_zero(CompressionOptions::default());
471        check_zero(CompressionOptions::fast());
472        check_zero(CompressionOptions::rle());
473    }
474
475    #[test]
476    fn one_and_two_values() {
477        let one = &[1][..];
478        roundtrip_zlib(one, CO::rle());
479        roundtrip_zlib(one, CO::fast());
480        roundtrip_zlib(one, CO::default());
481        let two = &[5, 6, 7, 8][..];
482        roundtrip_zlib(two, CO::rle());
483        roundtrip_zlib(two, CO::fast());
484        roundtrip_zlib(two, CO::default());
485    }
486}