fdeflate/
compress.rs

1use simd_adler32::Adler32;
2use std::io::{self, Seek, SeekFrom, Write};
3
4use crate::tables::{
5    BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL,
6};
7
8/// Compressor that produces fdeflate compressed streams.
9pub struct Compressor<W: Write> {
10    checksum: Adler32,
11    buffer: u64,
12    nbits: u8,
13    writer: W,
14}
15impl<W: Write> Compressor<W> {
16    fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> {
17        debug_assert!(nbits <= 64);
18
19        self.buffer |= bits << self.nbits;
20        self.nbits += nbits;
21
22        if self.nbits >= 64 {
23            self.writer.write_all(&self.buffer.to_le_bytes())?;
24            self.nbits -= 64;
25            self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0);
26        }
27        debug_assert!(self.nbits < 64);
28        Ok(())
29    }
30
31    fn flush(&mut self) -> io::Result<()> {
32        if self.nbits % 8 != 0 {
33            self.write_bits(0, 8 - self.nbits % 8)?;
34        }
35        if self.nbits > 0 {
36            self.writer
37                .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8])
38                .unwrap();
39            self.buffer = 0;
40            self.nbits = 0;
41        }
42        Ok(())
43    }
44
45    fn write_run(&mut self, mut run: u32) -> io::Result<()> {
46        self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?;
47        run -= 1;
48
49        while run >= 258 {
50            self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?;
51            run -= 258;
52        }
53
54        if run > 4 {
55            let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize;
56            self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?;
57
58            let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3];
59            let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64;
60            self.write_bits(extra, len_extra + 1)?;
61        } else {
62            debug_assert_eq!(HUFFMAN_CODES[0], 0);
63            self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?;
64        }
65
66        Ok(())
67    }
68
69    /// Create a new Compressor.
70    pub fn new(writer: W) -> io::Result<Self> {
71        let mut compressor = Self {
72            checksum: Adler32::new(),
73            buffer: 0,
74            nbits: 0,
75            writer,
76        };
77        compressor.write_headers()?;
78        Ok(compressor)
79    }
80
81    fn write_headers(&mut self) -> io::Result<()> {
82        const HEADER: [u8; 54] = [
83            120, 1, 237, 192, 3, 160, 36, 89, 150, 198, 241, 255, 119, 238, 141, 200, 204, 167,
84            114, 75, 99, 174, 109, 219, 182, 109, 219, 182, 109, 219, 182, 109, 105, 140, 158, 150,
85            74, 175, 158, 50, 51, 34, 238, 249, 118, 183, 106, 122, 166, 135, 59, 107, 213, 15,
86        ];
87        self.writer.write_all(&HEADER[..53]).unwrap();
88        self.write_bits(HEADER[53] as u64, 5)?;
89
90        Ok(())
91    }
92
93    /// Write data to the compressor.
94    pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> {
95        self.checksum.write(data);
96
97        let mut run = 0;
98        let mut chunks = data.chunks_exact(8);
99        for chunk in &mut chunks {
100            let ichunk = u64::from_le_bytes(chunk.try_into().unwrap());
101
102            if ichunk == 0 {
103                run += 8;
104                continue;
105            } else if run > 0 {
106                let run_extra = ichunk.trailing_zeros() / 8;
107                self.write_run(run + run_extra)?;
108                run = 0;
109
110                if run_extra > 0 {
111                    run = ichunk.leading_zeros() / 8;
112                    for &b in &chunk[run_extra as usize..8 - run as usize] {
113                        self.write_bits(
114                            HUFFMAN_CODES[b as usize] as u64,
115                            HUFFMAN_LENGTHS[b as usize],
116                        )?;
117                    }
118                    continue;
119                }
120            }
121
122            let run_start = ichunk.leading_zeros() / 8;
123            if run_start > 0 {
124                for &b in &chunk[..8 - run_start as usize] {
125                    self.write_bits(
126                        HUFFMAN_CODES[b as usize] as u64,
127                        HUFFMAN_LENGTHS[b as usize],
128                    )?;
129                }
130                run = run_start;
131                continue;
132            }
133
134            let n0 = HUFFMAN_LENGTHS[chunk[0] as usize];
135            let n1 = HUFFMAN_LENGTHS[chunk[1] as usize];
136            let n2 = HUFFMAN_LENGTHS[chunk[2] as usize];
137            let n3 = HUFFMAN_LENGTHS[chunk[3] as usize];
138            let bits = HUFFMAN_CODES[chunk[0] as usize] as u64
139                | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0)
140                | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1))
141                | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2));
142            self.write_bits(bits, n0 + n1 + n2 + n3)?;
143
144            let n4 = HUFFMAN_LENGTHS[chunk[4] as usize];
145            let n5 = HUFFMAN_LENGTHS[chunk[5] as usize];
146            let n6 = HUFFMAN_LENGTHS[chunk[6] as usize];
147            let n7 = HUFFMAN_LENGTHS[chunk[7] as usize];
148            let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64
149                | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4)
150                | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5))
151                | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6));
152            self.write_bits(bits2, n4 + n5 + n6 + n7)?;
153        }
154
155        if run > 0 {
156            self.write_run(run)?;
157        }
158
159        for &b in chunks.remainder() {
160            self.write_bits(
161                HUFFMAN_CODES[b as usize] as u64,
162                HUFFMAN_LENGTHS[b as usize],
163            )?;
164        }
165
166        Ok(())
167    }
168
169    /// Write the remainder of the stream and return the inner writer.
170    pub fn finish(mut self) -> io::Result<W> {
171        // Write end of block
172        self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?;
173        self.flush()?;
174
175        // Write Adler32 checksum
176        let checksum: u32 = self.checksum.finish();
177        self.writer
178            .write_all(checksum.to_be_bytes().as_ref())
179            .unwrap();
180        Ok(self.writer)
181    }
182}
183
184/// Compressor that only writes the stored blocks.
185///
186/// This is useful for writing files that are not compressed, but still need to be wrapped in a
187/// zlib stream.
188pub struct StoredOnlyCompressor<W> {
189    writer: W,
190    checksum: Adler32,
191    block_bytes: u16,
192}
193impl<W: Write + Seek> StoredOnlyCompressor<W> {
194    /// Creates a new `StoredOnlyCompressor` that writes to the given writer.
195    pub fn new(mut writer: W) -> io::Result<Self> {
196        writer.write_all(&[0x78, 0x01])?; // zlib header
197        writer.write_all(&[0; 5])?; // placeholder stored block header
198
199        Ok(Self {
200            writer,
201            checksum: Adler32::new(),
202            block_bytes: 0,
203        })
204    }
205
206    fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> {
207        self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?;
208        self.writer.write_all(&[
209            last as u8,
210            (size & 0xFF) as u8,
211            ((size >> 8) & 0xFF) as u8,
212            (!size & 0xFF) as u8,
213            ((!size >> 8) & 0xFF) as u8,
214        ])?;
215        self.writer.seek(SeekFrom::Current(size as i64))?;
216
217        Ok(())
218    }
219
220    /// Writes the given data to the underlying writer.
221    pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> {
222        self.checksum.write(data);
223        while !data.is_empty() {
224            if self.block_bytes == u16::MAX {
225                self.set_block_header(u16::MAX, false)?;
226                self.writer.write_all(&[0; 5])?; // placeholder stored block header
227                self.block_bytes = 0;
228            }
229
230            let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize);
231            self.writer.write_all(&data[..prefix_bytes])?;
232            self.block_bytes += prefix_bytes as u16;
233            data = &data[prefix_bytes..];
234        }
235
236        Ok(())
237    }
238
239    /// Finish writing the final block and return the underlying writer.
240    pub fn finish(mut self) -> io::Result<W> {
241        self.set_block_header(self.block_bytes, true)?;
242
243        // Write Adler32 checksum
244        let checksum: u32 = self.checksum.finish();
245        self.writer
246            .write_all(checksum.to_be_bytes().as_ref())
247            .unwrap();
248
249        Ok(self.writer)
250    }
251}
252impl<W> StoredOnlyCompressor<W> {
253    /// Return the number of bytes that will be written to the output stream
254    /// for the given input size. Because this compressor only writes stored blocks,
255    /// the output size is always slightly *larger* than the input size.
256    pub fn compressed_size(raw_size: usize) -> usize {
257        (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5)
258            + (raw_size % u16::MAX as usize + 5)
259            + 6
260    }
261}
262
263/// Compresses the given data.
264pub fn compress_to_vec(input: &[u8]) -> Vec<u8> {
265    let mut compressor = Compressor::new(Vec::with_capacity(input.len() / 4)).unwrap();
266    compressor.write_data(input).unwrap();
267    compressor.finish().unwrap()
268}
269
270#[cfg(test)]
271mod tests {
272    use super::*;
273    use rand::Rng;
274
275    fn roundtrip(data: &[u8]) {
276        let compressed = compress_to_vec(data);
277        let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap();
278        assert_eq!(&decompressed, data);
279    }
280
281    #[test]
282    fn it_works() {
283        roundtrip(b"Hello world!");
284    }
285
286    #[test]
287    fn constant() {
288        roundtrip(&vec![0; 2048]);
289        roundtrip(&vec![5; 2048]);
290        roundtrip(&vec![128; 2048]);
291        roundtrip(&vec![254; 2048]);
292    }
293
294    #[test]
295    fn random() {
296        let mut rng = rand::thread_rng();
297        let mut data = vec![0; 2048];
298        for _ in 0..10 {
299            for byte in &mut data {
300                *byte = rng.gen();
301            }
302            roundtrip(&data);
303        }
304    }
305}