deflate/
compress.rs

1use std::io;
2use std::io::Write;
3
4use crate::bitstream::LsbWriter;
5use crate::deflate_state::DeflateState;
6use crate::encoder_state::EncoderState;
7use crate::huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
8use crate::lz77::{lz77_compress_block, LZ77Status};
9use crate::lzvalue::LZValue;
10use crate::stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
11
12const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
13
14/// Flush mode to use when compressing input received in multiple steps.
15///
16/// (The more obscure ZLIB flush modes are not implemented.)
17#[derive(Eq, PartialEq, Debug, Copy, Clone)]
18pub enum Flush {
19    // Simply wait for more input when we are out of input data to process.
20    None,
21    // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
22    // outputting all pending data, and then outputs an empty stored block.
23    // (That is, the block header indicating a stored block followed by `0000FFFF`).
24    Sync,
25    _Partial,
26    _Block,
27    _Full,
28    // Finish compressing and output all remaining input.
29    Finish,
30}
31
32/// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
33/// with the end of block code.
34pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
35    for &b in buffer {
36        state.write_lzvalue(b.value());
37    }
38    state.write_end_of_block()
39}
40
41/// Compress the input data using only fixed huffman codes.
42///
43/// Currently only used in tests.
44#[cfg(test)]
45pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
46    use crate::lz77::lz77_compress;
47
48    let mut state = EncoderState::fixed(Vec::new());
49    let compressed = lz77_compress(input).unwrap();
50
51    // We currently don't split blocks here(this function is just used for tests anyhow)
52    state.write_start_of_block(true, true);
53    flush_to_bitstream(&compressed, &mut state);
54
55    state.flush();
56    state.reset(Vec::new())
57}
58
59fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
60    // If the input is not zero, we write stored blocks for the input data.
61    if !input.is_empty() {
62        let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
63
64        while let Some(chunk) = i.next() {
65            let last_chunk = i.peek().is_none();
66            // Write the block header
67            write_stored_header(writer, final_block && last_chunk);
68
69            // Write the actual data.
70            compress_block_stored(chunk, &mut writer).expect("Write error");
71        }
72    } else {
73        // If the input length is zero, we output an empty block. This is used for syncing.
74        write_stored_header(writer, final_block);
75        compress_block_stored(&[], &mut writer).expect("Write error");
76    }
77}
78
79/// Inner compression function used by both the writers and the simple compression functions.
80pub fn compress_data_dynamic_n<W: Write>(
81    input: &[u8],
82    deflate_state: &mut DeflateState<W>,
83    flush: Flush,
84) -> io::Result<usize> {
85    let mut bytes_written = 0;
86
87    let mut slice = input;
88
89    // enter the decompression loop unless we did a sync flush, in case we want to make sure
90    // everything is output before continuing.
91    while !deflate_state.needs_flush {
92        let output_buf_len = deflate_state.output_buf().len();
93        let output_buf_pos = deflate_state.output_buf_pos;
94        // If the output buffer has too much data in it already, flush it before doing anything
95        // else.
96        if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
97            let written = deflate_state
98                .inner
99                .as_mut()
100                .expect("Missing writer!")
101                .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
102
103            if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
104                // Only some of the data was flushed, so keep track of where we were.
105                deflate_state.output_buf_pos += written;
106            } else {
107                // If we flushed all of the output, reset the output buffer.
108                deflate_state.needs_flush = false;
109                deflate_state.output_buf_pos = 0;
110                deflate_state.output_buf().clear();
111            }
112
113            if bytes_written == 0 {
114                // If the buffer was already full when the function was called, this has to be
115                // returned rather than Ok(0) to indicate that we didn't write anything, but are
116                // not done yet.
117                return Err(io::Error::new(
118                    io::ErrorKind::Interrupted,
119                    "Internal buffer full.",
120                ));
121            } else {
122                return Ok(bytes_written);
123            }
124        }
125
126        if deflate_state.lz77_state.is_last_block() {
127            // The last block has already been written, so we don't have anything to compress.
128            break;
129        }
130
131        let (written, status, position) = lz77_compress_block(
132            slice,
133            &mut deflate_state.lz77_state,
134            &mut deflate_state.input_buffer,
135            &mut deflate_state.lz77_writer,
136            flush,
137        );
138
139        // Bytes written in this call
140        bytes_written += written;
141        // Total bytes written since the compression process started
142        // TODO: Should we realistically have to worry about overflowing here?
143        deflate_state.bytes_written += written as u64;
144
145        if status == LZ77Status::NeedInput {
146            // If we've consumed all the data input so far, and we're not
147            // finishing or syncing or ending the block here, simply return
148            // the number of bytes consumed so far.
149            return Ok(bytes_written);
150        }
151
152        // Increment start of input data
153        slice = &slice[written..];
154
155        // We need to check if this is the last block as the header will then be
156        // slightly different to indicate this.
157        let last_block = deflate_state.lz77_state.is_last_block();
158
159        let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
160
161        if cfg!(debug_assertions) {
162            deflate_state
163                .bytes_written_control
164                .add(current_block_input_bytes);
165        }
166
167        let partial_bits = deflate_state.encoder_state.writer.pending_bits();
168
169        let res = {
170            let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
171            let (l_lengths, d_lengths) =
172                deflate_state.encoder_state.huffman_table.get_lengths_mut();
173
174            gen_huffman_lengths(
175                l_freqs,
176                d_freqs,
177                current_block_input_bytes,
178                partial_bits,
179                l_lengths,
180                d_lengths,
181                &mut deflate_state.length_buffers,
182            )
183        };
184
185        // Check if we've actually managed to compress the input, and output stored blocks
186        // if not.
187        match res {
188            BlockType::Dynamic(header) => {
189                // Write the block header.
190                deflate_state
191                    .encoder_state
192                    .write_start_of_block(false, last_block);
193
194                // Output the lengths of the huffman codes used in this block.
195                write_huffman_lengths(
196                    &header,
197                    &deflate_state.encoder_state.huffman_table,
198                    &deflate_state.length_buffers.length_buf,
199                    &mut deflate_state.encoder_state.writer,
200                );
201
202                // Uupdate the huffman codes that will be used to encode the
203                // lz77-compressed data.
204                deflate_state
205                    .encoder_state
206                    .huffman_table
207                    .update_from_lengths();
208
209                // Write the huffman compressed data and the end of block marker.
210                flush_to_bitstream(
211                    deflate_state.lz77_writer.get_buffer(),
212                    &mut deflate_state.encoder_state,
213                );
214            }
215            BlockType::Fixed => {
216                // Write the block header for fixed code blocks.
217                deflate_state
218                    .encoder_state
219                    .write_start_of_block(true, last_block);
220
221                // Use the pre-defined static huffman codes.
222                deflate_state.encoder_state.set_huffman_to_fixed();
223
224                // Write the compressed data and the end of block marker.
225                flush_to_bitstream(
226                    deflate_state.lz77_writer.get_buffer(),
227                    &mut deflate_state.encoder_state,
228                );
229            }
230            BlockType::Stored => {
231                // If compression fails, output a stored block instead.
232
233                let start_pos = position.saturating_sub(current_block_input_bytes as usize);
234
235                assert!(
236                    position >= current_block_input_bytes as usize,
237                    "Error! Trying to output a stored block with forgotten data!\
238                     if you encounter this error, please file an issue!"
239                );
240
241                write_stored_block(
242                    &deflate_state.input_buffer.get_buffer()[start_pos..position],
243                    &mut deflate_state.encoder_state.writer,
244                    flush == Flush::Finish && last_block,
245                );
246            }
247        };
248
249        // Clear the current lz77 data in the writer for the next call.
250        deflate_state.lz77_writer.clear();
251        // We are done with the block, so we reset the number of bytes taken
252        // for the next one.
253        deflate_state.lz77_state.reset_input_bytes();
254
255        // We are done for now.
256        if status == LZ77Status::Finished {
257            // This flush mode means that there should be an empty stored block at the end.
258            if flush == Flush::Sync {
259                write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
260                // Indicate that we need to flush the buffers before doing anything else.
261                deflate_state.needs_flush = true;
262            } else if !deflate_state.lz77_state.is_last_block() {
263                // Make sure a block with the last block header has been output.
264                // Not sure this can actually happen, but we make sure to finish properly
265                // if it somehow does.
266                // An empty fixed block is the shortest.
267                let es = &mut deflate_state.encoder_state;
268                es.set_huffman_to_fixed();
269                es.write_start_of_block(true, true);
270                es.write_end_of_block();
271            }
272            break;
273        }
274    }
275
276    // If we reach this point, the remaining data in the buffers is to be flushed.
277    deflate_state.encoder_state.flush();
278    // Make sure we've output everything, and return the number of bytes written if everything
279    // went well.
280    let output_buf_pos = deflate_state.output_buf_pos;
281    let written_to_writer = deflate_state
282        .inner
283        .as_mut()
284        .expect("Missing writer!")
285        .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
286    if written_to_writer
287        < deflate_state
288            .output_buf()
289            .len()
290            .checked_sub(output_buf_pos)
291            .unwrap()
292    {
293        deflate_state.output_buf_pos += written_to_writer;
294    } else {
295        // If we sucessfully wrote all the data, we can clear the output buffer.
296        deflate_state.output_buf_pos = 0;
297        deflate_state.output_buf().clear();
298        deflate_state.needs_flush = false;
299    }
300
301    Ok(bytes_written)
302}
303
304#[cfg(test)]
305mod test {
306    use super::*;
307    use crate::test_utils::{decompress_to_end, get_test_data};
308
309    #[test]
310    /// Test compressing a short string using fixed encoding.
311    fn fixed_string_mem() {
312        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
313        let compressed = compress_data_fixed(&test_data);
314
315        let result = decompress_to_end(&compressed);
316
317        assert_eq!(test_data, result);
318    }
319
320    #[test]
321    fn fixed_data() {
322        let data = vec![190u8; 400];
323        let compressed = compress_data_fixed(&data);
324        let result = decompress_to_end(&compressed);
325
326        assert_eq!(data, result);
327    }
328
329    /// Test deflate example.
330    ///
331    /// Check if the encoder produces the same code as the example given by Mark Adler here:
332    /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
333    #[test]
334    fn fixed_example() {
335        let test_data = b"Deflate late";
336        // let check =
337        // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
338        let check = [
339            0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00,
340        ];
341        let compressed = compress_data_fixed(test_data);
342        assert_eq!(&compressed, &check);
343        let decompressed = decompress_to_end(&compressed);
344        assert_eq!(&decompressed, test_data)
345    }
346
347    #[test]
348    /// Test compression from a file.
349    fn fixed_string_file() {
350        let input = get_test_data();
351
352        let compressed = compress_data_fixed(&input);
353        println!("Fixed codes compressed len: {}", compressed.len());
354        let result = decompress_to_end(&compressed);
355
356        assert_eq!(input.len(), result.len());
357        // Not using assert_eq here deliberately to avoid massive amounts of output spam.
358        assert!(input == result);
359    }
360}