1use std::io;
2use std::io::Write;
34use crate::bitstream::LsbWriter;
5use crate::deflate_state::DeflateState;
6use crate::encoder_state::EncoderState;
7use crate::huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
8use crate::lz77::{lz77_compress_block, LZ77Status};
9use crate::lzvalue::LZValue;
10use crate::stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
1112const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
1314/// Flush mode to use when compressing input received in multiple steps.
15///
16/// (The more obscure ZLIB flush modes are not implemented.)
17#[derive(Eq, PartialEq, Debug, Copy, Clone)]
18pub enum Flush {
19// Simply wait for more input when we are out of input data to process.
20None,
21// Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
22 // outputting all pending data, and then outputs an empty stored block.
23 // (That is, the block header indicating a stored block followed by `0000FFFF`).
24Sync,
25 _Partial,
26 _Block,
27 _Full,
28// Finish compressing and output all remaining input.
29Finish,
30}
3132/// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
33/// with the end of block code.
34pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
35for &b in buffer {
36 state.write_lzvalue(b.value());
37 }
38 state.write_end_of_block()
39}
4041/// Compress the input data using only fixed huffman codes.
42///
43/// Currently only used in tests.
44#[cfg(test)]
45pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
46use crate::lz77::lz77_compress;
4748let mut state = EncoderState::fixed(Vec::new());
49let compressed = lz77_compress(input).unwrap();
5051// We currently don't split blocks here(this function is just used for tests anyhow)
52state.write_start_of_block(true, true);
53 flush_to_bitstream(&compressed, &mut state);
5455 state.flush();
56 state.reset(Vec::new())
57}
5859fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
60// If the input is not zero, we write stored blocks for the input data.
61if !input.is_empty() {
62let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
6364while let Some(chunk) = i.next() {
65let last_chunk = i.peek().is_none();
66// Write the block header
67write_stored_header(writer, final_block && last_chunk);
6869// Write the actual data.
70compress_block_stored(chunk, &mut writer).expect("Write error");
71 }
72 } else {
73// If the input length is zero, we output an empty block. This is used for syncing.
74write_stored_header(writer, final_block);
75 compress_block_stored(&[], &mut writer).expect("Write error");
76 }
77}
7879/// Inner compression function used by both the writers and the simple compression functions.
80pub fn compress_data_dynamic_n<W: Write>(
81 input: &[u8],
82 deflate_state: &mut DeflateState<W>,
83 flush: Flush,
84) -> io::Result<usize> {
85let mut bytes_written = 0;
8687let mut slice = input;
8889// enter the decompression loop unless we did a sync flush, in case we want to make sure
90 // everything is output before continuing.
91while !deflate_state.needs_flush {
92let output_buf_len = deflate_state.output_buf().len();
93let output_buf_pos = deflate_state.output_buf_pos;
94// If the output buffer has too much data in it already, flush it before doing anything
95 // else.
96if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
97let written = deflate_state
98 .inner
99 .as_mut()
100 .expect("Missing writer!")
101 .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
102103if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
104// Only some of the data was flushed, so keep track of where we were.
105deflate_state.output_buf_pos += written;
106 } else {
107// If we flushed all of the output, reset the output buffer.
108deflate_state.needs_flush = false;
109 deflate_state.output_buf_pos = 0;
110 deflate_state.output_buf().clear();
111 }
112113if bytes_written == 0 {
114// If the buffer was already full when the function was called, this has to be
115 // returned rather than Ok(0) to indicate that we didn't write anything, but are
116 // not done yet.
117return Err(io::Error::new(
118 io::ErrorKind::Interrupted,
119"Internal buffer full.",
120 ));
121 } else {
122return Ok(bytes_written);
123 }
124 }
125126if deflate_state.lz77_state.is_last_block() {
127// The last block has already been written, so we don't have anything to compress.
128break;
129 }
130131let (written, status, position) = lz77_compress_block(
132 slice,
133&mut deflate_state.lz77_state,
134&mut deflate_state.input_buffer,
135&mut deflate_state.lz77_writer,
136 flush,
137 );
138139// Bytes written in this call
140bytes_written += written;
141// Total bytes written since the compression process started
142 // TODO: Should we realistically have to worry about overflowing here?
143deflate_state.bytes_written += written as u64;
144145if status == LZ77Status::NeedInput {
146// If we've consumed all the data input so far, and we're not
147 // finishing or syncing or ending the block here, simply return
148 // the number of bytes consumed so far.
149return Ok(bytes_written);
150 }
151152// Increment start of input data
153slice = &slice[written..];
154155// We need to check if this is the last block as the header will then be
156 // slightly different to indicate this.
157let last_block = deflate_state.lz77_state.is_last_block();
158159let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
160161if cfg!(debug_assertions) {
162 deflate_state
163 .bytes_written_control
164 .add(current_block_input_bytes);
165 }
166167let partial_bits = deflate_state.encoder_state.writer.pending_bits();
168169let res = {
170let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
171let (l_lengths, d_lengths) =
172 deflate_state.encoder_state.huffman_table.get_lengths_mut();
173174 gen_huffman_lengths(
175 l_freqs,
176 d_freqs,
177 current_block_input_bytes,
178 partial_bits,
179 l_lengths,
180 d_lengths,
181&mut deflate_state.length_buffers,
182 )
183 };
184185// Check if we've actually managed to compress the input, and output stored blocks
186 // if not.
187match res {
188 BlockType::Dynamic(header) => {
189// Write the block header.
190deflate_state
191 .encoder_state
192 .write_start_of_block(false, last_block);
193194// Output the lengths of the huffman codes used in this block.
195write_huffman_lengths(
196&header,
197&deflate_state.encoder_state.huffman_table,
198&deflate_state.length_buffers.length_buf,
199&mut deflate_state.encoder_state.writer,
200 );
201202// Uupdate the huffman codes that will be used to encode the
203 // lz77-compressed data.
204deflate_state
205 .encoder_state
206 .huffman_table
207 .update_from_lengths();
208209// Write the huffman compressed data and the end of block marker.
210flush_to_bitstream(
211 deflate_state.lz77_writer.get_buffer(),
212&mut deflate_state.encoder_state,
213 );
214 }
215 BlockType::Fixed => {
216// Write the block header for fixed code blocks.
217deflate_state
218 .encoder_state
219 .write_start_of_block(true, last_block);
220221// Use the pre-defined static huffman codes.
222deflate_state.encoder_state.set_huffman_to_fixed();
223224// Write the compressed data and the end of block marker.
225flush_to_bitstream(
226 deflate_state.lz77_writer.get_buffer(),
227&mut deflate_state.encoder_state,
228 );
229 }
230 BlockType::Stored => {
231// If compression fails, output a stored block instead.
232233let start_pos = position.saturating_sub(current_block_input_bytes as usize);
234235assert!(
236 position >= current_block_input_bytes as usize,
237"Error! Trying to output a stored block with forgotten data!\
238 if you encounter this error, please file an issue!"
239);
240241 write_stored_block(
242&deflate_state.input_buffer.get_buffer()[start_pos..position],
243&mut deflate_state.encoder_state.writer,
244 flush == Flush::Finish && last_block,
245 );
246 }
247 };
248249// Clear the current lz77 data in the writer for the next call.
250deflate_state.lz77_writer.clear();
251// We are done with the block, so we reset the number of bytes taken
252 // for the next one.
253deflate_state.lz77_state.reset_input_bytes();
254255// We are done for now.
256if status == LZ77Status::Finished {
257// This flush mode means that there should be an empty stored block at the end.
258if flush == Flush::Sync {
259 write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
260// Indicate that we need to flush the buffers before doing anything else.
261deflate_state.needs_flush = true;
262 } else if !deflate_state.lz77_state.is_last_block() {
263// Make sure a block with the last block header has been output.
264 // Not sure this can actually happen, but we make sure to finish properly
265 // if it somehow does.
266 // An empty fixed block is the shortest.
267let es = &mut deflate_state.encoder_state;
268 es.set_huffman_to_fixed();
269 es.write_start_of_block(true, true);
270 es.write_end_of_block();
271 }
272break;
273 }
274 }
275276// If we reach this point, the remaining data in the buffers is to be flushed.
277deflate_state.encoder_state.flush();
278// Make sure we've output everything, and return the number of bytes written if everything
279 // went well.
280let output_buf_pos = deflate_state.output_buf_pos;
281let written_to_writer = deflate_state
282 .inner
283 .as_mut()
284 .expect("Missing writer!")
285 .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
286if written_to_writer
287 < deflate_state
288 .output_buf()
289 .len()
290 .checked_sub(output_buf_pos)
291 .unwrap()
292 {
293 deflate_state.output_buf_pos += written_to_writer;
294 } else {
295// If we sucessfully wrote all the data, we can clear the output buffer.
296deflate_state.output_buf_pos = 0;
297 deflate_state.output_buf().clear();
298 deflate_state.needs_flush = false;
299 }
300301Ok(bytes_written)
302}
303304#[cfg(test)]
305mod test {
306use super::*;
307use crate::test_utils::{decompress_to_end, get_test_data};
308309#[test]
310/// Test compressing a short string using fixed encoding.
311fn fixed_string_mem() {
312let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
313let compressed = compress_data_fixed(&test_data);
314315let result = decompress_to_end(&compressed);
316317assert_eq!(test_data, result);
318 }
319320#[test]
321fn fixed_data() {
322let data = vec![190u8; 400];
323let compressed = compress_data_fixed(&data);
324let result = decompress_to_end(&compressed);
325326assert_eq!(data, result);
327 }
328329/// Test deflate example.
330 ///
331 /// Check if the encoder produces the same code as the example given by Mark Adler here:
332 /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
333#[test]
334fn fixed_example() {
335let test_data = b"Deflate late";
336// let check =
337 // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
338let check = [
3390x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00,
340 ];
341let compressed = compress_data_fixed(test_data);
342assert_eq!(&compressed, &check);
343let decompressed = decompress_to_end(&compressed);
344assert_eq!(&decompressed, test_data)
345 }
346347#[test]
348/// Test compression from a file.
349fn fixed_string_file() {
350let input = get_test_data();
351352let compressed = compress_data_fixed(&input);
353println!("Fixed codes compressed len: {}", compressed.len());
354let result = decompress_to_end(&compressed);
355356assert_eq!(input.len(), result.len());
357// Not using assert_eq here deliberately to avoid massive amounts of output spam.
358assert!(input == result);
359 }
360}