deflate/
output_writer.rs

1use std::u16;
2
3use crate::huffman_table::{
4    get_distance_code, get_length_code, END_OF_BLOCK_POSITION, NUM_DISTANCE_CODES,
5    NUM_LITERALS_AND_LENGTHS,
6};
7use crate::lzvalue::LZValue;
8
9/// The type used for representing how many times a literal, length or distance code has been output
10/// to the current buffer.
11/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
12/// 16-bit values.
13pub type FrequencyType = u16;
14
15/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
16/// number of literals/lengths output before a new block is started.
17/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
18/// overflowing (which would degrade, or in the worst case break compression).
19pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
20
21#[derive(Debug, PartialEq)]
22pub enum BufferStatus {
23    NotFull,
24    Full,
25}
26
27/// Struct that buffers lz77 data and keeps track of the usage of different codes
28pub struct DynamicWriter {
29    buffer: Vec<LZValue>,
30    // The two last length codes are not actually used, but only participates in code construction
31    // Therefore, we ignore them to get the correct number of lengths
32    frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
33    distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
34}
35
36impl DynamicWriter {
37    #[inline]
38    pub fn check_buffer_length(&self) -> BufferStatus {
39        if self.buffer.len() >= MAX_BUFFER_LENGTH {
40            BufferStatus::Full
41        } else {
42            BufferStatus::NotFull
43        }
44    }
45
46    #[inline]
47    pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
48        debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
49        self.buffer.push(LZValue::literal(literal));
50        self.frequencies[usize::from(literal)] += 1;
51        self.check_buffer_length()
52    }
53
54    #[inline]
55    pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
56        self.buffer.push(LZValue::length_distance(length, distance));
57        let l_code_num = get_length_code(length);
58        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
59        self.frequencies[l_code_num] += 1;
60
61        let d_code_num = get_distance_code(distance);
62        // The compiler seems to be able to evade the bounds check here somehow.
63        self.distance_frequencies[usize::from(d_code_num)] += 1;
64        self.check_buffer_length()
65    }
66
67    pub fn buffer_length(&self) -> usize {
68        self.buffer.len()
69    }
70
71    pub fn get_buffer(&self) -> &[LZValue] {
72        &self.buffer
73    }
74
75    pub fn new() -> DynamicWriter {
76        let mut w = DynamicWriter {
77            buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
78            frequencies: [0; NUM_LITERALS_AND_LENGTHS],
79            distance_frequencies: [0; NUM_DISTANCE_CODES],
80        };
81        // This will always be 1,
82        // since there will always only be one end of block marker in each block
83        w.frequencies[END_OF_BLOCK_POSITION] = 1;
84        w
85    }
86
87    /// Special output function used with RLE compression
88    /// that avoids bothering to lookup a distance code.
89    #[inline]
90    pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
91        self.buffer.push(LZValue::length_distance(length, 1));
92        let l_code_num = get_length_code(length);
93        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
94        self.frequencies[l_code_num] += 1;
95
96        self.distance_frequencies[0] += 1;
97        self.check_buffer_length()
98    }
99
100    pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
101        (&self.frequencies, &self.distance_frequencies)
102    }
103
104    pub fn clear_frequencies(&mut self) {
105        self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
106        self.distance_frequencies = [0; NUM_DISTANCE_CODES];
107        self.frequencies[END_OF_BLOCK_POSITION] = 1;
108    }
109
110    pub fn clear_data(&mut self) {
111        self.buffer.clear()
112    }
113
114    pub fn clear(&mut self) {
115        self.clear_frequencies();
116        self.clear_data();
117    }
118}
119
120#[cfg(test)]
121mod test {
122    use super::*;
123    use crate::huffman_table::{get_distance_code, get_length_code};
124    #[test]
125    /// Ensure that these function won't produce values that would overflow the output_writer
126    /// tables since we use some unsafe indexing.
127    fn array_bounds() {
128        let w = DynamicWriter::new();
129
130        for i in 0..u16::max_value() {
131            assert!(get_length_code(i) < w.frequencies.len());
132        }
133
134        for i in 0..u16::max_value() {
135            assert!(get_distance_code(i) < w.distance_frequencies.len() as u8);
136        }
137    }
138}