1use simd_adler32::Adler32;
2use std::io::{self, Seek, SeekFrom, Write};
3
4use crate::tables::{
5 BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL,
6};
7
/// Streaming zlib/deflate compressor that Huffman-codes its input using the
/// precomputed tables from `crate::tables`.
pub struct Compressor<W: Write> {
    /// Running Adler-32 over all raw input; emitted as the stream trailer.
    checksum: Adler32,
    /// Pending output bits, packed LSB-first.
    buffer: u64,
    /// Number of valid bits currently held in `buffer` (kept < 64 between calls).
    nbits: u8,
    /// Underlying byte sink.
    writer: W,
}
15impl<W: Write> Compressor<W> {
    /// Appends the low `nbits` bits of `bits` to the output bit stream,
    /// flushing 8 bytes to the writer whenever the 64-bit buffer fills.
    fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> {
        debug_assert!(nbits <= 64);

        // Place the new bits above the ones already pending (LSB-first).
        // Bits shifted off the top here are recovered below after a flush.
        self.buffer |= bits << self.nbits;
        self.nbits += nbits;

        if self.nbits >= 64 {
            // Buffer full: emit all 8 bytes little-endian, then keep the
            // overflow bits that did not fit.
            self.writer.write_all(&self.buffer.to_le_bytes())?;
            self.nbits -= 64;
            // checked_shr: the shift equals 64 exactly when no bits overflowed,
            // in which case the buffer restarts empty.
            self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0);
        }
        debug_assert!(self.nbits < 64);
        Ok(())
    }
30
31 fn flush(&mut self) -> io::Result<()> {
32 if self.nbits % 8 != 0 {
33 self.write_bits(0, 8 - self.nbits % 8)?;
34 }
35 if self.nbits > 0 {
36 self.writer
37 .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8])
38 .unwrap();
39 self.buffer = 0;
40 self.nbits = 0;
41 }
42 Ok(())
43 }
44
    /// Encodes a run of `run` zero bytes (run >= 1) as one literal zero
    /// followed by back-reference matches against that zero.
    fn write_run(&mut self, mut run: u32) -> io::Result<()> {
        // The first zero must be a literal so later matches have data to copy.
        self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?;
        run -= 1;

        // Emit maximum-length (258) matches while possible. The `+ 1`
        // appends one extra bit after the length code — presumably a one-bit
        // distance code for distance 1; TODO confirm against crate::tables.
        while run >= 258 {
            self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?;
            run -= 258;
        }

        if run > 4 {
            // Leftover long enough for a length symbol (lengths start at 3).
            let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize;
            self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?;

            // Extra length bits per the symbol, again followed by the
            // single-bit distance code (`+ 1`).
            let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3];
            let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64;
            self.write_bits(extra, len_extra + 1)?;
        } else {
            // Short leftover: emit literal zeros instead. Because the code
            // for byte 0 is all-zero bits, `run` copies can be written in one
            // call as `run * code_length` zero bits.
            debug_assert_eq!(HUFFMAN_CODES[0], 0);
            self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?;
        }

        Ok(())
    }
68
69 pub fn new(writer: W) -> io::Result<Self> {
71 let mut compressor = Self {
72 checksum: Adler32::new(),
73 buffer: 0,
74 nbits: 0,
75 writer,
76 };
77 compressor.write_headers()?;
78 Ok(compressor)
79 }
80
81 fn write_headers(&mut self) -> io::Result<()> {
82 const HEADER: [u8; 54] = [
83 120, 1, 237, 192, 3, 160, 36, 89, 150, 198, 241, 255, 119, 238, 141, 200, 204, 167,
84 114, 75, 99, 174, 109, 219, 182, 109, 219, 182, 109, 219, 182, 109, 105, 140, 158, 150,
85 74, 175, 158, 50, 51, 34, 238, 249, 118, 183, 106, 122, 166, 135, 59, 107, 213, 15,
86 ];
87 self.writer.write_all(&HEADER[..53]).unwrap();
88 self.write_bits(HEADER[53] as u64, 5)?;
89
90 Ok(())
91 }
92
    /// Compresses `data`: bytes are emitted as Huffman literals, and runs of
    /// zero bytes are collapsed into back-reference matches via `write_run`.
    pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> {
        self.checksum.write(data);

        // Count of pending zero bytes not yet emitted.
        let mut run = 0;
        let mut chunks = data.chunks_exact(8);
        for chunk in &mut chunks {
            let ichunk = u64::from_le_bytes(chunk.try_into().unwrap());

            if ichunk == 0 {
                // Entire chunk is zero: just extend the pending run.
                run += 8;
                continue;
            } else if run > 0 {
                // Chunk terminates a pending run. Its leading zero *bytes*
                // (little-endian, so `trailing_zeros` of the integer) still
                // belong to that run.
                let run_extra = ichunk.trailing_zeros() / 8;
                self.write_run(run + run_extra)?;
                run = 0;

                if run_extra > 0 {
                    // Trailing zero bytes of the chunk may start a new run;
                    // the bytes in between are plain literals.
                    run = ichunk.leading_zeros() / 8;
                    for &b in &chunk[run_extra as usize..8 - run as usize] {
                        self.write_bits(
                            HUFFMAN_CODES[b as usize] as u64,
                            HUFFMAN_LENGTHS[b as usize],
                        )?;
                    }
                    continue;
                }
            }

            // No pending run: if the chunk ends in zero bytes, emit the
            // non-zero prefix as literals and start a new run.
            let run_start = ichunk.leading_zeros() / 8;
            if run_start > 0 {
                for &b in &chunk[..8 - run_start as usize] {
                    self.write_bits(
                        HUFFMAN_CODES[b as usize] as u64,
                        HUFFMAN_LENGTHS[b as usize],
                    )?;
                }
                run = run_start;
                continue;
            }

            // Fast path: all eight bytes are literals. Pack four codes into
            // one `write_bits` call each (assumes any four code lengths sum
            // to <= 64 bits — presumably guaranteed by the table builder;
            // TODO confirm).
            let n0 = HUFFMAN_LENGTHS[chunk[0] as usize];
            let n1 = HUFFMAN_LENGTHS[chunk[1] as usize];
            let n2 = HUFFMAN_LENGTHS[chunk[2] as usize];
            let n3 = HUFFMAN_LENGTHS[chunk[3] as usize];
            let bits = HUFFMAN_CODES[chunk[0] as usize] as u64
                | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0)
                | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1))
                | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2));
            self.write_bits(bits, n0 + n1 + n2 + n3)?;

            let n4 = HUFFMAN_LENGTHS[chunk[4] as usize];
            let n5 = HUFFMAN_LENGTHS[chunk[5] as usize];
            let n6 = HUFFMAN_LENGTHS[chunk[6] as usize];
            let n7 = HUFFMAN_LENGTHS[chunk[7] as usize];
            let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64
                | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4)
                | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5))
                | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6));
            self.write_bits(bits2, n4 + n5 + n6 + n7)?;
        }

        // Flush any run still pending after the exact chunks.
        if run > 0 {
            self.write_run(run)?;
        }

        // Tail bytes (< 8) are always emitted as literals, zeros included.
        for &b in chunks.remainder() {
            self.write_bits(
                HUFFMAN_CODES[b as usize] as u64,
                HUFFMAN_LENGTHS[b as usize],
            )?;
        }

        Ok(())
    }
168
169 pub fn finish(mut self) -> io::Result<W> {
171 self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?;
173 self.flush()?;
174
175 let checksum: u32 = self.checksum.finish();
177 self.writer
178 .write_all(checksum.to_be_bytes().as_ref())
179 .unwrap();
180 Ok(self.writer)
181 }
182}
183
/// Compressor that emits only "stored" (uncompressed) deflate blocks.
/// Needs `Seek` so each block's 5-byte header can be back-patched once the
/// block's final size is known.
pub struct StoredOnlyCompressor<W> {
    writer: W,
    /// Running Adler-32 over all raw input; emitted as the stream trailer.
    checksum: Adler32,
    /// Bytes written so far into the current stored block (max `u16::MAX`).
    block_bytes: u16,
}
193impl<W: Write + Seek> StoredOnlyCompressor<W> {
194 pub fn new(mut writer: W) -> io::Result<Self> {
196 writer.write_all(&[0x78, 0x01])?; writer.write_all(&[0; 5])?; Ok(Self {
200 writer,
201 checksum: Adler32::new(),
202 block_bytes: 0,
203 })
204 }
205
206 fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> {
207 self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?;
208 self.writer.write_all(&[
209 last as u8,
210 (size & 0xFF) as u8,
211 ((size >> 8) & 0xFF) as u8,
212 (!size & 0xFF) as u8,
213 ((!size >> 8) & 0xFF) as u8,
214 ])?;
215 self.writer.seek(SeekFrom::Current(size as i64))?;
216
217 Ok(())
218 }
219
220 pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> {
222 self.checksum.write(data);
223 while !data.is_empty() {
224 if self.block_bytes == u16::MAX {
225 self.set_block_header(u16::MAX, false)?;
226 self.writer.write_all(&[0; 5])?; self.block_bytes = 0;
228 }
229
230 let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize);
231 self.writer.write_all(&data[..prefix_bytes])?;
232 self.block_bytes += prefix_bytes as u16;
233 data = &data[prefix_bytes..];
234 }
235
236 Ok(())
237 }
238
239 pub fn finish(mut self) -> io::Result<W> {
241 self.set_block_header(self.block_bytes, true)?;
242
243 let checksum: u32 = self.checksum.finish();
245 self.writer
246 .write_all(checksum.to_be_bytes().as_ref())
247 .unwrap();
248
249 Ok(self.writer)
250 }
251}
252impl<W> StoredOnlyCompressor<W> {
253 pub fn compressed_size(raw_size: usize) -> usize {
257 (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5)
258 + (raw_size % u16::MAX as usize + 5)
259 + 6
260 }
261}
262
263pub fn compress_to_vec(input: &[u8]) -> Vec<u8> {
265 let mut compressor = Compressor::new(Vec::with_capacity(input.len() / 4)).unwrap();
266 compressor.write_data(input).unwrap();
267 compressor.finish().unwrap()
268}
269
#[cfg(test)]
mod tests {
    use super::*;
    use rand::Rng;

    /// Compresses `data` and verifies that zlib inflation restores it exactly.
    fn roundtrip(data: &[u8]) {
        let compressed = compress_to_vec(data);
        let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap();
        assert_eq!(&decompressed, data);
    }

    #[test]
    fn it_works() {
        roundtrip(b"Hello world!");
    }

    /// Constant fills exercise the zero-run path (0) and literal fast path.
    #[test]
    fn constant() {
        for &fill in &[0u8, 5, 128, 254] {
            roundtrip(&vec![fill; 2048]);
        }
    }

    /// Random buffers exercise the literal packing paths.
    #[test]
    fn random() {
        let mut rng = rand::thread_rng();
        let mut data = vec![0u8; 2048];
        for _ in 0..10 {
            for byte in data.iter_mut() {
                *byte = rng.gen();
            }
            roundtrip(&data);
        }
    }
}