deflate/
length_encode.rs

1use std::clone::Clone;
2use std::iter::Iterator;
3
4/// An enum representing the different types in the run-length encoded data used to encode
5/// Huffman table lengths
6#[derive(Debug, PartialEq, Eq)]
7pub enum EncodedLength {
8    // An actual length value
9    Length(u8),
10    // Copy the previous value n times
11    CopyPrevious(u8),
12    // Repeat zero n times (with n represented by 3 bits)
13    RepeatZero3Bits(u8),
14    // Repeat zero n times (with n represented by 7 bits)
15    RepeatZero7Bits(u8),
16}
17
18impl EncodedLength {
19    fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
20        match prev {
21            0 => {
22                if repeat <= 10 {
23                    EncodedLength::RepeatZero3Bits(repeat)
24                } else {
25                    EncodedLength::RepeatZero7Bits(repeat)
26                }
27            }
28            1..=15 => EncodedLength::CopyPrevious(repeat),
29            _ => panic!(),
30        }
31    }
32}
33
34pub const COPY_PREVIOUS: usize = 16;
35pub const REPEAT_ZERO_3_BITS: usize = 17;
36pub const REPEAT_ZERO_7_BITS: usize = 18;
37
38const MIN_REPEAT: u8 = 3;
39
40/// Push an `EncodedLength` to the vector and update the frequency table.
41fn update_out_and_freq(
42    encoded: EncodedLength,
43    output: &mut Vec<EncodedLength>,
44    frequencies: &mut [u16; 19],
45) {
46    let index = match encoded {
47        EncodedLength::Length(l) => usize::from(l),
48        EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
49        EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
50        EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
51    };
52
53    frequencies[index] += 1;
54
55    output.push(encoded);
56}
57
58/// Convenience function to check if the repeat counter should be incremented further
59fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
60    (length_value == 0 && repeats < 138) || repeats < 6
61}
62
63///Convenience version for unit tests.
64#[cfg(test)]
65pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
66where
67    I: Iterator<Item = &'a u8> + Clone,
68{
69    let mut freqs = [0u16; 19];
70    let mut encoded: Vec<EncodedLength> = Vec::new();
71    encode_lengths_m(lengths, &mut encoded, &mut freqs);
72    (encoded, freqs)
73}
74
75/// Run-length encodes the lengths of the values in `lengths` according to the deflate
76/// specification. This is used for writing the code lengths for the Huffman tables for
77/// the deflate stream.
78///
79/// Populates the supplied array with the frequency of the different encoded length values
80/// The frequency array is taken as a parameter rather than returned to avoid
81/// excessive `memcpy`-ing.
82pub fn encode_lengths_m<'a, I>(
83    lengths: I,
84    mut out: &mut Vec<EncodedLength>,
85    mut frequencies: &mut [u16; 19],
86) where
87    I: Iterator<Item = &'a u8> + Clone,
88{
89    out.clear();
90    // Number of repetitions of the current value
91    let mut repeat = 0;
92    let mut iter = lengths.clone().enumerate().peekable();
93    // Previous value
94    // We set it to the compliment of the first falue to simplify the code.
95    let mut prev = !iter.peek().expect("No length values!").1;
96
97    while let Some((n, &l)) = iter.next() {
98        if l == prev && not_max_repetitions(l, repeat) {
99            repeat += 1;
100        }
101        if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
102            if repeat >= MIN_REPEAT {
103                // The previous value has been repeated enough times to write out a repeat code.
104
105                let val = EncodedLength::from_prev_and_repeat(prev, repeat);
106                update_out_and_freq(val, &mut out, &mut frequencies);
107                repeat = 0;
108                // If we have a new length value, output l unless the last value is 0 or l is the
109                // last byte.
110                if l != prev {
111                    if l != 0 || iter.peek().is_none() {
112                        update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
113                        repeat = 0;
114                    } else {
115                        // If we have a zero, we start repeat at one instead of outputting, as
116                        // there are separate codes for repeats of zero so we don't need a literal
117                        // to define what byte to repeat.
118                        repeat = 1;
119                    }
120                }
121            } else {
122                // There haven't been enough repetitions of the previous value,
123                // so just we output the lengths directly.
124
125                // If we are at the end, and we have a value that is repeated, we need to
126                // skip a byte and output the last one.
127                let extra_skip = if iter.peek().is_none() && l == prev {
128                    1
129                } else {
130                    0
131                };
132
133                // Get to the position of the next byte to output by starting at zero and skipping.
134                let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
135
136                // As repeats of zeroes have separate codes, we don't need to output a literal here
137                // if we have a zero (unless we are at the end).
138                let extra = if l != 0 || iter.peek().is_none() {
139                    1
140                } else {
141                    0
142                };
143
144                for &i in b_iter.take(repeat as usize + extra) {
145                    update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
146                }
147
148                // If the current byte is zero we start repeat at 1 as we didn't output the literal
149                // directly.
150                repeat = 1 - extra as u8;
151            }
152        }
153        prev = l;
154    }
155}
156
157#[cfg(test)]
158pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
159    in_place::gen_lengths(frequencies, max_len)
160}
161
162pub type LeafVec = Vec<in_place::Node>;
163
164/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
165/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
166///
167/// The leaf buffer is passed in to avoid allocating it every time this function is called.
168/// The existing data contained in it is not preserved.
169pub fn huffman_lengths_from_frequency_m(
170    frequencies: &[u16],
171    max_len: usize,
172    leaf_buffer: &mut LeafVec,
173    lens: &mut [u8],
174) {
175    in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
176}
177
178mod in_place {
179    type WeightType = u32;
180
181    #[cfg(debug)]
182    pub fn validate_lengths(lengths: &[u8]) -> bool {
183        // Avoid issue with floating point on mips: https://github.com/image-rs/deflate-rs/issues/23
184        if cfg!(any(
185            target_arch = "mips",
186            target_arch = "mipsel",
187            target_arch = "mips64",
188            target_arch = "mipsel64"
189        )) {
190            true
191        } else {
192            let v = lengths.iter().fold(0f64, |acc, &n| {
193                acc + if n != 0 {
194                    2f64.powi(-(i32::from(n)))
195                } else {
196                    0f64
197                }
198            });
199
200            match v.partial_cmp(&1.0) {
201                Some(std::cmp::Ordering::Greater) => false,
202                _ => true,
203            }
204        }
205    }
206
207    #[cfg(not(debug))]
208    pub fn validate_lengths(_: &[u8]) -> bool {
209        true
210    }
211
212    #[derive(Eq, PartialEq, Debug)]
213    pub struct Node {
214        value: WeightType,
215        symbol: u16,
216    }
217
218    fn step_1(leaves: &mut [Node]) {
219        // If there are less than 2 non-zero frequencies, this function should not have been
220        // called and we should not have gotten to this point.
221        debug_assert!(leaves.len() >= 2);
222        let mut root = 0;
223        let mut leaf = 2;
224
225        leaves[0].value += leaves[1].value;
226
227        for next in 1..leaves.len() - 1 {
228            if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
229                leaves[next].value = leaves[root].value;
230                leaves[root].value = next as WeightType;
231                root += 1;
232            } else {
233                leaves[next].value = leaves[leaf].value;
234                leaf += 1;
235            }
236
237            if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
238            {
239                leaves[next].value += leaves[root].value;
240                leaves[root].value = next as WeightType;
241                root += 1;
242            } else {
243                leaves[next].value += leaves[leaf].value;
244                leaf += 1;
245            }
246        }
247    }
248
249    fn step_2(leaves: &mut [Node]) {
250        debug_assert!(leaves.len() >= 2);
251        let n = leaves.len();
252
253        leaves[n - 2].value = 0;
254        for t in (0..(n + 1 - 3)).rev() {
255            leaves[t].value = leaves[leaves[t].value as usize].value + 1;
256        }
257
258        let mut available = 1_usize;
259        let mut used = 0;
260        let mut depth = 0;
261        let mut root = n as isize - 2;
262        let mut next = n as isize - 1;
263
264        while available > 0 {
265            while root >= 0 && leaves[root as usize].value == depth {
266                used += 1;
267                root -= 1;
268            }
269            while available > used {
270                leaves[next as usize].value = depth;
271                next -= 1;
272                available -= 1;
273            }
274            available = 2 * used;
275            depth += 1;
276            used = 0;
277        }
278    }
279
280    const MAX_NUMBER_OF_CODES: usize = 32;
281    const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
282
283    /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
284    /// table so that no codes exceed `max_len`.
285    /// This is ported from miniz (which is released as public domain by Rich Geldreich
286    /// https://github.com/richgel999/miniz/blob/master/miniz.c)
287    ///
288    /// This will not generate optimal (minimim-redundancy) codes, however in most cases
289    /// this won't make a large difference.
290    pub fn enforce_max_code_lengths(
291        num_codes: &mut [u16; NUM_CODES_LENGTH],
292        num_used: usize,
293        max_len: usize,
294    ) {
295        debug_assert!(max_len <= 15);
296
297        if num_used > 1 {
298            let mut num_above_max = 0u16;
299            for &l in num_codes[(max_len as usize + 1)..].iter() {
300                num_above_max += l;
301            }
302
303            num_codes[max_len] += num_above_max;
304
305            let mut total = 0u32;
306            for i in (1..=max_len).rev() {
307                // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
308                // be higher than 288,
309                // and 288 << 15 will not be anywhere close to overflowing 32 bits
310                total += (u32::from(num_codes[i])) << (max_len - i);
311            }
312
313            // miniz uses unsigned long here. 32-bits should be sufficient though,
314            // as max_len won't be longer than 15 anyhow.
315            while total != 1u32 << max_len {
316                num_codes[max_len] -= 1;
317                for i in (1..max_len).rev() {
318                    if num_codes[i] != 0 {
319                        num_codes[i] -= 1;
320                        num_codes[i + 1] += 2;
321                        break;
322                    }
323                }
324                total -= 1;
325            }
326        }
327    }
328
329    #[cfg(test)]
330    /// Convenience wrapper for tests.
331    pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
332        let mut lens = vec![0u8; frequencies.len()];
333        let mut leaves = Vec::new();
334        in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
335        lens
336    }
337
338    /// Generate huffman code lengths, using the algorithm described by
339    /// Moffat and Katajainen in In-Place Calculation of Minimum-Redundancy Codes
340    /// https://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html
341    /// and it's implementation.
342    ///
343    /// This is significantly faster, and seems to generally create lengths that result in length
344    /// tables that are better compressible than the algorithm used previously. The downside of this
345    /// algorithm is that it's not length-limited, so if too long code lengths are generated,
346    /// it might result in a sub-optimal tables as the length-restricting function isn't optimal.
347    pub fn in_place_lengths(
348        frequencies: &[u16],
349        max_len: usize,
350        mut leaves: &mut Vec<Node>,
351        lengths: &mut [u8],
352    ) {
353        debug_assert!(lengths.len() >= frequencies.len());
354
355        for l in lengths.iter_mut() {
356            *l = 0;
357        }
358
359        // Clear any previous leaves in the leaf buffer.
360        leaves.clear();
361
362        // Discard zero length nodes as they won't be given a code and thus don't need to
363        // participate in code length generation and create a new vec of the remaining
364        // symbols and weights.
365        leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| {
366            if *f > 0 {
367                Some(Node {
368                    value: u32::from(*f),
369                    symbol: n as u16,
370                })
371            } else {
372                None
373            }
374        }));
375
376        // Special cases with zero or 1 value having a non-zero frequency
377        if leaves.len() == 1 {
378            lengths[leaves[0].symbol as usize] = 1;
379            return;
380        } else if leaves.is_empty() {
381            return;
382        }
383
384        // Sort the leaves by value. As the sort in the standard library is stable, we don't
385        // have to worry about the symbol code here.
386        leaves.sort_by(|a, b| a.value.cmp(&b.value));
387
388        step_1(&mut leaves);
389        step_2(&mut leaves);
390
391        // Count how many codes of each length used, for usage in the next section.
392        let mut num_codes = [0u16; NUM_CODES_LENGTH];
393        for l in leaves.iter() {
394            num_codes[l.value as usize] += 1;
395        }
396
397        // As the algorithm used here doesn't limit the maximum length that can be generated
398        // we need to make sure none of the lengths exceed `max_len`
399        enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
400
401        // Output the actual lengths
402        let mut leaf_it = leaves.iter().rev();
403        // Start at 1 since the length table is already filled with zeroes.
404        for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) {
405            for _ in 0..n_codes {
406                lengths[leaf_it.next().unwrap().symbol as usize] = i;
407            }
408        }
409
410        debug_assert_eq!(leaf_it.next(), None);
411        debug_assert!(
412            validate_lengths(lengths),
413            "The generated length codes were not valid!"
414        );
415    }
416}
417
418#[cfg(test)]
419mod test {
420    use super::*;
421    use crate::huffman_table::NUM_LITERALS_AND_LENGTHS;
422    use std::u16;
423
424    fn lit(value: u8) -> EncodedLength {
425        EncodedLength::Length(value)
426    }
427
428    fn zero(repeats: u8) -> EncodedLength {
429        match repeats {
430            0..=1 => EncodedLength::Length(0),
431            2..=10 => EncodedLength::RepeatZero3Bits(repeats),
432            _ => EncodedLength::RepeatZero7Bits(repeats),
433        }
434    }
435
436    fn copy(copies: u8) -> EncodedLength {
437        EncodedLength::CopyPrevious(copies)
438    }
439
440    #[test]
441    fn test_encode_lengths() {
442        use crate::huffman_table::FIXED_CODE_LENGTHS;
443        let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
444        // There are no lengths lower than 6 in the fixed table
445        assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
446        // Neither are there any lengths above 9
447        assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
448        // Also there are no zero-length codes so there shouldn't be any repetitions of zero
449        assert_eq!(enc.1[17..19], [0, 0]);
450
451        let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
452        let enc = encode_lengths(test_lengths.iter()).0;
453        assert_eq!(
454            enc,
455            vec![
456                lit(0),
457                lit(0),
458                lit(5),
459                lit(0),
460                lit(15),
461                lit(1),
462                zero(3),
463                lit(2),
464                lit(4),
465                copy(3),
466                lit(3),
467                lit(5),
468                copy(3),
469            ]
470        );
471        let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
472        let enc = encode_lengths(test_lengths.iter()).0;
473        assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
474
475        let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
476        let enc = encode_lengths(test_lengths.iter()).0;
477        assert_eq!(
478            enc,
479            vec![
480                zero(3),
481                lit(3),
482                lit(3),
483                lit(3),
484                lit(5),
485                lit(4),
486                copy(3),
487                lit(0),
488                lit(0),
489            ]
490        );
491
492        let lens = [
493            0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
494            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
495            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
502            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
503        ];
504
505        let _ = encode_lengths(lens.iter()).0;
506
507        let lens = [
508            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509            0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
510            8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
511            10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
512            7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
517            3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9, /* litlen */
518            0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
519            4,
520        ];
521
522        let enc = encode_lengths(lens.iter()).0;
523
524        assert_eq!(
525            &enc[..10],
526            &[
527                zero(10),
528                lit(9),
529                lit(0),
530                lit(0),
531                lit(9),
532                zero(18),
533                lit(6),
534                zero(3),
535                lit(8),
536                zero(4)
537            ]
538        );
539        assert_eq!(
540            &enc[10..20],
541            &[
542                lit(8),
543                lit(0),
544                lit(0),
545                lit(7),
546                lit(8),
547                lit(7),
548                lit(8),
549                lit(6),
550                lit(6),
551                lit(8)
552            ]
553        );
554
555        let enc = encode_lengths([1, 1, 1, 2].iter()).0;
556        assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
557        let enc = encode_lengths([0, 0, 3].iter()).0;
558        assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
559        let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
560        assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
561
562        let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
563        assert!(*enc.last().unwrap() != lit(5));
564
565        let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
566        assert_eq!(*enc.last().unwrap(), zero(0));
567    }
568
569    #[test]
570    fn test_lengths_from_frequencies() {
571        let frequencies = [1, 1, 5, 7, 10, 14];
572
573        let expected = [4, 4, 3, 2, 2, 2];
574        let res = huffman_lengths_from_frequency(&frequencies, 4);
575
576        assert_eq!(expected, res.as_slice());
577
578        let frequencies = [1, 5, 1, 7, 10, 14];
579        let expected = [4, 3, 4, 2, 2, 2];
580
581        let res = huffman_lengths_from_frequency(&frequencies, 4);
582
583        assert_eq!(expected, res.as_slice());
584
585        let frequencies = [0, 25, 0, 10, 2, 4];
586
587        let res = huffman_lengths_from_frequency(&frequencies, 4);
588        assert_eq!(res[0], 0);
589        assert_eq!(res[2], 0);
590        assert!(res[1] < 4);
591
592        // Only one value
593        let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
594        let res = huffman_lengths_from_frequency(&frequencies, 5);
595        let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
596        assert_eq!(expected, res.as_slice());
597
598        // No values
599        let frequencies = [0; 30];
600        let res = huffman_lengths_from_frequency(&frequencies, 5);
601        for (a, b) in frequencies.iter().zip(res.iter()) {
602            assert_eq!(*a, (*b).into());
603        }
604        // assert_eq!(frequencies, res.as_slice());
605
606        let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
607        frequencies[55] = u16::MAX / 3;
608        frequencies[125] = u16::MAX / 3;
609
610        let res = huffman_lengths_from_frequency(&frequencies, 15);
611        assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
612        assert!(res[55] < 3);
613        assert!(res[125] < 3);
614    }
615
616    #[test]
617    /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
618    /// give the provided frequencies).
619    fn optimal_lengths() {
620        let freqs = [
621            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622            0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
623            0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
624            0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
625            40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
627            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630            0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
631        ];
632
633        let lens = huffman_lengths_from_frequency(&freqs, 15);
634
635        // Lengths produced by miniz for this frequency table for comparison
636        // the number of total bits encoded with these huffman codes is 7701
637        // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
638        // (though there may be a difference in how well the table itself can be represented)
639        // so testing for a specific length table is not ideal since different algorithms
640        // may produce different length tables.
641        // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642        // 0, 0, 0, 0, 0,
643        // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
644        // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
645        // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
646        // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
647        // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652        // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
653        //
654
655        let num_bits = lens
656            .iter()
657            .zip(freqs.iter())
658            .fold(0, |a, (&f, &l)| a + (f as u16 * l));
659        assert_eq!(num_bits, 7701);
660    }
661}