deflate/
length_encode.rs

use std::clone::Clone;
use std::iter::Iterator;

/// An enum representing the different types in the run-length encoded data used to encode
/// Huffman table lengths
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedLength {
    // An actual length value
    Length(u8),
    // Copy the previous value n times
    CopyPrevious(u8),
    // Repeat zero n times (with n represented by 3 bits)
    RepeatZero3Bits(u8),
    // Repeat zero n times (with n represented by 7 bits)
    RepeatZero7Bits(u8),
}

impl EncodedLength {
    fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
        match prev {
            0 => {
                if repeat <= 10 {
                    EncodedLength::RepeatZero3Bits(repeat)
                } else {
                    EncodedLength::RepeatZero7Bits(repeat)
                }
            }
            1..=15 => EncodedLength::CopyPrevious(repeat),
            _ => panic!(),
        }
    }
}

pub const COPY_PREVIOUS: usize = 16;
pub const REPEAT_ZERO_3_BITS: usize = 17;
pub const REPEAT_ZERO_7_BITS: usize = 18;

const MIN_REPEAT: u8 = 3;

/// Push an `EncodedLength` to the vector and update the frequency table.
fn update_out_and_freq(
    encoded: EncodedLength,
    output: &mut Vec<EncodedLength>,
    frequencies: &mut [u16; 19],
) {
    let index = match encoded {
        EncodedLength::Length(l) => usize::from(l),
        EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
        EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
        EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
    };

    frequencies[index] += 1;

    output.push(encoded);
}

/// Convenience function to check if the repeat counter should be incremented further
fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
    (length_value == 0 && repeats < 138) || repeats < 6
}

///Convenience version for unit tests.
#[cfg(test)]
pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
where
    I: Iterator<Item = &'a u8> + Clone,
{
    let mut freqs = [0u16; 19];
    let mut encoded: Vec<EncodedLength> = Vec::new();
    encode_lengths_m(lengths, &mut encoded, &mut freqs);
    (encoded, freqs)
}

/// Run-length encodes the lengths of the values in `lengths` according to the deflate
/// specification. This is used for writing the code lengths for the Huffman tables for
/// the deflate stream.
///
/// Populates the supplied array with the frequency of the different encoded length values
/// The frequency array is taken as a parameter rather than returned to avoid
/// excessive `memcpy`-ing.
pub fn encode_lengths_m<'a, I>(
    lengths: I,
    mut out: &mut Vec<EncodedLength>,
    mut frequencies: &mut [u16; 19],
) where
    I: Iterator<Item = &'a u8> + Clone,
{
    out.clear();
    // Number of repetitions of the current value
    let mut repeat = 0;
    let mut iter = lengths.clone().enumerate().peekable();
    // Previous value
    // We set it to the compliment of the first falue to simplify the code.
    let mut prev = !iter.peek().expect("No length values!").1;

    while let Some((n, &l)) = iter.next() {
        if l == prev && not_max_repetitions(l, repeat) {
            repeat += 1;
        }
        if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
            if repeat >= MIN_REPEAT {
                // The previous value has been repeated enough times to write out a repeat code.

                let val = EncodedLength::from_prev_and_repeat(prev, repeat);
                update_out_and_freq(val, &mut out, &mut frequencies);
                repeat = 0;
                // If we have a new length value, output l unless the last value is 0 or l is the
                // last byte.
                if l != prev {
                    if l != 0 || iter.peek().is_none() {
                        update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
                        repeat = 0;
                    } else {
                        // If we have a zero, we start repeat at one instead of outputting, as
                        // there are separate codes for repeats of zero so we don't need a literal
                        // to define what byte to repeat.
                        repeat = 1;
                    }
                }
            } else {
                // There haven't been enough repetitions of the previous value,
                // so just we output the lengths directly.

                // If we are at the end, and we have a value that is repeated, we need to
                // skip a byte and output the last one.
                let extra_skip = if iter.peek().is_none() && l == prev {
                    1
                } else {
                    0
                };

                // Get to the position of the next byte to output by starting at zero and skipping.
                let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);

                // As repeats of zeroes have separate codes, we don't need to output a literal here
                // if we have a zero (unless we are at the end).
                let extra = if l != 0 || iter.peek().is_none() {
                    1
                } else {
                    0
                };

                for &i in b_iter.take(repeat as usize + extra) {
                    update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
                }

                // If the current byte is zero we start repeat at 1 as we didn't output the literal
                // directly.
                repeat = 1 - extra as u8;
            }
        }
        prev = l;
    }
}

#[cfg(test)]
pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
    in_place::gen_lengths(frequencies, max_len)
}

pub type LeafVec = Vec<in_place::Node>;

/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
///
/// The leaf buffer is passed in to avoid allocating it every time this function is called.
/// The existing data contained in it is not preserved.
pub fn huffman_lengths_from_frequency_m(
    frequencies: &[u16],
    max_len: usize,
    leaf_buffer: &mut LeafVec,
    lens: &mut [u8],
) {
    in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
}

mod in_place {
    type WeightType = u32;

    #[cfg(debug)]
    pub fn validate_lengths(lengths: &[u8]) -> bool {
        // Avoid issue with floating point on mips: https://github.com/image-rs/deflate-rs/issues/23
        if cfg!(any(
            target_arch = "mips",
            target_arch = "mipsel",
            target_arch = "mips64",
            target_arch = "mipsel64"
        )) {
            true
        } else {
            let v = lengths.iter().fold(0f64, |acc, &n| {
                acc + if n != 0 {
                    2f64.powi(-(i32::from(n)))
                } else {
                    0f64
                }
            });

            match v.partial_cmp(&1.0) {
                Some(std::cmp::Ordering::Greater) => false,
                _ => true,
            }
        }
    }

    #[cfg(not(debug))]
    pub fn validate_lengths(_: &[u8]) -> bool {
        true
    }

    #[derive(Eq, PartialEq, Debug)]
    pub struct Node {
        value: WeightType,
        symbol: u16,
    }

    fn step_1(leaves: &mut [Node]) {
        // If there are less than 2 non-zero frequencies, this function should not have been
        // called and we should not have gotten to this point.
        debug_assert!(leaves.len() >= 2);
        let mut root = 0;
        let mut leaf = 2;

        leaves[0].value += leaves[1].value;

        for next in 1..leaves.len() - 1 {
            if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
                leaves[next].value = leaves[root].value;
                leaves[root].value = next as WeightType;
                root += 1;
            } else {
                leaves[next].value = leaves[leaf].value;
                leaf += 1;
            }

            if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
            {
                leaves[next].value += leaves[root].value;
                leaves[root].value = next as WeightType;
                root += 1;
            } else {
                leaves[next].value += leaves[leaf].value;
                leaf += 1;
            }
        }
    }

    fn step_2(leaves: &mut [Node]) {
        debug_assert!(leaves.len() >= 2);
        let n = leaves.len();

        leaves[n - 2].value = 0;
        for t in (0..(n + 1 - 3)).rev() {
            leaves[t].value = leaves[leaves[t].value as usize].value + 1;
        }

        let mut available = 1_usize;
        let mut used = 0;
        let mut depth = 0;
        let mut root = n as isize - 2;
        let mut next = n as isize - 1;

        while available > 0 {
            while root >= 0 && leaves[root as usize].value == depth {
                used += 1;
                root -= 1;
            }
            while available > used {
                leaves[next as usize].value = depth;
                next -= 1;
                available -= 1;
            }
            available = 2 * used;
            depth += 1;
            used = 0;
        }
    }

    const MAX_NUMBER_OF_CODES: usize = 32;
    const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;

    /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
    /// table so that no codes exceed `max_len`.
    /// This is ported from miniz (which is released as public domain by Rich Geldreich
    /// https://github.com/richgel999/miniz/blob/master/miniz.c)
    ///
    /// This will not generate optimal (minimim-redundancy) codes, however in most cases
    /// this won't make a large difference.
    pub fn enforce_max_code_lengths(
        num_codes: &mut [u16; NUM_CODES_LENGTH],
        num_used: usize,
        max_len: usize,
    ) {
        debug_assert!(max_len <= 15);

        if num_used > 1 {
            let mut num_above_max = 0u16;
            for &l in num_codes[(max_len as usize + 1)..].iter() {
                num_above_max += l;
            }

            num_codes[max_len] += num_above_max;

            let mut total = 0u32;
            for i in (1..=max_len).rev() {
                // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
                // be higher than 288,
                // and 288 << 15 will not be anywhere close to overflowing 32 bits
                total += (u32::from(num_codes[i])) << (max_len - i);
            }

            // miniz uses unsigned long here. 32-bits should be sufficient though,
            // as max_len won't be longer than 15 anyhow.
            while total != 1u32 << max_len {
                num_codes[max_len] -= 1;
                for i in (1..max_len).rev() {
                    if num_codes[i] != 0 {
                        num_codes[i] -= 1;
                        num_codes[i + 1] += 2;
                        break;
                    }
                }
                total -= 1;
            }
        }
    }

    #[cfg(test)]
    /// Convenience wrapper for tests.
    pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
        let mut lens = vec![0u8; frequencies.len()];
        let mut leaves = Vec::new();
        in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
        lens
    }

    /// Generate huffman code lengths, using the algorithm described by
    /// Moffat and Katajainen in In-Place Calculation of Minimum-Redundancy Codes
    /// https://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html
    /// and it's implementation.
    ///
    /// This is significantly faster, and seems to generally create lengths that result in length
    /// tables that are better compressible than the algorithm used previously. The downside of this
    /// algorithm is that it's not length-limited, so if too long code lengths are generated,
    /// it might result in a sub-optimal tables as the length-restricting function isn't optimal.
    pub fn in_place_lengths(
        frequencies: &[u16],
        max_len: usize,
        mut leaves: &mut Vec<Node>,
        lengths: &mut [u8],
    ) {
        debug_assert!(lengths.len() >= frequencies.len());

        for l in lengths.iter_mut() {
            *l = 0;
        }

        // Clear any previous leaves in the leaf buffer.
        leaves.clear();

        // Discard zero length nodes as they won't be given a code and thus don't need to
        // participate in code length generation and create a new vec of the remaining
        // symbols and weights.
        leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| {
            if *f > 0 {
                Some(Node {
                    value: u32::from(*f),
                    symbol: n as u16,
                })
            } else {
                None
            }
        }));

        // Special cases with zero or 1 value having a non-zero frequency
        if leaves.len() == 1 {
            lengths[leaves[0].symbol as usize] = 1;
            return;
        } else if leaves.is_empty() {
            return;
        }

        // Sort the leaves by value. As the sort in the standard library is stable, we don't
        // have to worry about the symbol code here.
        leaves.sort_by(|a, b| a.value.cmp(&b.value));

        step_1(&mut leaves);
        step_2(&mut leaves);

        // Count how many codes of each length used, for usage in the next section.
        let mut num_codes = [0u16; NUM_CODES_LENGTH];
        for l in leaves.iter() {
            num_codes[l.value as usize] += 1;
        }

        // As the algorithm used here doesn't limit the maximum length that can be generated
        // we need to make sure none of the lengths exceed `max_len`
        enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);

        // Output the actual lengths
        let mut leaf_it = leaves.iter().rev();
        // Start at 1 since the length table is already filled with zeroes.
        for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) {
            for _ in 0..n_codes {
                lengths[leaf_it.next().unwrap().symbol as usize] = i;
            }
        }

        debug_assert_eq!(leaf_it.next(), None);
        debug_assert!(
            validate_lengths(lengths),
            "The generated length codes were not valid!"
        );
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::huffman_table::NUM_LITERALS_AND_LENGTHS;
    use std::u16;

    fn lit(value: u8) -> EncodedLength {
        EncodedLength::Length(value)
    }

    fn zero(repeats: u8) -> EncodedLength {
        match repeats {
            0..=1 => EncodedLength::Length(0),
            2..=10 => EncodedLength::RepeatZero3Bits(repeats),
            _ => EncodedLength::RepeatZero7Bits(repeats),
        }
    }

    fn copy(copies: u8) -> EncodedLength {
        EncodedLength::CopyPrevious(copies)
    }

    #[test]
    fn test_encode_lengths() {
        use crate::huffman_table::FIXED_CODE_LENGTHS;
        let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
        // There are no lengths lower than 6 in the fixed table
        assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
        // Neither are there any lengths above 9
        assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
        // Also there are no zero-length codes so there shouldn't be any repetitions of zero
        assert_eq!(enc.1[17..19], [0, 0]);

        let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(
            enc,
            vec![
                lit(0),
                lit(0),
                lit(5),
                lit(0),
                lit(15),
                lit(1),
                zero(3),
                lit(2),
                lit(4),
                copy(3),
                lit(3),
                lit(5),
                copy(3),
            ]
        );
        let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);

        let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
        let enc = encode_lengths(test_lengths.iter()).0;
        assert_eq!(
            enc,
            vec![
                zero(3),
                lit(3),
                lit(3),
                lit(3),
                lit(5),
                lit(4),
                copy(3),
                lit(0),
                lit(0),
            ]
        );

        let lens = [
            0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        ];

        let _ = encode_lengths(lens.iter()).0;

        let lens = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
            8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
            10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
            7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
            3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9, /* litlen */
            0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
            4,
        ];

        let enc = encode_lengths(lens.iter()).0;

        assert_eq!(
            &enc[..10],
            &[
                zero(10),
                lit(9),
                lit(0),
                lit(0),
                lit(9),
                zero(18),
                lit(6),
                zero(3),
                lit(8),
                zero(4)
            ]
        );
        assert_eq!(
            &enc[10..20],
            &[
                lit(8),
                lit(0),
                lit(0),
                lit(7),
                lit(8),
                lit(7),
                lit(8),
                lit(6),
                lit(6),
                lit(8)
            ]
        );

        let enc = encode_lengths([1, 1, 1, 2].iter()).0;
        assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
        let enc = encode_lengths([0, 0, 3].iter()).0;
        assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
        let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
        assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);

        let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
        assert!(*enc.last().unwrap() != lit(5));

        let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
        assert_eq!(*enc.last().unwrap(), zero(0));
    }

    #[test]
    fn test_lengths_from_frequencies() {
        let frequencies = [1, 1, 5, 7, 10, 14];

        let expected = [4, 4, 3, 2, 2, 2];
        let res = huffman_lengths_from_frequency(&frequencies, 4);

        assert_eq!(expected, res.as_slice());

        let frequencies = [1, 5, 1, 7, 10, 14];
        let expected = [4, 3, 4, 2, 2, 2];

        let res = huffman_lengths_from_frequency(&frequencies, 4);

        assert_eq!(expected, res.as_slice());

        let frequencies = [0, 25, 0, 10, 2, 4];

        let res = huffman_lengths_from_frequency(&frequencies, 4);
        assert_eq!(res[0], 0);
        assert_eq!(res[2], 0);
        assert!(res[1] < 4);

        // Only one value
        let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
        let res = huffman_lengths_from_frequency(&frequencies, 5);
        let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
        assert_eq!(expected, res.as_slice());

        // No values
        let frequencies = [0; 30];
        let res = huffman_lengths_from_frequency(&frequencies, 5);
        for (a, b) in frequencies.iter().zip(res.iter()) {
            assert_eq!(*a, (*b).into());
        }
        // assert_eq!(frequencies, res.as_slice());

        let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
        frequencies[55] = u16::MAX / 3;
        frequencies[125] = u16::MAX / 3;

        let res = huffman_lengths_from_frequency(&frequencies, 15);
        assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
        assert!(res[55] < 3);
        assert!(res[125] < 3);
    }

    #[test]
    /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
    /// give the provided frequencies).
    fn optimal_lengths() {
        let freqs = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
            0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
            0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
            40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
        ];

        let lens = huffman_lengths_from_frequency(&freqs, 15);

        // Lengths produced by miniz for this frequency table for comparison
        // the number of total bits encoded with these huffman codes is 7701
        // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
        // (though there may be a difference in how well the table itself can be represented)
        // so testing for a specific length table is not ideal since different algorithms
        // may produce different length tables.
        // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
        // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
        // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
        // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
        // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
        //

        let num_bits = lens
            .iter()
            .zip(freqs.iter())
            .fold(0, |a, (&f, &l)| a + (f as u16 * l));
        assert_eq!(num_bits, 7701);
    }
}
deflate/length_encode.rs

deflate/
length_encode.rs