1use std::clone::Clone;
2use std::iter::Iterator;
3
4#[derive(Debug, PartialEq, Eq)]
7pub enum EncodedLength {
8 Length(u8),
10 CopyPrevious(u8),
12 RepeatZero3Bits(u8),
14 RepeatZero7Bits(u8),
16}
17
18impl EncodedLength {
19 fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
20 match prev {
21 0 => {
22 if repeat <= 10 {
23 EncodedLength::RepeatZero3Bits(repeat)
24 } else {
25 EncodedLength::RepeatZero7Bits(repeat)
26 }
27 }
28 1..=15 => EncodedLength::CopyPrevious(repeat),
29 _ => panic!(),
30 }
31 }
32}
33
34pub const COPY_PREVIOUS: usize = 16;
35pub const REPEAT_ZERO_3_BITS: usize = 17;
36pub const REPEAT_ZERO_7_BITS: usize = 18;
37
38const MIN_REPEAT: u8 = 3;
39
40fn update_out_and_freq(
42 encoded: EncodedLength,
43 output: &mut Vec<EncodedLength>,
44 frequencies: &mut [u16; 19],
45) {
46 let index = match encoded {
47 EncodedLength::Length(l) => usize::from(l),
48 EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
49 EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
50 EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
51 };
52
53 frequencies[index] += 1;
54
55 output.push(encoded);
56}
57
58fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
60 (length_value == 0 && repeats < 138) || repeats < 6
61}
62
63#[cfg(test)]
65pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
66where
67 I: Iterator<Item = &'a u8> + Clone,
68{
69 let mut freqs = [0u16; 19];
70 let mut encoded: Vec<EncodedLength> = Vec::new();
71 encode_lengths_m(lengths, &mut encoded, &mut freqs);
72 (encoded, freqs)
73}
74
75pub fn encode_lengths_m<'a, I>(
83 lengths: I,
84 mut out: &mut Vec<EncodedLength>,
85 mut frequencies: &mut [u16; 19],
86) where
87 I: Iterator<Item = &'a u8> + Clone,
88{
89 out.clear();
90 let mut repeat = 0;
92 let mut iter = lengths.clone().enumerate().peekable();
93 let mut prev = !iter.peek().expect("No length values!").1;
96
97 while let Some((n, &l)) = iter.next() {
98 if l == prev && not_max_repetitions(l, repeat) {
99 repeat += 1;
100 }
101 if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
102 if repeat >= MIN_REPEAT {
103 let val = EncodedLength::from_prev_and_repeat(prev, repeat);
106 update_out_and_freq(val, &mut out, &mut frequencies);
107 repeat = 0;
108 if l != prev {
111 if l != 0 || iter.peek().is_none() {
112 update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
113 repeat = 0;
114 } else {
115 repeat = 1;
119 }
120 }
121 } else {
122 let extra_skip = if iter.peek().is_none() && l == prev {
128 1
129 } else {
130 0
131 };
132
133 let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
135
136 let extra = if l != 0 || iter.peek().is_none() {
139 1
140 } else {
141 0
142 };
143
144 for &i in b_iter.take(repeat as usize + extra) {
145 update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
146 }
147
148 repeat = 1 - extra as u8;
151 }
152 }
153 prev = l;
154 }
155}
156
157#[cfg(test)]
158pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
159 in_place::gen_lengths(frequencies, max_len)
160}
161
162pub type LeafVec = Vec<in_place::Node>;
163
164pub fn huffman_lengths_from_frequency_m(
170 frequencies: &[u16],
171 max_len: usize,
172 leaf_buffer: &mut LeafVec,
173 lens: &mut [u8],
174) {
175 in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
176}
177
178mod in_place {
179 type WeightType = u32;
180
181 #[cfg(debug)]
182 pub fn validate_lengths(lengths: &[u8]) -> bool {
183 if cfg!(any(
185 target_arch = "mips",
186 target_arch = "mipsel",
187 target_arch = "mips64",
188 target_arch = "mipsel64"
189 )) {
190 true
191 } else {
192 let v = lengths.iter().fold(0f64, |acc, &n| {
193 acc + if n != 0 {
194 2f64.powi(-(i32::from(n)))
195 } else {
196 0f64
197 }
198 });
199
200 match v.partial_cmp(&1.0) {
201 Some(std::cmp::Ordering::Greater) => false,
202 _ => true,
203 }
204 }
205 }
206
207 #[cfg(not(debug))]
208 pub fn validate_lengths(_: &[u8]) -> bool {
209 true
210 }
211
212 #[derive(Eq, PartialEq, Debug)]
213 pub struct Node {
214 value: WeightType,
215 symbol: u16,
216 }
217
218 fn step_1(leaves: &mut [Node]) {
219 debug_assert!(leaves.len() >= 2);
222 let mut root = 0;
223 let mut leaf = 2;
224
225 leaves[0].value += leaves[1].value;
226
227 for next in 1..leaves.len() - 1 {
228 if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
229 leaves[next].value = leaves[root].value;
230 leaves[root].value = next as WeightType;
231 root += 1;
232 } else {
233 leaves[next].value = leaves[leaf].value;
234 leaf += 1;
235 }
236
237 if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
238 {
239 leaves[next].value += leaves[root].value;
240 leaves[root].value = next as WeightType;
241 root += 1;
242 } else {
243 leaves[next].value += leaves[leaf].value;
244 leaf += 1;
245 }
246 }
247 }
248
249 fn step_2(leaves: &mut [Node]) {
250 debug_assert!(leaves.len() >= 2);
251 let n = leaves.len();
252
253 leaves[n - 2].value = 0;
254 for t in (0..(n + 1 - 3)).rev() {
255 leaves[t].value = leaves[leaves[t].value as usize].value + 1;
256 }
257
258 let mut available = 1_usize;
259 let mut used = 0;
260 let mut depth = 0;
261 let mut root = n as isize - 2;
262 let mut next = n as isize - 1;
263
264 while available > 0 {
265 while root >= 0 && leaves[root as usize].value == depth {
266 used += 1;
267 root -= 1;
268 }
269 while available > used {
270 leaves[next as usize].value = depth;
271 next -= 1;
272 available -= 1;
273 }
274 available = 2 * used;
275 depth += 1;
276 used = 0;
277 }
278 }
279
280 const MAX_NUMBER_OF_CODES: usize = 32;
281 const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
282
283 pub fn enforce_max_code_lengths(
291 num_codes: &mut [u16; NUM_CODES_LENGTH],
292 num_used: usize,
293 max_len: usize,
294 ) {
295 debug_assert!(max_len <= 15);
296
297 if num_used > 1 {
298 let mut num_above_max = 0u16;
299 for &l in num_codes[(max_len as usize + 1)..].iter() {
300 num_above_max += l;
301 }
302
303 num_codes[max_len] += num_above_max;
304
305 let mut total = 0u32;
306 for i in (1..=max_len).rev() {
307 total += (u32::from(num_codes[i])) << (max_len - i);
311 }
312
313 while total != 1u32 << max_len {
316 num_codes[max_len] -= 1;
317 for i in (1..max_len).rev() {
318 if num_codes[i] != 0 {
319 num_codes[i] -= 1;
320 num_codes[i + 1] += 2;
321 break;
322 }
323 }
324 total -= 1;
325 }
326 }
327 }
328
329 #[cfg(test)]
330 pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
332 let mut lens = vec![0u8; frequencies.len()];
333 let mut leaves = Vec::new();
334 in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
335 lens
336 }
337
338 pub fn in_place_lengths(
348 frequencies: &[u16],
349 max_len: usize,
350 mut leaves: &mut Vec<Node>,
351 lengths: &mut [u8],
352 ) {
353 debug_assert!(lengths.len() >= frequencies.len());
354
355 for l in lengths.iter_mut() {
356 *l = 0;
357 }
358
359 leaves.clear();
361
362 leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| {
366 if *f > 0 {
367 Some(Node {
368 value: u32::from(*f),
369 symbol: n as u16,
370 })
371 } else {
372 None
373 }
374 }));
375
376 if leaves.len() == 1 {
378 lengths[leaves[0].symbol as usize] = 1;
379 return;
380 } else if leaves.is_empty() {
381 return;
382 }
383
384 leaves.sort_by(|a, b| a.value.cmp(&b.value));
387
388 step_1(&mut leaves);
389 step_2(&mut leaves);
390
391 let mut num_codes = [0u16; NUM_CODES_LENGTH];
393 for l in leaves.iter() {
394 num_codes[l.value as usize] += 1;
395 }
396
397 enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
400
401 let mut leaf_it = leaves.iter().rev();
403 for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) {
405 for _ in 0..n_codes {
406 lengths[leaf_it.next().unwrap().symbol as usize] = i;
407 }
408 }
409
410 debug_assert_eq!(leaf_it.next(), None);
411 debug_assert!(
412 validate_lengths(lengths),
413 "The generated length codes were not valid!"
414 );
415 }
416}
417
418#[cfg(test)]
419mod test {
420 use super::*;
421 use crate::huffman_table::NUM_LITERALS_AND_LENGTHS;
422 use std::u16;
423
424 fn lit(value: u8) -> EncodedLength {
425 EncodedLength::Length(value)
426 }
427
428 fn zero(repeats: u8) -> EncodedLength {
429 match repeats {
430 0..=1 => EncodedLength::Length(0),
431 2..=10 => EncodedLength::RepeatZero3Bits(repeats),
432 _ => EncodedLength::RepeatZero7Bits(repeats),
433 }
434 }
435
436 fn copy(copies: u8) -> EncodedLength {
437 EncodedLength::CopyPrevious(copies)
438 }
439
440 #[test]
441 fn test_encode_lengths() {
442 use crate::huffman_table::FIXED_CODE_LENGTHS;
443 let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
444 assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
446 assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
448 assert_eq!(enc.1[17..19], [0, 0]);
450
451 let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
452 let enc = encode_lengths(test_lengths.iter()).0;
453 assert_eq!(
454 enc,
455 vec![
456 lit(0),
457 lit(0),
458 lit(5),
459 lit(0),
460 lit(15),
461 lit(1),
462 zero(3),
463 lit(2),
464 lit(4),
465 copy(3),
466 lit(3),
467 lit(5),
468 copy(3),
469 ]
470 );
471 let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
472 let enc = encode_lengths(test_lengths.iter()).0;
473 assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
474
475 let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
476 let enc = encode_lengths(test_lengths.iter()).0;
477 assert_eq!(
478 enc,
479 vec![
480 zero(3),
481 lit(3),
482 lit(3),
483 lit(3),
484 lit(5),
485 lit(4),
486 copy(3),
487 lit(0),
488 lit(0),
489 ]
490 );
491
492 let lens = [
493 0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
503 ];
504
505 let _ = encode_lengths(lens.iter()).0;
506
507 let lens = [
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
510 8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
511 10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
512 7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
517 3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
519 4,
520 ];
521
522 let enc = encode_lengths(lens.iter()).0;
523
524 assert_eq!(
525 &enc[..10],
526 &[
527 zero(10),
528 lit(9),
529 lit(0),
530 lit(0),
531 lit(9),
532 zero(18),
533 lit(6),
534 zero(3),
535 lit(8),
536 zero(4)
537 ]
538 );
539 assert_eq!(
540 &enc[10..20],
541 &[
542 lit(8),
543 lit(0),
544 lit(0),
545 lit(7),
546 lit(8),
547 lit(7),
548 lit(8),
549 lit(6),
550 lit(6),
551 lit(8)
552 ]
553 );
554
555 let enc = encode_lengths([1, 1, 1, 2].iter()).0;
556 assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
557 let enc = encode_lengths([0, 0, 3].iter()).0;
558 assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
559 let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
560 assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
561
562 let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
563 assert!(*enc.last().unwrap() != lit(5));
564
565 let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
566 assert_eq!(*enc.last().unwrap(), zero(0));
567 }
568
569 #[test]
570 fn test_lengths_from_frequencies() {
571 let frequencies = [1, 1, 5, 7, 10, 14];
572
573 let expected = [4, 4, 3, 2, 2, 2];
574 let res = huffman_lengths_from_frequency(&frequencies, 4);
575
576 assert_eq!(expected, res.as_slice());
577
578 let frequencies = [1, 5, 1, 7, 10, 14];
579 let expected = [4, 3, 4, 2, 2, 2];
580
581 let res = huffman_lengths_from_frequency(&frequencies, 4);
582
583 assert_eq!(expected, res.as_slice());
584
585 let frequencies = [0, 25, 0, 10, 2, 4];
586
587 let res = huffman_lengths_from_frequency(&frequencies, 4);
588 assert_eq!(res[0], 0);
589 assert_eq!(res[2], 0);
590 assert!(res[1] < 4);
591
592 let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
594 let res = huffman_lengths_from_frequency(&frequencies, 5);
595 let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
596 assert_eq!(expected, res.as_slice());
597
598 let frequencies = [0; 30];
600 let res = huffman_lengths_from_frequency(&frequencies, 5);
601 for (a, b) in frequencies.iter().zip(res.iter()) {
602 assert_eq!(*a, (*b).into());
603 }
604 let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
607 frequencies[55] = u16::MAX / 3;
608 frequencies[125] = u16::MAX / 3;
609
610 let res = huffman_lengths_from_frequency(&frequencies, 15);
611 assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
612 assert!(res[55] < 3);
613 assert!(res[125] < 3);
614 }
615
616 #[test]
617 fn optimal_lengths() {
620 let freqs = [
621 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
623 0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
624 0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
625 40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
626 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
631 ];
632
633 let lens = huffman_lengths_from_frequency(&freqs, 15);
634
635 let num_bits = lens
656 .iter()
657 .zip(freqs.iter())
658 .fold(0, |a, (&f, &l)| a + (f as u16 * l));
659 assert_eq!(num_bits, 7701);
660 }
661}