lewton/
huffman_tree.rs

1// Vorbis decoder written in Rust
2//
3// Copyright (c) 2016 est31 <MTest31@outlook.com>
4// and contributors. All rights reserved.
5// Licensed under MIT license, or Apache 2 license,
6// at your option. Please see the LICENSE file
7// attached to this source distribution for details.
8
9/*!
10Huffman tree unpacking and traversal
11
12This mod contains the `VorbisHuffmanTree` struct which
13can be loaded from the `codebook_codeword_lengths` array
14specified for each codebook in the vorbis setup header.
15
16Once decoding is happening, you are more interested in
17the `VorbisHuffmanIter` struct which provides you with
18facilities to load a value bit by bit.
19*/
20
21struct HuffTree {
22	// True iff every sub-tree in this tree
23	// either has two direct children or none
24	even_childs :bool,
25	payload :Option<u32>,
26	l :Option<Box<HuffTree>>,
27	r :Option<Box<HuffTree>>,
28}
29
30/*
31use std::fmt;
32impl fmt::Debug for HuffTree {
33	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
34		fn fmt_rec(s :&HuffTree, f: &mut fmt::Formatter, depth :u32) -> fmt::Result {
35			macro_rules! depth_print {
36			($f:ident, $depth:ident) => {
37				for _ in 0..$depth {
38					try!(write!($f, "| "));
39				}
40			}}
41			if s.l.is_some() || s.r.is_some() {
42				try!(writeln!(f, "ec: {:?}, pl: {:?}, LIS {:?} RIS {:?}",
43					s.even_childs, s.payload, s.l.is_some(), s.r.is_some()));
44			} else {
45				try!(writeln!(f, "ec: {:?}, pl: {:?}", s.even_childs, s.payload));
46			}
47			if let Some(ref v) = s.l {
48				depth_print!(f, depth);
49				try!(write!(f, "LEFT "));
50				try!(fmt_rec(&*v, f, depth + 1));
51			}
52			if let Some(ref v) = s.r {
53				depth_print!(f, depth);
54				try!(write!(f, "RIGT "));
55				try!(fmt_rec(&*v, f, depth + 1));
56			}
57			return Ok(());
58		}
59		try!(fmt_rec(self, f, 1));
60		return Ok(());
61	}
62} // */
63
64impl HuffTree {
65	/// Returns whether the addition was successful
66	pub fn insert_rec(&mut self, payload :u32, depth :u8) -> bool {
67		//print!("INSERT payload {:?} depth {:?} ", payload, depth);
68		if self.payload.is_some() {
69			//println!(" => OCCUPIED AS LEAF");
70			return false;
71		}
72		if depth == 0 {
73			if !(self.l.is_none() && self.r.is_none()) {
74				//println!(" => INNER NODE");
75				return false;
76			}
77			self.payload = Some(payload);
78			//println!(" => ADDED");
79			return true;
80		}
81		if self.even_childs {
82			//println!(" => HAS EVEN CHILDS");
83			match &mut self.l {
84				&mut Some(_) => return false,
85				&mut None => {
86					let mut new_node = HuffTree { even_childs :true, payload :None, l :None, r :None };
87					new_node.insert_rec(payload, depth - 1);
88					self.l = Some(Box::new(new_node));
89					self.even_childs = false;
90					return true;
91				}
92			}
93		} else {
94			//println!(" => HAS NOT EVEN CHILDS");
95			// First try left branch
96			let left = self.l.as_mut().unwrap();
97			if !left.even_childs {
98				if left.insert_rec(payload, depth - 1) {
99					self.even_childs = left.even_childs &&
100						if let &mut Some(ref mut right) = &mut self.r.as_mut() { right.even_childs } else { false };
101					return true;
102				}
103			}
104			// Left sub tree was either full or leaf
105			// Therefore, put it in the right branch now
106			// As left has even_childs == true, right causes
107			// us to have even_childs == false.
108			return match self.r {
109				Some(ref mut right) => {
110					let success = right.insert_rec(payload, depth - 1);
111					self.even_childs = left.even_childs && right.even_childs;
112					success
113				},
114				None => {
115					let mut new_node = HuffTree { even_childs :true, payload :None, l :None, r :None };
116					let success = new_node.insert_rec(payload, depth - 1);
117					self.even_childs = left.even_childs && new_node.even_childs;
118					self.r = Some(Box::new(new_node));
119					success
120				}
121			};
122		}
123	}
124}
125
126#[derive(Debug)]
127pub enum HuffmanError {
128	Overspecified,
129	Underpopulated,
130	InvalidSingleEntry,
131}
132
133#[derive(Clone, Copy)]
134enum UnrolledLookupEntry {
135	/// The specified entry was found in the lookup array
136	///
137	/// First param: offset by which to advance the reader
138	/// Second param: the payload
139	HasEntry(u8, u32),
140	/// Seems the given input is inconclusive and not complete yet.
141	///
142	/// The argument contains a hint that is an offset inside desc_prog
143	/// to help to advance the reader.
144	InconclusiveWithHint(u32),
145	/// Seems the given input is inconclusive and not complete yet.
146	Inconclusive,
147}
148
149pub enum PeekedDataLookupResult<'l> {
150	/// The supplied info is not enough to result in a payload directly.
151	///
152	/// First param is the number of bits to advance.
153	///
154	/// The returned iterator has state up to the count of bits that could be used.
155	Iter(u8, VorbisHuffmanIter<'l>),
156	/// The supplied info is enough to map to a payload
157	///
158	/// First param is the number of bits to advance. Second is payload.
159	PayloadFound(u8, u32),
160}
161
162/// Huffman tree representation
163pub struct VorbisHuffmanTree {
164	// Format: three bytes per non leaf node, one byte per leaf node.
165	// First byte is the payload container,
166	// second and third point to the indices inside the vector that
167	// have left and right children.
168	// If the node is a leaf the highest bit of the payload container 0,
169	// if it has children the bit is 1. If its a leaf the lower 31 bits of the
170	// payload container form the actual payload.
171	desc_prog :Vec<u32>,
172
173	unrolled_entries :[UnrolledLookupEntry; 256],
174}
175
176impl VorbisHuffmanTree {
177	/// Constructs a new `VorbisHuffmanTree` instance from the passed array,
178	/// like the vorbis spec demands.
179	///
180	/// Returns the resulting tree if the array results in a valid (neither
181	/// underspecified nor overspecified) tree.
182	pub fn load_from_array(codebook_codeword_lengths :&[u8]) -> Result<VorbisHuffmanTree, HuffmanError> {
183		// First step: generate a simple tree representing the
184		// Huffman tree
185		let mut simple_tree = HuffTree { even_childs :true, payload :None, l :None, r :None };
186		let mut cnt :usize = 0;
187		let mut last_valid_idx = None;
188		for (i, &codeword_length) in codebook_codeword_lengths.iter().enumerate() {
189			if codeword_length == 0 {
190				continue;
191			}
192			cnt += 1;
193			last_valid_idx = Some(i);
194			if !simple_tree.insert_rec(i as u32, codeword_length) {
195				try!(Err(HuffmanError::Overspecified)) /* Overspecified, can't be put into tree */
196			}
197		}
198		//println!("The tree:\n{:?}", simple_tree);
199
200		// Single entry codebook special handling
201		if cnt == 1 {
202			let decoded = last_valid_idx.unwrap();
203			let encoded_len = codebook_codeword_lengths[decoded];
204			if encoded_len == 1 {
205				// Return a vorbis tree that returns decoded for any single bit input
206				return Ok(VorbisHuffmanTree {
207					desc_prog :vec![1u32 << 31, 3, 3, decoded as u32],
208					unrolled_entries :[
209						UnrolledLookupEntry::HasEntry(1, decoded as u32); 256
210					],
211				});
212			} else {
213				// Single entry codebooks must have 1 as their only length entry
214				try!(Err(HuffmanError::InvalidSingleEntry))
215			}
216		}
217
218		if !simple_tree.even_childs {
219			try!(Err(HuffmanError::Underpopulated)); /* Underpopulated */
220		}
221
222		// Second step: generate the actual desc_prog
223		// by pre_order traversal of the tree.
224		//
225		// The general advantage of this approach over one with only the simple tree
226		// is better cache locality and less memory requirements (at least after the
227		// setup with the simple tree).
228		let mut desc_prog = Vec::with_capacity(cnt);
229		fn traverse(tree :& HuffTree, desc_prog :&mut Vec<u32>) -> u32 {
230			let cur_pos = desc_prog.len() as u32;
231			let has_children = tree.l.is_some() || tree.r.is_some();
232
233			let entry = ((has_children as u32) << 31) | tree.payload.unwrap_or(0);
234			//println!("push node (w_children : {:?}) at {:?} : {:?}", has_children, cur_pos, entry);
235			desc_prog.push(entry);
236
237			if has_children {
238				desc_prog.push(0);
239				desc_prog.push(0);
240				desc_prog[cur_pos as usize + 1] =
241					traverse(tree.l.as_ref().unwrap(), desc_prog);
242				/*println!("left child of node {:?}: at {:?}", cur_pos,
243					desc_prog[cur_pos as usize + 1]);// */
244				desc_prog[cur_pos as usize + 2] =
245					traverse(tree.r.as_ref().unwrap(), desc_prog);
246				/*println!("right child of node {:?}: at {:?}", cur_pos,
247					desc_prog[cur_pos as usize + 2]);// */
248			}
249			return cur_pos;
250		}
251		assert_eq!(traverse(&simple_tree, &mut desc_prog), 0);
252
253		// Third step: generate unrolled entries array
254		// Also by pre_order traversal.
255		//
256		// This gives us a speedup over desc_prog as reading the unrolled
257		// entries should involve less branching and less lookups overall.
258		let mut unrolled_entries = [UnrolledLookupEntry::Inconclusive; 256];
259		fn uroll_traverse(tree :& HuffTree,
260				unrolled_entries :&mut [UnrolledLookupEntry; 256],
261				prefix :u32, prefix_idx :u8,
262				desc_prog :&[u32], desc_prog_idx :u32) {
263			let has_children = tree.l.is_some() || tree.r.is_some();
264
265			if has_children {
266				// There are children.
267				// We'd like to recurse deeper. Can we?
268				if prefix_idx == 8 {
269					// No we can't.
270					// The tree is too deep.
271					unrolled_entries[prefix as usize] =
272						UnrolledLookupEntry::InconclusiveWithHint(desc_prog_idx);
273				} else {
274					// Recurse deeper.
275					uroll_traverse(tree.l.as_ref().unwrap(),
276						unrolled_entries,
277						prefix + (0 << prefix_idx), prefix_idx + 1,
278						desc_prog, desc_prog[desc_prog_idx as usize + 1]);
279					uroll_traverse(tree.r.as_ref().unwrap(),
280						unrolled_entries,
281						prefix + (1 << prefix_idx), prefix_idx + 1,
282						desc_prog, desc_prog[desc_prog_idx as usize + 2]);
283				}
284			} else {
285				// No children, fill the entries in the range according to
286				// the prefix we have.
287				let payload = tree.payload.unwrap();
288				let it = 1 << prefix_idx;
289				let mut i = prefix as usize;
290				for _ in 1 .. (1u16 << (8 - prefix_idx)) {
291					unrolled_entries[i] =
292						UnrolledLookupEntry::HasEntry(prefix_idx, payload);
293					i += it;
294				}
295			}
296		}
297		if cnt > 0 {
298			uroll_traverse(&simple_tree,
299				&mut unrolled_entries, 0, 0, &desc_prog, 0);
300		}
301
302		// Now we are done, return the result
303		return Ok(VorbisHuffmanTree {
304			desc_prog,
305			unrolled_entries,
306		});
307	}
308
309	/// Returns an iterator over this tree.
310	pub fn iter<'l>(&'l self) -> VorbisHuffmanIter<'l> {
311		return VorbisHuffmanIter { desc_prog :&self.desc_prog, pos :0 };
312	}
313
314	/// Resolves a given number of peeked bits.
315	///
316	/// Returns whether the data given is enough to uniquely identify a
317	/// tree element, or whether only an iterator that's progressed by
318	/// a given amount can be returned. Also, info is returned about how
319	/// far the reader can be advanced.
320	pub fn lookup_peeked_data<'l>(&'l self, bit_count :u8, peeked_data :u32)
321			-> PeekedDataLookupResult<'l> {
322		if bit_count > 8 {
323			panic!("Bit count {} larger than allowed 8", bit_count);
324		}
325		use self::UnrolledLookupEntry::*;
326		use self::PeekedDataLookupResult::*;
327		return match self.unrolled_entries[peeked_data as usize] {
328			// If cnt_to_remove is bigger than bit_count the result is inconclusive.
329			// Return in this case.
330			HasEntry(cnt_to_remove, payload) if cnt_to_remove <= bit_count
331				=> PayloadFound(cnt_to_remove, payload),
332			InconclusiveWithHint(hint)
333				=> Iter(8, VorbisHuffmanIter { desc_prog : &self.desc_prog, pos : hint }),
334			_
335				=> Iter(0, VorbisHuffmanIter { desc_prog : &self.desc_prog, pos : 0 }),
336		};
337	}
338}
339
340/// Iterator on the Huffman tree
341pub struct VorbisHuffmanIter<'a> {
342	desc_prog :&'a Vec<u32>,
343	pos :u32,
344}
345
346impl<'a> VorbisHuffmanIter<'a> {
347	/// Iterate one level deeper inside the tree.
348	/// Returns `Some(p)` if it encounters a leaf with a payload p,
349	/// None if it only processed an inner node.
350	///
351	/// Inner nodes don't carry payloads in huffman trees.
352	///
353	/// If this function encounters a leaf, it automatically resets
354	/// the iterator to its starting state.
355	///
356	/// # Panics
357	///
358	/// Panics if the vorbis huffman treee is empty. It has to be found out
359	/// what to do if the huffman tree is empty, whether to reject the stream,
360	/// or whether to do sth else. Finding this out is a TODO.
361	pub fn next(&mut self, bit :bool) -> Option<u32> {
362		// Assertion test for the paranoid and testing, comment out if you are:
363		/*let cur_entry = self.desc_prog[self.pos as usize];
364		assert!((cur_entry & (1u32 << 31)) != 0);*/
365
366		//print!("With bit {:?}, pos {:?} becomes pos ", bit, self.pos);
367		self.pos = self.desc_prog[self.pos as usize + 1 + bit as usize];
368		//print!("{:?}", self.pos);
369		let child = self.desc_prog[self.pos as usize];
370		if (child & (1u32 << 31)) != 0 {
371			//println!(" => None");
372			// child has children
373			return None;
374		} else {
375			//println!(" => Some({:?})", child);
376			// child has no children, it's a leaf
377			self.pos = 0;
378			return Some(child);
379		}
380	}
381}
382
383#[cfg(test)]
384impl VorbisHuffmanTree {
385	fn iter_test(&self, path :u32, path_len :u8, expected_val :u32) {
386		let mut itr = self.iter();
387		for i in 1 .. path_len {
388			assert_eq!(itr.next((path & (1 << (path_len - i))) != 0), None);
389		}
390		assert_eq!(itr.next((path & 1) != 0), Some(expected_val));
391	}
392}
393
394#[test]
395fn test_huffman_tree() {
396	// Official example from the vorbis spec section 3.2.1
397	let tree = VorbisHuffmanTree::load_from_array(&[2, 4, 4, 4, 4, 2, 3, 3]).unwrap();
398
399	tree.iter_test(0b00, 2, 0);
400	tree.iter_test(0b0100, 4, 1);
401	tree.iter_test(0b0101, 4, 2);
402	tree.iter_test(0b0110, 4, 3);
403	tree.iter_test(0b0111, 4, 4);
404	tree.iter_test(0b10, 2, 5);
405	tree.iter_test(0b110, 3, 6);
406	tree.iter_test(0b111, 3, 7);
407
408	// Some other example
409	// we mostly test the length (max 32) here
410	VorbisHuffmanTree::load_from_array(&[
411		1,   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
412		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32]).unwrap();
413}
414
415#[test]
416fn test_issue_8() {
417	// regression test for issue 8
418	// make sure that it doesn't panic.
419	let _ = VorbisHuffmanTree::load_from_array(&[0; 625]);
420}
421
422#[test]
423fn test_under_over_spec() {
424	// All trees base on the official example from the vorbis spec section 3.2.1
425	// but with modifications to under- or overspecify them
426
427	// underspecified
428	let tree = VorbisHuffmanTree::load_from_array(&[2, 4, 4, 4, 4, 2, 3/*, 3*/]);
429	assert!(tree.is_err());
430
431	// underspecified
432	let tree = VorbisHuffmanTree::load_from_array(&[2, 4, 4, 4, /*4,*/ 2, 3, 3]);
433	assert!(tree.is_err());
434
435	// overspecified
436	let tree = VorbisHuffmanTree::load_from_array(&[2, 4, 4, 4, 4, 2, 3, 3/*]*/,3]);
437	assert!(tree.is_err());
438}
439
440#[test]
441fn test_single_entry_huffman_tree() {
442	// Special testing for single entry codebooks, as required by the vorbis spec
443	let tree = VorbisHuffmanTree::load_from_array(&[1]).unwrap();
444	tree.iter_test(0b0, 1, 0);
445	tree.iter_test(0b1, 1, 0);
446
447	let tree = VorbisHuffmanTree::load_from_array(&[0, 0, 1, 0]).unwrap();
448	tree.iter_test(0b0, 1, 2);
449	tree.iter_test(0b1, 1, 2);
450
451	let tree = VorbisHuffmanTree::load_from_array(&[2]);
452	assert!(tree.is_err());
453}
454
455#[test]
456fn test_unordered_huffman_tree() {
457	// Reordered the official example from the vorbis spec section 3.2.1
458	//
459	// Ensuring that unordered huffman trees work as well is important
460	// because the spec does not disallow them, and unordered
461	// huffman trees appear in "the wild".
462	let tree = VorbisHuffmanTree::load_from_array(&[2, 4, 4, 2, 4, 4, 3, 3]).unwrap();
463
464	tree.iter_test(0b00, 2, 0);
465	tree.iter_test(0b0100, 4, 1);
466	tree.iter_test(0b0101, 4, 2);
467	tree.iter_test(0b10, 2, 3);
468	tree.iter_test(0b0110, 4, 4);
469	tree.iter_test(0b0111, 4, 5);
470	tree.iter_test(0b110, 3, 6);
471	tree.iter_test(0b111, 3, 7);
472}
473
474#[test]
475fn test_extracted_huffman_tree() {
476	// Extracted from a real-life vorbis file.
477	VorbisHuffmanTree::load_from_array(&[
478	5,  6, 11, 11, 11, 11, 10, 10, 12, 11,  5,  2, 11,  5,  6,  6,
479	7,  9, 11, 13, 13, 10,  7, 11,  6,  7,  8,  9, 10, 12, 11,  5,
480	11, 6,  8,  7,  9, 11, 14, 15, 11,  6,  6,  8,  4,  5,  7,  8,
481	10,13, 10,  5,  7,  7,  5,  5,  6,  8, 10, 11, 10,  7,  7,  8,
482	6,  5,  5,  7,  9,  9, 11,  8,  8, 11,  8,  7,  6,  6,  7,  9,
483	12,11, 10, 13,  9,  9,  7,  7,  7,  9, 11, 13, 12, 15, 12, 11,
484	9,  8,  8,  8]).unwrap();
485}