claxon/subframe.rs
1// Claxon -- A FLAC decoding library in Rust
2// Copyright 2014 Ruud van Asseldonk
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// A copy of the License has been included in the root of the repository.
7
8//! The `subframe` module deals with subframes that make up a frame of the FLAC stream.
9
10use std::cmp;
11use std::num;
12use error::{Error, Result, fmt_err};
13use input::{Bitstream, ReadBytes};
14
/// The prediction method of a subframe, as encoded in its header.
#[derive(Clone, Copy, Debug)]
enum SubframeType {
    /// One sample value repeated for the entire block.
    Constant,
    /// Samples stored literally, without any prediction.
    Verbatim,
    /// Fixed polynomial prediction; the payload is the order (0 through 4).
    Fixed(u8),
    /// Quantized linear prediction; the payload is the order (1 through 32).
    Lpc(u8),
}
22
/// The header that precedes every subframe in a frame.
#[derive(Clone, Copy)]
struct SubframeHeader {
    /// The prediction method used for this subframe.
    sf_type: SubframeType,
    /// The number of low bits stripped from every sample before encoding.
    /// Decoded samples must be shifted left by this amount to restore them.
    wasted_bits_per_sample: u32,
}
28
29fn read_subframe_header<R: ReadBytes>(input: &mut Bitstream<R>) -> Result<SubframeHeader> {
30 // The first bit must be a 0 padding bit.
31 if try!(input.read_bit()) {
32 return fmt_err("invalid subframe header");
33 }
34
35 // Next is a 6-bit subframe type.
36 let sf_type = match try!(input.read_leq_u8(6)) {
37 0 => SubframeType::Constant,
38 1 => SubframeType::Verbatim,
39
40 // Bit patterns 00001x, 0001xx and 01xxxx are reserved, this library
41 // would not know how to handle them, so this is an error. Values that
42 // are reserved at the time of writing are a format error, the
43 // `Unsupported` error type is for specified features that are not
44 // implemented.
45 n if (n & 0b111_110 == 0b000_010) || (n & 0b111_100 == 0b000_100) ||
46 (n & 0b110_000 == 0b010_000) => {
47 return fmt_err("invalid subframe header, encountered reserved value");
48 }
49
50 n if n & 0b111_000 == 0b001_000 => {
51 let order = n & 0b000_111;
52
53 // A fixed frame has order up to 4, other bit patterns are reserved.
54 if order > 4 {
55 return fmt_err("invalid subframe header, encountered reserved value");
56 }
57
58 SubframeType::Fixed(order)
59 }
60
61 // The only possibility left is bit pattern 1xxxxx, an LPC subframe.
62 n => {
63 // The xxxxx bits are the order minus one.
64 let order_mo = n & 0b011_111;
65 SubframeType::Lpc(order_mo + 1)
66 }
67 };
68
69 // Next bits indicates whether there are wasted bits per sample.
70 let wastes_bits = try!(input.read_bit());
71
72 // If so, k - 1 zero bits follow, where k is the number of wasted bits.
73 let wasted_bits = if !wastes_bits {
74 0
75 } else {
76 1 + try!(input.read_unary())
77 };
78
79 // The spec puts no bounds on the number of wasted bits per sample, but more
80 // than 31 does not make sense, as it would remove all data even for 32-bit
81 // samples.
82 if wasted_bits > 31 {
83 return fmt_err("wasted bits per sample must not exceed 31");
84 }
85
86 let subframe_header = SubframeHeader {
87 sf_type: sf_type,
88 wasted_bits_per_sample: wasted_bits,
89 };
90 Ok(subframe_header)
91}
92
/// Given a signed two's complement integer in the `bits` least significant
/// bits of `val`, extends the sign bit to a valid 16-bit signed integer.
#[inline(always)]
fn extend_sign_u16(val: u16, bits: u32) -> i16 {
    // Shift left until the desired sign bit becomes the real sign bit, then
    // shift back as a signed value: the arithmetic right shift smears the
    // sign bit across the vacated high bits.
    let shift = 16 - bits;
    ((val << shift) as i16) >> shift
}
102
#[test]
fn verify_extend_sign_u16() {
    // Positive values pass through unchanged.
    assert_eq!(5, extend_sign_u16(5, 4));
    assert_eq!(0x3ffe, extend_sign_u16(0x3ffe, 15));
    // Negative values in 4, 9, 16 and 15-bit two's complement.
    assert_eq!(-5, extend_sign_u16(16 - 5, 4));
    assert_eq!(-3, extend_sign_u16(512 - 3, 9));
    assert_eq!(-1, extend_sign_u16(0xffff, 16));
    assert_eq!(-2, extend_sign_u16(0xfffe, 16));
    assert_eq!(-1, extend_sign_u16(0x7fff, 15));
}
113
/// Given a signed two's complement integer in the `bits` least significant
/// bits of `val`, extends the sign bit to a valid 32-bit signed integer.
#[inline(always)]
pub fn extend_sign_u32(val: u32, bits: u32) -> i32 {
    // Shift left until the desired sign bit becomes the real sign bit, then
    // shift back as a signed value: the arithmetic right shift smears the
    // sign bit across the vacated high bits.
    let shift = 32 - bits;
    ((val << shift) as i32) >> shift
}
123
#[test]
fn verify_extend_sign_u32() {
    // Positive values pass through unchanged.
    assert_eq!(5, extend_sign_u32(5, 4));
    assert_eq!(0x3ffffffe, extend_sign_u32(0x3ffffffe, 31));
    // Negative values at various widths, including the full 32 bits.
    assert_eq!(-5, extend_sign_u32(16 - 5, 4));
    assert_eq!(-3, extend_sign_u32(512 - 3, 9));
    assert_eq!(-2, extend_sign_u32(0xfffe, 16));
    assert_eq!(-1, extend_sign_u32(0xffffffff_u32, 32));
    assert_eq!(-2, extend_sign_u32(0xfffffffe_u32, 32));
    assert_eq!(-1, extend_sign_u32(0x7fffffff, 31));

    // The data below are samples from a real FLAC stream.
    assert_eq!(-6392, extend_sign_u32(124680, 17));
    assert_eq!(-6605, extend_sign_u32(124467, 17));
    assert_eq!(-6850, extend_sign_u32(124222, 17));
    assert_eq!(-7061, extend_sign_u32(124011, 17));
}
141
/// Decodes a signed number from Rice coding to the two's complement.
///
/// The Rice coding used by FLAC operates on unsigned integers, but the
/// residual is signed. The zig-zag mapping interleaves the two:
///
///  0 -> 0
/// -1 -> 1
///  1 -> 2
/// -2 -> 3
///  2 -> 4
/// etc.
///
/// This function inverts that mapping, turning the unsigned coded value back
/// into the signed residual.
#[inline(always)]
fn rice_to_signed(val: u32) -> i32 {
    // Branch-free equivalent of:
    //
    //     if val & 1 == 1 {
    //         -1 - (val / 2) as i32
    //     } else {
    //         (val / 2) as i32
    //     }
    //
    // The mask below is all ones when val is odd and all zeros when it is
    // even, so the xor either applies `-1 - x` (bitwise not) to the halved
    // value or leaves it untouched. This compiles to only four instructions
    // on x64.
    let magnitude = (val >> 1) as i32;
    let sign_mask = ((val << 31) as i32) >> 31;
    magnitude ^ sign_mask
}
171
#[test]
fn verify_rice_to_signed() {
    // The zig-zag mapping alternates: 0, -1, 1, -2, 2, ...
    assert_eq!(rice_to_signed(0), 0);
    assert_eq!(rice_to_signed(1), -1);
    assert_eq!(rice_to_signed(2), 1);
    assert_eq!(rice_to_signed(3), -2);
    assert_eq!(rice_to_signed(4), 2);
}
180
181/// Decodes a subframe into the provided block-size buffer.
182///
183/// It is assumed that the length of the buffer is the block size.
184pub fn decode<R: ReadBytes>(input: &mut Bitstream<R>,
185 bps: u32,
186 buffer: &mut [i32])
187 -> Result<()> {
188 // The sample type i32 should be wide enough to accomodate for all bits of
189 // the stream, but this can be verified at a higher level than here. Still,
190 // it is a good idea to make the assumption explicit. FLAC supports up to
191 // sample widths of 32 in theory, so with the delta between channels that
192 // requires 33 bits, but the reference decoder supports only subset FLAC of
193 // 24 bits per sample at most, so restricting ourselves to i32 is fine.
194 debug_assert!(32 >= bps);
195
196 let header = try!(read_subframe_header(input));
197
198 if header.wasted_bits_per_sample >= bps {
199 return fmt_err("subframe has no non-wasted bits");
200 }
201
202 // If there are wasted bits, the subframe stores samples with a lower bps
203 // than the stream bps. We later shift all the samples left to correct this.
204 let sf_bps = bps - header.wasted_bits_per_sample;
205
206 match header.sf_type {
207 SubframeType::Constant => try!(decode_constant(input, sf_bps, buffer)),
208 SubframeType::Verbatim => try!(decode_verbatim(input, sf_bps, buffer)),
209 SubframeType::Fixed(ord) => try!(decode_fixed(input, sf_bps, ord as u32, buffer)),
210 SubframeType::Lpc(ord) => try!(decode_lpc(input, sf_bps, ord as u32, buffer)),
211 }
212
213 // Finally, everything must be shifted by 'wasted bits per sample' to
214 // the left. Note: it might be better performance-wise to do this on
215 // the fly while decoding. That could be done if this is a bottleneck.
216 if header.wasted_bits_per_sample > 0 {
217 debug_assert!(header.wasted_bits_per_sample <= 31,
218 "Cannot shift by more than the sample width.");
219 for s in buffer {
220 // For a valid FLAC file, this shift does not overflow. For an
221 // invalid file it might, and then we decode garbage, but we don't
222 // crash the program in debug mode due to shift overflow.
223 *s = s.wrapping_shl(header.wasted_bits_per_sample);
224 }
225 }
226
227 Ok(())
228}
229
/// The residual coding method used by a partitioned Rice-coded residual.
#[derive(Copy, Clone)]
enum RicePartitionType {
    /// Partitions with a 4-bit Rice parameter.
    Rice,
    /// Partitions with a 5-bit Rice parameter.
    Rice2,
}
235
/// Decodes a partitioned Rice-coded residual into `buffer`.
///
/// `block_size` is the size of the whole block; `buffer` holds only the
/// residual part, i.e. the block minus the warm-up samples that have been
/// decoded already.
fn decode_residual<R: ReadBytes>(input: &mut Bitstream<R>,
                                 block_size: u16,
                                 buffer: &mut [i32])
                                 -> Result<()> {
    // Residual starts with two bits of coding method.
    let partition_type = match try!(input.read_leq_u8(2)) {
        0b00 => RicePartitionType::Rice,
        0b01 => RicePartitionType::Rice2,
        // 10 and 11 are reserved.
        _ => return fmt_err("invalid residual, encountered reserved value"),
    };

    // Next are 4 bits partition order.
    let order = try!(input.read_leq_u8(4));

    // There are 2^order partitions. Note: the specification states a 4-bit
    // partition order, so the order is at most 15, so there could be
    // 2^15 = 32768 partitions, and the block size is a 16-bit number, so
    // there are at most 2^16 - 1 samples in the block. No values have been
    // marked as invalid by the specification though.
    let n_partitions = 1u32 << order;
    let n_samples_per_partition = block_size >> order;

    // The partitions together must fill the block. If the block size is not a
    // multiple of 2^order; if we shifted off some bits, then we would not fill
    // the entire block. Such a partition order is invalid for this block size.
    if block_size & (n_partitions - 1) as u16 != 0 {
        return fmt_err("invalid partition order")
    }

    // NOTE: the check above checks that block_size is a multiple of n_partitions
    // (this works because n_partitions is a power of 2). The check below is
    // equivalent but more expensive.
    debug_assert_eq!(n_partitions * n_samples_per_partition as u32, block_size as u32);

    // The buffer excludes the warm-up samples, so the difference with the
    // block size is the warm-up length (the predictor order of the caller).
    let n_warm_up = block_size - buffer.len() as u16;

    // The partition size must be at least as big as the number of warm-up
    // samples, otherwise the size of the first partition is negative.
    if n_warm_up > n_samples_per_partition {
        return fmt_err("invalid residual");
    }

    // Finally decode the partitions themselves. The first partition is short:
    // the warm-up samples count towards it but are not stored in it. The two
    // arms are kept separate (rather than matching inside the loop) so the
    // common Rice case can inline its partition decoder.
    match partition_type {
        RicePartitionType::Rice => {
            let mut start = 0;
            let mut len = n_samples_per_partition - n_warm_up;
            for _ in 0..n_partitions {
                let slice = &mut buffer[start..start + len as usize];
                try!(decode_rice_partition(input, slice));
                start = start + len as usize;
                len = n_samples_per_partition;
            }
        }
        RicePartitionType::Rice2 => {
            let mut start = 0;
            let mut len = n_samples_per_partition - n_warm_up;
            for _ in 0..n_partitions {
                let slice = &mut buffer[start..start + len as usize];
                try!(decode_rice2_partition(input, slice));
                start = start + len as usize;
                len = n_samples_per_partition;
            }
        }
    }

    Ok(())
}
305
// Performance note: all Rice partitions in real-world FLAC files are Rice
// partitions, not Rice2 partitions. Therefore it makes sense to inline this
// function into decode_residual.
#[inline(always)]
fn decode_rice_partition<R: ReadBytes>(input: &mut Bitstream<R>,
                                       buffer: &mut [i32])
                                       -> Result<()> {
    // A Rice partition (not Rice2), starts with a 4-bit Rice parameter.
    let rice_param = try!(input.read_leq_u8(4)) as u32;

    // All ones is an escape code that indicates unencoded binary.
    if rice_param == 0b1111 {
        return Err(Error::Unsupported("unencoded binary is not yet implemented"))
    }

    // About the decoding below: the first part of the sample is the quotient,
    // unary encoded. This means that there are q zeros, and then a one.
    //
    // The reference decoder supports sample widths up to 24 bits, so with
    // the additional bytes for difference in channels and for prediction, a
    // sample fits in 26 bits. The Rice parameter could be as little as 1,
    // so the quotient can potentially be very large. However, in practice
    // it is rarely greater than 5. Values as large as 75 still occur though.
    //
    // Next up is the remainder in rice_param bits. Depending on the number of
    // bits, at most two or three bytes need to be read, so the code below is
    // split into two cases to allow a more efficient reading function to be
    // used when possible. About 45% of the time, rice_param is less than 9
    // (measured from real-world FLAC files).

    if rice_param <= 8 {
        // The remainder fits in a single byte read.
        for sample in buffer.iter_mut() {
            let q = try!(input.read_unary());
            let r = try!(input.read_leq_u8(rice_param)) as u32;
            *sample = rice_to_signed((q << rice_param) | r);
        }
    } else {
        // Here rice_param is 9..14 (15 is the escape code handled above), so
        // the remainder needs the wider two-byte read.
        for sample in buffer.iter_mut() {
            let q = try!(input.read_unary());
            let r = try!(input.read_gt_u8_leq_u16(rice_param));
            *sample = rice_to_signed((q << rice_param) | r);
        }
    }

    Ok(())
}
352
353// Performance note: a Rice2 partition is extremely uncommon, I haven’t seen a
354// single one in any real-world FLAC file. So do not inline it, in order not to
355// pollute the caller with dead code.
356#[inline(never)]
357#[cold]
358fn decode_rice2_partition<R: ReadBytes>(input: &mut Bitstream<R>,
359 buffer: &mut [i32])
360 -> Result<()> {
361 // A Rice2 partition, starts with a 5-bit Rice parameter.
362 let rice_param = try!(input.read_leq_u8(5)) as u32;
363
364 // All ones is an escape code that indicates unencoded binary.
365 if rice_param == 0b11111 {
366 return Err(Error::Unsupported("unencoded binary is not yet implemented"))
367 }
368
369 for sample in buffer.iter_mut() {
370 // First part of the sample is the quotient, unary encoded.
371 let q = try!(input.read_unary());
372
373 // Next is the remainder, in rice_param bits. Because at this
374 // point rice_param is at most 30, we can safely read into a u32.
375 let r = try!(input.read_leq_u32(rice_param));
376 *sample = rice_to_signed((q << rice_param) | r);
377 }
378
379 Ok(())
380}
381
382fn decode_constant<R: ReadBytes>(input: &mut Bitstream<R>,
383 bps: u32,
384 buffer: &mut [i32])
385 -> Result<()> {
386 let sample_u32 = try!(input.read_leq_u32(bps));
387 let sample = extend_sign_u32(sample_u32, bps);
388
389 for s in buffer {
390 *s = sample;
391 }
392
393 Ok(())
394}
395
396#[cold]
397fn decode_verbatim<R: ReadBytes>(input: &mut Bitstream<R>,
398 bps: u32,
399 buffer: &mut [i32])
400 -> Result<()> {
401
402 // This function must not be called for a sample wider than the sample type.
403 // This has been verified at an earlier stage, but it is good to state the
404 // assumption explicitly. FLAC supports up to 32-bit samples, so the
405 // mid/side delta would require 33 bits per sample. But that is not subset
406 // FLAC, and the reference decoder does not support it either.
407 debug_assert!(bps <= 32);
408
409 // A verbatim block stores samples without encoding whatsoever.
410 for s in buffer {
411 *s = extend_sign_u32(try!(input.read_leq_u32(bps)), bps);
412 }
413
414 Ok(())
415}
416
417fn predict_fixed(order: u32, buffer: &mut [i32]) -> Result<()> {
418 // When this is called during decoding, the order as read from the subframe
419 // header has already been verified, so it is safe to assume that
420 // 0 <= order <= 4. Still, it is good to state that assumption explicitly.
421 debug_assert!(order <= 4);
422
423 // Coefficients for fitting an order n polynomial. You get these
424 // coefficients by writing down n numbers, then their differences, then the
425 // differences of the differences, etc. What results is Pascal's triangle
426 // with alternating signs.
427 let o0 = [];
428 let o1 = [1];
429 let o2 = [-1, 2];
430 let o3 = [1, -3, 3];
431 let o4 = [-1, 4, -6, 4];
432
433 // Multiplying samples with at most 6 adds 3 bits. Then summing at most 5
434 // of those values again adds at most 4 bits, so a sample type that is 7
435 // bits wider than bps should suffice. Subset FLAC supports at most 24 bits
436 // per sample, 25 for the channel delta, so using an i32 is safe here.
437
438 let coefficients: &[i32] = match order {
439 0 => &o0,
440 1 => &o1,
441 2 => &o2,
442 3 => &o3,
443 4 => &o4,
444 _ => unreachable!(),
445 };
446
447 let window_size = order as usize + 1;
448
449 // TODO: abstract away this iterating over a window into a function?
450 for i in 0..buffer.len() - order as usize {
451 // Manually do the windowing, because .windows() returns immutable slices.
452 let window = &mut buffer[i..i + window_size];
453
454 // The #coefficients elements of the window store already decoded
455 // samples, the last element of the window is the delta. Therefore,
456 // predict based on the first #coefficients samples. From the note
457 // above we know that the multiplication will not overflow for 24-bit
458 // samples, so the wrapping mul is safe. If it wraps, the file was
459 // invalid, and we make no guarantees about the decoded result. But
460 // we explicitly do not crash.
461 let prediction = coefficients.iter()
462 .zip(window.iter())
463 .map(|(&c, &s)| num::Wrapping(c) * num::Wrapping(s))
464 // Rust 1.13 does not support using `sum`
465 // with `Wrapping`, so do a fold.
466 .fold(num::Wrapping(0), |a, x| a + x).0;
467
468 // The delta is stored, so the sample is the prediction + delta.
469 let delta = window[coefficients.len()];
470 window[coefficients.len()] = prediction.wrapping_add(delta);
471 }
472
473 Ok(())
474}
475
#[test]
fn verify_predict_fixed() {
    // The following data is from an actual FLAC stream and has been verified
    // against the reference decoder. The data is from a 16-bit stream.
    // The first three values are warm-up samples (order 3); the rest are
    // residual deltas that the predictor turns into sample values.
    let mut buffer = [-729, -722, -667, -19, -16, 17, -23, -7,
                      16, -16, -5, 3, -8, -13, -15, -1];
    assert!(predict_fixed(3, &mut buffer).is_ok());
    assert_eq!(&buffer, &[-729, -722, -667, -583, -486, -359, -225, -91,
                          59, 209, 354, 497, 630, 740, 812, 845]);

    // The following data causes overflow of i32 when not handled with care.
    let mut buffer = [21877, 27482, -6513];
    assert!(predict_fixed(2, &mut buffer).is_ok());
    assert_eq!(&buffer, &[21877, 27482, 26574]);
}
491
492fn decode_fixed<R: ReadBytes>(input: &mut Bitstream<R>,
493 bps: u32,
494 order: u32,
495 buffer: &mut [i32])
496 -> Result<()> {
497 // The length of the buffer which is passed in, is the length of the block.
498 // Thus, the number of warm-up samples must not exceed that length.
499 if buffer.len() < order as usize {
500 return fmt_err("invalid fixed subframe, order is larger than block size")
501 }
502
503 // There are order * bits per sample unencoded warm-up sample bits.
504 try!(decode_verbatim(input, bps, &mut buffer[..order as usize]));
505
506 // Next up is the residual. We decode into the buffer directly, the
507 // predictor contributions will be added in a second pass. The first
508 // `order` samples have been decoded already, so continue after that.
509 try!(decode_residual(input,
510 buffer.len() as u16,
511 &mut buffer[order as usize..]));
512
513 try!(predict_fixed(order, buffer));
514
515 Ok(())
516}
517
/// Apply LPC prediction for subframes with LPC order of at most 12.
///
/// This function takes advantage of the upper bound on the order. Virtually all
/// files that occur in the wild are subset-compliant files, which have an order
/// of at most 12, so it makes sense to optimize for this. A simpler (but
/// slower) fallback is implemented in `predict_lpc_high_order`.
///
/// `buffer` must hold at least `raw_coefficients.len()` warm-up samples,
/// followed by the residual deltas; the deltas are replaced in place by the
/// decoded sample values.
fn predict_lpc_low_order(
    raw_coefficients: &[i16],
    qlp_shift: i16,
    buffer: &mut [i32],
) {
    debug_assert!(qlp_shift >= 0, "Right-shift by negative value is not allowed.");
    debug_assert!(qlp_shift < 64, "Cannot shift by more than integer width.");
    // The decoded residuals are 25 bits at most (assuming subset FLAC of at
    // most 24 bits per sample, but there is the delta encoding for channels).
    // The coefficients are 16 bits at most, so their product is 41 bits. In
    // practice the predictor order does not exceed 12, so adding 12 numbers of
    // 41 bits each requires at most 53 bits. Therefore, do all intermediate
    // computations as i64.

    // In the code below, a predictor order of 12 is assumed. This aids
    // optimization and vectorization by making some counts available at compile
    // time. If the actual order is less than 12, simply set the early
    // coefficients to 0.
    let order = raw_coefficients.len();
    // Widen the coefficients to i64 and right-align them in a fixed 12-slot
    // array, padding the front with zeros, so the main loop below always
    // consumes exactly 12 coefficients.
    let coefficients = {
        let mut buf = [0i64; 12];
        let mut i = 12 - order;
        for c in raw_coefficients {
            buf[i] = *c as i64;
            i = i + 1;
        }
        buf
    };

    // The linear prediction is essentially an inner product of the known
    // samples with the coefficients, followed by a shift. To be able to do an
    // inner product of 12 elements at a time, we must first have 12 samples.
    // If the predictor order is less, first predict the few samples after the
    // warm-up samples.
    let left = cmp::min(12, buffer.len()) - order;
    for i in 0..left {
        let prediction = raw_coefficients.iter()
            .zip(&buffer[i..order + i])
            .map(|(&c, &s)| c as i64 * s as i64)
            .sum::<i64>() >> qlp_shift;
        // The stream stores the delta; the decoded sample is
        // prediction + delta, truncated back to i32.
        let delta = buffer[order + i] as i64;
        buffer[order + i] = (prediction + delta) as i32;
    }

    // A buffer of at most 12 samples is fully decoded by the loop above.
    if buffer.len() <= 12 {
        return
    }

    // At this point, buffer[0..12] has been predicted. For the rest of the
    // buffer we can do inner products of 12 samples. This reduces the amount of
    // conditional code, and improves performance significantly.
    for i in 12..buffer.len() {
        let prediction = coefficients.iter()
            .zip(&buffer[i - 12..i])
            .map(|(&c, &s)| c * s as i64)
            .sum::<i64>() >> qlp_shift;
        let delta = buffer[i] as i64;
        buffer[i] = (prediction + delta) as i32;
    }
}
584
/// Apply LPC prediction for non-subset subframes, with LPC order > 12.
///
/// This is the general fallback for `predict_lpc_low_order`: it lifts the
/// order restrictions (and the specializations that come with them) at the
/// cost of performance. High orders only occur in non-subset files, which are
/// rare in the wild.
fn predict_lpc_high_order(
    coefficients: &[i16],
    qlp_shift: i16,
    buffer: &mut [i32],
) {
    let order = coefficients.len();

    debug_assert!(qlp_shift >= 0, "Right-shift by negative value is not allowed.");
    debug_assert!(qlp_shift < 64, "Cannot shift by more than integer width.");
    debug_assert!(order > 12, "Use the faster predict_lpc_low_order for LPC order <= 12.");
    debug_assert!(buffer.len() >= order, "Buffer must fit at least `order` warm-up samples.");

    // The prediction is an inner product of the previous `order` samples with
    // the coefficients, followed by a shift. The first `order` samples are
    // warm-up samples, stored as-is. Residuals are at most 25 bits and
    // coefficients at most 16, so every product and the running sum fit
    // comfortably in an i64 accumulator.
    for i in order..buffer.len() {
        let mut acc: i64 = 0;
        for (j, &c) in coefficients.iter().enumerate() {
            acc += c as i64 * buffer[i - order + j] as i64;
        }
        let prediction = acc >> qlp_shift;

        // The stream stores the delta, so the sample is prediction + delta.
        let delta = buffer[i] as i64;
        buffer[i] = (prediction + delta) as i32;
    }
}
615
#[test]
fn verify_predict_lpc() {
    // The following data is from an actual FLAC stream and has been verified
    // against the reference decoder. The data is from a 16-bit stream.
    let coefficients = [-75, 166, 121, -269, -75, -399, 1042];
    let mut buffer = [-796, -547, -285, -32, 199, 443, 670, -2,
                      -23, 14, 6, 3, -4, 12, -2, 10];
    predict_lpc_low_order(&coefficients, 9, &mut buffer);
    assert_eq!(&buffer, &[-796, -547, -285, -32, 199, 443, 670, 875,
                          1046, 1208, 1343, 1454, 1541, 1616, 1663, 1701]);

    // The following data causes an overflow when not handled with care.
    let coefficients = [119, -255, 555, -836, 879, -1199, 1757];
    let mut buffer = [-21363, -21951, -22649, -24364, -27297, -26870, -30017, 3157];
    predict_lpc_low_order(&coefficients, 10, &mut buffer);
    assert_eq!(&buffer, &[-21363, -21951, -22649, -24364, -27297, -26870, -30017, -29718]);

    // The following data from a real-world file has a high LPC order, it has
    // more than 12 coefficients. The expected output has been verified against
    // the reference decoder.
    let coefficients = [
        709, -2589, 4600, -4612, 1350, 4220, -9743, 12671, -12129, 8586,
        -3775, -645, 3904, -5543, 4373, 182, -6873, 13265, -15417, 11550,
    ];
    let mut buffer = [
        213238, 210830, 234493, 209515, 235139, 201836, 208151, 186277, 157720, 148176,
        115037, 104836, 60794, 54523, 412, 17943, -6025, -3713, 8373, 11764, 30094,
    ];
    predict_lpc_high_order(&coefficients, 12, &mut buffer);
    assert_eq!(&buffer, &[
        213238, 210830, 234493, 209515, 235139, 201836, 208151, 186277, 157720, 148176,
        115037, 104836, 60794, 54523, 412, 17943, -6025, -3713, 8373, 11764, 33931,
    ]);
}
650
/// Decodes an LPC subframe: warm-up samples, coefficients, residual, and
/// finally the prediction pass that turns deltas into samples.
fn decode_lpc<R: ReadBytes>(input: &mut Bitstream<R>,
                            bps: u32,
                            order: u32,
                            buffer: &mut [i32])
                            -> Result<()> {
    // The order minus one fits in 5 bits, so the order is at most 32.
    debug_assert!(order <= 32);

    // On the frame decoding level it is ensured that the buffer is large
    // enough. If it can't even fit the warm-up samples, then there is a frame
    // smaller than its lpc order, which is invalid.
    if buffer.len() < order as usize {
        return fmt_err("invalid LPC subframe, lpc order is larger than block size")
    }

    // There are order * bits per sample unencoded warm-up sample bits.
    try!(decode_verbatim(input, bps, &mut buffer[..order as usize]));

    // Next are four bits quantised linear predictor coefficient precision - 1.
    let qlp_precision = try!(input.read_leq_u8(4)) as u32 + 1;

    // The raw bit pattern 1111 (qlp_precision 16) is invalid.
    if qlp_precision - 1 == 0b1111 {
        return fmt_err("invalid subframe, qlp precision value invalid");
    }

    // Next are five bits quantized linear predictor coefficient shift,
    // in signed two's complement. Read 5 bits and then extend the sign bit.
    let qlp_shift_unsig = try!(input.read_leq_u16(5));
    let qlp_shift = extend_sign_u16(qlp_shift_unsig, 5);

    // The spec does allow the qlp shift to be negative, but in practice this
    // does not happen. Fully supporting it would be a performance hit, as an
    // arithmetic shift by a negative amount is invalid, so this would incur a
    // branch. If a real-world file ever hits this case, then we should consider
    // making two LPC predictors, one for positive, and one for negative qlp.
    if qlp_shift < 0 {
        let msg = "a negative quantized linear predictor coefficient shift is \
                   not supported, please file a bug.";
        return Err(Error::Unsupported(msg))
    }

    // Finally, the coefficients themselves. The order is at most 32, so all
    // coefficients can be kept on the stack. Store them in reverse, because
    // that is how they are used in prediction.
    let mut coefficients = [0; 32];
    for coef in coefficients[..order as usize].iter_mut().rev() {
        // We can safely read into a u16, qlp_precision is at most 15.
        let coef_unsig = try!(input.read_leq_u16(qlp_precision));
        *coef = extend_sign_u16(coef_unsig, qlp_precision);
    }

    // Next up is the residual. We decode it into the buffer directly, the
    // predictor contributions will be added in a second pass. The first
    // `order` samples have been decoded already, so continue after that.
    try!(decode_residual(input,
                         buffer.len() as u16,
                         &mut buffer[order as usize..]));

    // In "subset"-compliant files, the LPC order is at most 12. For LPC
    // prediction of such files we have a special fast path that takes advantage
    // of the low order. We can still decode non-subset files using a less
    // specialized implementation. Non-subset files are rare in the wild.
    if order <= 12 {
        predict_lpc_low_order(&coefficients[..order as usize], qlp_shift, buffer);
    } else {
        predict_lpc_high_order(&coefficients[..order as usize], qlp_shift, buffer);
    }

    Ok(())
}