claxon/
subframe.rs

1// Claxon -- A FLAC decoding library in Rust
2// Copyright 2014 Ruud van Asseldonk
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// A copy of the License has been included in the root of the repository.
7
8//! The `subframe` module deals with subframes that make up a frame of the FLAC stream.
9
10use std::cmp;
11use std::num;
12use error::{Error, Result, fmt_err};
13use input::{Bitstream, ReadBytes};
14
15#[derive(Clone, Copy, Debug)]
16enum SubframeType {
17    Constant,
18    Verbatim,
19    Fixed(u8),
20    Lpc(u8),
21}
22
23#[derive(Clone, Copy)]
24struct SubframeHeader {
25    sf_type: SubframeType,
26    wasted_bits_per_sample: u32,
27}
28
29fn read_subframe_header<R: ReadBytes>(input: &mut Bitstream<R>) -> Result<SubframeHeader> {
30    // The first bit must be a 0 padding bit.
31    if try!(input.read_bit()) {
32        return fmt_err("invalid subframe header");
33    }
34
35    // Next is a 6-bit subframe type.
36    let sf_type = match try!(input.read_leq_u8(6)) {
37        0 => SubframeType::Constant,
38        1 => SubframeType::Verbatim,
39
40        // Bit patterns 00001x, 0001xx and 01xxxx are reserved, this library
41        // would not know how to handle them, so this is an error. Values that
42        // are reserved at the time of writing are a format error, the
43        // `Unsupported` error type is for specified features that are not
44        // implemented.
45        n if (n & 0b111_110 == 0b000_010) || (n & 0b111_100 == 0b000_100) ||
46             (n & 0b110_000 == 0b010_000) => {
47            return fmt_err("invalid subframe header, encountered reserved value");
48        }
49
50        n if n & 0b111_000 == 0b001_000 => {
51            let order = n & 0b000_111;
52
53            // A fixed frame has order up to 4, other bit patterns are reserved.
54            if order > 4 {
55                return fmt_err("invalid subframe header, encountered reserved value");
56            }
57
58            SubframeType::Fixed(order)
59        }
60
61        // The only possibility left is bit pattern 1xxxxx, an LPC subframe.
62        n => {
63            // The xxxxx bits are the order minus one.
64            let order_mo = n & 0b011_111;
65            SubframeType::Lpc(order_mo + 1)
66        }
67    };
68
69    // Next bits indicates whether there are wasted bits per sample.
70    let wastes_bits = try!(input.read_bit());
71
72    // If so, k - 1 zero bits follow, where k is the number of wasted bits.
73    let wasted_bits = if !wastes_bits {
74        0
75    } else {
76        1 + try!(input.read_unary())
77    };
78
79    // The spec puts no bounds on the number of wasted bits per sample, but more
80    // than 31 does not make sense, as it would remove all data even for 32-bit
81    // samples.
82    if wasted_bits > 31 {
83        return fmt_err("wasted bits per sample must not exceed 31");
84    }
85
86    let subframe_header = SubframeHeader {
87        sf_type: sf_type,
88        wasted_bits_per_sample: wasted_bits,
89    };
90    Ok(subframe_header)
91}
92
93/// Given a signed two's complement integer in the `bits` least significant
94/// bits of `val`, extends the sign bit to a valid 16-bit signed integer.
95#[inline(always)]
96fn extend_sign_u16(val: u16, bits: u32) -> i16 {
97    // First shift the value so the desired sign bit is the actual sign bit,
98    // then convert to a signed integer, and then do an arithmetic shift back,
99    // which will extend the sign bit.
100    return ((val << (16 - bits)) as i16) >> (16 - bits);
101}
102
103#[test]
104fn verify_extend_sign_u16() {
105    assert_eq!(5, extend_sign_u16(5, 4));
106    assert_eq!(0x3ffe, extend_sign_u16(0x3ffe, 15));
107    assert_eq!(-5, extend_sign_u16(16 - 5, 4));
108    assert_eq!(-3, extend_sign_u16(512 - 3, 9));
109    assert_eq!(-1, extend_sign_u16(0xffff, 16));
110    assert_eq!(-2, extend_sign_u16(0xfffe, 16));
111    assert_eq!(-1, extend_sign_u16(0x7fff, 15));
112}
113
114/// Given a signed two's complement integer in the `bits` least significant
115/// bits of `val`, extends the sign bit to a valid 32-bit signed integer.
116#[inline(always)]
117pub fn extend_sign_u32(val: u32, bits: u32) -> i32 {
118    // First shift the value so the desired sign bit is the actual sign bit,
119    // then convert to a signed integer, and then do an arithmetic shift back,
120    // which will extend the sign bit.
121    ((val << (32 - bits)) as i32) >> (32 - bits)
122}
123
124#[test]
125fn verify_extend_sign_u32() {
126    assert_eq!(5, extend_sign_u32(5, 4));
127    assert_eq!(0x3ffffffe, extend_sign_u32(0x3ffffffe, 31));
128    assert_eq!(-5, extend_sign_u32(16 - 5, 4));
129    assert_eq!(-3, extend_sign_u32(512 - 3, 9));
130    assert_eq!(-2, extend_sign_u32(0xfffe, 16));
131    assert_eq!(-1, extend_sign_u32(0xffffffff_u32, 32));
132    assert_eq!(-2, extend_sign_u32(0xfffffffe_u32, 32));
133    assert_eq!(-1, extend_sign_u32(0x7fffffff, 31));
134
135    // The data below are samples from a real FLAC stream.
136    assert_eq!(-6392, extend_sign_u32(124680, 17));
137    assert_eq!(-6605, extend_sign_u32(124467, 17));
138    assert_eq!(-6850, extend_sign_u32(124222, 17));
139    assert_eq!(-7061, extend_sign_u32(124011, 17));
140}
141
142/// Decodes a signed number from Rice coding to the two's complement.
143///
144/// The Rice coding used by FLAC operates on unsigned integers, but the
145/// residual is signed. The mapping is done as follows:
146///
147///  0 -> 0
148/// -1 -> 1
149///  1 -> 2
150/// -2 -> 3
151///  2 -> 4
152///  etc.
153///
154/// This function takes the unsigned value and converts it into a signed
155/// number.
156#[inline(always)]
157fn rice_to_signed(val: u32) -> i32 {
158    // The following bit-level hackery compiles to only four instructions on
159    // x64. It is equivalent to the following code:
160    //
161    //   if val & 1 == 1 {
162    //       -1 - (val / 2) as i32
163    //   } else {
164    //       (val / 2) as i32
165    //   }
166    //
167    let half = (val >> 1) as i32;
168    let extended_bit_0 = ((val << 31) as i32) >> 31;
169    half ^ extended_bit_0
170}
171
172#[test]
173fn verify_rice_to_signed() {
174    assert_eq!(rice_to_signed(0), 0);
175    assert_eq!(rice_to_signed(1), -1);
176    assert_eq!(rice_to_signed(2), 1);
177    assert_eq!(rice_to_signed(3), -2);
178    assert_eq!(rice_to_signed(4), 2);
179}
180
181/// Decodes a subframe into the provided block-size buffer.
182///
183/// It is assumed that the length of the buffer is the block size.
184pub fn decode<R: ReadBytes>(input: &mut Bitstream<R>,
185                            bps: u32,
186                            buffer: &mut [i32])
187                            -> Result<()> {
188    // The sample type i32 should be wide enough to accomodate for all bits of
189    // the stream, but this can be verified at a higher level than here. Still,
190    // it is a good idea to make the assumption explicit. FLAC supports up to
191    // sample widths of 32 in theory, so with the delta between channels that
192    // requires 33 bits, but the reference decoder supports only subset FLAC of
193    // 24 bits per sample at most, so restricting ourselves to i32 is fine.
194    debug_assert!(32 >= bps);
195
196    let header = try!(read_subframe_header(input));
197
198    if header.wasted_bits_per_sample >= bps {
199        return fmt_err("subframe has no non-wasted bits");
200    }
201
202    // If there are wasted bits, the subframe stores samples with a lower bps
203    // than the stream bps. We later shift all the samples left to correct this.
204    let sf_bps = bps - header.wasted_bits_per_sample;
205
206    match header.sf_type {
207        SubframeType::Constant => try!(decode_constant(input, sf_bps, buffer)),
208        SubframeType::Verbatim => try!(decode_verbatim(input, sf_bps, buffer)),
209        SubframeType::Fixed(ord) => try!(decode_fixed(input, sf_bps, ord as u32, buffer)),
210        SubframeType::Lpc(ord) => try!(decode_lpc(input, sf_bps, ord as u32, buffer)),
211    }
212
213    // Finally, everything must be shifted by 'wasted bits per sample' to
214    // the left. Note: it might be better performance-wise to do this on
215    // the fly while decoding. That could be done if this is a bottleneck.
216    if header.wasted_bits_per_sample > 0 {
217        debug_assert!(header.wasted_bits_per_sample <= 31,
218                      "Cannot shift by more than the sample width.");
219        for s in buffer {
220            // For a valid FLAC file, this shift does not overflow. For an
221            // invalid file it might, and then we decode garbage, but we don't
222            // crash the program in debug mode due to shift overflow.
223            *s = s.wrapping_shl(header.wasted_bits_per_sample);
224        }
225    }
226
227    Ok(())
228}
229
230#[derive(Copy, Clone)]
231enum RicePartitionType {
232    Rice,
233    Rice2,
234}
235
236fn decode_residual<R: ReadBytes>(input: &mut Bitstream<R>,
237                                 block_size: u16,
238                                 buffer: &mut [i32])
239                                 -> Result<()> {
240    // Residual starts with two bits of coding method.
241    let partition_type = match try!(input.read_leq_u8(2)) {
242        0b00 => RicePartitionType::Rice,
243        0b01 => RicePartitionType::Rice2,
244        // 10 and 11 are reserved.
245        _ => return fmt_err("invalid residual, encountered reserved value"),
246    };
247
248    // Next are 4 bits partition order.
249    let order = try!(input.read_leq_u8(4));
250
251    // There are 2^order partitions. Note: the specification states a 4-bit
252    // partition order, so the order is at most 31, so there could be 2^31
253    // partitions, but the block size is a 16-bit number, so there are at
254    // most 2^16 - 1 samples in the block. No values have been marked as
255    // invalid by the specification though.
256    let n_partitions = 1u32 << order;
257    let n_samples_per_partition = block_size >> order;
258
259    // The partitions together must fill the block. If the block size is not a
260    // multiple of 2^order; if we shifted off some bits, then we would not fill
261    // the entire block. Such a partition order is invalid for this block size.
262    if block_size & (n_partitions - 1) as u16 != 0 {
263        return fmt_err("invalid partition order")
264    }
265
266    // NOTE: the check above checks that block_size is a multiple of n_partitions
267    // (this works because n_partitions is a power of 2). The check below is
268    // equivalent but more expensive.
269    debug_assert_eq!(n_partitions * n_samples_per_partition as u32, block_size as u32);
270
271    let n_warm_up = block_size - buffer.len() as u16;
272
273    // The partition size must be at least as big as the number of warm-up
274    // samples, otherwise the size of the first partition is negative.
275    if n_warm_up > n_samples_per_partition {
276        return fmt_err("invalid residual");
277    }
278
279    // Finally decode the partitions themselves.
280    match partition_type {
281        RicePartitionType::Rice => {
282            let mut start = 0;
283            let mut len = n_samples_per_partition - n_warm_up;
284            for _ in 0..n_partitions {
285                let slice = &mut buffer[start..start + len as usize];
286                try!(decode_rice_partition(input, slice));
287                start = start + len as usize;
288                len = n_samples_per_partition;
289            }
290        }
291        RicePartitionType::Rice2 => {
292            let mut start = 0;
293            let mut len = n_samples_per_partition - n_warm_up;
294            for _ in 0..n_partitions {
295                let slice = &mut buffer[start..start + len as usize];
296                try!(decode_rice2_partition(input, slice));
297                start = start + len as usize;
298                len = n_samples_per_partition;
299            }
300        }
301    }
302
303    Ok(())
304}
305
306// Performance note: all Rice partitions in real-world FLAC files are Rice
307// partitions, not Rice2 partitions. Therefore it makes sense to inline this
308// function into decode_residual.
309#[inline(always)]
310fn decode_rice_partition<R: ReadBytes>(input: &mut Bitstream<R>,
311                                       buffer: &mut [i32])
312                                       -> Result<()> {
313    // A Rice partition (not Rice2), starts with a 4-bit Rice parameter.
314    let rice_param = try!(input.read_leq_u8(4)) as u32;
315
316    // All ones is an escape code that indicates unencoded binary.
317    if rice_param == 0b1111 {
318        return Err(Error::Unsupported("unencoded binary is not yet implemented"))
319    }
320
321    // About the decoding below: the first part of the sample is the quotient,
322    // unary encoded. This means that there are q zeros, and then a one.
323    //
324    // The reference decoder supports sample widths up to 24 bits, so with
325    // the additional bytes for difference in channels and for prediction, a
326    // sample fits in 26 bits. The Rice parameter could be as little as 1,
327    // so the quotient can potentially be very large. However, in practice
328    // it is rarely greater than 5. Values as large as 75 still occur though.
329    //
330    // Next up is the remainder in rice_param bits. Depending on the number of
331    // bits, at most two or three bytes need to be read, so the code below is
332    // split into two cases to allow a more efficient reading function to be
333    // used when possible. About 45% of the time, rice_param is less than 9
334    // (measured from real-world FLAC files).
335
336    if rice_param <= 8 {
337        for sample in buffer.iter_mut() {
338            let q = try!(input.read_unary());
339            let r = try!(input.read_leq_u8(rice_param)) as u32;
340            *sample = rice_to_signed((q << rice_param) | r);
341        }
342    } else {
343        for sample in buffer.iter_mut() {
344            let q = try!(input.read_unary());
345            let r = try!(input.read_gt_u8_leq_u16(rice_param));
346            *sample = rice_to_signed((q << rice_param) | r);
347        }
348    }
349
350    Ok(())
351}
352
353// Performance note: a Rice2 partition is extremely uncommon, I haven’t seen a
354// single one in any real-world FLAC file. So do not inline it, in order not to
355// pollute the caller with dead code.
356#[inline(never)]
357#[cold]
358fn decode_rice2_partition<R: ReadBytes>(input: &mut Bitstream<R>,
359                                        buffer: &mut [i32])
360                                        -> Result<()> {
361    // A Rice2 partition, starts with a 5-bit Rice parameter.
362    let rice_param = try!(input.read_leq_u8(5)) as u32;
363
364    // All ones is an escape code that indicates unencoded binary.
365    if rice_param == 0b11111 {
366        return Err(Error::Unsupported("unencoded binary is not yet implemented"))
367    }
368
369    for sample in buffer.iter_mut() {
370        // First part of the sample is the quotient, unary encoded.
371        let q = try!(input.read_unary());
372
373        // Next is the remainder, in rice_param bits. Because at this
374        // point rice_param is at most 30, we can safely read into a u32.
375        let r = try!(input.read_leq_u32(rice_param));
376        *sample = rice_to_signed((q << rice_param) | r);
377    }
378
379    Ok(())
380}
381
382fn decode_constant<R: ReadBytes>(input: &mut Bitstream<R>,
383                                 bps: u32,
384                                 buffer: &mut [i32])
385                                 -> Result<()> {
386    let sample_u32 = try!(input.read_leq_u32(bps));
387    let sample = extend_sign_u32(sample_u32, bps);
388
389    for s in buffer {
390        *s = sample;
391    }
392
393    Ok(())
394}
395
396#[cold]
397fn decode_verbatim<R: ReadBytes>(input: &mut Bitstream<R>,
398                                 bps: u32,
399                                 buffer: &mut [i32])
400                                 -> Result<()> {
401
402    // This function must not be called for a sample wider than the sample type.
403    // This has been verified at an earlier stage, but it is good to state the
404    // assumption explicitly. FLAC supports up to 32-bit samples, so the
405    // mid/side delta would require 33 bits per sample. But that is not subset
406    // FLAC, and the reference decoder does not support it either.
407    debug_assert!(bps <= 32);
408
409    // A verbatim block stores samples without encoding whatsoever.
410    for s in buffer {
411        *s = extend_sign_u32(try!(input.read_leq_u32(bps)), bps);
412    }
413
414    Ok(())
415}
416
417fn predict_fixed(order: u32, buffer: &mut [i32]) -> Result<()> {
418    // When this is called during decoding, the order as read from the subframe
419    // header has already been verified, so it is safe to assume that
420    // 0 <= order <= 4. Still, it is good to state that assumption explicitly.
421    debug_assert!(order <= 4);
422
423    // Coefficients for fitting an order n polynomial. You get these
424    // coefficients by writing down n numbers, then their differences, then the
425    // differences of the differences, etc. What results is Pascal's triangle
426    // with alternating signs.
427    let o0 = [];
428    let o1 = [1];
429    let o2 = [-1, 2];
430    let o3 = [1, -3, 3];
431    let o4 = [-1, 4, -6, 4];
432
433    // Multiplying samples with at most 6 adds 3 bits. Then summing at most 5
434    // of those values again adds at most 4 bits, so a sample type that is 7
435    // bits wider than bps should suffice. Subset FLAC supports at most 24 bits
436    // per sample, 25 for the channel delta, so using an i32 is safe here.
437
438    let coefficients: &[i32] = match order {
439        0 => &o0,
440        1 => &o1,
441        2 => &o2,
442        3 => &o3,
443        4 => &o4,
444        _ => unreachable!(),
445    };
446
447    let window_size = order as usize + 1;
448
449    // TODO: abstract away this iterating over a window into a function?
450    for i in 0..buffer.len() - order as usize {
451        // Manually do the windowing, because .windows() returns immutable slices.
452        let window = &mut buffer[i..i + window_size];
453
454        // The #coefficients elements of the window store already decoded
455        // samples, the last element of the window is the delta. Therefore,
456        // predict based on the first #coefficients samples. From the note
457        // above we know that the multiplication will not overflow for 24-bit
458        // samples, so the wrapping mul is safe. If it wraps, the file was
459        // invalid, and we make no guarantees about the decoded result. But
460        // we explicitly do not crash.
461        let prediction = coefficients.iter()
462                                     .zip(window.iter())
463                                     .map(|(&c, &s)| num::Wrapping(c) * num::Wrapping(s))
464                                     // Rust 1.13 does not support using `sum`
465                                     // with `Wrapping`, so do a fold.
466                                     .fold(num::Wrapping(0), |a, x| a + x).0;
467
468        // The delta is stored, so the sample is the prediction + delta.
469        let delta = window[coefficients.len()];
470        window[coefficients.len()] = prediction.wrapping_add(delta);
471    }
472
473    Ok(())
474}
475
476#[test]
477fn verify_predict_fixed() {
478    // The following data is from an actual FLAC stream and has been verified
479    // against the reference decoder. The data is from a 16-bit stream.
480    let mut buffer = [-729, -722, -667, -19, -16,  17, -23, -7,
481                        16,  -16,   -5,   3,  -8, -13, -15, -1];
482    assert!(predict_fixed(3, &mut buffer).is_ok());
483    assert_eq!(&buffer, &[-729, -722, -667, -583, -486, -359, -225, -91,
484                            59,  209,  354,  497,  630,  740,  812, 845]);
485
486    // The following data causes overflow of i32 when not handled with care.
487    let mut buffer = [21877, 27482, -6513];
488    assert!(predict_fixed(2, &mut buffer).is_ok());
489    assert_eq!(&buffer, &[21877, 27482, 26574]);
490}
491
492fn decode_fixed<R: ReadBytes>(input: &mut Bitstream<R>,
493                              bps: u32,
494                              order: u32,
495                              buffer: &mut [i32])
496                              -> Result<()> {
497    // The length of the buffer which is passed in, is the length of the block.
498    // Thus, the number of warm-up samples must not exceed that length.
499    if buffer.len() < order as usize {
500        return fmt_err("invalid fixed subframe, order is larger than block size")
501    }
502
503    // There are order * bits per sample unencoded warm-up sample bits.
504    try!(decode_verbatim(input, bps, &mut buffer[..order as usize]));
505
506    // Next up is the residual. We decode into the buffer directly, the
507    // predictor contributions will be added in a second pass. The first
508    // `order` samples have been decoded already, so continue after that.
509    try!(decode_residual(input,
510                         buffer.len() as u16,
511                         &mut buffer[order as usize..]));
512
513    try!(predict_fixed(order, buffer));
514
515    Ok(())
516}
517
518/// Apply LPC prediction for subframes with LPC order of at most 12.
519///
520/// This function takes advantage of the upper bound on the order. Virtually all
521/// files that occur in the wild are subset-compliant files, which have an order
522/// of at most 12, so it makes sense to optimize for this. A simpler (but
523/// slower) fallback is implemented in `predict_lpc_high_order`.
524fn predict_lpc_low_order(
525    raw_coefficients: &[i16],
526    qlp_shift: i16,
527    buffer: &mut [i32],
528) {
529    debug_assert!(qlp_shift >= 0, "Right-shift by negative value is not allowed.");
530    debug_assert!(qlp_shift < 64, "Cannot shift by more than integer width.");
531    // The decoded residuals are 25 bits at most (assuming subset FLAC of at
532    // most 24 bits per sample, but there is the delta encoding for channels).
533    // The coefficients are 16 bits at most, so their product is 41 bits. In
534    // practice the predictor order does not exceed 12, so adding 12 numbers of
535    // 41 bits each requires at most 53 bits. Therefore, do all intermediate
536    // computations as i64.
537
538    // In the code below, a predictor order of 12 is assumed. This aids
539    // optimization and vectorization by making some counts available at compile
540    // time. If the actual order is less than 12, simply set the early
541    // coefficients to 0.
542    let order = raw_coefficients.len();
543    let coefficients = {
544        let mut buf = [0i64; 12];
545        let mut i = 12 - order;
546        for c in raw_coefficients {
547            buf[i] = *c as i64;
548            i = i + 1;
549        }
550        buf
551    };
552
553    // The linear prediction is essentially an inner product of the known
554    // samples with the coefficients, followed by a shift. To be able to do an
555    // inner product of 12 elements at a time, we must first have 12 samples.
556    // If the predictor order is less, first predict the few samples after the
557    // warm-up samples.
558    let left = cmp::min(12, buffer.len()) - order;
559    for i in 0..left {
560        let prediction = raw_coefficients.iter()
561                                         .zip(&buffer[i..order + i])
562                                         .map(|(&c, &s)| c as i64 * s as i64)
563                                         .sum::<i64>() >> qlp_shift;
564        let delta = buffer[order + i] as i64;
565        buffer[order + i] = (prediction + delta) as i32;
566    }
567
568    if buffer.len() <= 12 {
569        return
570    }
571
572    // At this point, buffer[0..12] has been predicted. For the rest of the
573    // buffer we can do inner products of 12 samples. This reduces the amount of
574    // conditional code, and improves performance significantly.
575    for i in 12..buffer.len() {
576        let prediction = coefficients.iter()
577                                     .zip(&buffer[i - 12..i])
578                                     .map(|(&c, &s)| c * s as i64)
579                                     .sum::<i64>() >> qlp_shift;
580        let delta = buffer[i] as i64;
581        buffer[i] = (prediction + delta) as i32;
582    }
583}
584
585/// Apply LPC prediction for non-subset subframes, with LPC order > 12.
586fn predict_lpc_high_order(
587    coefficients: &[i16],
588    qlp_shift: i16,
589    buffer: &mut [i32],
590) {
591    // NOTE: See `predict_lpc_low_order` for more details. This function is a
592    // copy that lifts the order restrictions (and specializations) at the cost
593    // of performance. It is only used for subframes with a high LPC order,
594    // which only occur in non-subset files. Such files are rare in the wild.
595
596    let order = coefficients.len();
597
598    debug_assert!(qlp_shift >= 0, "Right-shift by negative value is not allowed.");
599    debug_assert!(qlp_shift < 64, "Cannot shift by more than integer width.");
600    debug_assert!(order > 12, "Use the faster predict_lpc_low_order for LPC order <= 12.");
601    debug_assert!(buffer.len() >= order, "Buffer must fit at least `order` warm-up samples.");
602
603    // The linear prediction is essentially an inner product of the known
604    // samples with the coefficients, followed by a shift. The first `order`
605    // samples are stored as-is.
606    for i in order..buffer.len() {
607        let prediction = coefficients.iter()
608                                     .zip(&buffer[i - order..i])
609                                     .map(|(&c, &s)| c as i64 * s as i64)
610                                     .sum::<i64>() >> qlp_shift;
611        let delta = buffer[i] as i64;
612        buffer[i] = (prediction + delta) as i32;
613    }
614}
615
616#[test]
617fn verify_predict_lpc() {
618    // The following data is from an actual FLAC stream and has been verified
619    // against the reference decoder. The data is from a 16-bit stream.
620    let coefficients = [-75, 166,  121, -269, -75, -399, 1042];
621    let mut buffer = [-796, -547, -285,  -32, 199,  443,  670, -2,
622                       -23,   14,    6,    3,  -4,   12,   -2, 10];
623    predict_lpc_low_order(&coefficients, 9, &mut buffer);
624    assert_eq!(&buffer, &[-796, -547, -285,  -32,  199,  443,  670,  875,
625                          1046, 1208, 1343, 1454, 1541, 1616, 1663, 1701]);
626
627    // The following data causes an overflow when not handled with care.
628    let coefficients = [119, -255, 555, -836, 879, -1199, 1757];
629    let mut buffer = [-21363, -21951, -22649, -24364, -27297, -26870, -30017, 3157];
630    predict_lpc_low_order(&coefficients, 10, &mut buffer);
631    assert_eq!(&buffer, &[-21363, -21951, -22649, -24364, -27297, -26870, -30017, -29718]);
632
633    // The following data from a real-world file has a high LPC order, is has
634    // more than 12 coefficients. The excepted output has been verified against
635    // the reference decoder.
636    let coefficients = [
637        709, -2589, 4600, -4612, 1350, 4220, -9743, 12671, -12129, 8586,
638        -3775, -645, 3904, -5543, 4373, 182, -6873, 13265, -15417, 11550,
639    ];
640    let mut buffer = [
641        213238, 210830, 234493, 209515, 235139, 201836, 208151, 186277, 157720, 148176,
642        115037, 104836, 60794, 54523, 412, 17943, -6025, -3713, 8373, 11764, 30094,
643    ];
644    predict_lpc_high_order(&coefficients, 12, &mut buffer);
645    assert_eq!(&buffer, &[
646        213238, 210830, 234493, 209515, 235139, 201836, 208151, 186277, 157720, 148176,
647        115037, 104836, 60794, 54523, 412, 17943, -6025, -3713, 8373, 11764, 33931,
648    ]);
649}
650
651fn decode_lpc<R: ReadBytes>(input: &mut Bitstream<R>,
652                            bps: u32,
653                            order: u32,
654                            buffer: &mut [i32])
655                            -> Result<()> {
656    // The order minus one fits in 5 bits, so the order is at most 32.
657    debug_assert!(order <= 32);
658
659    // On the frame decoding level it is ensured that the buffer is large
660    // enough. If it can't even fit the warm-up samples, then there is a frame
661    // smaller than its lpc order, which is invalid.
662    if buffer.len() < order as usize {
663        return fmt_err("invalid LPC subframe, lpc order is larger than block size")
664    }
665
666    // There are order * bits per sample unencoded warm-up sample bits.
667    try!(decode_verbatim(input, bps, &mut buffer[..order as usize]));
668
669    // Next are four bits quantised linear predictor coefficient precision - 1.
670    let qlp_precision = try!(input.read_leq_u8(4)) as u32 + 1;
671
672    // The bit pattern 1111 is invalid.
673    if qlp_precision - 1 == 0b1111 {
674        return fmt_err("invalid subframe, qlp precision value invalid");
675    }
676
677    // Next are five bits quantized linear predictor coefficient shift,
678    // in signed two's complement. Read 5 bits and then extend the sign bit.
679    let qlp_shift_unsig = try!(input.read_leq_u16(5));
680    let qlp_shift = extend_sign_u16(qlp_shift_unsig, 5);
681
682    // The spec does allow the qlp shift to be negative, but in practice this
683    // does not happen. Fully supporting it would be a performance hit, as an
684    // arithmetic shift by a negative amount is invalid, so this would incur a
685    // branch. If a real-world file ever hits this case, then we should consider
686    // making two LPC predictors, one for positive, and one for negative qlp.
687    if qlp_shift < 0 {
688        let msg = "a negative quantized linear predictor coefficient shift is \
689                   not supported, please file a bug.";
690        return Err(Error::Unsupported(msg))
691    }
692
693    // Finally, the coefficients themselves. The order is at most 32, so all
694    // coefficients can be kept on the stack. Store them in reverse, because
695    // that how they are used in prediction.
696    let mut coefficients = [0; 32];
697    for coef in coefficients[..order as usize].iter_mut().rev() {
698        // We can safely read into a u16, qlp_precision is at most 15.
699        let coef_unsig = try!(input.read_leq_u16(qlp_precision));
700        *coef = extend_sign_u16(coef_unsig, qlp_precision);
701    }
702
703    // Next up is the residual. We decode it into the buffer directly, the
704    // predictor contributions will be added in a second pass. The first
705    // `order` samples have been decoded already, so continue after that.
706    try!(decode_residual(input,
707                         buffer.len() as u16,
708                         &mut buffer[order as usize..]));
709
710    // In "subset"-compliant files, the LPC order is at most 12. For LPC
711    // prediction of such files we have a special fast path that takes advantage
712    // of the low order. We can still decode non-subset file using a less
713    // specialized implementation. Non-subset files are rare in the wild.
714    if order <= 12 {
715        predict_lpc_low_order(&coefficients[..order as usize], qlp_shift, buffer);
716    } else {
717        predict_lpc_high_order(&coefficients[..order as usize], qlp_shift, buffer);
718    }
719
720    Ok(())
721}