symphonia_bundle_mp3/layer3/
stereo.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8use symphonia_core::errors::{decode_error, Result};
9
10use crate::common::{ChannelMode, FrameHeader, Mode};
11
12use super::{common::*, Granule};
13
14use std::cmp::max;
15use std::{f32, f64};
16
17use lazy_static::lazy_static;
18
/// The invalid intensity position for MPEG1 bitstreams.
///
/// A band whose intensity position is greater than or equal to this value is not decoded as
/// intensity stereo by `process_intensity`.
const INTENSITY_INV_POS_MPEG1: u8 = 7;
21
/// The invalid intensity position for MPEG2 and MPEG2.5 bitstreams.
///
/// A band whose intensity position is greater than or equal to this value is not decoded as
/// intensity stereo by `process_intensity`.
///
/// NOTE: Some decoders also consider 7 to be an invalid intensity position in MPEG2 and MPEG2.5.
/// However, this appears wrong. According to the standard, the /maximum/ value for the intensity
/// position is considered invalid. For MPEG1, the intensity ratios are only specified for
/// is_pos < 7, so 7 is the invalid position. For MPEG2, the ratios are specified for is_pos < 31,
/// so 31 is the invalid position. Therefore, it doesn't make sense to consider is_pos == 7
/// invalid for MPEG2 or 2.5.
const INTENSITY_INV_POS_MPEG2: u8 = 31;
30
31lazy_static! {
32    /// (Left, right) channel coefficients for decoding intensity stereo in MPEG2 bitstreams.
33    ///
34    /// These coefficients are derived from section 2.4.3.2 of ISO/IEC 13818-3.
35    ///
36    /// As per the specification, for a given intensity position, is_pos (0 <= is_pos < 32), the
37    /// channel coefficients, k_l and k_r, may be calculated as per the table below:
38    ///
39    /// ```text
40    /// If...            | k_l                     | k_r
41    /// -----------------+-------------------------+-------------------
42    /// is_pos     == 0  | 1.0                     | 1.0
43    /// is_pos & 1 == 1  | i0 ^ [(is_pos + 1) / 2] | 1.0
44    /// is_pos & 1 == 0  | 1.0                     | i0 ^ (is_pos / 2)
45    /// ```
46    ///
47    /// The value of i0 is dependant on the least significant bit of scalefac_compress.
48    ///
49    ///  ```text
50    /// scalefac_compress & 1 | i0
51    /// ----------------------+---------------------
52    /// 0                     | 1 / sqrt(sqrt(2.0))
53    /// 1                     | 1 / sqrt(2.0)
54    /// ```
55    ///
56    /// The first dimension of this table is indexed by scalefac_compress & 1 to select i0. The
57    /// second dimension is indexed by is_pos to obtain the channel coefficients. Note that
58    /// is_pos == 31 is considered an invalid position, but IS included in the table.
59    static ref INTENSITY_STEREO_RATIOS_MPEG2: [[(f32, f32); 32]; 2] = {
60        let is_scale: [f64; 2] = [
61            1.0 / f64::sqrt(f64::consts::SQRT_2),
62            f64::consts::FRAC_1_SQRT_2,
63        ];
64
65        let mut ratios = [[(0.0, 0.0); 32]; 2];
66
67        for (i, is_pos) in (0..32).enumerate() {
68            if is_pos & 1 != 0 {
69                // Odd case.
70                ratios[0][i] = (is_scale[0].powf(f64::from(is_pos + 1) / 2.0) as f32, 1.0);
71                ratios[1][i] = (is_scale[1].powf(f64::from(is_pos + 1) / 2.0) as f32, 1.0);
72            }
73            else {
74                // Even & zero case.
75                ratios[0][i] = (1.0, is_scale[0].powf(f64::from(is_pos) / 2.0) as f32);
76                ratios[1][i] = (1.0, is_scale[1].powf(f64::from(is_pos) / 2.0) as f32);
77            }
78        }
79
80        ratios
81    };
82}
83
84lazy_static! {
85    /// (Left, right) channel coeffcients for decoding intensity stereo in MPEG1 bitstreams.
86    ///
87    /// These coefficients are derived from section 2.4.3.4.9.3 of ISO/IEC 11172-3.
88    ///
89    /// As per the specification, for a given intensity position, is_pos (0 <= is_pos < 7), a ratio,
90    /// is_ratio, is calculated as follows:
91    ///
92    /// ```text
93    /// is_ratio = tan(is_pos * PI/12)
94    /// ```
95    ///
96    /// Then, the channel coefficients, k_l and k_r, are calculated as follows:
97    ///
98    /// ```text
99    /// k_l = is_ratio / (1 + is_ratio)
100    /// k_r =        1 / (1 + is_ratio)
101    /// ```
102    ///
103    /// This table is indexed by is_pos. Note that is_pos == 7 is invalid and is NOT included in the
104    /// table.
105    static ref INTENSITY_STEREO_RATIOS_MPEG1: [(f32, f32); 7] = {
106        const PI_12: f64 = f64::consts::PI / 12.0;
107
108        let mut ratios = [(0.0, 0.0); 7];
109
110        for (is_pos, ratio) in ratios.iter_mut().enumerate() {
111            let is_ratio = (PI_12 * is_pos as f64).tan();
112            *ratio = (
113                (is_ratio / (1.0 + is_ratio)) as f32,
114                (1.0 / (1.0 + is_ratio)) as f32
115            );
116        }
117
118        ratios[6] = (1.0, 0.0);
119
120        ratios
121    };
122}
123
/// Converts a mid-side (MS) coded channel pair into left and right channels, in place.
///
/// In mid-side stereo, the two transmitted channels carry the average (mid) and the difference
/// (side) of the left and right channels. As per ISO/IEC 11172-3, the left and right channels are
/// reconstructed as follows:
///
/// ```text
///      l[i] = (m[i] + s[i]) / sqrt(2)
///      r[i] = (m[i] - s[i]) / sqrt(2)
/// ```
///
/// Here l[i] and r[i] are the left and right channel samples, and m[i] and s[i] are the mid and
/// side channel samples.
///
/// On entry, `mid` holds m[i] (channel 0 of the bitstream) and `side` holds s[i] (channel 1). On
/// return, `mid` holds the left channel and `side` holds the right channel. Both slices are
/// expected to be of equal length; this is checked in debug builds only.
fn process_mid_side(mid: &mut [f32], side: &mut [f32]) {
    debug_assert!(mid.len() == side.len());

    mid.iter_mut().zip(side.iter_mut()).for_each(|(ch0, ch1)| {
        // Read both inputs before overwriting either of them.
        let (m, s) = (*ch0, *ch1);
        *ch0 = (m + s) * f32::consts::FRAC_1_SQRT_2;
        *ch1 = (m - s) * f32::consts::FRAC_1_SQRT_2;
    });
}
153
154/// Decodes channel 0 of the intensity stereo coded signal into left and right channels.
155///
156/// As per ISO/IEC 11172-3, the following calculation may be performed to decode the intensity
157/// stereo coded signal into left and right channels.
158///
159/// ```text
160///      l[i] = ch0[i] * k_l
161///      r[i] = ch0[i] * l_r
162/// ```
163///
164/// where:
165///      l[i], and r[i] are the left and right channels, respectively.
166///      ch0[i] is the intensity stereo coded signal found in channel 0.
167///      k_l, and k_r are the left and right channel ratios, respectively.
168fn process_intensity(
169    intensity_pos: u8,
170    intensity_table: &[(f32, f32)],
171    intensity_max: u8,
172    mid_side: bool,
173    ch0: &mut [f32],
174    ch1: &mut [f32],
175) {
176    if intensity_pos < intensity_max {
177        let (ratio_l, ratio_r) = intensity_table[usize::from(intensity_pos)];
178
179        for (l, r) in ch0.iter_mut().zip(ch1) {
180            let is = *l;
181            *l = ratio_l * is;
182            *r = ratio_r * is;
183        }
184    }
185    else if mid_side {
186        process_mid_side(ch0, ch1);
187    }
188}
189
/// Returns `true` if every sample in the band is zero. An empty band is considered zeroed.
#[inline(always)]
fn is_zero_band(band: &[f32]) -> bool {
    band.iter().all(|&sample| sample == 0.0)
}
195
196/// Decodes all intensity stereo coded bands within an entire long block and returns the intensity
197/// bound.
198fn process_intensity_long_block(
199    header: &FrameHeader,
200    granule: &Granule,
201    mid_side: bool,
202    max_bound: usize,
203    ch0: &mut [f32; 576],
204    ch1: &mut [f32; 576],
205) -> usize {
206    // As per ISO/IEC 11172-3 and ISO/IEC 13818-3, for long blocks that have intensity stereo
207    // coding enabled, all bands starting after the last non-zero band in channel 1 may be
208    // intensity stereo coded.
209    //
210    // The scale-factors in channel 1 for those respective bands determine the intensity position.
211
212    // The rzero sample index is the index of last non-zero sample plus 1.
213    let rzero = granule.channels[1].rzero;
214
215    // Select the intensity stereo ratios table.
216    let (is_table, is_inv_pos) = if header.is_mpeg1() {
217        (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
218    }
219    else {
220        let is_scale = granule.channels[1].scalefac_compress & 1;
221        (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
222    };
223
224    let bands = &SFB_LONG_BANDS[header.sample_rate_idx];
225
226    // The intensity positions are stored in the right channel (channel 1) scalefactors. The
227    // intensity position for band 21 is not coded and is copied from band 20.
228    let mut is_pos = [0; 22];
229    is_pos.copy_from_slice(&granule.channels[1].scalefacs[..22]);
230    is_pos[21] = is_pos[20];
231
232    // Create an iterator that yields a band start-end pair, and scale-factor.
233    let bands_iter = bands.iter().zip(&bands[1..]).zip(is_pos.iter());
234
235    let mut bound = max_bound;
236
237    // Iterate over each band and decode the intensity stereo coding if the band is zero.
238    for ((&start, &end), &is_pos) in bands_iter.rev() {
239        // Bands starting above rzero are always 0, however bands below it are ambiguous.
240        let is_zero_band = start >= rzero || is_zero_band(&ch1[start..end]);
241
242        if is_zero_band {
243            process_intensity(
244                is_pos,
245                is_table,
246                is_inv_pos,
247                mid_side,
248                &mut ch0[start..end],
249                &mut ch1[start..end],
250            );
251        }
252        else {
253            break;
254        }
255
256        // Update the intensity bound to the start of the band since it has now been processed.
257        bound = start;
258    }
259
260    bound
261}
262
/// Decodes all intensity stereo coded bands within an entire short block and returns the intensity
/// bound.
///
/// * `header` selects the scale-factor band tables (by sample rate index) and the intensity
///   ratios table (by bitstream version).
/// * `granule` provides the channel 1 scale-factors, which carry the intensity positions, and
///   `scalefac_compress`, which selects the ratios table for non-MPEG1 bitstreams.
/// * `is_mixed` indicates a mixed block: long bands followed by short bands.
/// * `mid_side` indicates mid-side stereo is enabled. When set, windows that are not intensity
///   coded are decoded as mid-side stereo here.
/// * `max_bound` is the initial intensity bound. It is returned unchanged only if no band is
///   visited at all.
/// * `ch0` and `ch1` are the samples of channel 0 and channel 1, decoded in place.
///
/// The returned intensity bound is the start sample index of the lowest band visited by this
/// function.
fn process_intensity_short_block(
    header: &FrameHeader,
    granule: &Granule,
    is_mixed: bool,
    mid_side: bool,
    max_bound: usize,
    ch0: &mut [f32; 576],
    ch1: &mut [f32; 576],
) -> usize {
    // For short, non-mixed, blocks, each band is composed of 3 windows (windows 0 thru 2). Windows
    // are interleaved in each band.
    //
    // +--------------+--------------+--------------+-------+
    // |     sfb0     |     sfb1     |     sfb2     |  ...  |
    // +--------------+--------------+--------------+-------+
    // | w0 | w1 | w2 | w0 | w1 | w2 | w0 | w1 | w2 |  ...  |
    // +--------------+--------------+--------------+-------+
    //
    // However, each window of the same index is logically contiguous as depicted below.
    //
    // +------+------+------+------+
    // | sfb0 | sfb1 | sfb2 | .... |
    // +------+------+------+------+
    // |  w0  |  w0  |  w0  | .... |
    // +-------------+------+------+
    // |  w1  |  w1  |  w1  | .... |
    // +-------------+------+------+
    // |  w2  |  w2  |  w2  | .... |
    // +------+------+------+------+
    //
    // Each logically contiguous window may have its own intensity bound. For example, in the
    // example below, the intensity bound for window 0 is sfb0, for window 1 it's sfb2, and for
    // window 2 it's sfb1.
    //
    //      +------+------+------+------+
    //      | sfb0 | sfb1 | sfb2 | .... |
    //      +------+------+------+------+
    //  w0  | 0000 | 0000 | 0000 | 0... |
    //      +-------------+------+------+
    //  w1  | abcd | xyzw | 0000 | 0... |
    //      +-------------+------+------+
    //  w2  | xyz0 | 0000 | 0000 | 0... |
    //      +------+------+------+------+
    //
    // For short blocks that are mixed, the long bands at the start follow the same rules as long
    // blocks (see above). For example, for the block below, if sfb1 is the intensity bound, then
    // all samples from sfb1 onwards must be zero. If the intensity bound is not within the long
    // bands then the rules stated above are followed whereby each window has its own intensity
    // bound.
    //
    // |> Long bands        |> Short bands (3 windows)
    // +------+------+------+--------+--------+------+
    // | sfb0 | sfb1 | .... | sfbN-2 | sfbN-1 | sfbN |
    // |------+------+------+--------+--------+------+
    // |      |      |      |   w0   |   w0   |  w0  |
    // |      |      |      +--------+--------+------+
    // |      |      | .... |   w1   |   w1   |  w1  |
    // |      |      |      +--------+--------+------+
    // |      |      |      |   w2   |   w2   |  w2  |
    // +------+------+------+--------+--------+------+
    //

    // First, if the short block is mixed, get the pair of short and long bands. Otherwise, if the
    // block is not mixed, get only the short bands. In both cases, the total number of
    // scale-factors is also returned in `sfi`, so that `sfi - 1` is the index of the last
    // scale-factor (intensity position).
    let (short_bands, long_bands, mut sfi) = if is_mixed {
        let bands = SFB_MIXED_BANDS[header.sample_rate_idx];
        let switch = SFB_MIXED_SWITCH_POINT[header.sample_rate_idx];
        // Variable number of short and long scalefactor bands based on the switch point. The band
        // edge at the switch point is shared: it ends the long bands and starts the short bands.
        (&bands[switch..], Some(&bands[..switch + 1]), bands.len() - 1)
    }
    else {
        // 39 scalefactors from 13 scalefactor bands with 3 short windows per band.
        (&SFB_SHORT_BANDS[header.sample_rate_idx][..], None, 39)
    };

    // Select the intensity stereo ratios table based on the bitstream version.
    let (is_table, is_inv_pos) = if header.is_mpeg1() {
        (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
    }
    else {
        // For non-MPEG1 bitstreams, the low bit of scalefac_compress selects the ratios table.
        let is_scale = granule.channels[1].scalefac_compress & 1;
        (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
    };

    // The intensity position for the final band (last three short windows) is not coded and is
    // copied from the previous band.
    //
    // NOTE(review): this copy always places the final band at indices 36..39, which matches the
    // 39 scale-factor non-mixed layout. For mixed blocks the starting index is
    // `bands.len() - 1` instead of 39 -- confirm the final band lands at the same indices there.
    let mut is_pos = [0; 39];
    is_pos[..36].copy_from_slice(&granule.channels[1].scalefacs[..36]);
    is_pos[36..].copy_from_slice(&granule.channels[1].scalefacs[33..36]);

    // Tracks, per window, whether every band visited so far (from the top down) was zeroed in
    // channel 1. Once a window becomes non-zero it stays that way for all lower bands.
    let mut window_is_zero = [true; 3];

    let mut bound = max_bound;
    let mut found_bound = false;

    // Process the short bands from the highest band down. Each iteration yields the four band
    // edges that delimit the three windows of one band: window 0 is s0..s1, window 1 is s1..s2,
    // and window 2 is s2..s3.
    for (((&s0, &s1), &s2), &s3) in short_bands
        .iter()
        .zip(&short_bands[1..])
        .zip(&short_bands[2..])
        .zip(&short_bands[3..])
        .step_by(3)
        .rev()
    {
        // For each short band, the following logic is repeated for each of the three windows,
        // starting with window 2 and ending with window 0.
        //
        // First, if the corresponding window in the previous band was zeroed, check if the
        // window in this band is also zeroed. Note that if the window is non-zero, this statement
        // short-circuits and avoids the costly zero-check.
        window_is_zero[2] = window_is_zero[2] && is_zero_band(&ch1[s2..s3]);

        // If the window is zeroed, process it with intensity stereo.
        if window_is_zero[2] {
            process_intensity(
                is_pos[sfi - 1],
                is_table,
                is_inv_pos,
                mid_side,
                &mut ch0[s2..s3],
                &mut ch1[s2..s3],
            );
        }
        else if mid_side {
            // If the window is non-zeroed, process it with mid-side stereo.
            process_mid_side(&mut ch0[s2..s3], &mut ch1[s2..s3]);
        }

        // Decrement the scalefactor (intensity position) index to advance to the next window.
        sfi -= 1;

        // Repeat the same process for window 1.
        window_is_zero[1] = window_is_zero[1] && is_zero_band(&ch1[s1..s2]);

        if window_is_zero[1] {
            process_intensity(
                is_pos[sfi - 1],
                is_table,
                is_inv_pos,
                mid_side,
                &mut ch0[s1..s2],
                &mut ch1[s1..s2],
            );
        }
        else if mid_side {
            process_mid_side(&mut ch0[s1..s2], &mut ch1[s1..s2]);
        }

        sfi -= 1;

        // Repeat the same process for window 0, the last window processed in this band.
        window_is_zero[0] = window_is_zero[0] && is_zero_band(&ch1[s0..s1]);

        if window_is_zero[0] {
            process_intensity(
                is_pos[sfi - 1],
                is_table,
                is_inv_pos,
                mid_side,
                &mut ch0[s0..s1],
                &mut ch1[s0..s1],
            );
        }
        else if mid_side {
            process_mid_side(&mut ch0[s0..s1], &mut ch1[s0..s1]);
        }

        sfi -= 1;

        // Update the intensity bound to the start of the first window since all three windows have
        // now been processed by either intensity or mid-side stereo. Note that this is the "final"
        // intensity bound of all the windows in the short bands. Individual windows may have
        // reached their intensity bound earlier. Those windows are processed with mid-side stereo.
        bound = s0;

        // Determine if all windows are non-zero.
        found_bound = !window_is_zero[0] && !window_is_zero[1] && !window_is_zero[2];

        // If all windows are non-zero then all the remaining bands should be processed with
        // mid-side stereo. Break out early in this case.
        if found_bound {
            break;
        }
    }

    // If the final intensity bound was not found within the short bands, then it may be found
    // within the long bands if the short block is mixed.
    if !found_bound {
        // If the short block is mixed, the long bands will not be None.
        if let Some(long_bands) = long_bands {
            // Process the long bands exactly as if it were a long block: walk from the highest
            // long band down and stop at the first band that is non-zero in channel 1.
            for (&start, &end) in long_bands.iter().zip(&long_bands[1..]).rev() {
                let is_zero_band = is_zero_band(&ch1[start..end]);

                if is_zero_band {
                    process_intensity(
                        is_pos[sfi - 1],
                        is_table,
                        is_inv_pos,
                        mid_side,
                        &mut ch0[start..end],
                        &mut ch1[start..end],
                    );
                }
                else {
                    break;
                }

                // Each long band has a single scale-factor, so step back by one per band.
                sfi -= 1;

                bound = start;
            }
        }
    }

    // Return the intensity bound.
    bound
}
483
484/// Perform joint stereo decoding on the channel pair.
485pub(super) fn stereo(
486    header: &FrameHeader,
487    granule: &mut Granule,
488    ch: &mut [[f32; 576]; 2],
489) -> Result<()> {
490    // Determine whether mid-side, and/or intensity stereo coding is used.
491    let (mid_side, intensity) = match header.channel_mode {
492        ChannelMode::JointStereo(Mode::Layer3 { mid_side, intensity }) => (mid_side, intensity),
493        ChannelMode::JointStereo(Mode::Intensity { .. }) => {
494            // This function only supports decoding Layer 3 stereo encodings, it is a fundamental
495            // error in the decoder logic if layer 1 or 2 stereo encodings are being decoded with
496            // this function.
497            panic!("invalid mode extension for layer 3 stereo decoding")
498        }
499        _ => return Ok(()),
500    };
501
502    // The block types must be the same.
503    if granule.channels[0].block_type != granule.channels[1].block_type {
504        return decode_error("mpa: stereo channel pair block_type mismatch");
505    }
506
507    // Split the sample buffer into two channels.
508    let (ch0, ch1) = {
509        let (ch0, ch1) = ch.split_first_mut().unwrap();
510        (ch0, &mut ch1[0])
511    };
512
513    // Joint stereo processing as specified in layer 3 is a combination of mid-side, and intensity
514    // encoding schemes. Each scale-factor band may use either mid-side, intensity, or no stereo
515    // encoding. The type of encoding used for each scale-factor band is determined by the MPEG
516    // bitstream version, the mode extension, the block type, and the content of the scale-factor
517    // bands.
518    let end = max(granule.channels[0].rzero, granule.channels[1].rzero);
519
520    // Decode intensity stereo coded bands if it is enabled and get the intensity bound.
521    let is_bound = if intensity {
522        // Decode intensity stereo coded bands based on bitstream version and block type.
523        match granule.channels[1].block_type {
524            BlockType::Short { is_mixed } => {
525                process_intensity_short_block(header, granule, is_mixed, mid_side, end, ch0, ch1)
526            }
527            _ => process_intensity_long_block(header, granule, mid_side, end, ch0, ch1),
528        }
529    }
530    // If intensity stereo coding is not enabled, then all samples are processed with mid-side
531    // stereo decoding. In other words, there are no samples encoded with intensity stereo and
532    // therefore the intensity bound is equal to the end of the non-zero portion of the samples.
533    else {
534        end
535    };
536
537    // If mid-side stereo coding is enabled, all samples up to the intensity bound should be
538    // decoded as mid-side stereo.
539    if mid_side && is_bound > 0 {
540        process_mid_side(&mut ch0[0..is_bound], &mut ch1[0..is_bound]);
541    }
542
543    // With joint stereo encoding, there is usually a mismatch between the number of samples
544    // initially read from the bitstream for each channel. This count is stored as the rzero sample
545    // index. However, after joint stereo decoding, both channels will have the same number of
546    // samples. Update rzero for both channels with the actual number of samples.
547    if intensity || mid_side {
548        granule.channels[0].rzero = end;
549        granule.channels[1].rzero = end;
550    }
551
552    Ok(())
553}