symphonia_bundle_mp3/layer3/
stereo.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
// Symphonia
// Copyright (c) 2019-2022 The Project Symphonia Developers.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use symphonia_core::errors::{decode_error, Result};

use crate::common::{ChannelMode, FrameHeader, Mode};

use super::{common::*, Granule};

use std::cmp::max;
use std::{f32, f64};

use lazy_static::lazy_static;

/// The invalid intensity position for MPEG1 bitstreams.
///
/// The MPEG1 ratio table, `INTENSITY_STEREO_RATIOS_MPEG1`, only holds entries for intensity
/// positions below this value.
const INTENSITY_INV_POS_MPEG1: u8 = 7;

/// The invalid intensity position for MPEG2 and MPEG2.5 bitstreams.
///
/// NOTE: Some decoders also consider 7 to be an invalid intensity position in MPEG2 and MPEG2.5.
/// However, this appears wrong. According to the standard, the /maximum/ value for the intensity
/// position is considered invalid. For MPEG1, the intensity ratios are only specified for
/// is_pos < 7, whereas for MPEG2 the ratios are specified for is_pos < 31. Therefore, it doesn't
/// make sense to consider is_pos == 7 invalid for MPEG2 or 2.5.
const INTENSITY_INV_POS_MPEG2: u8 = 31;

lazy_static! {
    /// (Left, right) channel coefficients for decoding intensity stereo in MPEG2 bitstreams.
    ///
    /// The coefficients follow section 2.4.3.2 of ISO/IEC 13818-3.
    ///
    /// For an intensity position, is_pos (0 <= is_pos < 32), the channel coefficients, k_l and
    /// k_r, are given by the table below:
    ///
    /// ```text
    /// If...            | k_l                     | k_r
    /// -----------------+-------------------------+-------------------
    /// is_pos     == 0  | 1.0                     | 1.0
    /// is_pos & 1 == 1  | i0 ^ [(is_pos + 1) / 2] | 1.0
    /// is_pos & 1 == 0  | 1.0                     | i0 ^ (is_pos / 2)
    /// ```
    ///
    /// The value of i0 is dependent on the least significant bit of scalefac_compress.
    ///
    /// ```text
    /// scalefac_compress & 1 | i0
    /// ----------------------+---------------------
    /// 0                     | 1 / sqrt(sqrt(2.0))
    /// 1                     | 1 / sqrt(2.0)
    /// ```
    ///
    /// The outer dimension of the table is indexed by scalefac_compress & 1 (selecting i0), and the
    /// inner dimension by is_pos. Note that is_pos == 31 is considered an invalid position, but IS
    /// included in the table.
    static ref INTENSITY_STEREO_RATIOS_MPEG2: [[(f32, f32); 32]; 2] = {
        // The two possible values of i0, in scalefac_compress & 1 order.
        let i0: [f64; 2] = [
            1.0 / f64::sqrt(f64::consts::SQRT_2),
            f64::consts::FRAC_1_SQRT_2,
        ];

        let mut table = [[(0.0f32, 0.0f32); 32]; 2];

        // Fill one row per i0. The powers are evaluated in f64 and only then narrowed to f32.
        for (row, &base) in table.iter_mut().zip(i0.iter()) {
            for (is_pos, entry) in (0u32..).zip(row.iter_mut()) {
                *entry = if is_pos & 1 == 1 {
                    // Odd position: the left channel is attenuated.
                    (base.powf(f64::from(is_pos + 1) / 2.0) as f32, 1.0)
                }
                else {
                    // Even position (including zero): the right channel is attenuated.
                    (1.0, base.powf(f64::from(is_pos) / 2.0) as f32)
                };
            }
        }

        table
    };
}

lazy_static! {
    /// (Left, right) channel coefficients for decoding intensity stereo in MPEG1 bitstreams.
    ///
    /// The coefficients follow section 2.4.3.4.9.3 of ISO/IEC 11172-3.
    ///
    /// For an intensity position, is_pos (0 <= is_pos < 7), a ratio, is_ratio, is first computed:
    ///
    /// ```text
    /// is_ratio = tan(is_pos * PI/12)
    /// ```
    ///
    /// The channel coefficients, k_l and k_r, then follow from it:
    ///
    /// ```text
    /// k_l = is_ratio / (1 + is_ratio)
    /// k_r =        1 / (1 + is_ratio)
    /// ```
    ///
    /// This table is indexed by is_pos. Note that is_pos == 7 is invalid and is NOT included in the
    /// table.
    static ref INTENSITY_STEREO_RATIOS_MPEG1: [(f32, f32); 7] = {
        const PI_12: f64 = f64::consts::PI / 12.0;

        let mut table = [(0.0f32, 0.0f32); 7];

        // Positions 0 through 5 are evaluated directly from the formulas above.
        for (is_pos, entry) in table.iter_mut().enumerate().take(6) {
            let is_ratio = (PI_12 * is_pos as f64).tan();
            let k_l = is_ratio / (1.0 + is_ratio);
            let k_r = 1.0 / (1.0 + is_ratio);
            *entry = (k_l as f32, k_r as f32);
        }

        // Position 6 corresponds to tan(PI/2), so its coefficients are set explicitly rather than
        // computed.
        table[6] = (1.0, 0.0);

        table
    };
}

/// Converts a mid/side coded channel pair into left/right channels, in place.
///
/// Mid-side (MS) stereo transmits the average (mid) and the difference (side) of the left and
/// right channels instead of the channels themselves.
///
/// As per ISO/IEC 11172-3, the left and right channels are recovered with:
///
/// ```text
///      l[i] = (m[i] + s[i]) / sqrt(2)
///      r[i] = (m[i] - s[i]) / sqrt(2)
/// ```
/// where:
///      l[i], and r[i] are the left and right channels, respectively.
///      m[i], and s[i] are the mid and side channels, respectively.
///
/// On entry `mid` holds m[i] (channel 0) and `side` holds s[i] (channel 1). On return `mid` holds
/// the left channel and `side` holds the right channel. Both slices are expected to have the same
/// length; if they differ, only the common prefix is processed in release builds.
fn process_mid_side(mid: &mut [f32], side: &mut [f32]) {
    debug_assert!(mid.len() == side.len());

    mid.iter_mut().zip(side.iter_mut()).for_each(|(ch0, ch1)| {
        // Read both inputs before either output is written.
        let (m, s) = (*ch0, *ch1);
        *ch0 = (m + s) * f32::consts::FRAC_1_SQRT_2;
        *ch1 = (m - s) * f32::consts::FRAC_1_SQRT_2;
    });
}

/// Decodes one band of an intensity stereo coded signal into left and right channels, in place.
///
/// As per ISO/IEC 11172-3, the intensity stereo coded signal is decoded into left and right
/// channels with:
///
/// ```text
///      l[i] = ch0[i] * k_l
///      r[i] = ch0[i] * k_r
/// ```
///
/// where:
///      l[i], and r[i] are the left and right channels, respectively.
///      ch0[i] is the intensity stereo coded signal found in channel 0.
///      k_l, and k_r are the left and right channel ratios, respectively.
///
/// The ratios are looked up in `intensity_table` using `intensity_pos`. If `intensity_pos` is not
/// below `intensity_max`, the position is invalid and the band is not intensity coded: it is then
/// decoded as mid-side stereo when `mid_side` is set, and left untouched otherwise.
fn process_intensity(
    intensity_pos: u8,
    intensity_table: &[(f32, f32)],
    intensity_max: u8,
    mid_side: bool,
    ch0: &mut [f32],
    ch1: &mut [f32],
) {
    // Invalid intensity position: fall back to mid-side decoding if it is enabled.
    if intensity_pos >= intensity_max {
        if mid_side {
            process_mid_side(ch0, ch1);
        }
        return;
    }

    let (k_l, k_r) = intensity_table[usize::from(intensity_pos)];

    for (left, right) in ch0.iter_mut().zip(ch1.iter_mut()) {
        // Channel 0 carries the coded signal; capture it before it is overwritten.
        let coded = *left;
        *left = k_l * coded;
        *right = k_r * coded;
    }
}

/// Returns `true` if every sample in the band compares equal to zero.
///
/// An empty band is considered zeroed.
#[inline(always)]
fn is_zero_band(band: &[f32]) -> bool {
    band.iter().all(|&sample| sample == 0.0)
}

/// Decodes all intensity stereo coded bands within an entire long block and returns the intensity
/// bound.
///
/// Bands are visited from the highest down to the lowest. Every band whose channel 1 samples are
/// all zero is passed to `process_intensity`; the first band with a non-zero sample stops the
/// scan. The returned bound is the start sample index of the lowest band that was processed, or
/// `max_bound` if no band was processed.
fn process_intensity_long_block(
    header: &FrameHeader,
    granule: &Granule,
    mid_side: bool,
    max_bound: usize,
    ch0: &mut [f32; 576],
    ch1: &mut [f32; 576],
) -> usize {
    // As per ISO/IEC 11172-3 and ISO/IEC 13818-3, in a long block with intensity stereo coding
    // enabled, every band that starts after the last non-zero band of channel 1 may be intensity
    // stereo coded. The channel 1 scale-factors of those bands carry the intensity positions.
    let right = &granule.channels[1];

    // rzero is the index of the last non-zero sample plus 1.
    let rzero = right.rzero;

    // Pick the ratio table and the invalid position that match the bitstream version.
    let (is_table, is_inv_pos) = if header.is_mpeg1() {
        (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
    }
    else {
        let is_scale = right.scalefac_compress & 1;
        (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
    };

    // Intensity positions come from the channel 1 scale-factors. Band 21 has no coded position
    // of its own and reuses the one from band 20.
    let mut positions = [0; 22];
    positions.copy_from_slice(&right.scalefacs[..22]);
    positions[21] = positions[20];

    let bands = &SFB_LONG_BANDS[header.sample_rate_idx];

    let mut bound = max_bound;

    // Each pair of adjacent band edges delimits one band; walk them from the top band downwards
    // together with the band's intensity position.
    for (edges, &pos) in bands.windows(2).zip(positions.iter()).rev() {
        let (start, end) = (edges[0], edges[1]);

        // A band starting at or above rzero is known to be zero. Below rzero, the samples must
        // be inspected.
        let zeroed = start >= rzero || is_zero_band(&ch1[start..end]);

        // The first non-zero band ends the intensity coded region.
        if !zeroed {
            break;
        }

        process_intensity(
            pos,
            is_table,
            is_inv_pos,
            mid_side,
            &mut ch0[start..end],
            &mut ch1[start..end],
        );

        // The band has been processed, so the intensity bound moves down to its start.
        bound = start;
    }

    bound
}

/// Decodes all intensity stereo coded bands within an entire short block and returns the intensity
/// bound.
///
/// Short bands are visited from the highest down to the lowest. Within each band, a window is
/// decoded with intensity stereo for as long as that window has been zero in channel 1 in every
/// band visited so far. Once a window turns non-zero it is decoded with mid-side stereo instead
/// (if `mid_side` is set). If `is_mixed` is set and at least one window is still zero after all
/// short bands were visited, the long bands are then processed like a long block.
///
/// The returned bound is the start sample index of the lowest band processed by this function,
/// or `max_bound` if no band was processed. Samples below the bound are left untouched.
fn process_intensity_short_block(
    header: &FrameHeader,
    granule: &Granule,
    is_mixed: bool,
    mid_side: bool,
    max_bound: usize,
    ch0: &mut [f32; 576],
    ch1: &mut [f32; 576],
) -> usize {
    // For short, non-mixed, blocks, each band is composed of 3 windows (windows 0 thru 2). Windows
    // are interleaved in each band.
    //
    // +--------------+--------------+--------------+-------+
    // |     sfb0     |     sfb1     |     sfb2     |  ...  |
    // +--------------+--------------+--------------+-------+
    // | w0 | w1 | w2 | w0 | w1 | w2 | w0 | w1 | w2 |  ...  |
    // +--------------+--------------+--------------+-------+
    //
    // However, each window of the same index is logically contiguous as depicted below.
    //
    // +------+------+------+------+
    // | sfb0 | sfb1 | sfb2 | .... |
    // +------+------+------+------+
    // |  w0  |  w0  |  w0  | .... |
    // +-------------+------+------+
    // |  w1  |  w1  |  w1  | .... |
    // +-------------+------+------+
    // |  w2  |  w2  |  w2  | .... |
    // +------+------+------+------+
    //
    // Each logically contiguous window may have its own intensity bound. For example, in the
    // example below, the intensity bound for window 0 is sfb0, for window 1 it's sfb2, and for
    // window 2 it's sfb1.
    //
    //      +------+------+------+------+
    //      | sfb0 | sfb1 | sfb2 | .... |
    //      +------+------+------+------+
    //  w0  | 0000 | 0000 | 0000 | 0... |
    //      +-------------+------+------+
    //  w1  | abcd | xyzw | 0000 | 0... |
    //      +-------------+------+------+
    //  w2  | xyz0 | 0000 | 0000 | 0... |
    //      +------+------+------+------+
    //
    // For short blocks that are mixed, the long bands at the start follow the same rules as long
    // blocks (see above). For example, for the block below, if sfb1 is the intensity bound, then
    // all samples from sfb1 onwards must be zero. If the intensity bound is not within the long
    // bands then the rules stated above are followed whereby each window has its own intensity
    // bound.
    //
    // |> Long bands        |> Short bands (3 windows)
    // +------+------+------+--------+--------+------+
    // | sfb0 | sfb1 | .... | sfbN-2 | sfbN-1 | sfbN |
    // |------+------+------+--------+--------+------+
    // |      |      |      |   w0   |   w0   |  w0  |
    // |      |      |      +--------+--------+------+
    // |      |      | .... |   w1   |   w1   |  w1  |
    // |      |      |      +--------+--------+------+
    // |      |      |      |   w2   |   w2   |  w2  |
    // +------+------+------+--------+--------+------+
    //

    // First, if the short block is mixed, get the pair of short and long bands. Otherwise, if the
    // block is not mixed, get only the short bands. In both cases, the total number of
    // scale-factors is also obtained. It is used as a one-past-the-end index into the intensity
    // positions, so `sfi - 1` is always the position of the next window (or long band) to decode.
    let (short_bands, long_bands, mut sfi) = if is_mixed {
        let bands = SFB_MIXED_BANDS[header.sample_rate_idx];
        let switch = SFB_MIXED_SWITCH_POINT[header.sample_rate_idx];
        // Variable number of short and long scalefactor bands based on the switch point.
        (&bands[switch..], Some(&bands[..switch + 1]), bands.len() - 1)
    }
    else {
        // 39 scalefactors from 13 scalefactor bands with 3 short windows per band.
        (&SFB_SHORT_BANDS[header.sample_rate_idx][..], None, 39)
    };

    // Select the intensity stereo ratios table based on the bitstream version.
    let (is_table, is_inv_pos) = if header.is_mpeg1() {
        (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
    }
    else {
        let is_scale = granule.channels[1].scalefac_compress & 1;
        (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
    };

    // The intensity positions are stored in the right channel (channel 1) scalefactors.
    let mut is_pos = [0; 39];
    is_pos[..36].copy_from_slice(&granule.channels[1].scalefacs[..36]);

    // The intensity position for the final band (last three short windows) is not coded and is
    // copied from the previous band. The final band occupies the three positions that end at
    // `sfi`, and the previous band the three positions before that. For non-mixed blocks
    // (sfi == 39) this copies positions 33..36 to 36..39. For mixed blocks, which have fewer
    // scale-factors, the copy must be relative to `sfi` or the final band would read positions
    // that were never coded.
    if (6..=is_pos.len()).contains(&sfi) {
        is_pos.copy_within(sfi - 6..sfi - 3, sfi - 3);
    }

    // Tracks, per window, whether the window has been zero in every band visited so far.
    let mut window_is_zero = [true; 3];

    let mut bound = max_bound;
    let mut found_bound = false;

    // Process the short bands, from the last band to the first. Each iteration yields the four
    // edges (s0..s3) that delimit the three windows of one band.
    for (((&s0, &s1), &s2), &s3) in short_bands
        .iter()
        .zip(&short_bands[1..])
        .zip(&short_bands[2..])
        .zip(&short_bands[3..])
        .step_by(3)
        .rev()
    {
        let edges = [s0, s1, s2, s3];

        // The windows are visited last-to-first (2, 1, 0) to match the descending scale-factor
        // index.
        for w in (0..3).rev() {
            let (start, end) = (edges[w], edges[w + 1]);

            // If the corresponding window in the previously visited band was zeroed, check if the
            // window in this band is also zeroed. Note that if the window was already non-zero,
            // this statement short-circuits and avoids the costly zero-check.
            window_is_zero[w] = window_is_zero[w] && is_zero_band(&ch1[start..end]);

            if window_is_zero[w] {
                // If the window is zeroed, process it with intensity stereo.
                process_intensity(
                    is_pos[sfi - 1],
                    is_table,
                    is_inv_pos,
                    mid_side,
                    &mut ch0[start..end],
                    &mut ch1[start..end],
                );
            }
            else if mid_side {
                // If the window is non-zeroed, process it with mid-side stereo.
                process_mid_side(&mut ch0[start..end], &mut ch1[start..end]);
            }

            // Decrement the scalefactor (intensity position) index to advance to the next window.
            sfi -= 1;
        }

        // Update the intensity bound to the start of the first window since all three windows have
        // now been processed by either intensity or mid-side stereo. Note that this is the "final"
        // intensity bound of all the windows in the short bands. Individual windows may have
        // reached their intensity bound earlier. Those windows are processed with mid-side stereo.
        bound = s0;

        // Determine if all windows are non-zero.
        found_bound = window_is_zero.iter().all(|&zero| !zero);

        // If all windows are non-zero then all the remaining bands should be processed with
        // mid-side stereo. Break out early in this case.
        if found_bound {
            break;
        }
    }

    // If the final intensity bound was not found within the short bands, then it may be found
    // within the long bands if the short block is mixed.
    if !found_bound {
        // If the short block is mixed, the long bands will not be None.
        if let Some(long_bands) = long_bands {
            // Process the long bands exactly as if it were a long block.
            for (&start, &end) in long_bands.iter().zip(&long_bands[1..]).rev() {
                // The first non-zero long band ends the intensity coded region.
                if !is_zero_band(&ch1[start..end]) {
                    break;
                }

                process_intensity(
                    is_pos[sfi - 1],
                    is_table,
                    is_inv_pos,
                    mid_side,
                    &mut ch0[start..end],
                    &mut ch1[start..end],
                );

                sfi -= 1;

                bound = start;
            }
        }
    }

    // Return the intensity bound.
    bound
}

/// Perform joint stereo decoding on the channel pair.
///
/// `ch` holds the samples of channel 0 and channel 1 and is decoded in place. If the frame's
/// channel mode is not joint stereo, nothing is modified. When mid-side and/or intensity stereo
/// is enabled, `rzero` of both granule channels is set to the larger of the two original values.
///
/// # Errors
///
/// Returns a decode error if the two channels of the granule have different block types.
///
/// # Panics
///
/// Panics if the channel mode carries the layer 1/2 intensity mode extension, since that
/// indicates a logic error in the caller.
pub(super) fn stereo(
    header: &FrameHeader,
    granule: &mut Granule,
    ch: &mut [[f32; 576]; 2],
) -> Result<()> {
    // Find out which joint stereo tools are in use. Anything other than joint stereo needs no
    // processing at all.
    let (mid_side, intensity) = match header.channel_mode {
        ChannelMode::JointStereo(Mode::Layer3 { mid_side, intensity }) => (mid_side, intensity),
        // Only Layer 3 stereo encodings are supported here. Reaching this arm means layer 1 or 2
        // stereo is being decoded with this function, which is a fundamental error in the
        // decoder logic.
        ChannelMode::JointStereo(Mode::Intensity { .. }) => {
            panic!("invalid mode extension for layer 3 stereo decoding")
        }
        _ => return Ok(()),
    };

    // Both channels of the pair must use the same block type.
    if granule.channels[0].block_type != granule.channels[1].block_type {
        return decode_error("mpa: stereo channel pair block_type mismatch");
    }

    // Borrow the two channels of the sample buffer independently.
    let [ch0, ch1] = ch;

    // Layer 3 joint stereo combines mid-side and intensity coding. Each scale-factor band may use
    // mid-side, intensity, or no stereo coding at all, depending on the MPEG bitstream version,
    // the mode extension, the block type, and the contents of the band.
    //
    // Samples at or beyond this index are zero in both channels.
    let end = max(granule.channels[0].rzero, granule.channels[1].rzero);

    let is_bound = if !intensity {
        // Without intensity stereo no band is intensity coded, so the intensity bound is simply
        // the end of the non-zero portion of the samples.
        end
    }
    else {
        // Decode the intensity stereo coded bands according to the block type and obtain the
        // intensity bound.
        match granule.channels[1].block_type {
            BlockType::Short { is_mixed } => {
                process_intensity_short_block(header, granule, is_mixed, mid_side, end, ch0, ch1)
            }
            _ => process_intensity_long_block(header, granule, mid_side, end, ch0, ch1),
        }
    };

    // Everything below the intensity bound is mid-side coded when mid-side stereo is enabled.
    if mid_side && is_bound > 0 {
        process_mid_side(&mut ch0[0..is_bound], &mut ch1[0..is_bound]);
    }

    // The number of samples read from the bitstream (stored as rzero) usually differs between
    // the two channels of a joint stereo pair. After decoding, both channels hold the same number
    // of samples, so rzero is updated on both to reflect that.
    if intensity || mid_side {
        granule.channels[0].rzero = end;
        granule.channels[1].rzero = end;
    }

    Ok(())
}