symphonia_bundle_mp3/layer3/stereo.rs
// Symphonia
// Copyright (c) 2019-2022 The Project Symphonia Developers.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8use symphonia_core::errors::{decode_error, Result};
9
10use crate::common::{ChannelMode, FrameHeader, Mode};
11
12use super::{common::*, Granule};
13
14use std::cmp::max;
15use std::{f32, f64};
16
17use lazy_static::lazy_static;
18
/// The invalid intensity position for MPEG1 bitstreams.
///
/// Intensity positions at or above this value are not decoded with the intensity stereo ratios.
const INTENSITY_INV_POS_MPEG1: u8 = 7;
21
/// The invalid intensity position for MPEG2 and MPEG2.5 bitstreams.
///
/// NOTE: Some decoders also consider 7 to be an invalid intensity position in MPEG2 and MPEG2.5.
/// However, this appears wrong. According to the standard, the /maximum/ value for the intensity
/// position is considered invalid. For MPEG1, the intensity ratios are only specified for
/// is_pos < 7, so 7 is the invalid position. For MPEG2, the ratios are specified for is_pos < 31.
/// Therefore, it doesn't make sense to consider is_pos == 7 invalid for MPEG2 or 2.5.
const INTENSITY_INV_POS_MPEG2: u8 = 31;
30
31lazy_static! {
32 /// (Left, right) channel coefficients for decoding intensity stereo in MPEG2 bitstreams.
33 ///
34 /// These coefficients are derived from section 2.4.3.2 of ISO/IEC 13818-3.
35 ///
36 /// As per the specification, for a given intensity position, is_pos (0 <= is_pos < 32), the
37 /// channel coefficients, k_l and k_r, may be calculated as per the table below:
38 ///
39 /// ```text
40 /// If... | k_l | k_r
41 /// -----------------+-------------------------+-------------------
42 /// is_pos == 0 | 1.0 | 1.0
43 /// is_pos & 1 == 1 | i0 ^ [(is_pos + 1) / 2] | 1.0
44 /// is_pos & 1 == 0 | 1.0 | i0 ^ (is_pos / 2)
45 /// ```
46 ///
47 /// The value of i0 is dependant on the least significant bit of scalefac_compress.
48 ///
49 /// ```text
50 /// scalefac_compress & 1 | i0
51 /// ----------------------+---------------------
52 /// 0 | 1 / sqrt(sqrt(2.0))
53 /// 1 | 1 / sqrt(2.0)
54 /// ```
55 ///
56 /// The first dimension of this table is indexed by scalefac_compress & 1 to select i0. The
57 /// second dimension is indexed by is_pos to obtain the channel coefficients. Note that
58 /// is_pos == 31 is considered an invalid position, but IS included in the table.
59 static ref INTENSITY_STEREO_RATIOS_MPEG2: [[(f32, f32); 32]; 2] = {
60 let is_scale: [f64; 2] = [
61 1.0 / f64::sqrt(f64::consts::SQRT_2),
62 f64::consts::FRAC_1_SQRT_2,
63 ];
64
65 let mut ratios = [[(0.0, 0.0); 32]; 2];
66
67 for (i, is_pos) in (0..32).enumerate() {
68 if is_pos & 1 != 0 {
69 // Odd case.
70 ratios[0][i] = (is_scale[0].powf(f64::from(is_pos + 1) / 2.0) as f32, 1.0);
71 ratios[1][i] = (is_scale[1].powf(f64::from(is_pos + 1) / 2.0) as f32, 1.0);
72 }
73 else {
74 // Even & zero case.
75 ratios[0][i] = (1.0, is_scale[0].powf(f64::from(is_pos) / 2.0) as f32);
76 ratios[1][i] = (1.0, is_scale[1].powf(f64::from(is_pos) / 2.0) as f32);
77 }
78 }
79
80 ratios
81 };
82}
83
84lazy_static! {
85 /// (Left, right) channel coeffcients for decoding intensity stereo in MPEG1 bitstreams.
86 ///
87 /// These coefficients are derived from section 2.4.3.4.9.3 of ISO/IEC 11172-3.
88 ///
89 /// As per the specification, for a given intensity position, is_pos (0 <= is_pos < 7), a ratio,
90 /// is_ratio, is calculated as follows:
91 ///
92 /// ```text
93 /// is_ratio = tan(is_pos * PI/12)
94 /// ```
95 ///
96 /// Then, the channel coefficients, k_l and k_r, are calculated as follows:
97 ///
98 /// ```text
99 /// k_l = is_ratio / (1 + is_ratio)
100 /// k_r = 1 / (1 + is_ratio)
101 /// ```
102 ///
103 /// This table is indexed by is_pos. Note that is_pos == 7 is invalid and is NOT included in the
104 /// table.
105 static ref INTENSITY_STEREO_RATIOS_MPEG1: [(f32, f32); 7] = {
106 const PI_12: f64 = f64::consts::PI / 12.0;
107
108 let mut ratios = [(0.0, 0.0); 7];
109
110 for (is_pos, ratio) in ratios.iter_mut().enumerate() {
111 let is_ratio = (PI_12 * is_pos as f64).tan();
112 *ratio = (
113 (is_ratio / (1.0 + is_ratio)) as f32,
114 (1.0 / (1.0 + is_ratio)) as f32
115 );
116 }
117
118 ratios[6] = (1.0, 0.0);
119
120 ratios
121 };
122}
123
/// Decorrelates mid and side channels into left and right channels.
///
/// In mid-side (MS) stereo, the left and right channels are encoded as average (mid) and
/// difference (side) components.
///
/// As per ISO/IEC 11172-3, to reconstruct the left and right channels, the following calculation
/// is performed:
///
/// ```text
///      l[i] = (m[i] + s[i]) / sqrt(2)
///      r[i] = (m[i] - s[i]) / sqrt(2)
/// ```
/// where:
///      l[i], and r[i] are the left and right channels, respectively.
///      m[i], and s[i] are the mid and side channels, respectively.
///
/// In the bitstream, m[i] is transmitted in channel 0, while s[i] in channel 1. After decoding,
/// the left channel replaces m[i] in channel 0, and the right channel replaces s[i] in channel
/// 1.
///
/// Both slices are expected to have the same length (checked in debug builds only). If they do
/// not, only the first `min(mid.len(), side.len())` samples are processed.
fn process_mid_side(mid: &mut [f32], side: &mut [f32]) {
    debug_assert_eq!(mid.len(), side.len());

    for (m, s) in mid.iter_mut().zip(side.iter_mut()) {
        // Compute both results from the original samples before overwriting either of them.
        let (sum, diff) = (*m + *s, *m - *s);

        *m = sum * f32::consts::FRAC_1_SQRT_2;
        *s = diff * f32::consts::FRAC_1_SQRT_2;
    }
}
153
154/// Decodes channel 0 of the intensity stereo coded signal into left and right channels.
155///
156/// As per ISO/IEC 11172-3, the following calculation may be performed to decode the intensity
157/// stereo coded signal into left and right channels.
158///
159/// ```text
160/// l[i] = ch0[i] * k_l
161/// r[i] = ch0[i] * l_r
162/// ```
163///
164/// where:
165/// l[i], and r[i] are the left and right channels, respectively.
166/// ch0[i] is the intensity stereo coded signal found in channel 0.
167/// k_l, and k_r are the left and right channel ratios, respectively.
168fn process_intensity(
169 intensity_pos: u8,
170 intensity_table: &[(f32, f32)],
171 intensity_max: u8,
172 mid_side: bool,
173 ch0: &mut [f32],
174 ch1: &mut [f32],
175) {
176 if intensity_pos < intensity_max {
177 let (ratio_l, ratio_r) = intensity_table[usize::from(intensity_pos)];
178
179 for (l, r) in ch0.iter_mut().zip(ch1) {
180 let is = *l;
181 *l = ratio_l * is;
182 *r = ratio_r * is;
183 }
184 }
185 else if mid_side {
186 process_mid_side(ch0, ch1);
187 }
188}
189
/// Determines if a band is zeroed.
///
/// Returns `true` when every sample compares equal to zero (an empty band counts as zeroed).
/// A band containing NaN is not considered zeroed.
#[inline(always)]
fn is_zero_band(band: &[f32]) -> bool {
    band.iter().all(|&x| x == 0.0)
}
195
196/// Decodes all intensity stereo coded bands within an entire long block and returns the intensity
197/// bound.
198fn process_intensity_long_block(
199 header: &FrameHeader,
200 granule: &Granule,
201 mid_side: bool,
202 max_bound: usize,
203 ch0: &mut [f32; 576],
204 ch1: &mut [f32; 576],
205) -> usize {
206 // As per ISO/IEC 11172-3 and ISO/IEC 13818-3, for long blocks that have intensity stereo
207 // coding enabled, all bands starting after the last non-zero band in channel 1 may be
208 // intensity stereo coded.
209 //
210 // The scale-factors in channel 1 for those respective bands determine the intensity position.
211
212 // The rzero sample index is the index of last non-zero sample plus 1.
213 let rzero = granule.channels[1].rzero;
214
215 // Select the intensity stereo ratios table.
216 let (is_table, is_inv_pos) = if header.is_mpeg1() {
217 (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
218 }
219 else {
220 let is_scale = granule.channels[1].scalefac_compress & 1;
221 (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
222 };
223
224 let bands = &SFB_LONG_BANDS[header.sample_rate_idx];
225
226 // The intensity positions are stored in the right channel (channel 1) scalefactors. The
227 // intensity position for band 21 is not coded and is copied from band 20.
228 let mut is_pos = [0; 22];
229 is_pos.copy_from_slice(&granule.channels[1].scalefacs[..22]);
230 is_pos[21] = is_pos[20];
231
232 // Create an iterator that yields a band start-end pair, and scale-factor.
233 let bands_iter = bands.iter().zip(&bands[1..]).zip(is_pos.iter());
234
235 let mut bound = max_bound;
236
237 // Iterate over each band and decode the intensity stereo coding if the band is zero.
238 for ((&start, &end), &is_pos) in bands_iter.rev() {
239 // Bands starting above rzero are always 0, however bands below it are ambiguous.
240 let is_zero_band = start >= rzero || is_zero_band(&ch1[start..end]);
241
242 if is_zero_band {
243 process_intensity(
244 is_pos,
245 is_table,
246 is_inv_pos,
247 mid_side,
248 &mut ch0[start..end],
249 &mut ch1[start..end],
250 );
251 }
252 else {
253 break;
254 }
255
256 // Update the intensity bound to the start of the band since it has now been processed.
257 bound = start;
258 }
259
260 bound
261}
262
263/// Decodes all intensity stereo coded bands within an entire short block and returns the intensity
264/// bound.
265fn process_intensity_short_block(
266 header: &FrameHeader,
267 granule: &Granule,
268 is_mixed: bool,
269 mid_side: bool,
270 max_bound: usize,
271 ch0: &mut [f32; 576],
272 ch1: &mut [f32; 576],
273) -> usize {
274 // For short, non-mixed, blocks, each band is composed of 3 windows (windows 0 thru 2). Windows
275 // are interleaved in each band.
276 //
277 // +--------------+--------------+--------------+-------+
278 // | sfb0 | sfb1 | sfb2 | ... |
279 // +--------------+--------------+--------------+-------+
280 // | w0 | w1 | w2 | w0 | w1 | w2 | w0 | w1 | w2 | ... |
281 // +--------------+--------------+--------------+-------+
282 //
283 // However, each window of the same index is logically contiguous as depicted below.
284 //
285 // +------+------+------+------+
286 // | sfb0 | sfb1 | sfb2 | .... |
287 // +------+------+------+------+
288 // | w0 | w0 | w0 | .... |
289 // +-------------+------+------+
290 // | w1 | w1 | w1 | .... |
291 // +-------------+------+------+
292 // | w2 | w2 | w2 | .... |
293 // +------+------+------+------+
294 //
295 // Each logically contiguous window may have it's own intensity bound. For example, in the
296 // example below, the intensity bound for window 0 is sfb0, for window 1 it's sfb2, and for
297 // window 2 it's sfb1.
298 //
299 // +------+------+------+------+
300 // | sfb0 | sfb1 | sfb2 | .... |
301 // +------+------+------+------+
302 // w0 | 0000 | 0000 | 0000 | 0... |
303 // +-------------+------+------+
304 // w1 | abcd | xyzw | 0000 | 0... |
305 // +-------------+------+------+
306 // w2 | xyz0 | 0000 | 0000 | 0... |
307 // +------+------+------+------+
308 //
309 // For short blocks that are mixed, the long bands at the start follow the same rules as long
310 // blocks (see above). For example, for the block below, if sfb1 is the intensity bound, then
311 // all samples from sfb1 onwards must be zero. If the intensity bound is not within the long
312 // bands then the rules stated above are followed whereby each window has it's own intensity
313 // bound.
314 //
315 // |> Long bands |> Short bands (3 windows)
316 // +------+------+------+--------+--------+------+
317 // | sfb0 | sfb1 | .... | sfbN-2 | sfbN-1 | sfbN |
318 // |------+------+------+--------+--------+------+
319 // | | | | w0 | w0 | w0 |
320 // | | | +--------+--------+------+
321 // | | | .... | w1 | w1 | w1 |
322 // | | | +--------+--------+------+
323 // | | | | w2 | w2 | w2 |
324 // +------+------+------+--------+--------+------+
325 //
326
327 // First, if the short block is mixed, the get pair of short and long bands. Otherwise, if the
328 // block is not mixed, get the short bands. In both cases, the index of the last scale-factor is
329 // also returned.
330 let (short_bands, long_bands, mut sfi) = if is_mixed {
331 let bands = SFB_MIXED_BANDS[header.sample_rate_idx];
332 let switch = SFB_MIXED_SWITCH_POINT[header.sample_rate_idx];
333 // Variable number of short and long scalefactor bands based on the switch point.
334 (&bands[switch..], Some(&bands[..switch + 1]), bands.len() - 1)
335 }
336 else {
337 // 39 scalefactors from 13 scalefactor bands with 3 short windows per band.
338 (&SFB_SHORT_BANDS[header.sample_rate_idx][..], None, 39)
339 };
340
341 // Select the intensity stereo ratios table based on the bitstream version.
342 let (is_table, is_inv_pos) = if header.is_mpeg1() {
343 (&INTENSITY_STEREO_RATIOS_MPEG1[..], INTENSITY_INV_POS_MPEG1)
344 }
345 else {
346 let is_scale = granule.channels[1].scalefac_compress & 1;
347 (&INTENSITY_STEREO_RATIOS_MPEG2[usize::from(is_scale)][..], INTENSITY_INV_POS_MPEG2)
348 };
349
350 // The intensity position for the final band (last three short windows) is not coded and is
351 // copied from the previous band.
352 let mut is_pos = [0; 39];
353 is_pos[..36].copy_from_slice(&granule.channels[1].scalefacs[..36]);
354 is_pos[36..].copy_from_slice(&granule.channels[1].scalefacs[33..36]);
355
356 let mut window_is_zero = [true; 3];
357
358 let mut bound = max_bound;
359 let mut found_bound = false;
360
361 // Process the short bands.
362 for (((&s0, &s1), &s2), &s3) in short_bands
363 .iter()
364 .zip(&short_bands[1..])
365 .zip(&short_bands[2..])
366 .zip(&short_bands[3..])
367 .step_by(3)
368 .rev()
369 {
370 // For each short band, the following logic is repeated for each of the three windows.
371 //
372 // First, if the corresponding window in the previous band was zeroed, check if the
373 // window in this band is also zeroed. Note that if the window is non-zero, this statement
374 // short-circuits and avoids the costly zero-check.
375 window_is_zero[2] = window_is_zero[2] && is_zero_band(&ch1[s2..s3]);
376
377 // If the window is zeroed, process it with intensity stereo.
378 if window_is_zero[2] {
379 process_intensity(
380 is_pos[sfi - 1],
381 is_table,
382 is_inv_pos,
383 mid_side,
384 &mut ch0[s2..s3],
385 &mut ch1[s2..s3],
386 );
387 }
388 else if mid_side {
389 // If the window is non-zeroed, process it with mid-side stereo.
390 process_mid_side(&mut ch0[s2..s3], &mut ch1[s2..s3]);
391 }
392
393 // Decrement the scalefactor (intensity position) index to advance to the next window.
394 sfi -= 1;
395
396 // Repeat the same process for the second window.
397 window_is_zero[1] = window_is_zero[1] && is_zero_band(&ch1[s1..s2]);
398
399 if window_is_zero[1] {
400 process_intensity(
401 is_pos[sfi - 1],
402 is_table,
403 is_inv_pos,
404 mid_side,
405 &mut ch0[s1..s2],
406 &mut ch1[s1..s2],
407 );
408 }
409 else if mid_side {
410 process_mid_side(&mut ch0[s1..s2], &mut ch1[s1..s2]);
411 }
412
413 sfi -= 1;
414
415 // Repeat the same process for the third window.
416 window_is_zero[0] = window_is_zero[0] && is_zero_band(&ch1[s0..s1]);
417
418 if window_is_zero[0] {
419 process_intensity(
420 is_pos[sfi - 1],
421 is_table,
422 is_inv_pos,
423 mid_side,
424 &mut ch0[s0..s1],
425 &mut ch1[s0..s1],
426 );
427 }
428 else if mid_side {
429 process_mid_side(&mut ch0[s0..s1], &mut ch1[s0..s1]);
430 }
431
432 sfi -= 1;
433
434 // Update the intensity bound to the start of the first window since all three windows have
435 // now been processed by either intensity or mid-side stereo. Note that this is the "final"
436 // intensity bound of all the windows in the short bands. Individual windows may have
437 // reached their intensity bound earlier. Those windows are processed with mid-side stereo.
438 bound = s0;
439
440 // Determine if all windows non-zero.
441 found_bound = !window_is_zero[0] && !window_is_zero[1] && !window_is_zero[2];
442
443 // If all windows are non-zero then the all the remaining bands should be processed with
444 // mid-side stereo. Break out early in this case.
445 if found_bound {
446 break;
447 }
448 }
449
450 // If the final intensity bound was not found within the short bands, then it may be found
451 // within the long bands if the short block is mixed.
452 if !found_bound {
453 // If the short block is mixed, the long bands will not be None.
454 if let Some(long_bands) = long_bands {
455 // Process the long bands exactly as if it were a long block.
456 for (&start, &end) in long_bands.iter().zip(&long_bands[1..]).rev() {
457 let is_zero_band = is_zero_band(&ch1[start..end]);
458
459 if is_zero_band {
460 process_intensity(
461 is_pos[sfi - 1],
462 is_table,
463 is_inv_pos,
464 mid_side,
465 &mut ch0[start..end],
466 &mut ch1[start..end],
467 );
468 }
469 else {
470 break;
471 }
472
473 sfi -= 1;
474
475 bound = start;
476 }
477 }
478 }
479
480 // Return the intensity bound.
481 bound
482}
483
484/// Perform joint stereo decoding on the channel pair.
485pub(super) fn stereo(
486 header: &FrameHeader,
487 granule: &mut Granule,
488 ch: &mut [[f32; 576]; 2],
489) -> Result<()> {
490 // Determine whether mid-side, and/or intensity stereo coding is used.
491 let (mid_side, intensity) = match header.channel_mode {
492 ChannelMode::JointStereo(Mode::Layer3 { mid_side, intensity }) => (mid_side, intensity),
493 ChannelMode::JointStereo(Mode::Intensity { .. }) => {
494 // This function only supports decoding Layer 3 stereo encodings, it is a fundamental
495 // error in the decoder logic if layer 1 or 2 stereo encodings are being decoded with
496 // this function.
497 panic!("invalid mode extension for layer 3 stereo decoding")
498 }
499 _ => return Ok(()),
500 };
501
502 // The block types must be the same.
503 if granule.channels[0].block_type != granule.channels[1].block_type {
504 return decode_error("mpa: stereo channel pair block_type mismatch");
505 }
506
507 // Split the sample buffer into two channels.
508 let (ch0, ch1) = {
509 let (ch0, ch1) = ch.split_first_mut().unwrap();
510 (ch0, &mut ch1[0])
511 };
512
513 // Joint stereo processing as specified in layer 3 is a combination of mid-side, and intensity
514 // encoding schemes. Each scale-factor band may use either mid-side, intensity, or no stereo
515 // encoding. The type of encoding used for each scale-factor band is determined by the MPEG
516 // bitstream version, the mode extension, the block type, and the content of the scale-factor
517 // bands.
518 let end = max(granule.channels[0].rzero, granule.channels[1].rzero);
519
520 // Decode intensity stereo coded bands if it is enabled and get the intensity bound.
521 let is_bound = if intensity {
522 // Decode intensity stereo coded bands based on bitstream version and block type.
523 match granule.channels[1].block_type {
524 BlockType::Short { is_mixed } => {
525 process_intensity_short_block(header, granule, is_mixed, mid_side, end, ch0, ch1)
526 }
527 _ => process_intensity_long_block(header, granule, mid_side, end, ch0, ch1),
528 }
529 }
530 // If intensity stereo coding is not enabled, then all samples are processed with mid-side
531 // stereo decoding. In other words, there are no samples encoded with intensity stereo and
532 // therefore the intensity bound is equal to the end of the non-zero portion of the samples.
533 else {
534 end
535 };
536
537 // If mid-side stereo coding is enabled, all samples up to the intensity bound should be
538 // decoded as mid-side stereo.
539 if mid_side && is_bound > 0 {
540 process_mid_side(&mut ch0[0..is_bound], &mut ch1[0..is_bound]);
541 }
542
543 // With joint stereo encoding, there is usually a mismatch between the number of samples
544 // initially read from the bitstream for each channel. This count is stored as the rzero sample
545 // index. However, after joint stereo decoding, both channels will have the same number of
546 // samples. Update rzero for both channels with the actual number of samples.
547 if intensity || mid_side {
548 granule.channels[0].rzero = end;
549 granule.channels[1].rzero = end;
550 }
551
552 Ok(())
553}