symphonia_bundle_mp3/layer3/
hybrid_synthesis.rs

1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8// Justification: Some loops are better expressed without a range loop.
9#![allow(clippy::needless_range_loop)]
10
11use crate::common::FrameHeader;
12
13use super::{common::*, GranuleChannel};
14
15use std::{convert::TryInto, f64};
16
17use lazy_static::lazy_static;
18
19lazy_static! {
20    /// Hybrid synthesesis IMDCT window coefficients for: Long, Start, Short, and End block, in that
21    /// order.
22    ///
23    /// For long blocks:
24    ///
25    /// ```text
26    /// W[ 0..36] = sin(PI/36.0 * (i + 0.5))
27    /// ```
28    ///
29    /// For start blocks:
30    ///
31    /// ```text
32    /// W[ 0..18] = sin(PI/36.0 * (i + 0.5))
33    /// W[18..24] = 1.0
34    /// W[24..30] = sin(PI/12.0 * ((i - 18) - 0.5))
35    /// W[30..36] = 0.0
36    /// ```
37    ///
38    /// For short blocks (to be applied to each 12 sample window):
39    ///
40    /// ```text
41    /// W[ 0..12] = sin(PI/12.0 * (i + 0.5))
42    /// W[12..36] = 0.0
43    /// ```
44    ///
45    /// For end blocks:
46    ///
47    /// ```text
48    /// W[ 0..6 ] = 0.0
49    /// W[ 6..12] = sin(PI/12.0 * ((i - 6) + 0.5))
50    /// W[12..18] = 1.0
51    /// W[18..36] = sin(PI/36.0 * (i + 0.5))
52    /// ```
53    static ref IMDCT_WINDOWS: [[f32; 36]; 4] = {
54        const PI_36: f64 = f64::consts::PI / 36.0;
55        const PI_12: f64 = f64::consts::PI / 12.0;
56
57        let mut windows = [[0f32; 36]; 4];
58
59        // Window for Long blocks.
60        for i in 0..36 {
61            windows[0][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
62        }
63
64        // Window for Start blocks (indicies 30..36 implictly 0.0).
65        for i in 0..18 {
66            windows[1][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
67        }
68        for i in 18..24 {
69            windows[1][i] = 1.0;
70        }
71        for i in 24..30 {
72            windows[1][i] = (PI_12 * ((i - 18) as f64 + 0.5)).sin() as f32;
73        }
74
75        // Window for Short blocks.
76        for i in 0..12 {
77            windows[2][i] = (PI_12 * (i as f64 + 0.5)).sin() as f32;
78        }
79
80        // Window for End blocks (indicies 0..6 implicitly 0.0).
81        for i in 6..12 {
82            windows[3][i] = (PI_12 * ((i - 6) as f64 + 0.5)).sin() as f32;
83        }
84        for i in 12..18 {
85            windows[3][i] = 1.0;
86        }
87        for i in 18..36 {
88            windows[3][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
89        }
90
91        windows
92   };
93}
94
95lazy_static! {
96    /// Lookup table of cosine coefficients for half of a 12-point IMDCT.
97    ///
98    /// This table is derived from the general expression:
99    ///
100    /// ```text
101    /// cos12[i][k] = cos(PI/24.0 * (2*i + 1 + N/2) * (2*k + 1))
102    /// ```
103    /// where:
104    ///     `N=12`, `i=N/4..3N/4`, and `k=0..N/2`.
105    static ref IMDCT_HALF_COS_12: [[f32; 6]; 6] = {
106        const PI_24: f64 = f64::consts::PI / 24.0;
107
108        let mut cos = [[0f32; 6]; 6];
109
110        for (i, cos_i) in cos.iter_mut().enumerate() {
111            for (k, cos_ik) in cos_i.iter_mut().enumerate() {
112                // Only compute the middle half of the cosine lookup table (i offset by 3).
113                let n = (2 * (i + 3) + (12 / 2) + 1) * (2 * k + 1);
114                *cos_ik = (PI_24 * n as f64).cos() as f32;
115            }
116        }
117
118        cos
119    };
120}
121
122lazy_static! {
123    /// Pair of lookup tables, CS and CA, for alias reduction.
124    ///
125    /// As per ISO/IEC 11172-3, CS and CA are calculated as follows:
126    ///
127    /// ```text
128    /// cs[i] =  1.0 / sqrt(1.0 + c[i]^2)
129    /// ca[i] = c[i] / sqrt(1.0 + c[i]^2)
130    /// ```
131    ///
132    /// where:
133    /// ```text
134    /// c[i] = [ -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037 ]
135    /// ```
136    static ref ANTIALIAS_CS_CA: ([f32; 8], [f32; 8]) = {
137        const C: [f64; 8] = [ -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037 ];
138
139        let mut cs = [0f32; 8];
140        let mut ca = [0f32; 8];
141
142        for i in 0..8 {
143            let sqrt = f64::sqrt(1.0 + (C[i] * C[i]));
144            cs[i] = (1.0 / sqrt) as f32;
145            ca[i] = (C[i] / sqrt) as f32;
146        }
147
148        (cs, ca)
149    };
150}
151
152/// Reorder samples that are part of short blocks into sub-band order.
153pub(super) fn reorder(header: &FrameHeader, channel: &mut GranuleChannel, buf: &mut [f32; 576]) {
154    // Only short blocks are reordered.
155    if let BlockType::Short { is_mixed } = channel.block_type {
156        // Every short block is split into 3 equally sized windows as illustrated below (e.g. for
157        // a short scale factor band with win_len=4):
158        //
159        //    <- Window #1 ->  <- Window #2 ->  <- Window #3 ->
160        //   [ 0 | 1 | 2 | 3 ][ 4 | 5 | 6 | 7 ][ 8 | 9 | a | b ]
161        //    <-----  3 * Short Scale Factor Band Width  ----->
162        //
163        // Reordering interleaves the samples of each window as follows:
164        //
165        //   [ 0 | 4 | 8 | 1 | 5 | 9 | 2 | 6 | a | 3 | 7 | b ]
166        //    <----  3 * Short Scale Factor Band Width  ---->
167        //
168        // Basically, reordering interleaves the 3 windows the same way that 3 planar audio buffers
169        // would be interleaved.
170        debug_assert!(channel.rzero <= 576);
171
172        // In mixed blocks, only the short bands can be re-ordered. Determine the applicable bands.
173        let bands = if is_mixed {
174            let switch = SFB_MIXED_SWITCH_POINT[header.sample_rate_idx];
175            &SFB_MIXED_BANDS[header.sample_rate_idx][switch..]
176        }
177        else {
178            &SFB_SHORT_BANDS[header.sample_rate_idx]
179        };
180
181        let mut reorder_buf = [0f32; 576];
182
183        let start = bands[0];
184        let mut i = start;
185
186        for (((s0, s1), s2), s3) in
187            bands.iter().zip(&bands[1..]).zip(&bands[2..]).zip(&bands[3..]).step_by(3)
188        {
189            // Do not reorder short blocks that begin after the rzero partition boundary since
190            // they're zeroed.
191            if *s0 >= channel.rzero {
192                break;
193            }
194
195            // The three short sample windows.
196            let win0 = &buf[*s0..*s1];
197            let win1 = &buf[*s1..*s2];
198            let win2 = &buf[*s2..*s3];
199
200            // Interleave the three short sample windows.
201            for ((w0, w1), w2) in win0.iter().zip(win1).zip(win2) {
202                reorder_buf[i + 0] = *w0;
203                reorder_buf[i + 1] = *w1;
204                reorder_buf[i + 2] = *w2;
205                i += 3;
206            }
207        }
208
209        // Copy reordered samples from the reorder buffer to the actual sample buffer.
210        buf[start..i].copy_from_slice(&reorder_buf[start..i]);
211
212        // After reordering, the start of the rzero partition may no longer be valid. Update it.
213        channel.rzero = channel.rzero.max(i);
214    }
215}
216
217/// Applies the anti-aliasing filter to sub-bands that are not part of short blocks.
218pub(super) fn antialias(channel: &mut GranuleChannel, samples: &mut [f32; 576]) {
219    // The maximum number of sub-bands to anti-alias depends on block type.
220    let sb_limit = match channel.block_type {
221        // Short blocks are never anti-aliased.
222        BlockType::Short { is_mixed: false } => return,
223        // Mixed blocks have a long block span the first 36 samples (2 sub-bands). Therefore, only
224        // anti-alias these two sub-bands.
225        BlockType::Short { is_mixed: true } => 2,
226        // All other block types require all 32 sub-bands to be anti-aliased.
227        _ => 32,
228    };
229
230    // Amortize the lazy_static fetch over the entire anti-aliasing operation.
231    let (cs, ca): &([f32; 8], [f32; 8]) = &ANTIALIAS_CS_CA;
232
233    // The sub-band that intersects the start of the rzero partition. All sub-bands after this one
234    // are zeroed and do-not need anti-aliasing.
235    let sb_rzero = channel.rzero / 18;
236
237    // The anti-aliasing filter must be applied up-to the last non-zero sub-band. After
238    // anti-aliasing, the first zeroed sub-band may have non-zero values "smeared" into it.
239    // Therefore, the rzero must be updated.
240    channel.rzero = 18 * sb_limit.min(sb_rzero + 2).min(32);
241
242    // Anti-aliasing is performed using 8 butterfly calculations at the boundaries of ADJACENT
243    // sub-bands. For each calculation, there are two samples: lower and upper. For each iteration,
244    // the lower sample index advances backwards from the boundary, while the upper sample index
245    // advances forward from the boundary.
246    //
247    // For example, let B(li, ui) represent the butterfly calculation where li and ui are the
248    // indicies of the lower and upper samples respectively. If j is the index of the first sample
249    // of a sub-band, then the iterations are as follows:
250    //
251    // B(j-1,j), B(j-2,j+1), B(j-3,j+2), B(j-4,j+3), B(j-5,j+4), B(j-6,j+5), B(j-7,j+6), B(j-8,j+7)
252    //
253    // The butterfly calculation itself can be illustrated as follows:
254    //
255    //              * cs[i]
256    //   l0 -------o------(-)------> l1
257    //               \    /                  l1 = l0 * cs[i] - u0 * ca[i]
258    //                \  / * ca[i]           u1 = u0 * cs[i] + l0 * ca[i]
259    //                 \
260    //               /  \  * ca[i]           where:
261    //             /     \                       cs[i], ca[i] are constant values for iteration i,
262    //   u0 ------o------(+)-------> u1          derived from table B.9 of ISO/IEC 11172-3.
263    //             * cs[i]
264    //
265    // Note that all butterfly calculations only involve two samples, and all iterations are
266    // independant of each other. This lends itself well for SIMD processing.
267    for sb in (18..channel.rzero).step_by(18) {
268        for i in 0..8 {
269            let li = sb - 1 - i;
270            let ui = sb + i;
271            let lower = samples[li];
272            let upper = samples[ui];
273            samples[li] = lower * cs[i] - upper * ca[i];
274            samples[ui] = upper * cs[i] + lower * ca[i];
275        }
276    }
277}
278
279/// Performs hybrid synthesis (IMDCT and windowing).
280pub(super) fn hybrid_synthesis(
281    channel: &GranuleChannel,
282    overlap: &mut [[f32; 18]; 32],
283    samples: &mut [f32; 576],
284) {
285    // The first sub-band after the rzero partition boundary is the sub-band limit. All sub-bands
286    // past this are zeroed.
287    let sb_limit = (channel.rzero + 17) / 18;
288
289    // Determine the split point of long and short blocks in terms of a sub-band index.
290    //
291    // Short blocks process 0 sub-bands as long blocks, mixed blocks process the first 2 sub-bands
292    // as long blocks, and all other block types (long, start, end) process all 32 sub-bands as long
293    // blocks.
294    let sb_split = match channel.block_type {
295        BlockType::Short { is_mixed: false } => 0,
296        BlockType::Short { is_mixed: true } => 2,
297        _ => 32,
298    };
299
300    // If the split point is not 0, then some sub-bands need to be processed as long blocks using
301    // the 36-point IMDCT.
302    if sb_split > 0 {
303        // Select the appropriate window given the block type.
304        let window: &[f32; 36] = match channel.block_type {
305            BlockType::Start => &IMDCT_WINDOWS[1],
306            BlockType::End => &IMDCT_WINDOWS[3],
307            _ => &IMDCT_WINDOWS[0],
308        };
309
310        let sb_long_end = sb_split.min(sb_limit);
311
312        // For each of the sub-bands (18 samples each) in the long block...
313        for sb in 0..sb_long_end {
314            let start = 18 * sb;
315
316            // Casting to a slice of a known-size lets the compiler elide bounds checks.
317            let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
318
319            // Perform the 36-point on the entire sub-band.
320            imdct36::imdct36(sub_band, window, &mut overlap[sb]);
321        }
322    }
323
324    // If the split point is less-than 32, then some sub-bands need to be processed as short blocks
325    // using the 12-point IMDCT on each of the three windows.
326    if sb_split < 32 {
327        // Select the short block window.
328        let window: &[f32; 36] = &IMDCT_WINDOWS[2];
329
330        let sb_short_begin = sb_split.min(sb_limit);
331
332        // For each of the sub-bands (18 samples each) in the short block...
333        for sb in sb_short_begin..sb_limit {
334            let start = 18 * sb;
335
336            // Casting to a slice of a known-size lets the compiler elide bounds checks.
337            let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
338
339            // Perform the 12-point IMDCT on each of the 3 short windows within the sub-band (6
340            // samples each).
341            imdct12_win(sub_band, window, &mut overlap[sb]);
342        }
343    }
344
345    // Every sub-band after the the sub-band limit are zeroed, however, the overlap for that
346    // sub-band may be non-zero. Therefore, copy it over.
347    for sb in sb_limit..32 {
348        let start = 18 * sb;
349        let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
350
351        sub_band.copy_from_slice(&overlap[sb]);
352        overlap[sb].fill(0.0);
353    }
354}
355
356/// Performs the 12-point IMDCT, and windowing for each of the 3 short windows of a short block, and
357/// then overlap-adds the result.
358fn imdct12_win(x: &mut [f32; 18], window: &[f32; 36], overlap: &mut [f32; 18]) {
359    let cos12: &[[f32; 6]; 6] = &IMDCT_HALF_COS_12;
360
361    let mut tmp = [0.0; 36];
362
363    for w in 0..3 {
364        for i in 0..3 {
365            // Compute the 12-point IMDCT for each of the 3 short windows using a half-size IMDCT
366            // followed by post-processing.
367            //
368            // In general, the IMDCT is defined as:
369            //
370            //        (N/2)-1
371            // y[i] =   SUM   { x[k] * cos(PI/2N * (2i + 1 + N/2) * (2k + 1)) }
372            //          k=0
373            //
374            // For N=12, the IMDCT becomes:
375            //
376            //         5
377            // y[i] = SUM { x[k] * cos(PI/24 * (2i + 7) * (2k + 1)) }
378            //        k=0
379            //
380            // The cosine twiddle factors are easily indexable by i and k, and are therefore
381            // pre-computed and placed into a look-up table.
382            //
383            // Further, y[3..0] = -y[3..6], and y[12..9] = y[6..9] which reduces the amount of work
384            // by half.
385            //
386            // Therefore, it is possible to split the half-size IMDCT computation into two halves.
387            // In the calculations below, yl is the left-half output of the half-size IMDCT, and yr
388            // is the right-half.
389
390            let yl = (x[w] * cos12[i][0])
391                + (x[3 * 1 + w] * cos12[i][1])
392                + (x[3 * 2 + w] * cos12[i][2])
393                + (x[3 * 3 + w] * cos12[i][3])
394                + (x[3 * 4 + w] * cos12[i][4])
395                + (x[3 * 5 + w] * cos12[i][5]);
396
397            let yr = (x[w] * cos12[i + 3][0])
398                + (x[3 * 1 + w] * cos12[i + 3][1])
399                + (x[3 * 2 + w] * cos12[i + 3][2])
400                + (x[3 * 3 + w] * cos12[i + 3][3])
401                + (x[3 * 4 + w] * cos12[i + 3][4])
402                + (x[3 * 5 + w] * cos12[i + 3][5]);
403
404            // Each adjacent 12-point IMDCT windows are overlapped and added in the output, with the
405            // first and last 6 samples of the output always being 0.
406            //
407            // Each sample from the 12-point IMDCT is multiplied by the appropriate window function
408            // as specified in ISO/IEC 11172-3. The values of the window function are pre-computed
409            // and given by window[0..12].
410            //
411            // Since there are 3 IMDCT windows (indexed by w), y[0..12] is computed 3 times.
412            // For the purpose of the diagram below, we label these IMDCT windows as: y0[0..12],
413            // y1[0..12], and y2[0..12], for IMDCT windows 0..3 respectively.
414            //
415            // Therefore, the overlap-and-add operation can be visualized as below:
416            //
417            // 0             6           12           18           24           30            36
418            // +-------------+------------+------------+------------+------------+-------------+
419            // |      0      |  y0[..6]   |  y0[..6]   |  y1[6..]   |  y2[6..]   |      0      |
420            // |     (6)     |            |  + y1[6..] |  + y2[..6] |            |     (6)     |
421            // +-------------+------------+------------+------------+------------+-------------+
422            // .             .            .            .            .            .             .
423            // .             +-------------------------+            .            .             .
424            // .             |      IMDCT #1 (y0)      |            .            .             .
425            // .             +-------------------------+            .            .             .
426            // .             .            +-------------------------+            .             .
427            // .             .            |      IMDCT #2 (y1)      |            .             .
428            // .             .            +-------------------------+            .             .
429            // .             .            .            +-------------------------+             .
430            // .             .            .            |      IMDCT #3 (y2)      |             .
431            // .             .            .            +-------------------------+             .
432            // .             .            .            .            .            .             .
433            //
434            // Since the 12-point IMDCT was decomposed into a half-size IMDCT and post-processing
435            // operations, and further split into left and right halves, each iteration of this loop
436            // produces 4 output samples.
437
438            tmp[6 + 6 * w + 3 - i - 1] += -yl * window[3 - i - 1];
439            tmp[6 + 6 * w + i + 3] += yl * window[i + 3];
440            tmp[6 + 6 * w + i + 6] += yr * window[i + 6];
441            tmp[6 + 6 * w + 12 - i - 1] += yr * window[12 - i - 1];
442        }
443    }
444
445    // Overlap-add.
446    for i in 0..18 {
447        x[i] = tmp[i] + overlap[i];
448        overlap[i] = tmp[i + 18];
449    }
450}
451
452/// Inverts odd samples in odd sub-bands.
453pub fn frequency_inversion(samples: &mut [f32; 576]) {
454    // There are 32 sub-bands spanning 576 samples:
455    //
456    //        0    18    36    54    72    90   108       558    576
457    //        +-----+-----+-----+-----+-----+-----+ . . . . +------+
458    // s[i] = | sb0 | sb1 | sb2 | sb3 | sb4 | sb5 | . . . . | sb31 |
459    //        +-----+-----+-----+-----+-----+-----+ . . . . +------+
460    //
461    // The odd sub-bands are thusly:
462    //
463    //      sb1  -> s[ 18.. 36]
464    //      sb3  -> s[ 54.. 72]
465    //      sb5  -> s[ 90..108]
466    //      ...
467    //      sb31 -> s[558..576]
468    //
469    // Each odd sample in the aforementioned sub-bands must be negated.
470    for i in (18..576).step_by(36) {
471        // Sample negation is unrolled into a 2x4 + 1 (9) operation to improve vectorization.
472        for j in (i..i + 16).step_by(8) {
473            samples[j + 1] = -samples[j + 1];
474            samples[j + 3] = -samples[j + 3];
475            samples[j + 5] = -samples[j + 5];
476            samples[j + 7] = -samples[j + 7];
477        }
478        samples[i + 18 - 1] = -samples[i + 18 - 1];
479    }
480}
481
482#[cfg(test)]
483mod tests {
484    use super::imdct12_win;
485    use super::IMDCT_WINDOWS;
486    use std::f64;
487
488    fn imdct12_analytical(x: &[f32; 6]) -> [f32; 12] {
489        const PI_24: f64 = f64::consts::PI / 24.0;
490
491        let mut result = [0f32; 12];
492
493        for i in 0..12 {
494            let mut sum = 0.0;
495            for k in 0..6 {
496                sum +=
497                    (x[k] as f64) * (PI_24 * ((2 * i + (12 / 2) + 1) * (2 * k + 1)) as f64).cos();
498            }
499            result[i] = sum as f32;
500        }
501
502        result
503    }
504
505    #[test]
506    fn verify_imdct12_win() {
507        const TEST_VECTOR: [f32; 18] = [
508            0.0976, 0.9321, 0.6138, 0.0857, 0.0433, 0.4855, 0.2144, 0.8488, //
509            0.6889, 0.2983, 0.1957, 0.7037, 0.0052, 0.0197, 0.3188, 0.5123, //
510            0.2994, 0.7157,
511        ];
512
513        let window = &IMDCT_WINDOWS[2];
514
515        let mut actual = TEST_VECTOR;
516        let mut overlap = [0.0; 18];
517        imdct12_win(&mut actual, window, &mut overlap);
518
519        // The following block performs 3 analytical 12-point IMDCTs over the test vector, and then
520        // windows and overlaps the results to generate the final result.
521        let expected = {
522            let mut expected = [0f32; 36];
523
524            let mut x0 = [0f32; 6];
525            let mut x1 = [0f32; 6];
526            let mut x2 = [0f32; 6];
527
528            for i in 0..6 {
529                x0[i] = TEST_VECTOR[3 * i + 0];
530                x1[i] = TEST_VECTOR[3 * i + 1];
531                x2[i] = TEST_VECTOR[3 * i + 2];
532            }
533
534            let imdct0 = imdct12_analytical(&x0);
535            let imdct1 = imdct12_analytical(&x1);
536            let imdct2 = imdct12_analytical(&x2);
537
538            for i in 0..12 {
539                expected[6 + i] += imdct0[i] * window[i];
540                expected[12 + i] += imdct1[i] * window[i];
541                expected[18 + i] += imdct2[i] * window[i];
542            }
543
544            expected
545        };
546
547        for i in 0..18 {
548            assert!((expected[i] - actual[i]).abs() < 0.00001);
549            assert!((expected[i + 18] - overlap[i]).abs() < 0.00001);
550        }
551    }
552}
553
554mod imdct36 {
555    /// Performs an Inverse Modified Discrete Cosine Transform (IMDCT) transforming 18
556    /// frequency-domain input samples, into 36 time-domain output samples.
557    ///
558    /// This is a straight-forward implementation of the IMDCT using Szu-Wei Lee's algorithm
559    /// published in article [1].
560    ///
561    /// [1] Szu-Wei Lee, "Improved algorithm for efficient computation of the forward and backward
562    /// MDCT in MPEG audio coder", IEEE Transactions on Circuits and Systems II: Analog and Digital
563    /// Signal Processing, vol. 48, no. 10, pp. 990-994, 2001.
564    ///
565    /// https://ieeexplore.ieee.org/document/974789
566    pub fn imdct36(x: &mut [f32; 18], window: &[f32; 36], overlap: &mut [f32; 18]) {
567        let mut dct = [0f32; 18];
568
569        dct_iv(x, &mut dct);
570
571        // Mapping of DCT-IV to IMDCT
572        //
573        //  0            9                       27           36
574        //  +------------+------------------------+------------+
575        //  | dct[9..18] | -dct[0..18].rev()      | -dct[0..9] |
576        //  +------------+------------------------+------------+
577        //
578        // where dct[] is the DCT-IV of x.
579
580        // First 9 IMDCT values are values 9..18 in the DCT-IV.
581        for i in 0..9 {
582            x[i] = overlap[i] + dct[9 + i] * window[i];
583        }
584
585        // Next 18 IMDCT values are negated and /reversed/ values 0..18 in the DCT-IV.
586        for i in 9..18 {
587            x[i] = overlap[i] - dct[27 - i - 1] * window[i];
588        }
589
590        for i in 18..27 {
591            overlap[i - 18] = -dct[27 - i - 1] * window[i];
592        }
593
594        // Last 9 IMDCT values are negated values 0..9 in the DCT-IV.
595        for i in 27..36 {
596            overlap[i - 18] = -dct[i - 27] * window[i];
597        }
598    }
599
600    /// Continutation of `imdct36`.
601    ///
602    /// Step 2: Mapping N/2-point DCT-IV to N/2-point SDCT-II.
603    fn dct_iv(x: &[f32; 18], y: &mut [f32; 18]) {
604        // Scale factors for input samples. Computed from (16).
605        // 2 * cos(PI * (2*m + 1) / (2*36)
606        const SCALE: [f32; 18] = [
607            1.998_096_443_163_715_6, // m=0
608            1.982_889_722_747_620_8, // m=1
609            1.952_592_014_239_866_7, // m=2
610            1.907_433_901_496_453_9, // m=3
611            1.847_759_065_022_573_5, // m=4
612            1.774_021_666_356_443_4, // m=5
613            1.686_782_891_625_771_4, // m=6
614            1.586_706_680_582_470_6, // m=7
615            1.474_554_673_620_247_9, // m=8
616            1.351_180_415_231_320_7, // m=9
617            1.217_522_858_017_441_3, // m=10
618            1.074_599_216_693_647_8, // m=11
619            0.923_497_226_470_067_7, // m=12
620            0.765_366_864_730_179_7, // m=13
621            0.601_411_599_008_546_1, // m=14
622            0.432_879_227_876_205_8, // m=15
623            0.261_052_384_440_103_0, // m=16
624            0.087_238_774_730_672_0, // m=17
625        ];
626
627        let samples = [
628            SCALE[0] * x[0],
629            SCALE[1] * x[1],
630            SCALE[2] * x[2],
631            SCALE[3] * x[3],
632            SCALE[4] * x[4],
633            SCALE[5] * x[5],
634            SCALE[6] * x[6],
635            SCALE[7] * x[7],
636            SCALE[8] * x[8],
637            SCALE[9] * x[9],
638            SCALE[10] * x[10],
639            SCALE[11] * x[11],
640            SCALE[12] * x[12],
641            SCALE[13] * x[13],
642            SCALE[14] * x[14],
643            SCALE[15] * x[15],
644            SCALE[16] * x[16],
645            SCALE[17] * x[17],
646        ];
647
648        sdct_ii_18(&samples, y);
649
650        y[0] /= 2.0;
651        for i in 1..17 {
652            y[i] = (y[i] / 2.0) - y[i - 1];
653        }
654        y[17] = (y[17] / 2.0) - y[16];
655    }
656
657    /// Continutation of `imdct36`.
658    ///
659    /// Step 3: Decompose N/2-point SDCT-II into two N/4-point SDCT-IIs.
660    fn sdct_ii_18(x: &[f32; 18], y: &mut [f32; 18]) {
661        // Scale factors for odd input samples. Computed from (23).
662        // 2 * cos(PI * (2*m + 1) / 36)
663        const SCALE: [f32; 9] = [
664            1.992_389_396_183_491_1,  // m=0
665            1.931_851_652_578_136_6,  // m=1
666            1.812_615_574_073_299_9,  // m=2
667            1.638_304_088_577_983_6,  // m=3
668            std::f32::consts::SQRT_2, // m=4
669            1.147_152_872_702_092_3,  // m=5
670            0.845_236_523_481_398_9,  // m=6
671            0.517_638_090_205_041_9,  // m=7
672            0.174_311_485_495_316_3,  // m=8
673        ];
674
675        let even = [
676            x[0] + x[18 - 1],
677            x[1] + x[18 - 2],
678            x[2] + x[18 - 3],
679            x[3] + x[18 - 4],
680            x[4] + x[18 - 5],
681            x[5] + x[18 - 6],
682            x[6] + x[18 - 7],
683            x[7] + x[18 - 8],
684            x[8] + x[18 - 9],
685        ];
686
687        sdct_ii_9(&even, y);
688
689        let odd = [
690            SCALE[0] * (x[0] - x[18 - 1]),
691            SCALE[1] * (x[1] - x[18 - 2]),
692            SCALE[2] * (x[2] - x[18 - 3]),
693            SCALE[3] * (x[3] - x[18 - 4]),
694            SCALE[4] * (x[4] - x[18 - 5]),
695            SCALE[5] * (x[5] - x[18 - 6]),
696            SCALE[6] * (x[6] - x[18 - 7]),
697            SCALE[7] * (x[7] - x[18 - 8]),
698            SCALE[8] * (x[8] - x[18 - 9]),
699        ];
700
701        sdct_ii_9(&odd, &mut y[1..]);
702
703        y[3] -= y[3 - 2];
704        y[5] -= y[5 - 2];
705        y[7] -= y[7 - 2];
706        y[9] -= y[9 - 2];
707        y[11] -= y[11 - 2];
708        y[13] -= y[13 - 2];
709        y[15] -= y[15 - 2];
710        y[17] -= y[17 - 2];
711    }
712
713    /// Continutation of `imdct36`.
714    ///
715    /// Step 4: Computation of 9-point (N/4) SDCT-II.
716    fn sdct_ii_9(x: &[f32; 9], y: &mut [f32]) {
717        const D: [f32; 7] = [
718            -1.732_050_807_568_877_2, // -sqrt(3.0)
719            1.879_385_241_571_816_6,  // -2.0 * cos(8.0 * PI / 9.0)
720            -0.347_296_355_333_860_8, // -2.0 * cos(4.0 * PI / 9.0)
721            -1.532_088_886_237_956_0, // -2.0 * cos(2.0 * PI / 9.0)
722            -0.684_040_286_651_337_8, // -2.0 * sin(8.0 * PI / 9.0)
723            -1.969_615_506_024_416_0, // -2.0 * sin(4.0 * PI / 9.0)
724            -1.285_575_219_373_078_5, // -2.0 * sin(2.0 * PI / 9.0)
725        ];
726
727        let a01 = x[3] + x[5];
728        let a02 = x[3] - x[5];
729        let a03 = x[6] + x[2];
730        let a04 = x[6] - x[2];
731        let a05 = x[1] + x[7];
732        let a06 = x[1] - x[7];
733        let a07 = x[8] + x[0];
734        let a08 = x[8] - x[0];
735
736        let a09 = x[4] + a05;
737        let a10 = a01 + a03;
738        let a11 = a10 + a07;
739        let a12 = a03 - a07;
740        let a13 = a01 - a07;
741        let a14 = a01 - a03;
742        let a15 = a02 - a04;
743        let a16 = a15 + a08;
744        let a17 = a04 + a08;
745        let a18 = a02 - a08;
746        let a19 = a02 + a04;
747        let a20 = 2.0 * x[4] - a05;
748
749        let m1 = D[0] * a06;
750        let m2 = D[1] * a12;
751        let m3 = D[2] * a13;
752        let m4 = D[3] * a14;
753        let m5 = D[0] * a16;
754        let m6 = D[4] * a17;
755        let m7 = D[5] * a18; // Note: the cited paper has an error, a1 should be a18.
756        let m8 = D[6] * a19;
757
758        let a21 = a20 + m2;
759        let a22 = a20 - m2;
760        let a23 = a20 + m3;
761        let a24 = m1 + m6;
762        let a25 = m1 - m6;
763        let a26 = m1 + m7;
764
765        y[0] = a09 + a11;
766        y[2] = m8 - a26;
767        y[4] = m4 - a21;
768        y[6] = m5;
769        y[8] = a22 - m3;
770        y[10] = a25 - m7;
771        y[12] = a11 - 2.0 * a09;
772        y[14] = a24 + m8;
773        y[16] = a23 + m4;
774    }
775
776    #[cfg(test)]
777    mod tests {
778        use super::imdct36;
779        use std::f64;
780
781        fn imdct36_analytical(x: &[f32; 18]) -> [f32; 36] {
782            let mut result = [0f32; 36];
783
784            const PI_72: f64 = f64::consts::PI / 72.0;
785
786            for i in 0..36 {
787                let mut sum = 0.0;
788                for j in 0..18 {
789                    sum +=
790                        (x[j] as f64) * (PI_72 * (((2 * i) + 1 + 18) * ((2 * j) + 1)) as f64).cos();
791                }
792                result[i] = sum as f32;
793            }
794            result
795        }
796
797        #[test]
798        fn verify_imdct36() {
799            const TEST_VECTOR: [f32; 18] = [
800                0.0976, 0.9321, 0.6138, 0.0857, 0.0433, 0.4855, 0.2144, 0.8488, //
801                0.6889, 0.2983, 0.1957, 0.7037, 0.0052, 0.0197, 0.3188, 0.5123, //
802                0.2994, 0.7157,
803            ];
804
805            const WINDOW: [f32; 36] = [1.0; 36];
806
807            let mut actual = TEST_VECTOR;
808            let mut overlap = [0.0; 18];
809            imdct36(&mut actual, &WINDOW, &mut overlap);
810
811            let expected = imdct36_analytical(&TEST_VECTOR);
812
813            for i in 0..18 {
814                assert!((expected[i] - actual[i]).abs() < 0.00001);
815                assert!((expected[i + 18] - overlap[i]).abs() < 0.00001);
816            }
817        }
818    }
819}