symphonia_bundle_mp3/layer3/hybrid_synthesis.rs
1// Symphonia
2// Copyright (c) 2019-2022 The Project Symphonia Developers.
3//
4// This Source Code Form is subject to the terms of the Mozilla Public
5// License, v. 2.0. If a copy of the MPL was not distributed with this
6// file, You can obtain one at https://mozilla.org/MPL/2.0/.
7
8// Justification: Some loops are better expressed without a range loop.
9#![allow(clippy::needless_range_loop)]
10
11use crate::common::FrameHeader;
12
13use super::{common::*, GranuleChannel};
14
15use std::{convert::TryInto, f64};
16
17use lazy_static::lazy_static;
18
19lazy_static! {
20 /// Hybrid synthesesis IMDCT window coefficients for: Long, Start, Short, and End block, in that
21 /// order.
22 ///
23 /// For long blocks:
24 ///
25 /// ```text
26 /// W[ 0..36] = sin(PI/36.0 * (i + 0.5))
27 /// ```
28 ///
29 /// For start blocks:
30 ///
31 /// ```text
32 /// W[ 0..18] = sin(PI/36.0 * (i + 0.5))
33 /// W[18..24] = 1.0
34 /// W[24..30] = sin(PI/12.0 * ((i - 18) - 0.5))
35 /// W[30..36] = 0.0
36 /// ```
37 ///
38 /// For short blocks (to be applied to each 12 sample window):
39 ///
40 /// ```text
41 /// W[ 0..12] = sin(PI/12.0 * (i + 0.5))
42 /// W[12..36] = 0.0
43 /// ```
44 ///
45 /// For end blocks:
46 ///
47 /// ```text
48 /// W[ 0..6 ] = 0.0
49 /// W[ 6..12] = sin(PI/12.0 * ((i - 6) + 0.5))
50 /// W[12..18] = 1.0
51 /// W[18..36] = sin(PI/36.0 * (i + 0.5))
52 /// ```
53 static ref IMDCT_WINDOWS: [[f32; 36]; 4] = {
54 const PI_36: f64 = f64::consts::PI / 36.0;
55 const PI_12: f64 = f64::consts::PI / 12.0;
56
57 let mut windows = [[0f32; 36]; 4];
58
59 // Window for Long blocks.
60 for i in 0..36 {
61 windows[0][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
62 }
63
64 // Window for Start blocks (indicies 30..36 implictly 0.0).
65 for i in 0..18 {
66 windows[1][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
67 }
68 for i in 18..24 {
69 windows[1][i] = 1.0;
70 }
71 for i in 24..30 {
72 windows[1][i] = (PI_12 * ((i - 18) as f64 + 0.5)).sin() as f32;
73 }
74
75 // Window for Short blocks.
76 for i in 0..12 {
77 windows[2][i] = (PI_12 * (i as f64 + 0.5)).sin() as f32;
78 }
79
80 // Window for End blocks (indicies 0..6 implicitly 0.0).
81 for i in 6..12 {
82 windows[3][i] = (PI_12 * ((i - 6) as f64 + 0.5)).sin() as f32;
83 }
84 for i in 12..18 {
85 windows[3][i] = 1.0;
86 }
87 for i in 18..36 {
88 windows[3][i] = (PI_36 * (i as f64 + 0.5)).sin() as f32;
89 }
90
91 windows
92 };
93}
94
95lazy_static! {
96 /// Lookup table of cosine coefficients for half of a 12-point IMDCT.
97 ///
98 /// This table is derived from the general expression:
99 ///
100 /// ```text
101 /// cos12[i][k] = cos(PI/24.0 * (2*i + 1 + N/2) * (2*k + 1))
102 /// ```
103 /// where:
104 /// `N=12`, `i=N/4..3N/4`, and `k=0..N/2`.
105 static ref IMDCT_HALF_COS_12: [[f32; 6]; 6] = {
106 const PI_24: f64 = f64::consts::PI / 24.0;
107
108 let mut cos = [[0f32; 6]; 6];
109
110 for (i, cos_i) in cos.iter_mut().enumerate() {
111 for (k, cos_ik) in cos_i.iter_mut().enumerate() {
112 // Only compute the middle half of the cosine lookup table (i offset by 3).
113 let n = (2 * (i + 3) + (12 / 2) + 1) * (2 * k + 1);
114 *cos_ik = (PI_24 * n as f64).cos() as f32;
115 }
116 }
117
118 cos
119 };
120}
121
122lazy_static! {
123 /// Pair of lookup tables, CS and CA, for alias reduction.
124 ///
125 /// As per ISO/IEC 11172-3, CS and CA are calculated as follows:
126 ///
127 /// ```text
128 /// cs[i] = 1.0 / sqrt(1.0 + c[i]^2)
129 /// ca[i] = c[i] / sqrt(1.0 + c[i]^2)
130 /// ```
131 ///
132 /// where:
133 /// ```text
134 /// c[i] = [ -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037 ]
135 /// ```
136 static ref ANTIALIAS_CS_CA: ([f32; 8], [f32; 8]) = {
137 const C: [f64; 8] = [ -0.6, -0.535, -0.33, -0.185, -0.095, -0.041, -0.0142, -0.0037 ];
138
139 let mut cs = [0f32; 8];
140 let mut ca = [0f32; 8];
141
142 for i in 0..8 {
143 let sqrt = f64::sqrt(1.0 + (C[i] * C[i]));
144 cs[i] = (1.0 / sqrt) as f32;
145 ca[i] = (C[i] / sqrt) as f32;
146 }
147
148 (cs, ca)
149 };
150}
151
152/// Reorder samples that are part of short blocks into sub-band order.
153pub(super) fn reorder(header: &FrameHeader, channel: &mut GranuleChannel, buf: &mut [f32; 576]) {
154 // Only short blocks are reordered.
155 if let BlockType::Short { is_mixed } = channel.block_type {
156 // Every short block is split into 3 equally sized windows as illustrated below (e.g. for
157 // a short scale factor band with win_len=4):
158 //
159 // <- Window #1 -> <- Window #2 -> <- Window #3 ->
160 // [ 0 | 1 | 2 | 3 ][ 4 | 5 | 6 | 7 ][ 8 | 9 | a | b ]
161 // <----- 3 * Short Scale Factor Band Width ----->
162 //
163 // Reordering interleaves the samples of each window as follows:
164 //
165 // [ 0 | 4 | 8 | 1 | 5 | 9 | 2 | 6 | a | 3 | 7 | b ]
166 // <---- 3 * Short Scale Factor Band Width ---->
167 //
168 // Basically, reordering interleaves the 3 windows the same way that 3 planar audio buffers
169 // would be interleaved.
170 debug_assert!(channel.rzero <= 576);
171
172 // In mixed blocks, only the short bands can be re-ordered. Determine the applicable bands.
173 let bands = if is_mixed {
174 let switch = SFB_MIXED_SWITCH_POINT[header.sample_rate_idx];
175 &SFB_MIXED_BANDS[header.sample_rate_idx][switch..]
176 }
177 else {
178 &SFB_SHORT_BANDS[header.sample_rate_idx]
179 };
180
181 let mut reorder_buf = [0f32; 576];
182
183 let start = bands[0];
184 let mut i = start;
185
186 for (((s0, s1), s2), s3) in
187 bands.iter().zip(&bands[1..]).zip(&bands[2..]).zip(&bands[3..]).step_by(3)
188 {
189 // Do not reorder short blocks that begin after the rzero partition boundary since
190 // they're zeroed.
191 if *s0 >= channel.rzero {
192 break;
193 }
194
195 // The three short sample windows.
196 let win0 = &buf[*s0..*s1];
197 let win1 = &buf[*s1..*s2];
198 let win2 = &buf[*s2..*s3];
199
200 // Interleave the three short sample windows.
201 for ((w0, w1), w2) in win0.iter().zip(win1).zip(win2) {
202 reorder_buf[i + 0] = *w0;
203 reorder_buf[i + 1] = *w1;
204 reorder_buf[i + 2] = *w2;
205 i += 3;
206 }
207 }
208
209 // Copy reordered samples from the reorder buffer to the actual sample buffer.
210 buf[start..i].copy_from_slice(&reorder_buf[start..i]);
211
212 // After reordering, the start of the rzero partition may no longer be valid. Update it.
213 channel.rzero = channel.rzero.max(i);
214 }
215}
216
217/// Applies the anti-aliasing filter to sub-bands that are not part of short blocks.
218pub(super) fn antialias(channel: &mut GranuleChannel, samples: &mut [f32; 576]) {
219 // The maximum number of sub-bands to anti-alias depends on block type.
220 let sb_limit = match channel.block_type {
221 // Short blocks are never anti-aliased.
222 BlockType::Short { is_mixed: false } => return,
223 // Mixed blocks have a long block span the first 36 samples (2 sub-bands). Therefore, only
224 // anti-alias these two sub-bands.
225 BlockType::Short { is_mixed: true } => 2,
226 // All other block types require all 32 sub-bands to be anti-aliased.
227 _ => 32,
228 };
229
230 // Amortize the lazy_static fetch over the entire anti-aliasing operation.
231 let (cs, ca): &([f32; 8], [f32; 8]) = &ANTIALIAS_CS_CA;
232
233 // The sub-band that intersects the start of the rzero partition. All sub-bands after this one
234 // are zeroed and do-not need anti-aliasing.
235 let sb_rzero = channel.rzero / 18;
236
237 // The anti-aliasing filter must be applied up-to the last non-zero sub-band. After
238 // anti-aliasing, the first zeroed sub-band may have non-zero values "smeared" into it.
239 // Therefore, the rzero must be updated.
240 channel.rzero = 18 * sb_limit.min(sb_rzero + 2).min(32);
241
242 // Anti-aliasing is performed using 8 butterfly calculations at the boundaries of ADJACENT
243 // sub-bands. For each calculation, there are two samples: lower and upper. For each iteration,
244 // the lower sample index advances backwards from the boundary, while the upper sample index
245 // advances forward from the boundary.
246 //
247 // For example, let B(li, ui) represent the butterfly calculation where li and ui are the
248 // indicies of the lower and upper samples respectively. If j is the index of the first sample
249 // of a sub-band, then the iterations are as follows:
250 //
251 // B(j-1,j), B(j-2,j+1), B(j-3,j+2), B(j-4,j+3), B(j-5,j+4), B(j-6,j+5), B(j-7,j+6), B(j-8,j+7)
252 //
253 // The butterfly calculation itself can be illustrated as follows:
254 //
255 // * cs[i]
256 // l0 -------o------(-)------> l1
257 // \ / l1 = l0 * cs[i] - u0 * ca[i]
258 // \ / * ca[i] u1 = u0 * cs[i] + l0 * ca[i]
259 // \
260 // / \ * ca[i] where:
261 // / \ cs[i], ca[i] are constant values for iteration i,
262 // u0 ------o------(+)-------> u1 derived from table B.9 of ISO/IEC 11172-3.
263 // * cs[i]
264 //
265 // Note that all butterfly calculations only involve two samples, and all iterations are
266 // independant of each other. This lends itself well for SIMD processing.
267 for sb in (18..channel.rzero).step_by(18) {
268 for i in 0..8 {
269 let li = sb - 1 - i;
270 let ui = sb + i;
271 let lower = samples[li];
272 let upper = samples[ui];
273 samples[li] = lower * cs[i] - upper * ca[i];
274 samples[ui] = upper * cs[i] + lower * ca[i];
275 }
276 }
277}
278
279/// Performs hybrid synthesis (IMDCT and windowing).
280pub(super) fn hybrid_synthesis(
281 channel: &GranuleChannel,
282 overlap: &mut [[f32; 18]; 32],
283 samples: &mut [f32; 576],
284) {
285 // The first sub-band after the rzero partition boundary is the sub-band limit. All sub-bands
286 // past this are zeroed.
287 let sb_limit = (channel.rzero + 17) / 18;
288
289 // Determine the split point of long and short blocks in terms of a sub-band index.
290 //
291 // Short blocks process 0 sub-bands as long blocks, mixed blocks process the first 2 sub-bands
292 // as long blocks, and all other block types (long, start, end) process all 32 sub-bands as long
293 // blocks.
294 let sb_split = match channel.block_type {
295 BlockType::Short { is_mixed: false } => 0,
296 BlockType::Short { is_mixed: true } => 2,
297 _ => 32,
298 };
299
300 // If the split point is not 0, then some sub-bands need to be processed as long blocks using
301 // the 36-point IMDCT.
302 if sb_split > 0 {
303 // Select the appropriate window given the block type.
304 let window: &[f32; 36] = match channel.block_type {
305 BlockType::Start => &IMDCT_WINDOWS[1],
306 BlockType::End => &IMDCT_WINDOWS[3],
307 _ => &IMDCT_WINDOWS[0],
308 };
309
310 let sb_long_end = sb_split.min(sb_limit);
311
312 // For each of the sub-bands (18 samples each) in the long block...
313 for sb in 0..sb_long_end {
314 let start = 18 * sb;
315
316 // Casting to a slice of a known-size lets the compiler elide bounds checks.
317 let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
318
319 // Perform the 36-point on the entire sub-band.
320 imdct36::imdct36(sub_band, window, &mut overlap[sb]);
321 }
322 }
323
324 // If the split point is less-than 32, then some sub-bands need to be processed as short blocks
325 // using the 12-point IMDCT on each of the three windows.
326 if sb_split < 32 {
327 // Select the short block window.
328 let window: &[f32; 36] = &IMDCT_WINDOWS[2];
329
330 let sb_short_begin = sb_split.min(sb_limit);
331
332 // For each of the sub-bands (18 samples each) in the short block...
333 for sb in sb_short_begin..sb_limit {
334 let start = 18 * sb;
335
336 // Casting to a slice of a known-size lets the compiler elide bounds checks.
337 let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
338
339 // Perform the 12-point IMDCT on each of the 3 short windows within the sub-band (6
340 // samples each).
341 imdct12_win(sub_band, window, &mut overlap[sb]);
342 }
343 }
344
345 // Every sub-band after the the sub-band limit are zeroed, however, the overlap for that
346 // sub-band may be non-zero. Therefore, copy it over.
347 for sb in sb_limit..32 {
348 let start = 18 * sb;
349 let sub_band: &mut [f32; 18] = (&mut samples[start..(start + 18)]).try_into().unwrap();
350
351 sub_band.copy_from_slice(&overlap[sb]);
352 overlap[sb].fill(0.0);
353 }
354}
355
356/// Performs the 12-point IMDCT, and windowing for each of the 3 short windows of a short block, and
357/// then overlap-adds the result.
358fn imdct12_win(x: &mut [f32; 18], window: &[f32; 36], overlap: &mut [f32; 18]) {
359 let cos12: &[[f32; 6]; 6] = &IMDCT_HALF_COS_12;
360
361 let mut tmp = [0.0; 36];
362
363 for w in 0..3 {
364 for i in 0..3 {
365 // Compute the 12-point IMDCT for each of the 3 short windows using a half-size IMDCT
366 // followed by post-processing.
367 //
368 // In general, the IMDCT is defined as:
369 //
370 // (N/2)-1
371 // y[i] = SUM { x[k] * cos(PI/2N * (2i + 1 + N/2) * (2k + 1)) }
372 // k=0
373 //
374 // For N=12, the IMDCT becomes:
375 //
376 // 5
377 // y[i] = SUM { x[k] * cos(PI/24 * (2i + 7) * (2k + 1)) }
378 // k=0
379 //
380 // The cosine twiddle factors are easily indexable by i and k, and are therefore
381 // pre-computed and placed into a look-up table.
382 //
383 // Further, y[3..0] = -y[3..6], and y[12..9] = y[6..9] which reduces the amount of work
384 // by half.
385 //
386 // Therefore, it is possible to split the half-size IMDCT computation into two halves.
387 // In the calculations below, yl is the left-half output of the half-size IMDCT, and yr
388 // is the right-half.
389
390 let yl = (x[w] * cos12[i][0])
391 + (x[3 * 1 + w] * cos12[i][1])
392 + (x[3 * 2 + w] * cos12[i][2])
393 + (x[3 * 3 + w] * cos12[i][3])
394 + (x[3 * 4 + w] * cos12[i][4])
395 + (x[3 * 5 + w] * cos12[i][5]);
396
397 let yr = (x[w] * cos12[i + 3][0])
398 + (x[3 * 1 + w] * cos12[i + 3][1])
399 + (x[3 * 2 + w] * cos12[i + 3][2])
400 + (x[3 * 3 + w] * cos12[i + 3][3])
401 + (x[3 * 4 + w] * cos12[i + 3][4])
402 + (x[3 * 5 + w] * cos12[i + 3][5]);
403
404 // Each adjacent 12-point IMDCT windows are overlapped and added in the output, with the
405 // first and last 6 samples of the output always being 0.
406 //
407 // Each sample from the 12-point IMDCT is multiplied by the appropriate window function
408 // as specified in ISO/IEC 11172-3. The values of the window function are pre-computed
409 // and given by window[0..12].
410 //
411 // Since there are 3 IMDCT windows (indexed by w), y[0..12] is computed 3 times.
412 // For the purpose of the diagram below, we label these IMDCT windows as: y0[0..12],
413 // y1[0..12], and y2[0..12], for IMDCT windows 0..3 respectively.
414 //
415 // Therefore, the overlap-and-add operation can be visualized as below:
416 //
417 // 0 6 12 18 24 30 36
418 // +-------------+------------+------------+------------+------------+-------------+
419 // | 0 | y0[..6] | y0[..6] | y1[6..] | y2[6..] | 0 |
420 // | (6) | | + y1[6..] | + y2[..6] | | (6) |
421 // +-------------+------------+------------+------------+------------+-------------+
422 // . . . . . . .
423 // . +-------------------------+ . . .
424 // . | IMDCT #1 (y0) | . . .
425 // . +-------------------------+ . . .
426 // . . +-------------------------+ . .
427 // . . | IMDCT #2 (y1) | . .
428 // . . +-------------------------+ . .
429 // . . . +-------------------------+ .
430 // . . . | IMDCT #3 (y2) | .
431 // . . . +-------------------------+ .
432 // . . . . . . .
433 //
434 // Since the 12-point IMDCT was decomposed into a half-size IMDCT and post-processing
435 // operations, and further split into left and right halves, each iteration of this loop
436 // produces 4 output samples.
437
438 tmp[6 + 6 * w + 3 - i - 1] += -yl * window[3 - i - 1];
439 tmp[6 + 6 * w + i + 3] += yl * window[i + 3];
440 tmp[6 + 6 * w + i + 6] += yr * window[i + 6];
441 tmp[6 + 6 * w + 12 - i - 1] += yr * window[12 - i - 1];
442 }
443 }
444
445 // Overlap-add.
446 for i in 0..18 {
447 x[i] = tmp[i] + overlap[i];
448 overlap[i] = tmp[i + 18];
449 }
450}
451
452/// Inverts odd samples in odd sub-bands.
453pub fn frequency_inversion(samples: &mut [f32; 576]) {
454 // There are 32 sub-bands spanning 576 samples:
455 //
456 // 0 18 36 54 72 90 108 558 576
457 // +-----+-----+-----+-----+-----+-----+ . . . . +------+
458 // s[i] = | sb0 | sb1 | sb2 | sb3 | sb4 | sb5 | . . . . | sb31 |
459 // +-----+-----+-----+-----+-----+-----+ . . . . +------+
460 //
461 // The odd sub-bands are thusly:
462 //
463 // sb1 -> s[ 18.. 36]
464 // sb3 -> s[ 54.. 72]
465 // sb5 -> s[ 90..108]
466 // ...
467 // sb31 -> s[558..576]
468 //
469 // Each odd sample in the aforementioned sub-bands must be negated.
470 for i in (18..576).step_by(36) {
471 // Sample negation is unrolled into a 2x4 + 1 (9) operation to improve vectorization.
472 for j in (i..i + 16).step_by(8) {
473 samples[j + 1] = -samples[j + 1];
474 samples[j + 3] = -samples[j + 3];
475 samples[j + 5] = -samples[j + 5];
476 samples[j + 7] = -samples[j + 7];
477 }
478 samples[i + 18 - 1] = -samples[i + 18 - 1];
479 }
480}
481
482#[cfg(test)]
483mod tests {
484 use super::imdct12_win;
485 use super::IMDCT_WINDOWS;
486 use std::f64;
487
488 fn imdct12_analytical(x: &[f32; 6]) -> [f32; 12] {
489 const PI_24: f64 = f64::consts::PI / 24.0;
490
491 let mut result = [0f32; 12];
492
493 for i in 0..12 {
494 let mut sum = 0.0;
495 for k in 0..6 {
496 sum +=
497 (x[k] as f64) * (PI_24 * ((2 * i + (12 / 2) + 1) * (2 * k + 1)) as f64).cos();
498 }
499 result[i] = sum as f32;
500 }
501
502 result
503 }
504
505 #[test]
506 fn verify_imdct12_win() {
507 const TEST_VECTOR: [f32; 18] = [
508 0.0976, 0.9321, 0.6138, 0.0857, 0.0433, 0.4855, 0.2144, 0.8488, //
509 0.6889, 0.2983, 0.1957, 0.7037, 0.0052, 0.0197, 0.3188, 0.5123, //
510 0.2994, 0.7157,
511 ];
512
513 let window = &IMDCT_WINDOWS[2];
514
515 let mut actual = TEST_VECTOR;
516 let mut overlap = [0.0; 18];
517 imdct12_win(&mut actual, window, &mut overlap);
518
519 // The following block performs 3 analytical 12-point IMDCTs over the test vector, and then
520 // windows and overlaps the results to generate the final result.
521 let expected = {
522 let mut expected = [0f32; 36];
523
524 let mut x0 = [0f32; 6];
525 let mut x1 = [0f32; 6];
526 let mut x2 = [0f32; 6];
527
528 for i in 0..6 {
529 x0[i] = TEST_VECTOR[3 * i + 0];
530 x1[i] = TEST_VECTOR[3 * i + 1];
531 x2[i] = TEST_VECTOR[3 * i + 2];
532 }
533
534 let imdct0 = imdct12_analytical(&x0);
535 let imdct1 = imdct12_analytical(&x1);
536 let imdct2 = imdct12_analytical(&x2);
537
538 for i in 0..12 {
539 expected[6 + i] += imdct0[i] * window[i];
540 expected[12 + i] += imdct1[i] * window[i];
541 expected[18 + i] += imdct2[i] * window[i];
542 }
543
544 expected
545 };
546
547 for i in 0..18 {
548 assert!((expected[i] - actual[i]).abs() < 0.00001);
549 assert!((expected[i + 18] - overlap[i]).abs() < 0.00001);
550 }
551 }
552}
553
554mod imdct36 {
555 /// Performs an Inverse Modified Discrete Cosine Transform (IMDCT) transforming 18
556 /// frequency-domain input samples, into 36 time-domain output samples.
557 ///
558 /// This is a straight-forward implementation of the IMDCT using Szu-Wei Lee's algorithm
559 /// published in article [1].
560 ///
561 /// [1] Szu-Wei Lee, "Improved algorithm for efficient computation of the forward and backward
562 /// MDCT in MPEG audio coder", IEEE Transactions on Circuits and Systems II: Analog and Digital
563 /// Signal Processing, vol. 48, no. 10, pp. 990-994, 2001.
564 ///
565 /// https://ieeexplore.ieee.org/document/974789
566 pub fn imdct36(x: &mut [f32; 18], window: &[f32; 36], overlap: &mut [f32; 18]) {
567 let mut dct = [0f32; 18];
568
569 dct_iv(x, &mut dct);
570
571 // Mapping of DCT-IV to IMDCT
572 //
573 // 0 9 27 36
574 // +------------+------------------------+------------+
575 // | dct[9..18] | -dct[0..18].rev() | -dct[0..9] |
576 // +------------+------------------------+------------+
577 //
578 // where dct[] is the DCT-IV of x.
579
580 // First 9 IMDCT values are values 9..18 in the DCT-IV.
581 for i in 0..9 {
582 x[i] = overlap[i] + dct[9 + i] * window[i];
583 }
584
585 // Next 18 IMDCT values are negated and /reversed/ values 0..18 in the DCT-IV.
586 for i in 9..18 {
587 x[i] = overlap[i] - dct[27 - i - 1] * window[i];
588 }
589
590 for i in 18..27 {
591 overlap[i - 18] = -dct[27 - i - 1] * window[i];
592 }
593
594 // Last 9 IMDCT values are negated values 0..9 in the DCT-IV.
595 for i in 27..36 {
596 overlap[i - 18] = -dct[i - 27] * window[i];
597 }
598 }
599
600 /// Continutation of `imdct36`.
601 ///
602 /// Step 2: Mapping N/2-point DCT-IV to N/2-point SDCT-II.
603 fn dct_iv(x: &[f32; 18], y: &mut [f32; 18]) {
604 // Scale factors for input samples. Computed from (16).
605 // 2 * cos(PI * (2*m + 1) / (2*36)
606 const SCALE: [f32; 18] = [
607 1.998_096_443_163_715_6, // m=0
608 1.982_889_722_747_620_8, // m=1
609 1.952_592_014_239_866_7, // m=2
610 1.907_433_901_496_453_9, // m=3
611 1.847_759_065_022_573_5, // m=4
612 1.774_021_666_356_443_4, // m=5
613 1.686_782_891_625_771_4, // m=6
614 1.586_706_680_582_470_6, // m=7
615 1.474_554_673_620_247_9, // m=8
616 1.351_180_415_231_320_7, // m=9
617 1.217_522_858_017_441_3, // m=10
618 1.074_599_216_693_647_8, // m=11
619 0.923_497_226_470_067_7, // m=12
620 0.765_366_864_730_179_7, // m=13
621 0.601_411_599_008_546_1, // m=14
622 0.432_879_227_876_205_8, // m=15
623 0.261_052_384_440_103_0, // m=16
624 0.087_238_774_730_672_0, // m=17
625 ];
626
627 let samples = [
628 SCALE[0] * x[0],
629 SCALE[1] * x[1],
630 SCALE[2] * x[2],
631 SCALE[3] * x[3],
632 SCALE[4] * x[4],
633 SCALE[5] * x[5],
634 SCALE[6] * x[6],
635 SCALE[7] * x[7],
636 SCALE[8] * x[8],
637 SCALE[9] * x[9],
638 SCALE[10] * x[10],
639 SCALE[11] * x[11],
640 SCALE[12] * x[12],
641 SCALE[13] * x[13],
642 SCALE[14] * x[14],
643 SCALE[15] * x[15],
644 SCALE[16] * x[16],
645 SCALE[17] * x[17],
646 ];
647
648 sdct_ii_18(&samples, y);
649
650 y[0] /= 2.0;
651 for i in 1..17 {
652 y[i] = (y[i] / 2.0) - y[i - 1];
653 }
654 y[17] = (y[17] / 2.0) - y[16];
655 }
656
657 /// Continutation of `imdct36`.
658 ///
659 /// Step 3: Decompose N/2-point SDCT-II into two N/4-point SDCT-IIs.
660 fn sdct_ii_18(x: &[f32; 18], y: &mut [f32; 18]) {
661 // Scale factors for odd input samples. Computed from (23).
662 // 2 * cos(PI * (2*m + 1) / 36)
663 const SCALE: [f32; 9] = [
664 1.992_389_396_183_491_1, // m=0
665 1.931_851_652_578_136_6, // m=1
666 1.812_615_574_073_299_9, // m=2
667 1.638_304_088_577_983_6, // m=3
668 std::f32::consts::SQRT_2, // m=4
669 1.147_152_872_702_092_3, // m=5
670 0.845_236_523_481_398_9, // m=6
671 0.517_638_090_205_041_9, // m=7
672 0.174_311_485_495_316_3, // m=8
673 ];
674
675 let even = [
676 x[0] + x[18 - 1],
677 x[1] + x[18 - 2],
678 x[2] + x[18 - 3],
679 x[3] + x[18 - 4],
680 x[4] + x[18 - 5],
681 x[5] + x[18 - 6],
682 x[6] + x[18 - 7],
683 x[7] + x[18 - 8],
684 x[8] + x[18 - 9],
685 ];
686
687 sdct_ii_9(&even, y);
688
689 let odd = [
690 SCALE[0] * (x[0] - x[18 - 1]),
691 SCALE[1] * (x[1] - x[18 - 2]),
692 SCALE[2] * (x[2] - x[18 - 3]),
693 SCALE[3] * (x[3] - x[18 - 4]),
694 SCALE[4] * (x[4] - x[18 - 5]),
695 SCALE[5] * (x[5] - x[18 - 6]),
696 SCALE[6] * (x[6] - x[18 - 7]),
697 SCALE[7] * (x[7] - x[18 - 8]),
698 SCALE[8] * (x[8] - x[18 - 9]),
699 ];
700
701 sdct_ii_9(&odd, &mut y[1..]);
702
703 y[3] -= y[3 - 2];
704 y[5] -= y[5 - 2];
705 y[7] -= y[7 - 2];
706 y[9] -= y[9 - 2];
707 y[11] -= y[11 - 2];
708 y[13] -= y[13 - 2];
709 y[15] -= y[15 - 2];
710 y[17] -= y[17 - 2];
711 }
712
713 /// Continutation of `imdct36`.
714 ///
715 /// Step 4: Computation of 9-point (N/4) SDCT-II.
716 fn sdct_ii_9(x: &[f32; 9], y: &mut [f32]) {
717 const D: [f32; 7] = [
718 -1.732_050_807_568_877_2, // -sqrt(3.0)
719 1.879_385_241_571_816_6, // -2.0 * cos(8.0 * PI / 9.0)
720 -0.347_296_355_333_860_8, // -2.0 * cos(4.0 * PI / 9.0)
721 -1.532_088_886_237_956_0, // -2.0 * cos(2.0 * PI / 9.0)
722 -0.684_040_286_651_337_8, // -2.0 * sin(8.0 * PI / 9.0)
723 -1.969_615_506_024_416_0, // -2.0 * sin(4.0 * PI / 9.0)
724 -1.285_575_219_373_078_5, // -2.0 * sin(2.0 * PI / 9.0)
725 ];
726
727 let a01 = x[3] + x[5];
728 let a02 = x[3] - x[5];
729 let a03 = x[6] + x[2];
730 let a04 = x[6] - x[2];
731 let a05 = x[1] + x[7];
732 let a06 = x[1] - x[7];
733 let a07 = x[8] + x[0];
734 let a08 = x[8] - x[0];
735
736 let a09 = x[4] + a05;
737 let a10 = a01 + a03;
738 let a11 = a10 + a07;
739 let a12 = a03 - a07;
740 let a13 = a01 - a07;
741 let a14 = a01 - a03;
742 let a15 = a02 - a04;
743 let a16 = a15 + a08;
744 let a17 = a04 + a08;
745 let a18 = a02 - a08;
746 let a19 = a02 + a04;
747 let a20 = 2.0 * x[4] - a05;
748
749 let m1 = D[0] * a06;
750 let m2 = D[1] * a12;
751 let m3 = D[2] * a13;
752 let m4 = D[3] * a14;
753 let m5 = D[0] * a16;
754 let m6 = D[4] * a17;
755 let m7 = D[5] * a18; // Note: the cited paper has an error, a1 should be a18.
756 let m8 = D[6] * a19;
757
758 let a21 = a20 + m2;
759 let a22 = a20 - m2;
760 let a23 = a20 + m3;
761 let a24 = m1 + m6;
762 let a25 = m1 - m6;
763 let a26 = m1 + m7;
764
765 y[0] = a09 + a11;
766 y[2] = m8 - a26;
767 y[4] = m4 - a21;
768 y[6] = m5;
769 y[8] = a22 - m3;
770 y[10] = a25 - m7;
771 y[12] = a11 - 2.0 * a09;
772 y[14] = a24 + m8;
773 y[16] = a23 + m4;
774 }
775
776 #[cfg(test)]
777 mod tests {
778 use super::imdct36;
779 use std::f64;
780
781 fn imdct36_analytical(x: &[f32; 18]) -> [f32; 36] {
782 let mut result = [0f32; 36];
783
784 const PI_72: f64 = f64::consts::PI / 72.0;
785
786 for i in 0..36 {
787 let mut sum = 0.0;
788 for j in 0..18 {
789 sum +=
790 (x[j] as f64) * (PI_72 * (((2 * i) + 1 + 18) * ((2 * j) + 1)) as f64).cos();
791 }
792 result[i] = sum as f32;
793 }
794 result
795 }
796
797 #[test]
798 fn verify_imdct36() {
799 const TEST_VECTOR: [f32; 18] = [
800 0.0976, 0.9321, 0.6138, 0.0857, 0.0433, 0.4855, 0.2144, 0.8488, //
801 0.6889, 0.2983, 0.1957, 0.7037, 0.0052, 0.0197, 0.3188, 0.5123, //
802 0.2994, 0.7157,
803 ];
804
805 const WINDOW: [f32; 36] = [1.0; 36];
806
807 let mut actual = TEST_VECTOR;
808 let mut overlap = [0.0; 18];
809 imdct36(&mut actual, &WINDOW, &mut overlap);
810
811 let expected = imdct36_analytical(&TEST_VECTOR);
812
813 for i in 0..18 {
814 assert!((expected[i] - actual[i]).abs() < 0.00001);
815 assert!((expected[i + 18] - overlap[i]).abs() < 0.00001);
816 }
817 }
818 }
819}