1use crate::conversions::interpolator::BarycentricWeight;
30use crate::math::FusedMultiplyAdd;
31use num_traits::AsPrimitive;
32use std::arch::x86_64::*;
33use std::ops::{Add, Mul, Sub};
34
/// Four `i16` values stored with C layout and 8-byte alignment.
///
/// The fetchers in this file read one whole element (64 bits) at a time
/// with `_mm_loadu_si64`.
#[repr(C, align(8))]
pub(crate) struct AvxAlignedI16(pub(crate) [i16; 4]);
37
/// Tetrahedral interpolator over a single borrowed table of [`AvxAlignedI16`] entries.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct TetrahedralAvxQ0_15<'a, const GRID_SIZE: usize> {
    /// Table that is sampled; node offsets are derived from `GRID_SIZE` by the fetcher.
    pub(crate) cube: &'a [AvxAlignedI16],
}
42
/// Pyramidal interpolator over a single borrowed table of [`AvxAlignedI16`] entries.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct PyramidalAvxQ0_15<'a, const GRID_SIZE: usize> {
    /// Table that is sampled; node offsets are derived from `GRID_SIZE` by the fetcher.
    pub(crate) cube: &'a [AvxAlignedI16],
}
47
/// Prismatic interpolator over a single borrowed table of [`AvxAlignedI16`] entries.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct PrismaticAvxQ0_15<'a, const GRID_SIZE: usize> {
    /// Table that is sampled; node offsets are derived from `GRID_SIZE` by the fetcher.
    pub(crate) cube: &'a [AvxAlignedI16],
}
52
/// Trilinear interpolator over a single borrowed table of [`AvxAlignedI16`] entries.
pub(crate) struct TrilinearAvxQ0_15<'a, const GRID_SIZE: usize> {
    /// Table that is sampled; node offsets are derived from `GRID_SIZE` by the fetcher.
    pub(crate) cube: &'a [AvxAlignedI16],
}
56
/// Prismatic interpolator that samples two borrowed tables at the same grid position.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct PrismaticAvxQ0_15Double<'a, const GRID_SIZE: usize> {
    /// First table; its result is the first element of the returned pair.
    pub(crate) cube0: &'a [AvxAlignedI16],
    /// Second table; its result is the second element of the returned pair.
    pub(crate) cube1: &'a [AvxAlignedI16],
}
62
/// Trilinear interpolator that samples two borrowed tables at the same grid position.
pub(crate) struct TrilinearAvxQ0_15Double<'a, const GRID_SIZE: usize> {
    /// First table; loaded into the low 128-bit lane by the paired fetcher.
    pub(crate) cube0: &'a [AvxAlignedI16],
    /// Second table; loaded into the high 128-bit lane by the paired fetcher.
    pub(crate) cube1: &'a [AvxAlignedI16],
}
67
/// Pyramidal interpolator that samples two borrowed tables at the same grid position.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct PyramidAvxFmaQ0_15Double<'a, const GRID_SIZE: usize> {
    /// First table; its result is the first element of the returned pair.
    pub(crate) cube0: &'a [AvxAlignedI16],
    /// Second table; its result is the second element of the returned pair.
    pub(crate) cube1: &'a [AvxAlignedI16],
}
73
/// Tetrahedral interpolator that samples two borrowed tables at the same grid position.
///
/// Only compiled when the `options` feature is enabled.
#[cfg(feature = "options")]
pub(crate) struct TetrahedralAvxQ0_15Double<'a, const GRID_SIZE: usize> {
    /// First table; loaded into the low 128-bit lane by the paired fetcher.
    pub(crate) cube0: &'a [AvxAlignedI16],
    /// Second table; loaded into the high 128-bit lane by the paired fetcher.
    pub(crate) cube1: &'a [AvxAlignedI16],
}
79
/// Common interface of the interpolators that work on two tables at once.
pub(crate) trait AvxMdInterpolationQ0_15Double<'a, const GRID_SIZE: usize> {
    /// Creates the interpolator from the two borrowed tables.
    fn new(table0: &'a [AvxAlignedI16], table1: &'a [AvxAlignedI16]) -> Self;
    /// Interpolates both tables at the position described by the `lut` entries
    /// selected with `in_r`, `in_g` and `in_b`.
    ///
    /// Returns one 128-bit result per table, in table order.
    fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse);
}
90
/// Common interface of the interpolators that work on a single table.
pub(crate) trait AvxMdInterpolationQ0_15<'a, const GRID_SIZE: usize> {
    /// Creates the interpolator from the borrowed table.
    fn new(table: &'a [AvxAlignedI16]) -> Self;
    /// Interpolates the table at the position described by the `lut` entries
    /// selected with `in_r`, `in_g` and `in_b`, returning a 128-bit result.
    fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
    ) -> AvxVectorQ0_15Sse;
}
101
/// Loads the value stored at grid node `(x, y, z)` as a vector of type `T`.
trait Fetcher<T> {
    /// Returns the node value at the given grid coordinates.
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
105
/// Transparent wrapper around a 128-bit integer register.
///
/// The operator impls in this file treat it as packed `i16` lanes
/// (`_mm_add_epi16`, `_mm_sub_epi16`, `_mm_mulhrs_epi16`).
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct AvxVectorQ0_15Sse {
    /// Raw register value.
    pub(crate) v: __m128i,
}
111
/// Transparent wrapper around a 256-bit integer register.
///
/// The operator impls in this file treat it as packed `i16` lanes
/// (`_mm256_add_epi16`, `_mm256_sub_epi16`, `_mm256_mulhrs_epi16`).
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct AvxVectorQ0_15 {
    /// Raw register value.
    pub(crate) v: __m256i,
}
117
118impl AvxVectorQ0_15 {
119 #[inline(always)]
120 pub(crate) fn from_sse(lo: AvxVectorQ0_15Sse, hi: AvxVectorQ0_15Sse) -> AvxVectorQ0_15 {
121 unsafe {
122 AvxVectorQ0_15 {
123 v: _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(lo.v), hi.v),
124 }
125 }
126 }
127
128 #[inline(always)]
129 pub(crate) fn split(self) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
130 unsafe {
131 (
132 AvxVectorQ0_15Sse {
133 v: _mm256_castsi256_si128(self.v),
134 },
135 AvxVectorQ0_15Sse {
136 v: _mm256_extracti128_si256::<1>(self.v),
137 },
138 )
139 }
140 }
141}
142
143impl From<i16> for AvxVectorQ0_15Sse {
144 #[inline(always)]
145 fn from(v: i16) -> Self {
146 AvxVectorQ0_15Sse {
147 v: unsafe { _mm_set1_epi16(v) },
148 }
149 }
150}
151
152impl From<i16> for AvxVectorQ0_15 {
153 #[inline(always)]
154 fn from(v: i16) -> Self {
155 AvxVectorQ0_15 {
156 v: unsafe { _mm256_set1_epi16(v) },
157 }
158 }
159}
160
161impl Sub<AvxVectorQ0_15Sse> for AvxVectorQ0_15Sse {
162 type Output = Self;
163 #[inline(always)]
164 fn sub(self, rhs: AvxVectorQ0_15Sse) -> Self::Output {
165 AvxVectorQ0_15Sse {
166 v: unsafe { _mm_sub_epi16(self.v, rhs.v) },
167 }
168 }
169}
170
171impl Sub<AvxVectorQ0_15> for AvxVectorQ0_15 {
172 type Output = Self;
173 #[inline(always)]
174 fn sub(self, rhs: AvxVectorQ0_15) -> Self::Output {
175 AvxVectorQ0_15 {
176 v: unsafe { _mm256_sub_epi16(self.v, rhs.v) },
177 }
178 }
179}
180
181impl Add<AvxVectorQ0_15Sse> for AvxVectorQ0_15Sse {
182 type Output = Self;
183 #[inline(always)]
184 fn add(self, rhs: AvxVectorQ0_15Sse) -> Self::Output {
185 AvxVectorQ0_15Sse {
186 v: unsafe { _mm_add_epi16(self.v, rhs.v) },
187 }
188 }
189}
190
191impl Mul<AvxVectorQ0_15Sse> for AvxVectorQ0_15Sse {
192 type Output = Self;
193 #[inline(always)]
194 fn mul(self, rhs: AvxVectorQ0_15Sse) -> Self::Output {
195 AvxVectorQ0_15Sse {
196 v: unsafe { _mm_mulhrs_epi16(self.v, rhs.v) },
197 }
198 }
199}
200
201impl Add<AvxVectorQ0_15> for AvxVectorQ0_15 {
202 type Output = Self;
203 #[inline(always)]
204 fn add(self, rhs: AvxVectorQ0_15) -> Self::Output {
205 AvxVectorQ0_15 {
206 v: unsafe { _mm256_add_epi16(self.v, rhs.v) },
207 }
208 }
209}
210
211impl Mul<AvxVectorQ0_15> for AvxVectorQ0_15 {
212 type Output = Self;
213 #[inline(always)]
214 fn mul(self, rhs: AvxVectorQ0_15) -> Self::Output {
215 AvxVectorQ0_15 {
216 v: unsafe { _mm256_mulhrs_epi16(self.v, rhs.v) },
217 }
218 }
219}
220
221impl FusedMultiplyAdd<AvxVectorQ0_15Sse> for AvxVectorQ0_15Sse {
222 #[inline(always)]
223 fn mla(&self, b: AvxVectorQ0_15Sse, c: AvxVectorQ0_15Sse) -> AvxVectorQ0_15Sse {
224 AvxVectorQ0_15Sse {
225 v: unsafe { _mm_add_epi16(_mm_mulhrs_epi16(b.v, c.v), self.v) },
226 }
227 }
228}
229
230impl FusedMultiplyAdd<AvxVectorQ0_15> for AvxVectorQ0_15 {
231 #[inline(always)]
232 fn mla(&self, b: AvxVectorQ0_15, c: AvxVectorQ0_15) -> AvxVectorQ0_15 {
233 AvxVectorQ0_15 {
234 v: unsafe { _mm256_add_epi16(_mm256_mulhrs_epi16(b.v, c.v), self.v) },
235 }
236 }
237}
238
/// Fetcher that reads one node from a single table into a 128-bit vector.
struct TetrahedralAvxSseFetchVector<'a, const GRID_SIZE: usize> {
    /// Table the nodes are read from.
    cube: &'a [AvxAlignedI16],
}
242
/// Fetcher that reads the same node from two tables into one 256-bit vector.
struct TetrahedralAvxFetchVector<'a, const GRID_SIZE: usize> {
    /// Table whose node goes into the low 128-bit lane.
    cube0: &'a [AvxAlignedI16],
    /// Table whose node goes into the high 128-bit lane.
    cube1: &'a [AvxAlignedI16],
}
247
248impl<const GRID_SIZE: usize> Fetcher<AvxVectorQ0_15> for TetrahedralAvxFetchVector<'_, GRID_SIZE> {
249 #[inline(always)]
250 fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorQ0_15 {
251 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
252 + y as u32 * GRID_SIZE as u32
253 + z as u32) as usize;
254 let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
255 let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
256 AvxVectorQ0_15 {
257 v: unsafe {
258 _mm256_inserti128_si256::<1>(
259 _mm256_castsi128_si256(_mm_loadu_si64(jx0.as_ptr() as *const _)),
260 _mm_loadu_si64(jx1.as_ptr() as *const _),
261 )
262 },
263 }
264 }
265}
266
267impl<const GRID_SIZE: usize> Fetcher<AvxVectorQ0_15Sse>
268 for TetrahedralAvxSseFetchVector<'_, GRID_SIZE>
269{
270 #[inline(always)]
271 fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorQ0_15Sse {
272 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
273 + y as u32 * GRID_SIZE as u32
274 + z as u32) as usize;
275 let jx = unsafe { self.cube.get_unchecked(offset..) };
276 AvxVectorQ0_15Sse {
277 v: unsafe { _mm_loadu_si64(jx.as_ptr() as *const _) },
278 }
279 }
280}
281
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralAvxQ0_15<'_, GRID_SIZE> {
    /// Tetrahedral interpolation of a single table.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. The ordering of the three weights picks which node
    /// differences are blended; the result is
    /// `c0 + d1 * rx + d2 * ry + d3 * rz` evaluated with `mla`.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        r: impl Fetcher<AvxVectorQ0_15Sse>,
    ) -> AvxVectorQ0_15Sse {
        let bin_r = lut[in_r.as_()];
        let bin_g = lut[in_g.as_()];
        let bin_b = lut[in_b.as_()];

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (rx, ry, rz) = (bin_r.w, bin_g.w, bin_b.w);

        let origin = r.fetch(x, y, z);

        let (d1, d2, d3) = if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                (
                    r.fetch(x_n, y, z) - origin,
                    r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z),
                    r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z),
                )
            } else if rx >= rz {
                // rx >= rz > ry
                (
                    r.fetch(x_n, y, z) - origin,
                    r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n),
                    r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z),
                )
            } else {
                // rz > rx >= ry
                (
                    r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n),
                    r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n),
                    r.fetch(x, y, z_n) - origin,
                )
            }
        } else if rx >= rz {
            // ry > rx >= rz
            (
                r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z),
                r.fetch(x, y_n, z) - origin,
                r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z),
            )
        } else if ry >= rz {
            // ry >= rz > rx
            (
                r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n),
                r.fetch(x, y_n, z) - origin,
                r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z),
            )
        } else {
            // rz > ry > rx
            (
                r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n),
                r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n),
                r.fetch(x, y, z_n) - origin,
            )
        };

        origin
            .mla(d1, AvxVectorQ0_15Sse::from(rx))
            .mla(d2, AvxVectorQ0_15Sse::from(ry))
            .mla(d3, AvxVectorQ0_15Sse::from(rz))
    }
}
352
// Implements `AvxMdInterpolationQ0_15` for the single-table interpolator `$interpolator`.
//
// `new` stores the table in the `cube` field; `inter3_sse` forwards to the type's
// inherent `interpolate`, handing it a `TetrahedralAvxSseFetchVector` over that table.
// The target type must therefore have a `cube` field and an `interpolate` method
// with the matching signature.
macro_rules! define_interp_avx {
    ($interpolator: ident) => {
        impl<'a, const GRID_SIZE: usize> AvxMdInterpolationQ0_15<'a, GRID_SIZE>
            for $interpolator<'a, GRID_SIZE>
        {
            #[inline(always)]
            fn new(table: &'a [AvxAlignedI16]) -> Self {
                Self { cube: table }
            }

            #[inline(always)]
            fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
                &self,
                in_r: U,
                in_g: U,
                in_b: U,
                lut: &[BarycentricWeight<i16>; BINS],
            ) -> AvxVectorQ0_15Sse {
                self.interpolate(
                    in_r,
                    in_g,
                    in_b,
                    lut,
                    TetrahedralAvxSseFetchVector::<GRID_SIZE> { cube: self.cube },
                )
            }
        }
    };
}
382
// Implements `AvxMdInterpolationQ0_15Double` for the two-table interpolator `$interpolator`.
//
// `new` stores the tables in `cube0` / `cube1`; `inter3_sse` forwards to the type's
// inherent `interpolate`, handing it one `TetrahedralAvxSseFetchVector` per table
// (first `cube0`, then `cube1`).
#[cfg(feature = "options")]
macro_rules! define_interp_avx_d {
    ($interpolator: ident) => {
        impl<'a, const GRID_SIZE: usize> AvxMdInterpolationQ0_15Double<'a, GRID_SIZE>
            for $interpolator<'a, GRID_SIZE>
        {
            #[inline(always)]
            fn new(table0: &'a [AvxAlignedI16], table1: &'a [AvxAlignedI16]) -> Self {
                Self {
                    cube0: table0,
                    cube1: table1,
                }
            }

            #[inline(always)]
            fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
                &self,
                in_r: U,
                in_g: U,
                in_b: U,
                lut: &[BarycentricWeight<i16>; BINS],
            ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
                self.interpolate(
                    in_r,
                    in_g,
                    in_b,
                    lut,
                    TetrahedralAvxSseFetchVector::<GRID_SIZE> { cube: self.cube0 },
                    TetrahedralAvxSseFetchVector::<GRID_SIZE> { cube: self.cube1 },
                )
            }
        }
    };
}
417
// Single-table interpolators: trait impls generated by `define_interp_avx!`.
// Everything except the trilinear variant is gated behind the `options` feature.
#[cfg(feature = "options")]
define_interp_avx!(TetrahedralAvxQ0_15);
#[cfg(feature = "options")]
define_interp_avx!(PyramidalAvxQ0_15);
#[cfg(feature = "options")]
define_interp_avx!(PrismaticAvxQ0_15);
define_interp_avx!(TrilinearAvxQ0_15);
// Two-table interpolators that take one 128-bit fetcher per table.
#[cfg(feature = "options")]
define_interp_avx_d!(PrismaticAvxQ0_15Double);
#[cfg(feature = "options")]
define_interp_avx_d!(PyramidAvxFmaQ0_15Double);
429
#[cfg(feature = "options")]
impl<'a, const GRID_SIZE: usize> AvxMdInterpolationQ0_15Double<'a, GRID_SIZE>
    for TetrahedralAvxQ0_15Double<'a, GRID_SIZE>
{
    /// Stores both tables without copying them.
    #[inline(always)]
    fn new(table0: &'a [AvxAlignedI16], table1: &'a [AvxAlignedI16]) -> Self {
        let (cube0, cube1) = (table0, table1);
        Self { cube0, cube1 }
    }

    /// Runs the tetrahedral interpolation on both tables at once, using a
    /// fetcher that packs the two tables into one 256-bit vector.
    #[inline(always)]
    fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        let paired = TetrahedralAvxFetchVector::<GRID_SIZE> {
            cube0: self.cube0,
            cube1: self.cube1,
        };
        self.interpolate(in_r, in_g, in_b, lut, paired)
    }
}
462
463impl<'a, const GRID_SIZE: usize> AvxMdInterpolationQ0_15Double<'a, GRID_SIZE>
464 for TrilinearAvxQ0_15Double<'a, GRID_SIZE>
465{
466 #[inline(always)]
467 fn new(table0: &'a [AvxAlignedI16], table1: &'a [AvxAlignedI16]) -> Self {
468 Self {
469 cube0: table0,
470 cube1: table1,
471 }
472 }
473
474 #[inline(always)]
475 fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
476 &self,
477 in_r: U,
478 in_g: U,
479 in_b: U,
480 lut: &[BarycentricWeight<i16>; BINS],
481 ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
482 self.interpolate(
483 in_r,
484 in_g,
485 in_b,
486 lut,
487 TetrahedralAvxFetchVector::<GRID_SIZE> {
488 cube0: self.cube0,
489 cube1: self.cube1,
490 },
491 )
492 }
493}
494
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalAvxQ0_15<'_, GRID_SIZE> {
    /// Pyramidal interpolation of a single table.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. Depending on which weight is smallest, one of three sets of
    /// node differences is blended as
    /// `c0 + c1 * db + c2 * dr + c3 * dg + c4 * w3`.
    ///
    /// Node names below follow `v<x><y><z>`, where `1` means the `_n` index.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        r: impl Fetcher<AvxVectorQ0_15Sse>,
    ) -> AvxVectorQ0_15Sse {
        let bin_r = lut[in_r.as_()];
        let bin_g = lut[in_g.as_()];
        let bin_b = lut[in_b.as_()];

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (dr, dg, db) = (bin_r.w, bin_g.w, bin_b.w);

        let v000 = r.fetch(x, y, z);

        let wb = AvxVectorQ0_15Sse::from(db);
        let wr = AvxVectorQ0_15Sse::from(dr);
        let wg = AvxVectorQ0_15Sse::from(dg);

        let (c1, c2, c3, c4, w_cross) = if dr > db && dg > db {
            let v111 = r.fetch(x_n, y_n, z_n);
            let v110 = r.fetch(x_n, y_n, z);
            let v100 = r.fetch(x_n, y, z);
            let v010 = r.fetch(x, y_n, z);
            (
                v111 - v110,
                v100 - v000,
                v010 - v000,
                v000 - v010 - v100 + v110,
                wr * wg,
            )
        } else if db > dr && dg > dr {
            let v001 = r.fetch(x, y, z_n);
            let v111 = r.fetch(x_n, y_n, z_n);
            let v011 = r.fetch(x, y_n, z_n);
            let v010 = r.fetch(x, y_n, z);
            (
                v001 - v000,
                v111 - v011,
                v010 - v000,
                v000 - v010 - v001 + v011,
                wg * wb,
            )
        } else {
            let v001 = r.fetch(x, y, z_n);
            let v100 = r.fetch(x_n, y, z);
            let v101 = r.fetch(x_n, y, z_n);
            let v111 = r.fetch(x_n, y_n, z_n);
            (
                v001 - v000,
                v100 - v000,
                v111 - v101,
                v000 - v100 - v001 + v101,
                wb * wr,
            )
        };

        v000.mla(c1, wb)
            .mla(c2, wr)
            .mla(c3, wg)
            .mla(c4, w_cross)
    }
}
581
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticAvxQ0_15<'_, GRID_SIZE> {
    /// Prismatic interpolation of a single table.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. The comparison `db > dr` selects one of two sets of node
    /// differences, blended as
    /// `c0 + c1 * db + c2 * dr + c3 * dg + c4 * (dg * db) + c5 * (dr * dg)`.
    ///
    /// Node names below follow `v<x><y><z>`, where `1` means the `_n` index.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        r: impl Fetcher<AvxVectorQ0_15Sse>,
    ) -> AvxVectorQ0_15Sse {
        let bin_r = lut[in_r.as_()];
        let bin_g = lut[in_g.as_()];
        let bin_b = lut[in_b.as_()];

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (dr, dg, db) = (bin_r.w, bin_g.w, bin_b.w);

        let v000 = r.fetch(x, y, z);

        let wb = AvxVectorQ0_15Sse::from(db);
        let wr = AvxVectorQ0_15Sse::from(dr);
        let wg = AvxVectorQ0_15Sse::from(dg);
        let w_gb = wg * wb;
        let w_rg = wr * wg;

        let (c1, c2, c3, c4, c5) = if db > dr {
            let v001 = r.fetch(x, y, z_n);
            let v101 = r.fetch(x_n, y, z_n);
            let v010 = r.fetch(x, y_n, z);
            let v011 = r.fetch(x, y_n, z_n);
            let v111 = r.fetch(x_n, y_n, z_n);
            (
                v001 - v000,
                v101 - v001,
                v010 - v000,
                v000 - v010 - v001 + v011,
                v001 - v011 - v101 + v111,
            )
        } else {
            let v100 = r.fetch(x_n, y, z);
            let v101 = r.fetch(x_n, y, z_n);
            let v010 = r.fetch(x, y_n, z);
            let v110 = r.fetch(x_n, y_n, z);
            let v111 = r.fetch(x_n, y_n, z_n);
            (
                v101 - v100,
                v100 - v000,
                v010 - v000,
                v100 - v110 - v101 + v111,
                v000 - v010 - v100 + v110,
            )
        };

        v000.mla(c1, wb)
            .mla(c2, wr)
            .mla(c3, wg)
            .mla(c4, w_gb)
            .mla(c5, w_rg)
    }
}
656
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticAvxQ0_15Double<'_, GRID_SIZE> {
    /// Prismatic interpolation of two tables at once.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. Every grid node is read from both fetchers and packed into
    /// one 256-bit vector (`r0` in the low lane, `r1` in the high lane), so both
    /// tables are blended by the same instructions. The comparison `db > dr`
    /// selects one of two sets of node differences, blended as
    /// `c0 + c1 * db + c2 * dr + c3 * dg + c4 * (dg * db) + c5 * (dr * dg)`.
    ///
    /// Returns `(result for r0, result for r1)`.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        r0: impl Fetcher<AvxVectorQ0_15Sse>,
        r1: impl Fetcher<AvxVectorQ0_15Sse>,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        let lut_r = lut[in_r.as_()];
        let lut_g = lut[in_g.as_()];
        let lut_b = lut[in_b.as_()];

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        // Reads one node from both tables. Routing every node through this
        // helper guarantees the high lane always comes from `r1`; the base
        // node used to be fetched from `r0` for both lanes.
        let fetch_pair = |a: i32, b: i32, c: i32| {
            AvxVectorQ0_15::from_sse(r0.fetch(a, b, c), r1.fetch(a, b, c))
        };

        let c0 = fetch_pair(x, y, z);

        let w0 = AvxVectorQ0_15::from(db);
        let w1 = AvxVectorQ0_15::from(dr);
        let w2 = AvxVectorQ0_15::from(dg);
        let w3 = AvxVectorQ0_15::from(dg) * AvxVectorQ0_15::from(db);
        let w4 = AvxVectorQ0_15::from(dr) * AvxVectorQ0_15::from(dg);

        if db > dr {
            let x0 = fetch_pair(x, y, z_n);
            let x1 = fetch_pair(x_n, y, z_n);
            let x2 = fetch_pair(x, y_n, z);
            let x3 = fetch_pair(x, y_n, z_n);
            let x4 = fetch_pair(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        } else {
            let x0 = fetch_pair(x_n, y, z);
            let x1 = fetch_pair(x_n, y, z_n);
            let x2 = fetch_pair(x, y_n, z);
            let x3 = fetch_pair(x_n, y_n, z);
            let x4 = fetch_pair(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        }
    }
}
759
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidAvxFmaQ0_15Double<'_, GRID_SIZE> {
    /// Pyramidal interpolation of two tables at once.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. Every grid node is read from both fetchers and packed into
    /// one 256-bit vector (`r0` in the low lane, `r1` in the high lane).
    /// Depending on which weight is smallest, one of three sets of node
    /// differences is blended as `c0 + c1 * db + c2 * dr + c3 * dg + c4 * w3`.
    ///
    /// Returns `(result for r0, result for r1)`. Node names below follow
    /// `v<x><y><z>`, where `1` means the `_n` index.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        r0: impl Fetcher<AvxVectorQ0_15Sse>,
        r1: impl Fetcher<AvxVectorQ0_15Sse>,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        let bin_r = lut[in_r.as_()];
        let bin_g = lut[in_g.as_()];
        let bin_b = lut[in_b.as_()];

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (dr, dg, db) = (bin_r.w, bin_g.w, bin_b.w);

        // Reads one node from both tables: `r0` -> low lane, `r1` -> high lane.
        let both = |a: i32, b: i32, c: i32| {
            AvxVectorQ0_15::from_sse(r0.fetch(a, b, c), r1.fetch(a, b, c))
        };

        let v000 = both(x, y, z);

        let wb = AvxVectorQ0_15::from(db);
        let wr = AvxVectorQ0_15::from(dr);
        let wg = AvxVectorQ0_15::from(dg);

        let (c1, c2, c3, c4, w_cross) = if dr > db && dg > db {
            let v111 = both(x_n, y_n, z_n);
            let v110 = both(x_n, y_n, z);
            let v100 = both(x_n, y, z);
            let v010 = both(x, y_n, z);
            (
                v111 - v110,
                v100 - v000,
                v010 - v000,
                v000 - v010 - v100 + v110,
                wr * wg,
            )
        } else if db > dr && dg > dr {
            let v001 = both(x, y, z_n);
            let v111 = both(x_n, y_n, z_n);
            let v011 = both(x, y_n, z_n);
            let v010 = both(x, y_n, z);
            (
                v001 - v000,
                v111 - v011,
                v010 - v000,
                v000 - v010 - v001 + v011,
                wg * wb,
            )
        } else {
            let v001 = both(x, y, z_n);
            let v100 = both(x_n, y, z);
            let v101 = both(x_n, y, z_n);
            let v111 = both(x_n, y_n, z_n);
            (
                v001 - v000,
                v100 - v000,
                v111 - v101,
                v000 - v100 - v001 + v101,
                wb * wr,
            )
        };

        v000.mla(c1, wb)
            .mla(c2, wr)
            .mla(c3, wg)
            .mla(c4, w_cross)
            .split()
    }
}
881
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralAvxQ0_15Double<'_, GRID_SIZE> {
    /// Tetrahedral interpolation on 256-bit vectors produced by `rv`.
    ///
    /// `in_r`, `in_g`, `in_b` select entries of `lut`; each entry supplies the
    /// lower node index (`x`), the upper node index (`x_n`) and the weight (`w`)
    /// for its axis. The ordering of the three weights picks which node
    /// differences are blended; the result
    /// `c0 + d1 * rx + d2 * ry + d3 * rz` is split into its
    /// `(low, high)` 128-bit lanes.
    #[inline(always)]
    fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<i16>; BINS],
        rv: impl Fetcher<AvxVectorQ0_15>,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        let bin_r = lut[in_r.as_()];
        let bin_g = lut[in_g.as_()];
        let bin_b = lut[in_b.as_()];

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (rx, ry, rz) = (bin_r.w, bin_g.w, bin_b.w);

        let origin = rv.fetch(x, y, z);

        let weight_x = AvxVectorQ0_15::from(rx);
        let weight_y = AvxVectorQ0_15::from(ry);
        let weight_z = AvxVectorQ0_15::from(rz);

        let (d1, d2, d3) = if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                (
                    rv.fetch(x_n, y, z) - origin,
                    rv.fetch(x_n, y_n, z) - rv.fetch(x_n, y, z),
                    rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z),
                )
            } else if rx >= rz {
                // rx >= rz > ry
                (
                    rv.fetch(x_n, y, z) - origin,
                    rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n),
                    rv.fetch(x_n, y, z_n) - rv.fetch(x_n, y, z),
                )
            } else {
                // rz > rx >= ry
                (
                    rv.fetch(x_n, y, z_n) - rv.fetch(x, y, z_n),
                    rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n),
                    rv.fetch(x, y, z_n) - origin,
                )
            }
        } else if rx >= rz {
            // ry > rx >= rz
            (
                rv.fetch(x_n, y_n, z) - rv.fetch(x, y_n, z),
                rv.fetch(x, y_n, z) - origin,
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z),
            )
        } else if ry >= rz {
            // ry >= rz > rx
            (
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n),
                rv.fetch(x, y_n, z) - origin,
                rv.fetch(x, y_n, z_n) - rv.fetch(x, y_n, z),
            )
        } else {
            // rz > ry > rx
            (
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n),
                rv.fetch(x, y_n, z_n) - rv.fetch(x, y, z_n),
                rv.fetch(x, y, z_n) - origin,
            )
        };

        origin
            .mla(d1, weight_x)
            .mla(d2, weight_y)
            .mla(d3, weight_z)
            .split()
    }
}
956
957impl<const GRID_SIZE: usize> TrilinearAvxQ0_15Double<'_, GRID_SIZE> {
958 #[inline(always)]
959 fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
960 &self,
961 in_r: U,
962 in_g: U,
963 in_b: U,
964 lut: &[BarycentricWeight<i16>; BINS],
965 rv: impl Fetcher<AvxVectorQ0_15>,
966 ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
967 let lut_r = lut[in_r.as_()];
968 let lut_g = lut[in_g.as_()];
969 let lut_b = lut[in_b.as_()];
970
971 let x: i32 = lut_r.x;
972 let y: i32 = lut_g.x;
973 let z: i32 = lut_b.x;
974
975 let x_n: i32 = lut_r.x_n;
976 let y_n: i32 = lut_g.x_n;
977 let z_n: i32 = lut_b.x_n;
978
979 let rx = lut_r.w;
980 let ry = lut_g.w;
981 let rz = lut_b.w;
982
983 const Q_MAX: i16 = ((1i32 << 15i32) - 1) as i16;
984
985 let q_max = AvxVectorQ0_15::from(Q_MAX);
986 let w0 = AvxVectorQ0_15::from(rx);
987 let w1 = AvxVectorQ0_15::from(ry);
988 let w2 = AvxVectorQ0_15::from(rz);
989 let dx = q_max - w0;
990 let dy = q_max - w1;
991 let dz = q_max - w2;
992
993 let c000 = rv.fetch(x, y, z);
994 let c100 = rv.fetch(x_n, y, z);
995 let c010 = rv.fetch(x, y_n, z);
996 let c110 = rv.fetch(x_n, y_n, z);
997 let c001 = rv.fetch(x, y, z_n);
998 let c101 = rv.fetch(x_n, y, z_n);
999 let c011 = rv.fetch(x, y_n, z_n);
1000 let c111 = rv.fetch(x_n, y_n, z_n);
1001
1002 let c00 = (c000 * dx).mla(c100, w0);
1003 let c10 = (c010 * dx).mla(c110, w0);
1004 let c01 = (c001 * dx).mla(c101, w0);
1005 let c11 = (c011 * dx).mla(c111, w0);
1006
1007 let c0 = (c00 * dy).mla(c10, w1);
1008 let c1 = (c01 * dy).mla(c11, w1);
1009
1010 (c0 * dz).mla(c1, w2).split()
1011 }
1012}
1013
1014impl<const GRID_SIZE: usize> TrilinearAvxQ0_15<'_, GRID_SIZE> {
1015 #[inline(always)]
1016 fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
1017 &self,
1018 in_r: U,
1019 in_g: U,
1020 in_b: U,
1021 lut: &[BarycentricWeight<i16>; BINS],
1022 r: impl Fetcher<AvxVectorQ0_15Sse>,
1023 ) -> AvxVectorQ0_15Sse {
1024 let lut_r = lut[in_r.as_()];
1025 let lut_g = lut[in_g.as_()];
1026 let lut_b = lut[in_b.as_()];
1027
1028 let x: i32 = lut_r.x;
1029 let y: i32 = lut_g.x;
1030 let z: i32 = lut_b.x;
1031
1032 let x_n: i32 = lut_r.x_n;
1033 let y_n: i32 = lut_g.x_n;
1034 let z_n: i32 = lut_b.x_n;
1035
1036 let dr = lut_r.w;
1037 let dg = lut_g.w;
1038 let db = lut_b.w;
1039
1040 const Q_MAX: i16 = ((1i32 << 15i32) - 1) as i16;
1041
1042 let q_max = AvxVectorQ0_15Sse::from(Q_MAX);
1043 let q_max_avx = AvxVectorQ0_15::from(Q_MAX);
1044 let w0 = AvxVectorQ0_15::from(dr);
1045 let w1 = AvxVectorQ0_15::from(dg);
1046 let w2 = AvxVectorQ0_15Sse::from(db);
1047 let dx = q_max_avx - w0;
1048 let dy = q_max_avx - w1;
1049 let dz = q_max - w2;
1050
1051 let c000 = r.fetch(x, y, z);
1052 let c100 = r.fetch(x_n, y, z);
1053 let c010 = r.fetch(x, y_n, z);
1054 let c110 = r.fetch(x_n, y_n, z);
1055 let c001 = r.fetch(x, y, z_n);
1056 let c101 = r.fetch(x_n, y, z_n);
1057 let c011 = r.fetch(x, y_n, z_n);
1058 let c111 = r.fetch(x_n, y_n, z_n);
1059
1060 let x000 = AvxVectorQ0_15::from_sse(c000, c001);
1061 let x010 = AvxVectorQ0_15::from_sse(c010, c011);
1062 let x011 = AvxVectorQ0_15::from_sse(c100, c101);
1063 let x111 = AvxVectorQ0_15::from_sse(c110, c111);
1064
1065 let c00 = (x000 * dx).mla(x011, w0);
1066 let c10 = (x010 * dx).mla(x111, w0);
1067
1068 let c0 = (c00 * dy).mla(c10, w1);
1069
1070 let (c0, c1) = c0.split();
1071
1072 (c0 * dz).mla(c1, w2)
1073 }
1074}