1#![allow(dead_code)]
30use crate::conversions::lut_transforms::LUT_SAMPLING;
31use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
32use crate::{Vector3f, Vector4f};
33use num_traits::AsPrimitive;
34use std::ops::{Add, Mul, Sub};
35
/// Tetrahedral (4-point) 3D LUT interpolator over a `GRID_SIZE`³ lattice;
/// `cube` is the flattened grid of f32 samples read by the fetchers below.
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral<'a, const GRID_SIZE: usize> {
    pub(crate) cube: &'a [f32],
}
40
/// Pyramid (5-point) 3D LUT interpolator over a `GRID_SIZE`³ lattice;
/// `cube` is the flattened grid of f32 samples read by the fetchers below.
#[cfg(feature = "options")]
pub(crate) struct Pyramidal<'a, const GRID_SIZE: usize> {
    pub(crate) cube: &'a [f32],
}
45
/// Prism (6-point) 3D LUT interpolator over a `GRID_SIZE`³ lattice;
/// `cube` is the flattened grid of f32 samples read by the fetchers below.
#[cfg(feature = "options")]
pub(crate) struct Prismatic<'a, const GRID_SIZE: usize> {
    pub(crate) cube: &'a [f32],
}
50
/// Trilinear (8-point) 3D LUT interpolator over a `GRID_SIZE`³ lattice;
/// always available (not gated behind the "options" feature).
pub(crate) struct Trilinear<'a, const GRID_SIZE: usize> {
    pub(crate) cube: &'a [f32],
}
54
/// Precomputed 1-D interpolation weight for one input channel:
/// `x` is the lower lattice index, `x_n` its neighbor clamped to the grid
/// edge, and `w` the fractional position of the input between them
/// (stored either as f32 or as Q0.15 fixed point, see the impls below).
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight<V> {
    pub x: i32,
    pub x_n: i32,
    pub w: V,
}
61
62impl BarycentricWeight<f32> {
63 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
64 {
65 let mut weights = Box::new([BarycentricWeight::default(); 256]);
66 for (index, weight) in weights.iter_mut().enumerate() {
67 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
68 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
69
70 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
71
72 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
73
74 let dr = index as f32 * scale - x as f32;
75 *weight = BarycentricWeight { x, x_n, w: dr };
76 }
77 weights
78 }
79
80 #[cfg(feature = "options")]
81 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
82 -> Box<[BarycentricWeight<f32>; 65536]> {
83 let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
84 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
85 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
86 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
87
88 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
89
90 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
91
92 let dr = index as f32 * scale - x as f32;
93 *weight = BarycentricWeight { x, x_n, w: dr };
94 }
95 weights
96 }
97}
98
99#[allow(dead_code)]
100impl BarycentricWeight<i16> {
101 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
102 {
103 let mut weights = Box::new([BarycentricWeight::default(); 256]);
104 for (index, weight) in weights.iter_mut().enumerate() {
105 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
106 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
107
108 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
109
110 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
111
112 const Q: f32 = ((1i32 << 15) - 1) as f32;
113
114 let dr = ((index as f32 * scale - x as f32) * Q)
115 .round()
116 .min(i16::MAX as f32)
117 .max(-i16::MAX as f32) as i16;
118 *weight = BarycentricWeight { x, x_n, w: dr };
119 }
120 weights
121 }
122
123 #[cfg(feature = "options")]
124 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
125 -> Box<[BarycentricWeight<i16>; 65536]> {
126 let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
127 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
128 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
129 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
130
131 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
132
133 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
134
135 const Q: f32 = ((1i32 << 15) - 1) as f32;
136
137 let dr = ((index as f32 * scale - x as f32) * Q)
138 .round()
139 .min(i16::MAX as f32)
140 .max(-i16::MAX as f32) as i16;
141 *weight = BarycentricWeight { x, x_n, w: dr };
142 }
143 weights
144 }
145}
146
/// Reads one lattice node at integer grid coordinates `(x, y, z)`.
trait Fetcher<T> {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
150
/// Borrowed LUT data read as 3-component nodes (stride of 3 floats per
/// node, see the `Fetcher<Vector3f>` impl).
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
154
/// Common interface for the 3D LUT interpolators in this module: `inter3`
/// produces a 3-channel result, `inter4` a 4-channel one. `in_r`/`in_g`/
/// `in_b` index into `lut`, which holds the precomputed per-channel
/// lattice indices and fractional weights.
pub(crate) trait MultidimensionalInterpolation<'a, const GRID_SIZE: usize> {
    fn new(table: &'a [f32]) -> Self;
    fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<f32>; BINS],
    ) -> Vector3f;
    fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<f32>; BINS],
    ) -> Vector4f;
}
172
173impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
174 #[inline(always)]
175 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
176 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
177 + y as u32 * GRID_SIZE as u32
178 + z as u32) as usize
179 * 3;
180 let jx = &self.cube[offset..offset + 3];
181 Vector3f {
182 v: [jx[0], jx[1], jx[2]],
183 }
184 }
185}
186
/// Borrowed LUT data read as 4-component nodes (stride of 4 floats per
/// node, see the `Fetcher<Vector4f>` impl).
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
190
191impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
192 #[inline(always)]
193 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
194 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
195 + y as u32 * GRID_SIZE as u32
196 + z as u32) as usize
197 * 4;
198 let jx = &self.cube[offset..offset + 4];
199 Vector4f {
200 v: [jx[0], jx[1], jx[2], jx[3]],
201 }
202 }
203}
204
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Tetrahedral<'_, GRID_SIZE> {
    /// Tetrahedral interpolation: the unit cell is split into six
    /// tetrahedra; the ordering of the fractional weights (rx, ry, rz)
    /// selects the one containing the sample, and the result is
    /// `c0 + c1*rx + c2*ry + c3*rz` built from that tetrahedron's corners.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
        U: AsPrimitive<usize>,
        const BINS: usize,
    >(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<f32>; BINS],
        r: impl Fetcher<T>,
    ) -> T {
        // Per-channel lattice cell (x..x_n) and fractional weight.
        let lut_r = lut[in_r.as_()];
        let lut_g = lut[in_g.as_()];
        let lut_b = lut[in_b.as_()];

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);
        let c2;
        let c1;
        let c3;
        // Each branch forms the edge differences that act as gradients
        // along r, g and b for one weight ordering.
        if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                // rx >= rz > ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                // rz > rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            // ry > rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            // ry >= rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            // rz > ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        // Fused accumulation of c0 + c1*rx + c2*ry + c3*rz.
        let s0 = c0.mla(c1, T::from(rx));
        let s1 = s0.mla(c2, T::from(ry));
        s1.mla(c3, T::from(rz))
    }
}
284
/// Implements [`MultidimensionalInterpolation`] for an interpolator type
/// by delegating both entry points to its inherent `interpolate`, paired
/// with the 3- or 4-channel fetcher over the shared `cube` slice.
macro_rules! define_md_inter {
    ($interpolator: ident) => {
        impl<'a, const GRID_SIZE: usize> MultidimensionalInterpolation<'a, GRID_SIZE>
            for $interpolator<'a, GRID_SIZE>
        {
            fn new(table: &'a [f32]) -> Self {
                Self { cube: table }
            }

            fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
                &self,
                in_r: U,
                in_g: U,
                in_b: U,
                lut: &[BarycentricWeight<f32>; BINS],
            ) -> Vector3f {
                // 3-channel output: stride-3 fetcher.
                self.interpolate::<Vector3f, U, BINS>(
                    in_r,
                    in_g,
                    in_b,
                    lut,
                    TetrahedralFetchVector3f::<GRID_SIZE> { cube: self.cube },
                )
            }

            fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
                &self,
                in_r: U,
                in_g: U,
                in_b: U,
                lut: &[BarycentricWeight<f32>; BINS],
            ) -> Vector4f {
                // 4-channel output: stride-4 fetcher.
                self.interpolate::<Vector4f, U, BINS>(
                    in_r,
                    in_g,
                    in_b,
                    lut,
                    TetrahedralFetchVector4f::<GRID_SIZE> { cube: self.cube },
                )
            }
        }
    };
}
328
// Wire every interpolator into the common trait; all but Trilinear are
// gated behind the "options" feature.
#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);
336
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Pyramidal<'_, GRID_SIZE> {
    /// Pyramid interpolation: the smallest of the three fractional
    /// weights picks one of three pyramids inside the unit cell; the
    /// result combines four extra corner fetches plus one bilinear
    /// cross term on top of the base corner `c0`.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
        U: AsPrimitive<usize>,
        const BINS: usize,
    >(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<f32>; BINS],
        r: impl Fetcher<T>,
    ) -> T {
        // Per-channel lattice cell (x..x_n) and fractional weight.
        let lut_r = lut[in_r.as_()];
        let lut_g = lut[in_g.as_()];
        let lut_b = lut[in_b.as_()];

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            // db smallest: evaluates c0 + c1*db + c2*dr + c3*dg + c4*(dr*dg).
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            // Bilinear cross term in the r-g plane.
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            // dr smallest: evaluates c0 + c1*db + c2*dr + c3*dg + c4*(dg*db).
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            // Bilinear cross term in the g-b plane.
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            // dg smallest: evaluates c0 + c1*db + c2*dr + c3*dg + c4*(db*dr).
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            // Bilinear cross term in the b-r plane.
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        }
    }
}
424
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Prismatic<'_, GRID_SIZE> {
    /// Prism interpolation: the cell is split along the db-vs-dr diagonal
    /// into two triangular prisms; five extra corner fetches plus two
    /// bilinear cross terms reconstruct the sample as
    /// `c0 + c1*db + c2*dr + c3*dg + c4*(dg*db) + c5*(dr*dg)`.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
        U: AsPrimitive<usize>,
        const BINS: usize,
    >(
        &self,
        in_r: U,
        in_g: U,
        in_b: U,
        lut: &[BarycentricWeight<f32>; BINS],
        r: impl Fetcher<T>,
    ) -> T {
        // Per-channel lattice cell (x..x_n) and fractional weight.
        let lut_r = lut[in_r.as_()];
        let lut_g = lut[in_g.as_()];
        let lut_b = lut[in_b.as_()];

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db >= dr {
            // Prism on the db >= dr side of the diagonal.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            // Prism on the dr > db side of the diagonal.
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        }
    }
}
503
504impl<const GRID_SIZE: usize> Trilinear<'_, GRID_SIZE> {
505 #[inline(always)]
506 fn interpolate<
507 T: Copy
508 + Sub<T, Output = T>
509 + Mul<T, Output = T>
510 + Mul<f32, Output = T>
511 + Add<T, Output = T>
512 + From<f32>
513 + FusedMultiplyAdd<T>
514 + FusedMultiplyNegAdd<T>,
515 U: AsPrimitive<usize>,
516 const BINS: usize,
517 >(
518 &self,
519 in_r: U,
520 in_g: U,
521 in_b: U,
522 lut: &[BarycentricWeight<f32>; BINS],
523 r: impl Fetcher<T>,
524 ) -> T {
525 let lut_r = lut[in_r.as_()];
526 let lut_g = lut[in_g.as_()];
527 let lut_b = lut[in_b.as_()];
528
529 let x: i32 = lut_r.x;
530 let y: i32 = lut_g.x;
531 let z: i32 = lut_b.x;
532
533 let x_n: i32 = lut_r.x_n;
534 let y_n: i32 = lut_g.x_n;
535 let z_n: i32 = lut_b.x_n;
536
537 let dr = lut_r.w;
538 let dg = lut_g.w;
539 let db = lut_b.w;
540
541 let w0 = T::from(dr);
542 let w1 = T::from(dg);
543 let w2 = T::from(db);
544
545 let c000 = r.fetch(x, y, z);
546 let c100 = r.fetch(x_n, y, z);
547 let c010 = r.fetch(x, y_n, z);
548 let c110 = r.fetch(x_n, y_n, z);
549 let c001 = r.fetch(x, y, z_n);
550 let c101 = r.fetch(x_n, y, z_n);
551 let c011 = r.fetch(x, y_n, z_n);
552 let c111 = r.fetch(x_n, y_n, z_n);
553
554 let dx = T::from(dr);
555
556 let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
557 let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
558 let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
559 let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
560
561 let dy = T::from(dg);
562
563 let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
564 let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
565
566 let dz = T::from(db);
567
568 c0.neg_mla(c0, dz).mla(c1, w2)
569 }
570}
571
/// Converts a source sample of type `T` (carrying `SRC_BP` significant
/// bits, or a normalized float) into a LUT bin index of type `U` in
/// `[0, BINS - 1]`.
pub(crate) trait LutBarycentricReduction<T, U> {
    fn reduce<const SRC_BP: usize, const BINS: usize>(v: T) -> U;
}
575
impl LutBarycentricReduction<u8, u8> for () {
    /// 8-bit input to 8-bit bins: the identity mapping.
    #[inline(always)]
    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u8 {
        v
    }
}
582
583impl LutBarycentricReduction<u8, u16> for () {
584 #[inline(always)]
585 fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u16 {
586 if BINS == 65536 {
587 return u16::from_ne_bytes([v, v]);
588 }
589 if BINS == 16384 {
590 return u16::from_ne_bytes([v, v]) >> 2;
591 }
592 unimplemented!()
593 }
594}
595
596impl LutBarycentricReduction<f32, u8> for () {
597 #[inline(always)]
598 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u8 {
599 (v * 255.).round().min(255.).max(0.) as u8
600 }
601}
602
603impl LutBarycentricReduction<f32, u16> for () {
604 #[inline(always)]
605 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u16 {
606 let scale = (BINS - 1) as f32;
607 (v * scale).round().min(scale).max(0.) as u16
608 }
609}
610
611impl LutBarycentricReduction<f64, u8> for () {
612 #[inline(always)]
613 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u8 {
614 (v * 255.).round().min(255.).max(0.) as u8
615 }
616}
617
618impl LutBarycentricReduction<f64, u16> for () {
619 #[inline(always)]
620 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u16 {
621 let scale = (BINS - 1) as f64;
622 (v * scale).round().min(scale).max(0.) as u16
623 }
624}
625
626impl LutBarycentricReduction<u16, u16> for () {
627 #[inline(always)]
628 fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u16 {
629 let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
630 let scale = src_scale * (BINS - 1) as f32;
631 (v as f32 * scale).round().min(scale).max(0.) as u16
632 }
633}
634
impl LutBarycentricReduction<u16, u8> for () {
    /// Narrows a `SRC_BP`-bit sample to 8 bits by dropping the low bits.
    /// NOTE(review): `SRC_BP as u16 - 8` underflows for SRC_BP < 8 —
    /// presumably callers only instantiate this with SRC_BP >= 8; confirm.
    #[inline(always)]
    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u8 {
        let shift = SRC_BP as u16 - 8;
        if SRC_BP == 16 {
            // Full 16-bit input: the high byte is already the 8-bit value.
            (v >> 8) as u8
        } else {
            // Defensive clamp in case `v` exceeds the declared bit depth.
            (v >> shift).min(255) as u8
        }
    }
}