1#![allow(dead_code)]
30use crate::conversions::lut_transforms::LUT_SAMPLING;
31use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
32use crate::{Vector3f, Vector4f};
33use std::ops::{Add, Mul, Sub};
34
/// Tetrahedral 3D LUT interpolator: selects one of six tetrahedra per
/// lattice cell and blends four corner values.
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral<const GRID_SIZE: usize> {}

/// Pyramidal 3D LUT interpolator: splits the lattice cell into pyramids
/// and blends five corner values.
#[cfg(feature = "options")]
pub(crate) struct Pyramidal<const GRID_SIZE: usize> {}

/// Prismatic 3D LUT interpolator: splits the lattice cell into two prisms
/// and blends six corner values.
#[cfg(feature = "options")]
pub(crate) struct Prismatic<const GRID_SIZE: usize> {}

/// Trilinear 3D LUT interpolator: blends all eight corners of the cell.
/// Always compiled in (no feature gate), unlike the variants above.
pub(crate) struct Trilinear<const GRID_SIZE: usize> {}
45
/// Precomputed per-axis interpolation coordinates for one input value:
/// the lower lattice index, the (clamped) next index, and the fractional
/// weight between them. `V` is `f32` for the float path or `i16` for the
/// Q0.15 fixed-point path (see the two `impl` blocks below).
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight<V> {
    // Lower lattice corner index along this axis.
    pub x: i32,
    // Next corner index, clamped to the grid edge (so x_n == x at the top).
    pub x_n: i32,
    // Fractional position of the sample inside the [x, x_n] cell.
    pub w: V,
}
52
53impl BarycentricWeight<f32> {
54 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
55 {
56 let mut weights = Box::new([BarycentricWeight::default(); 256]);
57 for (index, weight) in weights.iter_mut().enumerate() {
58 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
59 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
60
61 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
62
63 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
64
65 let dr = index as f32 * scale - x as f32;
66 *weight = BarycentricWeight { x, x_n, w: dr };
67 }
68 weights
69 }
70
71 #[cfg(feature = "options")]
72 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
73 -> Box<[BarycentricWeight<f32>; 65536]> {
74 let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
75 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
76 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
77 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
78
79 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
80
81 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
82
83 let dr = index as f32 * scale - x as f32;
84 *weight = BarycentricWeight { x, x_n, w: dr };
85 }
86 weights
87 }
88}
89
90#[allow(dead_code)]
91impl BarycentricWeight<i16> {
92 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
93 {
94 let mut weights = Box::new([BarycentricWeight::default(); 256]);
95 for (index, weight) in weights.iter_mut().enumerate() {
96 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
97 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
98
99 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
100
101 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
102
103 const Q: f32 = ((1i32 << 15) - 1) as f32;
104
105 let dr = ((index as f32 * scale - x as f32) * Q)
106 .round()
107 .min(i16::MAX as f32)
108 .max(-i16::MAX as f32) as i16;
109 *weight = BarycentricWeight { x, x_n, w: dr };
110 }
111 weights
112 }
113
114 #[cfg(feature = "options")]
115 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
116 -> Box<[BarycentricWeight<i16>; 65536]> {
117 let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
118 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
119 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
120 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
121
122 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
123
124 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
125
126 const Q: f32 = ((1i32 << 15) - 1) as f32;
127
128 let dr = ((index as f32 * scale - x as f32) * Q)
129 .round()
130 .min(i16::MAX as f32)
131 .max(-i16::MAX as f32) as i16;
132 *weight = BarycentricWeight { x, x_n, w: dr };
133 }
134 weights
135 }
136}
137
/// Reads one lattice node at integer grid coordinates `(x, y, z)`.
/// Implementations decode a flat LUT slice into a vector value.
trait Fetcher<T> {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
141
/// Fetcher over a flat `f32` LUT laid out row-major with 3 components per
/// lattice node (`GRID_SIZE^3 * 3` floats expected).
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
145
/// Common entry point for the 3D LUT interpolators: look up an input
/// described by three per-axis barycentric weights in a flattened cube.
pub(crate) trait MultidimensionalInterpolation {
    /// Interpolates a 3-component output from `cube` (3 floats per node).
    fn inter3(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
    ) -> Vector3f;
    /// Interpolates a 4-component output from `cube` (4 floats per node).
    fn inter4(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
    ) -> Vector4f;
}
162
163impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
164 #[inline(always)]
165 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
166 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
167 + y as u32 * GRID_SIZE as u32
168 + z as u32) as usize
169 * 3;
170 let jx = &self.cube[offset..offset + 3];
171 Vector3f {
172 v: [jx[0], jx[1], jx[2]],
173 }
174 }
175}
176
/// Fetcher over a flat `f32` LUT laid out row-major with 4 components per
/// lattice node (`GRID_SIZE^3 * 4` floats expected).
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
180
181impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
182 #[inline(always)]
183 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
184 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
185 + y as u32 * GRID_SIZE as u32
186 + z as u32) as usize
187 * 4;
188 let jx = &self.cube[offset..offset + 4];
189 Vector4f {
190 v: [jx[0], jx[1], jx[2], jx[3]],
191 }
192 }
193}
194
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Tetrahedral<GRID_SIZE> {
    /// Tetrahedral interpolation of one lattice cell.
    ///
    /// The sort order of the fractional weights `(rx, ry, rz)` selects one
    /// of the six tetrahedra that partition the cube; `c1..c3` are the
    /// corner deltas of the chosen tetrahedron.
    /// NOTE(review): assumes `a.mla(b, w) == a + b * w`; under that reading
    /// the result is the classic `c0 + c1*rx + c2*ry + c3*rz` blend —
    /// confirm against the `FusedMultiplyAdd` impls in `crate::math`.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower lattice corner of the cell.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Upper corner (already clamped to the grid edge by table creation).
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional position inside the cell along each axis.
        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);
        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                // rx >= rz > ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                // rz > rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            // ry > rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            // ry >= rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            // rz > ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        // Accumulate the weighted corner deltas with fused multiply-adds.
        let s0 = c0.mla(c1, T::from(rx));
        let s1 = s0.mla(c2, T::from(ry));
        s1.mla(c3, T::from(rz))
    }
}
267
/// Implements `MultidimensionalInterpolation` for an interpolator type by
/// forwarding `inter3`/`inter4` to its inherent `interpolate` method with
/// the matching 3- or 4-component fetcher.
macro_rules! define_md_inter {
    ($interpolator: ident) => {
        impl<const GRID_SIZE: usize> MultidimensionalInterpolation for $interpolator<GRID_SIZE> {
            fn inter3(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight<f32>,
                lut_g: &BarycentricWeight<f32>,
                lut_b: &BarycentricWeight<f32>,
            ) -> Vector3f {
                self.interpolate::<Vector3f>(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector3f::<GRID_SIZE> { cube },
                )
            }

            fn inter4(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight<f32>,
                lut_g: &BarycentricWeight<f32>,
                lut_b: &BarycentricWeight<f32>,
            ) -> Vector4f {
                self.interpolate::<Vector4f>(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector4f::<GRID_SIZE> { cube },
                )
            }
        }
    };
}
303
// Tetrahedral, pyramidal and prismatic variants are gated behind the
// `options` feature; trilinear is always available.
#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);
311
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Pyramidal<GRID_SIZE> {
    /// Pyramidal interpolation of one lattice cell.
    ///
    /// The smallest fractional weight selects which pyramid of the cube
    /// contains the sample; each branch blends five lattice nodes, with one
    /// bilinear cross term.
    /// NOTE(review): assumes `a.mla(b, w) == a + b * w`, i.e. each branch
    /// evaluates `c0 + c1*db + c2*dr + c3*dg + c4*(cross)` — confirm
    /// against the `FusedMultiplyAdd` impls in `crate::math`.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower / upper lattice corners along each axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional offsets inside the cell.
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            // db is smallest: pyramid with apex toward the +x/+y edge.
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            // Bilinear correction term for the x/y cross.
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            // dr is smallest: pyramid toward the +y/+z edge.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            // Bilinear correction term for the y/z cross.
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            // dg is smallest: pyramid toward the +z/+x edge.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            // Bilinear correction term for the z/x cross.
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        }
    }
}
392
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Prismatic<GRID_SIZE> {
    /// Prismatic interpolation of one lattice cell.
    ///
    /// The cube is split by the diagonal plane `db == dr` into two
    /// triangular prisms; each branch blends six lattice nodes with two
    /// bilinear cross terms.
    /// NOTE(review): assumes `a.mla(b, w) == a + b * w`, i.e. each branch
    /// evaluates `c0 + c1*db + c2*dr + c3*dg + c4*(dg*db) + c5*(dr*dg)` —
    /// confirm against the `FusedMultiplyAdd` impls in `crate::math`.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower / upper lattice corners along each axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional offsets inside the cell.
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db >= dr {
            // Prism on the db >= dr side of the diagonal plane.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            // Bilinear correction terms.
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            // Prism on the dr > db side of the diagonal plane.
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            // Bilinear correction terms.
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        }
    }
}
464
465impl<const GRID_SIZE: usize> Trilinear<GRID_SIZE> {
466 #[inline(always)]
467 fn interpolate<
468 T: Copy
469 + Sub<T, Output = T>
470 + Mul<T, Output = T>
471 + Mul<f32, Output = T>
472 + Add<T, Output = T>
473 + From<f32>
474 + FusedMultiplyAdd<T>
475 + FusedMultiplyNegAdd<T>,
476 >(
477 &self,
478 lut_r: &BarycentricWeight<f32>,
479 lut_g: &BarycentricWeight<f32>,
480 lut_b: &BarycentricWeight<f32>,
481 r: impl Fetcher<T>,
482 ) -> T {
483 let x: i32 = lut_r.x;
484 let y: i32 = lut_g.x;
485 let z: i32 = lut_b.x;
486
487 let x_n: i32 = lut_r.x_n;
488 let y_n: i32 = lut_g.x_n;
489 let z_n: i32 = lut_b.x_n;
490
491 let dr = lut_r.w;
492 let dg = lut_g.w;
493 let db = lut_b.w;
494
495 let w0 = T::from(dr);
496 let w1 = T::from(dg);
497 let w2 = T::from(db);
498
499 let c000 = r.fetch(x, y, z);
500 let c100 = r.fetch(x_n, y, z);
501 let c010 = r.fetch(x, y_n, z);
502 let c110 = r.fetch(x_n, y_n, z);
503 let c001 = r.fetch(x, y, z_n);
504 let c101 = r.fetch(x_n, y, z_n);
505 let c011 = r.fetch(x, y_n, z_n);
506 let c111 = r.fetch(x_n, y_n, z_n);
507
508 let dx = T::from(dr);
509
510 let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
511 let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
512 let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
513 let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
514
515 let dy = T::from(dg);
516
517 let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
518 let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
519
520 let dz = T::from(db);
521
522 c0.neg_mla(c0, dz).mla(c1, w2)
523 }
524}
525
/// Converts a sample of type `T` (with `SRC_BP` meaningful bits for integer
/// inputs) into a bin index of type `U` addressing a `BINS`-entry table.
pub(crate) trait LutBarycentricReduction<T, U> {
    fn reduce<const SRC_BP: usize, const BINS: usize>(v: T) -> U;
}
529
/// Identity reduction: an 8-bit sample already addresses a 256-bin table.
impl LutBarycentricReduction<u8, u8> for () {
    #[inline(always)]
    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u8 {
        v
    }
}
536
537impl LutBarycentricReduction<u8, u16> for () {
538 #[inline(always)]
539 fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u16 {
540 if BINS == 65536 {
541 return u16::from_ne_bytes([v, v]);
542 }
543 if BINS == 16384 {
544 return u16::from_ne_bytes([v, v]) >> 2;
545 }
546 unimplemented!()
547 }
548}
549
550impl LutBarycentricReduction<f32, u8> for () {
551 #[inline(always)]
552 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u8 {
553 (v * 255.).round().min(255.).max(0.) as u8
554 }
555}
556
557impl LutBarycentricReduction<f32, u16> for () {
558 #[inline(always)]
559 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u16 {
560 let scale = (BINS - 1) as f32;
561 (v * scale).round().min(scale).max(0.) as u16
562 }
563}
564
565impl LutBarycentricReduction<f64, u8> for () {
566 #[inline(always)]
567 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u8 {
568 (v * 255.).round().min(255.).max(0.) as u8
569 }
570}
571
572impl LutBarycentricReduction<f64, u16> for () {
573 #[inline(always)]
574 fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u16 {
575 let scale = (BINS - 1) as f64;
576 (v * scale).round().min(scale).max(0.) as u16
577 }
578}
579
580impl LutBarycentricReduction<u16, u16> for () {
581 #[inline(always)]
582 fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u16 {
583 let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
584 let scale = src_scale * (BINS - 1) as f32;
585 (v as f32 * scale).round().min(scale).max(0.) as u16
586 }
587}
588
589impl LutBarycentricReduction<u16, u8> for () {
590 #[inline(always)]
591 fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u8 {
592 let shift = SRC_BP as u16 - 8;
593 if SRC_BP == 16 {
594 (v >> 8) as u8
595 } else {
596 (v >> shift).min(255) as u8
597 }
598 }
599}