moxcms/conversions/
interpolator.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#![allow(dead_code)]
30use crate::conversions::lut_transforms::LUT_SAMPLING;
31use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
32use crate::{Vector3f, Vector4f};
33use std::ops::{Add, Mul, Sub};
34
35#[cfg(feature = "options")]
36pub(crate) struct Tetrahedral<const GRID_SIZE: usize> {}
37
38#[cfg(feature = "options")]
39pub(crate) struct Pyramidal<const GRID_SIZE: usize> {}
40
41#[cfg(feature = "options")]
42pub(crate) struct Prismatic<const GRID_SIZE: usize> {}
43
44pub(crate) struct Trilinear<const GRID_SIZE: usize> {}
45
46#[derive(Debug, Copy, Clone, Default)]
47pub(crate) struct BarycentricWeight<V> {
48    pub x: i32,
49    pub x_n: i32,
50    pub w: V,
51}
52
53impl BarycentricWeight<f32> {
54    pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
55    {
56        let mut weights = Box::new([BarycentricWeight::default(); 256]);
57        for (index, weight) in weights.iter_mut().enumerate() {
58            const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
59            let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
60
61            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
62
63            let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
64
65            let dr = index as f32 * scale - x as f32;
66            *weight = BarycentricWeight { x, x_n, w: dr };
67        }
68        weights
69    }
70
71    #[cfg(feature = "options")]
72    pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
73    -> Box<[BarycentricWeight<f32>; 65536]> {
74        let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
75        let b_scale: f32 = 1.0 / (BINS - 1) as f32;
76        for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
77            let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
78
79            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
80
81            let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
82
83            let dr = index as f32 * scale - x as f32;
84            *weight = BarycentricWeight { x, x_n, w: dr };
85        }
86        weights
87    }
88}
89
90#[allow(dead_code)]
91impl BarycentricWeight<i16> {
92    pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
93    {
94        let mut weights = Box::new([BarycentricWeight::default(); 256]);
95        for (index, weight) in weights.iter_mut().enumerate() {
96            const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
97            let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
98
99            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
100
101            let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
102
103            const Q: f32 = ((1i32 << 15) - 1) as f32;
104
105            let dr = ((index as f32 * scale - x as f32) * Q)
106                .round()
107                .min(i16::MAX as f32)
108                .max(-i16::MAX as f32) as i16;
109            *weight = BarycentricWeight { x, x_n, w: dr };
110        }
111        weights
112    }
113
114    #[cfg(feature = "options")]
115    pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
116    -> Box<[BarycentricWeight<i16>; 65536]> {
117        let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
118        let b_scale: f32 = 1.0 / (BINS - 1) as f32;
119        for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
120            let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
121
122            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
123
124            let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
125
126            const Q: f32 = ((1i32 << 15) - 1) as f32;
127
128            let dr = ((index as f32 * scale - x as f32) * Q)
129                .round()
130                .min(i16::MAX as f32)
131                .max(-i16::MAX as f32) as i16;
132            *weight = BarycentricWeight { x, x_n, w: dr };
133        }
134        weights
135    }
136}
137
138trait Fetcher<T> {
139    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
140}
141
142struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
143    cube: &'a [f32],
144}
145
146pub(crate) trait MultidimensionalInterpolation {
147    fn inter3(
148        &self,
149        cube: &[f32],
150        lut_r: &BarycentricWeight<f32>,
151        lut_g: &BarycentricWeight<f32>,
152        lut_b: &BarycentricWeight<f32>,
153    ) -> Vector3f;
154    fn inter4(
155        &self,
156        cube: &[f32],
157        lut_r: &BarycentricWeight<f32>,
158        lut_g: &BarycentricWeight<f32>,
159        lut_b: &BarycentricWeight<f32>,
160    ) -> Vector4f;
161}
162
163impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
164    #[inline(always)]
165    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
166        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
167            + y as u32 * GRID_SIZE as u32
168            + z as u32) as usize
169            * 3;
170        let jx = &self.cube[offset..offset + 3];
171        Vector3f {
172            v: [jx[0], jx[1], jx[2]],
173        }
174    }
175}
176
177struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
178    cube: &'a [f32],
179}
180
181impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
182    #[inline(always)]
183    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
184        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
185            + y as u32 * GRID_SIZE as u32
186            + z as u32) as usize
187            * 4;
188        let jx = &self.cube[offset..offset + 4];
189        Vector4f {
190            v: [jx[0], jx[1], jx[2], jx[3]],
191        }
192    }
193}
194
195#[cfg(feature = "options")]
196impl<const GRID_SIZE: usize> Tetrahedral<GRID_SIZE> {
197    #[inline]
198    fn interpolate<
199        T: Copy
200            + Sub<T, Output = T>
201            + Mul<T, Output = T>
202            + Mul<f32, Output = T>
203            + Add<T, Output = T>
204            + From<f32>
205            + FusedMultiplyAdd<T>,
206    >(
207        &self,
208        lut_r: &BarycentricWeight<f32>,
209        lut_g: &BarycentricWeight<f32>,
210        lut_b: &BarycentricWeight<f32>,
211        r: impl Fetcher<T>,
212    ) -> T {
213        let x: i32 = lut_r.x;
214        let y: i32 = lut_g.x;
215        let z: i32 = lut_b.x;
216
217        let x_n: i32 = lut_r.x_n;
218        let y_n: i32 = lut_g.x_n;
219        let z_n: i32 = lut_b.x_n;
220
221        let rx = lut_r.w;
222        let ry = lut_g.w;
223        let rz = lut_b.w;
224
225        let c0 = r.fetch(x, y, z);
226        let c2;
227        let c1;
228        let c3;
229        if rx >= ry {
230            if ry >= rz {
231                //rx >= ry && ry >= rz
232                c1 = r.fetch(x_n, y, z) - c0;
233                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
234                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
235            } else if rx >= rz {
236                //rx >= rz && rz >= ry
237                c1 = r.fetch(x_n, y, z) - c0;
238                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
239                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
240            } else {
241                //rz > rx && rx >= ry
242                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
243                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
244                c3 = r.fetch(x, y, z_n) - c0;
245            }
246        } else if rx >= rz {
247            //ry > rx && rx >= rz
248            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
249            c2 = r.fetch(x, y_n, z) - c0;
250            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
251        } else if ry >= rz {
252            //ry >= rz && rz > rx
253            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
254            c2 = r.fetch(x, y_n, z) - c0;
255            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
256        } else {
257            //rz > ry && ry > rx
258            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
259            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
260            c3 = r.fetch(x, y, z_n) - c0;
261        }
262        let s0 = c0.mla(c1, T::from(rx));
263        let s1 = s0.mla(c2, T::from(ry));
264        s1.mla(c3, T::from(rz))
265    }
266}
267
268macro_rules! define_md_inter {
269    ($interpolator: ident) => {
270        impl<const GRID_SIZE: usize> MultidimensionalInterpolation for $interpolator<GRID_SIZE> {
271            fn inter3(
272                &self,
273                cube: &[f32],
274                lut_r: &BarycentricWeight<f32>,
275                lut_g: &BarycentricWeight<f32>,
276                lut_b: &BarycentricWeight<f32>,
277            ) -> Vector3f {
278                self.interpolate::<Vector3f>(
279                    lut_r,
280                    lut_g,
281                    lut_b,
282                    TetrahedralFetchVector3f::<GRID_SIZE> { cube },
283                )
284            }
285
286            fn inter4(
287                &self,
288                cube: &[f32],
289                lut_r: &BarycentricWeight<f32>,
290                lut_g: &BarycentricWeight<f32>,
291                lut_b: &BarycentricWeight<f32>,
292            ) -> Vector4f {
293                self.interpolate::<Vector4f>(
294                    lut_r,
295                    lut_g,
296                    lut_b,
297                    TetrahedralFetchVector4f::<GRID_SIZE> { cube },
298                )
299            }
300        }
301    };
302}
303
304#[cfg(feature = "options")]
305define_md_inter!(Tetrahedral);
306#[cfg(feature = "options")]
307define_md_inter!(Pyramidal);
308#[cfg(feature = "options")]
309define_md_inter!(Prismatic);
310define_md_inter!(Trilinear);
311
312#[cfg(feature = "options")]
313impl<const GRID_SIZE: usize> Pyramidal<GRID_SIZE> {
314    #[inline]
315    fn interpolate<
316        T: Copy
317            + Sub<T, Output = T>
318            + Mul<T, Output = T>
319            + Mul<f32, Output = T>
320            + Add<T, Output = T>
321            + From<f32>
322            + FusedMultiplyAdd<T>,
323    >(
324        &self,
325        lut_r: &BarycentricWeight<f32>,
326        lut_g: &BarycentricWeight<f32>,
327        lut_b: &BarycentricWeight<f32>,
328        r: impl Fetcher<T>,
329    ) -> T {
330        let x: i32 = lut_r.x;
331        let y: i32 = lut_g.x;
332        let z: i32 = lut_b.x;
333
334        let x_n: i32 = lut_r.x_n;
335        let y_n: i32 = lut_g.x_n;
336        let z_n: i32 = lut_b.x_n;
337
338        let dr = lut_r.w;
339        let dg = lut_g.w;
340        let db = lut_b.w;
341
342        let c0 = r.fetch(x, y, z);
343
344        if dr > db && dg > db {
345            let x0 = r.fetch(x_n, y_n, z_n);
346            let x1 = r.fetch(x_n, y_n, z);
347            let x2 = r.fetch(x_n, y, z);
348            let x3 = r.fetch(x, y_n, z);
349
350            let c1 = x0 - x1;
351            let c2 = x2 - c0;
352            let c3 = x3 - c0;
353            let c4 = c0 - x3 - x2 + x1;
354
355            let s0 = c0.mla(c1, T::from(db));
356            let s1 = s0.mla(c2, T::from(dr));
357            let s2 = s1.mla(c3, T::from(dg));
358            s2.mla(c4, T::from(dr * dg))
359        } else if db > dr && dg > dr {
360            let x0 = r.fetch(x, y, z_n);
361            let x1 = r.fetch(x_n, y_n, z_n);
362            let x2 = r.fetch(x, y_n, z_n);
363            let x3 = r.fetch(x, y_n, z);
364
365            let c1 = x0 - c0;
366            let c2 = x1 - x2;
367            let c3 = x3 - c0;
368            let c4 = c0 - x3 - x0 + x2;
369
370            let s0 = c0.mla(c1, T::from(db));
371            let s1 = s0.mla(c2, T::from(dr));
372            let s2 = s1.mla(c3, T::from(dg));
373            s2.mla(c4, T::from(dg * db))
374        } else {
375            let x0 = r.fetch(x, y, z_n);
376            let x1 = r.fetch(x_n, y, z);
377            let x2 = r.fetch(x_n, y, z_n);
378            let x3 = r.fetch(x_n, y_n, z_n);
379
380            let c1 = x0 - c0;
381            let c2 = x1 - c0;
382            let c3 = x3 - x2;
383            let c4 = c0 - x1 - x0 + x2;
384
385            let s0 = c0.mla(c1, T::from(db));
386            let s1 = s0.mla(c2, T::from(dr));
387            let s2 = s1.mla(c3, T::from(dg));
388            s2.mla(c4, T::from(db * dr))
389        }
390    }
391}
392
393#[cfg(feature = "options")]
394impl<const GRID_SIZE: usize> Prismatic<GRID_SIZE> {
395    #[inline(always)]
396    fn interpolate<
397        T: Copy
398            + Sub<T, Output = T>
399            + Mul<T, Output = T>
400            + Mul<f32, Output = T>
401            + Add<T, Output = T>
402            + From<f32>
403            + FusedMultiplyAdd<T>,
404    >(
405        &self,
406        lut_r: &BarycentricWeight<f32>,
407        lut_g: &BarycentricWeight<f32>,
408        lut_b: &BarycentricWeight<f32>,
409        r: impl Fetcher<T>,
410    ) -> T {
411        let x: i32 = lut_r.x;
412        let y: i32 = lut_g.x;
413        let z: i32 = lut_b.x;
414
415        let x_n: i32 = lut_r.x_n;
416        let y_n: i32 = lut_g.x_n;
417        let z_n: i32 = lut_b.x_n;
418
419        let dr = lut_r.w;
420        let dg = lut_g.w;
421        let db = lut_b.w;
422
423        let c0 = r.fetch(x, y, z);
424
425        if db >= dr {
426            let x0 = r.fetch(x, y, z_n);
427            let x1 = r.fetch(x_n, y, z_n);
428            let x2 = r.fetch(x, y_n, z);
429            let x3 = r.fetch(x, y_n, z_n);
430            let x4 = r.fetch(x_n, y_n, z_n);
431
432            let c1 = x0 - c0;
433            let c2 = x1 - x0;
434            let c3 = x2 - c0;
435            let c4 = c0 - x2 - x0 + x3;
436            let c5 = x0 - x3 - x1 + x4;
437
438            let s0 = c0.mla(c1, T::from(db));
439            let s1 = s0.mla(c2, T::from(dr));
440            let s2 = s1.mla(c3, T::from(dg));
441            let s3 = s2.mla(c4, T::from(dg * db));
442            s3.mla(c5, T::from(dr * dg))
443        } else {
444            let x0 = r.fetch(x_n, y, z);
445            let x1 = r.fetch(x_n, y, z_n);
446            let x2 = r.fetch(x, y_n, z);
447            let x3 = r.fetch(x_n, y_n, z);
448            let x4 = r.fetch(x_n, y_n, z_n);
449
450            let c1 = x1 - x0;
451            let c2 = x0 - c0;
452            let c3 = x2 - c0;
453            let c4 = x0 - x3 - x1 + x4;
454            let c5 = c0 - x2 - x0 + x3;
455
456            let s0 = c0.mla(c1, T::from(db));
457            let s1 = s0.mla(c2, T::from(dr));
458            let s2 = s1.mla(c3, T::from(dg));
459            let s3 = s2.mla(c4, T::from(dg * db));
460            s3.mla(c5, T::from(dr * dg))
461        }
462    }
463}
464
465impl<const GRID_SIZE: usize> Trilinear<GRID_SIZE> {
466    #[inline(always)]
467    fn interpolate<
468        T: Copy
469            + Sub<T, Output = T>
470            + Mul<T, Output = T>
471            + Mul<f32, Output = T>
472            + Add<T, Output = T>
473            + From<f32>
474            + FusedMultiplyAdd<T>
475            + FusedMultiplyNegAdd<T>,
476    >(
477        &self,
478        lut_r: &BarycentricWeight<f32>,
479        lut_g: &BarycentricWeight<f32>,
480        lut_b: &BarycentricWeight<f32>,
481        r: impl Fetcher<T>,
482    ) -> T {
483        let x: i32 = lut_r.x;
484        let y: i32 = lut_g.x;
485        let z: i32 = lut_b.x;
486
487        let x_n: i32 = lut_r.x_n;
488        let y_n: i32 = lut_g.x_n;
489        let z_n: i32 = lut_b.x_n;
490
491        let dr = lut_r.w;
492        let dg = lut_g.w;
493        let db = lut_b.w;
494
495        let w0 = T::from(dr);
496        let w1 = T::from(dg);
497        let w2 = T::from(db);
498
499        let c000 = r.fetch(x, y, z);
500        let c100 = r.fetch(x_n, y, z);
501        let c010 = r.fetch(x, y_n, z);
502        let c110 = r.fetch(x_n, y_n, z);
503        let c001 = r.fetch(x, y, z_n);
504        let c101 = r.fetch(x_n, y, z_n);
505        let c011 = r.fetch(x, y_n, z_n);
506        let c111 = r.fetch(x_n, y_n, z_n);
507
508        let dx = T::from(dr);
509
510        let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
511        let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
512        let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
513        let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
514
515        let dy = T::from(dg);
516
517        let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
518        let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
519
520        let dz = T::from(db);
521
522        c0.neg_mla(c0, dz).mla(c1, w2)
523    }
524}
525
526pub(crate) trait LutBarycentricReduction<T, U> {
527    fn reduce<const SRC_BP: usize, const BINS: usize>(v: T) -> U;
528}
529
530impl LutBarycentricReduction<u8, u8> for () {
531    #[inline(always)]
532    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u8 {
533        v
534    }
535}
536
537impl LutBarycentricReduction<u8, u16> for () {
538    #[inline(always)]
539    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u16 {
540        if BINS == 65536 {
541            return u16::from_ne_bytes([v, v]);
542        }
543        if BINS == 16384 {
544            return u16::from_ne_bytes([v, v]) >> 2;
545        }
546        unimplemented!()
547    }
548}
549
550impl LutBarycentricReduction<f32, u8> for () {
551    #[inline(always)]
552    fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u8 {
553        (v * 255.).round().min(255.).max(0.) as u8
554    }
555}
556
557impl LutBarycentricReduction<f32, u16> for () {
558    #[inline(always)]
559    fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u16 {
560        let scale = (BINS - 1) as f32;
561        (v * scale).round().min(scale).max(0.) as u16
562    }
563}
564
565impl LutBarycentricReduction<f64, u8> for () {
566    #[inline(always)]
567    fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u8 {
568        (v * 255.).round().min(255.).max(0.) as u8
569    }
570}
571
572impl LutBarycentricReduction<f64, u16> for () {
573    #[inline(always)]
574    fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u16 {
575        let scale = (BINS - 1) as f64;
576        (v * scale).round().min(scale).max(0.) as u16
577    }
578}
579
580impl LutBarycentricReduction<u16, u16> for () {
581    #[inline(always)]
582    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u16 {
583        let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
584        let scale = src_scale * (BINS - 1) as f32;
585        (v as f32 * scale).round().min(scale).max(0.) as u16
586    }
587}
588
589impl LutBarycentricReduction<u16, u8> for () {
590    #[inline(always)]
591    fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u8 {
592        let shift = SRC_BP as u16 - 8;
593        if SRC_BP == 16 {
594            (v >> 8) as u8
595        } else {
596            (v >> shift).min(255) as u8
597        }
598    }
599}