moxcms/conversions/
lut4.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::conversions::katana::KatanaInitialStage;
30use crate::profile::LutDataType;
31use crate::safe_math::{SafeMul, SafePowi};
32use crate::trc::lut_interp_linear_float;
33use crate::{
34    CmsError, DataColorSpace, Hypercube, InterpolationMethod, MalformedSize,
35    PointeeSizeExpressible, Stage, TransformOptions, Vector3f,
36};
37use num_traits::AsPrimitive;
38use std::marker::PhantomData;
39
40#[allow(unused)]
41#[derive(Default)]
42struct Lut4x3 {
43    linearization: [Vec<f32>; 4],
44    clut: Vec<f32>,
45    grid_size: u8,
46    output: [Vec<f32>; 3],
47    interpolation_method: InterpolationMethod,
48    pcs: DataColorSpace,
49}
50
51#[allow(unused)]
52#[derive(Default)]
53struct KatanaLut4x3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> {
54    linearization: [Vec<f32>; 4],
55    clut: Vec<f32>,
56    grid_size: u8,
57    output: [Vec<f32>; 3],
58    interpolation_method: InterpolationMethod,
59    pcs: DataColorSpace,
60    _phantom: PhantomData<T>,
61    bit_depth: usize,
62}
63
64#[allow(unused)]
65impl Lut4x3 {
66    fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
67        &self,
68        src: &[f32],
69        dst: &mut [f32],
70        fetch: Fetch,
71    ) -> Result<(), CmsError> {
72        let linearization_0 = &self.linearization[0];
73        let linearization_1 = &self.linearization[1];
74        let linearization_2 = &self.linearization[2];
75        let linearization_3 = &self.linearization[3];
76        for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
77            debug_assert!(self.grid_size as i32 >= 1);
78            let linear_x = lut_interp_linear_float(src[0], linearization_0);
79            let linear_y = lut_interp_linear_float(src[1], linearization_1);
80            let linear_z = lut_interp_linear_float(src[2], linearization_2);
81            let linear_w = lut_interp_linear_float(src[3], linearization_3);
82
83            let clut = fetch(linear_x, linear_y, linear_z, linear_w);
84
85            let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
86            let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
87            let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
88            dest[0] = pcs_x;
89            dest[1] = pcs_y;
90            dest[2] = pcs_z;
91        }
92        Ok(())
93    }
94}
95
// Implements `Stage` for a 4x3 LUT type that exposes `clut`, `grid_size`,
// `pcs`, `interpolation_method` and a `transform_impl` method.
macro_rules! define_lut4_dispatch {
    ($dispatcher: ident) => {
        impl Stage for $dispatcher {
            fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
                let cube = Hypercube::new(&self.clut, self.grid_size as usize);

                // Lab and XYZ always use quadlinear interpolation, whatever
                // method was requested.
                if matches!(self.pcs, DataColorSpace::Lab | DataColorSpace::Xyz) {
                    return self
                        .transform_impl(src, dst, |x, y, z, w| cube.quadlinear_vec3(x, y, z, w));
                }

                // Otherwise honour the requested method; the non-linear ones
                // only exist when the `options` feature is enabled.
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_impl(src, dst, |x, y, z, w| cube.tetra_vec3(x, y, z, w))
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_impl(src, dst, |x, y, z, w| cube.pyramid_vec3(x, y, z, w))
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_impl(src, dst, |x, y, z, w| cube.prism_vec3(x, y, z, w))
                    }
                    InterpolationMethod::Linear => {
                        self.transform_impl(src, dst, |x, y, z, w| {
                            cube.quadlinear_vec3(x, y, z, w)
                        })
                    }
                }
            }
        }
    };
}
132
133impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut4x3<T> {
134    fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
135        &self,
136        input: &[T],
137        fetch: Fetch,
138    ) -> Result<Vec<f32>, CmsError> {
139        if input.len() % 4 != 0 {
140            return Err(CmsError::LaneMultipleOfChannels);
141        }
142        let norm_value = if T::FINITE {
143            1.0 / ((1u32 << self.bit_depth) - 1) as f32
144        } else {
145            1.0
146        };
147        let mut dst = vec![0.; (input.len() / 4) * 3];
148        let linearization_0 = &self.linearization[0];
149        let linearization_1 = &self.linearization[1];
150        let linearization_2 = &self.linearization[2];
151        let linearization_3 = &self.linearization[3];
152        for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(4)) {
153            let linear_x = lut_interp_linear_float(src[0].as_() * norm_value, linearization_0);
154            let linear_y = lut_interp_linear_float(src[1].as_() * norm_value, linearization_1);
155            let linear_z = lut_interp_linear_float(src[2].as_() * norm_value, linearization_2);
156            let linear_w = lut_interp_linear_float(src[3].as_() * norm_value, linearization_3);
157
158            let clut = fetch(linear_x, linear_y, linear_z, linear_w);
159
160            let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
161            let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
162            let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
163            dest[0] = pcs_x;
164            dest[1] = pcs_y;
165            dest[2] = pcs_z;
166        }
167        Ok(dst)
168    }
169}
170
171impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
172    for KatanaLut4x3<T>
173{
174    fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
175        if input.len() % 4 != 0 {
176            return Err(CmsError::LaneMultipleOfChannels);
177        }
178        let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
179
180        // If Source PCS is LAB trilinear should be used
181        if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
182            return self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
183        }
184
185        match self.interpolation_method {
186            #[cfg(feature = "options")]
187            InterpolationMethod::Tetrahedral => {
188                self.to_pcs_impl(input, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))
189            }
190            #[cfg(feature = "options")]
191            InterpolationMethod::Pyramid => {
192                self.to_pcs_impl(input, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))
193            }
194            #[cfg(feature = "options")]
195            InterpolationMethod::Prism => {
196                self.to_pcs_impl(input, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))
197            }
198            InterpolationMethod::Linear => {
199                self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))
200            }
201        }
202    }
203}
204
// Generate the `Stage` implementation for the scalar `Lut4x3` type.
define_lut4_dispatch!(Lut4x3);
206
207fn make_lut_4x3(
208    lut: &LutDataType,
209    options: TransformOptions,
210    pcs: DataColorSpace,
211) -> Result<Lut4x3, CmsError> {
212    // There is 4 possible cases:
213    // - All curves are non-linear
214    // - Linearization curves are non-linear, but gamma is linear
215    // - Gamma curves are non-linear, but linearization is linear
216    // - All curves linear
217    let clut_length: usize = (lut.num_clut_grid_points as usize)
218        .safe_powi(lut.num_input_channels as u32)?
219        .safe_mul(lut.num_output_channels as usize)?;
220
221    let clut_table = lut.clut_table.to_clut_f32();
222    if clut_table.len() != clut_length {
223        return Err(CmsError::MalformedClut(MalformedSize {
224            size: clut_table.len(),
225            expected: clut_length,
226        }));
227    }
228
229    let linearization_table = lut.input_table.to_clut_f32();
230
231    if linearization_table.len() < lut.num_input_table_entries as usize * 4 {
232        return Err(CmsError::MalformedCurveLutTable(MalformedSize {
233            size: linearization_table.len(),
234            expected: lut.num_input_table_entries as usize * 4,
235        }));
236    }
237
238    let lin_curve0 = linearization_table[0..lut.num_input_table_entries as usize].to_vec();
239    let lin_curve1 = linearization_table
240        [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
241        .to_vec();
242    let lin_curve2 = linearization_table
243        [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
244        .to_vec();
245    let lin_curve3 = linearization_table
246        [lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4]
247        .to_vec();
248
249    let gamma_table = lut.output_table.to_clut_f32();
250
251    if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
252        return Err(CmsError::MalformedCurveLutTable(MalformedSize {
253            size: gamma_table.len(),
254            expected: lut.num_output_table_entries as usize * 3,
255        }));
256    }
257
258    let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
259    let gamma_curve1 = gamma_table
260        [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
261        .to_vec();
262    let gamma_curve2 = gamma_table
263        [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
264        .to_vec();
265
266    let transform = Lut4x3 {
267        linearization: [lin_curve0, lin_curve1, lin_curve2, lin_curve3],
268        interpolation_method: options.interpolation_method,
269        pcs,
270        clut: clut_table,
271        grid_size: lut.num_clut_grid_points,
272        output: [gamma_curve0, gamma_curve1, gamma_curve2],
273    };
274    Ok(transform)
275}
276
277fn stage_lut_4x3(
278    lut: &LutDataType,
279    options: TransformOptions,
280    pcs: DataColorSpace,
281) -> Result<Box<dyn Stage>, CmsError> {
282    let lut = make_lut_4x3(lut, options, pcs)?;
283    #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
284    {
285        use crate::conversions::neon::Lut4x3Neon;
286        let transform = Lut4x3Neon {
287            linearization: lut.linearization,
288            interpolation_method: lut.interpolation_method,
289            pcs: lut.pcs,
290            clut: lut.clut,
291            grid_size: lut.grid_size,
292            output: lut.output,
293        };
294        Ok(Box::new(transform))
295    }
296    #[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
297    {
298        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
299        {
300            use crate::conversions::avx::Lut4x3AvxFma;
301            if std::arch::is_x86_feature_detected!("avx2")
302                && std::arch::is_x86_feature_detected!("fma")
303            {
304                let transform = Lut4x3AvxFma {
305                    linearization: lut.linearization,
306                    interpolation_method: lut.interpolation_method,
307                    pcs: lut.pcs,
308                    clut: lut.clut,
309                    grid_size: lut.grid_size,
310                    output: lut.output,
311                };
312                return Ok(Box::new(transform));
313            }
314        }
315        let transform = Lut4x3 {
316            linearization: lut.linearization,
317            interpolation_method: lut.interpolation_method,
318            pcs: lut.pcs,
319            clut: lut.clut,
320            grid_size: lut.grid_size,
321            output: lut.output,
322        };
323        Ok(Box::new(transform))
324    }
325}
326
327pub(crate) fn katana_input_stage_lut_4x3<
328    T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
329>(
330    lut: &LutDataType,
331    options: TransformOptions,
332    pcs: DataColorSpace,
333    bit_depth: usize,
334) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
335    // There is 4 possible cases:
336    // - All curves are non-linear
337    // - Linearization curves are non-linear, but gamma is linear
338    // - Gamma curves are non-linear, but linearization is linear
339    // - All curves linear
340    let lut = make_lut_4x3(lut, options, pcs)?;
341
342    let transform = KatanaLut4x3::<T> {
343        linearization: lut.linearization,
344        interpolation_method: lut.interpolation_method,
345        pcs: lut.pcs,
346        clut: lut.clut,
347        grid_size: lut.grid_size,
348        output: lut.output,
349        _phantom: PhantomData,
350        bit_depth,
351    };
352    Ok(Box::new(transform))
353}
354
/// Generates a regular four-dimensional grid of normalized samples.
///
/// The result holds `SAMPLES^4` groups of four values, each value being one of
/// the `SAMPLES` evenly spaced points in `[0, 1]`. Within a group the values
/// are pushed in the order `c, m, y, k`. Across groups `y` varies fastest,
/// then `m`, then `c`, and `k` slowest.
///
/// Degenerate grids are well defined: `SAMPLES == 0` yields an empty vector
/// and `SAMPLES == 1` yields a single all-zero group. Without the guard the
/// step would be `1 / 0`, filling the output with NaN, or underflow
/// `SAMPLES - 1`.
pub(crate) fn create_lut4_norm_samples<const SAMPLES: usize>() -> Vec<f32> {
    // Sized in `usize` so the capacity is never truncated.
    let total = 4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES;
    let mut src = Vec::with_capacity(total);

    // Distance between neighbouring grid points; a grid with fewer than two
    // points has no spacing.
    let step = match SAMPLES {
        0 | 1 => 0f32,
        n => 1f32 / (n - 1) as f32,
    };

    for k in 0..SAMPLES {
        for c in 0..SAMPLES {
            for m in 0..SAMPLES {
                for y in 0..SAMPLES {
                    src.push(c as f32 * step);
                    src.push(m as f32 * step);
                    src.push(y as f32 * step);
                    src.push(k as f32 * step);
                }
            }
        }
    }
    src
}
375
376pub(crate) fn create_lut4<const SAMPLES: usize>(
377    lut: &LutDataType,
378    options: TransformOptions,
379    pcs: DataColorSpace,
380) -> Result<Vec<f32>, CmsError> {
381    if lut.num_input_channels != 4 {
382        return Err(CmsError::UnsupportedProfileConnection);
383    }
384    let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
385
386    let src = create_lut4_norm_samples::<SAMPLES>();
387    let mut dest = vec![0.; (lut_size as usize) / 4 * 3];
388
389    let lut_stage = stage_lut_4x3(lut, options, pcs)?;
390    lut_stage.transform(&src, &mut dest)?;
391    Ok(dest)
392}