moxcms/conversions/
rgbxyz.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
30use num_traits::AsPrimitive;
31
32pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
33    pub(crate) r_linear: Box<[f32; BUCKET]>,
34    pub(crate) g_linear: Box<[f32; BUCKET]>,
35    pub(crate) b_linear: Box<[f32; BUCKET]>,
36    pub(crate) r_gamma: Box<[T; 65536]>,
37    pub(crate) g_gamma: Box<[T; 65536]>,
38    pub(crate) b_gamma: Box<[T; 65536]>,
39    pub(crate) adaptation_matrix: Matrix3f,
40}
41
42impl<T: Clone, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
43    #[inline(never)]
44    #[allow(dead_code)]
45    fn convert_to_v(self) -> TransformMatrixShaperV<T> {
46        TransformMatrixShaperV {
47            r_linear: self.r_linear.iter().copied().collect(),
48            g_linear: self.g_linear.iter().copied().collect(),
49            b_linear: self.b_linear.iter().copied().collect(),
50            r_gamma: self.r_gamma,
51            g_gamma: self.g_gamma,
52            b_gamma: self.b_gamma,
53            adaptation_matrix: self.adaptation_matrix,
54        }
55    }
56}
57
58#[allow(dead_code)]
59pub(crate) struct TransformMatrixShaperV<T: Clone> {
60    pub(crate) r_linear: Vec<f32>,
61    pub(crate) g_linear: Vec<f32>,
62    pub(crate) b_linear: Vec<f32>,
63    pub(crate) r_gamma: Box<[T; 65536]>,
64    pub(crate) g_gamma: Box<[T; 65536]>,
65    pub(crate) b_gamma: Box<[T; 65536]>,
66    pub(crate) adaptation_matrix: Matrix3f,
67}
68
69/// Low memory footprint optimized routine for matrix shaper profiles with the same
70/// Gamma and linear curves.
71pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
72    pub(crate) linear: Box<[f32; BUCKET]>,
73    pub(crate) gamma: Box<[T; 65536]>,
74    pub(crate) adaptation_matrix: Matrix3f,
75}
76
77#[allow(dead_code)]
78impl<T: Clone, const BUCKET: usize> TransformMatrixShaperOptimized<T, BUCKET> {
79    fn convert_to_v(self) -> TransformMatrixShaperOptimizedV<T> {
80        TransformMatrixShaperOptimizedV {
81            linear: self.linear.iter().copied().collect::<Vec<_>>(),
82            gamma: self.gamma,
83            adaptation_matrix: self.adaptation_matrix,
84        }
85    }
86}
87
88/// Low memory footprint optimized routine for matrix shaper profiles with the same
89/// Gamma and linear curves.
90#[allow(dead_code)]
91pub(crate) struct TransformMatrixShaperOptimizedV<T: Clone> {
92    pub(crate) linear: Vec<f32>,
93    pub(crate) gamma: Box<[T; 65536]>,
94    pub(crate) adaptation_matrix: Matrix3f,
95}
96
97impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
98    #[inline(never)]
99    #[allow(dead_code)]
100    pub(crate) fn to_q2_13_n<
101        R: Copy + 'static + Default,
102        const PRECISION: i32,
103        const LINEAR_CAP: usize,
104    >(
105        &self,
106        gamma_lut: usize,
107        bit_depth: usize,
108    ) -> TransformMatrixShaperFixedPoint<R, T, BUCKET>
109    where
110        f32: AsPrimitive<R>,
111    {
112        let linear_scale = if T::FINITE {
113            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
114            ((1 << bit_depth) - 1) as f32 * lut_scale
115        } else {
116            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
117            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
118        };
119        let mut new_box_r = Box::new([R::default(); BUCKET]);
120        let mut new_box_g = Box::new([R::default(); BUCKET]);
121        let mut new_box_b = Box::new([R::default(); BUCKET]);
122        for (dst, &src) in new_box_r.iter_mut().zip(self.r_linear.iter()) {
123            *dst = (src * linear_scale).round().as_();
124        }
125        for (dst, &src) in new_box_g.iter_mut().zip(self.g_linear.iter()) {
126            *dst = (src * linear_scale).round().as_();
127        }
128        for (dst, &src) in new_box_b.iter_mut().zip(self.b_linear.iter()) {
129            *dst = (src * linear_scale).round().as_();
130        }
131        let scale: f32 = (1i32 << PRECISION) as f32;
132        let source_matrix = self.adaptation_matrix;
133        let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
134        for i in 0..3 {
135            for j in 0..3 {
136                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
137            }
138        }
139        TransformMatrixShaperFixedPoint {
140            r_linear: new_box_r,
141            g_linear: new_box_g,
142            b_linear: new_box_b,
143            r_gamma: self.r_gamma.clone(),
144            g_gamma: self.g_gamma.clone(),
145            b_gamma: self.b_gamma.clone(),
146            adaptation_matrix: dst_matrix,
147        }
148    }
149
150    #[inline(never)]
151    #[allow(dead_code)]
152    pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
153        &self,
154        gamma_lut: usize,
155        bit_depth: usize,
156    ) -> TransformMatrixShaperFp<R, T>
157    where
158        f32: AsPrimitive<R>,
159    {
160        let linear_scale = if T::FINITE {
161            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
162            ((1 << bit_depth) - 1) as f32 * lut_scale
163        } else {
164            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
165            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
166        };
167        let new_box_r = self
168            .r_linear
169            .iter()
170            .map(|&x| (x * linear_scale).round().as_())
171            .collect::<Vec<R>>();
172        let new_box_g = self
173            .g_linear
174            .iter()
175            .map(|&x| (x * linear_scale).round().as_())
176            .collect::<Vec<R>>();
177        let new_box_b = self
178            .b_linear
179            .iter()
180            .map(|&x| (x * linear_scale).round().as_())
181            .collect::<Vec<_>>();
182        let scale: f32 = (1i32 << PRECISION) as f32;
183        let source_matrix = self.adaptation_matrix;
184        let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
185        for i in 0..3 {
186            for j in 0..3 {
187                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
188            }
189        }
190        TransformMatrixShaperFp {
191            r_linear: new_box_r,
192            g_linear: new_box_g,
193            b_linear: new_box_b,
194            r_gamma: self.r_gamma.clone(),
195            g_gamma: self.g_gamma.clone(),
196            b_gamma: self.b_gamma.clone(),
197            adaptation_matrix: dst_matrix,
198        }
199    }
200}
201
202impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
203    TransformMatrixShaperOptimized<T, BUCKET>
204{
205    #[allow(dead_code)]
206    pub(crate) fn to_q2_13_n<
207        R: Copy + 'static + Default,
208        const PRECISION: i32,
209        const LINEAR_CAP: usize,
210    >(
211        &self,
212        gamma_lut: usize,
213        bit_depth: usize,
214    ) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
215    where
216        f32: AsPrimitive<R>,
217    {
218        let linear_scale = if T::FINITE {
219            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
220            ((1 << bit_depth) - 1) as f32 * lut_scale
221        } else {
222            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
223            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
224        };
225        let mut new_box_linear = Box::new([R::default(); BUCKET]);
226        for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
227            *dst = (*src * linear_scale).round().as_();
228        }
229        let scale: f32 = (1i32 << PRECISION) as f32;
230        let source_matrix = self.adaptation_matrix;
231        let mut dst_matrix = Matrix3::<i16> {
232            v: [[i16::default(); 3]; 3],
233        };
234        for i in 0..3 {
235            for j in 0..3 {
236                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
237            }
238        }
239        TransformMatrixShaperFixedPointOpt {
240            linear: new_box_linear,
241            gamma: self.gamma.clone(),
242            adaptation_matrix: dst_matrix,
243        }
244    }
245
246    #[allow(dead_code)]
247    pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
248        &self,
249        gamma_lut: usize,
250        bit_depth: usize,
251    ) -> TransformMatrixShaperFpOptVec<R, i16, T>
252    where
253        f32: AsPrimitive<R>,
254    {
255        let linear_scale = if T::FINITE {
256            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
257            ((1 << bit_depth) - 1) as f32 * lut_scale
258        } else {
259            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
260            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
261        };
262        let new_box_linear = self
263            .linear
264            .iter()
265            .map(|&x| (x * linear_scale).round().as_())
266            .collect::<Vec<R>>();
267        let scale: f32 = (1i32 << PRECISION) as f32;
268        let source_matrix = self.adaptation_matrix;
269        let mut dst_matrix = Matrix3::<i16> {
270            v: [[i16::default(); 3]; 3],
271        };
272        for i in 0..3 {
273            for j in 0..3 {
274                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
275            }
276        }
277        TransformMatrixShaperFpOptVec {
278            linear: new_box_linear,
279            gamma: self.gamma.clone(),
280            adaptation_matrix: dst_matrix,
281        }
282    }
283
284    #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
285    pub(crate) fn to_q1_30_n<R: Copy + 'static + Default, const PRECISION: i32>(
286        &self,
287        gamma_lut: usize,
288        bit_depth: usize,
289    ) -> TransformMatrixShaperFpOptVec<R, i32, T>
290    where
291        f32: AsPrimitive<R>,
292        f64: AsPrimitive<R>,
293    {
294        // It is important to scale 1 bit more to compensate vqrdmlah Q0.31, because we're going to use Q1.30
295        let table_size = if T::FINITE {
296            (1 << bit_depth) - 1
297        } else {
298            T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
299        };
300        let ext_bp = if T::FINITE {
301            bit_depth as u32 + 1
302        } else {
303            let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
304            bp + 1
305        };
306        let linear_scale = {
307            let lut_scale = (gamma_lut - 1) as f64 / table_size as f64;
308            ((1u32 << ext_bp) - 1) as f64 * lut_scale
309        };
310        let new_box_linear = self
311            .linear
312            .iter()
313            .map(|&v| (v as f64 * linear_scale).round().as_())
314            .collect::<Vec<R>>();
315        let scale: f64 = (1i64 << PRECISION) as f64;
316        let source_matrix = self.adaptation_matrix;
317        let mut dst_matrix = Matrix3::<i32> {
318            v: [[i32::default(); 3]; 3],
319        };
320        for i in 0..3 {
321            for j in 0..3 {
322                dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
323            }
324        }
325        TransformMatrixShaperFpOptVec {
326            linear: new_box_linear,
327            gamma: self.gamma.clone(),
328            adaptation_matrix: dst_matrix,
329        }
330    }
331}
332
333#[allow(unused)]
334struct TransformMatrixShaperScalar<
335    T: Clone,
336    const SRC_LAYOUT: u8,
337    const DST_LAYOUT: u8,
338    const LINEAR_CAP: usize,
339> {
340    pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
341    pub(crate) gamma_lut: usize,
342    pub(crate) bit_depth: usize,
343}
344
345#[allow(unused)]
346struct TransformMatrixShaperOptScalar<
347    T: Clone,
348    const SRC_LAYOUT: u8,
349    const DST_LAYOUT: u8,
350    const LINEAR_CAP: usize,
351> {
352    pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
353    pub(crate) gamma_lut: usize,
354    pub(crate) bit_depth: usize,
355}
356
/// Generates a factory function `$dep_name` that wraps a `$shaper` profile in
/// the executor type `$dependant`, monomorphised for the requested source and
/// destination layouts.
///
/// Only the four RGB/RGBA combinations are handled; any other pair yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    any(target_arch = "x86", target_arch = "x86_64"),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            let executor: Box<dyn TransformExecutor<T> + Send + Sync> =
                match (src_layout, dst_layout) {
                    (Layout::Rgba, Layout::Rgba) => Box::new($dependant::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgba as u8 },
                        LINEAR_CAP,
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgb, Layout::Rgba) => Box::new($dependant::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgba as u8 },
                        LINEAR_CAP,
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgba, Layout::Rgb) => Box::new($dependant::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgb as u8 },
                        LINEAR_CAP,
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgb, Layout::Rgb) => Box::new($dependant::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgb as u8 },
                        LINEAR_CAP,
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    // Every other layout pair is unsupported by this executor.
                    _ => return Err(CmsError::UnsupportedProfileConnection),
                };
            Ok(executor)
        }
    };
}
426
/// Generates a factory function `$dep_name` that first converts the `$shaper`
/// profile with `convert_to_v()` and then wraps the result in the executor
/// type `$dependant`, monomorphised for the requested layouts.
///
/// Only the four RGB/RGBA combinations are handled; any other pair yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    any(target_arch = "x86", target_arch = "x86_64"),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor_to_v {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The conversion runs before the layout check, as in the chain
            // of early returns this replaces.
            let profile = profile.convert_to_v();
            let executor: Box<dyn TransformExecutor<T> + Send + Sync> =
                match (src_layout, dst_layout) {
                    (Layout::Rgba, Layout::Rgba) => Box::new($dependant::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgba as u8 },
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgb, Layout::Rgba) => Box::new($dependant::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgba as u8 },
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgba, Layout::Rgb) => Box::new($dependant::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgb as u8 },
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    (Layout::Rgb, Layout::Rgb) => Box::new($dependant::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgb as u8 },
                    > {
                        profile,
                        bit_depth,
                        gamma_lut,
                    }),
                    // Every other layout pair is unsupported by this executor.
                    _ => return Err(CmsError::UnsupportedProfileConnection),
                };
            Ok(executor)
        }
    };
}
493
494#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
495use crate::conversions::sse::{TransformShaperRgbOptSse, TransformShaperRgbSse};
496
497#[cfg(all(target_arch = "x86_64", feature = "avx"))]
498use crate::conversions::avx::{TransformShaperRgbAvx, TransformShaperRgbOptAvx};
499
// Generates `make_rgb_xyz_rgb_transform_sse_41`: wraps a per-channel
// `TransformMatrixShaper` in `TransformShaperRgbSse`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_sse_41,
    TransformShaperRgbSse,
    TransformMatrixShaper
);

// Generates `make_rgb_xyz_rgb_transform_sse_41_opt`: converts a
// `TransformMatrixShaperOptimized` with `convert_to_v()` and wraps it in
// `TransformShaperRgbOptSse`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_sse_41_opt,
    TransformShaperRgbOptSse,
    TransformMatrixShaperOptimized
);

// Generates `make_rgb_xyz_rgb_transform_avx2`: wraps a per-channel
// `TransformMatrixShaper` in `TransformShaperRgbAvx`.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx2,
    TransformShaperRgbAvx,
    TransformMatrixShaper
);

// Generates `make_rgb_xyz_rgb_transform_avx2_opt`: converts a
// `TransformMatrixShaperOptimized` with `convert_to_v()` and wraps it in
// `TransformShaperRgbOptAvx`.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_avx2_opt,
    TransformShaperRgbOptAvx,
    TransformMatrixShaperOptimized
);
527
528#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
529use crate::conversions::avx512::TransformShaperRgbOptAvx512;
530
// Generates `make_rgb_xyz_rgb_transform_avx512_opt`: wraps a
// `TransformMatrixShaperOptimized` (without `convert_to_v()`) in
// `TransformShaperRgbOptAvx512`.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx512_opt,
    TransformShaperRgbOptAvx512,
    TransformMatrixShaperOptimized
);
537
538#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
539pub(crate) fn make_rgb_xyz_rgb_transform<
540    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
541    const LINEAR_CAP: usize,
542>(
543    src_layout: Layout,
544    dst_layout: Layout,
545    profile: TransformMatrixShaper<T, LINEAR_CAP>,
546    gamma_lut: usize,
547    bit_depth: usize,
548) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
549where
550    u32: AsPrimitive<T>,
551{
552    #[cfg(all(feature = "avx", target_arch = "x86_64"))]
553    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
554        return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP>(
555            src_layout, dst_layout, profile, gamma_lut, bit_depth,
556        );
557    }
558    #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
559    if std::arch::is_x86_feature_detected!("sse4.1") {
560        return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP>(
561            src_layout, dst_layout, profile, gamma_lut, bit_depth,
562        );
563    }
564    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
565        return Ok(Box::new(TransformMatrixShaperScalar::<
566            T,
567            { Layout::Rgba as u8 },
568            { Layout::Rgba as u8 },
569            LINEAR_CAP,
570        > {
571            profile,
572            gamma_lut,
573            bit_depth,
574        }));
575    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
576        return Ok(Box::new(TransformMatrixShaperScalar::<
577            T,
578            { Layout::Rgb as u8 },
579            { Layout::Rgba as u8 },
580            LINEAR_CAP,
581        > {
582            profile,
583            gamma_lut,
584            bit_depth,
585        }));
586    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
587        return Ok(Box::new(TransformMatrixShaperScalar::<
588            T,
589            { Layout::Rgba as u8 },
590            { Layout::Rgb as u8 },
591            LINEAR_CAP,
592        > {
593            profile,
594            gamma_lut,
595            bit_depth,
596        }));
597    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
598        return Ok(Box::new(TransformMatrixShaperScalar::<
599            T,
600            { Layout::Rgb as u8 },
601            { Layout::Rgb as u8 },
602            LINEAR_CAP,
603        > {
604            profile,
605            gamma_lut,
606            bit_depth,
607        }));
608    }
609    Err(CmsError::UnsupportedProfileConnection)
610}
611
612#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
613pub(crate) fn make_rgb_xyz_rgb_transform_opt<
614    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
615    const LINEAR_CAP: usize,
616>(
617    src_layout: Layout,
618    dst_layout: Layout,
619    profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
620    gamma_lut: usize,
621    bit_depth: usize,
622) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
623where
624    u32: AsPrimitive<T>,
625{
626    #[cfg(all(feature = "avx512", target_arch = "x86_64"))]
627    if std::arch::is_x86_feature_detected!("avx512bw")
628        && std::arch::is_x86_feature_detected!("avx512vl")
629        && std::arch::is_x86_feature_detected!("fma")
630    {
631        return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP>(
632            src_layout, dst_layout, profile, gamma_lut, bit_depth,
633        );
634    }
635    #[cfg(all(feature = "avx", target_arch = "x86_64"))]
636    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
637        return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP>(
638            src_layout, dst_layout, profile, gamma_lut, bit_depth,
639        );
640    }
641    #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
642    if std::arch::is_x86_feature_detected!("sse4.1") {
643        return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP>(
644            src_layout, dst_layout, profile, gamma_lut, bit_depth,
645        );
646    }
647    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
648        return Ok(Box::new(TransformMatrixShaperOptScalar::<
649            T,
650            { Layout::Rgba as u8 },
651            { Layout::Rgba as u8 },
652            LINEAR_CAP,
653        > {
654            profile,
655            gamma_lut,
656            bit_depth,
657        }));
658    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
659        return Ok(Box::new(TransformMatrixShaperOptScalar::<
660            T,
661            { Layout::Rgb as u8 },
662            { Layout::Rgba as u8 },
663            LINEAR_CAP,
664        > {
665            profile,
666            gamma_lut,
667            bit_depth,
668        }));
669    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
670        return Ok(Box::new(TransformMatrixShaperOptScalar::<
671            T,
672            { Layout::Rgba as u8 },
673            { Layout::Rgb as u8 },
674            LINEAR_CAP,
675        > {
676            profile,
677            gamma_lut,
678            bit_depth,
679        }));
680    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
681        return Ok(Box::new(TransformMatrixShaperOptScalar::<
682            T,
683            { Layout::Rgb as u8 },
684            { Layout::Rgb as u8 },
685            LINEAR_CAP,
686        > {
687            profile,
688            gamma_lut,
689            bit_depth,
690        }));
691    }
692    Err(CmsError::UnsupportedProfileConnection)
693}
694
695#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
696use crate::conversions::neon::{TransformShaperRgbNeon, TransformShaperRgbOptNeon};
697use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec;
698use crate::conversions::rgbxyz_fixed::{
699    TransformMatrixShaperFixedPoint, TransformMatrixShaperFixedPointOpt, TransformMatrixShaperFp,
700};
701use crate::transform::PointeeSizeExpressible;
702
// On aarch64 with NEON enabled, `make_rgb_xyz_rgb_transform` is generated by
// the macro: it converts a `TransformMatrixShaper` with `convert_to_v()` and
// wraps it in `TransformShaperRgbNeon`.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform,
    TransformShaperRgbNeon,
    TransformMatrixShaper
);

// Likewise `make_rgb_xyz_rgb_transform_opt`: converts a
// `TransformMatrixShaperOptimized` with `convert_to_v()` and wraps it in
// `TransformShaperRgbOptNeon`.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_opt,
    TransformShaperRgbOptNeon,
    TransformMatrixShaperOptimized
);
716
717#[allow(unused)]
718impl<
719    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
720    const SRC_LAYOUT: u8,
721    const DST_LAYOUT: u8,
722    const LINEAR_CAP: usize,
723> TransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
724where
725    u32: AsPrimitive<T>,
726{
727    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
728        use crate::mlaf::mlaf;
729        let src_cn = Layout::from(SRC_LAYOUT);
730        let dst_cn = Layout::from(DST_LAYOUT);
731        let src_channels = src_cn.channels();
732        let dst_channels = dst_cn.channels();
733
734        if src.len() / src_channels != dst.len() / dst_channels {
735            return Err(CmsError::LaneSizeMismatch);
736        }
737        if src.len() % src_channels != 0 {
738            return Err(CmsError::LaneMultipleOfChannels);
739        }
740        if dst.len() % dst_channels != 0 {
741            return Err(CmsError::LaneMultipleOfChannels);
742        }
743
744        let transform = self.profile.adaptation_matrix;
745        let scale = (self.gamma_lut - 1) as f32;
746        let max_colors: T = ((1 << self.bit_depth) - 1).as_();
747
748        for (src, dst) in src
749            .chunks_exact(src_channels)
750            .zip(dst.chunks_exact_mut(dst_channels))
751        {
752            let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
753            let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
754            let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
755            let a = if src_channels == 4 {
756                src[src_cn.a_i()]
757            } else {
758                max_colors
759            };
760
761            let new_r = mlaf(
762                0.5f32,
763                mlaf(
764                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
765                    b,
766                    transform.v[0][2],
767                )
768                .max(0f32)
769                .min(1f32),
770                scale,
771            );
772
773            let new_g = mlaf(
774                0.5f32,
775                mlaf(
776                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
777                    b,
778                    transform.v[1][2],
779                )
780                .max(0f32)
781                .min(1f32),
782                scale,
783            );
784
785            let new_b = mlaf(
786                0.5f32,
787                mlaf(
788                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
789                    b,
790                    transform.v[2][2],
791                )
792                .max(0f32)
793                .min(1f32),
794                scale,
795            );
796
797            dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
798            dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
799            dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
800            if dst_channels == 4 {
801                dst[dst_cn.a_i()] = a;
802            }
803        }
804
805        Ok(())
806    }
807}
808
809#[allow(unused)]
810impl<
811    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
812    const SRC_LAYOUT: u8,
813    const DST_LAYOUT: u8,
814    const LINEAR_CAP: usize,
815> TransformExecutor<T> for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
816where
817    u32: AsPrimitive<T>,
818{
819    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
820        use crate::mlaf::mlaf;
821        let src_cn = Layout::from(SRC_LAYOUT);
822        let dst_cn = Layout::from(DST_LAYOUT);
823        let src_channels = src_cn.channels();
824        let dst_channels = dst_cn.channels();
825
826        if src.len() / src_channels != dst.len() / dst_channels {
827            return Err(CmsError::LaneSizeMismatch);
828        }
829        if src.len() % src_channels != 0 {
830            return Err(CmsError::LaneMultipleOfChannels);
831        }
832        if dst.len() % dst_channels != 0 {
833            return Err(CmsError::LaneMultipleOfChannels);
834        }
835
836        let transform = self.profile.adaptation_matrix;
837        let scale = (self.gamma_lut - 1) as f32;
838        let max_colors: T = ((1 << self.bit_depth) - 1).as_();
839
840        for (src, dst) in src
841            .chunks_exact(src_channels)
842            .zip(dst.chunks_exact_mut(dst_channels))
843        {
844            let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
845            let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
846            let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
847            let a = if src_channels == 4 {
848                src[src_cn.a_i()]
849            } else {
850                max_colors
851            };
852
853            let new_r = mlaf(
854                0.5f32,
855                mlaf(
856                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
857                    b,
858                    transform.v[0][2],
859                )
860                .max(0f32)
861                .min(1f32),
862                scale,
863            );
864
865            let new_g = mlaf(
866                0.5f32,
867                mlaf(
868                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
869                    b,
870                    transform.v[1][2],
871                )
872                .max(0f32)
873                .min(1f32),
874                scale,
875            );
876
877            let new_b = mlaf(
878                0.5f32,
879                mlaf(
880                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
881                    b,
882                    transform.v[2][2],
883                )
884                .max(0f32)
885                .min(1f32),
886                scale,
887            );
888
889            dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
890            dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
891            dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
892            if dst_channels == 4 {
893                dst[dst_cn.a_i()] = a;
894            }
895        }
896
897        Ok(())
898    }
899}