moxcms/conversions/avx/
a_curves3.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::conversions::avx::cube::CubeAvxFma;
30use crate::conversions::avx::interpolator::AvxVectorSse;
31use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
32use std::arch::x86_64::*;
33
34pub(crate) struct ACurves3AvxFma<'a, const DEPTH: usize> {
35    pub(crate) curve0: Box<[f32; 65536]>,
36    pub(crate) curve1: Box<[f32; 65536]>,
37    pub(crate) curve2: Box<[f32; 65536]>,
38    pub(crate) clut: &'a [f32],
39    pub(crate) grid_size: [u8; 3],
40    pub(crate) interpolation_method: InterpolationMethod,
41    pub(crate) pcs: DataColorSpace,
42}
43
44pub(crate) struct ACurves3OptimizedAvxFma<'a> {
45    pub(crate) clut: &'a [f32],
46    pub(crate) grid_size: [u8; 3],
47    pub(crate) interpolation_method: InterpolationMethod,
48    pub(crate) pcs: DataColorSpace,
49}
50
51pub(crate) struct ACurves3InverseAvxFma<'a, const DEPTH: usize> {
52    pub(crate) curve0: Box<[f32; 65536]>,
53    pub(crate) curve1: Box<[f32; 65536]>,
54    pub(crate) curve2: Box<[f32; 65536]>,
55    pub(crate) clut: &'a [f32],
56    pub(crate) grid_size: [u8; 3],
57    pub(crate) interpolation_method: InterpolationMethod,
58    pub(crate) pcs: DataColorSpace,
59}
60
61impl<const DEPTH: usize> ACurves3AvxFma<'_, DEPTH> {
62    #[allow(unused_unsafe)]
63    #[target_feature(enable = "avx2", enable = "fma")]
64    unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
65        &self,
66        dst: &mut [f32],
67        fetch: Fetch,
68    ) -> Result<(), CmsError> {
69        unsafe {
70            let scale_value = (DEPTH - 1) as f32;
71
72            for dst in dst.chunks_exact_mut(3) {
73                let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
74                let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
75                let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
76                let b0 = self.curve0[a0 as usize];
77                let b1 = self.curve1[a1 as usize];
78                let b2 = self.curve2[a2 as usize];
79                let v = fetch(b0, b1, b2).v;
80                dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
81                dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
82                dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
83            }
84        }
85        Ok(())
86    }
87}
88
89impl ACurves3OptimizedAvxFma<'_> {
90    #[allow(unused_unsafe)]
91    #[target_feature(enable = "avx2", enable = "fma")]
92    unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
93        &self,
94        dst: &mut [f32],
95        fetch: Fetch,
96    ) -> Result<(), CmsError> {
97        unsafe {
98            for dst in dst.chunks_exact_mut(3) {
99                let a0 = dst[0];
100                let a1 = dst[1];
101                let a2 = dst[2];
102                let v = fetch(a0, a1, a2).v;
103                dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
104                dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
105                dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
106            }
107        }
108        Ok(())
109    }
110}
111
112impl<const DEPTH: usize> InPlaceStage for ACurves3AvxFma<'_, DEPTH> {
113    fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
114        let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
115
116        unsafe {
117            // If PCS is LAB then linear interpolation should be used
118            if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
119                return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
120            }
121
122            match self.interpolation_method {
123                #[cfg(feature = "options")]
124                InterpolationMethod::Tetrahedral => {
125                    self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
126                }
127                #[cfg(feature = "options")]
128                InterpolationMethod::Pyramid => {
129                    self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
130                }
131                #[cfg(feature = "options")]
132                InterpolationMethod::Prism => {
133                    self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
134                }
135                InterpolationMethod::Linear => {
136                    self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
137                }
138            }
139        }
140        Ok(())
141    }
142}
143
144impl InPlaceStage for ACurves3OptimizedAvxFma<'_> {
145    fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
146        let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
147
148        unsafe {
149            // If PCS is LAB then linear interpolation should be used
150            if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
151                return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
152            }
153
154            match self.interpolation_method {
155                #[cfg(feature = "options")]
156                InterpolationMethod::Tetrahedral => {
157                    self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
158                }
159                #[cfg(feature = "options")]
160                InterpolationMethod::Pyramid => {
161                    self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
162                }
163                #[cfg(feature = "options")]
164                InterpolationMethod::Prism => {
165                    self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
166                }
167                InterpolationMethod::Linear => {
168                    self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
169                }
170            }
171        }
172        Ok(())
173    }
174}
175
176impl<const DEPTH: usize> ACurves3InverseAvxFma<'_, DEPTH> {
177    #[allow(unused_unsafe)]
178    #[target_feature(enable = "avx2", enable = "fma")]
179    unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
180        &self,
181        dst: &mut [f32],
182        fetch: Fetch,
183    ) -> Result<(), CmsError> {
184        unsafe {
185            let v_scale_value = _mm_set1_ps((DEPTH as u32 - 1u32) as f32);
186            for dst in dst.chunks_exact_mut(3) {
187                let mut v = fetch(dst[0], dst[1], dst[2]).v;
188                v = _mm_mul_ps(v, v_scale_value);
189                v = _mm_min_ps(v, v_scale_value);
190                v = _mm_max_ps(v, _mm_setzero_ps());
191                let c = _mm_cvtps_epi32(v);
192                let a0 = _mm_extract_epi32::<0>(c) as u16;
193                let a1 = _mm_extract_epi32::<1>(c) as u16;
194                let a2 = _mm_extract_epi32::<2>(c) as u16;
195                let b0 = self.curve0[a0 as usize];
196                let b1 = self.curve1[a1 as usize];
197                let b2 = self.curve2[a2 as usize];
198                dst[0] = b0;
199                dst[1] = b1;
200                dst[2] = b2;
201            }
202        }
203        Ok(())
204    }
205}
206
207impl<const DEPTH: usize> InPlaceStage for ACurves3InverseAvxFma<'_, DEPTH> {
208    fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
209        let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
210
211        unsafe {
212            // If PCS is LAB then linear interpolation should be used
213            if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
214                return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
215            }
216
217            match self.interpolation_method {
218                #[cfg(feature = "options")]
219                InterpolationMethod::Tetrahedral => {
220                    self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
221                }
222                #[cfg(feature = "options")]
223                InterpolationMethod::Pyramid => {
224                    self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
225                }
226                #[cfg(feature = "options")]
227                InterpolationMethod::Prism => {
228                    self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
229                }
230                InterpolationMethod::Linear => {
231                    self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
232                }
233            }
234        }
235        Ok(())
236    }
237}