1use crate::conversions::avx::cube::CubeAvxFma;
30use crate::conversions::avx::interpolator::AvxVectorSse;
31use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
32use std::arch::x86_64::*;
33
34pub(crate) struct ACurves3AvxFma<'a, const DEPTH: usize> {
35 pub(crate) curve0: Box<[f32; 65536]>,
36 pub(crate) curve1: Box<[f32; 65536]>,
37 pub(crate) curve2: Box<[f32; 65536]>,
38 pub(crate) clut: &'a [f32],
39 pub(crate) grid_size: [u8; 3],
40 pub(crate) interpolation_method: InterpolationMethod,
41 pub(crate) pcs: DataColorSpace,
42}
43
44pub(crate) struct ACurves3OptimizedAvxFma<'a> {
45 pub(crate) clut: &'a [f32],
46 pub(crate) grid_size: [u8; 3],
47 pub(crate) interpolation_method: InterpolationMethod,
48 pub(crate) pcs: DataColorSpace,
49}
50
51pub(crate) struct ACurves3InverseAvxFma<'a, const DEPTH: usize> {
52 pub(crate) curve0: Box<[f32; 65536]>,
53 pub(crate) curve1: Box<[f32; 65536]>,
54 pub(crate) curve2: Box<[f32; 65536]>,
55 pub(crate) clut: &'a [f32],
56 pub(crate) grid_size: [u8; 3],
57 pub(crate) interpolation_method: InterpolationMethod,
58 pub(crate) pcs: DataColorSpace,
59}
60
61impl<const DEPTH: usize> ACurves3AvxFma<'_, DEPTH> {
62 #[allow(unused_unsafe)]
63 #[target_feature(enable = "avx2", enable = "fma")]
64 unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
65 &self,
66 dst: &mut [f32],
67 fetch: Fetch,
68 ) -> Result<(), CmsError> {
69 unsafe {
70 let scale_value = (DEPTH - 1) as f32;
71
72 for dst in dst.chunks_exact_mut(3) {
73 let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
74 let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
75 let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
76 let b0 = self.curve0[a0 as usize];
77 let b1 = self.curve1[a1 as usize];
78 let b2 = self.curve2[a2 as usize];
79 let v = fetch(b0, b1, b2).v;
80 dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
81 dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
82 dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
83 }
84 }
85 Ok(())
86 }
87}
88
89impl ACurves3OptimizedAvxFma<'_> {
90 #[allow(unused_unsafe)]
91 #[target_feature(enable = "avx2", enable = "fma")]
92 unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
93 &self,
94 dst: &mut [f32],
95 fetch: Fetch,
96 ) -> Result<(), CmsError> {
97 unsafe {
98 for dst in dst.chunks_exact_mut(3) {
99 let a0 = dst[0];
100 let a1 = dst[1];
101 let a2 = dst[2];
102 let v = fetch(a0, a1, a2).v;
103 dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
104 dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
105 dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
106 }
107 }
108 Ok(())
109 }
110}
111
112impl<const DEPTH: usize> InPlaceStage for ACurves3AvxFma<'_, DEPTH> {
113 fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
114 let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
115
116 unsafe {
117 if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
119 return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
120 }
121
122 match self.interpolation_method {
123 #[cfg(feature = "options")]
124 InterpolationMethod::Tetrahedral => {
125 self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
126 }
127 #[cfg(feature = "options")]
128 InterpolationMethod::Pyramid => {
129 self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
130 }
131 #[cfg(feature = "options")]
132 InterpolationMethod::Prism => {
133 self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
134 }
135 InterpolationMethod::Linear => {
136 self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
137 }
138 }
139 }
140 Ok(())
141 }
142}
143
144impl InPlaceStage for ACurves3OptimizedAvxFma<'_> {
145 fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
146 let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
147
148 unsafe {
149 if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
151 return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
152 }
153
154 match self.interpolation_method {
155 #[cfg(feature = "options")]
156 InterpolationMethod::Tetrahedral => {
157 self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
158 }
159 #[cfg(feature = "options")]
160 InterpolationMethod::Pyramid => {
161 self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
162 }
163 #[cfg(feature = "options")]
164 InterpolationMethod::Prism => {
165 self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
166 }
167 InterpolationMethod::Linear => {
168 self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
169 }
170 }
171 }
172 Ok(())
173 }
174}
175
176impl<const DEPTH: usize> ACurves3InverseAvxFma<'_, DEPTH> {
177 #[allow(unused_unsafe)]
178 #[target_feature(enable = "avx2", enable = "fma")]
179 unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
180 &self,
181 dst: &mut [f32],
182 fetch: Fetch,
183 ) -> Result<(), CmsError> {
184 unsafe {
185 let v_scale_value = _mm_set1_ps((DEPTH as u32 - 1u32) as f32);
186 for dst in dst.chunks_exact_mut(3) {
187 let mut v = fetch(dst[0], dst[1], dst[2]).v;
188 v = _mm_mul_ps(v, v_scale_value);
189 v = _mm_min_ps(v, v_scale_value);
190 v = _mm_max_ps(v, _mm_setzero_ps());
191 let c = _mm_cvtps_epi32(v);
192 let a0 = _mm_extract_epi32::<0>(c) as u16;
193 let a1 = _mm_extract_epi32::<1>(c) as u16;
194 let a2 = _mm_extract_epi32::<2>(c) as u16;
195 let b0 = self.curve0[a0 as usize];
196 let b1 = self.curve1[a1 as usize];
197 let b2 = self.curve2[a2 as usize];
198 dst[0] = b0;
199 dst[1] = b1;
200 dst[2] = b2;
201 }
202 }
203 Ok(())
204 }
205}
206
207impl<const DEPTH: usize> InPlaceStage for ACurves3InverseAvxFma<'_, DEPTH> {
208 fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
209 let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
210
211 unsafe {
212 if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
214 return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
215 }
216
217 match self.interpolation_method {
218 #[cfg(feature = "options")]
219 InterpolationMethod::Tetrahedral => {
220 self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
221 }
222 #[cfg(feature = "options")]
223 InterpolationMethod::Pyramid => {
224 self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
225 }
226 #[cfg(feature = "options")]
227 InterpolationMethod::Prism => {
228 self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
229 }
230 InterpolationMethod::Linear => {
231 self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
232 }
233 }
234 }
235 Ok(())
236 }
237}