1use crate::conversions::LutBarycentricReduction;
30use crate::conversions::avx::interpolator_q0_15::*;
31use crate::conversions::interpolator::BarycentricWeight;
32use crate::transform::PointeeSizeExpressible;
33use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
34use num_traits::AsPrimitive;
35use std::arch::x86_64::*;
36use std::marker::PhantomData;
37
38pub(crate) struct TransformLut3x3AvxQ0_15<
39 T,
40 U,
41 const SRC_LAYOUT: u8,
42 const DST_LAYOUT: u8,
43 const GRID_SIZE: usize,
44 const BIT_DEPTH: usize,
45 const BINS: usize,
46 const BARYCENTRIC_BINS: usize,
47> {
48 pub(crate) lut: Vec<AvxAlignedI16>,
49 pub(crate) _phantom: PhantomData<T>,
50 pub(crate) _phantom2: PhantomData<U>,
51 pub(crate) interpolation_method: InterpolationMethod,
52 pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
53 pub(crate) color_space: DataColorSpace,
54 pub(crate) is_linear: bool,
55}
56
57impl<
58 T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
59 U: AsPrimitive<usize>,
60 const SRC_LAYOUT: u8,
61 const DST_LAYOUT: u8,
62 const GRID_SIZE: usize,
63 const BIT_DEPTH: usize,
64 const BINS: usize,
65 const BARYCENTRIC_BINS: usize,
66>
67 TransformLut3x3AvxQ0_15<
68 T,
69 U,
70 SRC_LAYOUT,
71 DST_LAYOUT,
72 GRID_SIZE,
73 BIT_DEPTH,
74 BINS,
75 BARYCENTRIC_BINS,
76 >
77where
78 f32: AsPrimitive<T>,
79 u32: AsPrimitive<T>,
80 (): LutBarycentricReduction<T, U>,
81{
82 #[allow(unused_unsafe)]
83 #[target_feature(enable = "avx2")]
84 unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationQ0_15<'b, GRID_SIZE>>(
85 &'b self,
86 src: &[T],
87 dst: &mut [T],
88 ) {
89 unsafe {
90 let src_cn = Layout::from(SRC_LAYOUT);
91 let src_channels = src_cn.channels();
92
93 let dst_cn = Layout::from(DST_LAYOUT);
94 let dst_channels = dst_cn.channels();
95
96 let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
97 let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
98 let v_max_scale = if T::FINITE {
99 _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
100 } else {
101 _mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
102 };
103
104 for (src, dst) in src
105 .chunks_exact(src_channels)
106 .zip(dst.chunks_exact_mut(dst_channels))
107 {
108 let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
109 src[src_cn.r_i()],
110 );
111 let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
112 src[src_cn.g_i()],
113 );
114 let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
115 src[src_cn.b_i()],
116 );
117
118 let a = if src_channels == 4 {
119 src[src_cn.a_i()]
120 } else {
121 max_value
122 };
123
124 let tetrahedral = Interpolator::new(&self.lut);
125 let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
126 if T::FINITE {
127 let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
128 o = _mm_min_epi16(o, v_max_scale);
129 let x = _mm_extract_epi16::<0>(o);
130 let y = _mm_extract_epi16::<1>(o);
131 let z = _mm_extract_epi16::<2>(o);
132
133 dst[dst_cn.r_i()] = (x as u32).as_();
134 dst[dst_cn.g_i()] = (y as u32).as_();
135 dst[dst_cn.b_i()] = (z as u32).as_();
136 } else {
137 let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
138 r = _mm_mul_ps(r, f_value_scale);
139 dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
140 dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
141 dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
142 }
143 if dst_channels == 4 {
144 dst[dst_cn.a_i()] = a;
145 }
146 }
147 }
148 }
149}
150
151impl<
152 T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
153 U: AsPrimitive<usize>,
154 const SRC_LAYOUT: u8,
155 const DST_LAYOUT: u8,
156 const GRID_SIZE: usize,
157 const BIT_DEPTH: usize,
158 const BINS: usize,
159 const BARYCENTRIC_BINS: usize,
160> TransformExecutor<T>
161 for TransformLut3x3AvxQ0_15<
162 T,
163 U,
164 SRC_LAYOUT,
165 DST_LAYOUT,
166 GRID_SIZE,
167 BIT_DEPTH,
168 BINS,
169 BARYCENTRIC_BINS,
170 >
171where
172 f32: AsPrimitive<T>,
173 u32: AsPrimitive<T>,
174 (): LutBarycentricReduction<T, U>,
175{
176 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
177 let src_cn = Layout::from(SRC_LAYOUT);
178 let src_channels = src_cn.channels();
179
180 let dst_cn = Layout::from(DST_LAYOUT);
181 let dst_channels = dst_cn.channels();
182 if src.len() % src_channels != 0 {
183 return Err(CmsError::LaneMultipleOfChannels);
184 }
185 if dst.len() % dst_channels != 0 {
186 return Err(CmsError::LaneMultipleOfChannels);
187 }
188 let src_chunks = src.len() / src_channels;
189 let dst_chunks = dst.len() / dst_channels;
190 if src_chunks != dst_chunks {
191 return Err(CmsError::LaneSizeMismatch);
192 }
193
194 unsafe {
195 if self.color_space == DataColorSpace::Lab
196 || (self.is_linear && self.color_space == DataColorSpace::Rgb)
197 || self.color_space == DataColorSpace::Xyz
198 {
199 self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
200 } else {
201 match self.interpolation_method {
202 #[cfg(feature = "options")]
203 InterpolationMethod::Tetrahedral => {
204 self.transform_chunk::<TetrahedralAvxQ0_15<GRID_SIZE>>(src, dst);
205 }
206 #[cfg(feature = "options")]
207 InterpolationMethod::Pyramid => {
208 self.transform_chunk::<PyramidalAvxQ0_15<GRID_SIZE>>(src, dst);
209 }
210 #[cfg(feature = "options")]
211 InterpolationMethod::Prism => {
212 self.transform_chunk::<PrismaticAvxQ0_15<GRID_SIZE>>(src, dst);
213 }
214 InterpolationMethod::Linear => {
215 self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
216 }
217 }
218 }
219 }
220 Ok(())
221 }
222}