1use crate::conversions::LutBarycentricReduction;
30use crate::conversions::interpolator::BarycentricWeight;
31use crate::conversions::sse::interpolator_q0_15::*;
32use crate::transform::PointeeSizeExpressible;
33use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
34use num_traits::AsPrimitive;
35#[cfg(target_arch = "x86")]
36use std::arch::x86::*;
37#[cfg(target_arch = "x86_64")]
38use std::arch::x86_64::*;
39use std::marker::PhantomData;
40
41pub(crate) struct TransformLut3x3SseQ0_15<
42 T,
43 U,
44 const SRC_LAYOUT: u8,
45 const DST_LAYOUT: u8,
46 const GRID_SIZE: usize,
47 const BIT_DEPTH: usize,
48 const BINS: usize,
49 const BARYCENTRIC_BINS: usize,
50> {
51 pub(crate) lut: Vec<SseAlignedI16x4>,
52 pub(crate) _phantom: PhantomData<T>,
53 pub(crate) _phantom2: PhantomData<U>,
54 pub(crate) interpolation_method: InterpolationMethod,
55 pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
56 pub(crate) color_space: DataColorSpace,
57 pub(crate) is_linear: bool,
58}
59
60impl<
61 T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
62 U: AsPrimitive<usize>,
63 const SRC_LAYOUT: u8,
64 const DST_LAYOUT: u8,
65 const GRID_SIZE: usize,
66 const BIT_DEPTH: usize,
67 const BINS: usize,
68 const BARYCENTRIC_BINS: usize,
69>
70 TransformLut3x3SseQ0_15<
71 T,
72 U,
73 SRC_LAYOUT,
74 DST_LAYOUT,
75 GRID_SIZE,
76 BIT_DEPTH,
77 BINS,
78 BARYCENTRIC_BINS,
79 >
80where
81 f32: AsPrimitive<T>,
82 u32: AsPrimitive<T>,
83 (): LutBarycentricReduction<T, U>,
84{
85 #[allow(unused_unsafe)]
86 #[target_feature(enable = "sse4.1")]
87 unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolationQ0_15<'b, GRID_SIZE>>(
88 &'b self,
89 src: &[T],
90 dst: &mut [T],
91 ) {
92 unsafe {
93 let src_cn = Layout::from(SRC_LAYOUT);
94 let src_channels = src_cn.channels();
95
96 let dst_cn = Layout::from(DST_LAYOUT);
97 let dst_channels = dst_cn.channels();
98
99 let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
100 let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
101 let v_max_scale = if T::FINITE {
102 _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
103 } else {
104 _mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
105 };
106
107 for (src, dst) in src
108 .chunks_exact(src_channels)
109 .zip(dst.chunks_exact_mut(dst_channels))
110 {
111 let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
112 src[src_cn.r_i()],
113 );
114 let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
115 src[src_cn.g_i()],
116 );
117 let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
118 src[src_cn.b_i()],
119 );
120
121 let a = if src_channels == 4 {
122 src[src_cn.a_i()]
123 } else {
124 max_value
125 };
126
127 let tetrahedral = Interpolator::new(&self.lut);
128 let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
129 if T::FINITE {
130 let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
131 o = _mm_min_epi16(o, v_max_scale);
132 let x = _mm_extract_epi16::<0>(o);
133 let y = _mm_extract_epi16::<1>(o);
134 let z = _mm_extract_epi16::<2>(o);
135
136 dst[dst_cn.r_i()] = (x as u32).as_();
137 dst[dst_cn.g_i()] = (y as u32).as_();
138 dst[dst_cn.b_i()] = (z as u32).as_();
139 } else {
140 let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
141 r = _mm_mul_ps(r, f_value_scale);
142 dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
143 dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
144 dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
145 }
146 if dst_channels == 4 {
147 dst[dst_cn.a_i()] = a;
148 }
149 }
150 }
151 }
152}
153
154impl<
155 T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
156 U: AsPrimitive<usize>,
157 const SRC_LAYOUT: u8,
158 const DST_LAYOUT: u8,
159 const GRID_SIZE: usize,
160 const BIT_DEPTH: usize,
161 const BINS: usize,
162 const BARYCENTRIC_BINS: usize,
163> TransformExecutor<T>
164 for TransformLut3x3SseQ0_15<
165 T,
166 U,
167 SRC_LAYOUT,
168 DST_LAYOUT,
169 GRID_SIZE,
170 BIT_DEPTH,
171 BINS,
172 BARYCENTRIC_BINS,
173 >
174where
175 f32: AsPrimitive<T>,
176 u32: AsPrimitive<T>,
177 (): LutBarycentricReduction<T, U>,
178{
179 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
180 let src_cn = Layout::from(SRC_LAYOUT);
181 let src_channels = src_cn.channels();
182
183 let dst_cn = Layout::from(DST_LAYOUT);
184 let dst_channels = dst_cn.channels();
185 if src.len() % src_channels != 0 {
186 return Err(CmsError::LaneMultipleOfChannels);
187 }
188 if dst.len() % dst_channels != 0 {
189 return Err(CmsError::LaneMultipleOfChannels);
190 }
191 let src_chunks = src.len() / src_channels;
192 let dst_chunks = dst.len() / dst_channels;
193 if src_chunks != dst_chunks {
194 return Err(CmsError::LaneSizeMismatch);
195 }
196
197 unsafe {
198 if self.color_space == DataColorSpace::Lab
199 || (self.is_linear && self.color_space == DataColorSpace::Rgb)
200 || self.color_space == DataColorSpace::Xyz
201 {
202 self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
203 } else {
204 match self.interpolation_method {
205 #[cfg(feature = "options")]
206 InterpolationMethod::Tetrahedral => {
207 self.transform_chunk::<TetrahedralSseQ0_15<GRID_SIZE>>(src, dst);
208 }
209 #[cfg(feature = "options")]
210 InterpolationMethod::Pyramid => {
211 self.transform_chunk::<PyramidalSseQ0_15<GRID_SIZE>>(src, dst);
212 }
213 #[cfg(feature = "options")]
214 InterpolationMethod::Prism => {
215 self.transform_chunk::<PrismaticSseQ0_15<GRID_SIZE>>(src, dst);
216 }
217 InterpolationMethod::Linear => {
218 self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
219 }
220 }
221 }
222 }
223 Ok(())
224 }
225}