1use crate::conversions::katana::KatanaInitialStage;
30use crate::profile::LutDataType;
31use crate::safe_math::{SafeMul, SafePowi};
32use crate::trc::lut_interp_linear_float;
33use crate::{
34 CmsError, DataColorSpace, Hypercube, InterpolationMethod, MalformedSize,
35 PointeeSizeExpressible, Stage, TransformOptions, Vector3f,
36};
37use num_traits::AsPrimitive;
38use std::marker::PhantomData;
39
40#[allow(unused)]
41#[derive(Default)]
42struct Lut4x3 {
43 linearization: [Vec<f32>; 4],
44 clut: Vec<f32>,
45 grid_size: u8,
46 output: [Vec<f32>; 3],
47 interpolation_method: InterpolationMethod,
48 pcs: DataColorSpace,
49}
50
51#[allow(unused)]
52#[derive(Default)]
53struct KatanaLut4x3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> {
54 linearization: [Vec<f32>; 4],
55 clut: Vec<f32>,
56 grid_size: u8,
57 output: [Vec<f32>; 3],
58 interpolation_method: InterpolationMethod,
59 pcs: DataColorSpace,
60 _phantom: PhantomData<T>,
61 bit_depth: usize,
62}
63
64#[allow(unused)]
65impl Lut4x3 {
66 fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
67 &self,
68 src: &[f32],
69 dst: &mut [f32],
70 fetch: Fetch,
71 ) -> Result<(), CmsError> {
72 let linearization_0 = &self.linearization[0];
73 let linearization_1 = &self.linearization[1];
74 let linearization_2 = &self.linearization[2];
75 let linearization_3 = &self.linearization[3];
76 for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
77 debug_assert!(self.grid_size as i32 >= 1);
78 let linear_x = lut_interp_linear_float(src[0], linearization_0);
79 let linear_y = lut_interp_linear_float(src[1], linearization_1);
80 let linear_z = lut_interp_linear_float(src[2], linearization_2);
81 let linear_w = lut_interp_linear_float(src[3], linearization_3);
82
83 let clut = fetch(linear_x, linear_y, linear_z, linear_w);
84
85 let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
86 let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
87 let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
88 dest[0] = pcs_x;
89 dest[1] = pcs_y;
90 dest[2] = pcs_z;
91 }
92 Ok(())
93 }
94}
95
/// Implements `Stage` for a 4x3 LUT type that exposes `clut`, `grid_size`,
/// `pcs`, `interpolation_method` and a `transform_impl` method.
///
/// The generated `transform` picks the CLUT interpolation routine and forwards
/// to `transform_impl`.
macro_rules! define_lut4_dispatch {
    ($dispatcher: ident) => {
        impl Stage for $dispatcher {
            fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
                let cube = Hypercube::new(&self.clut, self.grid_size as usize);

                // Lab and XYZ connection spaces always use quadlinear interpolation,
                // whatever method was requested.
                let forces_quadlinear =
                    self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz;
                if forces_quadlinear {
                    return self
                        .transform_impl(src, dst, |a, b, c, d| cube.quadlinear_vec3(a, b, c, d));
                }

                // Otherwise honour the configured interpolation method.
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_impl(src, dst, |a, b, c, d| cube.tetra_vec3(a, b, c, d))
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_impl(src, dst, |a, b, c, d| cube.pyramid_vec3(a, b, c, d))
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_impl(src, dst, |a, b, c, d| cube.prism_vec3(a, b, c, d))
                    }
                    InterpolationMethod::Linear => {
                        self.transform_impl(src, dst, |a, b, c, d| cube.quadlinear_vec3(a, b, c, d))
                    }
                }
            }
        }
    };
}
132
133impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut4x3<T> {
134 fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
135 &self,
136 input: &[T],
137 fetch: Fetch,
138 ) -> Result<Vec<f32>, CmsError> {
139 if input.len() % 4 != 0 {
140 return Err(CmsError::LaneMultipleOfChannels);
141 }
142 let norm_value = if T::FINITE {
143 1.0 / ((1u32 << self.bit_depth) - 1) as f32
144 } else {
145 1.0
146 };
147 let mut dst = vec![0.; (input.len() / 4) * 3];
148 let linearization_0 = &self.linearization[0];
149 let linearization_1 = &self.linearization[1];
150 let linearization_2 = &self.linearization[2];
151 let linearization_3 = &self.linearization[3];
152 for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(4)) {
153 let linear_x = lut_interp_linear_float(src[0].as_() * norm_value, linearization_0);
154 let linear_y = lut_interp_linear_float(src[1].as_() * norm_value, linearization_1);
155 let linear_z = lut_interp_linear_float(src[2].as_() * norm_value, linearization_2);
156 let linear_w = lut_interp_linear_float(src[3].as_() * norm_value, linearization_3);
157
158 let clut = fetch(linear_x, linear_y, linear_z, linear_w);
159
160 let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
161 let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
162 let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
163 dest[0] = pcs_x;
164 dest[1] = pcs_y;
165 dest[2] = pcs_z;
166 }
167 Ok(dst)
168 }
169}
170
171impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
172 for KatanaLut4x3<T>
173{
174 fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
175 if input.len() % 4 != 0 {
176 return Err(CmsError::LaneMultipleOfChannels);
177 }
178 let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
179
180 if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
182 return self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
183 }
184
185 match self.interpolation_method {
186 #[cfg(feature = "options")]
187 InterpolationMethod::Tetrahedral => {
188 self.to_pcs_impl(input, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))
189 }
190 #[cfg(feature = "options")]
191 InterpolationMethod::Pyramid => {
192 self.to_pcs_impl(input, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))
193 }
194 #[cfg(feature = "options")]
195 InterpolationMethod::Prism => {
196 self.to_pcs_impl(input, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))
197 }
198 InterpolationMethod::Linear => {
199 self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))
200 }
201 }
202 }
203}
204
// Expands to `impl Stage for Lut4x3`, wiring `transform` to `Lut4x3::transform_impl`.
define_lut4_dispatch!(Lut4x3);
206
207fn make_lut_4x3(
208 lut: &LutDataType,
209 options: TransformOptions,
210 pcs: DataColorSpace,
211) -> Result<Lut4x3, CmsError> {
212 let clut_length: usize = (lut.num_clut_grid_points as usize)
218 .safe_powi(lut.num_input_channels as u32)?
219 .safe_mul(lut.num_output_channels as usize)?;
220
221 let clut_table = lut.clut_table.to_clut_f32();
222 if clut_table.len() != clut_length {
223 return Err(CmsError::MalformedClut(MalformedSize {
224 size: clut_table.len(),
225 expected: clut_length,
226 }));
227 }
228
229 let linearization_table = lut.input_table.to_clut_f32();
230
231 if linearization_table.len() < lut.num_input_table_entries as usize * 4 {
232 return Err(CmsError::MalformedCurveLutTable(MalformedSize {
233 size: linearization_table.len(),
234 expected: lut.num_input_table_entries as usize * 4,
235 }));
236 }
237
238 let lin_curve0 = linearization_table[0..lut.num_input_table_entries as usize].to_vec();
239 let lin_curve1 = linearization_table
240 [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
241 .to_vec();
242 let lin_curve2 = linearization_table
243 [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
244 .to_vec();
245 let lin_curve3 = linearization_table
246 [lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4]
247 .to_vec();
248
249 let gamma_table = lut.output_table.to_clut_f32();
250
251 if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
252 return Err(CmsError::MalformedCurveLutTable(MalformedSize {
253 size: gamma_table.len(),
254 expected: lut.num_output_table_entries as usize * 3,
255 }));
256 }
257
258 let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
259 let gamma_curve1 = gamma_table
260 [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
261 .to_vec();
262 let gamma_curve2 = gamma_table
263 [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
264 .to_vec();
265
266 let transform = Lut4x3 {
267 linearization: [lin_curve0, lin_curve1, lin_curve2, lin_curve3],
268 interpolation_method: options.interpolation_method,
269 pcs,
270 clut: clut_table,
271 grid_size: lut.num_clut_grid_points,
272 output: [gamma_curve0, gamma_curve1, gamma_curve2],
273 };
274 Ok(transform)
275}
276
277fn stage_lut_4x3(
278 lut: &LutDataType,
279 options: TransformOptions,
280 pcs: DataColorSpace,
281) -> Result<Box<dyn Stage>, CmsError> {
282 let lut = make_lut_4x3(lut, options, pcs)?;
283 #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
284 {
285 use crate::conversions::neon::Lut4x3Neon;
286 let transform = Lut4x3Neon {
287 linearization: lut.linearization,
288 interpolation_method: lut.interpolation_method,
289 pcs: lut.pcs,
290 clut: lut.clut,
291 grid_size: lut.grid_size,
292 output: lut.output,
293 };
294 Ok(Box::new(transform))
295 }
296 #[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
297 {
298 #[cfg(all(target_arch = "x86_64", feature = "avx"))]
299 {
300 use crate::conversions::avx::Lut4x3AvxFma;
301 if std::arch::is_x86_feature_detected!("avx2")
302 && std::arch::is_x86_feature_detected!("fma")
303 {
304 let transform = Lut4x3AvxFma {
305 linearization: lut.linearization,
306 interpolation_method: lut.interpolation_method,
307 pcs: lut.pcs,
308 clut: lut.clut,
309 grid_size: lut.grid_size,
310 output: lut.output,
311 };
312 return Ok(Box::new(transform));
313 }
314 }
315 let transform = Lut4x3 {
316 linearization: lut.linearization,
317 interpolation_method: lut.interpolation_method,
318 pcs: lut.pcs,
319 clut: lut.clut,
320 grid_size: lut.grid_size,
321 output: lut.output,
322 };
323 Ok(Box::new(transform))
324 }
325}
326
327pub(crate) fn katana_input_stage_lut_4x3<
328 T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
329>(
330 lut: &LutDataType,
331 options: TransformOptions,
332 pcs: DataColorSpace,
333 bit_depth: usize,
334) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
335 let lut = make_lut_4x3(lut, options, pcs)?;
341
342 let transform = KatanaLut4x3::<T> {
343 linearization: lut.linearization,
344 interpolation_method: lut.interpolation_method,
345 pcs: lut.pcs,
346 clut: lut.clut,
347 grid_size: lut.grid_size,
348 output: lut.output,
349 _phantom: PhantomData,
350 bit_depth,
351 };
352 Ok(Box::new(transform))
353}
354
/// Generates every node of a regular 4D grid with `SAMPLES` points per axis,
/// as interleaved 4-component samples normalized to `[0, 1]`.
///
/// The returned vector holds `4 * SAMPLES^4` values. Using the loop names
/// `c, m, y, k` for the four components, each node is stored as `[c, m, y, k]`;
/// `k` varies slowest, then `c`, then `m`, and `y` varies fastest.
///
/// Degenerate grids are handled explicitly: `SAMPLES == 0` yields an empty
/// vector and `SAMPLES == 1` yields the single node `[0, 0, 0, 0]`. Without
/// this, the step would be computed as `1 / 0` and every value would be NaN.
pub(crate) fn create_lut4_norm_samples<const SAMPLES: usize>() -> Vec<f32> {
    // Kept in `usize` so the capacity is never truncated by a narrowing cast.
    let total_values = 4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES;
    let mut src = Vec::with_capacity(total_values);

    // Distance between neighbouring nodes. With fewer than two nodes per axis
    // there is no interval to divide, so the only coordinate is 0.
    let last_index = SAMPLES.saturating_sub(1);
    let step = if last_index == 0 {
        0f32
    } else {
        1f32 / last_index as f32
    };

    for k in 0..SAMPLES {
        for c in 0..SAMPLES {
            for m in 0..SAMPLES {
                for y in 0..SAMPLES {
                    src.extend_from_slice(&[
                        c as f32 * step,
                        m as f32 * step,
                        y as f32 * step,
                        k as f32 * step,
                    ]);
                }
            }
        }
    }
    src
}
375
376pub(crate) fn create_lut4<const SAMPLES: usize>(
377 lut: &LutDataType,
378 options: TransformOptions,
379 pcs: DataColorSpace,
380) -> Result<Vec<f32>, CmsError> {
381 if lut.num_input_channels != 4 {
382 return Err(CmsError::UnsupportedProfileConnection);
383 }
384 let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
385
386 let src = create_lut4_norm_samples::<SAMPLES>();
387 let mut dest = vec![0.; (lut_size as usize) / 4 * 3];
388
389 let lut_stage = stage_lut_4x3(lut, options, pcs)?;
390 lut_stage.transform(&src, &mut dest)?;
391 Ok(dest)
392}