1use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
30use num_traits::AsPrimitive;
31
32pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
33 pub(crate) r_linear: Box<[f32; BUCKET]>,
34 pub(crate) g_linear: Box<[f32; BUCKET]>,
35 pub(crate) b_linear: Box<[f32; BUCKET]>,
36 pub(crate) r_gamma: Box<[T; 65536]>,
37 pub(crate) g_gamma: Box<[T; 65536]>,
38 pub(crate) b_gamma: Box<[T; 65536]>,
39 pub(crate) adaptation_matrix: Matrix3f,
40}
41
42impl<T: Clone, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
43 #[inline(never)]
44 #[allow(dead_code)]
45 fn convert_to_v(self) -> TransformMatrixShaperV<T> {
46 TransformMatrixShaperV {
47 r_linear: self.r_linear.iter().copied().collect(),
48 g_linear: self.g_linear.iter().copied().collect(),
49 b_linear: self.b_linear.iter().copied().collect(),
50 r_gamma: self.r_gamma,
51 g_gamma: self.g_gamma,
52 b_gamma: self.b_gamma,
53 adaptation_matrix: self.adaptation_matrix,
54 }
55 }
56}
57
58#[allow(dead_code)]
59pub(crate) struct TransformMatrixShaperV<T: Clone> {
60 pub(crate) r_linear: Vec<f32>,
61 pub(crate) g_linear: Vec<f32>,
62 pub(crate) b_linear: Vec<f32>,
63 pub(crate) r_gamma: Box<[T; 65536]>,
64 pub(crate) g_gamma: Box<[T; 65536]>,
65 pub(crate) b_gamma: Box<[T; 65536]>,
66 pub(crate) adaptation_matrix: Matrix3f,
67}
68
69pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
72 pub(crate) linear: Box<[f32; BUCKET]>,
73 pub(crate) gamma: Box<[T; 65536]>,
74 pub(crate) adaptation_matrix: Matrix3f,
75}
76
77#[allow(dead_code)]
78impl<T: Clone, const BUCKET: usize> TransformMatrixShaperOptimized<T, BUCKET> {
79 fn convert_to_v(self) -> TransformMatrixShaperOptimizedV<T> {
80 TransformMatrixShaperOptimizedV {
81 linear: self.linear.iter().copied().collect::<Vec<_>>(),
82 gamma: self.gamma,
83 adaptation_matrix: self.adaptation_matrix,
84 }
85 }
86}
87
88#[allow(dead_code)]
91pub(crate) struct TransformMatrixShaperOptimizedV<T: Clone> {
92 pub(crate) linear: Vec<f32>,
93 pub(crate) gamma: Box<[T; 65536]>,
94 pub(crate) adaptation_matrix: Matrix3f,
95}
96
97impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
98 #[inline(never)]
99 #[allow(dead_code)]
100 pub(crate) fn to_q2_13_n<
101 R: Copy + 'static + Default,
102 const PRECISION: i32,
103 const LINEAR_CAP: usize,
104 >(
105 &self,
106 gamma_lut: usize,
107 bit_depth: usize,
108 ) -> TransformMatrixShaperFixedPoint<R, T, BUCKET>
109 where
110 f32: AsPrimitive<R>,
111 {
112 let linear_scale = if T::FINITE {
113 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
114 ((1 << bit_depth) - 1) as f32 * lut_scale
115 } else {
116 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
117 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
118 };
119 let mut new_box_r = Box::new([R::default(); BUCKET]);
120 let mut new_box_g = Box::new([R::default(); BUCKET]);
121 let mut new_box_b = Box::new([R::default(); BUCKET]);
122 for (dst, &src) in new_box_r.iter_mut().zip(self.r_linear.iter()) {
123 *dst = (src * linear_scale).round().as_();
124 }
125 for (dst, &src) in new_box_g.iter_mut().zip(self.g_linear.iter()) {
126 *dst = (src * linear_scale).round().as_();
127 }
128 for (dst, &src) in new_box_b.iter_mut().zip(self.b_linear.iter()) {
129 *dst = (src * linear_scale).round().as_();
130 }
131 let scale: f32 = (1i32 << PRECISION) as f32;
132 let source_matrix = self.adaptation_matrix;
133 let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
134 for i in 0..3 {
135 for j in 0..3 {
136 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
137 }
138 }
139 TransformMatrixShaperFixedPoint {
140 r_linear: new_box_r,
141 g_linear: new_box_g,
142 b_linear: new_box_b,
143 r_gamma: self.r_gamma.clone(),
144 g_gamma: self.g_gamma.clone(),
145 b_gamma: self.b_gamma.clone(),
146 adaptation_matrix: dst_matrix,
147 }
148 }
149
150 #[inline(never)]
151 #[allow(dead_code)]
152 pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
153 &self,
154 gamma_lut: usize,
155 bit_depth: usize,
156 ) -> TransformMatrixShaperFp<R, T>
157 where
158 f32: AsPrimitive<R>,
159 {
160 let linear_scale = if T::FINITE {
161 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
162 ((1 << bit_depth) - 1) as f32 * lut_scale
163 } else {
164 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
165 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
166 };
167 let new_box_r = self
168 .r_linear
169 .iter()
170 .map(|&x| (x * linear_scale).round().as_())
171 .collect::<Vec<R>>();
172 let new_box_g = self
173 .g_linear
174 .iter()
175 .map(|&x| (x * linear_scale).round().as_())
176 .collect::<Vec<R>>();
177 let new_box_b = self
178 .b_linear
179 .iter()
180 .map(|&x| (x * linear_scale).round().as_())
181 .collect::<Vec<_>>();
182 let scale: f32 = (1i32 << PRECISION) as f32;
183 let source_matrix = self.adaptation_matrix;
184 let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
185 for i in 0..3 {
186 for j in 0..3 {
187 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
188 }
189 }
190 TransformMatrixShaperFp {
191 r_linear: new_box_r,
192 g_linear: new_box_g,
193 b_linear: new_box_b,
194 r_gamma: self.r_gamma.clone(),
195 g_gamma: self.g_gamma.clone(),
196 b_gamma: self.b_gamma.clone(),
197 adaptation_matrix: dst_matrix,
198 }
199 }
200}
201
202impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
203 TransformMatrixShaperOptimized<T, BUCKET>
204{
205 #[allow(dead_code)]
206 pub(crate) fn to_q2_13_n<
207 R: Copy + 'static + Default,
208 const PRECISION: i32,
209 const LINEAR_CAP: usize,
210 >(
211 &self,
212 gamma_lut: usize,
213 bit_depth: usize,
214 ) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
215 where
216 f32: AsPrimitive<R>,
217 {
218 let linear_scale = if T::FINITE {
219 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
220 ((1 << bit_depth) - 1) as f32 * lut_scale
221 } else {
222 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
223 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
224 };
225 let mut new_box_linear = Box::new([R::default(); BUCKET]);
226 for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
227 *dst = (*src * linear_scale).round().as_();
228 }
229 let scale: f32 = (1i32 << PRECISION) as f32;
230 let source_matrix = self.adaptation_matrix;
231 let mut dst_matrix = Matrix3::<i16> {
232 v: [[i16::default(); 3]; 3],
233 };
234 for i in 0..3 {
235 for j in 0..3 {
236 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
237 }
238 }
239 TransformMatrixShaperFixedPointOpt {
240 linear: new_box_linear,
241 gamma: self.gamma.clone(),
242 adaptation_matrix: dst_matrix,
243 }
244 }
245
246 #[allow(dead_code)]
247 pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
248 &self,
249 gamma_lut: usize,
250 bit_depth: usize,
251 ) -> TransformMatrixShaperFpOptVec<R, i16, T>
252 where
253 f32: AsPrimitive<R>,
254 {
255 let linear_scale = if T::FINITE {
256 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
257 ((1 << bit_depth) - 1) as f32 * lut_scale
258 } else {
259 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
260 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
261 };
262 let new_box_linear = self
263 .linear
264 .iter()
265 .map(|&x| (x * linear_scale).round().as_())
266 .collect::<Vec<R>>();
267 let scale: f32 = (1i32 << PRECISION) as f32;
268 let source_matrix = self.adaptation_matrix;
269 let mut dst_matrix = Matrix3::<i16> {
270 v: [[i16::default(); 3]; 3],
271 };
272 for i in 0..3 {
273 for j in 0..3 {
274 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
275 }
276 }
277 TransformMatrixShaperFpOptVec {
278 linear: new_box_linear,
279 gamma: self.gamma.clone(),
280 adaptation_matrix: dst_matrix,
281 }
282 }
283
284 #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
285 pub(crate) fn to_q1_30_n<R: Copy + 'static + Default, const PRECISION: i32>(
286 &self,
287 gamma_lut: usize,
288 bit_depth: usize,
289 ) -> TransformMatrixShaperFpOptVec<R, i32, T>
290 where
291 f32: AsPrimitive<R>,
292 f64: AsPrimitive<R>,
293 {
294 let table_size = if T::FINITE {
296 (1 << bit_depth) - 1
297 } else {
298 T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
299 };
300 let ext_bp = if T::FINITE {
301 bit_depth as u32 + 1
302 } else {
303 let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
304 bp + 1
305 };
306 let linear_scale = {
307 let lut_scale = (gamma_lut - 1) as f64 / table_size as f64;
308 ((1u32 << ext_bp) - 1) as f64 * lut_scale
309 };
310 let new_box_linear = self
311 .linear
312 .iter()
313 .map(|&v| (v as f64 * linear_scale).round().as_())
314 .collect::<Vec<R>>();
315 let scale: f64 = (1i64 << PRECISION) as f64;
316 let source_matrix = self.adaptation_matrix;
317 let mut dst_matrix = Matrix3::<i32> {
318 v: [[i32::default(); 3]; 3],
319 };
320 for i in 0..3 {
321 for j in 0..3 {
322 dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
323 }
324 }
325 TransformMatrixShaperFpOptVec {
326 linear: new_box_linear,
327 gamma: self.gamma.clone(),
328 adaptation_matrix: dst_matrix,
329 }
330 }
331}
332
333#[allow(unused)]
334struct TransformMatrixShaperScalar<
335 T: Clone,
336 const SRC_LAYOUT: u8,
337 const DST_LAYOUT: u8,
338 const LINEAR_CAP: usize,
339> {
340 pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
341 pub(crate) gamma_lut: usize,
342 pub(crate) bit_depth: usize,
343}
344
345#[allow(unused)]
346struct TransformMatrixShaperOptScalar<
347 T: Clone,
348 const SRC_LAYOUT: u8,
349 const DST_LAYOUT: u8,
350 const LINEAR_CAP: usize,
351> {
352 pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
353 pub(crate) gamma_lut: usize,
354 pub(crate) bit_depth: usize,
355}
356
// Generates a factory function `$dep_name` that wraps a `$shaper` profile in
// the executor type `$dependant`, choosing the const layout parameters from
// the runtime `(src_layout, dst_layout)` pair. Only Rgb and Rgba are handled;
// any other combination yields `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    any(target_arch = "x86", target_arch = "x86_64"),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
426
// Generates a factory function `$dep_name` that first converts the `$shaper`
// profile with `convert_to_v()` and then wraps it in the executor type
// `$dependant`, choosing the const layout parameters from the runtime
// `(src_layout, dst_layout)` pair. Only Rgb and Rgba are handled; any other
// combination yields `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    any(target_arch = "x86", target_arch = "x86_64"),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor_to_v {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The executor types used here take the `Vec`-backed profile.
            let profile = profile.convert_to_v();
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
493
494#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
495use crate::conversions::sse::{TransformShaperRgbOptSse, TransformShaperRgbSse};
496
497#[cfg(all(target_arch = "x86_64", feature = "avx"))]
498use crate::conversions::avx::{TransformShaperRgbAvx, TransformShaperRgbOptAvx};
499
// Defines `make_rgb_xyz_rgb_transform_sse_41`: a factory that wraps a
// `TransformMatrixShaper` profile in a `TransformShaperRgbSse` executor for the
// requested Rgb/Rgba layout pair. Compiled on x86/x86_64 with the `sse` feature.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_sse_41,
    TransformShaperRgbSse,
    TransformMatrixShaper
);
506
// Defines `make_rgb_xyz_rgb_transform_sse_41_opt`: a factory that converts a
// `TransformMatrixShaperOptimized` profile with `convert_to_v()` and wraps it in
// a `TransformShaperRgbOptSse` executor. Compiled on x86/x86_64 with the `sse`
// feature.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_sse_41_opt,
    TransformShaperRgbOptSse,
    TransformMatrixShaperOptimized
);
513
// Defines `make_rgb_xyz_rgb_transform_avx2`: a factory that wraps a
// `TransformMatrixShaper` profile in a `TransformShaperRgbAvx` executor for the
// requested Rgb/Rgba layout pair. Compiled on x86_64 with the `avx` feature.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx2,
    TransformShaperRgbAvx,
    TransformMatrixShaper
);
520
// Defines `make_rgb_xyz_rgb_transform_avx2_opt`: a factory that converts a
// `TransformMatrixShaperOptimized` profile with `convert_to_v()` and wraps it in
// a `TransformShaperRgbOptAvx` executor. Compiled on x86_64 with the `avx`
// feature.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_avx2_opt,
    TransformShaperRgbOptAvx,
    TransformMatrixShaperOptimized
);
527
528#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
529use crate::conversions::avx512::TransformShaperRgbOptAvx512;
530
// Defines `make_rgb_xyz_rgb_transform_avx512_opt`: a factory that wraps a
// `TransformMatrixShaperOptimized` profile (array-backed, no `convert_to_v()`)
// in a `TransformShaperRgbOptAvx512` executor. Compiled on x86_64 with the
// `avx512` feature.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx512_opt,
    TransformShaperRgbOptAvx512,
    TransformMatrixShaperOptimized
);
537
538#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
539pub(crate) fn make_rgb_xyz_rgb_transform<
540 T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
541 const LINEAR_CAP: usize,
542>(
543 src_layout: Layout,
544 dst_layout: Layout,
545 profile: TransformMatrixShaper<T, LINEAR_CAP>,
546 gamma_lut: usize,
547 bit_depth: usize,
548) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
549where
550 u32: AsPrimitive<T>,
551{
552 #[cfg(all(feature = "avx", target_arch = "x86_64"))]
553 if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
554 return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP>(
555 src_layout, dst_layout, profile, gamma_lut, bit_depth,
556 );
557 }
558 #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
559 if std::arch::is_x86_feature_detected!("sse4.1") {
560 return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP>(
561 src_layout, dst_layout, profile, gamma_lut, bit_depth,
562 );
563 }
564 if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
565 return Ok(Box::new(TransformMatrixShaperScalar::<
566 T,
567 { Layout::Rgba as u8 },
568 { Layout::Rgba as u8 },
569 LINEAR_CAP,
570 > {
571 profile,
572 gamma_lut,
573 bit_depth,
574 }));
575 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
576 return Ok(Box::new(TransformMatrixShaperScalar::<
577 T,
578 { Layout::Rgb as u8 },
579 { Layout::Rgba as u8 },
580 LINEAR_CAP,
581 > {
582 profile,
583 gamma_lut,
584 bit_depth,
585 }));
586 } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
587 return Ok(Box::new(TransformMatrixShaperScalar::<
588 T,
589 { Layout::Rgba as u8 },
590 { Layout::Rgb as u8 },
591 LINEAR_CAP,
592 > {
593 profile,
594 gamma_lut,
595 bit_depth,
596 }));
597 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
598 return Ok(Box::new(TransformMatrixShaperScalar::<
599 T,
600 { Layout::Rgb as u8 },
601 { Layout::Rgb as u8 },
602 LINEAR_CAP,
603 > {
604 profile,
605 gamma_lut,
606 bit_depth,
607 }));
608 }
609 Err(CmsError::UnsupportedProfileConnection)
610}
611
612#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
613pub(crate) fn make_rgb_xyz_rgb_transform_opt<
614 T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
615 const LINEAR_CAP: usize,
616>(
617 src_layout: Layout,
618 dst_layout: Layout,
619 profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
620 gamma_lut: usize,
621 bit_depth: usize,
622) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
623where
624 u32: AsPrimitive<T>,
625{
626 #[cfg(all(feature = "avx512", target_arch = "x86_64"))]
627 if std::arch::is_x86_feature_detected!("avx512bw")
628 && std::arch::is_x86_feature_detected!("avx512vl")
629 && std::arch::is_x86_feature_detected!("fma")
630 {
631 return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP>(
632 src_layout, dst_layout, profile, gamma_lut, bit_depth,
633 );
634 }
635 #[cfg(all(feature = "avx", target_arch = "x86_64"))]
636 if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
637 return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP>(
638 src_layout, dst_layout, profile, gamma_lut, bit_depth,
639 );
640 }
641 #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
642 if std::arch::is_x86_feature_detected!("sse4.1") {
643 return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP>(
644 src_layout, dst_layout, profile, gamma_lut, bit_depth,
645 );
646 }
647 if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
648 return Ok(Box::new(TransformMatrixShaperOptScalar::<
649 T,
650 { Layout::Rgba as u8 },
651 { Layout::Rgba as u8 },
652 LINEAR_CAP,
653 > {
654 profile,
655 gamma_lut,
656 bit_depth,
657 }));
658 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
659 return Ok(Box::new(TransformMatrixShaperOptScalar::<
660 T,
661 { Layout::Rgb as u8 },
662 { Layout::Rgba as u8 },
663 LINEAR_CAP,
664 > {
665 profile,
666 gamma_lut,
667 bit_depth,
668 }));
669 } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
670 return Ok(Box::new(TransformMatrixShaperOptScalar::<
671 T,
672 { Layout::Rgba as u8 },
673 { Layout::Rgb as u8 },
674 LINEAR_CAP,
675 > {
676 profile,
677 gamma_lut,
678 bit_depth,
679 }));
680 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
681 return Ok(Box::new(TransformMatrixShaperOptScalar::<
682 T,
683 { Layout::Rgb as u8 },
684 { Layout::Rgb as u8 },
685 LINEAR_CAP,
686 > {
687 profile,
688 gamma_lut,
689 bit_depth,
690 }));
691 }
692 Err(CmsError::UnsupportedProfileConnection)
693}
694
695#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
696use crate::conversions::neon::{TransformShaperRgbNeon, TransformShaperRgbOptNeon};
697use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec;
698use crate::conversions::rgbxyz_fixed::{
699 TransformMatrixShaperFixedPoint, TransformMatrixShaperFixedPointOpt, TransformMatrixShaperFp,
700};
701use crate::transform::PointeeSizeExpressible;
702
// On aarch64 with NEON enabled, `make_rgb_xyz_rgb_transform` is generated by the
// macro instead of being hand-written: it converts the `TransformMatrixShaper`
// profile with `convert_to_v()` and wraps it in a `TransformShaperRgbNeon`
// executor.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform,
    TransformShaperRgbNeon,
    TransformMatrixShaper
);
709
// On aarch64 with NEON enabled, `make_rgb_xyz_rgb_transform_opt` is generated by
// the macro instead of being hand-written: it converts the
// `TransformMatrixShaperOptimized` profile with `convert_to_v()` and wraps it in
// a `TransformShaperRgbOptNeon` executor.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_opt,
    TransformShaperRgbOptNeon,
    TransformMatrixShaperOptimized
);
716
717#[allow(unused)]
718impl<
719 T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
720 const SRC_LAYOUT: u8,
721 const DST_LAYOUT: u8,
722 const LINEAR_CAP: usize,
723> TransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
724where
725 u32: AsPrimitive<T>,
726{
727 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
728 use crate::mlaf::mlaf;
729 let src_cn = Layout::from(SRC_LAYOUT);
730 let dst_cn = Layout::from(DST_LAYOUT);
731 let src_channels = src_cn.channels();
732 let dst_channels = dst_cn.channels();
733
734 if src.len() / src_channels != dst.len() / dst_channels {
735 return Err(CmsError::LaneSizeMismatch);
736 }
737 if src.len() % src_channels != 0 {
738 return Err(CmsError::LaneMultipleOfChannels);
739 }
740 if dst.len() % dst_channels != 0 {
741 return Err(CmsError::LaneMultipleOfChannels);
742 }
743
744 let transform = self.profile.adaptation_matrix;
745 let scale = (self.gamma_lut - 1) as f32;
746 let max_colors: T = ((1 << self.bit_depth) - 1).as_();
747
748 for (src, dst) in src
749 .chunks_exact(src_channels)
750 .zip(dst.chunks_exact_mut(dst_channels))
751 {
752 let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
753 let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
754 let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
755 let a = if src_channels == 4 {
756 src[src_cn.a_i()]
757 } else {
758 max_colors
759 };
760
761 let new_r = mlaf(
762 0.5f32,
763 mlaf(
764 mlaf(r * transform.v[0][0], g, transform.v[0][1]),
765 b,
766 transform.v[0][2],
767 )
768 .max(0f32)
769 .min(1f32),
770 scale,
771 );
772
773 let new_g = mlaf(
774 0.5f32,
775 mlaf(
776 mlaf(r * transform.v[1][0], g, transform.v[1][1]),
777 b,
778 transform.v[1][2],
779 )
780 .max(0f32)
781 .min(1f32),
782 scale,
783 );
784
785 let new_b = mlaf(
786 0.5f32,
787 mlaf(
788 mlaf(r * transform.v[2][0], g, transform.v[2][1]),
789 b,
790 transform.v[2][2],
791 )
792 .max(0f32)
793 .min(1f32),
794 scale,
795 );
796
797 dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
798 dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
799 dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
800 if dst_channels == 4 {
801 dst[dst_cn.a_i()] = a;
802 }
803 }
804
805 Ok(())
806 }
807}
808
809#[allow(unused)]
810impl<
811 T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
812 const SRC_LAYOUT: u8,
813 const DST_LAYOUT: u8,
814 const LINEAR_CAP: usize,
815> TransformExecutor<T> for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
816where
817 u32: AsPrimitive<T>,
818{
819 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
820 use crate::mlaf::mlaf;
821 let src_cn = Layout::from(SRC_LAYOUT);
822 let dst_cn = Layout::from(DST_LAYOUT);
823 let src_channels = src_cn.channels();
824 let dst_channels = dst_cn.channels();
825
826 if src.len() / src_channels != dst.len() / dst_channels {
827 return Err(CmsError::LaneSizeMismatch);
828 }
829 if src.len() % src_channels != 0 {
830 return Err(CmsError::LaneMultipleOfChannels);
831 }
832 if dst.len() % dst_channels != 0 {
833 return Err(CmsError::LaneMultipleOfChannels);
834 }
835
836 let transform = self.profile.adaptation_matrix;
837 let scale = (self.gamma_lut - 1) as f32;
838 let max_colors: T = ((1 << self.bit_depth) - 1).as_();
839
840 for (src, dst) in src
841 .chunks_exact(src_channels)
842 .zip(dst.chunks_exact_mut(dst_channels))
843 {
844 let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
845 let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
846 let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
847 let a = if src_channels == 4 {
848 src[src_cn.a_i()]
849 } else {
850 max_colors
851 };
852
853 let new_r = mlaf(
854 0.5f32,
855 mlaf(
856 mlaf(r * transform.v[0][0], g, transform.v[0][1]),
857 b,
858 transform.v[0][2],
859 )
860 .max(0f32)
861 .min(1f32),
862 scale,
863 );
864
865 let new_g = mlaf(
866 0.5f32,
867 mlaf(
868 mlaf(r * transform.v[1][0], g, transform.v[1][1]),
869 b,
870 transform.v[1][2],
871 )
872 .max(0f32)
873 .min(1f32),
874 scale,
875 );
876
877 let new_b = mlaf(
878 0.5f32,
879 mlaf(
880 mlaf(r * transform.v[2][0], g, transform.v[2][1]),
881 b,
882 transform.v[2][2],
883 )
884 .max(0f32)
885 .min(1f32),
886 scale,
887 );
888
889 dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
890 dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
891 dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
892 if dst_channels == 4 {
893 dst[dst_cn.a_i()] = a;
894 }
895 }
896
897 Ok(())
898 }
899}