tiny_skia/wide/i32x8_t.rs

// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

use super::{f32x8, u32x8};

cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct i32x8(__m256i);
    } else {
        use super::i32x4;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct i32x8(pub i32x4, pub i32x4);
    }
}

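// SAFETY: in both configurations `i32x8` is a 32-byte value with no padding
// and no invalid bit patterns (either a single `__m256i` or two `i32x4`
// halves), so `Zeroable` and `Pod` are sound.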
unsafe impl bytemuck::Zeroable for i32x8 {}
unsafe impl bytemuck::Pod for i32x8 {}

impl Default for i32x8 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl i32x8 {
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n, n, n, n, n])
    }

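    /// Lane-wise select: lanes where `self` is all ones are taken from `t`,
    /// the remaining lanes from `f`. `self` is expected to be a comparison
    /// mask (each lane either all zeros or all ones), e.g. the result of one
    /// of the `cmp_*` methods.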
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_blendv_epi8(f.0, t.0, self.0) })
            } else {
                Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1))
            }
        }
    }

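    // The `cmp_*` methods return lane masks: each lane is all ones (-1) when
    // the comparison holds for that lane and all zeros otherwise.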
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_cmpeq_epi32(self.0, rhs.0) })
            } else {
                Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
            }
        }
    }

    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_cmpgt_epi32(self.0, rhs.0) })
            } else {
                Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1))
            }
        }
    }

    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                // There is no `_mm256_cmplt_epi32`. Inverting the result of
                // `_mm256_cmpgt_epi32(self, rhs)` would yield `self <= rhs`,
                // so swap the operands instead: `self < rhs` == `rhs > self`.
                Self(unsafe { _mm256_cmpgt_epi32(rhs.0, self.0) })
            } else {
                Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1))
            }
        }
    }

    pub fn to_f32x8(self) -> f32x8 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                cast(unsafe { _mm256_cvtepi32_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast([self.0.to_f32x4(), self.1.to_f32x4()])
            } else {
                f32x8(self.0.to_f32x4(), self.1.to_f32x4())
            }
        }
    }

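    // Unlike `to_f32x8`, which converts each lane numerically, the
    // `*_bitcast` methods reinterpret the underlying 256 bits unchanged.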
    pub fn to_u32x8_bitcast(self) -> u32x8 {
        bytemuck::cast(self)
    }

    pub fn to_f32x8_bitcast(self) -> f32x8 {
        bytemuck::cast(self)
    }
}

impl From<[i32; 8]> for i32x8 {
    fn from(v: [i32; 8]) -> Self {
        cast(v)
    }
}

impl From<i32x8> for [i32; 8] {
    fn from(v: i32x8) -> Self {
        cast(v)
    }
}

impl core::ops::Add for i32x8 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_add_epi32(self.0, rhs.0) })
            } else {
                Self(self.0 + rhs.0, self.1 + rhs.1)
            }
        }
    }
}

impl core::ops::BitAnd for i32x8 {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_and_si256(self.0, rhs.0) })
            } else {
                Self(self.0 & rhs.0, self.1 & rhs.1)
            }
        }
    }
}

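// Note: `_mm256_mullo_epi32` keeps the low 32 bits of each 64-bit lane
// product, i.e. the usual wrapping i32 multiplication.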
impl core::ops::Mul for i32x8 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_mullo_epi32(self.0, rhs.0) })
            } else {
                Self(self.0 * rhs.0, self.1 * rhs.1)
            }
        }
    }
}

impl core::ops::BitOr for i32x8 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_or_si256(self.0, rhs.0) })
            } else {
                Self(self.0 | rhs.0, self.1 | rhs.1)
            }
        }
    }
}

impl core::ops::BitXor for i32x8 {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_xor_si256(self.0, rhs.0) })
            } else {
                Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
            }
        }
    }
}
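
// A minimal usage sketch (not part of the original file): it demonstrates the
// compare-then-blend pattern these wrappers are built for, here computing a
// lane-wise minimum. The module and test names are illustrative only.
#[cfg(test)]
mod i32x8_usage_sketch {
    use super::i32x8;

    #[test]
    fn compare_then_blend_selects_lanewise_minimum() {
        let a = i32x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
        let b = i32x8::splat(4);
        // Lanes where `a < b` take their value from `a`, the rest from `b`.
        let min = a.cmp_lt(b).blend(a, b);
        assert_eq!(<[i32; 8]>::from(min), [0, 1, 2, 3, 4, 4, 4, 4]);
    }
}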