// tiny_skia/wide/u32x4_t.rs

// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

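// `u32x4` wraps the widest 128-bit integer type the target provides:
// `__m128i` on x86/x86_64 with SSE2, `v128` on wasm32 with simd128,
// `uint32x4_t` on AArch64 with NEON, and a plain `[u32; 4]` otherwise.
// Every variant is `#[repr(C, align(16))]`, so the in-memory layout is
// identical across backends.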
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        // unused when AVX is available
        #[cfg(not(all(feature = "simd", target_feature = "avx2")))]
        use bytemuck::cast;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct u32x4(__m128i);
    } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
        use core::arch::wasm32::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct u32x4(v128);
    } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
        use core::arch::aarch64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct u32x4(uint32x4_t);
    } else {
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(16))]
        pub struct u32x4([u32; 4]);
    }
}

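// SAFETY: in every backend `u32x4` is a plain 16-byte value with no padding
// and no invalid bit patterns, so zero-initialization and byte-level casts
// are sound.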
unsafe impl bytemuck::Zeroable for u32x4 {}
unsafe impl bytemuck::Pod for u32x4 {}

impl Default for u32x4 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl u32x4 {
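    /// Broadcasts `n` into all four lanes.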
    pub fn splat(n: u32) -> Self {
        bytemuck::cast([n, n, n, n])
    }

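    /// Lane-wise `==`: a lane becomes `u32::MAX` on equality and `0`
    /// otherwise, matching the all-ones/all-zeros masks the SIMD compare
    /// instructions produce.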
    // unused when AVX is available
    #[cfg(not(all(feature = "simd", target_feature = "avx2")))]
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_cmpeq_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(u32x4_eq(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vceqq_u32(self.0, rhs.0) })
            } else {
                Self([
                    if self.0[0] == rhs.0[0] { u32::MAX } else { 0 },
                    if self.0[1] == rhs.0[1] { u32::MAX } else { 0 },
                    if self.0[2] == rhs.0[2] { u32::MAX } else { 0 },
                    if self.0[3] == rhs.0[3] { u32::MAX } else { 0 },
                ])
            }
        }
    }

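    /// Shifts every lane left by the constant `RHS` bits.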
    // unused when AVX is available
    #[cfg(not(all(feature = "simd", target_feature = "avx2")))]
    pub fn shl<const RHS: i32>(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                let shift: __m128i = cast([RHS as u64, 0]);
                Self(unsafe { _mm_sll_epi32(self.0, shift) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(u32x4_shl(self.0, RHS as _))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vshlq_n_u32::<RHS>(self.0) })
            } else {
                let u = RHS as u64;
                Self([
                    self.0[0] << u,
                    self.0[1] << u,
                    self.0[2] << u,
                    self.0[3] << u,
                ])
            }
        }
    }

    // unused when AVX is available
    #[cfg(not(all(feature = "simd", target_feature = "avx2")))]
    pub fn shr<const RHS: i32>(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                let shift: __m128i = cast([RHS as u64, 0]);
                Self(unsafe { _mm_srl_epi32(self.0, shift) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(u32x4_shr(self.0, RHS as _))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vshrq_n_u32::<RHS>(self.0) })
            } else {
                let u = RHS as u64;
                Self([
                    self.0[0] >> u,
                    self.0[1] >> u,
                    self.0[2] >> u,
                    self.0[3] >> u,
                ])
            }
        }
    }
}

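// Lane-wise bitwise NOT. SSE2 has no dedicated NOT instruction, so the x86
// path XORs with an all-ones register instead.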
impl core::ops::Not for u32x4 {
    type Output = Self;

    fn not(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                let all_bits = unsafe { _mm_set1_epi32(-1) };
                Self(unsafe { _mm_xor_si128(self.0, all_bits) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_not(self.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vmvnq_u32(self.0) })
            } else {
                Self([
                    !self.0[0],
                    !self.0[1],
                    !self.0[2],
                    !self.0[3],
                ])
            }
        }
    }
}

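// Lane-wise addition. Packed SIMD adds wrap on overflow, so the scalar
// fallback uses `wrapping_add` to keep every backend's behavior identical.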
impl core::ops::Add for u32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_add_epi32(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(u32x4_add(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vaddq_u32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0].wrapping_add(rhs.0[0]),
                    self.0[1].wrapping_add(rhs.0[1]),
                    self.0[2].wrapping_add(rhs.0[2]),
                    self.0[3].wrapping_add(rhs.0[3]),
                ])
            }
        }
    }
}

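// Lane-wise bitwise AND; a single 128-bit AND on every SIMD backend.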
impl core::ops::BitAnd for u32x4 {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
                Self(unsafe { _mm_and_si128(self.0, rhs.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
                Self(v128_and(self.0, rhs.0))
            } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
                Self(unsafe { vandq_u32(self.0, rhs.0) })
            } else {
                Self([
                    self.0[0] & rhs.0[0],
                    self.0[1] & rhs.0[1],
                    self.0[2] & rhs.0[2],
                    self.0[3] & rhs.0[3],
                ])
            }
        }
    }
}
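
// A minimal usage sketch (module and test names are illustrative, not part
// of the upstream file). It exercises only the always-available operations
// (`splat`, `Add`, `BitAnd`, `Not`), so it compiles no matter which SIMD
// backend is selected, and it round-trips through `bytemuck::cast` to
// inspect individual lanes.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_ops() {
        let a = u32x4::splat(3);
        let b: u32x4 = bytemuck::cast([1u32, 2, 3, 4]);

        // Lane-wise addition: [3, 3, 3, 3] + [1, 2, 3, 4].
        let sum: [u32; 4] = bytemuck::cast(a + b);
        assert_eq!(sum, [4, 5, 6, 7]);

        // Masking out everything but the lowest bit of each lane.
        let low_bits: [u32; 4] = bytemuck::cast(b & u32x4::splat(1));
        assert_eq!(low_bits, [1, 0, 1, 0]);

        // NOT of an all-zero vector is an all-ones vector.
        let inverted: [u32; 4] = bytemuck::cast(!u32x4::splat(0));
        assert_eq!(inverted, [u32::MAX; 4]);
    }
}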