wide/u32x8_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
  }
}

int_uint_consts!(u32, 8, u32x8, 256);

unsafe impl Zeroable for u32x8 {}
unsafe impl Pod for u32x8 {}

impl Add for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl BitAnd for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
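
// A minimal sanity check of the element-wise operators above (illustrative
// only; the test name and values are assumptions added here, not part of the
// original file). Like the underlying SIMD instructions, these ops wrap on
// overflow.
#[test]
fn sketch_u32x8_elementwise_ops_wrap() {
  let a = u32x8::splat(u32::MAX);
  let b = u32x8::splat(2);
  assert_eq!((a + b).to_array(), [1_u32; 8]);
  assert_eq!((b - a).to_array(), [3_u32; 8]);
  assert_eq!((a * b).to_array(), [u32::MAX - 1; 8]);
}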

impl From<u16x8> for u32x8 {
  /// Widens and zero-extends each lane to `u32`.
  #[inline]
  #[must_use]
  fn from(v: u16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: convert_to_i32_m256i_from_u16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        Self {
          a: u32x4 { sse: shr_imm_u32_m128i::<16>(unpack_low_i16_m128i(v.sse, v.sse)) },
          b: u32x4 { sse: shr_imm_u32_m128i::<16>(unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        u32x8::new([
          u32::from(v.as_array_ref()[0]),
          u32::from(v.as_array_ref()[1]),
          u32::from(v.as_array_ref()[2]),
          u32::from(v.as_array_ref()[3]),
          u32::from(v.as_array_ref()[4]),
          u32::from(v.as_array_ref()[5]),
          u32::from(v.as_array_ref()[6]),
          u32::from(v.as_array_ref()[7]),
        ])
      }
    }
  }
}
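
// A minimal sanity check of the widening conversion above (illustrative only;
// the test name and values are assumptions added here, not part of the
// original file). Each u16 lane should reappear zero-extended in the output.
#[test]
fn sketch_u32x8_from_u16x8_zero_extends() {
  let v = u16x8::new([0, 1, 2, 3, 4, 5, 6, u16::MAX]);
  let w = u32x8::from(v);
  assert_eq!(w.to_array(), [0, 1, 2, 3, 4, 5, 6, 65535]);
}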

macro_rules! impl_shl_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
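
// A minimal sanity check of the scalar-shift impls generated above
// (illustrative only; the test name and values are assumptions added here).
// Every lane is shifted by the same amount.
#[test]
fn sketch_u32x8_shift_all_lanes_by_scalar() {
  let x = u32x8::new([1, 2, 4, 8, 16, 32, 64, 128]);
  assert_eq!((x << 1_u32).to_array(), [2, 4, 8, 16, 32, 64, 128, 256]);
  assert_eq!((x >> 2_i32).to_array(), [0, 0, 1, 2, 4, 8, 16, 32]);
}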

/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shr`)
impl Shr<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  #[must_use]
  fn shr(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // ensure same behavior as scalar wrapping_shr
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shr_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shr(rhs.a),
          b : self.b.shr(rhs.b),
        }
      }
    }
  }
}

/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where `mask` removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shl`)
impl Shl<u32x8> for u32x8 {
  type Output = Self;

  #[inline]
  #[must_use]
  fn shl(self, rhs: u32x8) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // ensure same behavior as scalar wrapping_shl
        let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
        Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
      } else {
        Self {
          a : self.a.shl(rhs.a),
          b : self.b.shl(rhs.b),
        }
      }
    }
  }
}
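
// A minimal sanity check of the per-lane wrapping shifts above (illustrative
// only; the test name and values are assumptions added here). A shift amount
// of 33 is masked down to 1, matching scalar wrapping_shl / wrapping_shr.
#[test]
fn sketch_u32x8_per_lane_shift_masks_amount() {
  let x = u32x8::splat(0b1000);
  let by = u32x8::new([0, 1, 2, 3, 33, 1, 2, 3]);
  assert_eq!((x << by).to_array(), [8, 16, 32, 64, 16, 16, 32, 64]);
  assert_eq!((x >> by).to_array(), [8, 4, 2, 1, 4, 4, 2, 1]);
}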

impl CmpEq for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    Self::cmp_eq(self, rhs)
  }
}

impl u32x8 {
  #[inline]
  #[must_use]
  pub const fn new(array: [u32; 8]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
  #[inline]
  #[must_use]
  pub fn cmp_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn cmp_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no unsigned greater-than compare, so flip the high bit of
        // both operands and use the signed compare instead.
        let highbit = u32x8::splat(1 << 31);
        Self { avx2: cmp_gt_mask_i32_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn cmp_lt(self, rhs: Self) -> Self {
    // lt is just gt the other way around
    rhs.cmp_gt(self)
  }

  /// Multiplies the 32-bit lanes as 32x32 -> 64-bit products and keeps only
  /// the high 32 bits of each result. Useful for implementing division by a
  /// constant value (see the t_usefulness example).
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: u32x8) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a : [u32; 8] = cast(self);
        let b : [u32; 8] = cast(rhs);

        // let the compiler shuffle the values around; it does the right thing
        let r1 : [u32; 8] = cast(mul_u64_low_bits_m256i(cast([a[0], 0, a[1], 0, a[2], 0, a[3], 0]), cast([b[0], 0, b[1], 0, b[2], 0, b[3], 0])));
        let r2 : [u32; 8] = cast(mul_u64_low_bits_m256i(cast([a[4], 0, a[5], 0, a[6], 0, a[7], 0]), cast([b[4], 0, b[5], 0, b[6], 0, b[7], 0])));

        cast([r1[1], r1[3], r1[5], r1[7], r2[1], r2[3], r2[5], r2[7]])
      } else {
        Self {
          a : self.a.mul_keep_high(rhs.a),
          b : self.b.mul_keep_high(rhs.b),
        }
      }
    }
  }

  /// Lane-wise blend: takes each lane from `t` where the corresponding lane
  /// of `self` is all ones (e.g. a comparison result) and from `f` where it
  /// is all zeros.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_u32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_u32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

  /// `true` if any lane has its high bit set (e.g. from a comparison mask).
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // the mask keeps only the sign bit of each 32-bit lane
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }

  /// `true` only if every lane has its high bit set (e.g. from a comparison mask).
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // every 32-bit lane must have its sign bit set
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) == 0b10001000100010001000100010001000
      } else {
        (self.a & self.b).all()
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [u32; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[u32; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [u32; 8] {
    cast_mut(self)
  }
}
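
// Minimal sanity checks for mul_keep_high, blend, and the mask helpers above
// (illustrative only; the test names and values are assumptions added here,
// not part of the original file).

// Each mul_keep_high lane should match the scalar ((a as u64 * b as u64) >> 32)
// as u32, which is the building block for dividing by a constant via a
// precomputed fixed-point reciprocal.
#[test]
fn sketch_u32x8_mul_keep_high_matches_scalar() {
  let a = u32x8::new([1, 2, u32::MAX, 0x8000_0000, 7, 123_456_789, 0, 42]);
  let b = u32x8::splat(0xDEAD_BEEF);
  let mut expected = [0u32; 8];
  for (i, e) in expected.iter_mut().enumerate() {
    *e = ((a.as_array_ref()[i] as u64 * 0xDEAD_BEEF_u64) >> 32) as u32;
  }
  assert_eq!(a.mul_keep_high(b).to_array(), expected);
}

// blend takes lanes from `t` where the mask is all ones, so cmp_gt + blend
// reproduces max.
#[test]
fn sketch_u32x8_blend_with_cmp_gt_mask() {
  let a = u32x8::new([1, 9, 3, 7, 0, u32::MAX, 5, 5]);
  let b = u32x8::new([2, 8, 3, 6, 1, 0, 4, 5]);
  let picked = a.cmp_gt(b).blend(a, b);
  assert_eq!(picked.to_array(), a.max(b).to_array());
}

// any/all/none report on comparison masks.
#[test]
fn sketch_u32x8_any_all_none_on_cmp_masks() {
  let x = u32x8::new([1, 2, 3, 4, 5, 6, 7, 8]);
  assert!(x.cmp_lt(u32x8::splat(5)).any());
  assert!(!x.cmp_lt(u32x8::splat(5)).all());
  assert!(x.cmp_lt(u32x8::splat(100)).all());
  assert!(x.cmp_gt(u32x8::splat(100)).none());
}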

impl Not for u32x8 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}