wide/i16x16_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    // With AVX2 the whole vector fits in a single 256-bit register.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) avx2: m256i }
  } else {
    // Without AVX2, store the vector as two 128-bit halves.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) a : i16x8, pub(crate) b : i16x8 }
  }
}

int_uint_consts!(i16, 16, i16x16, 256);

unsafe impl Zeroable for i16x16 {}
unsafe impl Pod for i16x16 {}

impl Add for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl Add<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).add(rhs)
  }
}

impl Sub<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).sub(rhs)
  }
}

impl Mul<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).mul(rhs)
  }
}

impl BitAnd for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shl_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // the shift amount is taken from the low 64 bits of a vector
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // the shift amount is taken from the low 64 bits of a vector
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_i16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // a lane is never both "greater" and "equal", so
        // `!(a > b) ^ (a == b)` is the same mask as `a < b`.
        Self { avx2: !cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}

impl From<i8x16> for i16x16 {
  /// widens with sign extension from i8 to i16
  #[inline]
  #[must_use]
  fn from(i: i8x16) -> Self {
    i16x16::from_i8x16(i)
  }
}

impl From<u8x16> for i16x16 {
  /// widens with zero extension from u8 to i16
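  ///
  /// An illustrative sketch (assumes `u8x16::new` from the sibling module):
  /// ```
  /// # use wide::*;
  /// let v = u8x16::new([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255]);
  /// assert_eq!(
  ///   i16x16::from(v).to_array(),
  ///   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255]
  /// );
  /// ```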
  #[inline]
  #[must_use]
  fn from(i: u8x16) -> Self {
    cast(u16x16::from(i))
  }
}

impl Not for i16x16 {
  type Output = Self;
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}

impl i16x16 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i16; 16]) -> Self {
    unsafe { core::mem::transmute(array) }
  }

  /// Collects the sign bit of each lane into the low 16 bits of an `i32`,
  /// with lane 0 in bit 0.
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // pack the 16-bit lanes down to 8-bit (saturating, which preserves
        // each sign bit) and grab all 16 sign bits at once.
        let [a, b] = cast::<_, [m128i; 2]>(self);
        move_mask_i8_m128i(pack_i16_to_i8_m128i(a, b))
      } else {
        self.a.move_mask() | (self.b.move_mask() << 8)
      }
    }
  }

  /// Returns `true` if the sign bit of any lane is set.
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // the odd bits of the byte move mask are the i16 lane sign bits
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }
  /// Returns `true` if the sign bit of every lane is set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // the odd bits of the byte move mask are the i16 lane sign bits
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) == 0b10101010101010101010101010101010
      } else {
        (self.a & self.b).all()
      }
    }
  }
  /// Returns `true` if no lane has its sign bit set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// widens and sign extends each lane from i8 to i16
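  ///
  /// An illustrative sketch (assumes `i8x16::new` from the sibling module):
  /// ```
  /// # use wide::*;
  /// let v = i8x16::new([0, 1, -1, i8::MIN, i8::MAX, 5, -6, 7, 8, -9, 10, 11, 12, 13, 14, 15]);
  /// assert_eq!(
  ///   i16x16::from_i8x16(v).to_array(),
  ///   [0, 1, -1, -128, 127, 5, -6, 7, 8, -9, 10, 11, 12, 13, 14, 15]
  /// );
  /// ```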
  #[inline]
  #[must_use]
  pub fn from_i8x16(v: i8x16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i16x16 { avx2: convert_to_i16_m256i_from_i8_m128i(v.sse) }
      } else if #[cfg(target_feature="sse4.1")] {
        i16x16 {
          a: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(v.sse) },
          b: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(unpack_high_i64_m128i(v.sse, v.sse)) }
        }
      } else if #[cfg(target_feature="sse2")] {
        // interleave each byte with itself, then arithmetic shift right by 8
        // to sign extend the original byte into a 16-bit lane.
        i16x16 {
          a: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_low_i8_m128i(v.sse, v.sse)) },
          b: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_high_i8_m128i(v.sse, v.sse)) },
        }
      } else {
        i16x16::new([
          v.as_array_ref()[0] as i16,
          v.as_array_ref()[1] as i16,
          v.as_array_ref()[2] as i16,
          v.as_array_ref()[3] as i16,
          v.as_array_ref()[4] as i16,
          v.as_array_ref()[5] as i16,
          v.as_array_ref()[6] as i16,
          v.as_array_ref()[7] as i16,
          v.as_array_ref()[8] as i16,
          v.as_array_ref()[9] as i16,
          v.as_array_ref()[10] as i16,
          v.as_array_ref()[11] as i16,
          v.as_array_ref()[12] as i16,
          v.as_array_ref()[13] as i16,
          v.as_array_ref()[14] as i16,
          v.as_array_ref()[15] as i16,
        ])
      }
    }
  }

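  /// Lanewise select: for each lane, picks from `t` where the matching lane
  /// of `self` is all ones, and from `f` where it is zero.
  ///
  /// An illustrative sketch using a comparison mask:
  /// ```
  /// # use wide::*;
  /// let a = i16x16::splat(3);
  /// let b = i16x16::splat(7);
  /// let mask = a.cmp_gt(b);
  /// // lanewise max of a and b
  /// assert_eq!(mask.blend(a, b).to_array(), [7; 16]);
  /// ```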
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  /// horizontal add of all the elements of the vector
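  ///
  /// An illustrative sketch:
  /// ```
  /// # use wide::*;
  /// let v = i16x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// assert_eq!(v.reduce_add(), 136);
  /// ```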
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    (arr[0] + arr[1]).reduce_add()
  }

  /// horizontal min of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].min(arr[1]).reduce_min()
  }

  /// horizontal max of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].max(arr[1]).reduce_max()
  }

  /// Lanewise absolute value.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i16_m256i(self.avx2) }
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  /// Lanewise maximum.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  /// Lanewise minimum.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

  /// Lanewise saturating add: results are clamped to the `i16` range instead
  /// of wrapping.
  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_add(rhs.a),
          b : self.b.saturating_add(rhs.b),
        }
      }
    }
  }
  /// Lanewise saturating subtract: results are clamped to the `i16` range
  /// instead of wrapping.
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_sub(rhs.a),
          b : self.b.saturating_sub(rhs.b),
        }
      }
    }
  }

  /// Calculates partial dot product.
  /// Multiplies packed signed 16-bit integers, producing intermediate signed
  /// 32-bit integers, then horizontally adds adjacent pairs of those
  /// intermediate 32-bit integers.
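  ///
  /// An illustrative sketch (assumes `i32x8::to_array` from the sibling module):
  /// ```
  /// # use wide::*;
  /// let a = i16x16::new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// let b = i16x16::splat(1);
  /// // lanes (1, 2) -> 1*1 + 2*1 = 3, lanes (3, 4) -> 7, and so on
  /// assert_eq!(a.dot(b).to_array(), [3, 7, 11, 15, 19, 23, 27, 31]);
  /// ```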
  #[inline]
  #[must_use]
  pub fn dot(self, rhs: Self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2: mul_i16_horizontal_add_m256i(self.avx2, rhs.avx2) }
      } else {
        i32x8 {
          a : self.a.dot(rhs.a),
          b : self.b.dot(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale equivalent to `((self * rhs) + 0x4000) >> 15` on each
  /// lane, effectively multiplying by a 16 bit fixed point number between `-1`
  /// and `1`. This corresponds to the following instructions:
  /// - `vqrdmulhq_s16` instruction on neon
  /// - `i16x8_q15mulr_sat` on simd128
  /// - `_mm256_mulhrs_epi16` on avx2
  /// - emulated via `mul_i16_*` on sse2
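  ///
  /// An illustrative sketch (`0x4000` is 0.5 in Q15 fixed point):
  /// ```
  /// # use wide::*;
  /// let x = i16x16::splat(1000);
  /// let half = i16x16::splat(0x4000);
  /// assert_eq!(x.mul_scale_round(half).to_array(), [500; 16]);
  /// ```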
  #[inline]
  #[must_use]
  pub fn mul_scale_round(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul_scale_round(rhs.a),
          b : self.b.mul_scale_round(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale equivalent to `((self * rhs) + 0x4000) >> 15` on each
  /// lane, effectively multiplying by a 16 bit fixed point number between `-1`
  /// and `1`. This corresponds to the following instructions:
  /// - `vqrdmulhq_n_s16` instruction on neon
  /// - `i16x8_q15mulr_sat` on simd128
  /// - `_mm256_mulhrs_epi16` on avx2
  /// - emulated via `mul_i16_*` on sse2
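  ///
  /// An illustrative sketch (`0x4000` is 0.5 in Q15 fixed point):
  /// ```
  /// # use wide::*;
  /// assert_eq!(i16x16::splat(1000).mul_scale_round_n(0x4000).to_array(), [500; 16]);
  /// ```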
  #[inline]
  #[must_use]
  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, set_splat_i16_m256i(rhs)) }
      } else {
        Self {
          a : self.a.mul_scale_round_n(rhs),
          b : self.b.mul_scale_round_n(rhs),
        }
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i16; 16] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i16; 16] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i16; 16] {
    cast_mut(self)
  }
}