// wide/u16x8_.rs

use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u16x8 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct u16x8 { pub(crate) simd: v128 }

    impl Default for u16x8 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u16x8 {
      fn eq(&self, other: &Self) -> bool {
        u16x8_all_true(u16x8_eq(self.simd, other.simd))
      }
    }

    impl Eq for u16x8 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      use core::arch::aarch64::*;
      #[repr(C)]
      #[derive(Copy, Clone)]
      pub struct u16x8 { pub(crate) neon : uint16x8_t }

      impl Default for u16x8 {
        #[inline]
        #[must_use]
        fn default() -> Self {
          Self::splat(0)
        }
      }

      impl PartialEq for u16x8 {
        #[inline]
        #[must_use]
        fn eq(&self, other: &Self) -> bool {
          unsafe { vminvq_u16(vceqq_u16(self.neon, other.neon))==u16::MAX }
        }
      }

      impl Eq for u16x8 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct u16x8 { pub(crate) arr: [u16;8] }
  }
}

int_uint_consts!(u16, 8, u16x8, 128);

unsafe impl Zeroable for u16x8 {}
unsafe impl Pod for u16x8 {}

impl Add for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_u16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
          self.arr[4].wrapping_add(rhs.arr[4]),
          self.arr[5].wrapping_add(rhs.arr[5]),
          self.arr[6].wrapping_add(rhs.arr[6]),
          self.arr[7].wrapping_add(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Sub for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vsubq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
          self.arr[4].wrapping_sub(rhs.arr[4]),
          self.arr[5].wrapping_sub(rhs.arr[5]),
          self.arr[6].wrapping_sub(rhs.arr[6]),
          self.arr[7].wrapping_sub(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Mul for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_i16_keep_low_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmulq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].wrapping_mul(rhs.arr[0]),
          self.arr[1].wrapping_mul(rhs.arr[1]),
          self.arr[2].wrapping_mul(rhs.arr[2]),
          self.arr[3].wrapping_mul(rhs.arr[3]),
          self.arr[4].wrapping_mul(rhs.arr[4]),
          self.arr[5].wrapping_mul(rhs.arr[5]),
          self.arr[6].wrapping_mul(rhs.arr[6]),
          self.arr[7].wrapping_mul(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Add<u16> for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: u16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<u16> for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<u16> for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<u16x8> for u16 {
  type Output = u16x8;
  #[inline]
  #[must_use]
  fn add(self, rhs: u16x8) -> Self::Output {
    u16x8::splat(self).add(rhs)
  }
}

impl Sub<u16x8> for u16 {
  type Output = u16x8;
  #[inline]
  #[must_use]
  fn sub(self, rhs: u16x8) -> Self::Output {
    u16x8::splat(self).sub(rhs)
  }
}

impl Mul<u16x8> for u16 {
  type Output = u16x8;
  #[inline]
  #[must_use]
  fn mul(self, rhs: u16x8) -> Self::Output {
    u16x8::splat(self).mul(rhs)
  }
}

impl BitAnd for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
          self.arr[4].bitand(rhs.arr[4]),
          self.arr[5].bitand(rhs.arr[5]),
          self.arr[6].bitand(rhs.arr[6]),
          self.arr[7].bitand(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitOr for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
          self.arr[4].bitor(rhs.arr[4]),
          self.arr[5].bitor(rhs.arr[5]),
          self.arr[6].bitor(rhs.arr[6]),
          self.arr[7].bitor(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitXor for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
          self.arr[4].bitxor(rhs.arr[4]),
          self.arr[5].bitxor(rhs.arr[5]),
          self.arr[6].bitxor(rhs.arr[6]),
          self.arr[7].bitxor(rhs.arr[7]),
        ]}
      }
    }
  }
}

macro_rules! impl_shl_t_for_u16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u16x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
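      ///
      /// A minimal sketch of the behavior (every lane shifts by the same amount):
      ///
      /// ```
      /// use wide::u16x8;
      /// let a = u16x8::splat(1);
      /// assert_eq!((a << 3).to_array(), [8; 8]);
      /// ```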
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u16x8_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u16(self.neon, vmovq_n_s16(rhs as i16)) }}
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
              self.arr[4] << u,
              self.arr[5] << u,
              self.arr[6] << u,
              self.arr[7] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u16x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
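      ///
      /// A minimal sketch of the behavior (every lane shifts by the same amount):
      ///
      /// ```
      /// use wide::u16x8;
      /// let a = u16x8::splat(0b1000);
      /// assert_eq!((a >> 3).to_array(), [1; 8]);
      /// ```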
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_u16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: u16x8_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_u16(self.neon, vmovq_n_s16( -(rhs as i16))) }}
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
              self.arr[4] >> u,
              self.arr[5] >> u,
              self.arr[6] >> u,
              self.arr[7] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_u16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for u16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    Self::cmp_eq(self, rhs)
  }
}

impl u16x8 {
  /// Creates a `u16x8` from an array of eight `u16` lanes.
  #[inline]
  #[must_use]
  pub const fn new(array: [u16; 8]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
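  /// Lane-wise equality test: each lane becomes `u16::MAX` where the lanes are
  /// equal and `0` where they differ, mirroring the scalar fallback arm below.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::new([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let b = u16x8::new([1, 0, 3, 0, 5, 0, 7, 0]);
  /// assert_eq!(
  ///   a.cmp_eq(b).to_array(),
  ///   [u16::MAX, 0, u16::MAX, 0, u16::MAX, 0, u16::MAX, 0]
  /// );
  /// ```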
  #[inline]
  #[must_use]
  pub fn cmp_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vceqq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { u16::MAX } else { 0 },
          if self.arr[1] == rhs.arr[1] { u16::MAX } else { 0 },
          if self.arr[2] == rhs.arr[2] { u16::MAX } else { 0 },
          if self.arr[3] == rhs.arr[3] { u16::MAX } else { 0 },
          if self.arr[4] == rhs.arr[4] { u16::MAX } else { 0 },
          if self.arr[5] == rhs.arr[5] { u16::MAX } else { 0 },
          if self.arr[6] == rhs.arr[6] { u16::MAX } else { 0 },
          if self.arr[7] == rhs.arr[7] { u16::MAX } else { 0 },
        ]}
      }
    }
  }
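  /// Blends `t` and `f` using `self` as a per-lane mask: lanes where the mask
  /// is all ones take `t`, lanes where it is all zeros take `f`. Masks are
  /// expected to come from comparison methods such as `cmp_eq`.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let mask = u16x8::new([u16::MAX, 0, u16::MAX, 0, u16::MAX, 0, u16::MAX, 0]);
  /// let t = u16x8::splat(1);
  /// let f = u16x8::splat(2);
  /// assert_eq!(mask.blend(t, f).to_array(), [1, 2, 1, 2, 1, 2, 1, 2]);
  /// ```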
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_u16(self.neon, t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
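  /// Lane-wise maximum.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::new([0, 2, 60000, 4, 5, 6, 7, 8]);
  /// let b = u16x8::new([1, 1, 1, 1, 9, 9, 9, 9]);
  /// assert_eq!(a.max(b).to_array(), [1, 2, 60000, 4, 9, 9, 9, 9]);
  /// ```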
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: max_u16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vmaxq_u16(self.neon, rhs.neon) }}
      } else {
        let arr: [u16; 8] = cast(self);
        let rhs: [u16; 8] = cast(rhs);
        cast([
          arr[0].max(rhs[0]),
          arr[1].max(rhs[1]),
          arr[2].max(rhs[2]),
          arr[3].max(rhs[3]),
          arr[4].max(rhs[4]),
          arr[5].max(rhs[5]),
          arr[6].max(rhs[6]),
          arr[7].max(rhs[7]),
        ])
      }
    }
  }
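  /// Lane-wise minimum.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::new([0, 2, 60000, 4, 5, 6, 7, 8]);
  /// let b = u16x8::new([1, 1, 1, 1, 9, 9, 9, 9]);
  /// assert_eq!(a.min(b).to_array(), [0, 1, 1, 1, 5, 6, 7, 8]);
  /// ```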
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: min_u16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vminq_u16(self.neon, rhs.neon) }}
      } else {
        let arr: [u16; 8] = cast(self);
        let rhs: [u16; 8] = cast(rhs);
        cast([
          arr[0].min(rhs[0]),
          arr[1].min(rhs[1]),
          arr[2].min(rhs[2]),
          arr[3].min(rhs[3]),
          arr[4].min(rhs[4]),
          arr[5].min(rhs[5]),
          arr[6].min(rhs[6]),
          arr[7].min(rhs[7]),
        ])
      }
    }
  }

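  /// Lane-wise saturating addition: sums that would overflow clamp to `u16::MAX`.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::new([65535, 65000, 1, 2, 3, 4, 5, 6]);
  /// let b = u16x8::new([1, 1000, 1, 2, 3, 4, 5, 6]);
  /// assert_eq!(
  ///   a.saturating_add(b).to_array(),
  ///   [65535, 65535, 2, 4, 6, 8, 10, 12]
  /// );
  /// ```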
  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_saturating_u16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_add_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vqaddq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].saturating_add(rhs.arr[0]),
          self.arr[1].saturating_add(rhs.arr[1]),
          self.arr[2].saturating_add(rhs.arr[2]),
          self.arr[3].saturating_add(rhs.arr[3]),
          self.arr[4].saturating_add(rhs.arr[4]),
          self.arr[5].saturating_add(rhs.arr[5]),
          self.arr[6].saturating_add(rhs.arr[6]),
          self.arr[7].saturating_add(rhs.arr[7]),
        ]}
      }
    }
  }
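  /// Lane-wise saturating subtraction: differences that would underflow clamp to `0`.
  ///
  /// A minimal sketch:
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::new([0, 10, 100, 30, 40, 50, 60, 70]);
  /// let b = u16x8::new([1, 20, 30, 30, 40, 50, 60, 70]);
  /// assert_eq!(a.saturating_sub(b).to_array(), [0, 0, 70, 0, 0, 0, 0, 0]);
  /// ```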
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_saturating_u16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: u16x8_sub_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vqsubq_u16(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].saturating_sub(rhs.arr[0]),
          self.arr[1].saturating_sub(rhs.arr[1]),
          self.arr[2].saturating_sub(rhs.arr[2]),
          self.arr[3].saturating_sub(rhs.arr[3]),
          self.arr[4].saturating_sub(rhs.arr[4]),
          self.arr[5].saturating_sub(rhs.arr[5]),
          self.arr[6].saturating_sub(rhs.arr[6]),
          self.arr[7].saturating_sub(rhs.arr[7]),
        ]}
      }
    }
  }

  /// Unpacks the lower half of the input and zero-extends it to `u16` values.
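  ///
  /// A minimal sketch, assuming the `u8x16` constructor from this crate:
  ///
  /// ```
  /// use wide::*;
  /// let v = u8x16::new([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  /// assert_eq!(u16x8::from_u8x16_low(v).to_array(), [0, 1, 2, 3, 4, 5, 6, 7]);
  /// ```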
  #[inline]
  #[must_use]
  pub fn from_u8x16_low(u: u8x16) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self{ sse: unpack_low_i8_m128i(u.sse, m128i::zeroed()) }
      } else {
        let u_arr: [u8; 16] = cast(u);
        cast([
          u_arr[0] as u16,
          u_arr[1] as u16,
          u_arr[2] as u16,
          u_arr[3] as u16,
          u_arr[4] as u16,
          u_arr[5] as u16,
          u_arr[6] as u16,
          u_arr[7] as u16,
        ])
      }
    }
  }

  /// Unpacks the upper half of the input and zero-extends it to `u16` values.
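  ///
  /// A minimal sketch, assuming the `u8x16` constructor from this crate:
  ///
  /// ```
  /// use wide::*;
  /// let v = u8x16::new([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  /// assert_eq!(u16x8::from_u8x16_high(v).to_array(), [8, 9, 10, 11, 12, 13, 14, 15]);
  /// ```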
  #[inline]
  #[must_use]
  pub fn from_u8x16_high(u: u8x16) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self{ sse: unpack_high_i8_m128i(u.sse, m128i::zeroed()) }
      } else {
        let u_arr: [u8; 16] = cast(u);
        cast([
          u_arr[8] as u16,
          u_arr[9] as u16,
          u_arr[10] as u16,
          u_arr[11] as u16,
          u_arr[12] as u16,
          u_arr[13] as u16,
          u_arr[14] as u16,
          u_arr[15] as u16,
        ])
      }
    }
  }

  /// Multiplies two `u16x8` lane-wise and returns the full products widened to a `u32x8`.
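  ///
  /// A minimal sketch (widening preserves products that would overflow `u16`):
  ///
  /// ```
  /// use wide::*;
  /// let a = u16x8::new([1, 2, 3, 4, 5, 6, 7, 65535]);
  /// let b = u16x8::splat(300);
  /// assert_eq!(
  ///   a.mul_widen(b).to_array(),
  ///   [300, 600, 900, 1200, 1500, 1800, 2100, 19_660_500]
  /// );
  /// ```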
  #[inline]
  #[must_use]
  pub fn mul_widen(self, rhs: Self) -> u32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = convert_to_i32_m256i_from_u16_m128i(self.sse);
        let b = convert_to_i32_m256i_from_u16_m128i(rhs.sse);
        u32x8 { avx2: mul_i32_keep_low_m256i(a, b) }
      } else if #[cfg(target_feature="sse2")] {
        let low = mul_i16_keep_low_m128i(self.sse, rhs.sse);
        let high = mul_u16_keep_high_m128i(self.sse, rhs.sse);
        u32x8 {
          a: u32x4 { sse: unpack_low_i16_m128i(low, high) },
          b: u32x4 { sse: unpack_high_i16_m128i(low, high) }
        }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        let lhs_low = unsafe { vget_low_u16(self.neon) };
        let rhs_low = unsafe { vget_low_u16(rhs.neon) };

        let lhs_high = unsafe { vget_high_u16(self.neon) };
        let rhs_high = unsafe { vget_high_u16(rhs.neon) };

        let low = unsafe { vmull_u16(lhs_low, rhs_low) };
        let high = unsafe { vmull_u16(lhs_high, rhs_high) };

        u32x8 { a: u32x4 { neon: low }, b: u32x4 { neon: high } }
      } else {
        let a = self.as_array_ref();
        let b = rhs.as_array_ref();
        u32x8::new([
          u32::from(a[0]) * u32::from(b[0]),
          u32::from(a[1]) * u32::from(b[1]),
          u32::from(a[2]) * u32::from(b[2]),
          u32::from(a[3]) * u32::from(b[3]),
          u32::from(a[4]) * u32::from(b[4]),
          u32::from(a[5]) * u32::from(b[5]),
          u32::from(a[6]) * u32::from(b[6]),
          u32::from(a[7]) * u32::from(b[7]),
        ])
      }
    }
  }

  /// Multiplies two `u16x8` lane-wise and returns the high 16 bits of each intermediate `u32` product.
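  ///
  /// A minimal sketch (each lane is `((a as u32 * b as u32) >> 16) as u16`,
  /// mirroring the scalar fallback below):
  ///
  /// ```
  /// use wide::u16x8;
  /// let a = u16x8::splat(0x1234);
  /// let b = u16x8::splat(0x0100);
  /// // 0x1234 * 0x0100 == 0x0012_3400, so the high half of each product is 0x0012.
  /// assert_eq!(a.mul_keep_high(b).to_array(), [0x12; 8]);
  /// ```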
  #[inline]
  #[must_use]
  pub fn mul_keep_high(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_u16_keep_high_m128i(self.sse, rhs.sse) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        let lhs_low = unsafe { vget_low_u16(self.neon) };
        let rhs_low = unsafe { vget_low_u16(rhs.neon) };

        let lhs_high = unsafe { vget_high_u16(self.neon) };
        let rhs_high = unsafe { vget_high_u16(rhs.neon) };

        let low = unsafe { vmull_u16(lhs_low, rhs_low) };
        let high = unsafe { vmull_u16(lhs_high, rhs_high) };

        u16x8 { neon: unsafe { vuzpq_u16(vreinterpretq_u16_u32(low), vreinterpretq_u16_u32(high)).1 } }
      } else if #[cfg(target_feature="simd128")] {
        let low =  u32x4_extmul_low_u16x8(self.simd, rhs.simd);
        let high = u32x4_extmul_high_u16x8(self.simd, rhs.simd);

        Self { simd: u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(low, high) }
      } else {
        u16x8::new([
          ((u32::from(rhs.as_array_ref()[0]) * u32::from(self.as_array_ref()[0])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[1]) * u32::from(self.as_array_ref()[1])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[2]) * u32::from(self.as_array_ref()[2])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[3]) * u32::from(self.as_array_ref()[3])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[4]) * u32::from(self.as_array_ref()[4])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[5]) * u32::from(self.as_array_ref()[5])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[6]) * u32::from(self.as_array_ref()[6])) >> 16) as u16,
          ((u32::from(rhs.as_array_ref()[7]) * u32::from(self.as_array_ref()[7])) >> 16) as u16,
        ])
      }
    }
  }

  /// Returns the vector as an array of eight `u16` lanes.
  #[inline]
  pub fn to_array(self) -> [u16; 8] {
    cast(self)
  }

  /// Views the vector as a reference to an array of eight `u16` lanes.
  #[inline]
  pub fn as_array_ref(&self) -> &[u16; 8] {
    cast_ref(self)
  }

  /// Views the vector as a mutable reference to an array of eight `u16` lanes.
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [u16; 8] {
    cast_mut(self)
  }
}