#![allow(missing_docs)]
#![allow(non_camel_case_types)] use crate::scalar::{Field, SubsetOf, SupersetOf};
use crate::simd::{
PrimitiveSimdValue, SimdBool, SimdComplexField, SimdPartialOrd, SimdRealField, SimdSigned,
SimdValue,
};
use approx::AbsDiffEq;
#[cfg(feature = "decimal")]
use decimal::d128;
use num::{FromPrimitive, Num, One, Zero};
use std::{
fmt,
ops::{
Add, AddAssign, BitAnd, BitOr, BitXor, Div, DivAssign, Mul, MulAssign, Neg, Not, Rem,
RemAssign, Sub, SubAssign,
},
};
macro_rules! ident_to_value(
() => {
const _0: usize = 0; const _1: usize = 1; const _2: usize = 2; const _3: usize = 3; const _4: usize = 4; const _5: usize = 5; const _6: usize = 6; const _7: usize = 7;
const _8: usize = 8; const _9: usize = 9; const _10: usize = 10; const _11: usize = 11; const _12: usize = 12; const _13: usize = 13; const _14: usize = 14; const _15: usize = 15;
const _16: usize = 16; const _17: usize = 17; const _18: usize = 18; const _19: usize = 19; const _20: usize = 20; const _21: usize = 21; const _22: usize = 22; const _23: usize = 23;
const _24: usize = 24; const _25: usize = 25; const _26: usize = 26; const _27: usize = 27; const _28: usize = 28; const _29: usize = 29; const _30: usize = 30; const _31: usize = 31;
const _32: usize = 32; const _33: usize = 33; const _34: usize = 34; const _35: usize = 35; const _36: usize = 36; const _37: usize = 37; const _38: usize = 38; const _39: usize = 39;
const _40: usize = 40; const _41: usize = 41; const _42: usize = 42; const _43: usize = 43; const _44: usize = 44; const _45: usize = 45; const _46: usize = 46; const _47: usize = 47;
const _48: usize = 48; const _49: usize = 49; const _50: usize = 50; const _51: usize = 51; const _52: usize = 52; const _53: usize = 53; const _54: usize = 54; const _55: usize = 55;
const _56: usize = 56; const _57: usize = 57; const _58: usize = 58; const _59: usize = 59; const _60: usize = 60; const _61: usize = 61; const _62: usize = 62; const _63: usize = 63;
}
);
#[repr(align(16))]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[cfg_attr(
feature = "rkyv",
derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)
)]
#[cfg_attr(feature = "cuda", derive(cust_core::DeviceCopy))]
pub struct AutoSimd<N>(pub N);
#[repr(align(16))]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[cfg_attr(
feature = "rkyv",
derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)
)]
pub struct AutoBoolSimd<N>(pub N);
macro_rules! impl_bool_simd(
($($t: ty, $lanes: expr, $($i: ident),*;)*) => {$(
impl_simd_value!($t, bool, $lanes, AutoSimd<$t> $(, $i)*;);
impl From<[bool; $lanes]> for AutoSimd<$t> {
#[inline(always)]
fn from(vals: [bool; $lanes]) -> Self {
Self(vals)
}
}
impl Not for AutoSimd<$t> {
type Output = Self;
#[inline]
fn not(self) -> Self {
self.map(|x| !x)
}
}
impl BitAnd<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
fn bitand(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x & y)
}
}
impl BitOr<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
fn bitor(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x | y)
}
}
impl BitXor<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
fn bitxor(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x ^ y)
}
}
impl SimdBool for AutoSimd<$t> {
#[inline(always)]
fn bitmask(self) -> u64 {
ident_to_value!();
0u64 $(
| ((self.0[$i] as u64) << $i)
)*
}
#[inline(always)]
fn and(self) -> bool {
ident_to_value!();
true $(
&& self.0[$i]
)*
}
#[inline(always)]
fn or(self) -> bool {
ident_to_value!();
false $(
|| self.0[$i]
)*
}
#[inline(always)]
fn xor(self) -> bool {
ident_to_value!();
false $(
^ self.0[$i]
)*
}
#[inline(always)]
fn all(self) -> bool {
self.and()
}
#[inline(always)]
fn any(self) -> bool {
self.or()
}
#[inline(always)]
fn none(self) -> bool {
!self.any()
}
#[inline(always)]
fn if_else<Res: SimdValue<SimdBool = Self>>(
self,
if_value: impl FnOnce() -> Res,
else_value: impl FnOnce() -> Res,
) -> Res {
let a = if_value();
let b = else_value();
a.select(self, b)
}
#[inline(always)]
fn if_else2<Res: SimdValue<SimdBool = Self>>(
self,
if_value: impl FnOnce() -> Res,
else_if: (impl FnOnce() -> Self, impl FnOnce() -> Res),
else_value: impl FnOnce() -> Res,
) -> Res {
let a = if_value();
let b = else_if.1();
let c = else_value();
let cond_a = self;
let cond_b = else_if.0();
a.select(cond_a, b.select(cond_b, c))
}
#[inline(always)]
fn if_else3<Res: SimdValue<SimdBool = Self>>(
self,
if_value: impl FnOnce() -> Res,
else_if: (impl FnOnce() -> Self, impl FnOnce() -> Res),
else_else_if: (impl FnOnce() -> Self, impl FnOnce() -> Res),
else_value: impl FnOnce() -> Res,
) -> Res {
let a = if_value();
let b = else_if.1();
let c = else_else_if.1();
let d = else_value();
let cond_a = self;
let cond_b = else_if.0();
let cond_c = else_else_if.0();
a.select(cond_a, b.select(cond_b, c.select(cond_c, d)))
}
}
)*}
);
macro_rules! impl_scalar_subset_of_simd(
($($t: ty),*) => {$(
impl<N2> SubsetOf<AutoSimd<N2>> for $t
where AutoSimd<N2>: SimdValue + Copy,
<AutoSimd<N2> as SimdValue>::Element: SupersetOf<$t> + PartialEq, {
#[inline(always)]
fn to_superset(&self) -> AutoSimd<N2> {
AutoSimd::<N2>::splat(<AutoSimd<N2> as SimdValue>::Element::from_subset(self))
}
#[inline(always)]
fn from_superset_unchecked(element: &AutoSimd<N2>) -> $t {
element.extract(0).to_subset_unchecked()
}
#[inline(always)]
fn is_in_subset(c: &AutoSimd<N2>) -> bool {
let elt0 = c.extract(0);
elt0.is_in_subset() &&
(1..AutoSimd::<N2>::lanes()).all(|i| c.extract(i) == elt0)
}
}
)*}
);
impl_scalar_subset_of_simd!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize, f32, f64);
#[cfg(feature = "decimal")]
impl_scalar_subset_of_simd!(d128);
macro_rules! impl_simd_value(
($($t: ty, $elt: ty, $lanes: expr, $bool: ty, $($i: ident),*;)*) => ($(
impl ArrTransform for AutoSimd<$t> {
#[inline(always)]
fn map(self, f: impl Fn(Self::Element) -> Self::Element) -> Self {
ident_to_value!();
Self([$(f(self.0[$i])),*])
}
#[inline(always)]
fn zip_map(self, other: Self, f: impl Fn(Self::Element, Self::Element) -> Self::Element) -> Self {
ident_to_value!();
Self([$(f(self.0[$i], other.0[$i])),*])
}
#[inline(always)]
fn zip_zip_map(self, b: Self, c: Self, f: impl Fn(Self::Element, Self::Element, Self::Element) -> Self::Element) -> Self {
ident_to_value!();
Self([$(f(self.0[$i], b.0[$i], c.0[$i])),*])
}
#[inline(always)]
fn map_bool(self, f: impl Fn(Self::Element) -> bool) -> Self::SimdBool {
ident_to_value!();
AutoSimd([$(f(self.0[$i])),*])
}
#[inline(always)]
fn zip_map_bool(self, other: Self, f: impl Fn(Self::Element, Self::Element) -> bool) -> Self::SimdBool {
ident_to_value!();
AutoSimd([$(f(self.0[$i], other.0[$i])),*])
}
}
impl fmt::Display for AutoSimd<$t> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if Self::lanes() == 1 {
return self.extract(0).fmt(f);
}
write!(f, "({}", self.extract(0))?;
for i in 1..Self::lanes() {
write!(f, ", {}", self.extract(i))?;
}
write!(f, ")")
}
}
impl AutoSimd<$t> {
pub fn new($($i: $elt),*) -> Self {
AutoSimd([$($i),*])
}
}
impl PrimitiveSimdValue for AutoSimd<$t> {}
impl SimdValue for AutoSimd<$t> {
type Element = $elt;
type SimdBool = $bool;
#[inline(always)]
fn lanes() -> usize {
$lanes
}
#[inline(always)]
fn splat(val: Self::Element) -> Self {
AutoSimd([val; $lanes])
}
#[inline(always)]
fn extract(&self, i: usize) -> Self::Element {
self.0[i]
}
#[inline(always)]
unsafe fn extract_unchecked(&self, i: usize) -> Self::Element {
*self.0.get_unchecked(i)
}
#[inline(always)]
fn replace(&mut self, i: usize, val: Self::Element) {
self.0[i] = val
}
#[inline(always)]
unsafe fn replace_unchecked(&mut self, i: usize, val: Self::Element) {
*self.0.get_unchecked_mut(i) = val
}
#[inline(always)]
fn select(self, cond: Self::SimdBool, other: Self) -> Self {
ident_to_value!();
Self([
$(if cond.0[$i] { self.0[$i] } else { other.0[$i] }),*
])
}
}
)*)
);
macro_rules! impl_uint_simd(
($($t: ty, $elt: ty, $lanes: expr, $bool: ty, $($i: ident),*;)*) => ($(
impl_simd_value!($t, $elt, $lanes, $bool $(, $i)*;);
impl From<[$elt; $lanes]> for AutoSimd<$t> {
#[inline(always)]
fn from(vals: [$elt; $lanes]) -> Self {
AutoSimd(vals)
}
}
impl From<AutoSimd<$t>> for [$elt; $lanes] {
#[inline(always)]
fn from(val: AutoSimd<$t>) -> [$elt; $lanes] {
val.0
}
}
impl SubsetOf<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn to_superset(&self) -> Self {
*self
}
#[inline(always)]
fn from_superset(element: &Self) -> Option<Self> {
Some(*element)
}
#[inline(always)]
fn from_superset_unchecked(element: &Self) -> Self {
*element
}
#[inline(always)]
fn is_in_subset(_: &Self) -> bool {
true
}
}
impl Num for AutoSimd<$t> {
type FromStrRadixErr = <$elt as Num>::FromStrRadixErr;
#[inline(always)]
fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
<$elt>::from_str_radix(str, radix).map(Self::splat)
}
}
impl FromPrimitive for AutoSimd<$t> {
#[inline(always)]
fn from_i64(n: i64) -> Option<Self> {
<$elt>::from_i64(n).map(Self::splat)
}
#[inline(always)]
fn from_u64(n: u64) -> Option<Self> {
<$elt>::from_u64(n).map(Self::splat)
}
#[inline(always)]
fn from_isize(n: isize) -> Option<Self> {
<$elt>::from_isize(n).map(Self::splat)
}
#[inline(always)]
fn from_i8(n: i8) -> Option<Self> {
<$elt>::from_i8(n).map(Self::splat)
}
#[inline(always)]
fn from_i16(n: i16) -> Option<Self> {
<$elt>::from_i16(n).map(Self::splat)
}
#[inline(always)]
fn from_i32(n: i32) -> Option<Self> {
<$elt>::from_i32(n).map(Self::splat)
}
#[inline(always)]
fn from_usize(n: usize) -> Option<Self> {
<$elt>::from_usize(n).map(Self::splat)
}
#[inline(always)]
fn from_u8(n: u8) -> Option<Self> {
<$elt>::from_u8(n).map(Self::splat)
}
#[inline(always)]
fn from_u16(n: u16) -> Option<Self> {
<$elt>::from_u16(n).map(Self::splat)
}
#[inline(always)]
fn from_u32(n: u32) -> Option<Self> {
<$elt>::from_u32(n).map(Self::splat)
}
#[inline(always)]
fn from_f32(n: f32) -> Option<Self> {
<$elt>::from_f32(n).map(Self::splat)
}
#[inline(always)]
fn from_f64(n: f64) -> Option<Self> {
<$elt>::from_f64(n).map(Self::splat)
}
}
impl Zero for AutoSimd<$t> {
#[inline(always)]
fn zero() -> Self {
AutoSimd([<$elt>::zero(); $lanes])
}
#[inline(always)]
fn is_zero(&self) -> bool {
*self == Self::zero()
}
}
impl One for AutoSimd<$t> {
#[inline(always)]
fn one() -> Self {
AutoSimd([<$elt>::one(); $lanes])
}
}
impl Add<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn add(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x + y)
}
}
impl Sub<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x - y)
}
}
impl Mul<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x * y)
}
}
impl Div<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn div(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x / y)
}
}
impl Rem<AutoSimd<$t>> for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn rem(self, rhs: Self) -> Self {
self.zip_map(rhs, |x, y| x % y)
}
}
impl AddAssign<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl SubAssign<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
impl DivAssign<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn div_assign(&mut self, rhs: Self) {
*self = *self / rhs;
}
}
impl MulAssign<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl RemAssign<AutoSimd<$t>> for AutoSimd<$t> {
#[inline(always)]
fn rem_assign(&mut self, rhs: Self) {
*self = *self % rhs;
}
}
impl SimdPartialOrd for AutoSimd<$t> {
#[inline(always)]
fn simd_gt(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_gt(y))
}
#[inline(always)]
fn simd_lt(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_lt(y))
}
#[inline(always)]
fn simd_ge(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_ge(y))
}
#[inline(always)]
fn simd_le(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_le(y))
}
#[inline(always)]
fn simd_eq(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_eq(y))
}
#[inline(always)]
fn simd_ne(self, other: Self) -> Self::SimdBool {
self.zip_map_bool(other, |x, y| x.simd_ne(y))
}
#[inline(always)]
fn simd_max(self, other: Self) -> Self {
self.zip_map(other, |x, y| x.simd_max(y))
}
#[inline(always)]
fn simd_min(self, other: Self) -> Self {
self.zip_map(other, |x, y| x.simd_min(y))
}
#[inline(always)]
fn simd_clamp(self, min: Self, max: Self) -> Self {
self.simd_max(min).simd_min(max)
}
#[inline(always)]
fn simd_horizontal_min(self) -> Self::Element {
ident_to_value!();
self.0[0] $(.simd_min(self.0[$i]))*
}
#[inline(always)]
fn simd_horizontal_max(self) -> Self::Element {
ident_to_value!();
self.0[0] $(.simd_max(self.0[$i]))*
}
}
)*)
);
macro_rules! impl_int_simd(
($($t: ty, $elt: ty, $lanes: expr, $bool: ty, $($i: ident),*;)*) => ($(
impl_uint_simd!($t, $elt, $lanes, $bool $(, $i)*;);
impl Neg for AutoSimd<$t> {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
self.map(|x| -x)
}
}
)*)
);
macro_rules! impl_float_simd(
($($t: ty, $elt: ty, $lanes: expr, $int: ty, $bool: ty, $($i: ident),*;)*) => ($(
impl_int_simd!($t, $elt, $lanes, $bool $(, $i)*;);
impl SimdSigned for AutoSimd<$t> {
#[inline(always)]
fn simd_abs(&self) -> Self {
self.map(|x| x.simd_abs())
}
#[inline(always)]
fn simd_abs_sub(&self, other: &Self) -> Self {
self.zip_map(*other, |x, y| x.simd_abs_sub(&y))
}
#[inline(always)]
fn simd_signum(&self) -> Self {
self.map(|x| x.simd_signum())
}
#[inline(always)]
fn is_simd_positive(&self) -> Self::SimdBool {
self.map_bool(|x| x.is_simd_positive())
}
#[inline(always)]
fn is_simd_negative(&self) -> Self::SimdBool {
self.map_bool(|x| x.is_simd_negative())
}
}
impl Field for AutoSimd<$t> {}
#[cfg(any(feature = "std", feature = "libm", feature = "libm_force", all(any(target_arch = "nvptx", target_arch = "nvptx64"), feature = "cuda")))]
impl SimdRealField for AutoSimd<$t> {
#[inline(always)]
fn simd_atan2(self, other: Self) -> Self {
self.zip_map(other, |x, y| x.simd_atan2(y))
}
#[inline(always)]
fn simd_copysign(self, sign: Self) -> Self {
self.zip_map(sign, |me, sgn| me.simd_copysign(sgn))
}
#[inline(always)]
fn simd_default_epsilon() -> Self {
Self::splat(<$elt>::default_epsilon())
}
#[inline(always)]
fn simd_pi() -> Self {
Self::splat(<$elt>::simd_pi())
}
#[inline(always)]
fn simd_two_pi() -> Self {
Self::splat(<$elt>::simd_two_pi())
}
#[inline(always)]
fn simd_frac_pi_2() -> Self {
Self::splat(<$elt>::simd_frac_pi_2())
}
#[inline(always)]
fn simd_frac_pi_3() -> Self {
Self::splat(<$elt>::simd_frac_pi_3())
}
#[inline(always)]
fn simd_frac_pi_4() -> Self {
Self::splat(<$elt>::simd_frac_pi_4())
}
#[inline(always)]
fn simd_frac_pi_6() -> Self {
Self::splat(<$elt>::simd_frac_pi_6())
}
#[inline(always)]
fn simd_frac_pi_8() -> Self {
Self::splat(<$elt>::simd_frac_pi_8())
}
#[inline(always)]
fn simd_frac_1_pi() -> Self {
Self::splat(<$elt>::simd_frac_1_pi())
}
#[inline(always)]
fn simd_frac_2_pi() -> Self {
Self::splat(<$elt>::simd_frac_2_pi())
}
#[inline(always)]
fn simd_frac_2_sqrt_pi() -> Self {
Self::splat(<$elt>::simd_frac_2_sqrt_pi())
}
#[inline(always)]
fn simd_e() -> Self {
Self::splat(<$elt>::simd_e())
}
#[inline(always)]
fn simd_log2_e() -> Self {
Self::splat(<$elt>::simd_log2_e())
}
#[inline(always)]
fn simd_log10_e() -> Self {
Self::splat(<$elt>::simd_log10_e() )
}
#[inline(always)]
fn simd_ln_2() -> Self {
Self::splat(<$elt>::simd_ln_2())
}
#[inline(always)]
fn simd_ln_10() -> Self {
Self::splat(<$elt>::simd_ln_10())
}
}
#[cfg(any(feature = "std", feature = "libm", feature = "libm_force", all(any(target_arch = "nvptx", target_arch = "nvptx64"), feature = "cuda")))]
impl SimdComplexField for AutoSimd<$t> {
type SimdRealField = Self;
#[inline(always)]
fn simd_horizontal_sum(self) -> Self::Element {
self.0.iter().sum()
}
#[inline(always)]
fn simd_horizontal_product(self) -> Self::Element {
self.0.iter().product()
}
#[inline(always)]
fn from_simd_real(re: Self::SimdRealField) -> Self {
re
}
#[inline(always)]
fn simd_real(self) -> Self::SimdRealField {
self
}
#[inline(always)]
fn simd_imaginary(self) -> Self::SimdRealField {
Self::zero()
}
#[inline(always)]
fn simd_norm1(self) -> Self::SimdRealField {
self.map(|x| x.simd_norm1())
}
#[inline(always)]
fn simd_modulus(self) -> Self::SimdRealField {
self.map(|x| x.simd_modulus())
}
#[inline(always)]
fn simd_modulus_squared(self) -> Self::SimdRealField {
self.map(|x| x.simd_modulus_squared())
}
#[inline(always)]
fn simd_argument(self) -> Self::SimdRealField {
self.map(|x| x.simd_argument())
}
#[inline(always)]
fn simd_to_exp(self) -> (Self::SimdRealField, Self) {
let ge = self.simd_ge(Self::one());
let exp = Self::one().select(ge, -Self::one());
(self * exp, exp)
}
#[inline(always)]
fn simd_recip(self) -> Self {
self.map(|x| x.simd_recip())
}
#[inline(always)]
fn simd_conjugate(self) -> Self {
self.map(|x| x.simd_conjugate())
}
#[inline(always)]
fn simd_scale(self, factor: Self::SimdRealField) -> Self {
self.zip_map(factor, |x, y| x.simd_scale(y))
}
#[inline(always)]
fn simd_unscale(self, factor: Self::SimdRealField) -> Self {
self.zip_map(factor, |x, y| x.simd_unscale(y))
}
#[inline(always)]
fn simd_floor(self) -> Self {
self.map(|e| e.simd_floor())
}
#[inline(always)]
fn simd_ceil(self) -> Self {
self.map(|e| e.simd_ceil())
}
#[inline(always)]
fn simd_round(self) -> Self {
self.map(|e| e.simd_round())
}
#[inline(always)]
fn simd_trunc(self) -> Self {
self.map(|e| e.simd_trunc())
}
#[inline(always)]
fn simd_fract(self) -> Self {
self.map(|e| e.simd_fract())
}
#[inline(always)]
fn simd_abs(self) -> Self {
self.map(|e| e.simd_abs())
}
#[inline(always)]
fn simd_signum(self) -> Self {
self.map(|e| e.simd_signum())
}
#[inline(always)]
fn simd_mul_add(self, a: Self, b: Self) -> Self {
self.zip_zip_map(a, b, |x, y, z| x.simd_mul_add(y, z))
}
#[inline(always)]
fn simd_powi(self, n: i32) -> Self {
self.map(|e| e.simd_powi(n))
}
#[inline(always)]
fn simd_powf(self, n: Self) -> Self {
self.zip_map(n, |x, y| x.simd_powf(y))
}
#[inline(always)]
fn simd_powc(self, n: Self) -> Self {
self.zip_map(n, |x, y| x.simd_powc(y))
}
#[inline(always)]
fn simd_sqrt(self) -> Self {
self.map(|x| x.simd_sqrt())
}
#[inline(always)]
fn simd_exp(self) -> Self {
self.map(|x| x.simd_exp())
}
#[inline(always)]
fn simd_exp2(self) -> Self {
self.map(|x| x.simd_exp2())
}
#[inline(always)]
fn simd_exp_m1(self) -> Self {
self.map(|x| x.simd_exp_m1())
}
#[inline(always)]
fn simd_ln_1p(self) -> Self {
self.map(|x| x.simd_ln_1p())
}
#[inline(always)]
fn simd_ln(self) -> Self {
self.map(|x| x.simd_ln())
}
#[inline(always)]
fn simd_log(self, base: Self) -> Self {
self.zip_map(base, |x, y| x.simd_log(y))
}
#[inline(always)]
fn simd_log2(self) -> Self {
self.map(|x| x.simd_log2())
}
#[inline(always)]
fn simd_log10(self) -> Self {
self.map(|x| x.simd_log10())
}
#[inline(always)]
fn simd_cbrt(self) -> Self {
self.map(|x| x.simd_cbrt())
}
#[inline(always)]
fn simd_hypot(self, other: Self) -> Self::SimdRealField {
self.zip_map(other, |x, y| x.simd_hypot(y))
}
#[inline(always)]
fn simd_sin(self) -> Self {
self.map(|x| x.simd_sin())
}
#[inline(always)]
fn simd_cos(self) -> Self {
self.map(|x| x.simd_cos())
}
#[inline(always)]
fn simd_tan(self) -> Self {
self.map(|x| x.simd_tan())
}
#[inline(always)]
fn simd_asin(self) -> Self {
self.map(|x| x.simd_asin())
}
#[inline(always)]
fn simd_acos(self) -> Self {
self.map(|x| x.simd_acos())
}
#[inline(always)]
fn simd_atan(self) -> Self {
self.map(|x| x.simd_atan())
}
#[inline(always)]
fn simd_sin_cos(self) -> (Self, Self) {
(self.simd_sin(), self.simd_cos())
}
#[inline(always)]
fn simd_sinh(self) -> Self {
self.map(|x| x.simd_sinh())
}
#[inline(always)]
fn simd_cosh(self) -> Self {
self.map(|x| x.simd_cosh())
}
#[inline(always)]
fn simd_tanh(self) -> Self {
self.map(|x| x.simd_tanh())
}
#[inline(always)]
fn simd_asinh(self) -> Self {
self.map(|x| x.simd_asinh())
}
#[inline(always)]
fn simd_acosh(self) -> Self {
self.map(|x| x.simd_acosh())
}
#[inline(always)]
fn simd_atanh(self) -> Self {
self.map(|x| x.simd_atanh())
}
}
#[cfg(any(feature = "std", feature = "libm", feature = "libm_force", all(any(target_arch = "nvptx", target_arch = "nvptx64"), feature = "cuda")))]
impl SimdComplexField for num_complex::Complex<AutoSimd<$t>> {
type SimdRealField = AutoSimd<$t>;
#[inline(always)]
fn simd_horizontal_sum(self) -> Self::Element {
num_complex::Complex::new(self.re.simd_horizontal_sum(), self.im.simd_horizontal_sum())
}
#[inline(always)]
fn simd_horizontal_product(self) -> Self::Element {
let mut prod = self.extract(0);
for ii in 1..$lanes {
prod = prod * self.extract(ii)
}
prod
}
#[inline]
fn from_simd_real(re: Self::SimdRealField) -> Self {
Self::new(re, Self::SimdRealField::zero())
}
#[inline]
fn simd_real(self) -> Self::SimdRealField {
self.re
}
#[inline]
fn simd_imaginary(self) -> Self::SimdRealField {
self.im
}
#[inline]
fn simd_argument(self) -> Self::SimdRealField {
self.im.simd_atan2(self.re)
}
#[inline]
fn simd_modulus(self) -> Self::SimdRealField {
self.re.simd_hypot(self.im)
}
#[inline]
fn simd_modulus_squared(self) -> Self::SimdRealField {
self.re * self.re + self.im * self.im
}
#[inline]
fn simd_norm1(self) -> Self::SimdRealField {
self.re.simd_abs() + self.im.simd_abs()
}
#[inline]
fn simd_recip(self) -> Self {
Self::one() / self
}
#[inline]
fn simd_conjugate(self) -> Self {
self.conj()
}
#[inline]
fn simd_scale(self, factor: Self::SimdRealField) -> Self {
self * factor
}
#[inline]
fn simd_unscale(self, factor: Self::SimdRealField) -> Self {
self / factor
}
#[inline]
fn simd_floor(self) -> Self {
Self::new(self.re.simd_floor(), self.im.simd_floor())
}
#[inline]
fn simd_ceil(self) -> Self {
Self::new(self.re.simd_ceil(), self.im.simd_ceil())
}
#[inline]
fn simd_round(self) -> Self {
Self::new(self.re.simd_round(), self.im.simd_round())
}
#[inline]
fn simd_trunc(self) -> Self {
Self::new(self.re.simd_trunc(), self.im.simd_trunc())
}
#[inline]
fn simd_fract(self) -> Self {
Self::new(self.re.simd_fract(), self.im.simd_fract())
}
#[inline]
fn simd_mul_add(self, a: Self, b: Self) -> Self {
self * a + b
}
#[inline]
fn simd_abs(self) -> Self::SimdRealField {
self.simd_modulus()
}
#[inline]
fn simd_exp2(self) -> Self {
let _2 = AutoSimd::<$t>::one() + AutoSimd::<$t>::one();
num_complex::Complex::new(_2, AutoSimd::<$t>::zero()).simd_powc(self)
}
#[inline]
fn simd_exp_m1(self) -> Self {
self.simd_exp() - Self::one()
}
#[inline]
fn simd_ln_1p(self) -> Self {
(Self::one() + self).simd_ln()
}
#[inline]
fn simd_log2(self) -> Self {
let _2 = AutoSimd::<$t>::one() + AutoSimd::<$t>::one();
self.simd_log(_2)
}
#[inline]
fn simd_log10(self) -> Self {
let _10 = AutoSimd::<$t>::from_subset(&10.0f64);
self.simd_log(_10)
}
#[inline]
fn simd_cbrt(self) -> Self {
let one_third = AutoSimd::<$t>::from_subset(&(1.0 / 3.0));
self.simd_powf(one_third)
}
#[inline]
fn simd_powi(self, n: i32) -> Self {
let n = AutoSimd::<$t>::from_subset(&(n as f64));
self.simd_powf(n)
}
#[inline]
fn simd_exp(self) -> Self {
simd_complex_from_polar(self.re.simd_exp(), self.im)
}
#[inline]
fn simd_ln(self) -> Self {
let (r, theta) = self.simd_to_polar();
Self::new(r.simd_ln(), theta)
}
#[inline]
fn simd_sqrt(self) -> Self {
let two = AutoSimd::<$t>::one() + AutoSimd::<$t>::one();
let (r, theta) = self.simd_to_polar();
simd_complex_from_polar(r.simd_sqrt(), theta / two)
}
#[inline]
fn simd_hypot(self, b: Self) -> Self::SimdRealField {
(self.simd_modulus_squared() + b.simd_modulus_squared()).simd_sqrt()
}
#[inline]
fn simd_powf(self, exp: Self::SimdRealField) -> Self {
let (r, theta) = self.simd_to_polar();
simd_complex_from_polar(r.simd_powf(exp), theta * exp)
}
#[inline]
fn simd_log(self, base: AutoSimd<$t>) -> Self {
let (r, theta) = self.simd_to_polar();
Self::new(r.simd_log(base), theta / base.simd_ln())
}
#[inline]
fn simd_powc(self, exp: Self) -> Self {
let (r, theta) = self.simd_to_polar();
simd_complex_from_polar(
r.simd_powf(exp.re) * (-exp.im * theta).simd_exp(),
exp.re * theta + exp.im * r.simd_ln(),
)
}
#[inline]
fn simd_sin(self) -> Self {
Self::new(
self.re.simd_sin() * self.im.simd_cosh(),
self.re.simd_cos() * self.im.simd_sinh(),
)
}
#[inline]
fn simd_cos(self) -> Self {
Self::new(
self.re.simd_cos() * self.im.simd_cosh(),
-self.re.simd_sin() * self.im.simd_sinh(),
)
}
#[inline]
fn simd_sin_cos(self) -> (Self, Self) {
let (rsin, rcos) = self.re.simd_sin_cos();
let (isinh, icosh) = self.im.simd_sinh_cosh();
let sin = Self::new(rsin * icosh, rcos * isinh);
let cos = Self::new(rcos * icosh, -rsin * isinh);
(sin, cos)
}
#[inline]
fn simd_tan(self) -> Self {
let (two_re, two_im) = (self.re + self.re, self.im + self.im);
Self::new(two_re.simd_sin(), two_im.simd_sinh()).unscale(two_re.simd_cos() + two_im.simd_cosh())
}
#[inline]
fn simd_asin(self) -> Self {
let i = Self::i();
-i * ((Self::one() - self * self).simd_sqrt() + i * self).simd_ln()
}
#[inline]
fn simd_acos(self) -> Self {
let i = Self::i();
-i * (i * (Self::one() - self * self).simd_sqrt() + self).simd_ln()
}
#[inline]
fn simd_atan(self) -> Self {
let i = Self::i();
let one = Self::one();
let two = one + one;
if self == i {
return Self::new(AutoSimd::<$t>::zero(), AutoSimd::<$t>::one() / AutoSimd::<$t>::zero());
} else if self == -i {
return Self::new(AutoSimd::<$t>::zero(), -AutoSimd::<$t>::one() / AutoSimd::<$t>::zero());
}
((one + i * self).simd_ln() - (one - i * self).simd_ln()) / (two * i)
}
#[inline]
fn simd_sinh(self) -> Self {
Self::new(
self.re.simd_sinh() * self.im.simd_cos(),
self.re.simd_cosh() * self.im.simd_sin(),
)
}
#[inline]
fn simd_cosh(self) -> Self {
Self::new(
self.re.simd_cosh() * self.im.simd_cos(),
self.re.simd_sinh() * self.im.simd_sin(),
)
}
#[inline]
fn simd_sinh_cosh(self) -> (Self, Self) {
let (rsinh, rcosh) = self.re.simd_sinh_cosh();
let (isin, icos) = self.im.simd_sin_cos();
let sin = Self::new(rsinh * icos, rcosh * isin);
let cos = Self::new(rcosh * icos, rsinh * isin);
(sin, cos)
}
#[inline]
fn simd_tanh(self) -> Self {
let (two_re, two_im) = (self.re + self.re, self.im + self.im);
Self::new(two_re.simd_sinh(), two_im.simd_sin()).unscale(two_re.simd_cosh() + two_im.simd_cos())
}
#[inline]
fn simd_asinh(self) -> Self {
let one = Self::one();
(self + (one + self * self).simd_sqrt()).simd_ln()
}
#[inline]
fn simd_acosh(self) -> Self {
let one = Self::one();
let two = one + one;
two * (((self + one) / two).simd_sqrt() + ((self - one) / two).simd_sqrt()).simd_ln()
}
#[inline]
fn simd_atanh(self) -> Self {
let one = Self::one();
let two = one + one;
if self == one {
return Self::new(AutoSimd::<$t>::one() / AutoSimd::<$t>::zero(), AutoSimd::<$t>::zero());
} else if self == -one {
return Self::new(-AutoSimd::<$t>::one() / AutoSimd::<$t>::zero(), AutoSimd::<$t>::zero());
}
((one + self).simd_ln() - (one - self).simd_ln()) / two
}
}
)*)
);
#[inline]
fn simd_complex_from_polar<N: SimdRealField>(r: N, theta: N) -> num_complex::Complex<N> {
num_complex::Complex::new(r.clone() * theta.clone().simd_cos(), r * theta.simd_sin())
}
impl_float_simd!(
[f32; 2], f32, 2, [i32; 2], AutoBoolx2, _0, _1;
[f32; 4], f32, 4, [i32; 4], AutoBoolx4, _0, _1, _2, _3;
[f32; 8], f32, 8, [i32; 8], AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[f32; 16], f32, 16, [i32; 16], AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[f64; 2], f64, 2, [i64; 2], AutoBoolx2, _0, _1;
[f64; 4], f64, 4, [i64; 4], AutoBoolx4, _0, _1, _2, _3;
[f64; 8], f64, 8, [i64; 8], AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
);
impl_int_simd!(
[i128; 1], i128, 1, AutoBoolx1, _0;
[i128; 2], i128, 2, AutoBoolx2, _0, _1;
[i128; 4], i128, 4, AutoBoolx4, _0, _1, _2, _3;
[i16; 2], i16, 2, AutoBoolx2, _0, _1;
[i16; 4], i16, 4, AutoBoolx4, _0, _1, _2, _3;
[i16; 8], i16, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[i16; 16], i16, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[i16; 32], i16, 32, AutoBoolx32, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31;
[i32; 2], i32, 2, AutoBoolx2, _0, _1;
[i32; 4], i32, 4, AutoBoolx4, _0, _1, _2, _3;
[i32; 8], i32, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[i32; 16], i32, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[i64; 2], i64, 2, AutoBoolx2, _0, _1;
[i64; 4], i64, 4, AutoBoolx4, _0, _1, _2, _3;
[i64; 8], i64, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[i8; 2], i8, 2, AutoBoolx2, _0, _1;
[i8; 4], i8, 4, AutoBoolx4, _0, _1, _2, _3;
[i8; 8], i8, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[i8; 16], i8, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[i8; 32], i8, 32, AutoBoolx32, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31;
[isize; 2], isize, 2, AutoBoolx2, _0, _1;
[isize; 4], isize, 4, AutoBoolx4, _0, _1, _2, _3;
[isize; 8], isize, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
);
impl_uint_simd!(
[u128; 1], u128, 1, AutoBoolx1, _0;
[u128; 2], u128, 2, AutoBoolx2, _0, _1;
[u128; 4], u128, 4, AutoBoolx4, _0, _1, _2, _3;
[u16; 2], u16, 2, AutoBoolx2, _0, _1;
[u16; 4], u16, 4, AutoBoolx4, _0, _1, _2, _3;
[u16; 8], u16, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[u16; 16], u16, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[u16; 32], u16, 32, AutoBoolx32, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31;
[u32; 2], u32, 2, AutoBoolx2, _0, _1;
[u32; 4], u32, 4, AutoBoolx4, _0, _1, _2, _3;
[u32; 8], u32, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[u32; 16], u32, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[u64; 2], u64, 2, AutoBoolx2, _0, _1;
[u64; 4], u64, 4, AutoBoolx4, _0, _1, _2, _3;
[u64; 8], u64, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[u8; 2], u8, 2, AutoBoolx2, _0, _1;
[u8; 4], u8, 4, AutoBoolx4, _0, _1, _2, _3;
[u8; 8], u8, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
[u8; 16], u8, 16, AutoBoolx16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[u8; 32], u8, 32, AutoBoolx32, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31;
[usize; 2], usize, 2, AutoBoolx2, _0, _1;
[usize; 4], usize, 4, AutoBoolx4, _0, _1, _2, _3;
[usize; 8], usize, 8, AutoBoolx8, _0, _1, _2, _3, _4, _5, _6, _7;
);
impl_bool_simd!(
[bool; 1], 1, _0;
[bool; 2], 2, _0, _1;
[bool; 4], 4, _0, _1, _2, _3;
[bool; 8], 8, _0, _1, _2, _3, _4, _5, _6, _7;
[bool; 16], 16, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15;
[bool; 32], 32, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31;
);
pub type AutoF32x2 = AutoSimd<[f32; 2]>;
pub type AutoF32x4 = AutoSimd<[f32; 4]>;
pub type AutoF32x8 = AutoSimd<[f32; 8]>;
pub type AutoF32x16 = AutoSimd<[f32; 16]>;
pub type AutoF64x2 = AutoSimd<[f64; 2]>;
pub type AutoF64x4 = AutoSimd<[f64; 4]>;
pub type AutoF64x8 = AutoSimd<[f64; 8]>;
pub type AutoI128x1 = AutoSimd<[i128; 1]>;
pub type AutoI128x2 = AutoSimd<[i128; 2]>;
pub type AutoI128x4 = AutoSimd<[i128; 4]>;
pub type AutoI16x2 = AutoSimd<[i16; 2]>;
pub type AutoI16x4 = AutoSimd<[i16; 4]>;
pub type AutoI16x8 = AutoSimd<[i16; 8]>;
pub type AutoI16x16 = AutoSimd<[i16; 16]>;
pub type AutoI16x32 = AutoSimd<[i16; 32]>;
pub type AutoI32x2 = AutoSimd<[i32; 2]>;
pub type AutoI32x4 = AutoSimd<[i32; 4]>;
pub type AutoI32x8 = AutoSimd<[i32; 8]>;
pub type AutoI32x16 = AutoSimd<[i32; 16]>;
pub type AutoI64x2 = AutoSimd<[i64; 2]>;
pub type AutoI64x4 = AutoSimd<[i64; 4]>;
pub type AutoI64x8 = AutoSimd<[i64; 8]>;
pub type AutoI8x2 = AutoSimd<[i8; 2]>;
pub type AutoI8x4 = AutoSimd<[i8; 4]>;
pub type AutoI8x8 = AutoSimd<[i8; 8]>;
pub type AutoI8x16 = AutoSimd<[i8; 16]>;
pub type AutoI8x32 = AutoSimd<[i8; 32]>;
pub type AutoIsizex2 = AutoSimd<[isize; 2]>;
pub type AutoIsizex4 = AutoSimd<[isize; 4]>;
pub type AutoIsizex8 = AutoSimd<[isize; 8]>;
pub type AutoU128x1 = AutoSimd<[u128; 1]>;
pub type AutoU128x2 = AutoSimd<[u128; 2]>;
pub type AutoU128x4 = AutoSimd<[u128; 4]>;
pub type AutoU16x2 = AutoSimd<[u16; 2]>;
pub type AutoU16x4 = AutoSimd<[u16; 4]>;
pub type AutoU16x8 = AutoSimd<[u16; 8]>;
pub type AutoU16x16 = AutoSimd<[u16; 16]>;
pub type AutoU16x32 = AutoSimd<[u16; 32]>;
pub type AutoU32x2 = AutoSimd<[u32; 2]>;
pub type AutoU32x4 = AutoSimd<[u32; 4]>;
pub type AutoU32x8 = AutoSimd<[u32; 8]>;
pub type AutoU32x16 = AutoSimd<[u32; 16]>;
pub type AutoU64x2 = AutoSimd<[u64; 2]>;
pub type AutoU64x4 = AutoSimd<[u64; 4]>;
pub type AutoU64x8 = AutoSimd<[u64; 8]>;
pub type AutoU8x2 = AutoSimd<[u8; 2]>;
pub type AutoU8x4 = AutoSimd<[u8; 4]>;
pub type AutoU8x8 = AutoSimd<[u8; 8]>;
pub type AutoU8x16 = AutoSimd<[u8; 16]>;
pub type AutoU8x32 = AutoSimd<[u8; 32]>;
pub type AutoUsizex2 = AutoSimd<[usize; 2]>;
pub type AutoUsizex4 = AutoSimd<[usize; 4]>;
pub type AutoUsizex8 = AutoSimd<[usize; 8]>;
pub type AutoBoolx1 = AutoSimd<[bool; 1]>;
pub type AutoBoolx16 = AutoSimd<[bool; 16]>;
pub type AutoBoolx2 = AutoSimd<[bool; 2]>;
pub type AutoBoolx32 = AutoSimd<[bool; 32]>;
pub type AutoBoolx4 = AutoSimd<[bool; 4]>;
pub type AutoBoolx8 = AutoSimd<[bool; 8]>;
trait ArrTransform: SimdValue {
fn map(self, f: impl Fn(Self::Element) -> Self::Element) -> Self;
fn zip_map(
self,
other: Self,
f: impl Fn(Self::Element, Self::Element) -> Self::Element,
) -> Self;
fn zip_zip_map(
self,
b: Self,
c: Self,
f: impl Fn(Self::Element, Self::Element, Self::Element) -> Self::Element,
) -> Self;
fn map_bool(self, f: impl Fn(Self::Element) -> bool) -> Self::SimdBool;
fn zip_map_bool(
self,
other: Self,
f: impl Fn(Self::Element, Self::Element) -> bool,
) -> Self::SimdBool;
}