// matrixmultiply/x86/mod.rs

// Bring the x86 SIMD vector types and intrinsics (`__m256`, `__m256d`,
// `_mm256_*`) into scope; the module path differs between 32-bit and 64-bit x86.
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

7#[macro_use]
8mod macros;
9
/// Marker type selecting the fused multiply-add implementations of
/// `SMultiplyAdd` / `DMultiplyAdd` (`_mm256_fmadd_ps` / `_mm256_fmadd_pd`).
pub(crate) struct FusedMulAdd;
/// Marker type selecting the unfused implementations of `SMultiplyAdd` /
/// `DMultiplyAdd`: a separate AVX multiply followed by an AVX add.
pub(crate) struct AvxMulAdd;

/// Multiply-add strategy over packed single-precision vectors (`__m256`).
///
/// Implemented by marker types so that code can be generic over whether the
/// `a * b + c` step is fused or done as two separate operations.
pub(crate) trait SMultiplyAdd {
    /// `true` when `multiply_add` is implemented with a fused multiply-add
    /// intrinsic, `false` when it is a separate multiply followed by an add.
    const IS_FUSED: bool;

    /// Returns `a * b + c`, computed lane-wise.
    ///
    /// # Safety
    ///
    /// The CPU executing this must support every target feature required by
    /// the intrinsics the implementation uses (AVX, and additionally FMA for
    /// fused implementations).
    unsafe fn multiply_add(a: __m256, b: __m256, c: __m256) -> __m256;
}

18impl SMultiplyAdd for AvxMulAdd {
19    const IS_FUSED: bool = false;
20    #[inline(always)]
21    unsafe fn multiply_add(a: __m256, b: __m256, c: __m256) -> __m256 {
22        _mm256_add_ps(_mm256_mul_ps(a, b), c)
23    }
24}
25
26impl SMultiplyAdd for FusedMulAdd {
27    const IS_FUSED: bool = true;
28    #[inline(always)]
29    unsafe fn multiply_add(a: __m256, b: __m256, c: __m256) -> __m256 {
30        _mm256_fmadd_ps(a, b, c)
31    }
32}
33
/// Multiply-add strategy over packed double-precision vectors (`__m256d`).
///
/// Implemented by marker types so that code can be generic over whether the
/// `a * b + c` step is fused or done as two separate operations.
pub(crate) trait DMultiplyAdd {
    /// `true` when `multiply_add` is implemented with a fused multiply-add
    /// intrinsic, `false` when it is a separate multiply followed by an add.
    const IS_FUSED: bool;

    /// Returns `a * b + c`, computed lane-wise.
    ///
    /// # Safety
    ///
    /// The CPU executing this must support every target feature required by
    /// the intrinsics the implementation uses (AVX, and additionally FMA for
    /// fused implementations).
    unsafe fn multiply_add(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
}

39impl DMultiplyAdd for AvxMulAdd {
40    const IS_FUSED: bool = false;
41    #[inline(always)]
42    unsafe fn multiply_add(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
43        _mm256_add_pd(_mm256_mul_pd(a, b), c)
44    }
45}
46
47impl DMultiplyAdd for FusedMulAdd {
48    const IS_FUSED: bool = true;
49    #[inline(always)]
50    unsafe fn multiply_add(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
51        _mm256_fmadd_pd(a, b, c)
52    }
53}
54