// moxcms/mlaf.rs
/*
 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use num_traits::MulAdd;
use std::ops::{Add, Mul, Neg};

/// Multiply-accumulate: returns `a * b + acc` by delegating to
/// [`MulAdd::mul_add`].
///
/// This definition is compiled only when the build target is x86/x86_64 with
/// the `fma` target feature enabled, or aarch64 with `neon` enabled. On every
/// other target the sibling `mlaf` guarded by the inverse `cfg` is used.
///
/// Argument order: the accumulator comes first, followed by the two factors.
///
/// The `Mul` and `Add` bounds are not used by this body; they keep the
/// signature identical to the fallback variant so callers compile unchanged
/// on every target.
///
/// NOTE(review): for floating-point `T` the `MulAdd` implementation is
/// presumably a fused operation with a single rounding step, so results may
/// differ in the last bit from the fallback variant; confirm against the
/// `num_traits` documentation.
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[inline(always)]
pub(crate) fn mlaf<T>(acc: T, a: T, b: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    MulAdd::mul_add(a, b, acc)
}

48#[inline(always)]
49#[cfg(not(any(
50 all(
51 any(target_arch = "x86", target_arch = "x86_64"),
52 target_feature = "fma"
53 ),
54 all(target_arch = "aarch64", target_feature = "neon")
55)))]
56pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
57 acc: T,
58 a: T,
59 b: T,
60) -> T {
61 acc + a * b
62}
63
64#[inline(always)]
65pub(crate) fn neg_mlaf<
66 T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T> + Neg<Output = T>,
67>(
68 acc: T,
69 a: T,
70 b: T,
71) -> T {
72 mlaf(acc, a, -b)
73}
74
75#[inline(always)]
76pub(crate) fn fmla<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
77 a: T,
78 b: T,
79 acc: T,
80) -> T {
81 mlaf(acc, a, b)
82}