moxcms/
mlaf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use num_traits::MulAdd;
30use std::ops::{Add, Mul, Neg};
31
/// Multiply-accumulate: evaluates `a * b + acc` through a single
/// [`MulAdd::mul_add`] call.
///
/// This definition is compiled only when the build target has a multiply-add
/// capability enabled: x86 / x86_64 with the `fma` target feature, or aarch64
/// with the `neon` target feature. Every other target gets the
/// `cfg(not(...))` definition of the same name instead.
///
/// # Parameters
/// * `acc` - the addend (accumulator).
/// * `a`, `b` - the two factors of the product.
///
/// The `Mul` and `Add` bounds are not used by this body; they are kept so the
/// signature is identical to the fallback definition.
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[inline(always)]
pub(crate) fn mlaf<T>(acc: T, a: T, b: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    // `mul_add` takes the addend last, so the accumulator moves to the end.
    a.mul_add(b, acc)
}
47
48#[inline(always)]
49#[cfg(not(any(
50    all(
51        any(target_arch = "x86", target_arch = "x86_64"),
52        target_feature = "fma"
53    ),
54    all(target_arch = "aarch64", target_feature = "neon")
55)))]
56pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
57    acc: T,
58    a: T,
59    b: T,
60) -> T {
61    acc + a * b
62}
63
64#[inline(always)]
65pub(crate) fn neg_mlaf<
66    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T> + Neg<Output = T>,
67>(
68    acc: T,
69    a: T,
70    b: T,
71) -> T {
72    mlaf(acc, a, -b)
73}
74
75#[inline(always)]
76pub(crate) fn fmla<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
77    a: T,
78    b: T,
79    acc: T,
80) -> T {
81    mlaf(acc, a, b)
82}