pxfm/
common.rs

/*
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bits::EXP_MASK;
use num_traits::MulAdd;
use std::ops::{Add, Mul};

/// Checks whether `x` is a whole number.
#[inline]
pub(crate) fn is_integerf(x: f32) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware rounding is cheap here: integral values round to themselves.
        x.round_ties_even() == x
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Bit-level fallback: `x` is an integer iff its least-significant set
        // bit has weight >= 1, i.e. biased exponent + mantissa trailing zeros
        // reaches E_BIAS + 23. (Unlike the path above, ±0.0 lands on `false`.)
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK_F32) >> 23;
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        x_e + lsb >= UNIT_EXPONENT
    }
}
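
// Worked example of the fallback bit trick (illustrative only): 6.0f32 has bits
// 0x40C0_0000, so x_e = 129 and, with the exponent field forced to all ones,
// the lowest set bit is bit 22, giving lsb = 22; 129 + 22 = 151 >= 150, so 6.0
// is an integer. For 6.01 the mantissa has low bits set, lsb is small, and the
// sum stays below 150.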

/// Checks whether `x` is an odd integer.
#[inline]
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    #[cfg(target_arch = "aarch64")]
    {
        // Cheap truncating conversion; only meaningful when `x` is an integral
        // value representable in `i32`.
        (x as i32 & 1) != 0
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        // An odd integer has its least-significant set bit exactly at the units
        // position, so the sum must equal E_BIAS + 23 exactly.
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK_F32) >> 23;
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;

        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        x_e + lsb == UNIT_EXPONENT
    }
}
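
// Example (illustrative only): 5.0f32 = 1.25 * 2^2 has x_e = 129 and lsb = 21,
// so 129 + 21 == 150 and 5.0 is odd; 6.0 gives 129 + 22 = 151 != 150, so even.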

/// Checks whether `n` is a whole number.
#[inline]
pub(crate) fn is_integer(n: f64) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        n == n.round_ties_even()
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // f64 analogue of `is_integerf`: biased exponent + mantissa trailing
        // zeros must reach E_BIAS + 52 for the units bit to be integral.
        let x_u = n.to_bits();
        let x_e = (x_u & EXP_MASK) >> 52;
        let lsb = (x_u | EXP_MASK).trailing_zeros();
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        x_e + lsb as u64 >= UNIT_EXPONENT
    }
}

/// Checks whether `x` is an odd integer.
#[inline]
pub(crate) fn is_odd_integer(x: f64) -> bool {
    let x_u = x.to_bits();
    let x_e = (x_u & EXP_MASK) >> 52;
    let lsb = (x_u | EXP_MASK).trailing_zeros();
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    const UNIT_EXPONENT: u64 = E_BIAS + 52;
    x_e + lsb as u64 == UNIT_EXPONENT
}
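
// Example (illustrative only): 5.0f64 = 1.25 * 2^2 has a biased exponent of
// 1025 and its lowest set mantissa bit at position 50, so 1025 + 50 == 1075
// == E_BIAS + 52, marking 5.0 as odd.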

/// Multiply-accumulate `acc + a * b`, fused on hardware with FMA.
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[inline(always)]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    MulAdd::mul_add(a, b, acc)
}

/// Multiply-accumulate `acc + a * b` (unfused fallback).
#[inline(always)]
#[cfg(not(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
)))]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    acc + a * b
}

/// Rounds to the nearest integer, halfway cases away from zero.
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    (if x < 0. { x - 0.5 } else { x + 0.5 }) as i32 as f32
}

/// Non-fused multiply-add `c + a * b`; `const`-evaluable.
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    c + a * b
}

/// Optional FMA for `f32`: uses hardware FMA when available, otherwise plain `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f32::mul_add(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        a * b + c
    }
}

/// Optional FMA: uses hardware FMA when available, otherwise plain `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f64::mul_add(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        a * b + c
    }
}
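
// Fused and unfused paths can differ by one rounding. Illustrative example:
// with hardware FMA, f_fmla(0.1, 10.0, -1.0) computes 0.1f64 * 10.0 exactly as
// 1 + 2^-54 and returns 2^-54 (about 5.55e-17); the unfused path first rounds
// the product to 1.0 and then returns 0.0.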

/// Non-fused multiply-add `c + a * b`; `const`-evaluable.
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    c + a * b
}

/// Executes a mandatory FMA;
/// when hardware FMA is unavailable it is emulated via Dekker and Veltkamp
/// double-double arithmetic.
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::double_double::DoubleDouble;
        DoubleDouble::dd_f64_mul_add(a, b, c)
    }
}
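
// Background on the general technique (not a description of this crate's
// DoubleDouble internals): Veltkamp splitting cuts each f64 into high and low
// halves whose partial products are exact, and Dekker's product recovers the
// exact rounding error of a * b, letting the fallback evaluate a * b + c far
// more accurately than a plain unfused expression.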

/// Executes a mandatory FMA;
/// when hardware FMA is unavailable it is emulated via 128-bit dyadic floats.
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::dyadic_float::DyadicFloat128;
        let z = DyadicFloat128::new_from_f64(a);
        let k = DyadicFloat128::new_from_f64(b);
        let p = z * k + DyadicFloat128::new_from_f64(c);
        p.fast_as_f64()
    }
}

/// Executes a mandatory FMA for `f32`;
/// when hardware FMA is unavailable the product and sum are evaluated in `f64`,
/// which keeps the `f32` product exact.
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmlaf(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        // a 24-bit by 24-bit product fits exactly in f64's 53-bit mantissa
        (a as f64 * b as f64 + c as f64) as f32
    }
}

/// `mlaf` with the accumulator last: computes `a * b + c`.
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn c_mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    a: T,
    b: T,
    c: T,
) -> T {
    mlaf(c, a, b)
}

/// Copies sign from `y` to `x`
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    f32::from_bits((x.to_bits() & !(1 << 31)) ^ (y.to_bits() & (1 << 31)))
}
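
// Example (illustrative only): copysignfk(3.0, -0.0) == -3.0; only the sign
// bit of `y` is consulted.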

// #[inline]
// // Finds n in ln(x) = ln(a) + n*ln(2)
// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
//     (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
// }
//
// #[inline]
// // Finds a in ln(x) = ln(a) + n*ln(2)
// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
//     f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
// }

/// Computes 2^q
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    f32::from_bits((q.wrapping_add(0x7f) as u32) << 23)
}
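
// Example (illustrative only): pow2if(3) encodes biased exponent 3 + 127 = 130
// with a zero mantissa, producing 8.0; pow2if(-2) gives 0.25. Meaningful only
// for q in -126..=127, where the result stays normal.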

/// Rounds to the nearest integer, halfway cases away from zero.
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    (if x < 0. { x - 0.5 } else { x + 0.5 }) as i64 as f64
}
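
// Example (illustrative only): rintk(2.5) == 3.0 and rintk(-2.5) == -3.0,
// whereas `f64::round_ties_even` would give 2.0 and -2.0.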

/// Computes 2^n
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    f64::from_bits((q.wrapping_add(0x3ff) as u64) << 52)
}

// #[inline]
// pub(crate) const fn ilogb2k(d: f64) -> i32 {
//     (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
// }
//
// #[inline]
// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
//     f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
// }

/// Copies sign from `y` to `x`
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    f64::from_bits((x.to_bits() & !(1 << 63)) ^ (y.to_bits() & (1 << 63)))
}

/// Smallest positive normal `f64`, i.e. 2^-1022.
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    // exponent field = 1, mantissa = 0
    f64::from_bits(1u64 << 52)
}

#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    if len >= 32 {
        u32::MAX // all ones if the requested length is 32 or more
    } else {
        (1u32 << len).wrapping_sub(1)
    }
}

/// Mask of the 8 exponent bits of an `f32`.
pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;

/// Returns `x` with its exponent field replaced by the low 8 bits of `new_exp`.
#[inline]
pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
    let encoded_mask = new_exp.wrapping_shl(23) & EXP_MASK_F32;
    // (x & !EXP_MASK_F32) | encoded_mask, written branch-free with xor
    x ^ ((x ^ encoded_mask) & EXP_MASK_F32)
}
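
// Example (illustrative only): 1.0f32 has bits 0x3F80_0000 and biased exponent
// 127, so set_exponent_f32(1.0f32.to_bits(), 127 + 3) yields 0x4100_0000, the
// bit pattern of 8.0.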

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_is_integer() {
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
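
    // Additional illustrative checks for the bit-twiddling helpers above; not
    // exhaustive, and the expected values follow directly from the IEEE-754
    // encodings described in the comments.
    #[test]
    fn test_bit_helpers() {
        assert_eq!(pow2i(3), 8.0);
        assert_eq!(pow2i(-2), 0.25);
        assert_eq!(pow2if(10), 1024.0);
        assert_eq!(copysignk(3.0, -1.0), -3.0);
        assert_eq!(copysignfk(-3.0, 1.0), 3.0);
        assert_eq!(rintk(2.5), 3.0);
        assert_eq!(rintk(-2.5), -3.0);
        assert_eq!(
            set_exponent_f32(1.0f32.to_bits(), 127 + 3),
            8.0f32.to_bits()
        );
    }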
}