pxfm/compound/
powm1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::compound::compound_m1f::compoundf_expf_poly;
31use crate::compound::compoundf::{
32    COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
33};
34use crate::rounding::CpuRoundTiesEven;
35use std::hint::black_box;
36
37#[inline]
38fn powm1f_log2_fast(x: f64) -> f64 {
39    /* for x > 0, 1+x is exact when 2^-29 <=  x < 2^53
40    for x < 0, 1+x is exact when -1 < x <= 2^-30 */
41
42    //  double u = (x >= 0x1p53) ? x : 1.0 + x;
43    /* For x < 0x1p53, x + 1 is exact thus u = x+1.
44    For x >= 2^53, we estimate log2(x) instead of log2(1+x),
45    since log2(1+x) = log2(x) + log2(1+1/x),
46    log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
47    error is bounded by 2^-52.471/53 < 2^-58.198 */
48
49    let mut v = x.to_bits();
50    let m: u64 = v & 0xfffffffffffffu64;
51    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
52    // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
53    v = v.wrapping_sub((e << 52) as u64);
54    let t = f64::from_bits(v);
55    // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
56    // thus log2(u) = e + log2(t)
57    v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
58    let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
59    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
60    let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
61    // we approximates log2(t) by -log2(r) + log2(r*t)
62    let p = crate::compound::compoundf::log2p1_polyeval_1(z);
63    // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
64    e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
65}
66
67/// Computes x^y - 1
68pub fn f_powm1f(x: f32, y: f32) -> f32 {
69    let ax: u32 = x.to_bits().wrapping_shl(1);
70    let ay: u32 = y.to_bits().wrapping_shl(1);
71
72    // filter out exceptional cases
73    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
74        if x.is_nan() || y.is_nan() {
75            return f32::NAN;
76        }
77
78        // Handle infinities
79        if x.is_infinite() {
80            return if x.is_sign_positive() {
81                if y.is_infinite() {
82                    return f32::INFINITY;
83                } else if y > 0.0 {
84                    f32::INFINITY // inf^positive -> inf
85                } else if y < 0.0 {
86                    -1.0 // inf^negative -> 0, so powm1 = -1
87                } else {
88                    f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
89                }
90            } else {
91                // x = -inf
92                if y.is_infinite() {
93                    return -1.0;
94                }
95                if is_integerf(y) {
96                    // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf
97                    let pow = if y as i32 % 2 == 0 {
98                        f32::INFINITY
99                    } else {
100                        f32::NEG_INFINITY
101                    };
102                    pow - 1.0
103                } else {
104                    f32::NAN // Negative base with non-integer exponent
105                }
106            };
107        }
108
109        // Handle y infinite
110        if y.is_infinite() {
111            return if x.abs() > 1.0 {
112                if y.is_sign_positive() {
113                    f32::INFINITY
114                } else {
115                    -1.0
116                }
117            } else if x.abs() < 1.0 {
118                if y.is_sign_positive() {
119                    -1.0
120                } else {
121                    f32::INFINITY
122                }
123            } else {
124                // |x| == 1
125                f32::NAN // 1^inf or -1^inf is undefined
126            };
127        }
128
129        // Handle zero base
130        if x == 0.0 {
131            return if y > 0.0 {
132                -1.0 // 0^positive -> 0, powm1 = -1
133            } else if y < 0.0 {
134                f32::INFINITY // 0^negative -> inf
135            } else {
136                0.0 // 0^0 -> conventionally 1, powm1 = 0
137            };
138        }
139    }
140
141    let y_integer = is_integerf(y);
142
143    let mut negative_parity: bool = false;
144
145    let mut x = x;
146
147    // Handle negative base with non-integer exponent
148    if x < 0.0 {
149        if !y_integer {
150            return f32::NAN; // x < 0 and non-integer y
151        }
152        x = x.abs();
153        if is_odd_integerf(y) {
154            negative_parity = true;
155        }
156    }
157
158    let xd = x as f64;
159    let yd = y as f64;
160    let tx = xd.to_bits();
161    let ty = yd.to_bits();
162
163    let l: f64 = powm1f_log2_fast(f64::from_bits(tx));
164
165    /* l approximates log2(1+x) with relative error < 2^-47.997,
166    and 2^-149 <= |l| < 128 */
167
168    let dt = l * f64::from_bits(ty);
169    let t: u64 = dt.to_bits();
170
171    // detect overflow/underflow
172    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
173        // |t| >= 128
174        if t >= 0x3018bu64 << 46 {
175            // t <= -150
176            return -1.;
177        } else if (t >> 63) == 0 {
178            // t >= 128: overflow
179            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
180        }
181    }
182
183    let res = powm1_exp2m1_fast(f64::from_bits(t));
184    // For x < 0 and integer y = n:
185    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
186    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
187    if negative_parity {
188        (-res - 2.) as f32
189    } else {
190        res as f32
191    }
192}
193
194#[inline]
195pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
196    let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150
197    let mut r = t - k; // |r| <= 1/2, exact
198    let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625
199    // we add 2^-6 so that i is rounded to nearest
200    let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
201    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
202    // now |r| <= 2^-6
203    // 2^t = 2^k * exp2_U[i][0] * 2^r
204    let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
205    let su = unsafe {
206        k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
207    };
208    s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
209    let q_poly = compoundf_expf_poly(r);
210    v = q_poly;
211
212    #[cfg(any(
213        all(
214            any(target_arch = "x86", target_arch = "x86_64"),
215            target_feature = "fma"
216        ),
217        target_arch = "aarch64"
218    ))]
219    {
220        v = f_fmla(v, s, s - 1f64);
221    }
222    #[cfg(not(any(
223        all(
224            any(target_arch = "x86", target_arch = "x86_64"),
225            target_feature = "fma"
226        ),
227        target_arch = "aarch64"
228    )))]
229    {
230        use crate::double_double::DoubleDouble;
231        let p0 = DoubleDouble::from_full_exact_add(s, -1.);
232        let z = DoubleDouble::from_exact_mult(v, s);
233        v = DoubleDouble::add(z, p0).to_f64();
234    }
235
236    v
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242    #[test]
243    fn test_powm1f() {
244        assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
245        assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
246        assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
247        assert_eq!(f_powm1f(3., 2.), 8.);
248        assert_eq!(f_powm1f(3., 3.), 26.);
249        assert_eq!(f_powm1f(5., 2.), 24.);
250        assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
251        assert_eq!(f_powm1f(-5., 2.), 24.);
252        assert_eq!(f_powm1f(-5., 3.), -126.);
253        assert_eq!(
254            f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
255            1.455057e-38
256        );
257        assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
258        assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
259    }
260}
pxfm/compound/powm1f.rs

pxfm/compound/
powm1f.rs