pxfm/exponents/
expm1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::polyeval::{f_polyeval5, f_polyeval7};
31use crate::rounding::CpuRound;
32
33/// Computes e^x - 1
34///
35/// Max ULP 0.5
36#[inline]
37pub fn f_expm1f(x: f32) -> f32 {
38    let x_u: u32 = x.to_bits();
39    let x_abs = x_u & 0x7fff_ffffu32;
40
41    // When |x| > 25*log(2), or nan
42    if x_abs >= 0x418a_a123u32 {
43        // x < log(2^-25)
44        if x.is_sign_negative() {
45            // exp(-Inf) = 0
46            if x.is_infinite() {
47                return -1.0;
48            }
49            // exp(nan) = nan
50            if x.is_nan() {
51                return x;
52            }
53            return -1.0;
54        } else {
55            // x >= 89 or nan
56            if x_u >= 0x42b2_0000 {
57                return x + f32::INFINITY;
58            }
59        }
60    }
61
62    // |x| < 2^-4
63    if x_abs < 0x3d80_0000u32 {
64        // |x| < 2^-25
65        if x_abs < 0x3300_0000u32 {
66            // x = -0.0f
67            if x_u == 0x8000_0000u32 {
68                return x;
69            }
70            // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
71            // is:
72            //   |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
73            //                               = |x|
74            //                               < 2^-25
75            //                               < epsilon(1)/2.
76            // To simplify the rounding decision and make it more efficient, we use
77            //   fma(x, x, x) ~ x + x^2 instead.
78            // Note: to use the formula x + x^2 to decide the correct rounding, we
79            // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
80            // 2^-76. For targets without FMA instructions, we simply use double for
81            // intermediate results as it is more efficient than using an emulated
82            // version of FMA.
83            #[cfg(any(
84                all(
85                    any(target_arch = "x86", target_arch = "x86_64"),
86                    target_feature = "fma"
87                ),
88                target_arch = "aarch64"
89            ))]
90            {
91                use crate::common::f_fmlaf;
92                return f_fmlaf(x, x, x);
93            }
94            #[cfg(not(any(
95                all(
96                    any(target_arch = "x86", target_arch = "x86_64"),
97                    target_feature = "fma"
98                ),
99                target_arch = "aarch64"
100            )))]
101            {
102                let xd = x as f64;
103                return f_fmla(xd, xd, xd) as f32;
104            }
105        }
106
107        const C: [u64; 7] = [
108            0x3fe0000000000000,
109            0x3fc55555555557dd,
110            0x3fa55555555552fa,
111            0x3f8111110fcd58b7,
112            0x3f56c16c1717660b,
113            0x3f2a0241f0006d62,
114            0x3efa01e3f8d3c060,
115        ];
116
117        // 2^-25 <= |x| < 2^-4
118        let xd = x as f64;
119        let xsq = xd * xd;
120        // Degree-8 minimax polynomial generated by Sollya with:
121        // > display = hexadecimal;
122        // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
123
124        return f_fmla(
125            f_polyeval7(
126                xd,
127                f64::from_bits(C[0]),
128                f64::from_bits(C[1]),
129                f64::from_bits(C[2]),
130                f64::from_bits(C[3]),
131                f64::from_bits(C[4]),
132                f64::from_bits(C[5]),
133                f64::from_bits(C[6]),
134            ),
135            xsq,
136            xd,
137        ) as f32;
138    }
139
140    // For -104 < x < 89, to compute expm1(x), we perform the following range
141    // reduction: find hi, mid, lo such that:
142    //   x = hi + mid + lo, in which
143    //     hi is an integer,
144    //     mid * 2^7 is an integer
145    //     -2^(-8) <= lo < 2^-8.
146    // In particular,
147    //   hi + mid = round(x * 2^7) * 2^(-7).
148    // Then,
149    //   expm1(x) = expm1(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) - 1.
150    // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
151    // respectively.  exp(lo) is computed using a degree-4 minimax polynomial
152    // generated by Sollya.
153
154    // x_hi = (hi + mid) * 2^7 = round(x * 2^7).
155    let kf = (x * 128.).cpu_round();
156    // Subtract (hi + mid) from x to get lo.
157    let xd = f_fmlaf(kf, -0.0078125 /* - 1/128 */, x) as f64;
158    let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
159    x_hi += 104 << 7;
160    // hi = x_hi >> 7
161    let exp_hi = f64::from_bits(crate::exponents::expf::EXP_M1[(x_hi >> 7) as usize]);
162    // mid * 2^7 = x_hi & 0x0000'007fU;
163    let exp_mid = f64::from_bits(crate::exponents::expf::EXP_M2[(x_hi & 0x7f) as usize]);
164    // Degree-4 minimax polynomial generated by Sollya with the following
165    // commands:
166    // d = [-2^-8, 2^-8];
167    // f_exp = expm1(x)/x;
168    // Q = fpminimax(f_exp, 3, [|D...|], [-2^-8, 2^-8]);
169    let p = f_polyeval5(
170        xd,
171        1.,
172        f64::from_bits(0x3feffffffffff777),
173        f64::from_bits(0x3fe000000000071c),
174        f64::from_bits(0x3fc555566668e5e7),
175        f64::from_bits(0x3fa55555555ef243),
176    );
177    f_fmla(p * exp_hi, exp_mid, -1.) as f32
178}
179
#[cfg(test)]
mod tests {
    use crate::f_expm1f;

    /// Spot-checks `f_expm1f` on small, moderate, large, infinite and NaN
    /// inputs against exact expected `f32` results.
    #[test]
    fn test_expm1f() {
        // (input, expected) pairs; each result must compare equal exactly.
        const CASES: &[(f32, f32)] = &[
            (-3.0923562e-5, -3.0923085e-5),
            (2.213121, 8.144211),
            (-3.213121, -0.9597691),
            (-2.35099e-38, -2.35099e-38),
            (
                0.00000000000000000000000000000000000004355616,
                0.00000000000000000000000000000000000004355616,
            ),
            (25.12315, 81441420000.0),
            (12.986543, 436498.6),
            (-12.986543, -0.99999774),
            (-25.12315, -1.0),
            (f32::INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, -1.),
        ];

        for &(input, expected) in CASES {
            assert_eq!(f_expm1f(input), expected);
        }

        // NaN never compares equal, so it is checked separately.
        assert!(f_expm1f(f32::NAN).is_nan());
    }
}
pxfm/exponents/expm1f.rs

pxfm/exponents/
expm1f.rs