pxfm/exponents/expm1f.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::polyeval::{f_polyeval5, f_polyeval7};
31use crate::round::RoundFinite;
32
33/// Computes e^x - 1
34///
35/// Max ULP 0.5
36#[inline]
37pub fn f_expm1f(x: f32) -> f32 {
38 let x_u: u32 = x.to_bits();
39 let x_abs = x_u & 0x7fff_ffffu32;
40
41 // When |x| > 25*log(2), or nan
42 if x_abs >= 0x418a_a123u32 {
43 // x < log(2^-25)
44 if x.is_sign_negative() {
45 // exp(-Inf) = 0
46 if x.is_infinite() {
47 return -1.0;
48 }
49 // exp(nan) = nan
50 if x.is_nan() {
51 return x;
52 }
53 return -1.0;
54 } else {
55 // x >= 89 or nan
56 if x_u >= 0x42b2_0000 {
57 return x + f32::INFINITY;
58 }
59 }
60 }
61
62 // |x| < 2^-4
63 if x_abs < 0x3d80_0000u32 {
64 // |x| < 2^-25
65 if x_abs < 0x3300_0000u32 {
66 // x = -0.0f
67 if x_u == 0x8000_0000u32 {
68 return x;
69 }
70 // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
71 // is:
72 // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
73 // = |x|
74 // < 2^-25
75 // < epsilon(1)/2.
76 // To simplify the rounding decision and make it more efficient, we use
77 // fma(x, x, x) ~ x + x^2 instead.
78 // Note: to use the formula x + x^2 to decide the correct rounding, we
79 // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
80 // 2^-76. For targets without FMA instructions, we simply use double for
81 // intermediate results as it is more efficient than using an emulated
82 // version of FMA.
83 #[cfg(any(
84 all(
85 any(target_arch = "x86", target_arch = "x86_64"),
86 target_feature = "fma"
87 ),
88 all(target_arch = "aarch64", target_feature = "neon")
89 ))]
90 {
91 use crate::common::f_fmlaf;
92 return f_fmlaf(x, x, x);
93 }
94 #[cfg(not(any(
95 all(
96 any(target_arch = "x86", target_arch = "x86_64"),
97 target_feature = "fma"
98 ),
99 all(target_arch = "aarch64", target_feature = "neon")
100 )))]
101 {
102 let xd = x as f64;
103 return f_fmla(xd, xd, xd) as f32;
104 }
105 }
106
107 const C: [u64; 7] = [
108 0x3fe0000000000000,
109 0x3fc55555555557dd,
110 0x3fa55555555552fa,
111 0x3f8111110fcd58b7,
112 0x3f56c16c1717660b,
113 0x3f2a0241f0006d62,
114 0x3efa01e3f8d3c060,
115 ];
116
117 // 2^-25 <= |x| < 2^-4
118 let xd = x as f64;
119 let xsq = xd * xd;
120 // Degree-8 minimax polynomial generated by Sollya with:
121 // > display = hexadecimal;
122 // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
123
124 return f_fmla(
125 f_polyeval7(
126 xd,
127 f64::from_bits(C[0]),
128 f64::from_bits(C[1]),
129 f64::from_bits(C[2]),
130 f64::from_bits(C[3]),
131 f64::from_bits(C[4]),
132 f64::from_bits(C[5]),
133 f64::from_bits(C[6]),
134 ),
135 xsq,
136 xd,
137 ) as f32;
138 }
139
140 // For -104 < x < 89, to compute expm1(x), we perform the following range
141 // reduction: find hi, mid, lo such that:
142 // x = hi + mid + lo, in which
143 // hi is an integer,
144 // mid * 2^7 is an integer
145 // -2^(-8) <= lo < 2^-8.
146 // In particular,
147 // hi + mid = round(x * 2^7) * 2^(-7).
148 // Then,
149 // expm1(x) = expm1(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) - 1.
150 // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
151 // respectively. exp(lo) is computed using a degree-4 minimax polynomial
152 // generated by Sollya.
153
154 // x_hi = (hi + mid) * 2^7 = round(x * 2^7).
155 let kf = (x * 128.).round_finite();
156 // Subtract (hi + mid) from x to get lo.
157 let xd = f_fmlaf(kf, -0.0078125 /* - 1/128 */, x) as f64;
158 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
159 x_hi += 104 << 7;
160 // hi = x_hi >> 7
161 let exp_hi = f64::from_bits(crate::exponents::expf::EXP_M1[(x_hi >> 7) as usize]);
162 // mid * 2^7 = x_hi & 0x0000'007fU;
163 let exp_mid = f64::from_bits(crate::exponents::expf::EXP_M2[(x_hi & 0x7f) as usize]);
164 // Degree-4 minimax polynomial generated by Sollya with the following
165 // commands:
166 // d = [-2^-8, 2^-8];
167 // f_exp = expm1(x)/x;
168 // Q = fpminimax(f_exp, 3, [|D...|], [-2^-8, 2^-8]);
169 let p = f_polyeval5(
170 xd,
171 1.,
172 f64::from_bits(0x3feffffffffff777),
173 f64::from_bits(0x3fe000000000071c),
174 f64::from_bits(0x3fc555566668e5e7),
175 f64::from_bits(0x3fa55555555ef243),
176 );
177 f_fmla(p * exp_hi, exp_mid, -1.) as f32
178}
179
#[cfg(test)]
mod tests {
    use crate::f_expm1f;

    /// Checks `f_expm1f` against reference values covering each evaluation
    /// path of the function, plus the special values.
    #[test]
    fn test_expm1f() {
        // (input, expected) pairs that must match exactly.
        let exact_cases: &[(f32, f32)] = &[
            // 2^-25 <= |x| < 2^-4
            (-3.0923562e-5, -3.0923085e-5),
            // Table-driven range reduction.
            (2.213121, 8.144211),
            (-3.213121, -0.9597691),
            // |x| < 2^-25
            (-2.35099e-38, -2.35099e-38),
            (
                0.00000000000000000000000000000000000004355616,
                0.00000000000000000000000000000000000004355616,
            ),
            // Larger magnitudes on both sides of zero.
            (25.12315, 81441420000.0),
            (12.986543, 436498.6),
            (-12.986543, -0.99999774),
            // Negative input beyond 25*log(2) in magnitude gives -1.
            (-25.12315, -1.0),
            // Infinities.
            (f32::INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, -1.),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_expm1f(input), expected, "f_expm1f({})", input);
        }

        // NaN in, NaN out.
        assert!(f_expm1f(f32::NAN).is_nan());
    }
}