pxfm/compound/powm1f.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::compound::compound_m1f::compoundf_expf_poly;
31use crate::compound::compoundf::{
32 COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
33};
34use crate::round_ties_even::RoundTiesEven;
35use std::hint::black_box;
36
37#[inline]
38fn powm1f_log2_fast(x: f64) -> f64 {
39 /* for x > 0, 1+x is exact when 2^-29 <= x < 2^53
40 for x < 0, 1+x is exact when -1 < x <= 2^-30 */
41
42 // double u = (x >= 0x1p53) ? x : 1.0 + x;
43 /* For x < 0x1p53, x + 1 is exact thus u = x+1.
44 For x >= 2^53, we estimate log2(x) instead of log2(1+x),
45 since log2(1+x) = log2(x) + log2(1+1/x),
46 log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
47 error is bounded by 2^-52.471/53 < 2^-58.198 */
48
49 let mut v = x.to_bits();
50 let m: u64 = v & 0xfffffffffffffu64;
51 let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
52 // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
53 v = v.wrapping_sub((e << 52) as u64);
54 let t = f64::from_bits(v);
55 // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
56 // thus log2(u) = e + log2(t)
57 v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
58 let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
59 let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
60 let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
61 // we approximates log2(t) by -log2(r) + log2(r*t)
62 let p = crate::compound::compoundf::log2p1_polyeval_1(z);
63 // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
64 e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
65}
66
67/// Computes x^y - 1
68pub fn f_powm1f(x: f32, y: f32) -> f32 {
69 let ax: u32 = x.to_bits().wrapping_shl(1);
70 let ay: u32 = y.to_bits().wrapping_shl(1);
71
72 // filter out exceptional cases
73 if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
74 if x.is_nan() || y.is_nan() {
75 return f32::NAN;
76 }
77
78 // Handle infinities
79 if x.is_infinite() {
80 return if x.is_sign_positive() {
81 if y.is_infinite() {
82 return f32::INFINITY;
83 } else if y > 0.0 {
84 f32::INFINITY // inf^positive -> inf
85 } else if y < 0.0 {
86 -1.0 // inf^negative -> 0, so powm1 = -1
87 } else {
88 f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
89 }
90 } else {
91 // x = -inf
92 if y.is_infinite() {
93 return -1.0;
94 }
95 if is_integerf(y) {
96 // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf
97 let pow = if y as i32 % 2 == 0 {
98 f32::INFINITY
99 } else {
100 f32::NEG_INFINITY
101 };
102 pow - 1.0
103 } else {
104 f32::NAN // Negative base with non-integer exponent
105 }
106 };
107 }
108
109 // Handle y infinite
110 if y.is_infinite() {
111 return if x.abs() > 1.0 {
112 if y.is_sign_positive() {
113 f32::INFINITY
114 } else {
115 -1.0
116 }
117 } else if x.abs() < 1.0 {
118 if y.is_sign_positive() {
119 -1.0
120 } else {
121 f32::INFINITY
122 }
123 } else {
124 // |x| == 1
125 f32::NAN // 1^inf or -1^inf is undefined
126 };
127 }
128
129 // Handle zero base
130 if x == 0.0 {
131 return if y > 0.0 {
132 -1.0 // 0^positive -> 0, powm1 = -1
133 } else if y < 0.0 {
134 f32::INFINITY // 0^negative -> inf
135 } else {
136 0.0 // 0^0 -> conventionally 1, powm1 = 0
137 };
138 }
139 }
140
141 let y_integer = is_integerf(y);
142
143 let mut negative_parity: bool = false;
144
145 let mut x = x;
146
147 // Handle negative base with non-integer exponent
148 if x < 0.0 {
149 if !y_integer {
150 return f32::NAN; // x < 0 and non-integer y
151 }
152 x = x.abs();
153 if is_odd_integerf(y) {
154 negative_parity = true;
155 }
156 }
157
158 let xd = x as f64;
159 let yd = y as f64;
160 let tx = xd.to_bits();
161 let ty = yd.to_bits();
162
163 let l: f64 = powm1f_log2_fast(f64::from_bits(tx));
164
165 /* l approximates log2(1+x) with relative error < 2^-47.997,
166 and 2^-149 <= |l| < 128 */
167
168 let dt = l * f64::from_bits(ty);
169 let t: u64 = dt.to_bits();
170
171 // detect overflow/underflow
172 if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
173 // |t| >= 128
174 if t >= 0x3018bu64 << 46 {
175 // t <= -150
176 return -1.;
177 } else if (t >> 63) == 0 {
178 // t >= 128: overflow
179 return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
180 }
181 }
182
183 let res = powm1_exp2m1_fast(f64::from_bits(t));
184 // For x < 0 and integer y = n:
185 // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
186 // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
187 if negative_parity {
188 (-res - 2.) as f32
189 } else {
190 res as f32
191 }
192}
193
194#[inline]
195pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
196 let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
197 let mut r = t - k; // |r| <= 1/2, exact
198 let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625
199 // we add 2^-6 so that i is rounded to nearest
200 let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
201 r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
202 // now |r| <= 2^-6
203 // 2^t = 2^k * exp2_U[i][0] * 2^r
204 let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
205 let su = unsafe {
206 k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
207 };
208 s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
209 let q_poly = compoundf_expf_poly(r);
210 v = q_poly;
211
212 #[cfg(any(
213 all(
214 any(target_arch = "x86", target_arch = "x86_64"),
215 target_feature = "fma"
216 ),
217 all(target_arch = "aarch64", target_feature = "neon")
218 ))]
219 {
220 v = f_fmla(v, s, s - 1f64);
221 }
222 #[cfg(not(any(
223 all(
224 any(target_arch = "x86", target_arch = "x86_64"),
225 target_feature = "fma"
226 ),
227 all(target_arch = "aarch64", target_feature = "neon")
228 )))]
229 {
230 use crate::double_double::DoubleDouble;
231 let p0 = DoubleDouble::from_full_exact_add(s, -1.);
232 let z = DoubleDouble::from_exact_mult(v, s);
233 v = DoubleDouble::add(z, p0).to_f64();
234 }
235
236 v
237}
238
239#[cfg(test)]
240mod tests {
241 use super::*;
242 #[test]
243 fn test_powm1f() {
244 assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
245 assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
246 assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
247 assert_eq!(f_powm1f(3., 2.), 8.);
248 assert_eq!(f_powm1f(3., 3.), 26.);
249 assert_eq!(f_powm1f(5., 2.), 24.);
250 assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
251 assert_eq!(f_powm1f(-5., 2.), 24.);
252 assert_eq!(f_powm1f(-5., 3.), -126.);
253 assert_eq!(
254 f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
255 1.455057e-38
256 );
257 assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
258 assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
259 }
260}