pxfm/exponents/exp10f.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf};
30use crate::polyeval::f_polyeval7;
31use crate::round::RoundFinite;
32
/// Output of the base-`b` range reduction produced by `exp_b_range_reduc`.
pub(crate) struct ExpBReduc {
    /// Scale factor `2^(hi + mid)`, assembled from exponent bits and an `EXP_2_MID` entry.
    pub(crate) hi: f64,
    /// Reduced argument that remains after removing the scaled multiple of `log_b(2)`.
    pub(crate) lo: f64,
}
37
/// Number of low bits of the rounded exponent used to index `EXP_2_MID`.
const MID_BITS: u32 = 5;
/// Mask selecting those low `MID_BITS` bits.
const MID_MASK: usize = (1usize << MID_BITS) - 1;
/// `log2(10)` scaled by `2^MID_BITS`.
const LOG2_B: f64 = (1u32 << MID_BITS) as f64 * f64::from_bits(0x400a_934f_0979_a371);
/// High part of `-log10(2) / 2^MID_BITS`; the low mantissa bits of the source constant are zero.
const M_LOGB_2_HI: f64 = f64::from_bits(0xbfd3_4413_509f_8000) / (1u32 << MID_BITS) as f64;
/// Low-order correction that is added to `M_LOGB_2_HI`.
const M_LOGB_2_LO: f64 = f64::from_bits(0x3d38_0433_b83b_532a) / (1u32 << MID_BITS) as f64;
// Bit patterns of the f64 values 2^(i / 32) for i = 0..32.
// Entry 0 is 1.0 and entry 16 is sqrt(2); the table is indexed by the low
// MID_BITS bits of the rounded exponent in `exp_b_range_reduc`.
const EXP_2_MID: [u64; 32] = [
    0x3ff0000000000000,
    0x3ff059b0d3158574,
    0x3ff0b5586cf9890f,
    0x3ff11301d0125b51,
    0x3ff172b83c7d517b,
    0x3ff1d4873168b9aa,
    0x3ff2387a6e756238,
    0x3ff29e9df51fdee1,
    0x3ff306fe0a31b715,
    0x3ff371a7373aa9cb,
    0x3ff3dea64c123422,
    0x3ff44e086061892d,
    0x3ff4bfdad5362a27,
    0x3ff5342b569d4f82,
    0x3ff5ab07dd485429,
    0x3ff6247eb03a5585,
    0x3ff6a09e667f3bcd,
    0x3ff71f75e8ec5f74,
    0x3ff7a11473eb0187,
    0x3ff82589994cce13,
    0x3ff8ace5422aa0db,
    0x3ff93737b0cdc5e5,
    0x3ff9c49182a3f090,
    0x3ffa5503b23e255d,
    0x3ffae89f995ad3ad,
    0x3ffb7f76f2fb5e47,
    0x3ffc199bdd85529c,
    0x3ffcb720dcef9069,
    0x3ffd5818dcfba487,
    0x3ffdfc97337b9b5f,
    0x3ffea4afa2a490da,
    0x3fff50765b6e4540,
];
77
/// Bit patterns of the coefficients of the degree-5 minimax polynomial for `10^dx`.
///
/// Generated with Sollya:
/// `Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);`
///
/// The approximation is
/// `10^dx ~ 1 + COEFFS[0] * dx + COEFFS[1] * dx^2 + ... + COEFFS[4] * dx^5`.
pub(crate) const EXP10F_COEFFS: [u64; 5] = [
    0x4002_6bb1_bbb5_5515,
    0x4005_3524_c73b_d3ea,
    0x4000_4705_91df_f149,
    0x3ff2_bd7c_0a9f_bc4d,
    0x3fe1_429e_74a9_8f43,
];
89
90/// Range reduction function equivalent to exp_b_range_reduc
91#[inline]
92pub(crate) fn exp_b_range_reduc(x: f32) -> ExpBReduc {
93 let xd = x as f64;
94
95 // kd = round(log2(b) * x)
96 let kd = (LOG2_B * xd).round_finite();
97 let k = unsafe { kd.to_int_unchecked::<i32>() }; // it's already not indeterminate.
98
99 // hi = floor(kd / 2^MID_BITS)
100 let exp_hi = (k.wrapping_shr(MID_BITS) as u64).wrapping_shl(52); // 52 = fraction bits in f64
101
102 // mh = 2^hi * 2^mid
103 let mid_index = (k as usize) & MID_MASK;
104 let mh_bits = EXP_2_MID[mid_index].wrapping_add(exp_hi);
105 let mh = f64::from_bits(mh_bits);
106
107 // dx = x - (hi + mid) * log(2)
108 let z0 = f_fmla(kd, M_LOGB_2_HI, xd);
109 let dx = f_fmla(kd, M_LOGB_2_LO, z0);
110
111 ExpBReduc { lo: dx, hi: mh }
112}
113
114/// Computes exp10
115///
116/// Max found ULP 0.49999508
117#[inline]
118pub fn f_exp10f(x: f32) -> f32 {
119 let x_u = x.to_bits();
120 let x_abs = x_u & 0x7fffffff;
121
122 // When |x| >= log10(2^128), or x is nan
123 if x_abs >= 0x421a209bu32 {
124 // When x < log10(2^-150) or nan
125 if x_u > 0xc2349e35u32 {
126 // exp(-Inf) = 0
127 if x.is_infinite() {
128 return 0.0;
129 }
130 // exp(nan) = nan
131 if x.is_nan() {
132 return x;
133 }
134 return 0.0;
135 }
136 // x >= log10(2^128) or nan
137 if x > 0. && (x_u >= 0x421a209bu32) {
138 // x is +inf or nan
139 return x + f32::INFINITY;
140 }
141 }
142
143 if x_abs <= 0x3d000000u32 {
144 // |x| < 1/32
145 if x_abs <= 0x3b9a209bu32 {
146 if x_u == 0xb25e5bd9u32 {
147 // x = -1.2943e-08
148 return 1.;
149 }
150 // |x| < 2^-25
151 // 10^x ~ 1 + log(10) * x
152 if x_abs <= 0x32800000u32 {
153 return f_fmlaf(x, f32::from_bits(0x40135da2), 1.0);
154 }
155 }
156
157 let xd = x as f64;
158
159 // Special polynomial for small x.
160 // Generated by Sollya:
161 // d = [-1/32, 1/32];
162 // f_exp10f = (10^y - 1)/y;
163 // Q = fpminimax(f_exp10f, 6, [|D...|], d, relative, floating);
164
165 // See ./notes/exp10f_small.sollya
166 let p = f_polyeval7(
167 xd,
168 f64::from_bits(0x40026bb1bbb55516),
169 f64::from_bits(0x40053524c73cfbf6),
170 f64::from_bits(0x4000470591de0b07),
171 f64::from_bits(0x3ff2bd760599f3a5),
172 f64::from_bits(0x3fe142a001511a6f),
173 f64::from_bits(0x3fca7feffa781d53),
174 f64::from_bits(0x3fb16e53492c0f0e),
175 );
176 return f_fmla(p, xd, 1.) as f32;
177 }
178
179 // Range reduction: 10^x = 2^(mid + hi) * 10^lo
180 // rr = (2^(mid + hi), lo)
181 let rr = exp_b_range_reduc(x);
182
183 // The low part is approximated by a degree-5 minimax polynomial.
184 // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5
185 let lo2 = rr.lo * rr.lo;
186 // c0 = 1 + COEFFS[0] * lo
187 let c0 = f_fmla(rr.lo, f64::from_bits(EXP10F_COEFFS[0]), 1.0);
188 // c1 = COEFFS[1] + COEFFS[2] * lo
189 let c1 = f_fmla(
190 rr.lo,
191 f64::from_bits(EXP10F_COEFFS[2]),
192 f64::from_bits(EXP10F_COEFFS[1]),
193 );
194 // c2 = COEFFS[3] + COEFFS[4] * lo
195 let c2 = f_fmla(
196 rr.lo,
197 f64::from_bits(EXP10F_COEFFS[4]),
198 f64::from_bits(EXP10F_COEFFS[3]),
199 );
200 // p = c1 + c2 * lo^2
201 // = COEFFS[1] + COEFFS[2] * lo + COEFFS[3] * lo^2 + COEFFS[4] * lo^3
202 let p = f_fmla(lo2, c2, c1);
203 // 10^lo ~ c0 + p * lo^2
204 // 10^x = 2^(mid + hi) * 10^lo
205 // ~ mh * (c0 + p * lo^2)
206 // = (mh * c0) + p * (mh * lo^2)
207 f_fmla(p, lo2 * rr.hi, c0 * rr.hi) as f32
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks over the polynomial paths plus infinities and NaN.
    #[test]
    fn test_exp10f() {
        assert_eq!(f_exp10f(-1. / 64.), 0.9646616);
        assert_eq!(f_exp10f(1. / 64.), 1.0366329);
        assert_eq!(f_exp10f(1.), 10.0);
        assert_eq!(f_exp10f(2.), 100.0);
        assert_eq!(f_exp10f(3.), 1000.0);
        assert_eq!(f_exp10f(f32::INFINITY), f32::INFINITY);
        assert_eq!(f_exp10f(f32::NEG_INFINITY), 0.);
        assert!(f_exp10f(f32::NAN).is_nan());
    }

    /// Boundary branches: signed zero, finite overflow/underflow and a negative-signed NaN.
    #[test]
    fn test_exp10f_boundaries() {
        assert_eq!(f_exp10f(0.), 1.0);
        assert_eq!(f_exp10f(-0.), 1.0);
        // 39 > log10(2^128), so the result overflows to +inf.
        assert_eq!(f_exp10f(39.), f32::INFINITY);
        // -46 < log10(2^-150), so the result underflows to zero.
        assert_eq!(f_exp10f(-46.), 0.);
        // NaN with the sign bit set.
        assert!(f_exp10f(f32::from_bits(0xffc0_0000)).is_nan());
    }
}
225}