pxfm/logs/
log10p1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::logs::LOG_RANGE_REDUCTION;
31use crate::polyeval::{f_estrin_polyeval7, f_polyeval6};
32
33// Generated by SageMath:
34// print("[")
35// for i in range(128):
36//     R = RealField(200)
37//     r = R(2)**(-8) * ( R(2)**8 * (R(1) - R(2)**(-8)) / (R(1) + R(i)*R(2)**-7) ).ceil()
38//
39//     if i == 0 or i == 127:
40//         print(double_to_hex(0), ",")
41//     else:
42//         print(double_to_hex(-r.log10()), ",")
43// print("];")
44static LOG10_D: [u64; 128] = [
45    0x0000000000000000,
46    0x3f6be76bd77b4fc3,
47    0x3f7c03a80ae5e054,
48    0x3f851824c7587eb0,
49    0x3f8c3d0837784c41,
50    0x3f91b85d6044e9ae,
51    0x3f9559bd2406c3ba,
52    0x3f9902c31d62a843,
53    0x3f9cb38fccd8bfdb,
54    0x3f9e8eeb09f2f6cb,
55    0x3fa125d0432ea20e,
56    0x3fa30838cdc2fbfd,
57    0x3fa3faf7c663060e,
58    0x3fa5e3966b7e9295,
59    0x3fa7d070145f4fd7,
60    0x3fa8c878eeb05074,
61    0x3faabbcebd84fca0,
62    0x3fabb7209d1e24e5,
63    0x3fadb11ed766abf4,
64    0x3faeafd05035bd3b,
65    0x3fb0585283764178,
66    0x3fb0d966cc6500fa,
67    0x3fb1dd5460c8b16f,
68    0x3fb2603072a25f82,
69    0x3fb367ba3aaa1883,
70    0x3fb3ec6ad5407868,
71    0x3fb4f7aad9bbcbaf,
72    0x3fb57e3d47c3af7b,
73    0x3fb605735ee985f1,
74    0x3fb715d0ce367afc,
75    0x3fb79efb57b0f803,
76    0x3fb828cfed29a215,
77    0x3fb93e7de0fc3e80,
78    0x3fb9ca5aa1729f45,
79    0x3fba56e8325f5c87,
80    0x3fbae4285509950b,
81    0x3fbb721cd17157e3,
82    0x3fbc902a19e65111,
83    0x3fbd204698cb42bd,
84    0x3fbdb11ed766abf4,
85    0x3fbe42b4c16caaf3,
86    0x3fbed50a4a26eafc,
87    0x3fbffbfc2bbc7803,
88    0x3fc0484e4942aa43,
89    0x3fc093025a19976c,
90    0x3fc0de1b56356b04,
91    0x3fc1299a4fb3e306,
92    0x3fc175805d1587c1,
93    0x3fc1c1ce9955c0c6,
94    0x3fc20e8624038fed,
95    0x3fc25ba8215af7fc,
96    0x3fc2a935ba5f1479,
97    0x3fc2f7301cf4e87b,
98    0x3fc345987bfeea91,
99    0x3fc394700f7953fd,
100    0x3fc3e3b8149739d4,
101    0x3fc43371cde076c2,
102    0x3fc4839e83506c87,
103    0x3fc4d43f8275a483,
104    0x3fc525561e9256ee,
105    0x3fc576e3b0bde0a7,
106    0x3fc5c8e998072fe2,
107    0x3fc61b6939983048,
108    0x3fc66e6400da3f77,
109    0x3fc6c1db5f9bb336,
110    0x3fc6c1db5f9bb336,
111    0x3fc715d0ce367afc,
112    0x3fc76a45cbb7e6ff,
113    0x3fc7bf3bde099f30,
114    0x3fc814b4921bd52b,
115    0x3fc86ab17c10bc7f,
116    0x3fc86ab17c10bc7f,
117    0x3fc8c13437695532,
118    0x3fc9183e673394fa,
119    0x3fc96fd1b639fc09,
120    0x3fc9c7efd734a2f9,
121    0x3fca209a84fbcff8,
122    0x3fca209a84fbcff8,
123    0x3fca79d382bc21d9,
124    0x3fcad39c9c2c6080,
125    0x3fcb2df7a5c50299,
126    0x3fcb2df7a5c50299,
127    0x3fcb88e67cf97980,
128    0x3fcbe46b087354bc,
129    0x3fcc4087384f4f80,
130    0x3fcc4087384f4f80,
131    0x3fcc9d3d065c5b42,
132    0x3fccfa8e765cbb72,
133    0x3fccfa8e765cbb72,
134    0x3fcd587d96494759,
135    0x3fcdb70c7e96e7f3,
136    0x3fcdb70c7e96e7f3,
137    0x3fce163d527e68cf,
138    0x3fce76124046b3f3,
139    0x3fce76124046b3f3,
140    0x3fced68d819191fc,
141    0x3fcf37b15bab08d1,
142    0x3fcf37b15bab08d1,
143    0x3fcf99801fdb749d,
144    0x3fcffbfc2bbc7803,
145    0x3fcffbfc2bbc7803,
146    0x3fd02f93f4c87101,
147    0x3fd06182e84fd4ac,
148    0x3fd06182e84fd4ac,
149    0x3fd093cc32c90f84,
150    0x3fd093cc32c90f84,
151    0x3fd0c6711d6abd7a,
152    0x3fd0f972f87ff3d6,
153    0x3fd0f972f87ff3d6,
154    0x3fd12cd31b9c99ff,
155    0x3fd12cd31b9c99ff,
156    0x3fd16092e5d3a9a6,
157    0x3fd194b3bdef6b9e,
158    0x3fd194b3bdef6b9e,
159    0x3fd1c93712abc7ff,
160    0x3fd1c93712abc7ff,
161    0x3fd1fe1e5af2c141,
162    0x3fd1fe1e5af2c141,
163    0x3fd2336b161b3337,
164    0x3fd2336b161b3337,
165    0x3fd2691ecc29f042,
166    0x3fd2691ecc29f042,
167    0x3fd29f3b0e15584b,
168    0x3fd29f3b0e15584b,
169    0x3fd2d5c1760b86bb,
170    0x3fd2d5c1760b86bb,
171    0x3fd30cb3a7bb3625,
172    0x0000000000000000,
173];
174
175#[inline]
176pub(crate) fn core_log10f(x: f64) -> f64 {
177    let x_u = x.to_bits();
178
179    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
180
181    let mut x_e: i32 = -(E_BIAS as i32);
182
183    // log2(x) = log2(2^x_e * x_m)
184    //         = x_e + log2(x_m)
185    // Range reduction for log2(x_m):
186    // For each x_m, we would like to find r such that:
187    //   -2^-8 <= r * x_m - 1 < 2^-7
188    let shifted = (x_u >> 45) as i32;
189    let index = shifted & 0x7F;
190    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
191
192    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
193    // all 1's.
194    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
195    let e_x = x_e as f64;
196
197    const LOG_10_2_HI: f64 = f64::from_bits(0x3fd34413509f79ff);
198
199    let log_r_dd = LOG10_D[index as usize];
200
201    // hi is exact
202    let hi = f_fmla(e_x, LOG_10_2_HI, f64::from_bits(log_r_dd));
203
204    // Set m = 1.mantissa.
205    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
206    let m = f64::from_bits(x_m);
207
208    let u;
209    #[cfg(any(
210        all(
211            any(target_arch = "x86", target_arch = "x86_64"),
212            target_feature = "fma"
213        ),
214        target_arch = "aarch64"
215    ))]
216    {
217        u = f_fmla(r, m, -1.0); // exact
218    }
219    #[cfg(not(any(
220        all(
221            any(target_arch = "x86", target_arch = "x86_64"),
222            target_feature = "fma"
223        ),
224        target_arch = "aarch64"
225    )))]
226    {
227        use crate::logs::LOG_CD;
228        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
229        let c = f64::from_bits(c_m);
230        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
231    }
232
233    // Polynomial for log(1+x)/x generated in Sollya:
234    // d = [-2^-8, 2^-7];
235    // f_log10 = log10(1 + x)/x;
236    // Q = fpminimax(f_log10, 6, [|D...|], d);
237    // See ./notes/log10pf_core.sollya
238    let p = f_polyeval6(
239        u,
240        f64::from_bits(0x3fdbcb7b1526e50e),
241        f64::from_bits(0xbfcbcb7b1526e4e2),
242        f64::from_bits(0x3fc287a763707f60),
243        f64::from_bits(0xbfbbcb7b16f1858d),
244        f64::from_bits(0x3fb63c613ca2ee0f),
245        f64::from_bits(0xbfb28617a50029a9),
246    );
247    f_fmla(p, u, hi)
248}
249
250/// Computes log10(x+1)
251///
252/// Max ULP 0.5
253#[inline]
254pub fn f_log10p1f(x: f32) -> f32 {
255    let z = x as f64;
256    let ux = x.to_bits().wrapping_shl(1);
257    if ux >= 0xffu32 << 24 || ux == 0 {
258        // |x| == 0, |x| == inf, x == NaN
259        if ux == 0 {
260            return x;
261        }
262        if x.is_infinite() {
263            return if x.is_sign_positive() {
264                f32::INFINITY
265            } else {
266                f32::NAN
267            };
268        }
269        return x + f32::NAN;
270    }
271
272    let ax = x.to_bits() & 0x7fff_ffffu32;
273
274    // Use log10p1(x) = log10(1 + x) for |x| > 2^-6;
275    if ax > 0x3c80_0000u32 {
276        if x == -1. {
277            return f32::NEG_INFINITY;
278        }
279        let x1p = z + 1.;
280        if x1p <= 0. {
281            if x1p == 0. {
282                return f32::NEG_INFINITY;
283            }
284            return f32::NAN;
285        }
286        return core_log10f(x1p) as f32;
287    }
288
289    // log10p1 is expected to be used near zero:
290    // Polynomial generated by Sollya:
291    // d = [-2^-6; 2^-6];
292    // f_log10pf = log10(1+x)/x;
293    // Q = fpminimax(f_log10pf, 6, [|D...|], d);
294    const C: [u64; 7] = [
295        0x3fdbcb7b1526e50e,
296        0xbfcbcb7b1526f138,
297        0x3fc287a7636f8798,
298        0xbfbbcb7b08fcfcad,
299        0x3fb63c625d2472a4,
300        0xbfb2892c9b620de1,
301        0x3fafc7d3af2f4b6c,
302    ];
303    let p = f_estrin_polyeval7(
304        z,
305        f64::from_bits(C[0]),
306        f64::from_bits(C[1]),
307        f64::from_bits(C[2]),
308        f64::from_bits(C[3]),
309        f64::from_bits(C[4]),
310        f64::from_bits(C[5]),
311        f64::from_bits(C[6]),
312    );
313    (p * z) as f32
314}
315
316#[cfg(test)]
317mod tests {
318    use super::*;
319
320    #[test]
321    fn test_log10p1f() {
322        assert_eq!(f_log10p1f(0.0), 0.0);
323        assert_eq!(f_log10p1f(1.0), 0.30103);
324        assert_eq!(f_log10p1f(-0.0432432), -0.019198442);
325        assert_eq!(f_log10p1f(-0.009874634), -0.0043098135);
326        assert_eq!(f_log10p1f(-0.000000054233), -2.3553092e-8);
327        assert_eq!(f_log10p1f(1.2443), 0.35108092);
328        assert_eq!(f_log10p1f(f32::INFINITY), f32::INFINITY);
329        assert!(f_log10p1f(f32::NEG_INFINITY).is_nan());
330        assert!(f_log10p1f(-1.0432432).is_nan());
331    }
332}