pxfm/logs/
log10.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, min_normal_f64};
30use crate::double_double::DoubleDouble;
31use crate::logs::log2::LOG_COEFFS;
32use crate::logs::log10dd::log10_dd;
33use crate::logs::log10td::log10_td;
34use crate::polyeval::f_polyeval4;
35
36pub(crate) static LOG_R_DD: [(u64, u64); 128] = [
37    (0x0000000000000000, 0x0000000000000000),
38    (0xbd10c76b999d2be8, 0x3f80101575890000),
39    (0xbd23dc5b06e2f7d2, 0x3f90205658938000),
40    (0xbd2aa0ba325a0c34, 0x3f98492528c90000),
41    (0x3d0111c05cf1d753, 0x3fa0415d89e74000),
42    (0xbd2c167375bdfd28, 0x3fa466aed42e0000),
43    (0xbd197995d05a267d, 0x3fa894aa149fc000),
44    (0xbd1a68f247d82807, 0x3faccb73cdddc000),
45    (0xbd17e5dd7009902c, 0x3fb08598b59e4000),
46    (0xbd25325d560d9e9b, 0x3fb1973bd1466000),
47    (0x3d2cc85ea5db4ed7, 0x3fb3bdf5a7d1e000),
48    (0xbd2c69063c5d1d1e, 0x3fb5e95a4d97a000),
49    (0x3cec1e8da99ded32, 0x3fb700d30aeac000),
50    (0x3d23115c3abd47da, 0x3fb9335e5d594000),
51    (0xbd1390802bf768e5, 0x3fbb6ac88dad6000),
52    (0x3d2646d1c65aacd3, 0x3fbc885801bc4000),
53    (0xbd2dc068afe645e0, 0x3fbec739830a2000),
54    (0xbd2534d64fa10afd, 0x3fbfe89139dbe000),
55    (0x3d21ef78ce2d07f2, 0x3fc1178e8227e000),
56    (0x3d2ca78e44389934, 0x3fc1aa2b7e23f000),
57    (0x3d039d6ccb81b4a1, 0x3fc2d1610c868000),
58    (0x3cc62fa8234b7289, 0x3fc365fcb0159000),
59    (0x3d25837954fdb678, 0x3fc4913d8333b000),
60    (0x3d2633e8e5697dc7, 0x3fc527e5e4a1b000),
61    (0x3d19cf8b2c3c2e78, 0x3fc6574ebe8c1000),
62    (0xbd25118de59c21e1, 0x3fc6f0128b757000),
63    (0x3d1e0ddb9a631e83, 0x3fc823c16551a000),
64    (0xbd073d54aae92cd1, 0x3fc8beafeb390000),
65    (0x3d07f22858a0ff6f, 0x3fc95a5adcf70000),
66    (0xbd28724350562169, 0x3fca93ed3c8ae000),
67    (0xbd0c358d4eace1aa, 0x3fcb31d8575bd000),
68    (0xbd2d4bc4595412b6, 0x3fcbd087383be000),
69    (0xbd084a7e75b6f6e4, 0x3fcd1037f2656000),
70    (0xbd2aff2af715b035, 0x3fcdb13db0d49000),
71    (0x3cc212276041f430, 0x3fce530effe71000),
72    (0xbcca211565bb8e11, 0x3fcef5ade4dd0000),
73    (0x3d1bcbecca0cdf30, 0x3fcf991c6cb3b000),
74    (0x3cf89cdb16ed4e91, 0x3fd07138604d5800),
75    (0x3d27188b163ceae9, 0x3fd0c42d67616000),
76    (0xbd2c210e63a5f01c, 0x3fd1178e8227e800),
77    (0x3d2b9acdf7a51681, 0x3fd16b5ccbacf800),
78    (0x3d2ca6ed5147bdb7, 0x3fd1bf99635a6800),
79    (0x3d2c93c1df5bb3b6, 0x3fd269621134d800),
80    (0x3d2a9cfa4a5004f4, 0x3fd2bef07cdc9000),
81    (0xbd28e27ad3213cb8, 0x3fd314f1e1d36000),
82    (0x3d116ecdb0f177c8, 0x3fd36b6776be1000),
83    (0x3d183b54b606bd5c, 0x3fd3c25277333000),
84    (0x3d08e436ec90e09d, 0x3fd419b423d5e800),
85    (0xbd2f27ce0967d675, 0x3fd4718dc271c800),
86    (0xbd2e20891b0ad8a4, 0x3fd4c9e09e173000),
87    (0x3d2ebe708164c759, 0x3fd522ae0738a000),
88    (0x3d1fadedee5d40ef, 0x3fd57bf753c8d000),
89    (0xbd0a0b2a08a465dc, 0x3fd5d5bddf596000),
90    (0xbd2db623e731ae00, 0x3fd630030b3ab000),
91    (0x3d20a0d32756eba0, 0x3fd68ac83e9c6800),
92    (0x3d1721657c222d87, 0x3fd6e60ee6af1800),
93    (0x3d2d8b0949dc60b3, 0x3fd741d876c67800),
94    (0x3d29ec7d2efd1778, 0x3fd79e26687cf800),
95    (0xbd272090c812566a, 0x3fd7fafa3bd81800),
96    (0x3d2fd56f3333778a, 0x3fd85855776dc800),
97    (0xbd205ae1e5e70470, 0x3fd8b639a88b3000),
98    (0xbd1766b52ee6307d, 0x3fd914a8635bf800),
99    (0xbd152313a502d9f0, 0x3fd973a343135800),
100    (0xbd26279e10d0c0b0, 0x3fd9d32bea15f000),
101    (0x3d23c6457f9d79f5, 0x3fda33440224f800),
102    (0x3d23c6457f9d79f5, 0x3fda33440224f800),
103    (0x3d1e36f2bea77a5d, 0x3fda93ed3c8ad800),
104    (0xbd217cc552774458, 0x3fdaf5295248d000),
105    (0x3d1095252d841995, 0x3fdb56fa04462800),
106    (0x3d27d85bf40a666d, 0x3fdbb9611b80e000),
107    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
108    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
109    (0xbd29b6ddc15249ae, 0x3fdc7ff9c7455800),
110    (0xbd0797c33ec7a6b0, 0x3fdce42f18064800),
111    (0x3d235bafe9a767a8, 0x3fdd490246def800),
112    (0xbd1ea42d60dc616a, 0x3fddae75484c9800),
113    (0xbd1326b207322938, 0x3fde148a1a272800),
114    (0xbd1326b207322938, 0x3fde148a1a272800),
115    (0xbd2465505372bd08, 0x3fde7b42c3ddb000),
116    (0x3d2f27f45a470251, 0x3fdee2a156b41000),
117    (0x3d12cde56f014a8b, 0x3fdf4aa7ee031800),
118    (0x3d12cde56f014a8b, 0x3fdf4aa7ee031800),
119    (0x3d0085fa3c164935, 0x3fdfb358af7a4800),
120    (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800),
121    (0xbd04c45fe79539e0, 0x3fe04360be760400),
122    (0xbd04c45fe79539e0, 0x3fe04360be760400),
123    (0x3d26812241edf5fd, 0x3fe078bf0533c400),
124    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
125    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
126    (0x3d1c299807801742, 0x3fe0e4898611cc00),
127    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
128    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
129    (0xbd2edd97a293ae49, 0x3fe151c3f6f29800),
130    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
131    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
132    (0x3cccacdeed70e667, 0x3fe1c07849ae6000),
133    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
134    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
135    (0x3d12fc066e48667b, 0x3fe230b0d8bebc00),
136    (0xbd0b61f105226250, 0x3fe269621134dc00),
137    (0xbd0b61f105226250, 0x3fe269621134dc00),
138    (0x3d206d2be797882d, 0x3fe2a2786d0ec000),
139    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
140    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
141    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
142    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
143    (0x3d10b83f9527e6ac, 0x3fe35028ad9d8c00),
144    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
145    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
146    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
147    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
148    (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00),
149    (0xbcfd984f481051f7, 0x3fe43d9ff2f92400),
150    (0xbcfd984f481051f7, 0x3fe43d9ff2f92400),
151    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
152    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
153    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
154    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
155    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
156    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
157    (0x3d05ccc45d257531, 0x3fe5322e26867800),
158    (0x3d05ccc45d257531, 0x3fe5322e26867800),
159    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
160    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
161    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
162    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
163    (0x3d2202380cda46be, 0x3fe5ee82aa241800),
164    (0x0000000000000000, 0x0000000000000000),
165];
166
167/// Logarithm of base 10
168///
169/// Max found ULP 0.5
170pub fn f_log10(x: f64) -> f64 {
171    let mut x_u = x.to_bits();
172
173    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
174
175    let mut x_e: i64 = -(E_BIAS as i64);
176
177    const MAX_NORMAL: u64 = f64::to_bits(f64::MAX);
178
179    if x_u == 1f64.to_bits() {
180        // log2(1.0) = +0.0
181        return 0.0;
182    }
183    if x_u < min_normal_f64().to_bits() || x_u > MAX_NORMAL {
184        if x == 0.0 {
185            return f64::NEG_INFINITY;
186        }
187        if x < 0. || x.is_nan() {
188            return f64::NAN;
189        }
190        if x.is_infinite() || x.is_nan() {
191            return x + x;
192        }
193        // Normalize denormal inputs.
194        x_u = (x * f64::from_bits(0x4330000000000000)).to_bits();
195        x_e -= 52;
196    }
197
198    // log2(x) = log2(2^x_e * x_m)
199    //         = x_e + log2(x_m)
200    // Range reduction for log2(x_m):
201    // For each x_m, we would like to find r such that:
202    //   -2^-8 <= r * x_m - 1 < 2^-7
203    let shifted = (x_u >> 45) as i64;
204    let index = shifted & 0x7F;
205    let r = f64::from_bits(crate::logs::log2::LOG_RANGE_REDUCTION[index as usize]);
206
207    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
208    // all 1's.
209    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i64);
210    let e_x = x_e as f64;
211
212    const LOG_2_HI: f64 = f64::from_bits(0x3fe62e42fefa3800);
213    const LOG_2_LO: f64 = f64::from_bits(0x3d2ef35793c76730);
214
215    let log_r_dd = LOG_R_DD[index as usize];
216
217    // hi is exact
218    let hi = f_fmla(e_x, LOG_2_HI, f64::from_bits(log_r_dd.1));
219    // lo errors ~ e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo) + rounding err
220    //           <= 2 * (e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo))
221    let lo = f_fmla(e_x, LOG_2_LO, f64::from_bits(log_r_dd.0));
222
223    // Set m = 1.mantissa.
224    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
225    let m = f64::from_bits(x_m);
226
227    let u;
228    #[cfg(any(
229        all(
230            any(target_arch = "x86", target_arch = "x86_64"),
231            target_feature = "fma"
232        ),
233        all(target_arch = "aarch64", target_feature = "neon")
234    ))]
235    {
236        u = f_fmla(r, m, -1.0); // exact   
237    }
238    #[cfg(not(any(
239        all(
240            any(target_arch = "x86", target_arch = "x86_64"),
241            target_feature = "fma"
242        ),
243        all(target_arch = "aarch64", target_feature = "neon")
244    )))]
245    {
246        use crate::logs::log2::LOG_CD;
247        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
248        let c = f64::from_bits(c_m);
249        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
250    }
251
252    let u_sq = u * u;
253    // Degree-7 minimax polynomial
254    let p0 = f_fmla(
255        u,
256        f64::from_bits(LOG_COEFFS[1]),
257        f64::from_bits(LOG_COEFFS[0]),
258    );
259    let p1 = f_fmla(
260        u,
261        f64::from_bits(LOG_COEFFS[3]),
262        f64::from_bits(LOG_COEFFS[2]),
263    );
264    let p2 = f_fmla(
265        u,
266        f64::from_bits(LOG_COEFFS[5]),
267        f64::from_bits(LOG_COEFFS[4]),
268    );
269    let p = f_polyeval4(u_sq, lo, p0, p1, p2);
270
271    // Exact sum:
272    //   r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
273    let mut r1 = DoubleDouble::from_exact_add(hi, u);
274    r1.lo += p;
275
276    // Quick double-double multiplication:
277    //   r2.hi + r2.lo ~ r1 * log10(e),
278    // with error bounded by:
279    //   4*ulp( ulp(r2.hi) )
280    const LOG10_E: DoubleDouble = DoubleDouble::new(
281        f64::from_bits(0x3c695355baaafad3),
282        f64::from_bits(0x3fdbcb7b1526e50e),
283    );
284    let r2 = DoubleDouble::quick_mult(r1, LOG10_E);
285
286    const HI_ERR: f64 = f64::from_bits(0x3aa0000000000000);
287
288    // Extra errors from P is from using x^2 to reduce evaluation latency.
289    const P_ERR: f64 = f64::from_bits(0x3cc0000000000000);
290
291    // Technicallly error of r1.lo is bounded by:
292    //    |hi|*ulp(log(2)_lo) + C*ulp(u^2)
293    // To simplify the error computation a bit, we replace |hi|*ulp(log(2)_lo)
294    // with the upper bound: 2^11 * ulp(log(2)_lo) = 2^-85.
295    // Total error is bounded by ~ C * ulp(u^2) + 2^-85.
296    let err = f_fmla(u_sq, P_ERR, HI_ERR);
297
298    // Lower bound from the result
299    let left = r2.hi + (r2.lo - err);
300    // Upper bound from the result
301    let right = r2.hi + (r2.lo + err);
302
303    // Ziv's test if fast pass is accurate enough.
304    if left == right {
305        return left;
306    }
307
308    log10_dd_accurate(x)
309}
310
311#[cold]
312#[inline(never)]
313fn log10_dd_accurate(x: f64) -> f64 {
314    let r = log10_dd(x);
315    let err = f_fmla(
316        r.hi,
317        f64::from_bits(0x3b50000000000000), // 2^-74
318        f64::from_bits(0x3990000000000000), // 2^-102
319    );
320    let ub = r.hi + (r.lo + err);
321    let lb = r.hi + (r.lo - err);
322    if ub == lb {
323        return r.to_f64();
324    }
325    log10_dd_accurate_slow(x)
326}
327
328#[cold]
329#[inline(never)]
330fn log10_dd_accurate_slow(x: f64) -> f64 {
331    log10_td(x).to_f64()
332}
333
334#[cfg(test)]
335mod tests {
336    use super::*;
337
338    #[test]
339    fn test_log10d() {
340        assert_eq!(f_log10(0.35), -0.4559319556497244);
341        assert_eq!(f_log10(0.9), -0.045757490560675115);
342        assert_eq!(f_log10(10.), 1.);
343        assert_eq!(f_log10(0.), f64::NEG_INFINITY);
344        assert!(f_log10(-1.).is_nan());
345        assert!(f_log10(f64::NAN).is_nan());
346        assert!(f_log10(f64::NEG_INFINITY).is_nan());
347        assert_eq!(f_log10(f64::INFINITY), f64::INFINITY);
348    }
349}