pxfm/logs/
log2p1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::logs::LOG_RANGE_REDUCTION;
31use crate::polyeval::{f_estrin_polyeval7, f_polyeval6};
32
33// Generated in SageMath:
34// print("[")
35// for i in range(128):
36//     R = RealField(200)
37//     r = R(2)**(-8) * ( R(2)**8 * (R(1) - R(2)**(-8)) / (R(1) + R(i)*R(2)**-7) ).ceil()
38//
39//     if i == 0 or i == 127:
40//         print(double_to_hex(0), ",")
41//     else:
42//         print(double_to_hex(-r.log2()), ",")
43// print("];")
44static LOG2_D: [u64; 128] = [
45    0x0000000000000000,
46    0x3f872c7ba20f7327,
47    0x3f9743ee861f3556,
48    0x3fa184b8e4c56af8,
49    0x3fa77394c9d958d5,
50    0x3fad6ebd1f1febfe,
51    0x3fb1bb32a600549d,
52    0x3fb4c560fe68af88,
53    0x3fb7d60496cfbb4c,
54    0x3fb960caf9abb7ca,
55    0x3fbc7b528b70f1c5,
56    0x3fbf9c95dc1d1165,
57    0x3fc097e38ce60649,
58    0x3fc22dadc2ab3497,
59    0x3fc3c6fb650cde51,
60    0x3fc494f863b8df35,
61    0x3fc633a8bf437ce1,
62    0x3fc7046031c79f85,
63    0x3fc8a8980abfbd32,
64    0x3fc97c1cb13c7ec1,
65    0x3fcb2602497d5346,
66    0x3fcbfc67a7fff4cc,
67    0x3fcdac22d3e441d3,
68    0x3fce857d3d361368,
69    0x3fd01d9bbcfa61d4,
70    0x3fd08bce0d95fa38,
71    0x3fd169c05363f158,
72    0x3fd1d982c9d52708,
73    0x3fd249cd2b13cd6c,
74    0x3fd32bfee370ee68,
75    0x3fd39de8e1559f6f,
76    0x3fd4106017c3eca3,
77    0x3fd4f6fbb2cec598,
78    0x3fd56b22e6b578e5,
79    0x3fd5dfdcf1eeae0e,
80    0x3fd6552b49986277,
81    0x3fd6cb0f6865c8ea,
82    0x3fd7b89f02cf2aad,
83    0x3fd8304d90c11fd3,
84    0x3fd8a8980abfbd32,
85    0x3fd921800924dd3b,
86    0x3fd99b072a96c6b2,
87    0x3fda8ff971810a5e,
88    0x3fdb0b67f4f46810,
89    0x3fdb877c57b1b070,
90    0x3fdc043859e2fdb3,
91    0x3fdc819dc2d45fe4,
92    0x3fdcffae611ad12b,
93    0x3fdd7e6c0abc3579,
94    0x3fddfdd89d586e2b,
95    0x3fde7df5fe538ab3,
96    0x3fdefec61b011f85,
97    0x3fdf804ae8d0cd02,
98    0x3fe0014332be0033,
99    0x3fe042bd4b9a7c99,
100    0x3fe08494c66b8ef0,
101    0x3fe0c6caaf0c5597,
102    0x3fe1096015dee4da,
103    0x3fe14c560fe68af9,
104    0x3fe18fadb6e2d3c2,
105    0x3fe1d368296b5255,
106    0x3fe217868b0c37e8,
107    0x3fe25c0a0463beb0,
108    0x3fe2a0f3c340705c,
109    0x3fe2e644fac04fd8,
110    0x3fe2e644fac04fd8,
111    0x3fe32bfee370ee68,
112    0x3fe37222bb70747c,
113    0x3fe3b8b1c68fa6ed,
114    0x3fe3ffad4e74f1d6,
115    0x3fe44716a2c08262,
116    0x3fe44716a2c08262,
117    0x3fe48eef19317991,
118    0x3fe4d7380dcc422d,
119    0x3fe51ff2e30214bc,
120    0x3fe5692101d9b4a6,
121    0x3fe5b2c3da19723b,
122    0x3fe5b2c3da19723b,
123    0x3fe5fcdce2727ddb,
124    0x3fe6476d98ad990a,
125    0x3fe6927781d932a8,
126    0x3fe6927781d932a8,
127    0x3fe6ddfc2a78fc63,
128    0x3fe729fd26b707c8,
129    0x3fe7767c12967a45,
130    0x3fe7767c12967a45,
131    0x3fe7c37a9227e7fb,
132    0x3fe810fa51bf65fd,
133    0x3fe810fa51bf65fd,
134    0x3fe85efd062c656d,
135    0x3fe8ad846cf369a4,
136    0x3fe8ad846cf369a4,
137    0x3fe8fc924c89ac84,
138    0x3fe94c287492c4db,
139    0x3fe94c287492c4db,
140    0x3fe99c48be2063c8,
141    0x3fe9ecf50bf43f13,
142    0x3fe9ecf50bf43f13,
143    0x3fea3e2f4ac43f60,
144    0x3fea8ff971810a5e,
145    0x3fea8ff971810a5e,
146    0x3feae255819f022d,
147    0x3feb35458761d479,
148    0x3feb35458761d479,
149    0x3feb88cb9a2ab521,
150    0x3feb88cb9a2ab521,
151    0x3febdce9dcc96187,
152    0x3fec31a27dd00b4a,
153    0x3fec31a27dd00b4a,
154    0x3fec86f7b7ea4a89,
155    0x3fec86f7b7ea4a89,
156    0x3fecdcebd2373995,
157    0x3fed338120a6dd9d,
158    0x3fed338120a6dd9d,
159    0x3fed8aba045b01c8,
160    0x3fed8aba045b01c8,
161    0x3fede298ec0bac0d,
162    0x3fede298ec0bac0d,
163    0x3fee3b20546f554a,
164    0x3fee3b20546f554a,
165    0x3fee9452c8a71028,
166    0x3fee9452c8a71028,
167    0x3feeee32e2aeccbf,
168    0x3feeee32e2aeccbf,
169    0x3fef48c34bd1e96f,
170    0x3fef48c34bd1e96f,
171    0x3fefa406bd2443df,
172    0x0000000000000000,
173];
174
175#[inline]
176pub(crate) fn core_log2f(x: f64) -> f64 {
177    let x_u = x.to_bits();
178
179    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
180
181    let mut x_e: i32 = -(E_BIAS as i32);
182
183    // log2(x) = log2(2^x_e * x_m)
184    //         = x_e + log2(x_m)
185    // Range reduction for log2(x_m):
186    // For each x_m, we would like to find r such that:
187    //   -2^-8 <= r * x_m - 1 < 2^-7
188    let shifted = (x_u >> 45) as i32;
189    let index = shifted & 0x7F;
190    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
191
192    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
193    // all 1's.
194    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
195    let e_x = x_e as f64;
196
197    let log_r_dd = LOG2_D[index as usize];
198
199    // hi is exact
200    let hi = e_x + f64::from_bits(log_r_dd);
201
202    // Set m = 1.mantissa.
203    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
204    let m = f64::from_bits(x_m);
205
206    let u;
207    #[cfg(any(
208        all(
209            any(target_arch = "x86", target_arch = "x86_64"),
210            target_feature = "fma"
211        ),
212        target_arch = "aarch64"
213    ))]
214    {
215        u = f_fmla(r, m, -1.0); // exact
216    }
217    #[cfg(not(any(
218        all(
219            any(target_arch = "x86", target_arch = "x86_64"),
220            target_feature = "fma"
221        ),
222        target_arch = "aarch64"
223    )))]
224    {
225        use crate::logs::LOG_CD;
226        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
227        let c = f64::from_bits(c_m);
228        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
229    }
230
231    // Polynomial for log(1+x)/x generated in Sollya:
232    // d = [-2^-8, 2^-7];
233    // f_log2p1 = log2(1 + x)/x;
234    // Q = fpminimax(f_log2p1, 6, [|D...|], d);
235    // See ./notes/log10pf_core.sollya
236    let p = f_polyeval6(
237        u,
238        f64::from_bits(0x3ff71547652b82fe),
239        f64::from_bits(0xbfe71547652b82e7),
240        f64::from_bits(0x3fdec709dc3b1fd5),
241        f64::from_bits(0xbfd7154766124214),
242        f64::from_bits(0x3fd2776bd902599e),
243        f64::from_bits(0xbfcec586c6f55d08),
244    );
245    f_fmla(p, u, hi)
246}
247
248/// Computes log2(x+1)
249///
250/// Max ULP 0.5
251#[inline]
252pub fn f_log2p1f(x: f32) -> f32 {
253    let z = x as f64;
254    let ux = x.to_bits().wrapping_shl(1);
255    if ux >= 0xffu32 << 24 || ux == 0 {
256        // |x| == 0, |x| == inf, x == NaN
257        if ux == 0 {
258            return x;
259        }
260        if x.is_infinite() {
261            return if x.is_sign_positive() {
262                f32::INFINITY
263            } else {
264                f32::NAN
265            };
266        }
267        return x + f32::NAN;
268    }
269
270    let ax = x.to_bits() & 0x7fff_ffffu32;
271
272    // Use log2p1(x) = log10(1 + x) for |x| > 2^-6;
273    if ax > 0x3c80_0000u32 {
274        if x == -1. {
275            return f32::NEG_INFINITY;
276        }
277        let x1p = z + 1.;
278        if x1p <= 0. {
279            if x1p == 0. {
280                return f32::NEG_INFINITY;
281            }
282            return f32::NAN;
283        }
284        return core_log2f(x1p) as f32;
285    }
286
287    // log2p1 is expected to be used near zero:
288    // Polynomial generated by Sollya:
289    // d = [-2^-6; 2^-6];
290    // f_log2pf = log2(1+x)/x;
291    // Q = fpminimax(f_log2pf, 6, [|D...|], d);
292    const C: [u64; 7] = [
293        0x3ff71547652b82fe,
294        0xbfe71547652b8d18,
295        0x3fdec709dc3a501c,
296        0xbfd715475b117c95,
297        0x3fd2776c3fd833bd,
298        0xbfcec9905627faa6,
299        0x3fca64536a0ad148,
300    ];
301    let p = f_estrin_polyeval7(
302        z,
303        f64::from_bits(C[0]),
304        f64::from_bits(C[1]),
305        f64::from_bits(C[2]),
306        f64::from_bits(C[3]),
307        f64::from_bits(C[4]),
308        f64::from_bits(C[5]),
309        f64::from_bits(C[6]),
310    );
311    (p * z) as f32
312}
313
314#[cfg(test)]
315mod tests {
316    use super::*;
317
318    #[test]
319    fn test_log2p1f() {
320        assert_eq!(f_log2p1f(0.0), 0.0);
321        assert_eq!(f_log2p1f(1.0), 1.0);
322        assert_eq!(f_log2p1f(-0.0432432), -0.063775845);
323        assert_eq!(f_log2p1f(-0.009874634), -0.01431689);
324        assert_eq!(f_log2p1f(1.2443), 1.1662655);
325        assert_eq!(f_log2p1f(f32::INFINITY), f32::INFINITY);
326        assert!(f_log2p1f(f32::NEG_INFINITY).is_nan());
327        assert!(f_log2p1f(-1.0432432).is_nan());
328    }
329}