pxfm/logs/
log1pf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::logs::{LOG_R_DD, LOG_RANGE_REDUCTION};
31use crate::polyeval::{f_estrin_polyeval8, f_polyeval6};
32
/// Slow-path resolver for inputs that have no ordinary finite logarithm result.
///
/// Result by input bit pattern:
/// * `-1.0` (`0xbf80_0000`) -> `-inf`
/// * `+inf` (`0x7f80_0000`) -> the input itself
/// * any NaN                -> `x + x` (a NaN derived from the input)
/// * everything else        -> `NaN`
///
/// NOTE(review): the `-1.0` pole matches the domain of `log1p` rather than
/// plain `log`; confirm against the call sites, none of which are in this file.
#[cold]
pub(crate) fn special_logf(x: f32) -> f32 {
    const MINUS_ONE_BITS: u32 = 0xbf80_0000;
    const POS_INF_BITS: u32 = 0x7f80_0000;
    // Largest value of `bits << 1` that is not a NaN (it corresponds to +/-inf).
    const INF_SHIFTED: u32 = 0xff00_0000;

    match x.to_bits() {
        // x == -1.0
        MINUS_ONE_BITS => f32::NEG_INFINITY,
        // x == +inf
        POS_INF_BITS => x,
        // Dropping the sign bit leaves exponent and mantissa; anything above
        // the infinity pattern is a NaN, which is propagated through an add.
        bits if (bits << 1) > INF_SHIFTED => x + x,
        // All remaining inputs are reported as invalid.
        _ => f32::NAN,
    }
}
49
/// Core natural-logarithm kernel evaluated in `f64`.
///
/// Splits `x` into exponent and mantissa, picks a table-driven reduction
/// factor `r` from the top 7 mantissa bits, and evaluates a degree-5
/// polynomial in `u = r * m - 1`. The result is assembled as
/// `e * ln(2) + LOG_R_DD[index] + u * P(u)`.
///
/// No special-case handling is done here: the sign bit, zero, subnormals,
/// infinities and NaN are not checked. Callers are expected to pass a
/// positive, normal, finite value (assumption based on how the exponent is
/// extracted below; `f_log1pf` in this file guards `x1p > 0` before calling).
#[inline]
pub(crate) fn core_logf(x: f64) -> f64 {
    let x_u = x.to_bits();

    // IEEE-754 binary64 exponent bias: 2^(11-1) - 1 = 1023.
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let mut x_e: i32 = -(E_BIAS as i32);

    // log(x) = log(2^x_e * x_m)
    //        = x_e * log(2) + log(x_m)
    // Range reduction for log(x_m):
    // For each x_m, we would like to find r such that:
    //   -2^-8 <= r * x_m - 1 < 2^-7
    // The table index is the 7 leading mantissa bits (bits 45..=51).
    let shifted = (x_u >> 45) as i32;
    let index = shifted & 0x7F;
    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);

    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits
    // (the same bits that form `index`) are all 1's: adding 1 << 45 then
    // carries into the exponent field.
    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
    let e_x = x_e as f64;

    // ln(2) split into a high part with trailing zero bits and a low correction.
    const LOG_2_HI: f64 = f64::from_bits(0x3fe62e42fefa3800);
    const LOG_2_LO: f64 = f64::from_bits(0x3d2ef35793c76730);

    // Table entry is used as a (low, high) pair: `.1` feeds the high sum and
    // `.0` the low sum. Presumably it stores -log(r) in double-double form —
    // TODO confirm against the table definition in `crate::logs`.
    let log_r_dd = LOG_R_DD[index as usize];

    // hi is exact
    let hi = f_fmla(e_x, LOG_2_HI, f64::from_bits(log_r_dd.1));
    let lo = f_fmla(e_x, LOG_2_LO, f64::from_bits(log_r_dd.0));

    // Set m = 1.mantissa.
    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
    let m = f64::from_bits(x_m);

    // u = r * m - 1, computed in one of two ways depending on whether the
    // target is compiled with hardware FMA support.
    let u;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        u = f_fmla(r, m, -1.0); // exact
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::logs::LOG_CD;
        // c keeps the exponent and the 7 leading mantissa bits of m, so
        // m - c only carries the low mantissa bits.
        // LOG_CD[index] presumably holds r * c - 1 — TODO confirm against the table.
        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
        let c = f64::from_bits(c_m);
        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
    }

    let r1 = hi;
    // Polynomial for log(1+x)/x generated in Sollya:
    // d = [-2^-8, 2^-7];
    // f_log = log(1 + x)/x;
    // Q = fpminimax(f_log, 5, [|D...|], d);
    // See ./notes/log1pf_core.sollya
    let p = f_polyeval6(
        u,
        f64::from_bits(0x3fefffffffffffff),
        f64::from_bits(0xbfdffffffffff3e6),
        f64::from_bits(0x3fd5555555626b74),
        f64::from_bits(0xbfd0000026aeecc8),
        f64::from_bits(0x3fc9999114d16c06),
        f64::from_bits(0xbfc51e433a85278a),
    );
    // log(x) ~= (hi + u * P(u)) + lo; the small `lo` term is added last.
    f_fmla(p, u, r1) + lo
}
127
128/// Computes log(x+1)
129///
130/// Max ULP 0.5
131#[inline]
132pub fn f_log1pf(x: f32) -> f32 {
133    let ux = x.to_bits().wrapping_shl(1);
134    if ux >= 0xffu32 << 24 || ux == 0 {
135        // |x| == 0, |x| == inf, x == NaN
136        if ux == 0 {
137            return x;
138        }
139        if x.is_infinite() {
140            return if x.is_sign_positive() {
141                f32::INFINITY
142            } else {
143                f32::NAN
144            };
145        }
146        return x + f32::NAN;
147    }
148
149    let xd = x as f64;
150    let ax = x.to_bits() & 0x7fff_ffffu32;
151
152    // Use log1p(x) = log(1 + x) for |x| > 2^-6;
153    if ax > 0x3c80_0000u32 {
154        if x == -1. {
155            return f32::NEG_INFINITY;
156        }
157        let x1p = xd + 1.;
158        if x1p <= 0. {
159            if x1p == 0. {
160                return f32::NEG_INFINITY;
161            }
162            return f32::NAN;
163        }
164        return core_logf(x1p) as f32;
165    }
166
167    // log(1+x) is expected to be used near zero
168    // Polynomial generated by Sollya:
169    // d = [-2^-6; 2^-6];
170    // f_log1pf = log(1+x)/x;
171    // Q = fpminimax(f_log1pf, 7, [|0, D...|], d);
172    // See ./notes/log1pf.sollya
173
174    let p = f_estrin_polyeval8(
175        xd,
176        f64::from_bits(0x3ff0000000000000),
177        f64::from_bits(0xbfe0000000000000),
178        f64::from_bits(0x3fd5555555556aad),
179        f64::from_bits(0xbfd000000000181a),
180        f64::from_bits(0x3fc999998998124e),
181        f64::from_bits(0xbfc55555452e2a2b),
182        f64::from_bits(0x3fc24adb8cde4aa7),
183        f64::from_bits(0xbfc0019db915ef6f),
184    ) * xd;
185    p as f32
186}
187
188#[inline]
189pub(crate) fn core_log1pf(x: f32) -> f64 {
190    let xd = x as f64;
191    let ax = x.to_bits() & 0x7fff_ffffu32;
192
193    // Use log1p(x) = log(1 + x) for |x| > 2^-6;
194    if ax > 0x3c80_0000u32 {
195        let x1p = xd + 1.;
196        return core_logf(x1p);
197    }
198
199    // log(1+x) is expected to be used near zero
200    // Polynomial generated by Sollya:
201    // d = [-2^-6; 2^-6];
202    // f_log1pf = log(1+x)/x;
203    // Q = fpminimax(f_log1pf, 7, [|0, D...|], d);
204    // See ./notes/log1pf.sollya
205
206    f_estrin_polyeval8(
207        xd,
208        f64::from_bits(0x3ff0000000000000),
209        f64::from_bits(0xbfe0000000000000),
210        f64::from_bits(0x3fd5555555556aad),
211        f64::from_bits(0xbfd000000000181a),
212        f64::from_bits(0x3fc999998998124e),
213        f64::from_bits(0xbfc55555452e2a2b),
214        f64::from_bits(0x3fc24adb8cde4aa7),
215        f64::from_bits(0xbfc0019db915ef6f),
216    ) * xd
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_log1pf` against reference values across both
    /// evaluation paths (near-zero polynomial and `log(1 + x)`).
    #[test]
    fn log1pf_works() {
        // NaN inputs propagate.
        assert!(f_log1pf(f32::from_bits(0xffefb9a7)).is_nan());
        assert!(f_log1pf(f32::NAN).is_nan());
        assert_eq!(f_log1pf(f32::from_bits(0x41078feb)), 2.2484074);
        assert_eq!(f_log1pf(-0.0000014305108), -0.0000014305118);
        assert_eq!(f_log1pf(0.0), 0.0);
        assert_eq!(f_log1pf(2.0), 1.0986123);
        assert_eq!(f_log1pf(-0.7), -1.2039728);
        assert_eq!(f_log1pf(-0.0000000000043243), -4.3243e-12);
        assert_eq!(f_log1pf(f32::INFINITY), f32::INFINITY);
        // Below the domain boundary x = -1.
        assert!(f_log1pf(-2.0).is_nan());
    }

    /// Covers the domain boundary and signed special values.
    #[test]
    fn log1pf_special_values() {
        // Pole at x = -1.
        assert_eq!(f_log1pf(-1.0), f32::NEG_INFINITY);
        // -inf is outside the domain.
        assert!(f_log1pf(f32::NEG_INFINITY).is_nan());
        // Signed zero is returned unchanged; compare bits because 0.0 == -0.0.
        assert_eq!(f_log1pf(-0.0f32).to_bits(), (-0.0f32).to_bits());
        assert_eq!(f_log1pf(0.0f32).to_bits(), 0.0f32.to_bits());
    }
}