pxfm/logs/
log10p1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::logs::log1pf::special_logf;
31use std::hint::black_box;
32
33static TR: [u64; 65] = [
34    0x3ff0000000000000,
35    0x3fef81f820000000,
36    0x3fef07c1f0000000,
37    0x3fee9131ac000000,
38    0x3fee1e1e1e000000,
39    0x3fedae6077000000,
40    0x3fed41d41d000000,
41    0x3fecd85689000000,
42    0x3fec71c71c000000,
43    0x3fec0e0704000000,
44    0x3febacf915000000,
45    0x3feb4e81b5000000,
46    0x3feaf286bd000000,
47    0x3fea98ef60000000,
48    0x3fea41a41a000000,
49    0x3fe9ec8e95000000,
50    0x3fe999999a000000,
51    0x3fe948b0fd000000,
52    0x3fe8f9c190000000,
53    0x3fe8acb90f000000,
54    0x3fe8618618000000,
55    0x3fe8181818000000,
56    0x3fe7d05f41000000,
57    0x3fe78a4c81000000,
58    0x3fe745d174000000,
59    0x3fe702e05c000000,
60    0x3fe6c16c17000000,
61    0x3fe6816817000000,
62    0x3fe642c859000000,
63    0x3fe6058160000000,
64    0x3fe5c9882c000000,
65    0x3fe58ed231000000,
66    0x3fe5555555000000,
67    0x3fe51d07eb000000,
68    0x3fe4e5e0a7000000,
69    0x3fe4afd6a0000000,
70    0x3fe47ae148000000,
71    0x3fe446f865000000,
72    0x3fe4141414000000,
73    0x3fe3e22cbd000000,
74    0x3fe3b13b14000000,
75    0x3fe3813814000000,
76    0x3fe3521cfb000000,
77    0x3fe323e34a000000,
78    0x3fe2f684be000000,
79    0x3fe2c9fb4e000000,
80    0x3fe29e412a000000,
81    0x3fe27350b9000000,
82    0x3fe2492492000000,
83    0x3fe21fb781000000,
84    0x3fe1f7047e000000,
85    0x3fe1cf06ae000000,
86    0x3fe1a7b961000000,
87    0x3fe1811812000000,
88    0x3fe15b1e5f000000,
89    0x3fe135c811000000,
90    0x3fe1111111000000,
91    0x3fe0ecf56c000000,
92    0x3fe0c97150000000,
93    0x3fe0a6810a000000,
94    0x3fe0842108000000,
95    0x3fe0624dd3000000,
96    0x3fe0410410000000,
97    0x3fe0204081000000,
98    0x3fe0000000000000,
99];
100
101static TL: [u64; 65] = [
102    0xbd4562ec497ef351,
103    0x3f7b9476892ea99c,
104    0x3f8b5e909c959eec,
105    0x3f945f4f59ec84f0,
106    0x3f9af5f92cbcf2aa,
107    0x3fa0ba01a6069052,
108    0x3fa3ed119b99dd41,
109    0x3fa714834298a088,
110    0x3faa30a9d98309c1,
111    0x3fad41d51266b9d9,
112    0x3fb02428c0f62dfc,
113    0x3fb1a23444eea521,
114    0x3fb31b30543f2597,
115    0x3fb48f3ed39bd5e7,
116    0x3fb5fe8049a0bd06,
117    0x3fb769140a6a78ea,
118    0x3fb8cf1836c96595,
119    0x3fba30a9d5551a84,
120    0x3fbb8de4d1ee5b21,
121    0x3fbce6e4202c7bc9,
122    0x3fbe3bc1accaa6ea,
123    0x3fbf8c9683b584b7,
124    0x3fc06cbd68ca86e0,
125    0x3fc11142f19de3a2,
126    0x3fc1b3e71fa795f0,
127    0x3fc254b4d37a3354,
128    0x3fc2f3b6912cab79,
129    0x3fc390f6831144f7,
130    0x3fc42c7e7fffb21a,
131    0x3fc4c65808c779ae,
132    0x3fc55e8c507508c7,
133    0x3fc5f52445deb049,
134    0x3fc68a288c3efe72,
135    0x3fc71da17bdef98b,
136    0x3fc7af9736089c4b,
137    0x3fc84011952a11eb,
138    0x3fc8cf1837a7d6d1,
139    0x3fc95cb2891e3048,
140    0x3fc9e8e7b0f85651,
141    0x3fca73beaa5d9dfe,
142    0x3fcafd3e39454544,
143    0x3fcb856cf060c662,
144    0x3fcc0c5134de0c6d,
145    0x3fcc91f1371bb611,
146    0x3fcd1652ffcd2bc5,
147    0x3fcd997c6f634ae6,
148    0x3fce1b733ab8fbad,
149    0x3fce9c3ceadab4c8,
150    0x3fcf1bdeec438f77,
151    0x3fcf9a5e7a5f906f,
152    0x3fd00be05ac02564,
153    0x3fd04a054d81990c,
154    0x3fd087a083594e33,
155    0x3fd0c4b457098b4f,
156    0x3fd101431aa1f48a,
157    0x3fd13d4f08b98411,
158    0x3fd178da53edaecb,
159    0x3fd1b3e71e9f9391,
160    0x3fd1ee777defd526,
161    0x3fd2288d7b48d874,
162    0x3fd2622b0f52dad8,
163    0x3fd29b522a4c594c,
164    0x3fd2d404b0e305b9,
165    0x3fd30c4478f3f21d,
166    0x3fd34413509f6f4d,
167];
168
169#[cold]
170fn log10p1f_accurate(ax: u32, ux: u32, v: f64, z: f64, l: f64, e: i32) -> f32 {
171    if ax < 0x3d32743eu32 {
172        // |x| < 0x1.64e87cp-5f
173        if ux == 0xa6aba8afu32 {
174            return black_box(f32::from_bits(0xa61519de)) + black_box(f32::from_bits(0x19800000));
175        }
176        if ux == 0xaf39b9a7u32 {
177            return black_box(f32::from_bits(0xaea151a1)) + black_box(f32::from_bits(0x22000000));
178        }
179        if ux == 0x399a7c00u32 {
180            return black_box(f32::from_bits(0x390629e5)) + black_box(f32::from_bits(0x2c800000));
181        }
182        let z = z / (2.0 + z);
183        let z2 = z * z;
184        let z4 = z2 * z2;
185
186        const C: [u64; 4] = [
187            0x3febcb7b1526e50f,
188            0x3fd287a76370129d,
189            0x3fc63c62378fa3db,
190            0x3fbfca4139a42374,
191        ];
192
193        let r0 = f_fmla(z2, f64::from_bits(C[3]), f64::from_bits(C[2]));
194        let r1 = f_fmla(z2, f64::from_bits(C[1]), f64::from_bits(C[0]));
195
196        let r = z * f_fmla(z4, r0, r1);
197        return r as f32;
198    }
199    if ux == 0x7956ba5eu32 {
200        return black_box(f32::from_bits(0x420b5f5d)) + black_box(f32::from_bits(0x35800000));
201    }
202    if ux == 0xbd86ffb9u32 {
203        return black_box(f32::from_bits(0xbcf29a9b)) + black_box(f32::from_bits(0x30000000));
204    }
205    const C: [u64; 7] = [
206        0x3fdbcb7b1526e50e,
207        0xbfcbcb7b1526e53d,
208        0x3fc287a7636f3fa2,
209        0xbfbbcb7b146a14b3,
210        0x3fb63c627d5219cb,
211        0xbfb2880736c8762d,
212        0x3fafc1ecf913961a,
213    ];
214
215    let v2 = v * v;
216
217    let xv0 = f_fmla(v, f64::from_bits(C[1]), f64::from_bits(C[0]));
218    let xv1 = f_fmla(v, f64::from_bits(C[3]), f64::from_bits(C[2]));
219    let xv2 = f_fmla(v, f64::from_bits(C[5]), f64::from_bits(C[4]));
220
221    let xw0 = f_fmla(v2, f64::from_bits(C[6]), xv2);
222    let xw1 = f_fmla(v2, xw0, xv1);
223
224    let mut f = v * f_fmla(v2, xw1, xv0);
225    f += l - f64::from_bits(TL[0]);
226    let r = f_fmla(e as f64, f64::from_bits(0x3fd34413509f79ff), f);
227    r as f32
228}
229
230/// Computes log10(x+1)
231///
232/// Max ULP 0.5
233#[inline]
234pub fn f_log10p1f(x: f32) -> f32 {
235    let z = x as f64;
236    let t = x.to_bits();
237    let ux: u32 = t;
238    if ux >= 0x17fu32 << 23 {
239        // x <= -1
240        return special_logf(x);
241    }
242    let ax = ux & 0x7fff_ffff;
243    if ax == 0 {
244        return f32::copysign(0., x);
245    }
246    if ax >= (0xff << 23) {
247        // +inf, nan
248        return special_logf(x);
249    }
250
251    let mut tz = (z + 1.0).to_bits();
252    let m: u64 = tz & 0x000f_ffff_ffff_ffff;
253    let e: i32 = (tz >> 52).wrapping_sub(1023) as i32;
254    let j: i32 = ((m.wrapping_add((1i64 << 45) as u64)) >> 46) as i32;
255    tz = m | (0x3ffu64 << 52);
256    let ix = f64::from_bits(TR[j as usize]);
257    let l = f64::from_bits(TL[j as usize]);
258    let off = e as f64 * f64::from_bits(0x3fd34413509f79ff) + l;
259    let v = f_fmla(f64::from_bits(tz), ix, -1.);
260
261    const H: [u64; 4] = [
262        0x3fdbcb7b150bf6d8,
263        0xbfcbcb7b1738c07e,
264        0x3fc287de19e795c5,
265        0xbfbbca44edc44bc4,
266    ];
267
268    let v2 = v * v;
269
270    let zwf0 = f_fmla(v, f64::from_bits(H[3]), f64::from_bits(H[2]));
271    let zwf1 = f_fmla(v, f64::from_bits(H[1]), f64::from_bits(H[0]));
272
273    let f = f_fmla(v2, zwf0, zwf1);
274    let r = f_fmla(v, f, off);
275    let ub: f32 = r as f32;
276    let lb: f32 = (r + f64::from_bits(0x3d55c00000000000)) as f32;
277    if ub != lb {
278        return log10p1f_accurate(ax, ux, v, z, l, e);
279    }
280    ub
281}
282
283#[cfg(test)]
284mod tests {
285    use super::*;
286
287    #[test]
288    fn test_log10p1f() {
289        assert_eq!(f_log10p1f(0.0), 0.0);
290        assert_eq!(f_log10p1f(1.0), 0.30103);
291        assert_eq!(f_log10p1f(-0.0432432), -0.019198442);
292        assert_eq!(f_log10p1f(-0.009874634), -0.0043098135);
293        assert_eq!(f_log10p1f(1.2443), 0.35108092);
294        assert!(f_log10p1f(-1.0432432).is_nan());
295    }
296}