pxfm/logs/
log2p1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use std::hint::black_box;
31
32static IX: [u64; 129] = [
33    0x3ff0000000000000,
34    0x3fefc07f01fc0000,
35    0x3fef81f81f820000,
36    0x3fef44659e4a0000,
37    0x3fef07c1f07c0000,
38    0x3feecc07b3020000,
39    0x3fee9131abf00000,
40    0x3fee573ac9020000,
41    0x3fee1e1e1e1e0000,
42    0x3fede5d6e3f80000,
43    0x3fedae6076ba0000,
44    0x3fed77b654b80000,
45    0x3fed41d41d420000,
46    0x3fed0cb58f6e0000,
47    0x3fecd85689040000,
48    0x3feca4b3055e0000,
49    0x3fec71c71c720000,
50    0x3fec3f8f01c40000,
51    0x3fec0e0703820000,
52    0x3febdd2b89940000,
53    0x3febacf914c20000,
54    0x3feb7d6c3dda0000,
55    0x3feb4e81b4e80000,
56    0x3feb2036406c0000,
57    0x3feaf286bca20000,
58    0x3feac5701ac60000,
59    0x3fea98ef606a0000,
60    0x3fea6d01a6d00000,
61    0x3fea41a41a420000,
62    0x3fea16d3f97a0000,
63    0x3fe9ec8e95100000,
64    0x3fe9c2d14ee40000,
65    0x3fe99999999a0000,
66    0x3fe970e4f80c0000,
67    0x3fe948b0fcd60000,
68    0x3fe920fb49d00000,
69    0x3fe8f9c18f9c0000,
70    0x3fe8d3018d300000,
71    0x3fe8acb90f6c0000,
72    0x3fe886e5f0ac0000,
73    0x3fe8618618620000,
74    0x3fe83c977ab20000,
75    0x3fe8181818180000,
76    0x3fe7f405fd020000,
77    0x3fe7d05f417e0000,
78    0x3fe7ad2208e00000,
79    0x3fe78a4c81780000,
80    0x3fe767dce4340000,
81    0x3fe745d1745e0000,
82    0x3fe724287f460000,
83    0x3fe702e05c0c0000,
84    0x3fe6e1f76b440000,
85    0x3fe6c16c16c20000,
86    0x3fe6a13cd1540000,
87    0x3fe6816816820000,
88    0x3fe661ec6a520000,
89    0x3fe642c8590c0000,
90    0x3fe623fa77020000,
91    0x3fe6058160580000,
92    0x3fe5e75bb8d00000,
93    0x3fe5c9882b940000,
94    0x3fe5ac056b020000,
95    0x3fe58ed230820000,
96    0x3fe571ed3c500000,
97    0x3fe5555555560000,
98    0x3fe5390948f40000,
99    0x3fe51d07eae20000,
100    0x3fe5015015020000,
101    0x3fe4e5e0a7300000,
102    0x3fe4cab887260000,
103    0x3fe4afd6a0520000,
104    0x3fe49539e3b20000,
105    0x3fe47ae147ae0000,
106    0x3fe460cbc7f60000,
107    0x3fe446f865620000,
108    0x3fe42d6625d60000,
109    0x3fe4141414140000,
110    0x3fe3fb013fb00000,
111    0x3fe3e22cbce40000,
112    0x3fe3c995a47c0000,
113    0x3fe3b13b13b20000,
114    0x3fe3991c2c180000,
115    0x3fe3813813820000,
116    0x3fe3698df3de0000,
117    0x3fe3521cfb2c0000,
118    0x3fe33ae45b580000,
119    0x3fe323e34a2c0000,
120    0x3fe30d1901300000,
121    0x3fe2f684bda20000,
122    0x3fe2e025c04c0000,
123    0x3fe2c9fb4d820000,
124    0x3fe2b404ad020000,
125    0x3fe29e4129e40000,
126    0x3fe288b012880000,
127    0x3fe27350b8820000,
128    0x3fe25e2270800000,
129    0x3fe24924924a0000,
130    0x3fe23456789a0000,
131    0x3fe21fb781220000,
132    0x3fe20b470c680000,
133    0x3fe1f7047dc20000,
134    0x3fe1e2ef3b400000,
135    0x3fe1cf06ada20000,
136    0x3fe1bb4a40460000,
137    0x3fe1a7b9611a0000,
138    0x3fe19453808c0000,
139    0x3fe1811811820000,
140    0x3fe16e0689420000,
141    0x3fe15b1e5f760000,
142    0x3fe1485f0e0a0000,
143    0x3fe135c811360000,
144    0x3fe12358e75e0000,
145    0x3fe1111111120000,
146    0x3fe0fef010fe0000,
147    0x3fe0ecf56be60000,
148    0x3fe0db20a8900000,
149    0x3fe0c9714fbc0000,
150    0x3fe0b7e6ec260000,
151    0x3fe0a6810a680000,
152    0x3fe0953f39020000,
153    0x3fe0842108420000,
154    0x3fe073260a480000,
155    0x3fe0624dd2f20000,
156    0x3fe05197f7d80000,
157    0x3fe0410410420000,
158    0x3fe03091b5200000,
159    0x3fe0204081020000,
160    0x3fe0101010100000,
161    0x3fe0000000000000,
162];
163static LIX: [u64; 129] = [
164    0x0000000000000000,
165    0xbf86fe50b6f1eafa,
166    0xbf96e79685c160d5,
167    0xbfa11cd1d51955ba,
168    0xbfa6bad37591e030,
169    0xbfac4dfab908ddb5,
170    0xbfb0eb389fab4795,
171    0xbfb3aa2fdd26ae99,
172    0xbfb663f6faca846b,
173    0xbfb918a16e4cb157,
174    0xbfbbc84240a78a13,
175    0xbfbe72ec1181cfb1,
176    0xbfc08c588cd964e4,
177    0xbfc1dcd19759f2e3,
178    0xbfc32ae9e27627c6,
179    0xbfc476a9f989a58a,
180    0xbfc5c01a39fa6533,
181    0xbfc70742d4eed455,
182    0xbfc84c2bd02d6434,
183    0xbfc98edd077e9f0a,
184    0xbfcacf5e2db31eea,
185    0xbfcc0db6cddaa82d,
186    0xbfcd49ee4c33121a,
187    0xbfce840be751d775,
188    0xbfcfbc16b9003e0b,
189    0xbfd0790adbae3fc0,
190    0xbfd11307dad465b5,
191    0xbfd1ac05b2924cc5,
192    0xbfd24407ab0cc410,
193    0xbfd2db10fc4ea424,
194    0xbfd37124cea58697,
195    0xbfd406463b1d455d,
196    0xbfd49a784bcbaa37,
197    0xbfd52dbdfc4f341d,
198    0xbfd5c01a39ff2c9b,
199    0xbfd6518fe46abaa5,
200    0xbfd6e221cd9d6933,
201    0xbfd771d2ba7f5791,
202    0xbfd800a56315ee2a,
203    0xbfd88e9c72df8611,
204    0xbfd91bba891d495f,
205    0xbfd9a8023920fa4d,
206    0xbfda33760a7fbca6,
207    0xbfdabe18797d2eff,
208    0xbfdb47ebf734b923,
209    0xbfdbd0f2e9eb2b84,
210    0xbfdc592fad2be1aa,
211    0xbfdce0a4923cf5e6,
212    0xbfdd6753e02f4ebc,
213    0xbfdded3fd445af00,
214    0xbfde726aa1e558fe,
215    0xbfdef6d67325ba38,
216    0xbfdf7a8568c8aea6,
217    0xbfdffd799a81be87,
218    0x3fdf804ae8d33c40,
219    0x3fdefec61b04af4e,
220    0x3fde7df5fe572606,
221    0x3fddfdd89d5b0009,
222    0x3fdd7e6c0abbd924,
223    0x3fdcffae611a74d6,
224    0x3fdc819dc2d8578c,
225    0x3fdc043859e5bdbc,
226    0x3fdb877c57b47c04,
227    0x3fdb0b67f4f29a66,
228    0x3fda8ff97183ed07,
229    0x3fda152f14293c74,
230    0x3fd99b072a9289ca,
231    0x3fd921800927e284,
232    0x3fd8a8980ac41130,
233    0x3fd8304d90c2859d,
234    0x3fd7b89f02cbd49a,
235    0x3fd7418aceb84ab1,
236    0x3fd6cb0f68656c95,
237    0x3fd6552b49993dc2,
238    0x3fd5dfdcf1eacd7b,
239    0x3fd56b22e6b97c18,
240    0x3fd4f6fbb2ce6943,
241    0x3fd48365e6957b42,
242    0x3fd4106017c0dbcf,
243    0x3fd39de8e15727d9,
244    0x3fd32bfee37489bc,
245    0x3fd2baa0c34989c3,
246    0x3fd249cd2b177fd5,
247    0x3fd1d982c9d50468,
248    0x3fd169c0536677ac,
249    0x3fd0fa848045f67b,
250    0x3fd08bce0d9a7c60,
251    0x3fd01d9bbcf66a2c,
252    0x3fcf5fd8a90e2d85,
253    0x3fce857d3d3af1e5,
254    0x3fcdac22d3ec5f4e,
255    0x3fccd3c712db459a,
256    0x3fcbfc67a7ff3c22,
257    0x3fcb2602497678f4,
258    0x3fca5094b555a1f8,
259    0x3fc97c1cb136b96f,
260    0x3fc8a8980ac8652d,
261    0x3fc7d60496c83f66,
262    0x3fc7046031c7cdaf,
263    0x3fc633a8bf460335,
264    0x3fc563dc2a08b102,
265    0x3fc494f863bbc1de,
266    0x3fc3c6fb6507a37e,
267    0x3fc2f9e32d5257ec,
268    0x3fc22dadc2a627ef,
269    0x3fc1625931802e49,
270    0x3fc097e38cef9519,
271    0x3fbf9c95dc138295,
272    0x3fbe0b1ae90505f6,
273    0x3fbc7b528b5fcffa,
274    0x3fbaed391abb17a1,
275    0x3fb960caf9bd35ea,
276    0x3fb7d60496e3edeb,
277    0x3fb64ce26bf2108e,
278    0x3fb4c560fe5b573b,
279    0x3fb33f7cde24adfb,
280    0x3fb1bb32a5ed9353,
281    0x3fb0387efbd3006e,
282    0x3fad6ebd1f1d0955,
283    0x3faa6f9c37a8beab,
284    0x3fa77394c9d6762c,
285    0x3fa47aa07358e1a4,
286    0x3fa184b8e4d490ef,
287    0x3f9d23afc4d95c78,
288    0x3f9743ee8678a7cb,
289    0x3f916a21e243bf78,
290    0x3f872c7ba20c907e,
291    0x3f7720d9c0536e17,
292    0x0000000000000000,
293];
294
295// |x| < 1.3862943452718848
296#[cold]
297fn log2p1f_small(z: f64, ax: u32, ux: u32) -> f32 {
298    let z2 = z * z;
299    let z4 = z2 * z2;
300    if ax < 0x3b9d9d34u32 {
301        // |x| < 0x1.3b3a68p-8
302        if ax < 0x39638a7eu32 {
303            // |x| < 0x1.c714fcp-13
304            if ax < 0x329c5639u32 {
305                // |x| < 0x1.38ac72p-26
306                const C: [u64; 2] = [0x3ff71547652b82fe, 0xbfe71547652b82ff];
307                let res = z * f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
308                res as f32
309            } else {
310                if ux == 0x32ff7045u32 {
311                    return black_box(f32::from_bits(0x3338428d))
312                        - black_box(f32::from_bits(0x17c00000));
313                }
314                if ux == 0xb395efbbu32 {
315                    return black_box(f32::from_bits(0xb3d85005))
316                        + black_box(f32::from_bits(0x19800000));
317                }
318                if ux == 0x35a14df7u32 {
319                    return black_box(f32::from_bits(0x35e8b690))
320                        + black_box(f32::from_bits(0x1b800000));
321                }
322                if ux == 0x3841cb81u32 {
323                    return black_box(f32::from_bits(0x388bca4f))
324                        + black_box(f32::from_bits(0x1e000000));
325                }
326                const C: [u64; 4] = [
327                    0x3ff71547652b82fe,
328                    0xbfe71547652b82fd,
329                    0x3fdec709ead0c9a7,
330                    0xbfd7154773c1cb29,
331                ];
332
333                let zxf0 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
334                let zxf1 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
335
336                let zxf = z * f_fmla(z2, zxf0, zxf1);
337                zxf as f32
338            }
339        } else {
340            if ux == 0xbac9363du32 {
341                return black_box(f32::from_bits(0xbb114155))
342                    + black_box(f32::from_bits(0x21000000));
343            }
344            const C: [u64; 6] = [
345                0x3ff71547652b82fe,
346                0xbfe71547652b8300,
347                0x3fdec709dc28f51b,
348                0xbfd7154765157748,
349                0x3fd2778a510a3682,
350                0xbfcec745df1551fc,
351            ];
352
353            let zxf0 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
354            let zxf1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
355            let zxf2 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
356
357            let zw0 = f_fmla(z2, zxf1, zxf2);
358
359            let zxf = z * f_fmla(z4, zxf0, zw0);
360            zxf as f32
361        }
362    } else {
363        const C: [u64; 8] = [
364            0x3ff71547652b82fe,
365            0xbfe71547652b82fb,
366            0x3fdec709dc3b6a73,
367            0xbfd71547652dc090,
368            0x3fd2776c1a889010,
369            0xbfcec7095bd4d208,
370            0x3fca66bec7fc8f70,
371            0xbfc71a900fc3f3f9,
372        ];
373
374        let zxf0 = f_fmla(z, f64::from_bits(C[7]), f64::from_bits(C[6]));
375        let zxf1 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
376        let zxf2 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
377        let zxf3 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
378
379        let zw0 = f_fmla(z2, zxf0, zxf1);
380        let zw1 = f_fmla(z2, zxf2, zxf3);
381
382        let zxf = z * f_fmla(z4, zw0, zw1);
383        zxf as f32
384    }
385}
386
387/// Computes log2(x+1)
388///
389/// Max ULP 0.5
390#[inline]
391pub fn f_log2p1f(x: f32) -> f32 {
392    let mut z = x as f64;
393    let t = x.to_bits();
394    let ux = t;
395    let ax = ux & 0x7fff_ffff;
396    if ux >= 0x17fu32 << 23 {
397        // x <= -1
398        if ux == (0x17fu32 << 23) {
399            return f32::NEG_INFINITY; // -1.0
400        }
401        if ux > (0x1ffu32 << 23) {
402            return x + x;
403        } // nan
404        f32::NAN // x < -1
405    } else if ax >= (0xff << 23) {
406        // +inf, nan
407        if ax > (0xff << 23) {
408            return x + x;
409        } // nan
410        f32::INFINITY
411    } else if ax < 0x3cb7aa26u32 {
412        // |x| < 1.3862943452718848
413        log2p1f_small(z, ax, ux)
414    } else {
415        // |x| >= 0x1.6f544cp-6
416        if ux == 0x4ebd09e3u32 {
417            let h = f32::from_bits(0x41f48013);
418            let l = f32::from_bits(0x35780000);
419            return black_box(h) + black_box(l);
420        }
421        let tp = (z + 1.0).to_bits();
422        let m = tp & 0x000fffffffffffff;
423        let mut e: i32 = (tp >> 52).wrapping_sub(0x3ff) as i32;
424        let j: i32 = ((m as i64).wrapping_add(1i64 << (52 - 8)) >> (52 - 7)) as i32;
425        let k = if j > 53 { 1 } else { 0 };
426        e += k;
427        let xd = m | 0x3ffu64 << 52;
428        z = f_fmla(f64::from_bits(xd), f64::from_bits(IX[j as usize]), -1.0);
429        const C: [u64; 6] = [
430            0x3ff71547652b82fe,
431            0xbfe71547652b82ff,
432            0x3fdec709dc32988b,
433            0xbfd715476521ec2b,
434            0x3fd277801a1ad904,
435            0xbfcec731704d6a88,
436        ];
437        let z2 = z * z;
438        let mut c0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
439        let c2 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
440        let c4 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
441
442        let zv0 = f_fmla(z2, c4, c2);
443
444        c0 = f_fmla(z2, zv0, c0);
445        let res = f_fmla(z, c0, -f64::from_bits(LIX[j as usize])) + e as f64;
446        res as f32
447    }
448}
449
450#[cfg(test)]
451mod tests {
452    use super::*;
453
454    #[test]
455    fn test_log2p1f() {
456        assert_eq!(f_log2p1f(0.0), 0.0);
457        assert_eq!(f_log2p1f(1.0), 1.0);
458        assert_eq!(f_log2p1f(-0.0432432), -0.063775845);
459        assert_eq!(f_log2p1f(-0.009874634), -0.01431689);
460        assert_eq!(f_log2p1f(1.2443), 1.1662655);
461        assert!(f_log2p1f(-1.0432432).is_nan());
462    }
463}