pxfm/logs/
log2f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf, set_exponent_f32};
30use crate::polyeval::f_polyeval3;
31
32pub(crate) static LOG2_R: [u64; 128] = [
33    0x0000000000000000,
34    0x3f872c7ba20f7327,
35    0x3f9743ee861f3556,
36    0x3fa184b8e4c56af8,
37    0x3fa77394c9d958d5,
38    0x3fad6ebd1f1febfe,
39    0x3fb1bb32a600549d,
40    0x3fb4c560fe68af88,
41    0x3fb7d60496cfbb4c,
42    0x3fb960caf9abb7ca,
43    0x3fbc7b528b70f1c5,
44    0x3fbf9c95dc1d1165,
45    0x3fc097e38ce60649,
46    0x3fc22dadc2ab3497,
47    0x3fc3c6fb650cde51,
48    0x3fc494f863b8df35,
49    0x3fc633a8bf437ce1,
50    0x3fc7046031c79f85,
51    0x3fc8a8980abfbd32,
52    0x3fc97c1cb13c7ec1,
53    0x3fcb2602497d5346,
54    0x3fcbfc67a7fff4cc,
55    0x3fcdac22d3e441d3,
56    0x3fce857d3d361368,
57    0x3fd01d9bbcfa61d4,
58    0x3fd08bce0d95fa38,
59    0x3fd169c05363f158,
60    0x3fd1d982c9d52708,
61    0x3fd249cd2b13cd6c,
62    0x3fd32bfee370ee68,
63    0x3fd39de8e1559f6f,
64    0x3fd4106017c3eca3,
65    0x3fd4f6fbb2cec598,
66    0x3fd56b22e6b578e5,
67    0x3fd5dfdcf1eeae0e,
68    0x3fd6552b49986277,
69    0x3fd6cb0f6865c8ea,
70    0x3fd7b89f02cf2aad,
71    0x3fd8304d90c11fd3,
72    0x3fd8a8980abfbd32,
73    0x3fd921800924dd3b,
74    0x3fd99b072a96c6b2,
75    0x3fda8ff971810a5e,
76    0x3fdb0b67f4f46810,
77    0x3fdb877c57b1b070,
78    0x3fdc043859e2fdb3,
79    0x3fdc819dc2d45fe4,
80    0x3fdcffae611ad12b,
81    0x3fdd7e6c0abc3579,
82    0x3fddfdd89d586e2b,
83    0x3fde7df5fe538ab3,
84    0x3fdefec61b011f85,
85    0x3fdf804ae8d0cd02,
86    0x3fe0014332be0033,
87    0x3fe042bd4b9a7c99,
88    0x3fe08494c66b8ef0,
89    0x3fe0c6caaf0c5597,
90    0x3fe1096015dee4da,
91    0x3fe14c560fe68af9,
92    0x3fe18fadb6e2d3c2,
93    0x3fe1d368296b5255,
94    0x3fe217868b0c37e8,
95    0x3fe25c0a0463beb0,
96    0x3fe2a0f3c340705c,
97    0x3fe2e644fac04fd8,
98    0x3fe2e644fac04fd8,
99    0x3fe32bfee370ee68,
100    0x3fe37222bb70747c,
101    0x3fe3b8b1c68fa6ed,
102    0x3fe3ffad4e74f1d6,
103    0x3fe44716a2c08262,
104    0x3fe44716a2c08262,
105    0x3fe48eef19317991,
106    0x3fe4d7380dcc422d,
107    0x3fe51ff2e30214bc,
108    0x3fe5692101d9b4a6,
109    0x3fe5b2c3da19723b,
110    0x3fe5b2c3da19723b,
111    0x3fe5fcdce2727ddb,
112    0x3fe6476d98ad990a,
113    0x3fe6927781d932a8,
114    0x3fe6927781d932a8,
115    0x3fe6ddfc2a78fc63,
116    0x3fe729fd26b707c8,
117    0x3fe7767c12967a45,
118    0x3fe7767c12967a45,
119    0x3fe7c37a9227e7fb,
120    0x3fe810fa51bf65fd,
121    0x3fe810fa51bf65fd,
122    0x3fe85efd062c656d,
123    0x3fe8ad846cf369a4,
124    0x3fe8ad846cf369a4,
125    0x3fe8fc924c89ac84,
126    0x3fe94c287492c4db,
127    0x3fe94c287492c4db,
128    0x3fe99c48be2063c8,
129    0x3fe9ecf50bf43f13,
130    0x3fe9ecf50bf43f13,
131    0x3fea3e2f4ac43f60,
132    0x3fea8ff971810a5e,
133    0x3fea8ff971810a5e,
134    0x3feae255819f022d,
135    0x3feb35458761d479,
136    0x3feb35458761d479,
137    0x3feb88cb9a2ab521,
138    0x3feb88cb9a2ab521,
139    0x3febdce9dcc96187,
140    0x3fec31a27dd00b4a,
141    0x3fec31a27dd00b4a,
142    0x3fec86f7b7ea4a89,
143    0x3fec86f7b7ea4a89,
144    0x3fecdcebd2373995,
145    0x3fed338120a6dd9d,
146    0x3fed338120a6dd9d,
147    0x3fed8aba045b01c8,
148    0x3fed8aba045b01c8,
149    0x3fede298ec0bac0d,
150    0x3fede298ec0bac0d,
151    0x3fee3b20546f554a,
152    0x3fee3b20546f554a,
153    0x3fee9452c8a71028,
154    0x3fee9452c8a71028,
155    0x3feeee32e2aeccbf,
156    0x3feeee32e2aeccbf,
157    0x3fef48c34bd1e96f,
158    0x3fef48c34bd1e96f,
159    0x3fefa406bd2443df,
160    0x3ff0000000000000,
161];
162
163/// Logarithm of base 2
164///
165/// Max found ULP 0.4999996
166#[inline]
167pub fn f_log2f(x: f32) -> f32 {
168    let mut x_u = x.to_bits();
169
170    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
171
172    let mut m = -(E_BIAS as i32);
173    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
174        if x == 0.0 {
175            return f32::NEG_INFINITY;
176        }
177        if x_u == 0x80000000u32 {
178            return f32::NEG_INFINITY;
179        }
180        if x.is_sign_negative() && !x.is_nan() {
181            return f32::NAN + x;
182        }
183        // x is +inf or nan
184        if x.is_nan() || x.is_infinite() {
185            return x + x;
186        }
187        // Normalize denormal inputs.
188        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
189        m -= 23;
190    }
191
192    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
193    let mant = x_u & 0x007F_FFFF;
194    let index = mant.wrapping_shr(16);
195
196    x_u = set_exponent_f32(x_u, 0x7F);
197
198    let v;
199    let u = f32::from_bits(x_u);
200
201    #[cfg(any(
202        all(
203            any(target_arch = "x86", target_arch = "x86_64"),
204            target_feature = "fma"
205        ),
206        all(target_arch = "aarch64", target_feature = "neon")
207    ))]
208    {
209        use crate::logs::logf::LOG_REDUCTION_F32;
210        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
211    }
212    #[cfg(not(any(
213        all(
214            any(target_arch = "x86", target_arch = "x86_64"),
215            target_feature = "fma"
216        ),
217        all(target_arch = "aarch64", target_feature = "neon")
218    )))]
219    {
220        use crate::logs::LOG_RANGE_REDUCTION;
221        v = f_fmla(
222            u as f64,
223            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
224            -1.0,
225        ); // Exact
226    }
227    // Degree-5 polynomial approximation of log2 generated by Sollya with:
228    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
229
230    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
231
232    const COEFFS: [u64; 5] = [
233        0x3ff71547652b8133,
234        0xbfe71547652d1e33,
235        0x3fdec70a098473de,
236        0xbfd7154c5ccdf121,
237        0x3fd2514fd90a130a,
238    ];
239    let v2 = v * v; // Exact
240    let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
241    let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
242    let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
243
244    let r = f_polyeval3(v2, c0, c1, c2);
245    r as f32
246}
247
248/// Natural logarithm using FMA
249///
250/// Max found ULP 0.4999996
251#[inline(always)]
252#[allow(dead_code)]
253pub(crate) fn f_log2fx(x: f32) -> f64 {
254    let mut x_u = x.to_bits();
255
256    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
257
258    let mut m = -(E_BIAS as i32);
259    if x_u == 0x3f80_0000u32 {
260        return 0.0;
261    }
262
263    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
264        if x == 0.0 {
265            return f64::NEG_INFINITY;
266        }
267        if x_u == 0x80000000u32 {
268            return f64::NEG_INFINITY;
269        }
270        if x.is_sign_negative() && !x.is_nan() {
271            return f64::NAN + x as f64;
272        }
273        // x is +inf or nan
274        if x.is_nan() || x.is_infinite() {
275            return (x + x) as f64;
276        }
277        // Normalize denormal inputs.
278        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
279        m -= 23;
280    }
281
282    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
283    let mant = x_u & 0x007F_FFFF;
284    let index = mant.wrapping_shr(16);
285
286    x_u = set_exponent_f32(x_u, 0x7F);
287
288    let v;
289    let u = f32::from_bits(x_u);
290
291    #[cfg(any(
292        all(
293            any(target_arch = "x86", target_arch = "x86_64"),
294            target_feature = "fma"
295        ),
296        all(target_arch = "aarch64", target_feature = "neon")
297    ))]
298    {
299        use crate::logs::logf::LOG_REDUCTION_F32;
300        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
301    }
302    #[cfg(not(any(
303        all(
304            any(target_arch = "x86", target_arch = "x86_64"),
305            target_feature = "fma"
306        ),
307        all(target_arch = "aarch64", target_feature = "neon")
308    )))]
309    {
310        use crate::logs::log2::LOG_RANGE_REDUCTION;
311        v = f_fmla(
312            u as f64,
313            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
314            -1.0,
315        ); // Exact
316    }
317    // Degree-5 polynomial approximation of log2 generated by Sollya with:
318    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
319
320    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
321
322    const COEFFS: [u64; 5] = [
323        0x3ff71547652b8133,
324        0xbfe71547652d1e33,
325        0x3fdec70a098473de,
326        0xbfd7154c5ccdf121,
327        0x3fd2514fd90a130a,
328    ];
329    let v2 = v * v; // Exact
330    let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
331    let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
332    let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
333
334    f_polyeval3(v2, c0, c1, c2)
335}
336
337/// Dirty log2 using FMA
338#[inline(always)]
339#[allow(dead_code)]
340pub(crate) fn dirty_log2f(d: f32) -> f32 {
341    let mut ix = d.to_bits();
342    /* reduce x into [sqrt(2)/2, sqrt(2)] */
343    ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3);
344    let n = (ix >> 23) as i32 - 0x7f;
345    ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3);
346    let a = f32::from_bits(ix);
347
348    let x = (a - 1.) / (a + 1.);
349
350    let x2 = x * x;
351    let mut u = 0.4121985850084821691e+0;
352    u = f_fmlaf(u, x2, 0.5770780163490337802e+0);
353    u = f_fmlaf(u, x2, 0.9617966939259845749e+0);
354    f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32))
355}
356
357#[cfg(test)]
358mod tests {
359    use super::*;
360
361    #[test]
362    fn test_log2f() {
363        assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
364        assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
365        assert_eq!(f_log2f(0.), f32::NEG_INFINITY);
366        assert_eq!(f_log2f(1.0), 0.0);
367        assert!(f_log2f(-1.).is_nan());
368        assert!(f_log2f(f32::NAN).is_nan());
369        assert!(f_log2f(f32::NEG_INFINITY).is_nan());
370        assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY);
371    }
372
373    #[test]
374    fn test_dirty_log2f() {
375        assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
376        assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
377    }
378}