pxfm/compound/
compound_m1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::double_double::DoubleDouble;
31use crate::dyadic_float::{DyadicFloat128, DyadicSign};
32use crate::logs::log1p_fast_dd;
33use crate::pow_exec::pow_expm1_1;
34
35/// Computes (1+x)^y - 1
36///
37/// max found ULP 0.56
38pub fn f_compound_m1(x: f64, y: f64) -> f64 {
39    /*
40       Rules from IEEE 754-2019 for compound (x, n) with n integer:
41           (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
42           (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
43           (c) compound (-1, n) is +0 for n > 0
44           (d) compound (+/-0, n) is 1
45           (e) compound (+Inf, n) is +Inf for n > 0
46           (f) compound (+Inf, n) is +0 for n < 0
47           (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
48           (h) compound (qNaN, n) is qNaN for n <> 0.
49    */
50
51    let x_sign = x.is_sign_negative();
52    let y_sign = y.is_sign_negative();
53
54    let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
55    let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
56
57    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
58
59    let y_mant = y.to_bits() & MANTISSA_MASK;
60    let x_u = x.to_bits();
61    let x_a = x_abs;
62    let y_a = y_abs;
63
64    // If x or y is signaling NaN
65    if x.is_nan() || y.is_nan() {
66        return f64::NAN;
67    }
68
69    let mut s = 1.0;
70
71    // The double precision number that is closest to 1 is (1 - 2^-53), which has
72    //   log2(1 - 2^-53) ~ -1.715...p-53.
73    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
74    //   |y * log2(x)| = 0 or > 1075.
75    // Hence, x^y will either overflow or underflow if x is not zero.
76    if y_mant == 0
77        || y_a > 0x43d7_4910_d52d_3052
78        || x_u == 1f64.to_bits()
79        || x_u >= f64::INFINITY.to_bits()
80        || x_u < f64::MIN.to_bits()
81    {
82        // Exceptional exponents.
83        if y == 0.0 {
84            return 0.0;
85        }
86
87        // (h) compound(qNaN, n) is qNaN for n ≠ 0
88        if x.is_nan() {
89            if y != 0. {
90                return x;
91            } // propagate qNaN
92            return 0.0;
93        }
94
95        // (d) compound(±0, n) is 1
96        if x == 0.0 {
97            return 0.0;
98        }
99
100        // (e, f) compound(+Inf, n)
101        if x.is_infinite() && x > 0.0 {
102            return if y > 0. { x } else { -1.0 };
103        }
104
105        // (g) compound(x, n) is qNaN and signals invalid for x < -1
106        if x < -1.0 {
107            // Optional: raise invalid explicitly
108            return f64::NAN;
109        }
110
111        // (b, c) compound(-1, n)
112        if x == -1.0 {
113            return if y < 0. { f64::INFINITY } else { -1.0 };
114        }
115
116        match y_a {
117            // 0x3fe0_0000_0000_0000 => {
118            //     if x == 0.0 {
119            //         return 0.0;
120            //     }
121            //     let z = Dekker::from_full_exact_add(x, 1.0).sqrt();
122            //     if y_sign {
123            //         const M_ONES: DyadicFloat128 = DyadicFloat128 {
124            //             sign: DyadicSign::Neg,
125            //             exponent: -127,
126            //             mantissa: 0x80000000_00000000_00000000_00000000_u128,
127            //         };
128            //         let z = DyadicFloat128::new_from_f64(z.to_f64());
129            //         (z.reciprocal() + M_ONES).fast_as_f64()
130            //     } else {
131            //         const M_ONES: DyadicFloat128 = DyadicFloat128 {
132            //             sign: DyadicSign::Neg,
133            //             exponent: -127,
134            //             mantissa: 0x80000000_00000000_00000000_00000000_u128,
135            //         };
136            //         let z = DyadicFloat128::new_from_f64(z.to_f64());
137            //         (z + M_ONES).fast_as_f64()
138            //     };
139            // }
140            0x3ff0_0000_0000_0000 => {
141                return if y_sign {
142                    let z = DyadicFloat128::new_from_f64(x);
143                    const ONES: DyadicFloat128 = DyadicFloat128 {
144                        sign: DyadicSign::Pos,
145                        exponent: -127,
146                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
147                    };
148                    const M_ONES: DyadicFloat128 = DyadicFloat128 {
149                        sign: DyadicSign::Neg,
150                        exponent: -127,
151                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
152                    };
153                    let p = (z + ONES).reciprocal() + M_ONES;
154                    p.fast_as_f64()
155                } else {
156                    x
157                };
158            }
159            0x4000_0000_0000_0000 => {
160                const ONES: DyadicFloat128 = DyadicFloat128 {
161                    sign: DyadicSign::Pos,
162                    exponent: -127,
163                    mantissa: 0x80000000_00000000_00000000_00000000_u128,
164                };
165                let z0 = DyadicFloat128::new_from_f64(x) + ONES;
166                let z = z0 * z0;
167                const M_ONES: DyadicFloat128 = DyadicFloat128 {
168                    sign: DyadicSign::Neg,
169                    exponent: -127,
170                    mantissa: 0x80000000_00000000_00000000_00000000_u128,
171                };
172                return if y_sign {
173                    (z.reciprocal() + M_ONES).fast_as_f64()
174                } else {
175                    f64::copysign((z + M_ONES).fast_as_f64(), x)
176                };
177            }
178            _ => {}
179        }
180
181        // |y| > |1075 / log2(1 - 2^-53)|.
182        if y_a >= 0x7ff0_0000_0000_0000 {
183            // y is inf or nan
184            if y_mant != 0 {
185                // y is NaN
186                // pow(1, NaN) = 1
187                // pow(x, NaN) = NaN
188                return if x_u == 1f64.to_bits() { 1.0 } else { y };
189            }
190
191            // Now y is +-Inf
192            if f64::from_bits(x_abs).is_nan() {
193                // pow(NaN, +-Inf) = NaN
194                return x;
195            }
196
197            if x_a == 0x3ff0_0000_0000_0000 {
198                // pow(+-1, +-Inf) = 1.0
199                return 0.0;
200            }
201
202            if x == 0.0 && y_sign {
203                // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
204                return f64::INFINITY;
205            }
206            // pow (|x| < 1, -inf) = +inf
207            // pow (|x| < 1, +inf) = 0.0
208            // pow (|x| > 1, -inf) = 0.0
209            // pow (|x| > 1, +inf) = +inf
210            return if (x_a < 1f64.to_bits()) == y_sign {
211                f64::INFINITY
212            } else {
213                -1.0
214            };
215        }
216
217        // y is finite and non-zero.
218
219        if x_u == 1f64.to_bits() {
220            // pow(1, y) = 1
221            return 0.0;
222        }
223
224        if x == 0.0 {
225            let out_is_neg = x_sign && is_odd_integer(y);
226            if y_sign {
227                // pow(0, negative number) = inf
228                return if out_is_neg {
229                    f64::NEG_INFINITY
230                } else {
231                    f64::INFINITY
232                };
233            }
234            // pow(0, positive number) = 0
235            return -1.0;
236        }
237
238        if x_a == f64::INFINITY.to_bits() {
239            let out_is_neg = x_sign && is_odd_integer(y);
240            if y_sign {
241                return if out_is_neg { -1.0 } else { 1.0 };
242            }
243            return if out_is_neg {
244                f64::NEG_INFINITY
245            } else {
246                f64::INFINITY
247            };
248        }
249
250        if x_a > f64::INFINITY.to_bits() {
251            // x is NaN.
252            // pow (aNaN, 0) is already taken care above.
253            return x;
254        }
255
256        // x is finite and negative, and y is a finite integer.
257        if x_sign {
258            if is_integer(y) {
259                if is_odd_integer(y) {
260                    // sign = -1.0;
261                    static CS: [f64; 2] = [1.0, -1.0];
262
263                    // set sign to 1 for y even, to -1 for y odd
264                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
265                        0usize
266                    } else {
267                        (y as i64 & 0x1) as usize
268                    };
269                    s = CS[y_parity];
270                }
271            } else {
272                // pow( negative, non-integer ) = NaN
273                return f64::NAN;
274            }
275        }
276
277        // y is finite and non-zero.
278
279        if x_u == 1f64.to_bits() {
280            // pow(1, y) = 1
281            return 0.0;
282        }
283
284        if x == 0.0 {
285            let out_is_neg = x_sign && is_odd_integer(y);
286            if y_sign {
287                // pow(0, negative number) = inf
288                return if out_is_neg {
289                    f64::NEG_INFINITY
290                } else {
291                    f64::INFINITY
292                };
293            }
294            // pow(0, positive number) = 0
295            return if out_is_neg { -0.0 } else { 0.0 };
296        }
297
298        if x_a == f64::INFINITY.to_bits() {
299            let out_is_neg = x_sign && is_odd_integer(y);
300            if y_sign {
301                return -1.;
302            }
303            return if out_is_neg {
304                f64::NEG_INFINITY
305            } else {
306                f64::INFINITY
307            };
308        }
309
310        if x_a > f64::INFINITY.to_bits() {
311            // x is NaN.
312            // pow (aNaN, 0) is already taken care above.
313            return x;
314        }
315    }
316
317    // evaluate (1+x)^y explicitly for integer y in [-1024,1024] range and |x|<2^64
318
319    #[cfg(any(
320        all(
321            any(target_arch = "x86", target_arch = "x86_64"),
322            target_feature = "fma"
323        ),
324        all(target_arch = "aarch64", target_feature = "neon")
325    ))]
326    let straight_path_precondition: bool = true;
327    #[cfg(not(any(
328        all(
329            any(target_arch = "x86", target_arch = "x86_64"),
330            target_feature = "fma"
331        ),
332        all(target_arch = "aarch64", target_feature = "neon")
333    )))]
334    let straight_path_precondition: bool = y.is_sign_positive();
335    // this is correct only for positive exponent number without FMA,
336    // otherwise reciprocal may overflow.
337    if is_integer(y)
338        && y_a <= 0x4059800000000000u64
339        && x_a <= 0x4090000000000000u64
340        && x_a > 0x3cc0_0000_0000_0000
341        && straight_path_precondition
342    {
343        let mut s = DoubleDouble::from_full_exact_add(1.0, x);
344        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
345
346        // exponentiation by squaring: O(log(y)) complexity
347        let mut acc = if iter_count % 2 != 0 {
348            s
349        } else {
350            DoubleDouble::new(0., 1.)
351        };
352
353        while {
354            iter_count >>= 1;
355            iter_count
356        } != 0
357        {
358            s = DoubleDouble::mult(s, s);
359            if iter_count % 2 != 0 {
360                acc = DoubleDouble::mult(acc, s);
361            }
362        }
363
364        let mut dz = if y.is_sign_negative() {
365            acc.recip()
366        } else {
367            acc
368        };
369
370        dz = DoubleDouble::full_add_f64(dz, -1.);
371        let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
372        let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
373        if ub == lb {
374            return dz.to_f64();
375        }
376        return mul_fixed_power_hard(x, y);
377    }
378
379    // approximate log1p(x)
380    let l = log1p_fast_dd(x);
381
382    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
383    if ey < 0x36 || ey >= 0x7f5 {
384        return 0.;
385    }
386
387    let r = DoubleDouble::quick_mult_f64(l, y);
388    let res = pow_expm1_1(r, s);
389
390    res.to_f64()
391}
392
393#[cold]
394#[inline(never)]
395fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
396    const ONE: DyadicFloat128 = DyadicFloat128 {
397        sign: DyadicSign::Pos,
398        exponent: -127,
399        mantissa: 0x80000000_00000000_00000000_00000000_u128,
400    };
401    const M_ONE: DyadicFloat128 = DyadicFloat128 {
402        sign: DyadicSign::Neg,
403        exponent: -127,
404        mantissa: 0x80000000_00000000_00000000_00000000_u128,
405    };
406    let mut s = DyadicFloat128::new_from_f64(x) + ONE;
407    let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
408
409    // exponentiation by squaring: O(log(y)) complexity
410    let mut acc = if iter_count % 2 != 0 { s } else { ONE };
411
412    while {
413        iter_count >>= 1;
414        iter_count
415    } != 0
416    {
417        s = s * s;
418        if iter_count % 2 != 0 {
419            acc = acc * s;
420        }
421    }
422
423    if y.is_sign_negative() {
424        (acc.reciprocal() + M_ONE).fast_as_f64()
425    } else {
426        (acc + M_ONE).fast_as_f64()
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use super::*;
433
434    #[test]
435    fn test_compound_exotic() {
436        assert_eq!(
437            f_compound_m1(0.000152587890625, -8.484374999999998),
438            -0.0012936766014690006
439        );
440        assert_eq!(
441            f_compound_m1(
442                0.00000000000000799360578102344,
443                -0.000000000000000000000001654361225106131
444            ),
445            -0.000000000000000000000000000000000000013224311452909338
446        );
447        assert_eq!(
448            f_compound_m1( 4.517647064592699, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055329046628180653),
449            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009449932890153435
450        );
451        assert_eq!(f_compound_m1(
452    11944758478933760000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
453                -1242262631503757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
454            ), -1.);
455    }
456
457    #[test]
458    fn test_compound_m1() {
459        assert_eq!(
460            f_compound_m1(0.0000000000000009991998751296936, -4.),
461            -0.000000000000003996799500518764
462        );
463        assert_eq!(f_compound_m1(-0.003173828125, 25.), -0.0763960132649781);
464        assert_eq!(f_compound_m1(3., 2.8927001953125), 54.154259038961406);
465        assert_eq!(
466            f_compound_m1(-0.43750000000000044, 19.),
467            -0.9999821216263793
468        );
469        assert_eq!(
470            f_compound_m1(127712., -2.0000000000143525),
471            -0.9999999999386903
472        );
473        assert_eq!(
474            f_compound_m1(-0.11718749767214207, 2893226081485815000000000000000.),
475            -1.
476        );
477        assert_eq!(
478            f_compound_m1(2418441935074801400000000., 512.),
479            f64::INFINITY
480        );
481        assert_eq!(
482            f_compound_m1(32.50198364245834, 128000.00000000093),
483            f64::INFINITY
484        );
485        assert_eq!(
486            f_compound_m1(1.584716796877785, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004168916810703412),
487            0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003958869879428553
488        );
489        assert_eq!(
490            f_compound_m1(
491                -0.000000000000000000000000000000001997076793037533,
492                366577337071337140000000000000000f64
493            ),
494            -0.5190938261758579
495        );
496        assert_eq!(f_compound_m1(2.1075630259863374, 0.5), 00.7628281328553664);
497        assert_eq!(f_compound_m1(2.1078916412661783, 0.5), 0.7629213372315222);
498        assert_eq!(f_compound_m1(3.0000000000001115, -0.5), -0.500000000000007);
499        assert_eq!(
500            f_compound_m1(0.0004873839215895903, 3.),
501            0.0014628645098045245
502        );
503
504        assert_eq!(f_compound_m1(-0.483765364602732, 3.), -0.862424399516842);
505        assert_eq!(f_compound_m1(3.0000001192092896, -2.), -0.9375000037252902);
506        assert_eq!(f_compound_m1(29.38323424607434, -1.), -0.9670871115332561);
507
508        assert_eq!(f_compound_m1(-0.4375, 4.), -0.8998870849609375);
509        assert_eq!(
510            f_compound_m1(-0.0039033182037826464, 3.),
511            -0.011664306402886494
512        );
513        assert_eq!(
514            f_compound_m1(0.000000000000000000000000000000000000007715336350455947,
515                          -262034087537726030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
516            -1.,
517        );
518        assert_eq!(f_compound_m1(10.000000059604645, 10.), 25937426005.44638);
519        assert_eq!(f_compound_m1(10., -308.25471555814863), -1.0);
520        assert_eq!(
521            f_compound_m1(5.4172231599824623E-312, 9.4591068440831498E+164),
522            5.124209266851586e-147
523        );
524        assert_eq!(
525            f_compound_m1(5.8776567263633397E-39, 3.4223548116804511E-310),
526            0.0
527        );
528        assert_eq!(
529            f_compound_m1(5.8639503496997932E-148, -7.1936801558778956E+305),
530            0.0
531        );
532        assert_eq!(
533            f_compound_m1(0.9908447265624999,
534                          -19032028850336152000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
535            -1.
536        );
537        assert_eq!(
538            f_compound_m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006952247559980936,
539                          5069789834563405000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
540            3.524643400695958e-163
541        );
542        assert_eq!(
543            f_compound_m1(1.000000000000341,
544                          -69261261804788370000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
545            -1.
546        );
547        assert_eq!(
548            f_compound_m1(
549                0.0000000000000001053438024827798,
550                0.0000000000000001053438024827798
551            ),
552            0.000000000000000000000000000000011097316721530923
553        );
554        assert_eq!(
555            f_compound_m1(
556                0.00000000000000010755285551056508,
557                0.00000000000000010755285551056508
558            ),
559            0.00000000000000000000000000000001156761672847649
560        );
561
562        assert_eq!(f_compound_m1(2.4324324, 1.4324324), 4.850778380908823);
563        assert_eq!(f_compound_m1(2., 5.), 242.);
564        assert_eq!(f_compound_m1(0.4324324, 126.4324324), 5.40545942023447e19);
565        assert!(f_compound_m1(-0.4324324, 126.4324324).is_nan());
566        assert_eq!(f_compound_m1(0.0, 0.0), 0.0);
567        assert_eq!(f_compound_m1(0.0, -1. / 2.), 0.0);
568        assert_eq!(f_compound_m1(-1., -1. / 2.), f64::INFINITY);
569        assert_eq!(f_compound_m1(f64::INFINITY, -1. / 2.), -1.0);
570        assert_eq!(f_compound_m1(f64::INFINITY, 1. / 2.), f64::INFINITY);
571        assert_eq!(f_compound_m1(46.3828125, 46.3828125), 5.248159634773675e77);
572    }
573}