pxfm/cube_roots/
cbrt.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::cube_roots::cbrtf::halley_refine_d;
31use crate::double_double::DoubleDouble;
32use crate::exponents::fast_ldexp;
33use crate::polyeval::f_polyeval4;
34
35/// Computes cube root
36///
37/// Max found ULP 0.5
38pub fn f_cbrt(x: f64) -> f64 {
39    // 1; 2^{1/3}; 2^{2/3}
40    static ESCALE: [f64; 3] = [
41        1.0,
42        f64::from_bits(0x3ff428a2f98d728b),
43        f64::from_bits(0x3ff965fea53d6e3d),
44    ];
45
46    let bits = x.to_bits();
47    let mut exp = ((bits >> 52) & 0x7ff) as i32;
48    let mut mant = bits & ((1u64 << 52) - 1);
49
50    if exp == 0x7ff || x == 0.0 {
51        return x + x;
52    }
53
54    // Normalize subnormal
55    if exp == 0 && x != 0.0 {
56        let norm = x * f64::from_bits(0x4350000000000000); // * 2^54
57        let norm_bits = norm.to_bits();
58        mant = norm_bits & ((1u64 << 52) - 1);
59        exp = ((norm_bits >> 52) & 0x7ff) as i32 - 54;
60    }
61
62    exp -= 1023;
63
64    mant |= 0x3ff << 52;
65    let m = f64::from_bits(mant);
66
67    // Polynomial for x^(1/3) on [1.0; 2.0]
68    // Generated by Sollya:
69    // d = [1.0, 2.0];
70    // f_cbrt = x^(1/3);
71    // Q = fpminimax(f_cbrt, 4, [|D...|], d, relative, floating);
72    // See ./notes/cbrt.sollya
73
74    let p = f_polyeval4(
75        m,
76        f64::from_bits(0x3fe1b0babceeaafa),
77        f64::from_bits(0x3fe2c9a3e8e06a3c),
78        f64::from_bits(0xbfc4dc30afb71885),
79        f64::from_bits(0x3f97a8d3e05458e4),
80    );
81
82    // split exponent e = 3*q + r with r in {0,1,2}
83    // use div_euclid/rem_euclid to get r >= 0
84    let q = exp.div_euclid(3);
85    let rem_scale = exp.rem_euclid(3);
86
87    let z = p * ESCALE[rem_scale as usize];
88
89    let mm = fast_ldexp(m, rem_scale); // bring mantissa into [1;8]
90
91    let r = 1.0 / mm;
92
93    // One Halley's method step
94    // then refine in partial double-double precision with Newton-Raphson iteration
95    let y0 = halley_refine_d(z, mm);
96    let d2y = DoubleDouble::from_exact_mult(y0, y0);
97    let d3y = DoubleDouble::quick_mult_f64(d2y, y0);
98    // Newton-Raphson step
99    // h = (x^3 - a) * r
100    // y1 = y0 - 1/3 * h * y0
101    let h = ((d3y.hi - mm) + d3y.lo) * r;
102    // y1 = y0 - 1/3*y0*(h.lo + h.hi) = y0 - 1/3 *y0*h.lo - 1/3 * y0 * h.hi
103    let y = f_fmla(-f64::from_bits(0x3fd5555555555555), y0 * h, y0);
104
105    f64::copysign(fast_ldexp(y, q), x)
106}
107
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cbrt() {
        // Inputs below the smallest normal double go through the subnormal
        // rescaling branch; the value in between is an ordinary normal input.
        assert_eq!(f_cbrt(5.432309223745e-309), 1.7579026781511548e-103);
        assert_eq!(f_cbrt(1.225158611559834), 1.0700336588124544);
        assert_eq!(f_cbrt(1.39491540182158e-309), 1.1173329935611586e-103);

        // Perfect cubes 27, 64, ..., 729 and their negatives must be exact.
        for k in 3..=9 {
            let root = f64::from(k);
            let cube = root * root * root;
            assert_eq!(f_cbrt(cube), root);
            assert_eq!(f_cbrt(-cube), -root);
        }

        // Special values: zero, infinities and NaN.
        assert_eq!(f_cbrt(0.0), 0.0);
        assert_eq!(f_cbrt(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_cbrt(f64::NEG_INFINITY), f64::NEG_INFINITY);
        assert!(f_cbrt(f64::NAN).is_nan());
    }
}