pxfm/
sincpi.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::polyeval::f_estrin_polyeval5;
32use crate::sincospi::reduce_pi_64;
33use crate::sincospi_tables::SINPI_K_PI_OVER_64;
34
35/**
36Sinpi on range [0.0, 0.03515625]
37
38Generated poly by Sollya:
39```text
40d = [0, 0.03515625];
41f_sincpi = sin(y*pi)/(y*pi);
42Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d, relative, floating);
43```
44See ./notes/sincpi_at_zero_dd.sollya
45**/
46#[cold]
47fn as_sincpi_zero(x: f64) -> f64 {
48    const C: [(u64, u64); 7] = [
49        (0xb9d3080000000000, 0x3ff0000000000000),
50        (0xbc81873d86314302, 0xbffa51a6625307d3),
51        (0x3c84871b4ffeefae, 0x3fe9f9cb402bc46c),
52        (0xbc5562d6ae037010, 0xbfc86a8e4720db66),
53        (0xbc386c93f4549bac, 0x3f9ac6805cf31ffd),
54        (0x3c0dbda368edfa40, 0xbf633816a3399d4e),
55        (0xbbcf22ccc18f27a9, 0x3f23736e6a59edd9),
56    ];
57    let x2 = DoubleDouble::from_exact_mult(x, x);
58    let mut p = DoubleDouble::quick_mul_add(
59        x2,
60        DoubleDouble::from_bit_pair(C[6]),
61        DoubleDouble::from_bit_pair(C[5]),
62    );
63    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
64    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
65    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
66    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
67    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
68    p.to_f64()
69}
70
71/// Computes sin(PI\*x)/(PI\*x)
72///
73/// Produces normalized sinc.
74///
75/// Max ULP 0.5
76pub fn f_sincpi(x: f64) -> f64 {
77    let ix = x.to_bits();
78    let ax = ix & 0x7fff_ffff_ffff_ffff;
79    if ax == 0 {
80        return 1.;
81    }
82    let e: i32 = (ax >> 52) as i32;
83    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
84    let sgn: i64 = (ix as i64) >> 63;
85    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
86    let mut s: i32 = 1063i32.wrapping_sub(e);
87    if s < 0 {
88        if e == 0x7ff {
89            if (ix << 12) == 0 {
90                return f64::NAN;
91            }
92            return x + x; // case x=NaN
93        }
94        s = -s - 1;
95        if s > 10 {
96            return f64::copysign(0.0, x);
97        }
98        let iq: u64 = (m as u64).wrapping_shl(s as u32);
99        if (iq & 2047) == 0 {
100            return f64::copysign(0.0, x);
101        }
102    }
103
104    if ax <= 0x3fa2000000000000u64 {
105        // |x| <= 0.03515625
106
107        if ax < 0x3c90000000000000u64 {
108            // |x| < f64::EPSILON
109            if ax <= 0x3b05798ee2308c3au64 {
110                // |x| <= 2.2204460492503131e-24
111                return 1.;
112            }
113            // Small values approximated with Taylor poly
114            // sincpi(x) ~ 1 - x^2*Pi^2/6 + O(x^4)
115            #[cfg(any(
116                all(
117                    any(target_arch = "x86", target_arch = "x86_64"),
118                    target_feature = "fma"
119                ),
120                all(target_arch = "aarch64", target_feature = "neon")
121            ))]
122            {
123                const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
124                let p = f_fmla(x, M_SQR_PI_OVER_6 * x, 1.);
125                return p;
126            }
127            #[cfg(not(any(
128                all(
129                    any(target_arch = "x86", target_arch = "x86_64"),
130                    target_feature = "fma"
131                ),
132                all(target_arch = "aarch64", target_feature = "neon")
133            )))]
134            {
135                use crate::common::min_normal_f64;
136                return 1. - min_normal_f64();
137            }
138        }
139
140        // Poly generated by Sollya:
141        // d = [0, 0.03515625];
142        // f_sincpi = sin(y*pi)/(y*pi);
143        // Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
144        // See ./notes/sincpi_at_zero.sollya
145
146        let x2 = x * x;
147
148        let eps = x * f_fmla(
149            x2,
150            f64::from_bits(0x3d00000000000000), // 2^-47
151            f64::from_bits(0x3bd0000000000000), // 2^-66
152        );
153
154        const C: [u64; 5] = [
155            0xbffa51a6625307d3,
156            0x3fe9f9cb402bbeaa,
157            0xbfc86a8e466bbb5b,
158            0x3f9ac66d887e2f38,
159            0xbf628473a38d289a,
160        ];
161
162        const F: DoubleDouble =
163            DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));
164
165        let p = f_estrin_polyeval5(
166            x2,
167            f64::from_bits(C[0]),
168            f64::from_bits(C[1]),
169            f64::from_bits(C[2]),
170            f64::from_bits(C[3]),
171            f64::from_bits(C[4]),
172        );
173        let v = DoubleDouble::from_exact_mult(p, x2);
174        let z = DoubleDouble::add(F, v);
175
176        let lb = z.hi + (z.lo - eps);
177        let ub = z.hi + (z.lo + eps);
178        if lb == ub {
179            return lb;
180        }
181        return as_sincpi_zero(x);
182    }
183
184    let si = e.wrapping_sub(1011);
185    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
186        // x is integer or half-integer
187        if (m0.wrapping_shl(si as u32)) == 0 {
188            return f64::copysign(0.0, x); // x is integer
189        }
190        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
191        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
192        #[cfg(any(
193            all(
194                any(target_arch = "x86", target_arch = "x86_64"),
195                target_feature = "fma"
196            ),
197            all(target_arch = "aarch64", target_feature = "neon")
198        ))]
199        {
200            let num = if t == 0 {
201                f64::copysign(1.0, x)
202            } else {
203                -f64::copysign(1.0, x)
204            };
205            const PI: DoubleDouble =
206                DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
207            let r = DoubleDouble::quick_mult_f64(PI, x);
208            let v = DoubleDouble::from_f64_div_dd(num, r);
209            return v.to_f64();
210        }
211
212        #[cfg(not(any(
213            all(
214                any(target_arch = "x86", target_arch = "x86_64"),
215                target_feature = "fma"
216            ),
217            all(target_arch = "aarch64", target_feature = "neon")
218        )))]
219        {
220            use crate::double_double::two_product_compatible;
221            if two_product_compatible(x) {
222                let num = if t == 0 {
223                    f64::copysign(1.0, x)
224                } else {
225                    -f64::copysign(1.0, x)
226                };
227                const PI: DoubleDouble =
228                    DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
229                let r = DoubleDouble::quick_mult_f64(PI, x);
230                let v = DoubleDouble::from_f64_div_dd(num, r);
231                return v.to_f64();
232            } else {
233                use crate::dyadic_float::{DyadicFloat128, DyadicSign};
234                let num = DyadicFloat128::new_from_f64(if t == 0 {
235                    f64::copysign(1.0, x)
236                } else {
237                    -f64::copysign(1.0, x)
238                });
239                const PI: DyadicFloat128 = DyadicFloat128 {
240                    sign: DyadicSign::Pos,
241                    exponent: -126,
242                    mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
243                };
244                let dx = DyadicFloat128::new_from_f64(x);
245                let r = (PI * dx).reciprocal();
246                return (num * r).fast_as_f64();
247            }
248        }
249    }
250
251    let (y, k) = reduce_pi_64(x);
252
253    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
254    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
255    let cos_k = DoubleDouble::from_bit_pair(
256        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
257    );
258
259    let r_sincos = crate::sincospi::sincospi_eval(y);
260
261    const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
262    let scale = DoubleDouble::quick_mult_f64(PI, x);
263
264    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
265    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
266
267    // sin_k_cos_y is always >> cos_k_sin_y
268    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
269    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
270    rr = DoubleDouble::div(rr, scale);
271
272    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
273    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
274
275    if ub == lb {
276        return rr.to_f64();
277    }
278    sincpi_dd(y, sin_k, cos_k, scale)
279}
280
281#[cold]
282fn sincpi_dd(x: f64, sin_k: DoubleDouble, cos_k: DoubleDouble, scale: DoubleDouble) -> f64 {
283    let r_sincos = crate::sincospi::sincospi_eval_dd(x);
284    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
285    let mut rr = DoubleDouble::mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
286    rr = DoubleDouble::div(rr, scale);
287    rr.to_f64()
288}
289
290#[cfg(test)]
291mod tests {
292    use super::*;
293
294    #[test]
295    fn test_sincpi_zero() {
296        assert_eq!(f_sincpi(2.2204460492503131e-24), 1.0);
297        assert_eq!(f_sincpi(f64::EPSILON), 1.0);
298        assert_eq!(f_sincpi(0.007080019335262543), 0.9999175469662566);
299        assert_eq!(f_sincpi(0.05468860710998057), 0.9950875152844803);
300        assert_eq!(f_sincpi(0.5231231231), 0.6068750737806441);
301        assert_eq!(f_sincpi(1.), 0.);
302        assert_eq!(f_sincpi(-1.), 0.);
303        assert_eq!(f_sincpi(-2.), 0.);
304        assert_eq!(f_sincpi(-3.), 0.);
305        assert!(f_sincpi(f64::INFINITY).is_nan());
306        assert!(f_sincpi(f64::NEG_INFINITY).is_nan());
307        assert!(f_sincpi(f64::NAN).is_nan());
308    }
309}