pxfm/
sincospi.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer};
30use crate::double_double::DoubleDouble;
31use crate::polyeval::{f_polyeval3, f_polyeval4};
32use crate::rounding::CpuRound;
33use crate::sin::SinCos;
34use crate::sincospi_tables::SINPI_K_PI_OVER_64;
35
36/**
37Cospi(x) on [0; 0.000244140625]
38
39Generated by Sollya:
40```text
41d = [0, 0.000244140625];
42f_cos = cos(y*pi);
43Q = fpminimax(f_cos, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
44```
45
46See ./notes/cospi_zero_dd.sollya
47**/
48#[cold]
49fn as_cospi_zero(x: f64) -> f64 {
50    const C: [(u64, u64); 5] = [
51        (0xbcb692b71366cc04, 0xc013bd3cc9be45de),
52        (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4),
53        (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9),
54        (0x3c30023d540b9350, 0x3fce1f506446cb66),
55        (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c),
56    ];
57    let x2 = DoubleDouble::from_exact_mult(x, x);
58    let mut p = DoubleDouble::quick_mul_add(
59        x2,
60        DoubleDouble::from_bit_pair(C[3]),
61        DoubleDouble::from_bit_pair(C[3]),
62    );
63    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
64    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
65    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
66    p = DoubleDouble::mul_add_f64(x2, p, 1.);
67    p.to_f64()
68}
69
70/**
71Sinpi on range [0.0, 0.03515625]
72
73Generated poly by Sollya:
74```text
75d = [0, 0.03515625];
76
77f_sin = sin(y*pi)/y;
78Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
79```
80See ./notes/sinpi_zero_dd.sollya
81**/
82#[cold]
83fn as_sinpi_zero(x: f64) -> f64 {
84    const C: [(u64, u64); 6] = [
85        (0x3ca1a626311d9056, 0x400921fb54442d18),
86        (0x3cb055f12c462211, 0xc014abbce625be53),
87        (0xbc9789ea63534250, 0x400466bc6775aae1),
88        (0xbc78b86de6962184, 0xbfe32d2cce62874e),
89        (0x3c4eddf7cd887302, 0x3fb507833e2b781f),
90        (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e),
91    ];
92    let x2 = DoubleDouble::from_exact_mult(x, x);
93    let mut p = DoubleDouble::quick_mul_add(
94        x2,
95        DoubleDouble::from_bit_pair(C[5]),
96        DoubleDouble::from_bit_pair(C[4]),
97    );
98    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
99    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
100    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
101    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
102    p = DoubleDouble::quick_mult_f64(p, x);
103    p.to_f64()
104}
105
106// Return k and y, where
107// k = round(x * 64 / pi) and y = (x * 64 / pi) - k.
108#[inline]
109pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) {
110    let kd = (x * 64.).cpu_round();
111    let y = dd_fmla(kd, -1. / 64., x);
112    (y, unsafe {
113        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
114    })
115}
116
117#[inline]
118pub(crate) fn sincospi_eval(x: f64) -> SinCos {
119    let x2 = x * x;
120    /*
121        sinpi(pi*x) poly generated by Sollya:
122        d = [0, 0.0078128];
123        f_sin = sin(y*pi)/y;
124        Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|107, D...|], d, relative, floating);
125        See ./notes/sinpi.sollya
126    */
127    let sin_lop = f_polyeval3(
128        x2,
129        f64::from_bits(0xc014abbce625be4d),
130        f64::from_bits(0x400466bc6767f259),
131        f64::from_bits(0xbfe32d176b0b3baf),
132    ) * x2;
133    // We're splitting polynomial in two parts, since first term dominates
134    // we compute: (a0_lo + a0_hi) * x + x * (a1 * x^2 + a2 + x^4) ...
135    let sin_lo = dd_fmla(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x);
136    let sin_hi = x * f64::from_bits(0x400921fb54442d18);
137
138    /*
139       cospi(pi*x) poly generated by Sollya:
140       d = [0, 0.015625];
141       f_cos = cos(y*pi);
142       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, D...|], d, relative, floating);
143       See ./notes/cospi.sollya
144    */
145    let p = f_polyeval3(
146        x2,
147        f64::from_bits(0xc013bd3cc9be45cf),
148        f64::from_bits(0x40103c1f08085ad1),
149        f64::from_bits(0xbff55d1e43463fc3),
150    );
151
152    // We're splitting polynomial in two parts, since first term dominates
153    // we compute: (a0_lo + a0_hi) + (a1 * x^2 + a2 + x^4)...
154    let cos_lo = dd_fmla(p, x2, f64::from_bits(0xbbdf72adefec0800));
155    let cos_hi = f64::from_bits(0x3ff0000000000000);
156
157    let err = f_fmla(
158        x2,
159        f64::from_bits(0x3cb0000000000000), // 2^-52
160        f64::from_bits(0x3c40000000000000), // 2^-59
161    );
162    SinCos {
163        v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo),
164        v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo),
165        err,
166    }
167}
168
169#[inline]
170pub(crate) fn sincospi_eval_dd(x: f64) -> SinCos {
171    let x2 = DoubleDouble::from_exact_mult(x, x);
172    // Sin coeffs
173    // poly sin(pi*x) generated by Sollya:
174    // d = [0, 0.0078128];
175    // f_sin = sin(y*pi)/y;
176    // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
177    // see ./notes/sinpi_dd.sollya
178    const SC: [(u64, u64); 5] = [
179        (0x3ca1a626330ccf19, 0x400921fb54442d18),
180        (0x3cb05540f6323de9, 0xc014abbce625be53),
181        (0xbc9050fdd1229756, 0x400466bc6775aadf),
182        (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1),
183        (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182),
184    ];
185
186    let mut sin_y = DoubleDouble::quick_mul_add(
187        x2,
188        DoubleDouble::from_bit_pair(SC[4]),
189        DoubleDouble::from_bit_pair(SC[3]),
190    );
191    sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2]));
192    sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1]));
193    sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0]));
194    sin_y = DoubleDouble::quick_mult_f64(sin_y, x);
195
196    // Cos coeffs
197    // d = [0, 0.0078128];
198    // f_cos = cos(y*pi);
199    // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
200    // See ./notes/cospi_dd.sollya
201    const CC: [(u64, u64); 5] = [
202        (0xbaaa70a580000000, 0x3ff0000000000000),
203        (0xbcb69211d8dd1237, 0xc013bd3cc9be45de),
204        (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf),
205        (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5),
206        (0xbc5c542d998a4e48, 0x3fce1f2f5f747411),
207    ];
208
209    let mut cos_y = DoubleDouble::quick_mul_add(
210        x2,
211        DoubleDouble::from_bit_pair(CC[4]),
212        DoubleDouble::from_bit_pair(CC[3]),
213    );
214    cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2]));
215    cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1]));
216    cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0]));
217    SinCos {
218        v_sin: sin_y,
219        v_cos: cos_y,
220        err: 0.,
221    }
222}
223
224#[cold]
225fn sinpi_dd(x: f64, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
226    let r_sincos = sincospi_eval_dd(x);
227    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
228    let rr = DoubleDouble::mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
229    rr.to_f64()
230}
231
232#[cold]
233fn sincospi_dd(
234    x: f64,
235    sin_sin_k: DoubleDouble,
236    sin_cos_k: DoubleDouble,
237    cos_sin_k: DoubleDouble,
238    cos_cos_k: DoubleDouble,
239) -> (f64, f64) {
240    let r_sincos = sincospi_eval_dd(x);
241
242    let cos_k_sin_y = DoubleDouble::quick_mult(sin_cos_k, r_sincos.v_sin);
243    let rr_sin = DoubleDouble::mul_add(sin_sin_k, r_sincos.v_cos, cos_k_sin_y);
244
245    let cos_k_sin_y = DoubleDouble::quick_mult(cos_cos_k, r_sincos.v_sin);
246    let rr_cos = DoubleDouble::mul_add(cos_sin_k, r_sincos.v_cos, cos_k_sin_y);
247
248    (rr_sin.to_f64(), rr_cos.to_f64())
249}
250
251// [sincospi_eval] gives precision around 2^-66 what is not enough for DD case this method gives 2^-84
252#[inline]
253fn sincospi_eval_extended(x: f64) -> SinCos {
254    let x2 = DoubleDouble::from_exact_mult(x, x);
255    /*
256        sinpi(pi*x) poly generated by Sollya:
257        d = [0, 0.0078128];
258        f_sin = sin(y*pi)/y;
259        Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
260        See ./notes/sinpi.sollya
261    */
262    let sin_lop = f_polyeval3(
263        x2.hi,
264        f64::from_bits(0x400466bc67763662),
265        f64::from_bits(0xbfe32d2cce5aad86),
266        f64::from_bits(0x3fb5077099a1f35b),
267    );
268    let mut v_sin = DoubleDouble::mul_f64_add(
269        x2,
270        sin_lop,
271        DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)),
272    );
273    v_sin = DoubleDouble::mul_add(
274        x2,
275        v_sin,
276        DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)),
277    );
278    v_sin = DoubleDouble::quick_mult_f64(v_sin, x);
279
280    /*
281       cospi(pi*x) poly generated by Sollya:
282       d = [0, 0.015625];
283       f_cos = cos(y*pi);
284       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
285       See ./notes/cospi_fast_dd.sollya
286    */
287    let p = f_polyeval3(
288        x2.hi,
289        f64::from_bits(0x40103c1f081b5abf),
290        f64::from_bits(0xbff55d3c7e2edd89),
291        f64::from_bits(0x3fce1f2fd9d79484),
292    );
293
294    let mut v_cos = DoubleDouble::mul_f64_add(
295        x2,
296        p,
297        DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)),
298    );
299    v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000));
300
301    SinCos {
302        v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo),
303        v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo),
304        err: 0.,
305    }
306}
307
308pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble {
309    let ix = x.to_bits();
310    let ax = ix & 0x7fff_ffff_ffff_ffff;
311    if ax == 0 {
312        return DoubleDouble::new(0., 0.);
313    }
314    let e: i32 = (ax >> 52) as i32;
315    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
316    let sgn: i64 = (ix as i64) >> 63;
317    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
318    let mut s: i32 = 1063i32.wrapping_sub(e);
319    if s < 0 {
320        s = -s - 1;
321        if s > 10 {
322            return DoubleDouble::new(0., f64::copysign(0.0, x));
323        }
324        let iq: u64 = (m as u64).wrapping_shl(s as u32);
325        if (iq & 2047) == 0 {
326            return DoubleDouble::new(0., f64::copysign(0.0, x));
327        }
328    }
329
330    if ax <= 0x3fa2000000000000u64 {
331        // |x| <= 0.03515625
332        const PI: DoubleDouble = DoubleDouble::new(
333            f64::from_bits(0x3ca1a62633145c07),
334            f64::from_bits(0x400921fb54442d18),
335        );
336
337        if ax < 0x3c90000000000000 {
338            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
339            // of the function pi*x, and if x is a worst case, then 2*x is another
340            // one in the next binade. For this reason, worst cases are only included
341            // for the binade [2^-1022, 2^-1021). For larger binades,
342            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
343            // by some power of 2.
344            if ax < 0x0350000000000000 {
345                let t = x * f64::from_bits(0x4690000000000000);
346                let z = DoubleDouble::quick_mult_f64(PI, t);
347                let r = z.to_f64();
348                let rs = r * f64::from_bits(0x3950000000000000);
349                let rt = rs * f64::from_bits(0x4690000000000000);
350                return DoubleDouble::new(
351                    0.,
352                    dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs),
353                );
354            }
355            let z = DoubleDouble::quick_mult_f64(PI, x);
356            return z;
357        }
358
359        /*
360           Poly generated by Sollya:
361           d = [0, 0.03515625];
362           f_sin = sin(y*pi)/y;
363           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, 107, D...|], d, relative, floating);
364
365           See ./notes/sinpi_zero_fast_dd.sollya
366        */
367        const C: [u64; 4] = [
368            0xbfe32d2cce62bd85,
369            0x3fb50783487eb73d,
370            0xbf7e3074f120ad1f,
371            0x3f3e8d9011340e5a,
372        ];
373
374        let x2 = DoubleDouble::from_exact_mult(x, x);
375
376        const C_PI: DoubleDouble =
377            DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18));
378
379        let p = f_polyeval4(
380            x2.hi,
381            f64::from_bits(C[0]),
382            f64::from_bits(C[1]),
383            f64::from_bits(C[2]),
384            f64::from_bits(C[3]),
385        );
386        let mut r = DoubleDouble::mul_f64_add(
387            x2,
388            p,
389            DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)),
390        );
391        r = DoubleDouble::mul_add(
392            x2,
393            r,
394            DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)),
395        );
396        r = DoubleDouble::mul_add(r, x2, C_PI);
397        r = DoubleDouble::quick_mult_f64(r, x);
398        let k = DoubleDouble::from_exact_add(r.hi, r.lo);
399        return k;
400    }
401
402    let si = e.wrapping_sub(1011);
403    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
404        // x is integer or half-integer
405        if (m0.wrapping_shl(si as u32)) == 0 {
406            return DoubleDouble::new(0., f64::copysign(0.0, x)); // x is integer
407        }
408        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
409        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
410        return DoubleDouble::new(
411            0.,
412            if t == 0 {
413                f64::copysign(1.0, x)
414            } else {
415                -f64::copysign(1.0, x)
416            },
417        );
418    }
419
420    let (y, k) = reduce_pi_64(x);
421
422    // // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
423    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
424    let cos_k = DoubleDouble::from_bit_pair(
425        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
426    );
427
428    let r_sincos = sincospi_eval_extended(y);
429
430    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
431    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
432
433    // sin_k_cos_y is always >> cos_k_sin_y
434    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
435    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
436    DoubleDouble::from_exact_add(rr.hi, rr.lo)
437}
438
439/// Computes sin(PI*x)
440///
441/// Max ULP 0.5
442pub fn f_sinpi(x: f64) -> f64 {
443    let ix = x.to_bits();
444    let ax = ix & 0x7fff_ffff_ffff_ffff;
445    if ax == 0 {
446        return x;
447    }
448    let e: i32 = (ax >> 52) as i32;
449    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
450    let sgn: i64 = (ix as i64) >> 63;
451    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
452    let mut s: i32 = 1063i32.wrapping_sub(e);
453    if s < 0 {
454        if e == 0x7ff {
455            if (ix << 12) == 0 {
456                return f64::NAN;
457            }
458            return x + x; // case x=NaN
459        }
460        s = -s - 1;
461        if s > 10 {
462            return f64::copysign(0.0, x);
463        }
464        let iq: u64 = (m as u64).wrapping_shl(s as u32);
465        if (iq & 2047) == 0 {
466            return f64::copysign(0.0, x);
467        }
468    }
469
470    if ax <= 0x3fa2000000000000u64 {
471        // |x| <= 0.03515625
472        const PI: DoubleDouble = DoubleDouble::new(
473            f64::from_bits(0x3ca1a62633145c07),
474            f64::from_bits(0x400921fb54442d18),
475        );
476
477        if ax < 0x3c90000000000000 {
478            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
479            // of the function pi*x, and if x is a worst case, then 2*x is another
480            // one in the next binade. For this reason, worst cases are only included
481            // for the binade [2^-1022, 2^-1021). For larger binades,
482            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
483            // by some power of 2.
484            if ax < 0x0350000000000000 {
485                let t = x * f64::from_bits(0x4690000000000000);
486                let z = DoubleDouble::quick_mult_f64(PI, t);
487                let r = z.to_f64();
488                let rs = r * f64::from_bits(0x3950000000000000);
489                let rt = rs * f64::from_bits(0x4690000000000000);
490                return dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs);
491            }
492            let z = DoubleDouble::quick_mult_f64(PI, x);
493            return z.to_f64();
494        }
495
496        /*
497           Poly generated by Sollya:
498           d = [0, 0.03515625];
499           f_sin = sin(y*pi)/y;
500           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
501
502           See ./notes/sinpi_zero.sollya
503        */
504
505        let x2 = x * x;
506        let x3 = x2 * x;
507        let x4 = x2 * x2;
508
509        let eps = x * f_fmla(
510            x2,
511            f64::from_bits(0x3d00000000000000), // 2^-47
512            f64::from_bits(0x3bd0000000000000), // 2^-66
513        );
514
515        const C: [u64; 4] = [
516            0xc014abbce625be51,
517            0x400466bc67754b46,
518            0xbfe32d2cc12a51f4,
519            0x3fb5060540058476,
520        ];
521
522        const C_PI: DoubleDouble =
523            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
524
525        let mut z = DoubleDouble::quick_mult_f64(C_PI, x);
526
527        let zl0 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
528        let zl1 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
529
530        z.lo = f_fmla(x3, f_fmla(x4, zl1, zl0), z.lo);
531        let lb = z.hi + (z.lo - eps);
532        let ub = z.hi + (z.lo + eps);
533        if lb == ub {
534            return lb;
535        }
536        return as_sinpi_zero(x);
537    }
538
539    let si = e.wrapping_sub(1011);
540    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
541        // x is integer or half-integer
542        if (m0.wrapping_shl(si as u32)) == 0 {
543            return f64::copysign(0.0, x); // x is integer
544        }
545        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
546        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
547        return if t == 0 {
548            f64::copysign(1.0, x)
549        } else {
550            -f64::copysign(1.0, x)
551        };
552    }
553
554    let (y, k) = reduce_pi_64(x);
555
556    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
557    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
558    let cos_k = DoubleDouble::from_bit_pair(
559        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
560    );
561
562    let r_sincos = sincospi_eval(y);
563
564    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
565    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
566
567    // sin_k_cos_y is always >> cos_k_sin_y
568    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
569    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
570
571    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
572    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
573
574    if ub == lb {
575        return rr.to_f64();
576    }
577    sinpi_dd(y, sin_k, cos_k)
578}
579
580/// Computes cos(PI*x)
581///
582/// Max found ULP 0.5
583pub fn f_cospi(x: f64) -> f64 {
584    let ix = x.to_bits();
585    let ax = ix & 0x7fff_ffff_ffff_ffff;
586    if ax == 0 {
587        return 1.0;
588    }
589    let e: i32 = (ax >> 52) as i32;
590    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
591    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
592    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
593    if s < 0 {
594        // |x| >= 2^41
595        if e == 0x7ff {
596            // NaN or Inf
597            if ix.wrapping_shl(12) == 0 {
598                return f64::NAN;
599            }
600            return x + x; // NaN
601        }
602        s = -s - 1; // now 2^(41+s) <= |x| < 2^(42+s)
603        if s > 11 {
604            return 1.0;
605        } // |x| >= 2^53
606        let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024);
607        if (iq & 2047) == 0 {
608            return 0.0;
609        }
610    }
611    if ax <= 0x3f30000000000000u64 {
612        // |x| <= 2^-12, |x| <= 0.000244140625
613        if ax <= 0x3e2ccf6429be6621u64 {
614            return 1.0 - f64::from_bits(0x3c80000000000000);
615        }
616        let x2 = x * x;
617        let x4 = x2 * x2;
618        let eps = x2 * f64::from_bits(0x3cfa000000000000);
619
620        /*
621            Generated by Sollya:
622            d = [0, 0.000244140625];
623            f_cos = cos(y*pi);
624            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
625
626            See ./notes/cospi.sollya
627        */
628
629        const C: [u64; 4] = [
630            0xc013bd3cc9be45de,
631            0x40103c1f081b5ac4,
632            0xbff55d3c7ff79b60,
633            0x3fd24c7b6f7d0690,
634        ];
635
636        let p0 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
637        let p1 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
638
639        let p = x2 * f_fmla(x4, p0, p1);
640        let lb = (p - eps) + 1.;
641        let ub = (p + eps) + 1.;
642        if lb == ub {
643            return lb;
644        }
645        return as_cospi_zero(x);
646    }
647
648    let si: i32 = e.wrapping_sub(1011);
649    if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 {
650        return 0.0;
651    }
652
653    let (y, k) = reduce_pi_64(x);
654
655    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
656    let msin_k = DoubleDouble::from_bit_pair(
657        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize],
658    );
659    let cos_k = DoubleDouble::from_bit_pair(
660        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
661    );
662
663    let r_sincos = sincospi_eval(y);
664
665    let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
666    let cos_k_msin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);
667
668    // cos_k_cos_y is always >> cos_k_msin_y
669    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
670    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
671
672    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
673    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
674
675    if ub == lb {
676        return rr.to_f64();
677    }
678    sinpi_dd(y, cos_k, msin_k)
679}
680
681/// Computes sin(PI*x) and cos(PI*x)
682///
683/// Max found ULP 0.5
684pub fn f_sincospi(x: f64) -> (f64, f64) {
685    let ix = x.to_bits();
686    let ax = ix & 0x7fff_ffff_ffff_ffff;
687    if ax == 0 {
688        return (x, 1.0);
689    }
690    let e: i32 = (ax >> 52) as i32;
691    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
692    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
693    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
694    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
695    if s < 0 {
696        // |x| >= 2^41
697        if e == 0x7ff {
698            // NaN or Inf
699            if ix.wrapping_shl(12) == 0 {
700                return (f64::NAN, f64::NAN);
701            }
702            return (x + x, x + x); // NaN
703        }
704        s = -s - 1;
705        if s > 10 {
706            static CF: [f64; 2] = [1., -1.];
707            let is_odd = is_odd_integer(f64::from_bits(ax));
708            let cos_x = CF[is_odd as usize];
709            return (f64::copysign(0.0, x), cos_x);
710        } // |x| >= 2^53
711        let iq: u64 = (m as u64).wrapping_shl(s as u32);
712
713        // sinpi = 0 when multiple of 2048
714        let sin_zero = (iq & 2047) == 0;
715
716        // cospi = 0 when offset-by-half multiple of 2048
717        let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0;
718
719        if sin_zero && cos_zero {
720            // both zero (only possible if NaN or something degenerate)
721        } else if sin_zero {
722            static CF: [f64; 2] = [1., -1.];
723            let is_odd = is_odd_integer(f64::from_bits(ax));
724            let cos_x = CF[is_odd as usize];
725            return (0.0, cos_x); // sin = 0, cos = ±1
726        } else if cos_zero {
727            // x = k / 2 * PI
728            let si = e.wrapping_sub(1011);
729            let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
730            // making sin decision based on quadrant
731            return if t == 0 {
732                (f64::copysign(1.0, x), 0.0)
733            } else {
734                (-f64::copysign(1.0, x), 0.0)
735            }; // sin = ±1, cos = 0
736        }
737    }
738
739    if ax <= 0x3f30000000000000u64 {
740        // |x| <= 2^-12, |x| <= 0.000244140625
741        if ax <= 0x3c90000000000000u64 {
742            const PI: DoubleDouble = DoubleDouble::new(
743                f64::from_bits(0x3ca1a62633145c07),
744                f64::from_bits(0x400921fb54442d18),
745            );
746            let sin_x = if ax < 0x0350000000000000 {
747                let t = x * f64::from_bits(0x4690000000000000);
748                let z = DoubleDouble::quick_mult_f64(PI, t);
749                let r = z.to_f64();
750                let rs = r * f64::from_bits(0x3950000000000000);
751                let rt = rs * f64::from_bits(0x4690000000000000);
752                dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs)
753            } else {
754                let z = DoubleDouble::quick_mult_f64(PI, x);
755                z.to_f64()
756            };
757            return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000));
758        }
759        let x2 = x * x;
760        let x4 = x2 * x2;
761        let cos_eps = x2 * f64::from_bits(0x3cfa000000000000);
762
763        /*
764            Generated by Sollya:
765            d = [0, 0.000244140625];
766            f_cos = cos(y*pi);
767            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
768
769            See ./notes/cospi.sollya
770        */
771
772        const COS_C: [u64; 4] = [
773            0xc013bd3cc9be45de,
774            0x40103c1f081b5ac4,
775            0xbff55d3c7ff79b60,
776            0x3fd24c7b6f7d0690,
777        ];
778
779        let p0 = f_fmla(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2]));
780        let p1 = f_fmla(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0]));
781
782        let p = x2 * f_fmla(x4, p0, p1);
783        let cos_lb = (p - cos_eps) + 1.;
784        let cos_ub = (p + cos_eps) + 1.;
785        let cos_x = if cos_lb == cos_ub {
786            cos_lb
787        } else {
788            as_cospi_zero(x)
789        };
790
791        /*
792            Poly generated by Sollya:
793            d = [0, 0.03515625];
794            f_sin = sin(y*pi)/y;
795            Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
796
797            See ./notes/sinpi_zero.sollya
798        */
799
800        const SIN_C: [u64; 4] = [
801            0xc014abbce625be51,
802            0x400466bc67754b46,
803            0xbfe32d2cc12a51f4,
804            0x3fb5060540058476,
805        ];
806
807        const C_PI: DoubleDouble =
808            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
809
810        let mut z = DoubleDouble::quick_mult_f64(C_PI, x);
811
812        let x3 = x2 * x;
813
814        let zl0 = f_fmla(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0]));
815        let zl1 = f_fmla(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2]));
816
817        let sin_eps = x * f_fmla(
818            x2,
819            f64::from_bits(0x3d00000000000000), // 2^-47
820            f64::from_bits(0x3bd0000000000000), // 2^-66
821        );
822
823        z.lo = f_fmla(x3, f_fmla(x4, zl1, zl0), z.lo);
824        let sin_lb = z.hi + (z.lo - sin_eps);
825        let sin_ub = z.hi + (z.lo + sin_eps);
826        let sin_x = if sin_lb == sin_ub {
827            sin_lb
828        } else {
829            as_sinpi_zero(x)
830        };
831        return (sin_x, cos_x);
832    }
833
834    let si = e.wrapping_sub(1011);
835    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
836        // x is integer or half-integer
837        if (m0.wrapping_shl(si as u32)) == 0 {
838            static CF: [f64; 2] = [1., -1.];
839            let is_odd = is_odd_integer(f64::from_bits(ax));
840            let cos_x = CF[is_odd as usize];
841            return (f64::copysign(0.0, x), cos_x); // x is integer
842        }
843        // x is half-integer
844        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
845        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
846        return if t == 0 {
847            (f64::copysign(1.0, x), 0.0)
848        } else {
849            (-f64::copysign(1.0, x), 0.0)
850        };
851    }
852
853    let (y, k) = reduce_pi_64(x);
854
855    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
856    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
857    let cos_k = DoubleDouble::from_bit_pair(
858        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
859    );
860    let msin_k = -sin_k;
861
862    let r_sincos = sincospi_eval(y);
863
864    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
865    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
866
867    let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k);
868    let msin_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k);
869
870    // sin_k_cos_y is always >> cos_k_sin_y
871    let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
872    rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
873
874    let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err); // (rr.lo + ERR);
875    let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err); // (rr.lo - ERR);
876
877    let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi);
878    rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo;
879
880    let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err); // (rr.lo + ERR);
881    let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err); // (rr.lo - ERR);
882
883    if sin_ub == sin_lb && cos_lb == cos_ub {
884        return (rr_sin.to_f64(), rr_cos.to_f64());
885    }
886
887    sincospi_dd(y, sin_k, cos_k, cos_k, msin_k)
888}
889
890#[cfg(test)]
891mod tests {
892    use super::*;
893
894    #[test]
895    fn test_sinpi() {
896        assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883);
897        assert_eq!(f_sinpi(7124076477593855.), 0.);
898        assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.);
899        assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0);
900        assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004);
901        assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763);
902        assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724);
903        assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457);
904        assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661);
905        assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751);
906        assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316);
907        assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563);
908        assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927);
909        assert_eq!(
910            f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123),
911            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005
912        );
913        assert_eq!(
914            f_sinpi(0.000000000000000000000000000000000000007413093439574428),
915            2.3288919890141717e-38
916        );
917        assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578);
918        assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003);
919        assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186);
920        assert!(f_sinpi(f64::INFINITY).is_nan());
921        assert!(f_sinpi(f64::NEG_INFINITY).is_nan());
922        assert!(f_sinpi(f64::NAN).is_nan());
923    }
924
925    #[test]
926    fn test_sincospi() {
927        let v0 = f_sincospi(1.0156097449358867);
928        assert_eq!(v0.0, f_sinpi(1.0156097449358867));
929        assert_eq!(v0.1, f_cospi(1.0156097449358867));
930
931        let v1 = f_sincospi(4503599627370496.);
932        assert_eq!(v1.0, f_sinpi(4503599627370496.));
933        assert_eq!(v1.1, f_cospi(4503599627370496.));
934
935        let v1 = f_sincospi(-108.);
936        assert_eq!(v1.0, f_sinpi(-108.));
937        assert_eq!(v1.1, f_cospi(-108.));
938
939        let v1 = f_sincospi(3.);
940        assert_eq!(v1.0, f_sinpi(3.));
941        assert_eq!(v1.1, f_cospi(3.));
942
943        let v1 = f_sincospi(13.5);
944        assert_eq!(v1.0, f_sinpi(13.5));
945        assert_eq!(v1.1, f_cospi(13.5));
946
947        let v1 = f_sincospi(7124076477593855.);
948        assert_eq!(v1.0, f_sinpi(7124076477593855.));
949        assert_eq!(v1.1, f_cospi(7124076477593855.));
950
951        let v1 = f_sincospi(2533419148247186.5);
952        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
953        assert_eq!(v1.1, f_cospi(2533419148247186.5));
954
955        let v1 = f_sincospi(2.2250653705240375E-308);
956        assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308));
957        assert_eq!(v1.1, f_cospi(2.2250653705240375E-308));
958
959        let v1 = f_sincospi(2533420818956351.);
960        assert_eq!(v1.0, f_sinpi(2533420818956351.));
961        assert_eq!(v1.1, f_cospi(2533420818956351.));
962
963        let v1 = f_sincospi(2533822406803233.5);
964        assert_eq!(v1.0, f_sinpi(2533822406803233.5));
965        assert_eq!(v1.1, f_cospi(2533822406803233.5));
966
967        let v1 = f_sincospi(-3040685725640478.5);
968        assert_eq!(v1.0, f_sinpi(-3040685725640478.5));
969        assert_eq!(v1.1, f_cospi(-3040685725640478.5));
970
971        let v1 = f_sincospi(2533419148247186.5);
972        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
973        assert_eq!(v1.1, f_cospi(2533419148247186.5));
974
975        let v1 = f_sincospi(2533420819267583.5);
976        assert_eq!(v1.0, f_sinpi(2533420819267583.5));
977        assert_eq!(v1.1, f_cospi(2533420819267583.5));
978
979        let v1 = f_sincospi(6979704728846336.);
980        assert_eq!(v1.0, f_sinpi(6979704728846336.));
981        assert_eq!(v1.1, f_cospi(6979704728846336.));
982
983        let v1 = f_sincospi(7124076477593855.);
984        assert_eq!(v1.0, f_sinpi(7124076477593855.));
985        assert_eq!(v1.1, f_cospi(7124076477593855.));
986
987        let v1 = f_sincospi(-0.00000000002728839192371484);
988        assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484));
989        assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484));
990
991        let v1 = f_sincospi(0.00002465398569495569);
992        assert_eq!(v1.0, f_sinpi(0.00002465398569495569));
993        assert_eq!(v1.1, f_cospi(0.00002465398569495569));
994    }
995
996    #[test]
997    fn test_cospi() {
998        assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445));
999        assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445));
1000        assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445));
1001        assert!(f_cospi(f64::INFINITY).is_nan());
1002        assert!(f_cospi(f64::NEG_INFINITY).is_nan());
1003        assert!(f_cospi(f64::NAN).is_nan());
1004    }
1005}
pxfm/sincospi.rs

pxfm/
sincospi.rs