pxfm/
double_double.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::get_exponent_f64;
30#[allow(unused_imports)]
31use crate::common::*;
32use std::ops::{Mul, Neg};
33// https://hal.science/hal-01351529v3/document
34
35#[derive(Copy, Clone, Default, Debug)]
36pub(crate) struct DoubleDouble {
37    pub(crate) lo: f64,
38    pub(crate) hi: f64,
39}
40
41impl Neg for DoubleDouble {
42    type Output = Self;
43
44    #[inline]
45    fn neg(self) -> Self::Output {
46        Self {
47            hi: -self.hi,
48            lo: -self.lo,
49        }
50    }
51}
52
53impl DoubleDouble {
54    #[inline]
55    pub(crate) const fn from_bit_pair(pair: (u64, u64)) -> Self {
56        Self {
57            lo: f64::from_bits(pair.0),
58            hi: f64::from_bits(pair.1),
59        }
60    }
61
62    #[inline]
63    pub(crate) const fn new(lo: f64, hi: f64) -> Self {
64        DoubleDouble { lo, hi }
65    }
66
67    // Non FMA helper
68    #[allow(dead_code)]
69    #[inline]
70    pub(crate) const fn split(a: f64) -> DoubleDouble {
71        // CN = 2^N.
72        const CN: f64 = (1 << 27) as f64;
73        const C: f64 = CN + 1.0;
74        let t1 = C * a;
75        let t2 = a - t1;
76        let r_hi = t1 + t2;
77        let r_lo = a - r_hi;
78        DoubleDouble::new(r_lo, r_hi)
79    }
80
81    // Non FMA helper
82    #[allow(dead_code)]
83    #[inline]
84    fn from_exact_mult_impl_non_fma(asz: DoubleDouble, a: f64, b: f64) -> Self {
85        let bs = DoubleDouble::split(b);
86
87        let r_hi = a * b;
88        let t1 = asz.hi * bs.hi - r_hi;
89        let t2 = asz.hi * bs.lo + t1;
90        let t3 = asz.lo * bs.hi + t2;
91        let r_lo = asz.lo * bs.lo + t3;
92        DoubleDouble::new(r_lo, r_hi)
93    }
94
95    // valid only for |a| > b
96    #[inline]
97    pub(crate) const fn from_exact_add(a: f64, b: f64) -> DoubleDouble {
98        let r_hi = a + b;
99        let t = r_hi - a;
100        let r_lo = b - t;
101        DoubleDouble::new(r_lo, r_hi)
102    }
103
104    // valid only for |a| > b
105    #[inline]
106    pub(crate) const fn from_exact_sub(a: f64, b: f64) -> DoubleDouble {
107        let r_hi = a - b;
108        let t = a - r_hi;
109        let r_lo = t - b;
110        DoubleDouble::new(r_lo, r_hi)
111    }
112
113    #[inline]
114    pub(crate) const fn from_full_exact_add(a: f64, b: f64) -> DoubleDouble {
115        let r_hi = a + b;
116        let t1 = r_hi - a;
117        let t2 = r_hi - t1;
118        let t3 = b - t1;
119        let t4 = a - t2;
120        let r_lo = t3 + t4;
121        DoubleDouble::new(r_lo, r_hi)
122    }
123
124    #[allow(unused)]
125    #[inline]
126    pub(crate) fn dd_f64_mul_add(a: f64, b: f64, c: f64) -> f64 {
127        let ddx2 = DoubleDouble::from_exact_mult(a, b);
128        let zv = DoubleDouble::full_add_f64(ddx2, c);
129        zv.to_f64()
130    }
131
132    #[inline]
133    pub(crate) const fn from_full_exact_sub(a: f64, b: f64) -> Self {
134        let r_hi = a - b;
135        let t1 = r_hi - a;
136        let t2 = r_hi - t1;
137        let t3 = -b - t1;
138        let t4 = a - t2;
139        let r_lo = t3 + t4;
140        DoubleDouble::new(r_lo, r_hi)
141    }
142
143    #[inline]
144    pub(crate) fn add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
145        let s = a.hi + b.hi;
146        let d = s - a.hi;
147        let l = ((b.hi - d) + (a.hi + (d - s))) + (a.lo + b.lo);
148        DoubleDouble::new(l, s)
149    }
150
151    #[inline]
152    pub(crate) fn quick_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
153        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
154        let v = a.lo + b.lo;
155        let w = sl + v;
156        DoubleDouble::from_exact_add(sh, w)
157    }
158
159    #[inline]
160    pub(crate) fn quick_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
161        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_sub(a.hi, b.hi);
162        let v = a.lo - b.lo;
163        let w = sl + v;
164        DoubleDouble::from_exact_add(sh, w)
165    }
166
167    #[inline]
168    pub(crate) fn full_dd_add(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
169        let DoubleDouble { hi: sh, lo: sl } = DoubleDouble::from_full_exact_add(a.hi, b.hi);
170        let DoubleDouble { hi: th, lo: tl } = DoubleDouble::from_full_exact_add(a.lo, b.lo);
171        let c = sl + th;
172        let v = DoubleDouble::from_exact_add(sh, c);
173        let w = tl + v.lo;
174        DoubleDouble::from_exact_add(v.hi, w)
175    }
176
177    #[inline]
178    pub(crate) fn full_dd_sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
179        DoubleDouble::full_dd_add(a, -b)
180    }
181
182    #[inline]
183    pub(crate) fn sub(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
184        let s = a.hi - b.hi;
185        let d = s - a.hi;
186        let l = ((-b.hi - d) + (a.hi + (d - s))) + (a.lo - b.lo);
187        DoubleDouble::new(l, s)
188    }
189
190    /// DoubleDouble-style square root for a double-double number
191    #[inline]
192    pub(crate) fn sqrt(self) -> DoubleDouble {
193        let a = self.hi + self.lo;
194
195        if a == 0.0 {
196            return DoubleDouble { hi: 0.0, lo: 0.0 };
197        }
198        if a < 0.0 || a.is_nan() {
199            return DoubleDouble {
200                hi: f64::NAN,
201                lo: 0.0,
202            };
203        }
204        if a.is_infinite() {
205            return DoubleDouble {
206                hi: f64::INFINITY,
207                lo: 0.0,
208            };
209        }
210
211        let x = a.sqrt();
212
213        let x2 = DoubleDouble::from_exact_mult(x, x);
214
215        // Residual = self - x²
216        let mut r = self.hi - x2.hi;
217        r += self.lo;
218        r -= x2.lo;
219
220        let dx = r / (2.0 * x);
221        let hi = x + dx;
222        let lo = (x - hi) + dx;
223
224        DoubleDouble { hi, lo }
225    }
226
227    /// DoubleDouble-style square root for a double-double number
228    #[inline]
229    pub(crate) fn fast_sqrt(self) -> DoubleDouble {
230        let a = self.hi + self.lo;
231        let x = a.sqrt();
232
233        let x2 = DoubleDouble::from_exact_mult(x, x);
234
235        // Residual = self - x²
236        let mut r = self.hi - x2.hi;
237        r += self.lo;
238        r -= x2.lo;
239
240        let dx = r / (2.0 * x);
241        let hi = x + dx;
242        let lo = (x - hi) + dx;
243
244        DoubleDouble { hi, lo }
245    }
246
247    /// `a*b+c`
248    ///
249    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
250    #[inline]
251    pub(crate) fn mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
252        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
253        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
254        DoubleDouble::new(r + q, p)
255    }
256
257    /// `a*b+c`
258    ///
259    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
260    #[inline]
261    pub(crate) fn quick_mul_add_f64(a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
262        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
263        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
264        DoubleDouble::new(r + q, p)
265    }
266
267    /// `a*b+c`
268    ///
269    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
270    #[inline]
271    pub(crate) fn mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> DoubleDouble {
272        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
273        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
274        DoubleDouble::new(r + q, p)
275    }
276
277    // /// Accurate reciprocal: 1 / self
278    // #[inline]
279    // pub(crate) fn recip_raphson(self) -> DoubleDouble {
280    //     let y0 = DoubleDouble::recip(self);
281    //     let z = DoubleDouble::mul_add_f64(-self, y0, 1.0);
282    //     DoubleDouble::mul_add(y0, z, y0)
283    // }
284
285    /// Accurate reciprocal: 1 / self
286    #[inline]
287    pub(crate) fn recip(self) -> DoubleDouble {
288        #[cfg(any(
289            all(
290                any(target_arch = "x86", target_arch = "x86_64"),
291                target_feature = "fma"
292            ),
293            all(target_arch = "aarch64", target_feature = "neon")
294        ))]
295        {
296            let y = 1. / self.hi;
297            let e1 = f_fmla(-self.hi, y, 1.0);
298            let e2 = f_fmla(-self.lo, y, e1);
299            let e = y * e2;
300            DoubleDouble::new(e, y)
301        }
302        #[cfg(not(any(
303            all(
304                any(target_arch = "x86", target_arch = "x86_64"),
305                target_feature = "fma"
306            ),
307            all(target_arch = "aarch64", target_feature = "neon")
308        )))]
309        {
310            let y = 1.0 / self.hi;
311
312            let DoubleDouble { hi: p1, lo: err1 } = DoubleDouble::from_exact_mult(self.hi, y);
313            let e1 = (1.0 - p1) - err1;
314            let DoubleDouble { hi: p2, lo: err2 } = DoubleDouble::from_exact_mult(self.lo, y);
315            let e2 = (e1 - p2) - err2;
316            let e = y * e2;
317
318            DoubleDouble::new(e, y)
319        }
320    }
321
322    #[inline]
323    pub(crate) fn from_recip(b: f64) -> Self {
324        #[cfg(any(
325            all(
326                any(target_arch = "x86", target_arch = "x86_64"),
327                target_feature = "fma"
328            ),
329            all(target_arch = "aarch64", target_feature = "neon")
330        ))]
331        {
332            let x_hi = 1.0 / b;
333            let err = f_fmla(-x_hi, b, 1.0);
334            let x_lo = err / b;
335            Self::new(x_lo, x_hi)
336        }
337        #[cfg(not(any(
338            all(
339                any(target_arch = "x86", target_arch = "x86_64"),
340                target_feature = "fma"
341            ),
342            all(target_arch = "aarch64", target_feature = "neon")
343        )))]
344        {
345            let x_hi = 1.0 / b;
346            let prod = Self::from_exact_mult(x_hi, b);
347            let err = (1.0 - prod.hi) - prod.lo;
348            let x_lo = err / b;
349            Self::new(x_lo, x_hi)
350        }
351    }
352
353    #[inline]
354    pub(crate) fn from_quick_recip(b: f64) -> Self {
355        #[cfg(any(
356            all(
357                any(target_arch = "x86", target_arch = "x86_64"),
358                target_feature = "fma"
359            ),
360            all(target_arch = "aarch64", target_feature = "neon")
361        ))]
362        {
363            let h = 1.0 / b;
364            let hl = f_fmla(h, -b, 1.) * h;
365            DoubleDouble::new(hl, h)
366        }
367        #[cfg(not(any(
368            all(
369                any(target_arch = "x86", target_arch = "x86_64"),
370                target_feature = "fma"
371            ),
372            all(target_arch = "aarch64", target_feature = "neon")
373        )))]
374        {
375            let h = 1.0 / b;
376            let pr = DoubleDouble::from_exact_mult(h, b);
377            let err = (1.0 - pr.hi) - pr.lo;
378            let hl = err * h;
379            DoubleDouble::new(hl, h)
380        }
381    }
382
383    #[inline]
384    pub(crate) fn from_exact_div(a: f64, b: f64) -> Self {
385        #[cfg(any(
386            all(
387                any(target_arch = "x86", target_arch = "x86_64"),
388                target_feature = "fma"
389            ),
390            all(target_arch = "aarch64", target_feature = "neon")
391        ))]
392        {
393            let q_hi = a / b;
394            let r = f_fmla(-q_hi, b, a);
395            let q_lo = r / b;
396            Self::new(q_lo, q_hi)
397        }
398
399        #[cfg(not(any(
400            all(
401                any(target_arch = "x86", target_arch = "x86_64"),
402                target_feature = "fma"
403            ),
404            all(target_arch = "aarch64", target_feature = "neon")
405        )))]
406        {
407            let q_hi = a / b;
408
409            let p = DoubleDouble::from_exact_mult(q_hi, b);
410            let r = DoubleDouble::from_exact_sub(a, p.hi);
411            let r = r.hi + (r.lo - p.lo);
412            let q_lo = r / b;
413
414            Self::new(q_lo, q_hi)
415        }
416    }
417
418    // Resistant to overflow without FMA
419    #[inline]
420    pub(crate) fn from_exact_safe_div(a: f64, b: f64) -> Self {
421        let q_hi = a / b;
422        let r = f64::mul_add(-q_hi, b, a);
423        let q_lo = r / b;
424        Self::new(q_lo, q_hi)
425    }
426
427    #[inline]
428    pub(crate) fn from_sqrt(x: f64) -> Self {
429        #[cfg(any(
430            all(
431                any(target_arch = "x86", target_arch = "x86_64"),
432                target_feature = "fma"
433            ),
434            all(target_arch = "aarch64", target_feature = "neon")
435        ))]
436        {
437            let h = x.sqrt();
438            /* h = sqrt(x) * (1 + e1) with |e1| < 2^-52
439            thus h^2 = x * (1 + e2) with |e2| < 2^-50.999 */
440            let e = -f_fmla(h, h, -x); // exact
441
442            /* e = x - h^2 */
443            let l = e / (h + h);
444            DoubleDouble::new(l, h)
445        }
446        #[cfg(not(any(
447            all(
448                any(target_arch = "x86", target_arch = "x86_64"),
449                target_feature = "fma"
450            ),
451            all(target_arch = "aarch64", target_feature = "neon")
452        )))]
453        {
454            let h = x.sqrt();
455            let prod_hh = DoubleDouble::from_exact_mult(h, h);
456            let e = (x - prod_hh.hi) - prod_hh.lo; // exact
457
458            /* e = x - h^2 */
459            let l = e / (h + h);
460            DoubleDouble::new(l, h)
461        }
462    }
463
464    /// Safe to overflow underflow division using mandatory FMA.
465    #[inline]
466    #[allow(dead_code)]
467    pub(crate) fn div_safe_dd_f64(a: DoubleDouble, b: f64) -> Self {
468        let q1 = a.hi / b;
469        let r = f64::mul_add(-q1, b, a.hi);
470        let r = r + a.lo;
471        let q2 = r / b;
472
473        DoubleDouble::new(q2, q1)
474    }
475
476    #[inline]
477    pub(crate) fn div_dd_f64(a: DoubleDouble, b: f64) -> Self {
478        #[cfg(any(
479            all(
480                any(target_arch = "x86", target_arch = "x86_64"),
481                target_feature = "fma"
482            ),
483            all(target_arch = "aarch64", target_feature = "neon")
484        ))]
485        {
486            let q1 = a.hi / b;
487            let r = f_fmla(-q1, b, a.hi);
488            let r = r + a.lo;
489            let q2 = r / b;
490
491            DoubleDouble::new(q2, q1)
492        }
493        #[cfg(not(any(
494            all(
495                any(target_arch = "x86", target_arch = "x86_64"),
496                target_feature = "fma"
497            ),
498            all(target_arch = "aarch64", target_feature = "neon")
499        )))]
500        {
501            let th = a.hi / b;
502            let prod = DoubleDouble::from_exact_mult(th, b);
503            let beta_h = a.hi - prod.hi;
504            let beta_l = beta_h - prod.lo;
505            let beta = beta_l + a.lo;
506            let tl = beta / b;
507            DoubleDouble::new(tl, th)
508        }
509    }
510
511    // /// Dekker division with one refinement step
512    // #[inline]
513    // pub(crate) fn div_dd_f64_newton_raphson(a: DoubleDouble, b: f64) -> Self {
514    //     // Initial estimate q = a / b
515    //     let q = DoubleDouble::div_dd_f64(a, b);
516    //
517    //     // One Newton-Raphson refinement step:
518    //     // e = a - q * b
519    //     let qb = DoubleDouble::quick_mult_f64(q, b);
520    //     let e = DoubleDouble::sub(a, qb);
521    //     let e_div_b = DoubleDouble::div_dd_f64(e, b);
522    //
523    //     DoubleDouble::add(q, e_div_b)
524    // }
525
526    // /// Dekker division with two Newton-Raphson refinement steps
527    // #[inline]
528    // pub(crate) fn div_dd_f64_newton_raphson_2(a: Dekker, b: f64) -> Self {
529    //     // First estimate: q = a / b (one round of Dekker division)
530    //     let q1 = Dekker::div_dd_f64(a, b);
531    //
532    //     // First refinement: q2 = q1 + (a - q1 * b) / b
533    //     let qb1 = Dekker::quick_mult_f64(q1, b);
534    //     let e1 = Dekker::sub(a, qb1);
535    //     let dq1 = Dekker::div_dd_f64(e1, b);
536    //     let q2 = Dekker::add(q1, dq1);
537    //
538    //     // Second refinement: q3 = q2 + (a - q2 * b) / b
539    //     let qb2 = Dekker::quick_mult_f64(q2, b);
540    //     let e2 = Dekker::sub(a, qb2);
541    //     let dq2 = Dekker::div_dd_f64(e2, b);
542    //
543    //     Dekker::add(q2, dq2)
544    // }
545
546    // #[inline]
547    // pub(crate) fn neg(self) -> Self {
548    //     Self {
549    //         lo: -self.lo, hi: -self.hi,
550    //     }
551    // }
552
553    #[inline]
554    pub(crate) fn from_f64_div_dd(a: f64, b: DoubleDouble) -> Self {
555        let q1 = a / b.hi;
556
557        let prod = DoubleDouble::from_exact_mult(q1, b.hi);
558        let prod_lo = f_fmla(q1, b.lo, prod.lo);
559        let rem = f_fmla(-1.0, prod.hi, a) - prod_lo;
560
561        let q2 = rem / b.hi;
562
563        DoubleDouble::new(q2, q1)
564    }
565
566    // #[inline]
567    // pub(crate) fn mla_f64(a: Dekker, b: f64, c: f64) -> Self {
568    //     let q = Dekker::mult_f64(a, b);
569    //     Dekker::add_f64(q, c)
570    // }
571    //
572    // #[inline]
573    // pub(crate) fn mla_dd_f64(a: Dekker, b: Dekker, c: f64) -> Self {
574    //     let q = Dekker::quick_mult(a, b);
575    //     Dekker::add_f64(q, c)
576    // }
577
578    #[inline]
579    pub(crate) fn div(a: DoubleDouble, b: DoubleDouble) -> DoubleDouble {
580        let q = 1.0 / b.hi;
581        let r_hi = a.hi * q;
582        #[cfg(any(
583            all(
584                any(target_arch = "x86", target_arch = "x86_64"),
585                target_feature = "fma"
586            ),
587            all(target_arch = "aarch64", target_feature = "neon")
588        ))]
589        {
590            let e_hi = f_fmla(b.hi, -r_hi, a.hi);
591            let e_lo = f_fmla(b.lo, -r_hi, a.lo);
592            let r_lo = q * (e_hi + e_lo);
593            DoubleDouble::new(r_lo, r_hi)
594        }
595
596        #[cfg(not(any(
597            all(
598                any(target_arch = "x86", target_arch = "x86_64"),
599                target_feature = "fma"
600            ),
601            all(target_arch = "aarch64", target_feature = "neon")
602        )))]
603        {
604            let b_hi_r_hi = DoubleDouble::from_exact_mult(b.hi, -r_hi);
605            let b_lo_r_hi = DoubleDouble::from_exact_mult(b.lo, -r_hi);
606            let e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
607            let e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
608            let r_lo = q * (e_hi + e_lo);
609            DoubleDouble::new(r_lo, r_hi)
610        }
611    }
612
613    #[inline]
614    pub(crate) fn from_exact_mult(a: f64, b: f64) -> Self {
615        #[cfg(any(
616            all(
617                any(target_arch = "x86", target_arch = "x86_64"),
618                target_feature = "fma"
619            ),
620            all(target_arch = "aarch64", target_feature = "neon")
621        ))]
622        {
623            let r_hi = a * b;
624            let r_lo = f_fmla(a, b, -r_hi);
625            DoubleDouble::new(r_lo, r_hi)
626        }
627        #[cfg(not(any(
628            all(
629                any(target_arch = "x86", target_arch = "x86_64"),
630                target_feature = "fma"
631            ),
632            all(target_arch = "aarch64", target_feature = "neon")
633        )))]
634        {
635            let splat = DoubleDouble::split(a);
636            DoubleDouble::from_exact_mult_impl_non_fma(splat, a, b)
637        }
638    }
639
640    // #[inline]
641    // pub(crate) fn add_f64(&self, other: f64) -> DoubleDouble {
642    //     let r = DoubleDouble::from_exact_add(self.hi, other);
643    //     Dekker::from_exact_add(r.hi, r.lo + self.lo)
644    // }
645
646    // #[inline]
647    // pub(crate) fn to_triple(self) -> TripleDouble {
648    //     TripleDouble::new(0., self.lo, self.hi)
649    // }
650
651    /// Computes `a * b + c`
652    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
653    ///
654    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
655    #[inline]
656    pub(crate) fn mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
657        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
658        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
659        DoubleDouble::new(r + q, p)
660    }
661
662    /// Computes `a * b + c`
663    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
664    ///
665    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
666    ///
667    /// *Correctness*
668    /// |c.hi| > |a.hi * b.hi|
669    #[inline]
670    pub(crate) fn quick_mul_f64_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
671        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
672        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
673        DoubleDouble::new(r + q, p)
674    }
675
676    /// Computes `a * b + c`
677    ///
678    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
679    ///
680    /// *Correctness*
681    /// |c.hi| > |a.hi * b.hi|
682    #[inline]
683    pub(crate) fn quick_mul_f64_add_f64(a: DoubleDouble, b: f64, c: f64) -> Self {
684        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_f64(a, b);
685        let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_exact_add(c, h);
686        DoubleDouble::new(r + q, p)
687    }
688
689    // #[inline]
690    // pub(crate) fn mul_f64_add_full(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
691    //     /*
692    //         double _t1, _t2, _t3, _t4, _t5, _t6, _t7, _t8;   \
693    //                                                  \
694    //         Mul12(&_t1,&_t2,(a),(bh));                       \
695    //         Add12(_t3,_t4,(ch),_t1);                         \
696    //         _t5 = (bl) * (a);                                \
697    //         _t6 = (cl) + _t2;                                \
698    //         _t7 = _t5 + _t6;                                 \
699    //         _t8 = _t7 + _t4;                                 \
700    //         Add12((*(resh)),(*(resl)),_t3,_t8);              \
701    //     */
702    //     let DoubleDouble { hi: t1, lo: t2 } = DoubleDouble::from_exact_mult(a.hi, b);
703    //     let DoubleDouble { hi: t3, lo: t4 } = DoubleDouble::from_full_exact_add(c.hi, t1);
704    //     let t5 = a.lo * b;
705    //     let t6 = c.lo + t2;
706    //     let t7 = t5 + t6;
707    //     let t8 = t7 + t4;
708    //     DoubleDouble::from_full_exact_add(t3, t8)
709    // }
710
711    /// Computes `a * b + c`
712    /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
713    ///
714    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
715    #[inline]
716    pub(crate) fn f64_mul_f64_add(a: f64, b: f64, c: DoubleDouble) -> Self {
717        let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
718        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
719        DoubleDouble::new(r + q, p)
720    }
721
722    // /// Computes `a * b + c`
723    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
724    // ///
725    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
726    // #[inline]
727    // pub(crate) fn single_mul_add(a: f64, b: f64, c: f64) -> Self {
728    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(a, b);
729    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(c, h);
730    //     DoubleDouble::new(r + q, p)
731    // }
732
733    // /// Computes `a * b + c` safe to overflow without FMA
734    // /// `b` is an `f64`, `a` and `c` are `DoubleDouble`.
735    // ///
736    // /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
737    // #[inline]
738    // pub(crate) fn mul_f64_safe_add(a: DoubleDouble, b: f64, c: DoubleDouble) -> Self {
739    //     let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult_safe_f64(a, b);
740    //     let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
741    //     DoubleDouble::new(r + q, p)
742    // }
743
744    /// `a*b+c`
745    ///
746    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
747    #[inline]
748    pub(crate) fn mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
749        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
750        let DoubleDouble { hi: p, lo: q } = DoubleDouble::full_add_f64(c, h);
751        DoubleDouble::new(r + q, p)
752    }
753
754    /// `a*b+c`
755    ///
756    /// *Accurate dot product (Ogita, Rump and Oishi 2004)*
757    ///
758    /// *Correctness*
759    /// |c.hi| > |a.hi * b.hi|
760    #[inline]
761    pub(crate) fn quick_mul_add(a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> Self {
762        let DoubleDouble { hi: h, lo: r } = DoubleDouble::quick_mult(a, b);
763        let DoubleDouble { hi: p, lo: q } = DoubleDouble::add_f64(c, h);
764        DoubleDouble::new(r + q, p)
765    }
766
767    #[inline]
768    pub(crate) fn quick_mult(a: DoubleDouble, b: DoubleDouble) -> Self {
769        #[cfg(any(
770            all(
771                any(target_arch = "x86", target_arch = "x86_64"),
772                target_feature = "fma"
773            ),
774            all(target_arch = "aarch64", target_feature = "neon")
775        ))]
776        {
777            let mut r = DoubleDouble::from_exact_mult(a.hi, b.hi);
778            let t1 = f_fmla(a.hi, b.lo, r.lo);
779            let t2 = f_fmla(a.lo, b.hi, t1);
780            r.lo = t2;
781            r
782        }
783        #[cfg(not(any(
784            all(
785                any(target_arch = "x86", target_arch = "x86_64"),
786                target_feature = "fma"
787            ),
788            all(target_arch = "aarch64", target_feature = "neon")
789        )))]
790        {
791            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
792            let tl1 = a.hi * b.lo;
793            let tl2 = a.lo * b.hi;
794            let cl2 = tl1 + tl2;
795            let cl3 = cl1 + cl2;
796            DoubleDouble::new(cl3, ch)
797        }
798    }
799
800    #[inline]
801    pub(crate) fn mult(a: DoubleDouble, b: DoubleDouble) -> Self {
802        #[cfg(any(
803            all(
804                any(target_arch = "x86", target_arch = "x86_64"),
805                target_feature = "fma"
806            ),
807            all(target_arch = "aarch64", target_feature = "neon")
808        ))]
809        {
810            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
811            let tl0 = a.lo * b.lo;
812            let tl1 = f_fmla(a.hi, b.lo, tl0);
813            let cl2 = f_fmla(a.lo, b.hi, tl1);
814            let cl3 = cl1 + cl2;
815            DoubleDouble::from_exact_add(ch, cl3)
816        }
817        #[cfg(not(any(
818            all(
819                any(target_arch = "x86", target_arch = "x86_64"),
820                target_feature = "fma"
821            ),
822            all(target_arch = "aarch64", target_feature = "neon")
823        )))]
824        {
825            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b.hi);
826            let tl1 = a.hi * b.lo;
827            let tl2 = a.lo * b.hi;
828            let cl2 = tl1 + tl2;
829            let cl3 = cl1 + cl2;
830            DoubleDouble::from_exact_add(ch, cl3)
831        }
832    }
833
834    #[inline]
835    pub(crate) fn mult_f64(a: DoubleDouble, b: f64) -> Self {
836        #[cfg(any(
837            all(
838                any(target_arch = "x86", target_arch = "x86_64"),
839                target_feature = "fma"
840            ),
841            all(target_arch = "aarch64", target_feature = "neon")
842        ))]
843        {
844            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
845            let cl3 = f_fmla(a.lo, b, cl1);
846            DoubleDouble::from_exact_add(ch, cl3)
847        }
848        #[cfg(not(any(
849            all(
850                any(target_arch = "x86", target_arch = "x86_64"),
851                target_feature = "fma"
852            ),
853            all(target_arch = "aarch64", target_feature = "neon")
854        )))]
855        {
856            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
857            let cl2 = a.lo * b;
858            let t = DoubleDouble::from_exact_add(ch, cl2);
859            let tl2 = t.lo + cl1;
860            DoubleDouble::from_exact_add(t.hi, tl2)
861        }
862    }
863
864    #[inline]
865    pub(crate) fn quick_f64_mult(a: f64, b: DoubleDouble) -> DoubleDouble {
866        DoubleDouble::quick_mult_f64(b, a)
867    }
868
869    #[inline]
870    pub(crate) fn quick_mult_f64(a: DoubleDouble, b: f64) -> Self {
871        #[cfg(any(
872            all(
873                any(target_arch = "x86", target_arch = "x86_64"),
874                target_feature = "fma"
875            ),
876            all(target_arch = "aarch64", target_feature = "neon")
877        ))]
878        {
879            let h = b * a.hi;
880            let l = f_fmla(b, a.lo, f_fmla(b, a.hi, -h));
881            Self { lo: l, hi: h }
882        }
883        #[cfg(not(any(
884            all(
885                any(target_arch = "x86", target_arch = "x86_64"),
886                target_feature = "fma"
887            ),
888            all(target_arch = "aarch64", target_feature = "neon")
889        )))]
890        {
891            let DoubleDouble { hi: ch, lo: cl1 } = DoubleDouble::from_exact_mult(a.hi, b);
892            let cl2 = a.lo * b;
893            let cl3 = cl1 + cl2;
894            DoubleDouble::new(cl3, ch)
895        }
896    }
897
898    // /// Double-double multiplication safe to overflow without FMA
899    // #[inline]
900    // pub(crate) fn quick_mult_safe_f64(a: DoubleDouble, b: f64) -> Self {
901    //     let h = b * a.hi;
902    //     let l = f64::mul_add(b, a.lo, f64::mul_add(b, a.hi, -h));
903    //     Self { lo: l, hi: h }
904    // }
905
906    /// Valid only |a.hi| > |b|
907    #[inline]
908    pub(crate) fn add_f64(a: DoubleDouble, b: f64) -> Self {
909        let t = DoubleDouble::from_exact_add(a.hi, b);
910        let l = a.lo + t.lo;
911        Self { lo: l, hi: t.hi }
912    }
913
914    #[inline]
915    pub(crate) fn full_add_f64(a: DoubleDouble, b: f64) -> Self {
916        let t = DoubleDouble::from_full_exact_add(a.hi, b);
917        let l = a.lo + t.lo;
918        Self { lo: l, hi: t.hi }
919    }
920
921    /// Valid only |b| > |a.hi|
922    #[inline]
923    pub(crate) fn f64_add(b: f64, a: DoubleDouble) -> Self {
924        let t = DoubleDouble::from_exact_add(b, a.hi);
925        let l = a.lo + t.lo;
926        Self { lo: l, hi: t.hi }
927    }
928
929    #[inline]
930    pub(crate) const fn to_f64(self) -> f64 {
931        self.lo + self.hi
932    }
933
934    // #[inline]
935    // pub(crate) fn from_rsqrt(x: f64) -> DoubleDouble {
936    //     let r = DoubleDouble::div_dd_f64(DoubleDouble::from_sqrt(x), x);
937    //     let rx = DoubleDouble::quick_mult_safe_f64(r, x);
938    //     let drx = DoubleDouble::mul_f64_safe_add(r, x, -rx);
939    //     let h = DoubleDouble::mul_add(r, drx, DoubleDouble::mul_add_f64(r, rx, -1.0));
940    //     let dr = DoubleDouble::quick_mult(DoubleDouble::quick_mult_f64(r, 0.5), h);
941    //     DoubleDouble::add(r, dr)
942    // }
943
944    #[inline]
945    pub(crate) fn from_rsqrt_fast(x: f64) -> DoubleDouble {
946        let sqrt_x = DoubleDouble::from_sqrt(x);
947        sqrt_x.recip()
948    }
949}
950
951impl Mul<DoubleDouble> for DoubleDouble {
952    type Output = Self;
953
954    #[inline]
955    fn mul(self, rhs: DoubleDouble) -> Self::Output {
956        DoubleDouble::quick_mult(self, rhs)
957    }
958}
959
960/// check if number is valid for Exact mult
961#[allow(dead_code)]
962#[inline]
963pub(crate) fn two_product_compatible(x: f64) -> bool {
964    let exp = get_exponent_f64(x);
965    !(exp >= 970 || exp <= -970)
966}
967
968#[cfg(test)]
969mod tests {
970    use super::*;
971    #[test]
972    fn test_f64_mult() {
973        let d1 = 1.1231;
974        let d2 = DoubleDouble::new(1e-22, 3.2341);
975        let p = DoubleDouble::quick_f64_mult(d1, d2);
976        assert_eq!(p.hi, 3.6322177100000004);
977        assert_eq!(p.lo, -1.971941841373783e-16);
978    }
979
980    #[test]
981    fn test_mult_64() {
982        let d1 = 1.1231;
983        let d2 = DoubleDouble::new(1e-22, 3.2341);
984        let p = DoubleDouble::mult_f64(d2, d1);
985        assert_eq!(p.hi, 3.6322177100000004);
986        assert_eq!(p.lo, -1.971941841373783e-16);
987    }
988
989    #[test]
990    fn recip_test() {
991        let d1 = 1.54352432142;
992        let recip = DoubleDouble::new(0., d1).recip();
993        assert_eq!(recip.hi, d1.recip());
994        assert_ne!(recip.lo, 0.);
995    }
996
997    #[test]
998    fn from_recip_test() {
999        let d1 = 1.54352432142;
1000        let recip = DoubleDouble::from_recip(d1);
1001        assert_eq!(recip.hi, d1.recip());
1002        assert_ne!(recip.lo, 0.);
1003    }
1004
1005    #[test]
1006    fn from_quick_recip_test() {
1007        let d1 = 1.54352432142;
1008        let recip = DoubleDouble::from_quick_recip(d1);
1009        assert_eq!(recip.hi, d1.recip());
1010        assert_ne!(recip.lo, 0.);
1011    }
1012}