1use crate::bits::EXP_MASK;
30use crate::common::f_fmla;
31use std::ops::{Add, Mul, Sub};
32
/// Sign of a dyadic value, stored as a single bit: `Pos` is 0 and `Neg` is 1.
#[repr(u8)]
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub(crate) enum DyadicSign {
    Pos = 0,
    Neg = 1,
}

impl DyadicSign {
    /// Returns the opposite sign.
    #[inline]
    pub(crate) fn negate(self) -> Self {
        if self == DyadicSign::Pos {
            DyadicSign::Neg
        } else {
            DyadicSign::Pos
        }
    }

    /// Returns the sign as a bit: 0 for `Pos`, 1 for `Neg`.
    #[inline]
    pub(crate) const fn to_bit(self) -> u8 {
        // The discriminants are declared as exactly 0 and 1.
        self as u8
    }

    /// Returns the sign of a product whose factors carry `self` and `rhs`:
    /// equal signs give `Pos`, differing signs give `Neg`.
    #[inline]
    pub(crate) const fn mult(self, rhs: Self) -> Self {
        match (self, rhs) {
            (DyadicSign::Pos, DyadicSign::Pos) | (DyadicSign::Neg, DyadicSign::Neg) => {
                DyadicSign::Pos
            }
            (DyadicSign::Pos, DyadicSign::Neg) | (DyadicSign::Neg, DyadicSign::Pos) => {
                DyadicSign::Neg
            }
        }
    }
}
66
/// Width, in bits, of the `u128` mantissa; shift amounts are checked against it.
const BITS: u32 = 128;
68
/// Floating-point value held as a sign, a power-of-two exponent and a 128-bit
/// mantissa.
///
/// The magnitude is `mantissa * 2^exponent`; for example `1.0` is stored as
/// `exponent = -127`, `mantissa = 1 << 127`.
#[derive(Copy, Clone, Debug)]
pub(crate) struct DyadicFloat128 {
    /// Sign of the value.
    pub(crate) sign: DyadicSign,
    /// Power of two by which `mantissa` is scaled.
    pub(crate) exponent: i16,
    /// Unsigned significand; `normalize` shifts it so the top bit is set when it is nonzero.
    pub(crate) mantissa: u128,
}
75
76#[inline]
77pub(crate) const fn f64_from_parts(sign: DyadicSign, exp: u64, mantissa: u64) -> f64 {
78 let r_sign = (if sign.to_bit() == 0 { 0u64 } else { 1u64 }).wrapping_shl(63);
79 let r_exp = exp.wrapping_shl(52);
80 f64::from_bits(r_sign | r_exp | mantissa)
81}
82
/// Returns the upper 128 bits of the 256-bit product `a * b`.
///
/// Each operand is split into 64-bit halves; the four partial products are
/// combined schoolbook-style, keeping only the carries that reach the upper
/// half.
#[inline]
pub(crate) fn mulhi_u128(a: u128, b: u128) -> u128 {
    const LOW64: u128 = 0xffff_ffff_ffff_ffff;

    let (a_hi, a_lo) = (a >> 64, a & LOW64);
    let (b_hi, b_lo) = (b >> 64, b & LOW64);

    // Each partial product is below 2^128, so plain multiplication cannot overflow.
    let p_ll = a_lo * b_lo;
    let p_lh = a_lo * b_hi;
    let p_hl = a_hi * b_lo;
    let p_hh = a_hi * b_hi;

    // Sum of everything landing in bits 64..128; its upper half is the carry
    // into the high word.
    let middle_column = (p_ll >> 64)
        .wrapping_add(p_lh & LOW64)
        .wrapping_add(p_hl & LOW64);
    let into_high = (p_lh >> 64)
        .wrapping_add(p_hl >> 64)
        .wrapping_add(middle_column >> 64);

    p_hh.wrapping_add(into_high)
}
104
/// Returns the unbiased binary exponent of `x`.
///
/// Zero yields 0, subnormals yield the minimum normal exponent (-1022), and
/// every other input yields its exponent field minus the bias of 1023.
#[inline]
const fn explicit_exponent(x: f64) -> i16 {
    if x == 0. {
        return 0;
    }
    if x.is_subnormal() {
        const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
        return 1i16 - EXP_BIAS as i16;
    }
    let exp_field = (x.to_bits() >> 52) & ((1u64 << 11) - 1u64);
    exp_field as i16 - 1023
}
116
/// Returns the significand of `x` with the hidden leading bit made explicit.
///
/// Zero and subnormal inputs have no hidden bit, so only the 52 stored
/// fraction bits are returned for them.
#[inline]
const fn explicit_mantissa(x: f64) -> u64 {
    const FRACTION_BITS: u64 = (1u64 << 52) - 1;
    const HIDDEN_BIT: u64 = 1u64 << 52;

    let fraction = x.to_bits() & FRACTION_BITS;
    if x.is_subnormal() || x == 0. {
        fraction
    } else {
        HIDDEN_BIT | fraction
    }
}
126
127impl DyadicFloat128 {
128 #[inline]
129 pub(crate) const fn zero() -> Self {
130 Self {
131 sign: DyadicSign::Pos,
132 exponent: 0,
133 mantissa: 0,
134 }
135 }
136
137 #[inline]
138 pub(crate) const fn new_from_f64(x: f64) -> Self {
139 let sign = if x.is_sign_negative() {
140 DyadicSign::Neg
141 } else {
142 DyadicSign::Pos
143 };
144 let exponent = explicit_exponent(x) - 52;
145 let mantissa = explicit_mantissa(x) as u128;
146 let mut new_val = Self {
147 sign,
148 exponent,
149 mantissa,
150 };
151 new_val.normalize();
152 new_val
153 }
154
155 #[inline]
156 pub(crate) fn new(sign: DyadicSign, exponent: i16, mantissa: u128) -> Self {
157 let mut new_item = DyadicFloat128 {
158 sign,
159 exponent,
160 mantissa,
161 };
162 new_item.normalize();
163 new_item
164 }
165
166 #[inline]
167 pub(crate) fn accurate_reciprocal(a: f64) -> Self {
168 let mut r = DyadicFloat128::new_from_f64(4.0 / a); r.exponent -= 2;
170 let ba = DyadicFloat128::new_from_f64(-a);
172 let mut q = ba * r;
173 const F128_ONE: DyadicFloat128 = DyadicFloat128 {
174 sign: DyadicSign::Pos,
175 exponent: -127,
176 mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
177 };
178 q = F128_ONE + q;
179 q = r * q;
180 r + q
181 }
182
183 #[inline]
184 pub(crate) fn from_div_f64(a: f64, b: f64) -> Self {
185 let reciprocal = DyadicFloat128::accurate_reciprocal(b);
186 let da = DyadicFloat128::new_from_f64(a);
187 reciprocal * da
188 }
189
190 #[inline]
193 pub(crate) fn mul_int64(&self, b: i64) -> DyadicFloat128 {
194 if b == 0 {
195 return DyadicFloat128::zero();
196 }
197
198 let abs_b = b.unsigned_abs();
199 let sign = if (b < 0) ^ (self.sign == DyadicSign::Neg) {
200 DyadicSign::Neg
201 } else {
202 DyadicSign::Pos
203 };
204
205 let mut hi_prod = (self.mantissa >> 64).wrapping_mul(abs_b as u128);
206 let m = hi_prod.leading_zeros();
207 hi_prod <<= m;
208
209 let mut lo_prod = (self.mantissa & 0xffff_ffff_ffff_ffff).wrapping_mul(abs_b as u128);
210 lo_prod = (lo_prod << (m - 1)) >> 63;
211
212 let (mut product, overflow) = hi_prod.overflowing_add(lo_prod);
213
214 let mut result = DyadicFloat128 {
215 sign,
216 exponent: self.exponent + 64 - m as i16,
217 mantissa: product,
218 };
219
220 if overflow {
221 product += product & 0x1;
223 result.mantissa = (product >> 1) | (1u128 << 127);
224 result.shift_right(1);
225 }
226
227 result.normalize();
228 result
229 }
230
231 #[inline]
232 fn shift_right(&mut self, amount: u32) {
233 if amount < BITS {
234 self.exponent += amount as i16;
235 self.mantissa = self.mantissa.wrapping_shr(amount);
236 } else {
237 self.exponent = 0;
238 self.mantissa = 0;
239 }
240 }
241
242 #[inline]
243 fn shift_left(&mut self, amount: u32) {
244 if amount < BITS {
245 self.exponent -= amount as i16;
246 self.mantissa = self.mantissa.wrapping_shl(amount);
247 } else {
248 self.exponent = 0;
249 self.mantissa = 0;
250 }
251 }
252
253 #[inline]
255 pub(crate) const fn normalize(&mut self) {
256 if self.mantissa != 0 {
257 let shift_length = self.mantissa.leading_zeros();
258 self.exponent -= shift_length as i16;
259 self.mantissa = self.mantissa.wrapping_shl(shift_length);
260 }
261 }
262
263 #[inline]
264 pub(crate) fn negated(&self) -> Self {
265 Self {
266 sign: self.sign.negate(),
267 exponent: self.exponent,
268 mantissa: self.mantissa,
269 }
270 }
271
272 #[inline]
273 pub(crate) fn quick_sub(&self, rhs: &Self) -> Self {
274 self.quick_add(&rhs.negated())
275 }
276
277 #[inline]
278 pub(crate) fn quick_add(&self, rhs: &Self) -> Self {
279 if self.mantissa == 0 {
280 return *rhs;
281 }
282 if rhs.mantissa == 0 {
283 return *self;
284 }
285 let mut a = *self;
286 let mut b = *rhs;
287
288 let exp_diff = a.exponent.wrapping_sub(b.exponent);
289
290 if exp_diff.abs() >= BITS as i16 {
292 return if a.sign == b.sign {
293 return if a.exponent > b.exponent { a } else { b };
295 } else if a.exponent > b.exponent {
296 a
297 } else {
298 b
299 };
300 }
301
302 if a.exponent > b.exponent {
304 b.shift_right((a.exponent - b.exponent) as u32);
305 } else if b.exponent > a.exponent {
306 a.shift_right((b.exponent - a.exponent) as u32);
307 }
308
309 let mut result = DyadicFloat128::zero();
310
311 if a.sign == b.sign {
312 result.sign = a.sign;
314 result.exponent = a.exponent;
315 result.mantissa = a.mantissa;
316 let (sum, is_overflow) = result.mantissa.overflowing_add(b.mantissa);
317 result.mantissa = sum;
318 if is_overflow {
319 result.shift_right(1);
321 result.mantissa |= 1u128 << 127;
322 }
323 return result;
325 }
326
327 if a.mantissa >= b.mantissa {
329 result.sign = a.sign;
330 result.exponent = a.exponent;
331 result.mantissa = a.mantissa.wrapping_sub(b.mantissa);
332 } else {
333 result.sign = b.sign;
334 result.exponent = b.exponent;
335 result.mantissa = b.mantissa.wrapping_sub(a.mantissa);
336 }
337
338 result.normalize();
339 result
340 }
341
342 #[inline]
343 pub(crate) fn quick_mul(&self, rhs: &Self) -> Self {
344 let mut result = DyadicFloat128 {
345 sign: if self.sign != rhs.sign {
346 DyadicSign::Neg
347 } else {
348 DyadicSign::Pos
349 },
350 exponent: self.exponent + rhs.exponent + BITS as i16,
351 mantissa: 0,
352 };
353
354 if !(self.mantissa == 0 || rhs.mantissa == 0) {
355 result.mantissa = mulhi_u128(self.mantissa, rhs.mantissa);
356 if result.mantissa >> 127 == 0 {
359 result.shift_left(1);
360 }
361 } else {
362 result.mantissa = 0;
363 }
364 result
365 }
366
    /// Converts the value to `f64`, rounding with the help of a round bit and
    /// a sticky bit taken from the mantissa bits that do not fit.
    ///
    /// The result is assembled as `d_hi + round_and_sticky * d_lo`, where
    /// `d_hi` carries the top mantissa bits and `d_lo` is a power of two a
    /// quarter of the weight of `d_hi`'s last bit. The final combination is
    /// delegated to `f_fmla` (presumably a fused multiply-add `a * b + c`;
    /// confirm against `crate::common`).
    #[inline]
    pub(crate) fn fast_as_f64(&self) -> f64 {
        // A zero mantissa maps to a zero of the stored sign.
        if self.mantissa == 0 {
            return if self.sign == DyadicSign::Pos {
                0.
            } else {
                -0.0
            };
        }

        // Significand width of f64: 52 stored bits plus the hidden bit.
        const PRECISION: u32 = 52 + 1;

        const SIG_MASK: u64 = (1u64 << 52) - 1;
        const FRACTION_MASK: u64 = (1u64 << 52) - 1;
        // Both masks are equal, so this evaluates to 0 for f64.
        const IMPLICIT_MASK: u64 = SIG_MASK - FRACTION_MASK;
        const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        // Biased f64 exponent of the mantissa's top bit (bit BITS - 1).
        let mut exp_hi = self.exponent as i32 + ((BITS - 1) as i32 + EXP_BIAS as i32);

        // Exponent beyond the largest finite one: build the value with biased
        // exponent 2 * EXP_BIAS and double it, which overflows.
        if exp_hi > 2 * EXP_BIAS as i32 {
            let d_hi = f64_from_parts(self.sign, 2 * EXP_BIAS, IMPLICIT_MASK);
            let two = 2.0f64;
            let r = two * d_hi;
            return r;
        }

        let mut denorm = false;
        // Right shift that leaves the top PRECISION mantissa bits.
        let mut shift = BITS - PRECISION;
        if exp_hi <= 0 {
            // Exponent at or below the subnormal threshold: shift further and
            // compute with a placeholder exponent that is subtracted again
            // at the end.
            denorm = true;
            shift = (BITS - PRECISION) + (1 - exp_hi) as u32;

            exp_hi = EXP_BIAS as i32;
        }

        // Exponent of d_lo: PRECISION + 1 below exp_hi.
        let exp_lo = exp_hi.wrapping_sub(PRECISION as i32).wrapping_sub(1);

        let m_hi = if shift >= BITS {
            0
        } else {
            self.mantissa >> shift
        };

        let d_hi = f64_from_parts(
            self.sign,
            exp_hi as u64,
            (m_hi as u64 & SIG_MASK) | IMPLICIT_MASK,
        );

        // round_mask selects the first discarded bit; sticky_mask selects
        // everything below it.
        let round_mask = if shift > BITS {
            0
        } else {
            1u128.wrapping_shl(shift.wrapping_sub(1))
        };
        let sticky_mask = round_mask.wrapping_sub(1u128);

        let round_bit = (self.mantissa & round_mask) != 0;
        let sticky_bit = (self.mantissa & sticky_mask) != 0;
        // 0..=3: the round bit counts double, the sticky bit counts once.
        let round_and_sticky = round_bit as i32 * 2 + sticky_bit as i32;

        let d_lo: f64;

        if exp_lo <= 0 {
            // d_lo's exponent field would not be positive: raise both terms
            // by 2^scale_up_exponent, combine, then scale the result back down.
            let scale_up_exponent = 1 - exp_lo;
            let scale_up_factor = f64_from_parts(
                DyadicSign::Pos,
                EXP_BIAS + scale_up_exponent as u64,
                IMPLICIT_MASK,
            );
            let scale_down_factor = f64_from_parts(
                DyadicSign::Pos,
                EXP_BIAS - scale_up_exponent as u64,
                IMPLICIT_MASK,
            );

            d_lo = f64_from_parts(
                self.sign,
                (exp_lo + scale_up_exponent) as u64,
                IMPLICIT_MASK,
            );

            return f_fmla(d_lo, round_and_sticky as f64, d_hi * scale_up_factor)
                * scale_down_factor;
        }

        d_lo = f64_from_parts(self.sign, exp_lo as u64, IMPLICIT_MASK);

        let r = f_fmla(d_lo, round_and_sticky as f64, d_hi);

        if denorm {
            // Remove the placeholder exponent from the bit pattern.
            const SIG_LEN: u64 = 52;
            let clear_exp: u64 = (exp_hi as u64) << SIG_LEN;
            let mut r_bits: u64 = r.to_bits() - clear_exp;

            // EXP_MASK comes from `crate::bits` (presumably the f64 exponent
            // field mask; confirm). IMPLICIT_MASK is 0, so this subtraction
            // changes nothing for f64.
            if r_bits & EXP_MASK == 0 {
                r_bits -= IMPLICIT_MASK;
            }

            return f64::from_bits(r_bits);
        }

        r
    }
482
483 #[inline]
486 pub(crate) fn reciprocal(self) -> DyadicFloat128 {
487 let guess = 1. / self.fast_as_f64();
493 let mut x = DyadicFloat128::new_from_f64(guess);
494
495 let twos = DyadicFloat128 {
497 sign: DyadicSign::Pos,
498 exponent: -126,
499 mantissa: 0x80000000_00000000_00000000_00000000_u128,
500 };
501
502 x = x * (twos - (self * x));
503 x = x * (twos - (self * x));
504 x
505 }
506
507 }
545
546impl Add<DyadicFloat128> for DyadicFloat128 {
547 type Output = DyadicFloat128;
548 #[inline]
549 fn add(self, rhs: DyadicFloat128) -> Self::Output {
550 self.quick_add(&rhs)
551 }
552}
553
554impl DyadicFloat128 {
555 #[inline]
556 pub(crate) fn biased_exponent(&self) -> i16 {
557 self.exponent + (BITS as i16 - 1)
558 }
559
560 #[inline]
561 pub(crate) fn trunc_to_i64(&self) -> i64 {
562 if self.exponent <= -(BITS as i16) {
563 return 0;
565 }
566 let hi = self.mantissa >> 64;
567 let norm_exp = self.biased_exponent();
568 if norm_exp > 63 {
569 return if self.sign == DyadicSign::Neg {
570 i64::MIN
571 } else {
572 i64::MAX
573 };
574 }
575 let r: i64 = (hi >> (63 - norm_exp)) as i64;
576
577 if self.sign == DyadicSign::Neg { -r } else { r }
578 }
579
580 #[inline]
581 pub(crate) fn round_to_nearest(&self) -> DyadicFloat128 {
582 if self.exponent == -(BITS as i16) {
583 return DyadicFloat128 {
585 sign: self.sign,
586 exponent: -(BITS as i16 - 1),
587 mantissa: 0x80000000_00000000_00000000_00000000_u128,
588 };
589 }
590 if self.exponent <= -((BITS + 1) as i16) {
591 return DyadicFloat128 {
593 sign: self.sign,
594 exponent: 0,
595 mantissa: 0u128,
596 };
597 }
598 const FRACTION_LENGTH: u32 = BITS - 1;
599 let trim_size =
600 (FRACTION_LENGTH as i64).wrapping_sub(self.exponent as i64 + (BITS - 1) as i64) as u128;
601 let half_bit_set =
602 self.mantissa & (1u128.wrapping_shl(trim_size.wrapping_sub(1) as u32)) != 0;
603 let trunc_u: u128 = self
604 .mantissa
605 .wrapping_shr(trim_size as u32)
606 .wrapping_shl(trim_size as u32);
607 if trunc_u == self.mantissa {
608 return *self;
609 }
610
611 let truncated = DyadicFloat128::new(self.sign, self.exponent, trunc_u);
612
613 if !half_bit_set {
614 truncated
617 } else if self.sign == DyadicSign::Neg {
618 let ones = DyadicFloat128 {
619 sign: DyadicSign::Pos,
620 exponent: -(BITS as i16 - 1),
621 mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
622 };
623 truncated - ones
624 } else {
625 let ones = DyadicFloat128 {
626 sign: DyadicSign::Pos,
627 exponent: -(BITS as i16 - 1),
628 mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
629 };
630 truncated + ones
631 }
632 }
633
634 #[inline]
635 pub(crate) fn round_to_nearest_f64(&self) -> f64 {
636 self.round_to_nearest().fast_as_f64()
637 }
638}
639
640impl Sub<DyadicFloat128> for DyadicFloat128 {
641 type Output = DyadicFloat128;
642 #[inline]
643 fn sub(self, rhs: DyadicFloat128) -> Self::Output {
644 self.quick_sub(&rhs)
645 }
646}
647
648impl Mul<DyadicFloat128> for DyadicFloat128 {
649 type Output = DyadicFloat128;
650 #[inline]
651 fn mul(self, rhs: DyadicFloat128) -> Self::Output {
652 self.quick_mul(&rhs)
653 }
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    /// Mantissa with only the top bit set.
    const TOP_BIT: u128 = 0x80000000_00000000_00000000_00000000_u128;

    /// Builds a value whose mantissa is just the top bit, directly from its
    /// fields; `exponent = -127` is 1.0.
    fn power_of_two(sign: DyadicSign, exponent: i16) -> DyadicFloat128 {
        DyadicFloat128 {
            sign,
            exponent,
            mantissa: TOP_BIT,
        }
    }

    #[test]
    fn test_dyadic_float() {
        let one = power_of_two(DyadicSign::Pos, -127);
        assert_eq!(one.fast_as_f64(), 1.0);

        let neg_half = power_of_two(DyadicSign::Neg, -128);
        assert_eq!(neg_half.fast_as_f64(), -1.0 / 2.0);

        let neg_one_over_24 = DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -132,
            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
        };
        assert_eq!(neg_one_over_24.fast_as_f64(), -1.0 / 24.0);

        let one_over_40320 = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -143,
            mantissa: 0xd00d00d0_0d00d00d_00d00d00_d00d00d0_u128,
        };
        assert_eq!(one_over_40320.fast_as_f64(), 1.0 / 40320.0);
    }

    #[test]
    fn dyadic_float_add() {
        let one = power_of_two(DyadicSign::Pos, -127);
        assert_eq!(one.fast_as_f64(), 1.0);

        let neg_half = power_of_two(DyadicSign::Neg, -128);
        let sum = one.quick_add(&neg_half);
        assert_eq!(sum.fast_as_f64(), 0.5);
    }

    #[test]
    fn dyadic_float_mul() {
        let one = power_of_two(DyadicSign::Pos, -127);
        assert_eq!(one.fast_as_f64(), 1.0);

        let neg_half = power_of_two(DyadicSign::Neg, -128);
        let product = one.quick_mul(&neg_half);
        assert_eq!(product.fast_as_f64(), -0.5);

        let two = power_of_two(DyadicSign::Pos, -126);
        assert_eq!(two.fast_as_f64(), 2.0);
    }

    #[test]
    fn dyadic_round_trip() {
        let zero = 0.0;
        assert_eq!(DyadicFloat128::new_from_f64(zero).fast_as_f64(), zero);

        // A zero assembled by hand must convert the same way.
        let literal_zero = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: 0,
            mantissa: 0,
        };
        assert_eq!(literal_zero.fast_as_f64(), zero);

        for original in [1.0, 0.5, -0.5, -532322.54324324232] {
            let restored = DyadicFloat128::new_from_f64(original).fast_as_f64();
            assert_eq!(restored, original);
        }
    }

    #[test]
    fn dyadic_float_reciprocal() {
        let inverse_of_one = power_of_two(DyadicSign::Pos, -127).reciprocal();
        assert_eq!(inverse_of_one.fast_as_f64(), 1.0);

        let inverse_of_four = DyadicFloat128::new_from_f64(4.).reciprocal();
        assert_eq!(inverse_of_four.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_from_div() {
        let quotient = DyadicFloat128::from_div_f64(1.0, 4.0);
        assert_eq!(quotient.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_accurate_reciprocal() {
        let inverse = DyadicFloat128::accurate_reciprocal(4.0);
        assert_eq!(inverse.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_mul_int() {
        // (multiplicand, integer factor, expected product)
        let cases: [(f64, i64, f64); 3] = [(4.0, -2, -8.0), (-4.0, -2, 8.0), (2.5, 2, 5.0)];
        for (multiplicand, factor, expected) in cases {
            let scaled = DyadicFloat128::new_from_f64(multiplicand).mul_int64(factor);
            assert_eq!(scaled.fast_as_f64(), expected);
        }
    }

    #[test]
    fn dyadic_float_round() {
        // (input, expected rounded value)
        let cases: [(f64, f64); 6] = [
            (2.5, 3.0),
            (0.5, 1.0),
            (-0.5, -1.0),
            (-0.351, (-0.351f64).round()),
            (0.351, 0.351f64.round()),
            (25.6, 26.),
        ];
        for (input, expected) in cases {
            let rounded = DyadicFloat128::new_from_f64(input).round_to_nearest_f64();
            assert_eq!(rounded, expected);
        }
    }

    #[test]
    fn dyadic_int_trunc() {
        // (input, expected truncation toward zero)
        let cases: [(f64, i64); 6] = [
            (-2.5, -2),
            (2.5, 2),
            (0.5, 0),
            (-0.5, 0),
            (-0.351, 0),
            (0.351, 0),
        ];
        for (input, expected) in cases {
            let truncated = DyadicFloat128::new_from_f64(input).trunc_to_i64();
            assert_eq!(truncated, expected);
        }
    }
}
881}