1use crate::bits::biased_exponent_f64;
30use crate::common::*;
31use crate::double_double::DoubleDouble;
32use crate::exponents::expf;
33use crate::logf;
34use crate::logs::LOG2_R;
35use crate::polyeval::{f_polyeval3, f_polyeval6, f_polyeval10};
36use crate::pow_tables::EXP2_MID1;
37use crate::powf_tables::{LOG2_R_TD, LOG2_R2_DD, POWF_R2};
38use crate::round::RoundFinite;
39
40#[inline]
43pub const fn powf(d: f32, n: f32) -> f32 {
44 let value = d.abs();
45 let c = expf(n * logf(value));
46 if n == 1. {
47 return d;
48 }
49 if d < 0.0 {
50 let y = n as i32;
51 if y % 2 == 0 { c } else { -c }
52 } else {
53 c
54 }
55}
56
57#[inline]
58const fn larger_exponent(a: f64, b: f64) -> bool {
59 biased_exponent_f64(a) >= biased_exponent_f64(b)
60}
61
62#[cold]
70#[inline(never)]
71fn powf_dd(idx_x: i32, dx: f64, y6: f64, lo6_hi: f64, exp2_hi_mid: DoubleDouble) -> f64 {
72 let idx2 = f_fmla(
78 dx,
79 f64::from_bits(0x40d0000000000000),
80 f64::from_bits(0x4050000000000000),
81 )
82 .round_finite() as usize;
83 let dx2 = f_fmla(1.0 + dx, f64::from_bits(POWF_R2[idx2]), -1.0); const COEFFS: [(u64, u64); 6] = [
86 (0x3c7777d0ffda25e0, 0x3ff71547652b82fe),
87 (0xbc6777d101cf0a84, 0xbfe71547652b82fe),
88 (0x3c7ce04b5140d867, 0x3fdec709dc3a03fd),
89 (0x3c7137b47e635be5, 0xbfd71547652b82fb),
90 (0xbc5b5a30b3bdb318, 0x3fd2776c516a92a2),
91 (0x3c62d2fbd081e657, 0xbfcec70af1929ca6),
92 ];
93 let dx_dd = DoubleDouble::new(0.0, dx2);
94 let p = f_polyeval6(
95 dx_dd,
96 DoubleDouble::from_bit_pair(COEFFS[0]),
97 DoubleDouble::from_bit_pair(COEFFS[1]),
98 DoubleDouble::from_bit_pair(COEFFS[2]),
99 DoubleDouble::from_bit_pair(COEFFS[3]),
100 DoubleDouble::from_bit_pair(COEFFS[4]),
101 DoubleDouble::from_bit_pair(COEFFS[5]),
102 );
103 let log2_x_lo = DoubleDouble::quick_mult_f64(p, dx2);
105 let log2_r_td = LOG2_R_TD[idx_x as usize];
107 let log2_x_mid = DoubleDouble::new(f64::from_bits(log2_r_td.0), f64::from_bits(log2_r_td.1));
108 let log2_x_m = DoubleDouble::add(DoubleDouble::from_bit_pair(LOG2_R2_DD[idx2]), log2_x_mid);
110 let log2_x = if larger_exponent(log2_x_m.hi, log2_x_lo.hi) {
114 DoubleDouble::add(log2_x_m, log2_x_lo)
115 } else {
116 DoubleDouble::add(log2_x_lo, log2_x_m)
117 };
118 let lo6_hi_dd = DoubleDouble::new(0.0, lo6_hi);
119 let prod = DoubleDouble::quick_mult_f64(log2_x, y6);
121 let lo6 = if larger_exponent(prod.hi, lo6_hi) {
123 DoubleDouble::add(prod, lo6_hi_dd)
124 } else {
125 DoubleDouble::add(lo6_hi_dd, prod)
126 };
127
128 const EXP2_COEFFS: [(u64, u64); 10] = [
129 (0x0000000000000000, 0x3ff0000000000000),
130 (0x3c1abc9e3b398024, 0x3f862e42fefa39ef),
131 (0xbba5e43a5429bddb, 0x3f0ebfbdff82c58f),
132 (0xbb2d33162491268f, 0x3e8c6b08d704a0c0),
133 (0x3a94fb32d240a14e, 0x3e03b2ab6fba4e77),
134 (0x39ee84e916be83e0, 0x3d75d87fe78a6731),
135 (0xb989a447bfddc5e6, 0x3ce430912f86bfb8),
136 (0xb8e31a55719de47f, 0x3c4ffcbfc588ded9),
137 (0xb850ba57164eb36b, 0x3bb62c034beb8339),
138 (0xb7b8483eabd9642d, 0x3b1b5251ff97bee1),
139 ];
140
141 let pp = f_polyeval10(
142 lo6,
143 DoubleDouble::from_bit_pair(EXP2_COEFFS[0]),
144 DoubleDouble::from_bit_pair(EXP2_COEFFS[1]),
145 DoubleDouble::from_bit_pair(EXP2_COEFFS[2]),
146 DoubleDouble::from_bit_pair(EXP2_COEFFS[3]),
147 DoubleDouble::from_bit_pair(EXP2_COEFFS[4]),
148 DoubleDouble::from_bit_pair(EXP2_COEFFS[5]),
149 DoubleDouble::from_bit_pair(EXP2_COEFFS[6]),
150 DoubleDouble::from_bit_pair(EXP2_COEFFS[7]),
151 DoubleDouble::from_bit_pair(EXP2_COEFFS[8]),
152 DoubleDouble::from_bit_pair(EXP2_COEFFS[9]),
153 );
154 let rr = DoubleDouble::quick_mult(exp2_hi_mid, pp);
155
156 let r = DoubleDouble::from_exact_add(rr.hi, rr.lo);
158
159 const FRACTION_MASK: u64 = (1u64 << 52) - 1;
160
161 let mut r_bits = r.hi.to_bits();
162 if ((r_bits & 0xfff_ffff) == 0) && (r.lo != 0.0) {
163 let hi_sign = r.hi.to_bits() >> 63;
164 let lo_sign = r.lo.to_bits() >> 63;
165 if hi_sign == lo_sign {
166 r_bits = r_bits.wrapping_add(1);
167 } else if (r_bits & FRACTION_MASK) > 0 {
168 r_bits = r_bits.wrapping_sub(1);
169 }
170 }
171
172 f64::from_bits(r_bits)
173}
174
175#[inline]
179pub fn f_powf(x: f32, y: f32) -> f32 {
180 let mut x_u = x.to_bits();
181 let x_abs = x_u & 0x7fff_ffff;
182 let mut y = y;
183 let y_u = y.to_bits();
184 let y_abs = y_u & 0x7fff_ffff;
185 let mut x = x;
186
187 if ((y_abs & 0x0007_ffff) == 0) || (y_abs > 0x4f170000) {
188 if x.is_nan() || y.is_nan() {
190 if y.abs() == 0. {
191 return 1.;
192 }
193 if x == 1. {
194 return 1.;
195 }
196 return f32::NAN;
197 }
198
199 if y == 0.0 {
201 return 1.0;
202 }
203
204 match y_abs {
205 0x7f80_0000 => {
206 if x_abs > 0x7f80_0000 {
207 return x;
209 }
210 if x_abs == 0x3f80_0000 {
211 return 1.0;
213 }
214 if x == 0.0 && y_u == 0xff80_0000 {
215 return f32::INFINITY;
217 }
218 return if (x_abs < 0x3f80_0000) == (y_u == 0xff80_0000) {
223 f32::INFINITY
224 } else {
225 0.
226 };
227 }
228 _ => {
229 match y_u {
230 0x3f00_0000 => {
231 if x == 0.0 || x_u == 0xff80_0000 {
233 return x * x;
237 }
238 let r = x.sqrt();
239 return if r.to_bits() != 0x8000_0000 { r } else { 0.0 };
240 }
241 0x3f80_0000 => {
242 return x;
243 } 0x4000_0000 => return x * x, _ => {
246 let is_int = is_integerf(y);
247 if is_int && (y_u > 0x4000_0000) && (y_u <= 0x41c0_0000) {
248 let mut msb: i32 = if x_abs == 0 {
250 32 - 2
251 } else {
252 x_abs.leading_zeros() as i32
253 };
254 msb = if msb > 8 { msb } else { 8 };
255 let mut lsb: i32 = if x_abs == 0 {
256 0
257 } else {
258 x_abs.trailing_zeros() as i32
259 };
260 lsb = if lsb > 23 { 23 } else { lsb };
261 let extra_bits: i32 = 32 - 2 - lsb - msb;
262 let iter = y as i32;
263
264 if extra_bits * iter <= 23 + 2 {
265 let x_d = x as f64;
268 let mut result = x_d;
269 for _ in 1..iter {
270 result *= x_d;
271 }
272 return result as f32;
273 }
274 }
275
276 if y_abs > 0x4f17_0000 {
277 if y_abs > 0x7f80_0000 {
279 if x_u == 0x3f80_0000 {
280 return 1.0;
283 }
284 return y;
286 }
287 y = f32::from_bits((y_u & 0x8000_0000).wrapping_add(0x4f800000u32));
291 }
292 }
293 }
294 }
295 }
296 }
297
298 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
299 let mut ex = -(E_BIAS as i32);
300 let mut sign: u64 = 0;
301
302 if ((x_u & 0x801f_ffffu32) == 0) || x_u >= 0x7f80_0000u32 || x_u < 0x0080_0000u32 {
303 if x.is_nan() {
304 return f32::NAN;
305 }
306
307 if x_u == 0x3f80_0000 {
308 return 1.;
309 }
310
311 let x_is_neg = x.to_bits() > 0x8000_0000;
312
313 if x == 0.0 {
314 let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
315 if y_u > 0x8000_0000u32 {
316 return if x_is_neg {
318 f32::NEG_INFINITY
319 } else {
320 f32::INFINITY
321 };
322 }
323 return if out_is_neg { -0.0 } else { 0.0 };
325 }
326
327 if x_abs == 0x7f80_0000u32 {
328 let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
330 if y_u >= 0x7fff_ffff {
331 return if out_is_neg { -0.0 } else { 0.0 };
332 }
333 return if out_is_neg {
334 f32::NEG_INFINITY
335 } else {
336 f32::INFINITY
337 };
338 }
339
340 if x_abs > 0x7f80_0000 {
341 return x;
344 }
345
346 if x_abs < 0x0080_0000u32 {
348 ex = ex.wrapping_sub(64);
349 x *= f32::from_bits(0x5f800000);
350 }
351
352 if x.is_sign_negative() {
354 if is_integerf(y) {
355 x = -x;
356 if is_odd_integerf(y) {
357 sign = 0x8000_0000_0000_0000u64;
358 }
359 } else {
360 return f32::NAN;
362 }
363 }
364 }
365
366 x_u = x.to_bits();
370
371 ex = ex.wrapping_add((x_u >> 23) as i32);
373 let e_x = ex as f64;
374 let x_mant = x_u & ((1u32 << 23) - 1);
376 let idx_x = (x_mant >> (23 - 7)) as i32;
377 let m_x = f32::from_bits(x_mant | 0x3f800000);
380
381 let dx;
389 #[cfg(any(
390 all(
391 any(target_arch = "x86", target_arch = "x86_64"),
392 target_feature = "fma"
393 ),
394 all(target_arch = "aarch64", target_feature = "neon")
395 ))]
396 {
397 use crate::logs::LOG_REDUCTION_F32;
398 dx = f_fmlaf(
399 m_x,
400 f32::from_bits(LOG_REDUCTION_F32.0[idx_x as usize]),
401 -1.0,
402 ) as f64; }
404 #[cfg(not(any(
405 all(
406 any(target_arch = "x86", target_arch = "x86_64"),
407 target_feature = "fma"
408 ),
409 all(target_arch = "aarch64", target_feature = "neon")
410 )))]
411 {
412 use crate::logs::LOG_RANGE_REDUCTION;
413 dx = f_fmla(
414 m_x as f64,
415 f64::from_bits(LOG_RANGE_REDUCTION[idx_x as usize]),
416 -1.0,
417 ); }
419
420 const COEFFS: [u64; 6] = [
427 0x3ff71547652b82fe,
428 0xbfe71547652b7a07,
429 0x3fdec709dc458db1,
430 0xbfd715479c2266c9,
431 0x3fd2776ae1ddf8f0,
432 0xbfce7b2178870157,
433 ];
434
435 let dx2 = dx * dx; let c0 = f_fmla(dx, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
437 let c1 = f_fmla(dx, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
438 let c2 = f_fmla(dx, f64::from_bits(COEFFS[5]), f64::from_bits(COEFFS[4]));
439
440 let p = f_polyeval3(dx2, c0, c1, c2);
441
442 let s = f_fmla(dx, p, f64::from_bits(LOG2_R[idx_x as usize]) + e_x);
449
450 let y6 = (y * f32::from_bits(0x42800000)) as f64; let hm = (s * y6).round_finite();
472
473 let lo6_hi = f_fmla(y6, e_x + f64::from_bits(LOG2_R_TD[idx_x as usize].2), -hm); let lo6 = f_fmla(
488 y6,
489 f_fmla(dx, p, f64::from_bits(LOG2_R_TD[idx_x as usize].1)),
490 lo6_hi,
491 );
492
493 let mut hm_i = unsafe { hm.to_int_unchecked::<i64>() };
498 hm_i = if hm_i > (1i64 << 15) {
499 1 << 15
500 } else if hm_i < (-(1i64 << 15)) {
501 -(1 << 15)
502 } else {
503 hm_i
504 };
505
506 let idx_y = hm_i & 0x3f;
507
508 let exp_hi_i = (hm_i >> 6).wrapping_shl(52);
510 let exp_mid_i = EXP2_MID1[idx_y as usize].1;
512 let exp2_hi_mid_i = (exp_hi_i.wrapping_add(exp_mid_i as i64) as u64).wrapping_add(sign);
515 let exp2_hi_mid = f64::from_bits(exp2_hi_mid_i);
516
517 const EXP2_COEFFS: [u64; 6] = [
523 0x3ff0000000000000,
524 0x3f862e42fefa39ef,
525 0x3f0ebfbdff82a23a,
526 0x3e8c6b08d7076268,
527 0x3e03b2ad33f8b48b,
528 0x3d75d870c4d84445,
529 ];
530
531 let lo6_sqr = lo6 * lo6;
532 let d0 = f_fmla(
533 lo6,
534 f64::from_bits(EXP2_COEFFS[1]),
535 f64::from_bits(EXP2_COEFFS[0]),
536 );
537 let d1 = f_fmla(
538 lo6,
539 f64::from_bits(EXP2_COEFFS[3]),
540 f64::from_bits(EXP2_COEFFS[2]),
541 );
542 let d2 = f_fmla(
543 lo6,
544 f64::from_bits(EXP2_COEFFS[5]),
545 f64::from_bits(EXP2_COEFFS[4]),
546 );
547 let pp = f_polyeval3(lo6_sqr, d0, d1, d2);
548
549 let r = pp * exp2_hi_mid;
550 let r_u = r.to_bits();
551
552 #[cfg(any(
553 all(
554 any(target_arch = "x86", target_arch = "x86_64"),
555 target_feature = "fma"
556 ),
557 all(target_arch = "aarch64", target_feature = "neon")
558 ))]
559 const ERR: u64 = 64;
560 #[cfg(not(any(
561 all(
562 any(target_arch = "x86", target_arch = "x86_64"),
563 target_feature = "fma"
564 ),
565 all(target_arch = "aarch64", target_feature = "neon")
566 )))]
567 const ERR: u64 = 128;
568
569 let r_upper = f64::from_bits(r_u + ERR) as f32;
570 let r_lower = f64::from_bits(r_u - ERR) as f32;
571 if r_upper == r_lower {
572 return r_upper;
573 }
574
575 let exp2_hi_mid_dd = DoubleDouble {
577 lo: if idx_y != 0 {
578 f64::from_bits((exp_hi_i as u64).wrapping_add(EXP2_MID1[idx_y as usize].0))
579 } else {
580 0.
581 },
582 hi: exp2_hi_mid,
583 };
584
585 let r_dd = powf_dd(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd);
586 r_dd as f32
587}
588
589#[inline]
591pub fn dirty_powf(d: f32, n: f32) -> f32 {
592 use crate::exponents::dirty_exp2f;
593 use crate::logs::dirty_log2f;
594 let value = d.abs();
595 let lg = dirty_log2f(value);
596 let c = dirty_exp2f(n * lg);
597 if d < 0.0 {
598 let y = n as i32;
599 if y % 2 == 0 { c } else { -c }
600 } else {
601 c
602 }
603}
604
605#[cfg(test)]
606mod tests {
607 use super::*;
608
609 #[test]
610 fn powf_test() {
611 assert!(
612 (powf(2f32, 3f32) - 8f32).abs() < 1e-6,
613 "Invalid result {}",
614 powf(2f32, 3f32)
615 );
616 assert!(
617 (powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
618 "Invalid result {}",
619 powf(0.5f32, 2f32)
620 );
621 }
622
623 #[test]
624 fn f_powf_test() {
625 assert!(
626 (f_powf(2f32, 3f32) - 8f32).abs() < 1e-6,
627 "Invalid result {}",
628 f_powf(2f32, 3f32)
629 );
630 assert!(
631 (f_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
632 "Invalid result {}",
633 f_powf(0.5f32, 2f32)
634 );
635 assert_eq!(f_powf(0.5f32, 1.5432f32), 0.34312353);
636 assert_eq!(
637 f_powf(f32::INFINITY, 0.00000000000000000000000000000000038518824),
638 f32::INFINITY
639 );
640 assert_eq!(f_powf(f32::NAN, 0.0), 1.);
641 assert_eq!(f_powf(1., f32::NAN), 1.);
642 }
643
644 #[test]
645 fn dirty_powf_test() {
646 assert!(
647 (dirty_powf(2f32, 3f32) - 8f32).abs() < 1e-6,
648 "Invalid result {}",
649 dirty_powf(2f32, 3f32)
650 );
651 assert!(
652 (dirty_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6,
653 "Invalid result {}",
654 dirty_powf(0.5f32, 2f32)
655 );
656 }
657}