1use crate::bits::{EXP_MASK, get_exponent_f64};
30use crate::common::f_fmla;
31use crate::double_double::DoubleDouble;
32use crate::logs::log1p::{LOG_R1_DD, R1};
33use crate::logs::log1p_dd;
34use crate::polyeval::{f_estrin_polyeval8, f_polyeval4, f_polyeval5};
35
36#[cold]
37fn tiny_hard(x: f64) -> f64 {
38 let x2 = DoubleDouble::from_exact_mult(x, x);
44
45 const C: [(u64, u64); 7] = [
46 (0x3c755555555129de, 0x3fd5555555555555),
47 (0xbbd333352fe28400, 0xbfd0000000000000),
48 (0xbc698cb3ef1ea2dd, 0x3fc999999999999a),
49 (0x3c4269700b3f95d0, 0xbfc55555555546ef),
50 (0x3c61290e9261823e, 0x3fc2492492491565),
51 (0x3c6fb0a243c2a59c, 0xbfc000018af8cb7e),
52 (0x3c3b271ceb5c60a0, 0x3fbc71ca10b30518),
53 ];
54
55 let mut r = DoubleDouble::mul_f64_add(
56 DoubleDouble::from_bit_pair(C[6]),
57 x,
58 DoubleDouble::from_bit_pair(C[5]),
59 );
60 r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[4]));
61 r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[3]));
62 r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[2]));
63 r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[1]));
64 r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[0]));
65 r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
66 r = DoubleDouble::quick_mult(r, x2);
67 r.to_f64()
68}
69
70fn log1pmx_big(x: f64) -> f64 {
71 let mut x_u = x.to_bits();
72
73 let mut x_dd = DoubleDouble::default();
74
75 let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
76
77 const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
78
79 if x_exp >= EXP_BIAS {
80 if x_u >= 0x4650_0000_0000_0000u64 {
82 x_dd.hi = x;
83 } else {
84 x_dd = DoubleDouble::from_exact_add(x, 1.0);
85 }
86 } else {
87 x_dd = DoubleDouble::from_exact_add(1.0, x);
89 }
90
91 let xhi_bits = x_dd.hi.to_bits();
97 let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
98 x_u = xhi_bits;
99 let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
102 let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
103 let e_x = x_e as f64;
104
105 const LOG_2: DoubleDouble = DoubleDouble::new(
106 f64::from_bits(0x3d2ef35793c76730),
107 f64::from_bits(0x3fe62e42fefa3800),
108 );
109
110 let r_dd = LOG_R1_DD[idx as usize];
114
115 let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
116 let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));
119
120 let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
122 let m_hi = 1f64.to_bits() | xhi_frac;
127
128 let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
129 (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
130 } else {
131 0
132 };
133
134 let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
135
136 let r = R1[idx as usize];
148 let v_hi;
149 let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
150
151 #[cfg(any(
152 all(
153 any(target_arch = "x86", target_arch = "x86_64"),
154 target_feature = "fma"
155 ),
156 all(target_arch = "aarch64", target_feature = "neon")
157 ))]
158 {
159 v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); }
161
162 #[cfg(not(any(
163 all(
164 any(target_arch = "x86", target_arch = "x86_64"),
165 target_feature = "fma"
166 ),
167 all(target_arch = "aarch64", target_feature = "neon")
168 )))]
169 {
170 use crate::logs::log1p::RCM1;
171 let c = f64::from_bits(
172 (idx as u64)
173 .wrapping_shl(52 - 7)
174 .wrapping_add(0x3FF0_0000_0000_0000u64),
175 );
176 v_hi = f_fmla(
177 f64::from_bits(r),
178 m_dd.hi - c,
179 f64::from_bits(RCM1[idx as usize]),
180 ); }
182
183 let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
187 v_dd.lo += v_lo.lo;
188
189 let mut r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);
192
193 const P_COEFFS: [u64; 6] = [
198 0xbfe0000000000000,
199 0x3fd5555555555166,
200 0xbfcfffffffdb7746,
201 0x3fc99999a8718a60,
202 0xbfc555874ce8ce22,
203 0x3fc24335555ddbe5,
204 ];
205
206 let v_sq = v_dd.hi * v_dd.hi;
208 let p0 = f_fmla(
209 v_dd.hi,
210 f64::from_bits(P_COEFFS[1]),
211 f64::from_bits(P_COEFFS[0]),
212 );
213 let p1 = f_fmla(
214 v_dd.hi,
215 f64::from_bits(P_COEFFS[3]),
216 f64::from_bits(P_COEFFS[2]),
217 );
218 let p2 = f_fmla(
219 v_dd.hi,
220 f64::from_bits(P_COEFFS[5]),
221 f64::from_bits(P_COEFFS[4]),
222 );
223 let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);
224
225 const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
226 let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];
227
228 let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);
229
230 r1.lo = p;
231 r1 = DoubleDouble::from_exact_add(r1.hi, r1.lo);
232 r1 = DoubleDouble::full_add_f64(r1, -x);
233
234 let left = r1.hi + (r1.lo - err);
235 let right = r1.hi + (r1.lo + err);
236 if left == right {
238 return left;
239 }
240 log1pmx_accurate_dd(x)
241}
242
243#[cold]
244fn log1pmx_accurate_dd(x: f64) -> f64 {
245 let r = log1p_dd(x);
246 DoubleDouble::full_add_f64(r, -x).to_f64()
247}
248
249pub fn f_log1pmx(x: f64) -> f64 {
253 let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
254
255 let x_e = (x.to_bits() >> 52) & 0x7ff;
256
257 if !x.is_normal() {
258 if x.is_nan() {
259 return x + x;
260 }
261 if x.is_infinite() {
262 return f64::NAN;
263 }
264 if x == 0. {
265 return x;
266 }
267 }
268
269 if ax > 0x3f90000000000000u64 {
270 if x <= -1. {
272 if x == -1. {
273 return f64::NEG_INFINITY;
274 }
275 return f64::NAN;
276 }
277 return log1pmx_big(x);
278 }
279
280 const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
281
282 if x_e < E_BIAS - 10 {
285 if x_e < E_BIAS - 100 {
286 let x2 = x * x;
289 let dx2 = f_fmla(x, x, -x2);
290 let rl = dx2 * -0.5;
291 return f_fmla(x2, -0.5, rl);
292 }
293
294 let p = f_polyeval5(
301 x,
302 f64::from_bits(0x3fc999999999999a),
303 f64::from_bits(0xbfc55555555546ef),
304 f64::from_bits(0x3fc24924923d3abf),
305 f64::from_bits(0xbfc000018af7f637),
306 f64::from_bits(0x3fbc72984db24b6a),
307 );
308
309 let x2 = DoubleDouble::from_exact_mult(x, x);
310
311 let mut r = DoubleDouble::f64_mul_f64_add(
312 x,
313 p,
314 DoubleDouble::from_bit_pair((0xbbd3330899095800, 0xbfd0000000000000)),
315 );
316 r = DoubleDouble::mul_f64_add(
317 r,
318 x,
319 DoubleDouble::from_bit_pair((0x3c75555538509407, 0x3fd5555555555555)),
320 );
321 r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
322 r = DoubleDouble::quick_mult(r, x2);
323 const ERR: f64 = f64::from_bits(0x3af0000000000000); let ub = r.hi + (r.lo + ERR);
325 let lb = r.hi + (r.lo - ERR);
326 if lb == ub {
327 return r.to_f64();
328 }
329 return tiny_hard(x);
330 }
331
332 let p = f_estrin_polyeval8(
339 x,
340 f64::from_bits(0x3fc9999999999997),
341 f64::from_bits(0xbfc5555555555552),
342 f64::from_bits(0x3fc249249249fb64),
343 f64::from_bits(0xbfc000000000f450),
344 f64::from_bits(0x3fbc71c6e2591149),
345 f64::from_bits(0xbfb999995cf14d86),
346 f64::from_bits(0x3fb7494eb6c2c544),
347 f64::from_bits(0xbfb558bf05690e85),
348 );
349
350 let x2 = DoubleDouble::from_exact_mult(x, x);
351
352 let mut r = DoubleDouble::f64_mul_f64_add(
353 x,
354 p,
355 DoubleDouble::from_bit_pair((0xbb9d89dc15a38000, 0xbfd0000000000000)),
356 );
357 r = DoubleDouble::mul_f64_add(
358 r,
359 x,
360 DoubleDouble::from_bit_pair((0x3c7555a15a94e505, 0x3fd5555555555555)),
361 );
362 r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
363 r = DoubleDouble::quick_mult(r, x2);
364
365 r.to_f64()
366}
367
368#[cfg(test)]
369mod tests {
370 use super::*;
371
372 #[test]
373 fn test_log1pmx() {
374 assert_eq!(
375 f_log1pmx(0.00000000000000005076835735015165),
376 -0.0000000000000000000000000000000012887130540163486
377 );
378 assert_eq!(f_log1pmx(5.), -3.208240530771945);
379 assert_eq!(f_log1pmx(-0.99), -3.6151701859880907);
380 assert_eq!(f_log1pmx(-1.), f64::NEG_INFINITY);
381 assert_eq!(
382 f_log1pmx(1.0000000000008708e-13),
383 -0.0000000000000000000000000050000000000083744
384 );
385 assert_eq!(
386 f_log1pmx(1.0000000000008708e-26),
387 -0.00000000000000000000000000000000000000000000000000005000000000008707
388 );
389 assert_eq!(f_log1pmx(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012890176556069385),
390 -0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000830783258233204);
391 }
392}