1use crate::common::f_fmla;
30use std::hint::black_box;
31
32static IX: [u64; 129] = [
33 0x3ff0000000000000,
34 0x3fefc07f01fc0000,
35 0x3fef81f81f820000,
36 0x3fef44659e4a0000,
37 0x3fef07c1f07c0000,
38 0x3feecc07b3020000,
39 0x3fee9131abf00000,
40 0x3fee573ac9020000,
41 0x3fee1e1e1e1e0000,
42 0x3fede5d6e3f80000,
43 0x3fedae6076ba0000,
44 0x3fed77b654b80000,
45 0x3fed41d41d420000,
46 0x3fed0cb58f6e0000,
47 0x3fecd85689040000,
48 0x3feca4b3055e0000,
49 0x3fec71c71c720000,
50 0x3fec3f8f01c40000,
51 0x3fec0e0703820000,
52 0x3febdd2b89940000,
53 0x3febacf914c20000,
54 0x3feb7d6c3dda0000,
55 0x3feb4e81b4e80000,
56 0x3feb2036406c0000,
57 0x3feaf286bca20000,
58 0x3feac5701ac60000,
59 0x3fea98ef606a0000,
60 0x3fea6d01a6d00000,
61 0x3fea41a41a420000,
62 0x3fea16d3f97a0000,
63 0x3fe9ec8e95100000,
64 0x3fe9c2d14ee40000,
65 0x3fe99999999a0000,
66 0x3fe970e4f80c0000,
67 0x3fe948b0fcd60000,
68 0x3fe920fb49d00000,
69 0x3fe8f9c18f9c0000,
70 0x3fe8d3018d300000,
71 0x3fe8acb90f6c0000,
72 0x3fe886e5f0ac0000,
73 0x3fe8618618620000,
74 0x3fe83c977ab20000,
75 0x3fe8181818180000,
76 0x3fe7f405fd020000,
77 0x3fe7d05f417e0000,
78 0x3fe7ad2208e00000,
79 0x3fe78a4c81780000,
80 0x3fe767dce4340000,
81 0x3fe745d1745e0000,
82 0x3fe724287f460000,
83 0x3fe702e05c0c0000,
84 0x3fe6e1f76b440000,
85 0x3fe6c16c16c20000,
86 0x3fe6a13cd1540000,
87 0x3fe6816816820000,
88 0x3fe661ec6a520000,
89 0x3fe642c8590c0000,
90 0x3fe623fa77020000,
91 0x3fe6058160580000,
92 0x3fe5e75bb8d00000,
93 0x3fe5c9882b940000,
94 0x3fe5ac056b020000,
95 0x3fe58ed230820000,
96 0x3fe571ed3c500000,
97 0x3fe5555555560000,
98 0x3fe5390948f40000,
99 0x3fe51d07eae20000,
100 0x3fe5015015020000,
101 0x3fe4e5e0a7300000,
102 0x3fe4cab887260000,
103 0x3fe4afd6a0520000,
104 0x3fe49539e3b20000,
105 0x3fe47ae147ae0000,
106 0x3fe460cbc7f60000,
107 0x3fe446f865620000,
108 0x3fe42d6625d60000,
109 0x3fe4141414140000,
110 0x3fe3fb013fb00000,
111 0x3fe3e22cbce40000,
112 0x3fe3c995a47c0000,
113 0x3fe3b13b13b20000,
114 0x3fe3991c2c180000,
115 0x3fe3813813820000,
116 0x3fe3698df3de0000,
117 0x3fe3521cfb2c0000,
118 0x3fe33ae45b580000,
119 0x3fe323e34a2c0000,
120 0x3fe30d1901300000,
121 0x3fe2f684bda20000,
122 0x3fe2e025c04c0000,
123 0x3fe2c9fb4d820000,
124 0x3fe2b404ad020000,
125 0x3fe29e4129e40000,
126 0x3fe288b012880000,
127 0x3fe27350b8820000,
128 0x3fe25e2270800000,
129 0x3fe24924924a0000,
130 0x3fe23456789a0000,
131 0x3fe21fb781220000,
132 0x3fe20b470c680000,
133 0x3fe1f7047dc20000,
134 0x3fe1e2ef3b400000,
135 0x3fe1cf06ada20000,
136 0x3fe1bb4a40460000,
137 0x3fe1a7b9611a0000,
138 0x3fe19453808c0000,
139 0x3fe1811811820000,
140 0x3fe16e0689420000,
141 0x3fe15b1e5f760000,
142 0x3fe1485f0e0a0000,
143 0x3fe135c811360000,
144 0x3fe12358e75e0000,
145 0x3fe1111111120000,
146 0x3fe0fef010fe0000,
147 0x3fe0ecf56be60000,
148 0x3fe0db20a8900000,
149 0x3fe0c9714fbc0000,
150 0x3fe0b7e6ec260000,
151 0x3fe0a6810a680000,
152 0x3fe0953f39020000,
153 0x3fe0842108420000,
154 0x3fe073260a480000,
155 0x3fe0624dd2f20000,
156 0x3fe05197f7d80000,
157 0x3fe0410410420000,
158 0x3fe03091b5200000,
159 0x3fe0204081020000,
160 0x3fe0101010100000,
161 0x3fe0000000000000,
162];
163static LIX: [u64; 129] = [
164 0x0000000000000000,
165 0xbf86fe50b6f1eafa,
166 0xbf96e79685c160d5,
167 0xbfa11cd1d51955ba,
168 0xbfa6bad37591e030,
169 0xbfac4dfab908ddb5,
170 0xbfb0eb389fab4795,
171 0xbfb3aa2fdd26ae99,
172 0xbfb663f6faca846b,
173 0xbfb918a16e4cb157,
174 0xbfbbc84240a78a13,
175 0xbfbe72ec1181cfb1,
176 0xbfc08c588cd964e4,
177 0xbfc1dcd19759f2e3,
178 0xbfc32ae9e27627c6,
179 0xbfc476a9f989a58a,
180 0xbfc5c01a39fa6533,
181 0xbfc70742d4eed455,
182 0xbfc84c2bd02d6434,
183 0xbfc98edd077e9f0a,
184 0xbfcacf5e2db31eea,
185 0xbfcc0db6cddaa82d,
186 0xbfcd49ee4c33121a,
187 0xbfce840be751d775,
188 0xbfcfbc16b9003e0b,
189 0xbfd0790adbae3fc0,
190 0xbfd11307dad465b5,
191 0xbfd1ac05b2924cc5,
192 0xbfd24407ab0cc410,
193 0xbfd2db10fc4ea424,
194 0xbfd37124cea58697,
195 0xbfd406463b1d455d,
196 0xbfd49a784bcbaa37,
197 0xbfd52dbdfc4f341d,
198 0xbfd5c01a39ff2c9b,
199 0xbfd6518fe46abaa5,
200 0xbfd6e221cd9d6933,
201 0xbfd771d2ba7f5791,
202 0xbfd800a56315ee2a,
203 0xbfd88e9c72df8611,
204 0xbfd91bba891d495f,
205 0xbfd9a8023920fa4d,
206 0xbfda33760a7fbca6,
207 0xbfdabe18797d2eff,
208 0xbfdb47ebf734b923,
209 0xbfdbd0f2e9eb2b84,
210 0xbfdc592fad2be1aa,
211 0xbfdce0a4923cf5e6,
212 0xbfdd6753e02f4ebc,
213 0xbfdded3fd445af00,
214 0xbfde726aa1e558fe,
215 0xbfdef6d67325ba38,
216 0xbfdf7a8568c8aea6,
217 0xbfdffd799a81be87,
218 0x3fdf804ae8d33c40,
219 0x3fdefec61b04af4e,
220 0x3fde7df5fe572606,
221 0x3fddfdd89d5b0009,
222 0x3fdd7e6c0abbd924,
223 0x3fdcffae611a74d6,
224 0x3fdc819dc2d8578c,
225 0x3fdc043859e5bdbc,
226 0x3fdb877c57b47c04,
227 0x3fdb0b67f4f29a66,
228 0x3fda8ff97183ed07,
229 0x3fda152f14293c74,
230 0x3fd99b072a9289ca,
231 0x3fd921800927e284,
232 0x3fd8a8980ac41130,
233 0x3fd8304d90c2859d,
234 0x3fd7b89f02cbd49a,
235 0x3fd7418aceb84ab1,
236 0x3fd6cb0f68656c95,
237 0x3fd6552b49993dc2,
238 0x3fd5dfdcf1eacd7b,
239 0x3fd56b22e6b97c18,
240 0x3fd4f6fbb2ce6943,
241 0x3fd48365e6957b42,
242 0x3fd4106017c0dbcf,
243 0x3fd39de8e15727d9,
244 0x3fd32bfee37489bc,
245 0x3fd2baa0c34989c3,
246 0x3fd249cd2b177fd5,
247 0x3fd1d982c9d50468,
248 0x3fd169c0536677ac,
249 0x3fd0fa848045f67b,
250 0x3fd08bce0d9a7c60,
251 0x3fd01d9bbcf66a2c,
252 0x3fcf5fd8a90e2d85,
253 0x3fce857d3d3af1e5,
254 0x3fcdac22d3ec5f4e,
255 0x3fccd3c712db459a,
256 0x3fcbfc67a7ff3c22,
257 0x3fcb2602497678f4,
258 0x3fca5094b555a1f8,
259 0x3fc97c1cb136b96f,
260 0x3fc8a8980ac8652d,
261 0x3fc7d60496c83f66,
262 0x3fc7046031c7cdaf,
263 0x3fc633a8bf460335,
264 0x3fc563dc2a08b102,
265 0x3fc494f863bbc1de,
266 0x3fc3c6fb6507a37e,
267 0x3fc2f9e32d5257ec,
268 0x3fc22dadc2a627ef,
269 0x3fc1625931802e49,
270 0x3fc097e38cef9519,
271 0x3fbf9c95dc138295,
272 0x3fbe0b1ae90505f6,
273 0x3fbc7b528b5fcffa,
274 0x3fbaed391abb17a1,
275 0x3fb960caf9bd35ea,
276 0x3fb7d60496e3edeb,
277 0x3fb64ce26bf2108e,
278 0x3fb4c560fe5b573b,
279 0x3fb33f7cde24adfb,
280 0x3fb1bb32a5ed9353,
281 0x3fb0387efbd3006e,
282 0x3fad6ebd1f1d0955,
283 0x3faa6f9c37a8beab,
284 0x3fa77394c9d6762c,
285 0x3fa47aa07358e1a4,
286 0x3fa184b8e4d490ef,
287 0x3f9d23afc4d95c78,
288 0x3f9743ee8678a7cb,
289 0x3f916a21e243bf78,
290 0x3f872c7ba20c907e,
291 0x3f7720d9c0536e17,
292 0x0000000000000000,
293];
294
295#[cold]
297fn log2p1f_small(z: f64, ax: u32, ux: u32) -> f32 {
298 let z2 = z * z;
299 let z4 = z2 * z2;
300 if ax < 0x3b9d9d34u32 {
301 if ax < 0x39638a7eu32 {
303 if ax < 0x329c5639u32 {
305 const C: [u64; 2] = [0x3ff71547652b82fe, 0xbfe71547652b82ff];
307 let res = z * f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
308 res as f32
309 } else {
310 if ux == 0x32ff7045u32 {
311 return black_box(f32::from_bits(0x3338428d))
312 - black_box(f32::from_bits(0x17c00000));
313 }
314 if ux == 0xb395efbbu32 {
315 return black_box(f32::from_bits(0xb3d85005))
316 + black_box(f32::from_bits(0x19800000));
317 }
318 if ux == 0x35a14df7u32 {
319 return black_box(f32::from_bits(0x35e8b690))
320 + black_box(f32::from_bits(0x1b800000));
321 }
322 if ux == 0x3841cb81u32 {
323 return black_box(f32::from_bits(0x388bca4f))
324 + black_box(f32::from_bits(0x1e000000));
325 }
326 const C: [u64; 4] = [
327 0x3ff71547652b82fe,
328 0xbfe71547652b82fd,
329 0x3fdec709ead0c9a7,
330 0xbfd7154773c1cb29,
331 ];
332
333 let zxf0 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
334 let zxf1 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
335
336 let zxf = z * f_fmla(z2, zxf0, zxf1);
337 zxf as f32
338 }
339 } else {
340 if ux == 0xbac9363du32 {
341 return black_box(f32::from_bits(0xbb114155))
342 + black_box(f32::from_bits(0x21000000));
343 }
344 const C: [u64; 6] = [
345 0x3ff71547652b82fe,
346 0xbfe71547652b8300,
347 0x3fdec709dc28f51b,
348 0xbfd7154765157748,
349 0x3fd2778a510a3682,
350 0xbfcec745df1551fc,
351 ];
352
353 let zxf0 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
354 let zxf1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
355 let zxf2 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
356
357 let zw0 = f_fmla(z2, zxf1, zxf2);
358
359 let zxf = z * f_fmla(z4, zxf0, zw0);
360 zxf as f32
361 }
362 } else {
363 const C: [u64; 8] = [
364 0x3ff71547652b82fe,
365 0xbfe71547652b82fb,
366 0x3fdec709dc3b6a73,
367 0xbfd71547652dc090,
368 0x3fd2776c1a889010,
369 0xbfcec7095bd4d208,
370 0x3fca66bec7fc8f70,
371 0xbfc71a900fc3f3f9,
372 ];
373
374 let zxf0 = f_fmla(z, f64::from_bits(C[7]), f64::from_bits(C[6]));
375 let zxf1 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
376 let zxf2 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
377 let zxf3 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
378
379 let zw0 = f_fmla(z2, zxf0, zxf1);
380 let zw1 = f_fmla(z2, zxf2, zxf3);
381
382 let zxf = z * f_fmla(z4, zw0, zw1);
383 zxf as f32
384 }
385}
386
387#[inline]
391pub fn f_log2p1f(x: f32) -> f32 {
392 let mut z = x as f64;
393 let t = x.to_bits();
394 let ux = t;
395 let ax = ux & 0x7fff_ffff;
396 if ux >= 0x17fu32 << 23 {
397 if ux == (0x17fu32 << 23) {
399 return f32::NEG_INFINITY; }
401 if ux > (0x1ffu32 << 23) {
402 return x + x;
403 } f32::NAN } else if ax >= (0xff << 23) {
406 if ax > (0xff << 23) {
408 return x + x;
409 } f32::INFINITY
411 } else if ax < 0x3cb7aa26u32 {
412 log2p1f_small(z, ax, ux)
414 } else {
415 if ux == 0x4ebd09e3u32 {
417 let h = f32::from_bits(0x41f48013);
418 let l = f32::from_bits(0x35780000);
419 return black_box(h) + black_box(l);
420 }
421 let tp = (z + 1.0).to_bits();
422 let m = tp & 0x000fffffffffffff;
423 let mut e: i32 = (tp >> 52).wrapping_sub(0x3ff) as i32;
424 let j: i32 = ((m as i64).wrapping_add(1i64 << (52 - 8)) >> (52 - 7)) as i32;
425 let k = if j > 53 { 1 } else { 0 };
426 e += k;
427 let xd = m | 0x3ffu64 << 52;
428 z = f_fmla(f64::from_bits(xd), f64::from_bits(IX[j as usize]), -1.0);
429 const C: [u64; 6] = [
430 0x3ff71547652b82fe,
431 0xbfe71547652b82ff,
432 0x3fdec709dc32988b,
433 0xbfd715476521ec2b,
434 0x3fd277801a1ad904,
435 0xbfcec731704d6a88,
436 ];
437 let z2 = z * z;
438 let mut c0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
439 let c2 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
440 let c4 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
441
442 let zv0 = f_fmla(z2, c4, c2);
443
444 c0 = f_fmla(z2, zv0, c0);
445 let res = f_fmla(z, c0, -f64::from_bits(LIX[j as usize])) + e as f64;
446 res as f32
447 }
448}
449
450#[cfg(test)]
451mod tests {
452 use super::*;
453
454 #[test]
455 fn test_log2p1f() {
456 assert_eq!(f_log2p1f(0.0), 0.0);
457 assert_eq!(f_log2p1f(1.0), 1.0);
458 assert_eq!(f_log2p1f(-0.0432432), -0.063775845);
459 assert_eq!(f_log2p1f(-0.009874634), -0.01431689);
460 assert_eq!(f_log2p1f(1.2443), 1.1662655);
461 assert!(f_log2p1f(-1.0432432).is_nan());
462 }
463}