pxfm/logs/
log1p.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::{EXP_MASK, get_exponent_f64};
30use crate::common::{dyad_fmla, f_fmla};
31use crate::double_double::DoubleDouble;
32use crate::dyadic_float::DyadicFloat128;
33use crate::logs::log1p_dd::log1p_dd;
34use crate::logs::log1p_dyadic::log1p_accurate;
35use crate::polyeval::f_polyeval4;
36
37// R1[i] = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7) )
38pub(crate) static R1: [u64; 129] = [
39    0x3ff0000000000000,
40    0x3fefc00000000000,
41    0x3fef800000000000,
42    0x3fef400000000000,
43    0x3fef000000000000,
44    0x3feec00000000000,
45    0x3feea00000000000,
46    0x3fee600000000000,
47    0x3fee200000000000,
48    0x3fede00000000000,
49    0x3feda00000000000,
50    0x3fed800000000000,
51    0x3fed400000000000,
52    0x3fed000000000000,
53    0x3fece00000000000,
54    0x3feca00000000000,
55    0x3fec800000000000,
56    0x3fec400000000000,
57    0x3fec000000000000,
58    0x3febe00000000000,
59    0x3feba00000000000,
60    0x3feb800000000000,
61    0x3feb400000000000,
62    0x3feb200000000000,
63    0x3feb000000000000,
64    0x3feac00000000000,
65    0x3feaa00000000000,
66    0x3fea600000000000,
67    0x3fea400000000000,
68    0x3fea200000000000,
69    0x3fe9e00000000000,
70    0x3fe9c00000000000,
71    0x3fe9a00000000000,
72    0x3fe9800000000000,
73    0x3fe9400000000000,
74    0x3fe9200000000000,
75    0x3fe9000000000000,
76    0x3fe8e00000000000,
77    0x3fe8a00000000000,
78    0x3fe8800000000000,
79    0x3fe8600000000000,
80    0x3fe8400000000000,
81    0x3fe8200000000000,
82    0x3fe8000000000000,
83    0x3fe7e00000000000,
84    0x3fe7a00000000000,
85    0x3fe7800000000000,
86    0x3fe7600000000000,
87    0x3fe7400000000000,
88    0x3fe7200000000000,
89    0x3fe7000000000000,
90    0x3fe6e00000000000,
91    0x3fe6c00000000000,
92    0x3fe6a00000000000,
93    0x3fe6800000000000,
94    0x3fe6600000000000,
95    0x3fe6400000000000,
96    0x3fe6200000000000,
97    0x3fe6000000000000,
98    0x3fe5e00000000000,
99    0x3fe5c00000000000,
100    0x3fe5a00000000000,
101    0x3fe5800000000000,
102    0x3fe5800000000000,
103    0x3fe5600000000000,
104    0x3fe5400000000000,
105    0x3fe5200000000000,
106    0x3fe5000000000000,
107    0x3fe4e00000000000,
108    0x3fe4c00000000000,
109    0x3fe4a00000000000,
110    0x3fe4a00000000000,
111    0x3fe4800000000000,
112    0x3fe4600000000000,
113    0x3fe4400000000000,
114    0x3fe4200000000000,
115    0x3fe4200000000000,
116    0x3fe4000000000000,
117    0x3fe3e00000000000,
118    0x3fe3c00000000000,
119    0x3fe3c00000000000,
120    0x3fe3a00000000000,
121    0x3fe3800000000000,
122    0x3fe3600000000000,
123    0x3fe3600000000000,
124    0x3fe3400000000000,
125    0x3fe3200000000000,
126    0x3fe3000000000000,
127    0x3fe3000000000000,
128    0x3fe2e00000000000,
129    0x3fe2c00000000000,
130    0x3fe2c00000000000,
131    0x3fe2a00000000000,
132    0x3fe2800000000000,
133    0x3fe2800000000000,
134    0x3fe2600000000000,
135    0x3fe2400000000000,
136    0x3fe2400000000000,
137    0x3fe2200000000000,
138    0x3fe2000000000000,
139    0x3fe2000000000000,
140    0x3fe1e00000000000,
141    0x3fe1c00000000000,
142    0x3fe1c00000000000,
143    0x3fe1a00000000000,
144    0x3fe1a00000000000,
145    0x3fe1800000000000,
146    0x3fe1600000000000,
147    0x3fe1600000000000,
148    0x3fe1400000000000,
149    0x3fe1400000000000,
150    0x3fe1200000000000,
151    0x3fe1200000000000,
152    0x3fe1000000000000,
153    0x3fe0e00000000000,
154    0x3fe0e00000000000,
155    0x3fe0c00000000000,
156    0x3fe0c00000000000,
157    0x3fe0a00000000000,
158    0x3fe0a00000000000,
159    0x3fe0800000000000,
160    0x3fe0800000000000,
161    0x3fe0600000000000,
162    0x3fe0600000000000,
163    0x3fe0400000000000,
164    0x3fe0400000000000,
165    0x3fe0200000000000,
166    0x3fe0200000000000,
167    0x3fe0000000000000,
168];
169
170// Extra constants for exact range reduction when FMA instructions are not
171// available:
172// r * c - 1 for r = 2^-8 * nearestint( 2^8 / (1 + i * 2^-7))
173//           and c = 1 + i * 2^-7
174//           with i = 0..128.
175#[cfg(not(any(
176    all(
177        any(target_arch = "x86", target_arch = "x86_64"),
178        target_feature = "fma"
179    ),
180    target_arch = "aarch64"
181)))]
182pub(crate) static RCM1: [u64; 129] = [
183    0x0000000000000000,
184    0xbf10000000000000,
185    0xbf30000000000000,
186    0xbf42000000000000,
187    0xbf50000000000000,
188    0xbf59000000000000,
189    0x3f5f000000000000,
190    0x3f52800000000000,
191    0x3f30000000000000,
192    0xbf49000000000000,
193    0xbf5f000000000000,
194    0x3f52000000000000,
195    0xbf30000000000000,
196    0xbf5c000000000000,
197    0x3f51000000000000,
198    0xbf45000000000000,
199    0x3f60000000000000,
200    0x3f10000000000000,
201    0xbf60000000000000,
202    0x3f3a000000000000,
203    0xbf5e000000000000,
204    0x3f38000000000000,
205    0xbf61000000000000,
206    0xbf00000000000000,
207    0x3f60000000000000,
208    0xbf4a000000000000,
209    0x3f51000000000000,
210    0xbf5f800000000000,
211    0xbf30000000000000,
212    0x3f56800000000000,
213    0xbf5f000000000000,
214    0xbf3c000000000000,
215    0x3f50000000000000,
216    0x3f63000000000000,
217    0xbf56000000000000,
218    0xbf24000000000000,
219    0x3f50000000000000,
220    0x3f60c00000000000,
221    0xbf60800000000000,
222    0xbf52000000000000,
223    0xbf30000000000000,
224    0x3f42000000000000,
225    0x3f55000000000000,
226    0x3f60000000000000,
227    0x3f65000000000000,
228    0xbf61c00000000000,
229    0xbf5c000000000000,
230    0xbf55800000000000,
231    0xbf50000000000000,
232    0xbf47000000000000,
233    0xbf40000000000000,
234    0xbf36000000000000,
235    0xbf30000000000000,
236    0xbf2c000000000000,
237    0xbf30000000000000,
238    0xbf36000000000000,
239    0xbf40000000000000,
240    0xbf47000000000000,
241    0xbf50000000000000,
242    0xbf55800000000000,
243    0xbf5c000000000000,
244    0xbf61c00000000000,
245    0xbf66000000000000,
246    0x3f65000000000000,
247    0x3f60000000000000,
248    0x3f55000000000000,
249    0x3f42000000000000,
250    0xbf30000000000000,
251    0xbf52000000000000,
252    0xbf60800000000000,
253    0xbf68800000000000,
254    0x3f60c00000000000,
255    0x3f50000000000000,
256    0xbf24000000000000,
257    0xbf56000000000000,
258    0xbf65400000000000,
259    0x3f63000000000000,
260    0x3f50000000000000,
261    0xbf3c000000000000,
262    0xbf5f000000000000,
263    0x3f68000000000000,
264    0x3f56800000000000,
265    0xbf30000000000000,
266    0xbf5f800000000000,
267    0x3f67000000000000,
268    0x3f51000000000000,
269    0xbf4a000000000000,
270    0xbf66000000000000,
271    0x3f60000000000000,
272    0xbf00000000000000,
273    0xbf61000000000000,
274    0x3f64800000000000,
275    0x3f38000000000000,
276    0xbf5e000000000000,
277    0x3f66000000000000,
278    0x3f3a000000000000,
279    0xbf60000000000000,
280    0x3f64800000000000,
281    0x3f10000000000000,
282    0xbf64000000000000,
283    0x3f60000000000000,
284    0xbf45000000000000,
285    0xbf6b000000000000,
286    0x3f51000000000000,
287    0xbf5c000000000000,
288    0x3f65400000000000,
289    0xbf30000000000000,
290    0xbf69c00000000000,
291    0x3f52000000000000,
292    0xbf5f000000000000,
293    0x3f63000000000000,
294    0xbf49000000000000,
295    0x3f6c000000000000,
296    0x3f30000000000000,
297    0xbf68800000000000,
298    0x3f52800000000000,
299    0xbf62000000000000,
300    0x3f5f000000000000,
301    0xbf59000000000000,
302    0x3f64c00000000000,
303    0xbf50000000000000,
304    0x3f69000000000000,
305    0xbf42000000000000,
306    0x3f6c400000000000,
307    0xbf30000000000000,
308    0x3f6e800000000000,
309    0xbf10000000000000,
310    0x3f6fc00000000000,
311    0x0000000000000000,
312];
313
314pub(crate) static LOG_R1_DD: [(u64, u64); 129] = [
315    (0x0000000000000000, 0x0000000000000000),
316    (0xbd10c76b999d2be8, 0x3f80101575890000),
317    (0xbd23dc5b06e2f7d2, 0x3f90205658938000),
318    (0xbd2aa0ba325a0c34, 0x3f98492528c90000),
319    (0x3d0111c05cf1d753, 0x3fa0415d89e74000),
320    (0xbd2c167375bdfd28, 0x3fa466aed42e0000),
321    (0xbd029efbec19afa2, 0x3fa67c94f2d4c000),
322    (0x3d20fc1a353bb42e, 0x3faaaef2d0fb0000),
323    (0xbd0e113e4fc93b7b, 0x3faeea31c006c000),
324    (0xbd25325d560d9e9b, 0x3fb1973bd1466000),
325    (0x3d2cc85ea5db4ed7, 0x3fb3bdf5a7d1e000),
326    (0xbcf53a2582f4e1ef, 0x3fb4d3115d208000),
327    (0x3cec1e8da99ded32, 0x3fb700d30aeac000),
328    (0x3d23115c3abd47da, 0x3fb9335e5d594000),
329    (0xbd0e42b6b94407c8, 0x3fba4e7640b1c000),
330    (0x3d2646d1c65aacd3, 0x3fbc885801bc4000),
331    (0x3d1a89401fa71733, 0x3fbda72763844000),
332    (0xbd2534d64fa10afd, 0x3fbfe89139dbe000),
333    (0x3d21ef78ce2d07f2, 0x3fc1178e8227e000),
334    (0x3d2ca78e44389934, 0x3fc1aa2b7e23f000),
335    (0x3d039d6ccb81b4a1, 0x3fc2d1610c868000),
336    (0x3cc62fa8234b7289, 0x3fc365fcb0159000),
337    (0x3d25837954fdb678, 0x3fc4913d8333b000),
338    (0x3d2633e8e5697dc7, 0x3fc527e5e4a1b000),
339    (0xbd127023eb68981c, 0x3fc5bf406b544000),
340    (0xbd25118de59c21e1, 0x3fc6f0128b757000),
341    (0xbd1c661070914305, 0x3fc7898d85445000),
342    (0xbd073d54aae92cd1, 0x3fc8beafeb390000),
343    (0x3d07f22858a0ff6f, 0x3fc95a5adcf70000),
344    (0x3d29904d6865817a, 0x3fc9f6c407089000),
345    (0xbd0c358d4eace1aa, 0x3fcb31d8575bd000),
346    (0xbd2d4bc4595412b6, 0x3fcbd087383be000),
347    (0xbcf1ec72c5962bd2, 0x3fcc6ffbc6f01000),
348    (0xbd084a7e75b6f6e4, 0x3fcd1037f2656000),
349    (0x3cc212276041f430, 0x3fce530effe71000),
350    (0xbcca211565bb8e11, 0x3fcef5ade4dd0000),
351    (0x3d1bcbecca0cdf30, 0x3fcf991c6cb3b000),
352    (0xbd16f08c1485e94a, 0x3fd01eae5626c800),
353    (0x3d27188b163ceae9, 0x3fd0c42d67616000),
354    (0xbd2c210e63a5f01c, 0x3fd1178e8227e800),
355    (0x3d2b9acdf7a51681, 0x3fd16b5ccbacf800),
356    (0x3d2ca6ed5147bdb7, 0x3fd1bf99635a6800),
357    (0x3d0a87deba46baea, 0x3fd214456d0eb800),
358    (0x3d2c93c1df5bb3b6, 0x3fd269621134d800),
359    (0x3d2a9cfa4a5004f4, 0x3fd2bef07cdc9000),
360    (0x3d116ecdb0f177c8, 0x3fd36b6776be1000),
361    (0x3d183b54b606bd5c, 0x3fd3c25277333000),
362    (0x3d08e436ec90e09d, 0x3fd419b423d5e800),
363    (0xbd2f27ce0967d675, 0x3fd4718dc271c800),
364    (0xbd2e20891b0ad8a4, 0x3fd4c9e09e173000),
365    (0x3d2ebe708164c759, 0x3fd522ae0738a000),
366    (0x3d1fadedee5d40ef, 0x3fd57bf753c8d000),
367    (0xbd0a0b2a08a465dc, 0x3fd5d5bddf596000),
368    (0xbd2db623e731ae00, 0x3fd630030b3ab000),
369    (0x3d20a0d32756eba0, 0x3fd68ac83e9c6800),
370    (0x3d1721657c222d87, 0x3fd6e60ee6af1800),
371    (0x3d2d8b0949dc60b3, 0x3fd741d876c67800),
372    (0x3d29ec7d2efd1778, 0x3fd79e26687cf800),
373    (0xbd272090c812566a, 0x3fd7fafa3bd81800),
374    (0x3d2fd56f3333778a, 0x3fd85855776dc800),
375    (0xbd205ae1e5e70470, 0x3fd8b639a88b3000),
376    (0xbd1766b52ee6307d, 0x3fd914a8635bf800),
377    (0xbd152313a502d9f0, 0x3fd973a343135800),
378    (0xbd152313a502d9f0, 0x3fd973a343135800),
379    (0xbd26279e10d0c0b0, 0x3fd9d32bea15f000),
380    (0x3d23c6457f9d79f5, 0x3fda33440224f800),
381    (0x3d1e36f2bea77a5d, 0x3fda93ed3c8ad800),
382    (0xbd217cc552774458, 0x3fdaf5295248d000),
383    (0x3d1095252d841995, 0x3fdb56fa04462800),
384    (0x3d27d85bf40a666d, 0x3fdbb9611b80e000),
385    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
386    (0x3d2cec807fe8e180, 0x3fdc1c60693fa000),
387    (0xbd29b6ddc15249ae, 0x3fdc7ff9c7455800),
388    (0xbd0797c33ec7a6b0, 0x3fdce42f18064800),
389    (0x3d235bafe9a767a8, 0x3fdd490246def800),
390    (0xbd1ea42d60dc616a, 0x3fddae75484c9800),
391    (0xbd1ea42d60dc616a, 0x3fddae75484c9800),
392    (0xbd1326b207322938, 0x3fde148a1a272800),
393    (0xbd2465505372bd08, 0x3fde7b42c3ddb000),
394    (0x3d2f27f45a470251, 0x3fdee2a156b41000),
395    (0x3d2f27f45a470251, 0x3fdee2a156b41000),
396    (0x3d12cde56f014a8b, 0x3fdf4aa7ee031800),
397    (0x3d0085fa3c164935, 0x3fdfb358af7a4800),
398    (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800),
399    (0xbd053ba3b1727b1c, 0x3fe00e5ae5b20800),
400    (0xbd04c45fe79539e0, 0x3fe04360be760400),
401    (0x3d26812241edf5fd, 0x3fe078bf0533c400),
402    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
403    (0x3d1f486b887e7e27, 0x3fe0ae76e2d05400),
404    (0x3d1c299807801742, 0x3fe0e4898611cc00),
405    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
406    (0xbd258647bb9ddcb2, 0x3fe11af823c75c00),
407    (0xbd2edd97a293ae49, 0x3fe151c3f6f29800),
408    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
409    (0x3d14cc4ef8ab4650, 0x3fe188ee40f23c00),
410    (0x3cccacdeed70e667, 0x3fe1c07849ae6000),
411    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
412    (0xbd2a7242c9fe81d3, 0x3fe1f8635fc61800),
413    (0x3d12fc066e48667b, 0x3fe230b0d8bebc00),
414    (0xbd0b61f105226250, 0x3fe269621134dc00),
415    (0xbd0b61f105226250, 0x3fe269621134dc00),
416    (0x3d206d2be797882d, 0x3fe2a2786d0ec000),
417    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
418    (0xbd17a6e507b9dc11, 0x3fe2dbf557b0e000),
419    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
420    (0xbd274e93c5a0ed9c, 0x3fe315da44340800),
421    (0x3d10b83f9527e6ac, 0x3fe35028ad9d8c00),
422    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
423    (0xbd218b7abb5569a4, 0x3fe38ae217197800),
424    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
425    (0xbd02b7367cfe13c2, 0x3fe3c6080c36c000),
426    (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00),
427    (0xbd26ce7930f0c74c, 0x3fe4019c2125cc00),
428    (0xbcfd984f481051f7, 0x3fe43d9ff2f92400),
429    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
430    (0xbd22cb6af94d60aa, 0x3fe47a1527e8a400),
431    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
432    (0x3cef7115ed4c541c, 0x3fe4b6fd6f970c00),
433    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
434    (0xbd2e6c516d93b8fb, 0x3fe4f45a835a5000),
435    (0x3d05ccc45d257531, 0x3fe5322e26867800),
436    (0x3d05ccc45d257531, 0x3fe5322e26867800),
437    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
438    (0x3d09980bff3303dd, 0x3fe5707a26bb8c00),
439    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
440    (0x3d2dfa63ac10c9fb, 0x3fe5af405c364800),
441    (0x3d2202380cda46be, 0x3fe5ee82aa241800),
442    (0x3d2202380cda46be, 0x3fe5ee82aa241800),
443    (0x0000000000000000, 0x0000000000000000),
444];
445
446#[inline]
447pub(crate) fn log1p_f64_dyadic(x: f64) -> DyadicFloat128 {
448    let mut x_u = x.to_bits();
449
450    let mut x_dd = DoubleDouble::default();
451
452    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
453
454    if x_exp >= EXP_BIAS {
455        // |x| >= 1
456        if x_u >= 0x4650_0000_0000_0000u64 {
457            x_dd.hi = x;
458        } else {
459            x_dd = DoubleDouble::from_exact_add(x, 1.0);
460        }
461    } else {
462        // |x| < 1
463        x_dd = DoubleDouble::from_exact_add(1.0, x);
464    }
465
466    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
467
468    // At this point, x_dd is the exact sum of 1 + x:
469    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
470    //   |x_dd.hi| >= 2^-54
471    //   |x_dd.lo| < ulp(x_dd.hi)
472
473    let xhi_bits = x_dd.hi.to_bits();
474    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
475    x_u = xhi_bits;
476    // Range reduction:
477    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
478    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
479    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
480
481    // Scale x_dd by 2^(-xh_bits.get_exponent()).
482    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
483    // Normalize arguments:
484    //   1 <= m_dd.hi < 2
485    //   |m_dd.lo| < 2^-52.
486    // This is exact.
487    let m_hi = 1f64.to_bits() | xhi_frac;
488
489    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
490        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
491    } else {
492        0
493    };
494
495    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
496
497    // Perform range reduction:
498    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
499    //             = (r * m_dd.hi - 1) + r * m_dd.lo
500    //             = v_hi + (v_lo.hi + v_lo.lo)
501    // where:
502    //   v_hi = r * m_dd.hi - 1          (exact)
503    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
504    // Bounds on the values:
505    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
506    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
507    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
508    let r = R1[idx as usize];
509    let v_hi;
510    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
511
512    #[cfg(any(
513        all(
514            any(target_arch = "x86", target_arch = "x86_64"),
515            target_feature = "fma"
516        ),
517        target_arch = "aarch64"
518    ))]
519    {
520        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
521    }
522
523    #[cfg(not(any(
524        all(
525            any(target_arch = "x86", target_arch = "x86_64"),
526            target_feature = "fma"
527        ),
528        target_arch = "aarch64"
529    )))]
530    {
531        let c = f64::from_bits(
532            (idx as u64)
533                .wrapping_shl(52 - 7)
534                .wrapping_add(0x3FF0_0000_0000_0000u64),
535        );
536        v_hi = f_fmla(
537            f64::from_bits(r),
538            m_dd.hi - c,
539            f64::from_bits(RCM1[idx as usize]),
540        ); // Exact
541    }
542
543    // Range reduction output:
544    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
545    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
546    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
547    v_dd.lo += v_lo.lo;
548
549    log1p_accurate(x_e, idx as usize, v_dd)
550}
551
552/// Computes log(x+1)
553///
554/// Max ULP 0.5
555pub fn f_log1p(x: f64) -> f64 {
556    let mut x_u = x.to_bits();
557
558    let mut x_dd = DoubleDouble::default();
559
560    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
561
562    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
563
564    let e = (((x_u >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
565
566    if e == 0x400 || x == 0. || x <= -1.0 {
567        /* case NaN/Inf, +/-0 or x <= -1 */
568        if e == 0x400 && x.to_bits() != 0xfffu64 << 52 {
569            /* NaN or + Inf*/
570            return x + x;
571        }
572        if x <= -1.0
573        /* we use the fact that NaN < -1 is false */
574        {
575            /* log2p(x<-1) is NaN, log2p(-1) is -Inf and raises DivByZero */
576            return if x < -1.0 {
577                f64::NAN
578            } else {
579                // x=-1
580                f64::NEG_INFINITY
581            };
582        }
583        return x + x; /* +/-0 */
584    }
585
586    let ax = x_u.wrapping_shl(1);
587
588    if ax < 0x7f60000000000000u64 {
589        // |x| < 0.0625
590        // check case x tiny first to avoid spurious underflow in x*x
591        if ax < 0x7940000000000000u64 {
592            // |x| < 0x1p-53
593            if ax == 0 {
594                return x;
595            }
596            /* we have underflow when |x| < 2^-1022, or when |x| = 2^-1022 and
597            the result is smaller than 2^-1022 in absolute value */
598            let res = dyad_fmla(x.abs(), f64::from_bits(0xbc90000000000000), x);
599            return res;
600        }
601    }
602
603    if x_exp >= EXP_BIAS {
604        // |x| >= 1
605        if x_u >= 0x4650_0000_0000_0000u64 {
606            x_dd.hi = x;
607        } else {
608            x_dd = DoubleDouble::from_exact_add(x, 1.0);
609        }
610    } else {
611        // |x| < 1
612        if x_exp < EXP_BIAS - 52 - 1 {
613            // Quick return when |x| < 2^-53.
614            // Since log(1 + x) = x - x^2/2 + x^3/3 - ...,
615            // for |x| < 2^-53,
616            //   x > log(1 + x) > x - x^2 > x(1 - 2^-54) > x - ulp(x)/2
617            // Thus,
618            //   log(1 + x) = nextafter(x, -inf) for FE_DOWNWARD, or
619            //                                       FE_TOWARDZERO and x > 0,
620            //              = x                  otherwise.
621            if x == 0.0 {
622                return x + x;
623            }
624
625            let tp = 1.0f32;
626            let tn = -1.0f32;
627            let rdp = tp - f32::from_bits(0x3d594caf) != tp;
628            let rdn = tn - f32::from_bits(0x33800000) != tn;
629
630            if x > 0. && rdp {
631                return f64::from_bits(x_u - 1);
632            }
633
634            if x < 0. && rdn {
635                return f64::from_bits(x_u + 1);
636            }
637
638            return if x + x == 0.0 { x + x } else { x };
639        }
640        x_dd = DoubleDouble::from_exact_add(1.0, x);
641    }
642
643    // At this point, x_dd is the exact sum of 1 + x:
644    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
645    //   |x_dd.hi| >= 2^-54
646    //   |x_dd.lo| < ulp(x_dd.hi)
647
648    let xhi_bits = x_dd.hi.to_bits();
649    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
650    x_u = xhi_bits;
651    // Range reduction:
652    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
653    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
654    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
655    let e_x = x_e as f64;
656
657    const LOG_2: DoubleDouble = DoubleDouble::new(
658        f64::from_bits(0x3d2ef35793c76730),
659        f64::from_bits(0x3fe62e42fefa3800),
660    );
661
662    // hi is exact
663    // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43
664
665    let r_dd = LOG_R1_DD[idx as usize];
666
667    let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
668    // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo)
669    //           <= 2^11 * 2^(-43-53) = 2^-85
670    let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));
671
672    // Scale x_dd by 2^(-xh_bits.get_exponent()).
673    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
674    // Normalize arguments:
675    //   1 <= m_dd.hi < 2
676    //   |m_dd.lo| < 2^-52.
677    // This is exact.
678    let m_hi = 1f64.to_bits() | xhi_frac;
679
680    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
681        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
682    } else {
683        0
684    };
685
686    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
687
688    // Perform range reduction:
689    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
690    //             = (r * m_dd.hi - 1) + r * m_dd.lo
691    //             = v_hi + (v_lo.hi + v_lo.lo)
692    // where:
693    //   v_hi = r * m_dd.hi - 1          (exact)
694    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
695    // Bounds on the values:
696    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
697    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
698    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
699    let r = R1[idx as usize];
700    let v_hi;
701    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
702
703    #[cfg(any(
704        all(
705            any(target_arch = "x86", target_arch = "x86_64"),
706            target_feature = "fma"
707        ),
708        target_arch = "aarch64"
709    ))]
710    {
711        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
712    }
713
714    #[cfg(not(any(
715        all(
716            any(target_arch = "x86", target_arch = "x86_64"),
717            target_feature = "fma"
718        ),
719        target_arch = "aarch64"
720    )))]
721    {
722        let c = f64::from_bits(
723            (idx as u64)
724                .wrapping_shl(52 - 7)
725                .wrapping_add(0x3FF0_0000_0000_0000u64),
726        );
727        v_hi = f_fmla(
728            f64::from_bits(r),
729            m_dd.hi - c,
730            f64::from_bits(RCM1[idx as usize]),
731        ); // Exact
732    }
733
734    // Range reduction output:
735    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
736    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
737    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
738    v_dd.lo += v_lo.lo;
739
740    // Exact sum:
741    //   r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
742    let r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);
743
744    // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ...
745    // generated by Sollya with:
746    // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|],
747    //                 [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]);
748    const P_COEFFS: [u64; 6] = [
749        0xbfe0000000000000,
750        0x3fd5555555555166,
751        0xbfcfffffffdb7746,
752        0x3fc99999a8718a60,
753        0xbfc555874ce8ce22,
754        0x3fc24335555ddbe5,
755    ];
756
757    //   C * ulp(v_sq) + err_hi
758    let v_sq = v_dd.hi * v_dd.hi;
759    let p0 = f_fmla(
760        v_dd.hi,
761        f64::from_bits(P_COEFFS[1]),
762        f64::from_bits(P_COEFFS[0]),
763    );
764    let p1 = f_fmla(
765        v_dd.hi,
766        f64::from_bits(P_COEFFS[3]),
767        f64::from_bits(P_COEFFS[2]),
768    );
769    let p2 = f_fmla(
770        v_dd.hi,
771        f64::from_bits(P_COEFFS[5]),
772        f64::from_bits(P_COEFFS[4]),
773    );
774    let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);
775
776    const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
777    let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];
778
779    let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);
780
781    let left = r1.hi + (p - err);
782    let right = r1.hi + (p + err);
783    // Ziv's test to see if fast pass is accurate enough.
784    if left == right {
785        return left;
786    }
787    log1p_accurate_dd(x)
788}
789
790#[cold]
791fn log1p_accurate_dd(x: f64) -> f64 {
792    log1p_dd(x).to_f64()
793}
794
795#[cfg(test)]
796mod tests {
797    use super::*;
798
799    #[test]
800    fn test_log1p() {
801        assert_eq!(
802            f_log1p(-0.0000000000000003834186599935256),
803            -0.00000000000000038341865999352564
804        );
805        assert_eq!(f_log1p(0.000032417476177515677), 0.000032416950742490306);
806        assert_eq!(f_log1p(-0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164),
807                   -0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001866527236137164);
808        assert_eq!(f_log1p(-0.0016481876264151651), -0.0016495473819346394);
809        assert_eq!(f_log1p(-0.55), -0.7985076962177717);
810        assert_eq!(f_log1p(-0.65), -1.0498221244986778);
811        assert_eq!(f_log1p(1.65), 0.9745596399981308);
812        assert!(f_log1p(-2.).is_nan());
813    }
814}
pxfm/logs/log1p.rs

pxfm/logs/
log1p.rs