pxfm/exponents/
exp_f128.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::dyadic_float::{DyadicFloat128, DyadicSign};
31
32#[inline]
33fn poly_exp_f128(x: DyadicFloat128) -> DyadicFloat128 {
34    static COEFFS_128: [DyadicFloat128; 8] = [
35        DyadicFloat128 {
36            sign: DyadicSign::Pos,
37            exponent: -127,
38            mantissa: 0x80000000_00000000_00000000_00000000_u128,
39        }, // 1.0
40        DyadicFloat128 {
41            sign: DyadicSign::Pos,
42            exponent: -127,
43            mantissa: 0x80000000_00000000_00000000_00000000_u128,
44        }, // 1.0
45        DyadicFloat128 {
46            sign: DyadicSign::Pos,
47            exponent: -128,
48            mantissa: 0x80000000_00000000_00000000_00000000_u128,
49        }, // 0.5
50        DyadicFloat128 {
51            sign: DyadicSign::Pos,
52            exponent: -130,
53            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
54        }, // 1/6
55        DyadicFloat128 {
56            sign: DyadicSign::Pos,
57            exponent: -132,
58            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
59        }, // 1/24
60        DyadicFloat128 {
61            sign: DyadicSign::Pos,
62            exponent: -134,
63            mantissa: 0x88888888_88888888_88888888_88888889_u128,
64        }, // 1/120
65        DyadicFloat128 {
66            sign: DyadicSign::Pos,
67            exponent: -137,
68            mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128,
69        }, // 1/720
70        DyadicFloat128 {
71            sign: DyadicSign::Pos,
72            exponent: -140,
73            mantissa: 0xd00d00d0_0d00d00d_00d00d00_d00d00d0_u128,
74        }, // 1/5040
75    ];
76
77    let mut z = COEFFS_128[7];
78    for i in (0..7).rev() {
79        z = x * z + COEFFS_128[i];
80    }
81    z
82}
83
84#[cold]
85#[inline(never)]
86pub(crate) fn rational128_exp(x: f64) -> DyadicFloat128 {
87    const LOG2_E: f64 = f64::from_bits(0x3ff71547652b82fe);
88    let tmp = f_fmla(x, LOG2_E, f64::from_bits(0x4148000000040000));
89    let k = (tmp.to_bits() >> 19) as i32;
90    let kd = k as f64;
91
92    let idx1: usize = ((k >> 6) & 0x3f) as usize;
93    let idx2 = (k & 0x3f) as usize;
94
95    const MLOG_2_EXP2_M12_HI: f64 = f64::from_bits(0xbf262e42ff000000);
96    const MLOG_2_EXP2_M12_MID_30: f64 = f64::from_bits(0x3d0718432a000000);
97    const MLOG_2_EXP2_M12_LO: f64 = f64::from_bits(0x3b0b0e2633fe0685);
98
99    let t1 = f_fmla(kd, MLOG_2_EXP2_M12_HI, x); // exact
100    let t2 = kd * MLOG_2_EXP2_M12_MID_30; // exact
101    let t3 = kd * MLOG_2_EXP2_M12_LO; // Error < 2^-133
102
103    let dx = DyadicFloat128::new_from_f64(t1)
104        + (DyadicFloat128::new_from_f64(t2) + DyadicFloat128::new_from_f64(t3));
105
106    let exp_mid1 = DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID1[idx1].2))
107        + (DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID1[idx1].1))
108            + DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID1[idx1].0)));
109
110    let exp_mid2 = DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID2[idx2].2))
111        + (DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID2[idx2].1))
112            + DyadicFloat128::new_from_f64(f64::from_bits(TD_EXP2_MID2[idx2].0)));
113
114    let exp_mid = exp_mid1 * exp_mid2;
115
116    let p = poly_exp_f128(dx);
117
118    let mut r = exp_mid * p;
119
120    r.exponent += (kd as i64 >> 12) as i16;
121
122    r
123}
124
125// Lookup table for 2^(k * 2^-6) with k = 0..63.
126// Generated by Sollya with:
127// > display=hexadecimal;
128// > prec = 500;
129// > for i from 0 to 63 do {
130//     a = 2^(i * 2^-6);
131//     b = round(a, D, RN);
132//     c = round(a - b, D, RN);
133//     d = round(a - b - c, D, RN);
134//     print("{", d, ",", c, ",", b, "},");
135//   };
136static TD_EXP2_MID1: [(u64, u64, u64); 64] = [
137    (0x0000000000000000, 0x0000000000000000, 0x3ff0000000000000),
138    (0xb919085b0a3d74d5, 0xbc719083535b085d, 0x3ff02c9a3e778061),
139    (0x39105ff94f8d257e, 0x3c8d73e2a475b465, 0x3ff059b0d3158574),
140    (0x39015820d96b414f, 0x3c6186be4bb284ff, 0x3ff0874518759bc8),
141    (0xb9367c9bd6ebf74c, 0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
142    (0xb8e5aa76994e9ddb, 0x3c403a1727c57b53, 0x3ff0e3ec32d3d1a2),
143    (0x3929d58b988f562d, 0xbc96c51039449b3a, 0x3ff11301d0125b51),
144    (0xb932fe7bb4c76416, 0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
145    (0x3924f2406aa13ff0, 0xbc819041b9d78a76, 0x3ff172b83c7d517b),
146    (0x390ad36183926ae8, 0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
147    (0x391ea62d0881b918, 0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
148    (0xb90781dbc16f1ea4, 0x3c8dc775814a8495, 0x3ff2063b88628cd6),
149    (0xb924d89f9af532e0, 0x3c99b07eb6c70573, 0x3ff2387a6e756238),
150    (0x391277393a461b77, 0x3c82bd339940e9d9, 0x3ff26b4565e27cdd),
151    (0x390de54485604690, 0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
152    (0xb91ee9d8f8cb9307, 0x3c90024754db41d5, 0x3ff2d285a6e4030b),
153    (0x3917b7b2f09cd0d9, 0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
154    (0xb93406a2ea6cfc6b, 0x3c932721843659a6, 0x3ff33c08b26416ff),
155    (0x39387e3e12516bfa, 0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
156    (0x3909b0b1ff17c296, 0xbc75e436d661f5e3, 0x3ff3a7db34e59ff7),
157    (0xb92808ba68fa8fb7, 0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
158    (0xb8d32b43eafc6518, 0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
159    (0xb8d0ac312de3d922, 0x3c489b7a04ef80d0, 0x3ff44e086061892d),
160    (0x390e1eebae743ac0, 0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
161    (0x38ec06c7745c2b39, 0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
162    (0xb8f1aa1fd7b685cd, 0xbc94b309d25957e3, 0x3ff4f9b2769d2ca7),
163    (0x390fa733951f214c, 0xbc807abe1db13cad, 0x3ff5342b569d4f82),
164    (0xb90ff86852a613ff, 0x3c99bb2c011d93ad, 0x3ff56f4736b527da),
165    (0xb92744ee506fdafe, 0x3c96324c054647ad, 0x3ff5ab07dd485429),
166    (0xb9395f9ab75fa7d6, 0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
167    (0x3905d8e757cfb991, 0xbc9383c17e40b497, 0x3ff6247eb03a5585),
168    (0x3934a337f4dc0a3b, 0xbc9bb60987591c34, 0x3ff6623882552225),
169    (0x39357d3e3adec175, 0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
170    (0x38ca59f88abbe778, 0xbc6bbe3a683c88ab, 0x3ff6dfb23c651a2f),
171    (0xb92269796953a4c3, 0xbc816e4786887a99, 0x3ff71f75e8ec5f74),
172    (0xb938f8e7fa19e5e8, 0xbc90245957316dd3, 0x3ff75feb564267c9),
173    (0xb8e4217a932d10d4, 0xbc841577ee04992f, 0x3ff7a11473eb0187),
174    (0x38f70a1427f8fcdf, 0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
175    (0x38f0f6ad65cbbac1, 0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
176    (0xb92f16f65181d921, 0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
177    (0xb9130644a7836333, 0x3c96e9f156864b27, 0x3ff8ace5422aa0db),
178    (0x38d3bf26d2b85163, 0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
179    (0x390697e257ac0db2, 0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
180    (0x3937edb9d7144b6f, 0xbc9d185b7c1b85d1, 0x3ff97d829fde4e50),
181    (0x3916376b7943085c, 0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
182    (0x392354084551b4fb, 0xbc9359495d1cd533, 0x3ffa0c667b5de565),
183    (0xb90bfd7adfd63f48, 0xbc9d2f6edb8d41e1, 0x3ffa5503b23e255d),
184    (0x3928b16ae39e8cb9, 0x3c90fac90ef7fd31, 0x3ffa9e6b5579fdbf),
185    (0x393a7fbc3ae675ea, 0x3c97a1cd345dcc81, 0x3ffae89f995ad3ad),
186    (0x3902babc0edda4d9, 0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
187    (0x390aa64481e1ab72, 0xbc75584f7e54ac3b, 0x3ffb7f76f2fb5e47),
188    (0x3929a164050e1258, 0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
189    (0x39199e51125928da, 0x3c811065895048dd, 0x3ffc199bdd85529c),
190    (0xb92fc44c329d5cb2, 0x3c92884dff483cad, 0x3ffc67f12e57d14b),
191    (0x391d8765566b032e, 0x3c7503cbd1e949db, 0x3ffcb720dcef9069),
192    (0xb93e7044039da0f6, 0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
193    (0xb90ab053b05531fc, 0x3c82ed02d75b3707, 0x3ffd5818dcfba487),
194    (0x3937f6246f0ec615, 0x3c9c2300696db532, 0x3ffda9e603db3285),
195    (0x393b7225a944efd6, 0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
196    (0x3921e92cb3c2d278, 0x3c839e8980a9cc8f, 0x3ffe502ee78b3ff6),
197    (0xb92fc0f242bbf3de, 0xbc9e9c23179c2893, 0x3ffea4afa2a490da),
198    (0x393f6dd5d229ff69, 0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
199    (0xb914019bffc80ef3, 0x3c99d3e12dd8a18b, 0x3fff50765b6e4540),
200    (0x38fdc060c36f7651, 0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
201];
202
203// Lookup table for 2^(k * 2^-12) with k = 0..63.
204// Generated by Sollya with:
205// > display=hexadecimal;
206// > prec = 500;
207// > for i from 0 to 63 do {
208//     a = 2^(i * 2^-12);
209//     b = round(a, D, RN);
210//     c = round(a - b, D, RN);
211//     d = round(a - b - c, D, RN);
212//     print("{", d, ",", c, ",", b, "},");
213//   };
214static TD_EXP2_MID2: [(u64, u64, u64); 64] = [
215    (0x0000000000000000, 0x0000000000000000, 0x3ff0000000000000),
216    (0x39339726694630e3, 0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
217    (0x38fe5e06ddd31156, 0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
218    (0x3905a0768b51f609, 0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
219    (0x390d008403605217, 0xbc8d7c96f201bb2f, 0x3ff002c605e2e8cf),
220    (0x39289bc16f765708, 0x3c984711d4c35e9f, 0x3ff003779a95f959),
221    (0xb924535b7f8c1e2d, 0xbc80484245243777, 0x3ff0042936faa3d8),
222    (0xb938ba92f6b25456, 0xbc94b237da2025f9, 0x3ff004dadb113da0),
223    (0xb8e30c72e81f4294, 0xbc75e00e62d6b30d, 0x3ff0058c86da1c0a),
224    (0xb9134a5384e6f0b9, 0x3c9a1d6cedbb9481, 0x3ff0063e3a559473),
225    (0x393f8d0580865d2e, 0xbc94acf197a00142, 0x3ff006eff583fc3d),
226    (0xb90002bcb3ae9a99, 0xbc6eaf2ea42391a5, 0x3ff007a1b865a8ca),
227    (0x390c3c5aedee9851, 0x3c7da93f90835f75, 0x3ff0085382faef83),
228    (0x3927217851d1ec6e, 0xbc86a79084ab093c, 0x3ff00905554425d4),
229    (0xb9180cbca335a7c3, 0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
230    (0xb91706bd4eb22595, 0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
231    (0xb90b55dd523f3c08, 0xbc84f6b2a7609f71, 0x3ff00b1afa5abcbf),
232    (0x39190a1e207cced1, 0xbc7e1a258ea8f71b, 0x3ff00bcceb7707ec),
233    (0x39178d0472db37c5, 0x3c74362ca5bc26f1, 0x3ff00c7ee448ee02),
234    (0xb92bcd4db3cb52fe, 0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
235    (0xb8fcf1b131575ec2, 0xbc6406ac4e81a645, 0x3ff00de2ed0ee0f5),
236    (0xb8f6aaa1fa7ff913, 0x3c9b5a6902767e09, 0x3ff00e94fd0398e0),
237    (0x39168f236dff3218, 0xbc991b2060859321, 0x3ff00f4714af41d3),
238    (0xb92e8bb58067e60a, 0x3c8427068ab22306, 0x3ff00ff93412315c),
239    (0x393d4cd5e1d71fdf, 0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
240    (0x393e4ecf350ebe88, 0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
241    (0x3926a2aa2c89c4f8, 0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
242    (0x3911ca368a20ed05, 0xbc734104ee7edae9, 0x3ff012c1fecd613b),
243    (0x38dedb1095d925cf, 0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
244    (0xb90488c78eded75f, 0x3c7a8cd33b8a1bb3, 0x3ff01426927f5278),
245    (0xb8e7480f5ea1b3c9, 0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
246    (0xb90ae45989a04dd5, 0x3c857ba2dc7e0c73, 0x3ff0158b4517bb88),
247    (0x392bf48007d80987, 0x3c9b61299ab8cdb7, 0x3ff0163da9fb3335),
248    (0x3921aa91a059292c, 0xbc990565902c5f44, 0x3ff016f0169949ed),
249    (0x391b6663292855f5, 0x3c870fc41c5c2d53, 0x3ff017a28af25567),
250    (0x393e7fbca6793d94, 0x3c94b9a6e145d76c, 0x3ff018550706ab62),
251    (0xb915b9f5c7de3b93, 0xbc7008eff5142bf9, 0x3ff019078ad6a19f),
252    (0x3914638bf2f6acab, 0xbc977669f033c7de, 0x3ff019ba16628de2),
253    (0xb92ab237b9a069c5, 0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
254    (0x3933ab358be97cef, 0x3c9371231477ece5, 0x3ff01b1f44af9f9e),
255    (0xb914027b2294bb64, 0x3c75e7626621eb5b, 0x3ff01bd1e77170b4),
256    (0x390656394426c990, 0xbc9bc72b100828a5, 0x3ff01c8491f08f08),
257    (0x390bf9785189bdd8, 0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
258    (0x3927c12f86114fe3, 0x3c816996709da2e2, 0x3ff01de9fe280ac8),
259    (0xb92653d5d24b5d28, 0xbc8c11f5239bf535, 0x3ff01e9cbfe113ef),
260    (0x39204a0cdc1d86d7, 0x3c8e1d4eb5edc6b3, 0x3ff01f4f8958c1c6),
261    (0x392c678c46149782, 0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
262    (0x39348524e1e9df70, 0xbc98f06d8a148a32, 0x3ff020b533856324),
263    (0x3929953ea727ff0b, 0xbc82bf310fc54eb6, 0x3ff02168143b0281),
264    (0xb93ccfbbec22d28e, 0xbc9c95a035eb4175, 0x3ff0221afcb09e3e),
265    (0x3939e2bb6e181de1, 0xbc9491793e46834d, 0x3ff022cdece68c4f),
266    (0x391f17609ae29308, 0xbc73e8d0d9c49091, 0x3ff02380e4dd22ad),
267    (0xb91c7dc2c476bfb8, 0xbc9314aa16278aa3, 0x3ff02433e494b755),
268    (0xb92fab994971d4a3, 0x3c848daf888e9651, 0x3ff024e6ec0da046),
269    (0x392848b62cbdd0af, 0x3c856dc8046821f4, 0x3ff02599fb483385),
270    (0xb92bf603ba715d0c, 0x3c945b42356b9d47, 0x3ff0264d1244c719),
271    (0x39189434e751e1aa, 0xbc7082ef51b61d7e, 0x3ff027003103b10e),
272    (0xb9103b54fd64e8ac, 0x3c72106ed0920a34, 0x3ff027b357854772),
273    (0x3927785ea0acc486, 0xbc9fd4cf26ea5d0f, 0x3ff0286685c9e059),
274    (0xb92ce447fdb35ff9, 0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
275    (0x38f5b884aab5642a, 0x3c564cbba902ca27, 0x3ff029ccf99d720a),
276    (0xb93cfb3e46d7c1c0, 0x3c94383ef231d207, 0x3ff02a803f2d170d),
277    (0xb8f0d40cee4b81af, 0x3c94a47a505b3a47, 0x3ff02b338c811703),
278    (0x3926ae7d36d7c1f7, 0x3c9e47120223467f, 0x3ff02be6e199c811),
279];
280
281#[cfg(test)]
282mod tests {
283    use crate::exponents::exp_f128::rational128_exp;
284
285    #[test]
286    fn test_exp() {
287        assert_eq!(rational128_exp(2.).fast_as_f64(), 7.38905609893065);
288    }
289}