pxfm/logs/
log_range_reduction.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::dyadic_float::{DyadicFloat128, DyadicSign};
30
31// Logarithm range reduction - Step 3:
32//   r(k) = 2^-21 round(2^21 / (1 + k*2^-21)) for k = -80 .. 80.
33// Output range:
34//   [-0x1.01928p-22 , 0x1p-22]
35// We store S[i] = 2^21 (r(i - 80) - 1).
36static S3: [i32; 161] = [
37    0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41,
38    0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31,
39    0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21,
40    0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11,
41    0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0, -0x1,
42    -0x2, -0x3, -0x4, -0x5, -0x6, -0x7, -0x8, -0x9, -0xa, -0xb, -0xc, -0xd, -0xe, -0xf, -0x10,
43    -0x11, -0x12, -0x13, -0x14, -0x15, -0x16, -0x17, -0x18, -0x19, -0x1a, -0x1b, -0x1c, -0x1d,
44    -0x1e, -0x1f, -0x20, -0x21, -0x22, -0x23, -0x24, -0x25, -0x26, -0x27, -0x28, -0x29, -0x2a,
45    -0x2b, -0x2c, -0x2d, -0x2e, -0x2f, -0x30, -0x31, -0x32, -0x33, -0x34, -0x35, -0x36, -0x37,
46    -0x38, -0x39, -0x3a, -0x3b, -0x3c, -0x3d, -0x3e, -0x3f, -0x40, -0x41, -0x42, -0x43, -0x44,
47    -0x45, -0x46, -0x47, -0x48, -0x49, -0x4a, -0x4b, -0x4c, -0x4d, -0x4e, -0x4f, -0x50,
48];
49
50// Logarithm range reduction - Step 4
51//   r(k) = 2^-28 round(2^28 / (1 + k*2^-28)) for k = -65 .. 64.
52// Output range:
53//   [-0x1.0002143p-29 , 0x1p-29]
54// We store S[i] = 2^28 (r(i - 65) - 1).
55static S4: [i32; 130] = [
56    0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32,
57    0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22,
58    0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,
59    0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
60    -0x1, -0x2, -0x3, -0x4, -0x5, -0x6, -0x7, -0x8, -0x9, -0xa, -0xb, -0xc, -0xd, -0xe, -0xf,
61    -0x10, -0x11, -0x12, -0x13, -0x14, -0x15, -0x16, -0x17, -0x18, -0x19, -0x1a, -0x1b, -0x1c,
62    -0x1d, -0x1e, -0x1f, -0x20, -0x21, -0x22, -0x23, -0x24, -0x25, -0x26, -0x27, -0x28, -0x29,
63    -0x2a, -0x2b, -0x2c, -0x2d, -0x2e, -0x2f, -0x30, -0x31, -0x32, -0x33, -0x34, -0x35, -0x36,
64    -0x37, -0x38, -0x39, -0x3a, -0x3b, -0x3c, -0x3d, -0x3e, -0x3f, -0x40,
65];
66
67// Logarithm range reduction - Step 2:
68//   r(k) = 2^-16 round(2^16 / (1 + k*2^-14)) for k = -2^6 .. 2^7.
69// Output range:
70//   [-0x1.3ffcp-15, 0x1.3e3dp-15]
71// We store S2[i] = 2^16 (r(i - 2^6) - 1).
72static S2: [i32; 193] = [
73    0x101, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9,
74    0xc5, 0xc1, 0xbd, 0xb9, 0xb4, 0xb0, 0xac, 0xa8, 0xa4, 0xa0, 0x9c, 0x98, 0x94, 0x90, 0x8c, 0x88,
75    0x84, 0x80, 0x7c, 0x78, 0x74, 0x70, 0x6c, 0x68, 0x64, 0x60, 0x5c, 0x58, 0x54, 0x50, 0x4c, 0x48,
76    0x44, 0x40, 0x3c, 0x38, 0x34, 0x30, 0x2c, 0x28, 0x24, 0x20, 0x1c, 0x18, 0x14, 0x10, 0xc, 0x8,
77    0x4, 0x0, -0x4, -0x8, -0xc, -0x10, -0x14, -0x18, -0x1c, -0x20, -0x24, -0x28, -0x2c, -0x30,
78    -0x34, -0x38, -0x3c, -0x40, -0x44, -0x48, -0x4c, -0x50, -0x54, -0x58, -0x5c, -0x60, -0x64,
79    -0x68, -0x6c, -0x70, -0x74, -0x78, -0x7c, -0x80, -0x84, -0x88, -0x8c, -0x90, -0x94, -0x98,
80    -0x9c, -0xa0, -0xa4, -0xa8, -0xac, -0xb0, -0xb4, -0xb7, -0xbb, -0xbf, -0xc3, -0xc7, -0xcb,
81    -0xcf, -0xd3, -0xd7, -0xdb, -0xdf, -0xe3, -0xe7, -0xeb, -0xef, -0xf3, -0xf7, -0xfb, -0xff,
82    -0x103, -0x107, -0x10b, -0x10f, -0x113, -0x117, -0x11b, -0x11f, -0x123, -0x127, -0x12b, -0x12f,
83    -0x133, -0x137, -0x13a, -0x13e, -0x142, -0x146, -0x14a, -0x14e, -0x152, -0x156, -0x15a, -0x15e,
84    -0x162, -0x166, -0x16a, -0x16e, -0x172, -0x176, -0x17a, -0x17e, -0x182, -0x186, -0x18a, -0x18e,
85    -0x192, -0x195, -0x199, -0x19d, -0x1a1, -0x1a5, -0x1a9, -0x1ad, -0x1b1, -0x1b5, -0x1b9, -0x1bd,
86    -0x1c1, -0x1c5, -0x1c9, -0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
87    -0x1f0, -0x1f4, -0x1f8, -0x1fc,
88];
89
90// Perform logarithm range reduction steps 2-4.
91// Inputs from the first step of range reduction:
92//   m_x : the reduced argument after the first step of range reduction
93//         satisfying  -2^-8 <= m_x < 2^-7  and  ulp(m_x) >= 2^-60.
94//   idx1: index of the -log(r1) table from the first step.
95// Outputs of the extra range reduction steps:
96//   sum: adding -log(r1) - log(r2) - log(r3) - log(r4) to the resulted sum.
97//   return value: the reduced argument v satisfying:
98//                 -0x1.0002143p-29 <= v < 0x1p-29,  and  ulp(v) >= 2^(-125).
99pub(crate) fn log_range_reduction(
100    m_x: f64,
101    log_table: &[&[DyadicFloat128]; 4],
102    sum: DyadicFloat128,
103) -> (DyadicFloat128, DyadicFloat128) {
104    let v = (m_x * f64::from_bits(0x43b0000000000000)) as i64; // ulp = 2^-60
105
106    // Range reduction - Step 2
107    // Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15].
108    // idx2 = trunc(2^14 * (v + 2^-8 + 2^-15))
109    let idx2 = ((v.wrapping_add(0x10_2000_0000_0000)) >> 46) as usize;
110    let z0 = log_table[1][idx2];
111    let mut sum = sum + z0;
112
113    let s2: i64 = S2[idx2] as i64; // |s| <= 2^-7, ulp = 2^-16
114    let sv2 = s2 * v; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76
115    let spv2 = s2.wrapping_shl(44).wrapping_add(v); // |s + v| < 2^-14, ulp = 2^-60
116    let vv2 = spv2.wrapping_shl(16).wrapping_add(sv2); // |vv2| < 2^-14, ulp = 2^-76
117
118    // Range reduction - Step 3
119    // Output range: vv3 in [-0x1.01928p-22 , 0x1p-22]
120    // idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22))
121    let idx3 = vv2.wrapping_add(0x2840_0000_0000_0000).wrapping_shr(55) as usize;
122    sum = sum + log_table[2][idx3];
123
124    let s3 = S3[idx3] as i64; // |s| < 2^-13, ulp = 2^-21
125    let spv3: i64 = s3.wrapping_shl(55).wrapping_add(vv2); // |s + v| < 2^-21, ulp = 2^-76
126    // |s*v| < 2^-27, ulp = 2^(-76-21) = 2^-97
127    let sv3: i128 = s3 as i128 * vv2 as i128;
128    // |vv3| < 2^-21, ulp = 2^-97
129    let vv3 = (spv3 as i128).wrapping_shl(21).wrapping_add(sv3);
130
131    // Range reduction - Step 4
132    // Output range: vv4 in [-0x1.0002143p-29 , 0x1p-29]
133    // idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29))
134    let idx4 = ((((vv3 >> 68) as i32).wrapping_add(131)) >> 1) as usize;
135
136    let z4 = log_table[3][idx4];
137    sum = sum + z4;
138
139    let s4: i128 = S4[idx4] as i128; // |s| < 2^-21, ulp = 2^-28
140    // |s + v| < 2^-28, ulp = 2^-97
141    let spv4 = s4.wrapping_shl(69).wrapping_add(vv3);
142    // |s*v| < 2^-42, ulp = 2^(-97-28) = 2^-125
143    let sv4 = s4 * vv3;
144    // |vv4| < 2^-28, ulp = 2^-125
145    let vv4 = spv4.wrapping_shl(28).wrapping_add(sv4);
146
147    let mut z0 = if vv4 < 0 {
148        DyadicFloat128 {
149            sign: DyadicSign::Neg,
150            exponent: -125,
151            mantissa: (-vv4) as u128,
152        }
153    } else {
154        DyadicFloat128 {
155            sign: DyadicSign::Pos,
156            exponent: -125,
157            mantissa: vv4 as u128,
158        }
159    };
160    z0.normalize();
161
162    (z0, sum)
163}