1use crate::conversions::avx::interpolator::AvxVectorSse;
30use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
31use crate::nd_array::lerp;
32use std::arch::x86_64::*;
33use std::ops::{Add, Mul, Sub};
34
/// 4-D lookup-table sampler over a flat `f32` grid, used by the AVX/SSE
/// interpolation paths below.
pub(crate) struct HypercubeAvx<'a> {
    // Flattened grid data; length is the product of the four grid sizes
    // times the per-entry component count (asserted in `new`).
    array: &'a [f32],
    // Strides between consecutive indices along each axis, measured in grid
    // entries (not floats). The `w` axis is innermost with stride 1 — see the
    // `Fetcher4` implementations, which add `w` directly.
    x_stride: u32,
    y_stride: u32,
    z_stride: u32,
    // Number of nodes along each of the four axes (x, y, z, w).
    grid_size: [u8; 4],
}
45
/// Abstraction over reading a single grid entry at integer 4-D coordinates,
/// letting the interpolation kernels stay generic over entry width/layout.
trait Fetcher4<T> {
    // Returns the value stored at grid node `(x, y, z, w)`.
    // Implementations may skip bounds checks, so coordinates are assumed
    // to be in-bounds for the underlying grid.
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
49
/// Fetcher for grids whose entries are three `f32` components (vec3).
/// Fields mirror the owning `HypercubeAvx`; strides are in grid entries.
struct Fetch4Vec3<'a> {
    array: &'a [f32],
    x_stride: u32,
    y_stride: u32,
    z_stride: u32,
}
56
impl Fetcher4<AvxVectorSse> for Fetch4Vec3<'_> {
    /// Loads one 3-component grid entry into the low three lanes of an SSE
    /// vector (lane 3 is zeroed by the 64-bit load).
    ///
    /// Entry index is `x * x_stride + y * y_stride + z * z_stride + w`
    /// (`w` innermost with stride 1), scaled by 3 floats per entry.
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> AvxVectorSse {
        let start = (x as u32 * self.x_stride
            + y as u32 * self.y_stride
            + z as u32 * self.z_stride
            + w as u32) as usize
            * 3;
        // SAFETY: no bounds checks are performed here; soundness relies on
        // callers passing coordinates that are in-bounds for the grid that
        // `array` was built from. NOTE(review): confirm all call sites clamp
        // coordinates before fetching.
        unsafe {
            let k = self.array.get_unchecked(start..);
            // Load components 0 and 1 (8 bytes) into lanes 0-1; upper lanes zero.
            let lo = _mm_loadu_si64(k.as_ptr() as *const _);
            // Insert component 2's bit pattern into lane 2.
            let hi = _mm_insert_epi32::<2>(
                lo,
                k.get_unchecked(2..).as_ptr().read_unaligned().to_bits() as i32,
            );
            AvxVectorSse {
                v: _mm_castsi128_ps(hi),
            }
        }
    }
}
78
79impl<'a> HypercubeAvx<'a> {
80 pub(crate) fn new(arr: &'a [f32], grid: [u8; 4], components: usize) -> Self {
81 assert_eq!(
84 grid[0] as usize * grid[1] as usize * grid[2] as usize * grid[3] as usize * components,
85 arr.len()
86 );
87 let z_stride = grid[2] as u32;
88 let y_stride = z_stride * grid[1] as u32;
89 let x_stride = y_stride * grid[0] as u32;
90 HypercubeAvx {
91 array: arr,
92 x_stride,
93 y_stride,
94 z_stride,
95 grid_size: grid,
96 }
97 }
98
99 #[inline(always)]
100 fn quadlinear<
101 T: From<f32>
102 + Add<T, Output = T>
103 + Mul<T, Output = T>
104 + FusedMultiplyAdd<T>
105 + Sub<T, Output = T>
106 + Copy
107 + FusedMultiplyNegAdd<T>,
108 >(
109 &self,
110 lin_x: f32,
111 lin_y: f32,
112 lin_z: f32,
113 lin_w: f32,
114 r: impl Fetcher4<T>,
115 ) -> T {
116 let lin_x = lin_x.max(0.0).min(1.0);
117 let lin_y = lin_y.max(0.0).min(1.0);
118 let lin_z = lin_z.max(0.0).min(1.0);
119 let lin_w = lin_w.max(0.0).min(1.0);
120
121 let scale_x = (self.grid_size[0] as i32 - 1) as f32;
122 let scale_y = (self.grid_size[1] as i32 - 1) as f32;
123 let scale_z = (self.grid_size[2] as i32 - 1) as f32;
124 let scale_w = (self.grid_size[3] as i32 - 1) as f32;
125
126 let x = (lin_x * scale_x).floor() as i32;
127 let y = (lin_y * scale_y).floor() as i32;
128 let z = (lin_z * scale_z).floor() as i32;
129 let w = (lin_w * scale_w).floor() as i32;
130
131 let x_n = (lin_x * scale_x).ceil() as i32;
132 let y_n = (lin_y * scale_y).ceil() as i32;
133 let z_n = (lin_z * scale_z).ceil() as i32;
134 let w_n = (lin_w * scale_w).ceil() as i32;
135
136 let x_d = T::from(lin_x * scale_x - x as f32);
137 let y_d = T::from(lin_y * scale_y - y as f32);
138 let z_d = T::from(lin_z * scale_z - z as f32);
139 let w_d = T::from(lin_w * scale_w - w as f32);
140
141 let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
142 let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
143 let r_y1 = lerp(r_x1, r_x2, y_d);
144 let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
145 let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
146 let r_y2 = lerp(r_x3, r_x4, y_d);
147 let r_z1 = lerp(r_y1, r_y2, z_d);
148
149 let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
150 let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
151 let r_y1 = lerp(r_x1, r_x2, y_d);
152 let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
153 let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
154 let r_y2 = lerp(r_x3, r_x4, y_d);
155 let r_z2 = lerp(r_y1, r_y2, z_d);
156 lerp(r_z1, r_z2, w_d)
157 }
158
159 #[inline(always)]
160 pub(crate) fn quadlinear_vec3(
161 &self,
162 lin_x: f32,
163 lin_y: f32,
164 lin_z: f32,
165 lin_w: f32,
166 ) -> AvxVectorSse {
167 self.quadlinear(
168 lin_x,
169 lin_y,
170 lin_z,
171 lin_w,
172 Fetch4Vec3 {
173 array: self.array,
174 x_stride: self.x_stride,
175 y_stride: self.y_stride,
176 z_stride: self.z_stride,
177 },
178 )
179 }
180
181 #[cfg(feature = "options")]
182 #[inline(always)]
183 fn pyramid<
184 T: From<f32>
185 + Add<T, Output = T>
186 + Mul<T, Output = T>
187 + FusedMultiplyAdd<T>
188 + Sub<T, Output = T>
189 + Copy
190 + FusedMultiplyNegAdd<T>,
191 >(
192 &self,
193 lin_x: f32,
194 lin_y: f32,
195 lin_z: f32,
196 lin_w: f32,
197 r: impl Fetcher4<T>,
198 ) -> T {
199 let lin_x = lin_x.max(0.0).min(1.0);
200 let lin_y = lin_y.max(0.0).min(1.0);
201 let lin_z = lin_z.max(0.0).min(1.0);
202 let lin_w = lin_w.max(0.0).min(1.0);
203
204 let scale_x = (self.grid_size[0] as i32 - 1) as f32;
205 let scale_y = (self.grid_size[1] as i32 - 1) as f32;
206 let scale_z = (self.grid_size[2] as i32 - 1) as f32;
207 let scale_w = (self.grid_size[3] as i32 - 1) as f32;
208
209 let x = (lin_x * scale_x).floor() as i32;
210 let y = (lin_y * scale_y).floor() as i32;
211 let z = (lin_z * scale_z).floor() as i32;
212 let w = (lin_w * scale_w).floor() as i32;
213
214 let x_n = (lin_x * scale_x).ceil() as i32;
215 let y_n = (lin_y * scale_y).ceil() as i32;
216 let z_n = (lin_z * scale_z).ceil() as i32;
217 let w_n = (lin_w * scale_w).ceil() as i32;
218
219 let dr = lin_x * scale_x - x as f32;
220 let dg = lin_y * scale_y - y as f32;
221 let db = lin_z * scale_z - z as f32;
222 let dw = lin_w * scale_w - w as f32;
223
224 let c0 = r.fetch(x, y, z, w);
225
226 let w0 = if dr > db && dg > db {
227 let x0 = r.fetch(x_n, y_n, z_n, w);
228 let x1 = r.fetch(x_n, y_n, z, w);
229 let x2 = r.fetch(x_n, y, z, w);
230 let x3 = r.fetch(x, y_n, z, w);
231
232 let c1 = x0 - x1;
233 let c2 = x2 - c0;
234 let c3 = x3 - c0;
235 let c4 = c0 - x3 - x2 + x1;
236
237 let s0 = c0.mla(c1, T::from(db));
238 let s1 = s0.mla(c2, T::from(dr));
239 let s2 = s1.mla(c3, T::from(dg));
240 s2.mla(c4, T::from(dr * dg))
241 } else if db > dr && dg > dr {
242 let x0 = r.fetch(x, y, z_n, w);
243 let x1 = r.fetch(x_n, y_n, z_n, w);
244 let x2 = r.fetch(x, y_n, z_n, w);
245 let x3 = r.fetch(x, y_n, z, w);
246
247 let c1 = x0 - c0;
248 let c2 = x1 - x2;
249 let c3 = x3 - c0;
250 let c4 = c0 - x3 - x0 + x2;
251
252 let s0 = c0.mla(c1, T::from(db));
253 let s1 = s0.mla(c2, T::from(dr));
254 let s2 = s1.mla(c3, T::from(dg));
255 s2.mla(c4, T::from(dg * db))
256 } else {
257 let x0 = r.fetch(x, y, z_n, w);
258 let x1 = r.fetch(x_n, y, z, w);
259 let x2 = r.fetch(x_n, y, z_n, w);
260 let x3 = r.fetch(x_n, y_n, z_n, w);
261
262 let c1 = x0 - c0;
263 let c2 = x1 - c0;
264 let c3 = x3 - x2;
265 let c4 = c0 - x1 - x0 + x2;
266
267 let s0 = c0.mla(c1, T::from(db));
268 let s1 = s0.mla(c2, T::from(dr));
269 let s2 = s1.mla(c3, T::from(dg));
270 s2.mla(c4, T::from(db * dr))
271 };
272
273 let c0 = r.fetch(x, y, z, w_n);
274
275 let w1 = if dr > db && dg > db {
276 let x0 = r.fetch(x_n, y_n, z_n, w_n);
277 let x1 = r.fetch(x_n, y_n, z, w_n);
278 let x2 = r.fetch(x_n, y, z, w_n);
279 let x3 = r.fetch(x, y_n, z, w_n);
280
281 let c1 = x0 - x1;
282 let c2 = x2 - c0;
283 let c3 = x3 - c0;
284 let c4 = c0 - x3 - x2 + x1;
285
286 let s0 = c0.mla(c1, T::from(db));
287 let s1 = s0.mla(c2, T::from(dr));
288 let s2 = s1.mla(c3, T::from(dg));
289 s2.mla(c4, T::from(dr * dg))
290 } else if db > dr && dg > dr {
291 let x0 = r.fetch(x, y, z_n, w_n);
292 let x1 = r.fetch(x_n, y_n, z_n, w_n);
293 let x2 = r.fetch(x, y_n, z_n, w_n);
294 let x3 = r.fetch(x, y_n, z, w_n);
295
296 let c1 = x0 - c0;
297 let c2 = x1 - x2;
298 let c3 = x3 - c0;
299 let c4 = c0 - x3 - x0 + x2;
300
301 let s0 = c0.mla(c1, T::from(db));
302 let s1 = s0.mla(c2, T::from(dr));
303 let s2 = s1.mla(c3, T::from(dg));
304 s2.mla(c4, T::from(dg * db))
305 } else {
306 let x0 = r.fetch(x, y, z_n, w_n);
307 let x1 = r.fetch(x_n, y, z, w_n);
308 let x2 = r.fetch(x_n, y, z_n, w_n);
309 let x3 = r.fetch(x_n, y_n, z_n, w_n);
310
311 let c1 = x0 - c0;
312 let c2 = x1 - c0;
313 let c3 = x3 - x2;
314 let c4 = c0 - x1 - x0 + x2;
315
316 let s0 = c0.mla(c1, T::from(db));
317 let s1 = s0.mla(c2, T::from(dr));
318 let s2 = s1.mla(c3, T::from(dg));
319 s2.mla(c4, T::from(db * dr))
320 };
321 w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
322 }
323
324 #[cfg(feature = "options")]
325 #[inline(always)]
326 pub(crate) fn pyramid_vec3(
327 &self,
328 lin_x: f32,
329 lin_y: f32,
330 lin_z: f32,
331 lin_w: f32,
332 ) -> AvxVectorSse {
333 self.pyramid(
334 lin_x,
335 lin_y,
336 lin_z,
337 lin_w,
338 Fetch4Vec3 {
339 array: self.array,
340 x_stride: self.x_stride,
341 y_stride: self.y_stride,
342 z_stride: self.z_stride,
343 },
344 )
345 }
346
347 #[cfg(feature = "options")]
348 #[inline(always)]
349 fn prism<
350 T: From<f32>
351 + Add<T, Output = T>
352 + Mul<T, Output = T>
353 + FusedMultiplyAdd<T>
354 + Sub<T, Output = T>
355 + Copy
356 + FusedMultiplyNegAdd<T>,
357 >(
358 &self,
359 lin_x: f32,
360 lin_y: f32,
361 lin_z: f32,
362 lin_w: f32,
363 r: impl Fetcher4<T>,
364 ) -> T {
365 let lin_x = lin_x.max(0.0).min(1.0);
366 let lin_y = lin_y.max(0.0).min(1.0);
367 let lin_z = lin_z.max(0.0).min(1.0);
368 let lin_w = lin_w.max(0.0).min(1.0);
369
370 let scale_x = (self.grid_size[0] as i32 - 1) as f32;
371 let scale_y = (self.grid_size[1] as i32 - 1) as f32;
372 let scale_z = (self.grid_size[2] as i32 - 1) as f32;
373 let scale_w = (self.grid_size[3] as i32 - 1) as f32;
374
375 let x = (lin_x * scale_x).floor() as i32;
376 let y = (lin_y * scale_y).floor() as i32;
377 let z = (lin_z * scale_z).floor() as i32;
378 let w = (lin_w * scale_w).floor() as i32;
379
380 let x_n = (lin_x * scale_x).ceil() as i32;
381 let y_n = (lin_y * scale_y).ceil() as i32;
382 let z_n = (lin_z * scale_z).ceil() as i32;
383 let w_n = (lin_w * scale_w).ceil() as i32;
384
385 let dr = lin_x * scale_x - x as f32;
386 let dg = lin_y * scale_y - y as f32;
387 let db = lin_z * scale_z - z as f32;
388 let dw = lin_w * scale_w - w as f32;
389
390 let c0 = r.fetch(x, y, z, w);
391
392 let w0 = if db >= dr {
393 let x0 = r.fetch(x, y, z_n, w);
394 let x1 = r.fetch(x_n, y, z_n, w);
395 let x2 = r.fetch(x, y_n, z, w);
396 let x3 = r.fetch(x, y_n, z_n, w);
397 let x4 = r.fetch(x_n, y_n, z_n, w);
398
399 let c1 = x0 - c0;
400 let c2 = x1 - x0;
401 let c3 = x2 - c0;
402 let c4 = c0 - x2 - x0 + x3;
403 let c5 = x0 - x3 - x1 + x4;
404
405 let s0 = c0.mla(c1, T::from(db));
406 let s1 = s0.mla(c2, T::from(dr));
407 let s2 = s1.mla(c3, T::from(dg));
408 let s3 = s2.mla(c4, T::from(dg * db));
409 s3.mla(c5, T::from(dr * dg))
410 } else {
411 let x0 = r.fetch(x_n, y, z, w);
412 let x1 = r.fetch(x_n, y, z_n, w);
413 let x2 = r.fetch(x, y_n, z, w);
414 let x3 = r.fetch(x_n, y_n, z, w);
415 let x4 = r.fetch(x_n, y_n, z_n, w);
416
417 let c1 = x1 - x0;
418 let c2 = x0 - c0;
419 let c3 = x2 - c0;
420 let c4 = x0 - x3 - x1 + x4;
421 let c5 = c0 - x2 - x0 + x3;
422
423 let s0 = c0.mla(c1, T::from(db));
424 let s1 = s0.mla(c2, T::from(dr));
425 let s2 = s1.mla(c3, T::from(dg));
426 let s3 = s2.mla(c4, T::from(dg * db));
427 s3.mla(c5, T::from(dr * dg))
428 };
429
430 let c0 = r.fetch(x, y, z, w_n);
431
432 let w1 = if db >= dr {
433 let x0 = r.fetch(x, y, z_n, w_n);
434 let x1 = r.fetch(x_n, y, z_n, w_n);
435 let x2 = r.fetch(x, y_n, z, w_n);
436 let x3 = r.fetch(x, y_n, z_n, w_n);
437 let x4 = r.fetch(x_n, y_n, z_n, w_n);
438
439 let c1 = x0 - c0;
440 let c2 = x1 - x0;
441 let c3 = x2 - c0;
442 let c4 = c0 - x2 - x0 + x3;
443 let c5 = x0 - x3 - x1 + x4;
444
445 let s0 = c0.mla(c1, T::from(db));
446 let s1 = s0.mla(c2, T::from(dr));
447 let s2 = s1.mla(c3, T::from(dg));
448 let s3 = s2.mla(c4, T::from(dg * db));
449 s3.mla(c5, T::from(dr * dg))
450 } else {
451 let x0 = r.fetch(x_n, y, z, w_n);
452 let x1 = r.fetch(x_n, y, z_n, w_n);
453 let x2 = r.fetch(x, y_n, z, w_n);
454 let x3 = r.fetch(x_n, y_n, z, w_n);
455 let x4 = r.fetch(x_n, y_n, z_n, w_n);
456
457 let c1 = x1 - x0;
458 let c2 = x0 - c0;
459 let c3 = x2 - c0;
460 let c4 = x0 - x3 - x1 + x4;
461 let c5 = c0 - x2 - x0 + x3;
462
463 let s0 = c0.mla(c1, T::from(db));
464 let s1 = s0.mla(c2, T::from(dr));
465 let s2 = s1.mla(c3, T::from(dg));
466 let s3 = s2.mla(c4, T::from(dg * db));
467 s3.mla(c5, T::from(dr * dg))
468 };
469 w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
470 }
471
472 #[cfg(feature = "options")]
473 #[inline(always)]
474 pub(crate) fn prism_vec3(
475 &self,
476 lin_x: f32,
477 lin_y: f32,
478 lin_z: f32,
479 lin_w: f32,
480 ) -> AvxVectorSse {
481 self.prism(
482 lin_x,
483 lin_y,
484 lin_z,
485 lin_w,
486 Fetch4Vec3 {
487 array: self.array,
488 x_stride: self.x_stride,
489 y_stride: self.y_stride,
490 z_stride: self.z_stride,
491 },
492 )
493 }
494
495 #[cfg(feature = "options")]
496 #[inline(always)]
497 fn tetra<
498 T: From<f32>
499 + Add<T, Output = T>
500 + Mul<T, Output = T>
501 + FusedMultiplyAdd<T>
502 + Sub<T, Output = T>
503 + Copy
504 + FusedMultiplyNegAdd<T>,
505 >(
506 &self,
507 lin_x: f32,
508 lin_y: f32,
509 lin_z: f32,
510 lin_w: f32,
511 r: impl Fetcher4<T>,
512 ) -> T {
513 let lin_x = lin_x.max(0.0).min(1.0);
514 let lin_y = lin_y.max(0.0).min(1.0);
515 let lin_z = lin_z.max(0.0).min(1.0);
516 let lin_w = lin_w.max(0.0).min(1.0);
517
518 let scale_x = (self.grid_size[0] as i32 - 1) as f32;
519 let scale_y = (self.grid_size[1] as i32 - 1) as f32;
520 let scale_z = (self.grid_size[2] as i32 - 1) as f32;
521 let scale_w = (self.grid_size[3] as i32 - 1) as f32;
522
523 let x = (lin_x * scale_x).floor() as i32;
524 let y = (lin_y * scale_y).floor() as i32;
525 let z = (lin_z * scale_z).floor() as i32;
526 let w = (lin_w * scale_w).floor() as i32;
527
528 let x_n = (lin_x * scale_x).ceil() as i32;
529 let y_n = (lin_y * scale_y).ceil() as i32;
530 let z_n = (lin_z * scale_z).ceil() as i32;
531 let w_n = (lin_w * scale_w).ceil() as i32;
532
533 let rx = lin_x * scale_x - x as f32;
534 let ry = lin_y * scale_y - y as f32;
535 let rz = lin_z * scale_z - z as f32;
536 let rw = lin_w * scale_w - w as f32;
537
538 let c0 = r.fetch(x, y, z, w);
539 let c2;
540 let c1;
541 let c3;
542 if rx >= ry {
543 if ry >= rz {
544 c1 = r.fetch(x_n, y, z, w) - c0;
546 c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
547 c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
548 } else if rx >= rz {
549 c1 = r.fetch(x_n, y, z, w) - c0;
551 c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
552 c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
553 } else {
554 c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
556 c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
557 c3 = r.fetch(x, y, z_n, w) - c0;
558 }
559 } else if rx >= rz {
560 c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
562 c2 = r.fetch(x, y_n, z, w) - c0;
563 c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
564 } else if ry >= rz {
565 c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
567 c2 = r.fetch(x, y_n, z, w) - c0;
568 c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
569 } else {
570 c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
572 c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
573 c3 = r.fetch(x, y, z_n, w) - c0;
574 }
575 let s0 = c0.mla(c1, T::from(rx));
576 let s1 = s0.mla(c2, T::from(ry));
577 let w0 = s1.mla(c3, T::from(rz));
578
579 let c0 = r.fetch(x, y, z, w_n);
580 let c2;
581 let c1;
582 let c3;
583 if rx >= ry {
584 if ry >= rz {
585 c1 = r.fetch(x_n, y, z, w_n) - c0;
587 c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
588 c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
589 } else if rx >= rz {
590 c1 = r.fetch(x_n, y, z, w_n) - c0;
592 c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
593 c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
594 } else {
595 c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
597 c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
598 c3 = r.fetch(x, y, z_n, w_n) - c0;
599 }
600 } else if rx >= rz {
601 c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
603 c2 = r.fetch(x, y_n, z, w_n) - c0;
604 c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
605 } else if ry >= rz {
606 c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
608 c2 = r.fetch(x, y_n, z, w_n) - c0;
609 c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
610 } else {
611 c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
613 c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
614 c3 = r.fetch(x, y, z_n, w_n) - c0;
615 }
616 let s0 = c0.mla(c1, T::from(rx));
617 let s1 = s0.mla(c2, T::from(ry));
618 let w1 = s1.mla(c3, T::from(rz));
619 w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
620 }
621
622 #[cfg(feature = "options")]
623 #[inline(always)]
624 pub(crate) fn tetra_vec3(
625 &self,
626 lin_x: f32,
627 lin_y: f32,
628 lin_z: f32,
629 lin_w: f32,
630 ) -> AvxVectorSse {
631 self.tetra(
632 lin_x,
633 lin_y,
634 lin_z,
635 lin_w,
636 Fetch4Vec3 {
637 array: self.array,
638 x_stride: self.x_stride,
639 y_stride: self.y_stride,
640 z_stride: self.z_stride,
641 },
642 )
643 }
644}