1use super::*;
2
// Compile-time layout selection via `pick!`: with AVX the eight lanes live in
// one 256-bit register type; otherwise the vector is two 128-bit `f32x4`
// halves. Both layouts are `#[repr(C)]` and 32-byte aligned, so the type is
// bit-compatible with `[f32; 8]` in either configuration.
pick! {
  if #[cfg(target_feature="avx")] {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { avx: m256 }
  } else {
    #[derive(Default, Clone, Copy, PartialEq)]
    #[repr(C, align(32))]
    pub struct f32x8 { a : f32x4, b : f32x4 }
  }
}
14
/// Declares `pub const $i: f32x8` with every lane set to `$f`.
///
/// Goes through the `ConstUnionHack256bit` union so the value can be built in
/// a `const` context (splat/`From` are not `const fn`).
macro_rules! const_f32_as_f32x8 {
  ($i:ident, $f:expr) => {
    pub const $i: f32x8 =
      unsafe { ConstUnionHack256bit { f32a8: [$f; 8] }.f32x8 };
  };
}
21
impl f32x8 {
  // Common mathematical constants, each broadcast into all eight lanes.
  // Mirrors `core::f32::consts` plus a few convenience values.
  const_f32_as_f32x8!(ONE, 1.0);
  const_f32_as_f32x8!(HALF, 0.5);
  const_f32_as_f32x8!(ZERO, 0.0);
  const_f32_as_f32x8!(E, core::f32::consts::E);
  const_f32_as_f32x8!(FRAC_1_PI, core::f32::consts::FRAC_1_PI);
  const_f32_as_f32x8!(FRAC_2_PI, core::f32::consts::FRAC_2_PI);
  const_f32_as_f32x8!(FRAC_2_SQRT_PI, core::f32::consts::FRAC_2_SQRT_PI);
  const_f32_as_f32x8!(FRAC_1_SQRT_2, core::f32::consts::FRAC_1_SQRT_2);
  const_f32_as_f32x8!(FRAC_PI_2, core::f32::consts::FRAC_PI_2);
  const_f32_as_f32x8!(FRAC_PI_3, core::f32::consts::FRAC_PI_3);
  const_f32_as_f32x8!(FRAC_PI_4, core::f32::consts::FRAC_PI_4);
  const_f32_as_f32x8!(FRAC_PI_6, core::f32::consts::FRAC_PI_6);
  const_f32_as_f32x8!(FRAC_PI_8, core::f32::consts::FRAC_PI_8);
  const_f32_as_f32x8!(LN_2, core::f32::consts::LN_2);
  const_f32_as_f32x8!(LN_10, core::f32::consts::LN_10);
  const_f32_as_f32x8!(LOG2_E, core::f32::consts::LOG2_E);
  const_f32_as_f32x8!(LOG10_E, core::f32::consts::LOG10_E);
  const_f32_as_f32x8!(LOG10_2, core::f32::consts::LOG10_2);
  const_f32_as_f32x8!(LOG2_10, core::f32::consts::LOG2_10);
  const_f32_as_f32x8!(PI, core::f32::consts::PI);
  const_f32_as_f32x8!(SQRT_2, core::f32::consts::SQRT_2);
  const_f32_as_f32x8!(TAU, core::f32::consts::TAU);
}
46
// SAFETY: `f32x8` is `#[repr(C)]` in both cfg layouts, contains only f32
// lanes (no padding, no invalid bit patterns), so all-zero bytes are a valid
// value and any byte pattern is a valid value — the bytemuck requirements.
unsafe impl Zeroable for f32x8 {}
unsafe impl Pod for f32x8 {}
49
impl Add for f32x8 {
  type Output = Self;
  /// Lanewise `self + rhs`.
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: add_m256(self.avx, rhs.avx) }
      } else {
        // No AVX: operate on each 128-bit half separately.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
67
impl Sub for f32x8 {
  type Output = Self;
  /// Lanewise `self - rhs`.
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sub_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
85
impl Mul for f32x8 {
  type Output = Self;
  /// Lanewise `self * rhs`.
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: mul_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
103
impl Div for f32x8 {
  type Output = Self;
  /// Lanewise `self / rhs` (IEEE semantics: div by zero gives ±inf/NaN).
  #[inline]
  #[must_use]
  fn div(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: div_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.div(rhs.a),
          b : self.b.div(rhs.b),
        }
      }
    }
  }
}
121
122impl Add<f32> for f32x8 {
123 type Output = Self;
124 #[inline]
125 #[must_use]
126 fn add(self, rhs: f32) -> Self::Output {
127 self.add(Self::splat(rhs))
128 }
129}
130
131impl Sub<f32> for f32x8 {
132 type Output = Self;
133 #[inline]
134 #[must_use]
135 fn sub(self, rhs: f32) -> Self::Output {
136 self.sub(Self::splat(rhs))
137 }
138}
139
140impl Mul<f32> for f32x8 {
141 type Output = Self;
142 #[inline]
143 #[must_use]
144 fn mul(self, rhs: f32) -> Self::Output {
145 self.mul(Self::splat(rhs))
146 }
147}
148
149impl Div<f32> for f32x8 {
150 type Output = Self;
151 #[inline]
152 #[must_use]
153 fn div(self, rhs: f32) -> Self::Output {
154 self.div(Self::splat(rhs))
155 }
156}
157
158impl Add<f32x8> for f32 {
159 type Output = f32x8;
160 #[inline]
161 #[must_use]
162 fn add(self, rhs: f32x8) -> Self::Output {
163 f32x8::splat(self).add(rhs)
164 }
165}
166
167impl Sub<f32x8> for f32 {
168 type Output = f32x8;
169 #[inline]
170 #[must_use]
171 fn sub(self, rhs: f32x8) -> Self::Output {
172 f32x8::splat(self).sub(rhs)
173 }
174}
175
176impl Mul<f32x8> for f32 {
177 type Output = f32x8;
178 #[inline]
179 #[must_use]
180 fn mul(self, rhs: f32x8) -> Self::Output {
181 f32x8::splat(self).mul(rhs)
182 }
183}
184
185impl Div<f32x8> for f32 {
186 type Output = f32x8;
187 #[inline]
188 #[must_use]
189 fn div(self, rhs: f32x8) -> Self::Output {
190 f32x8::splat(self).div(rhs)
191 }
192}
193
impl BitAnd for f32x8 {
  type Output = Self;
  /// Bitwise AND of the raw lane bits (used for masking, not arithmetic).
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitand_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
211
impl BitOr for f32x8 {
  type Output = Self;
  /// Bitwise OR of the raw lane bits.
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}
229
impl BitXor for f32x8 {
  type Output = Self;
  /// Bitwise XOR of the raw lane bits (e.g. for flipping sign bits).
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: bitxor_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
247
impl CmpEq for f32x8 {
  type Output = Self;
  /// Lanewise `self == rhs`: all-ones lane for true, all-zeros for false.
  /// Ordered comparison — a lane with NaN on either side compares false.
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(EqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}
265
impl CmpGe for f32x8 {
  type Output = Self;
  /// Lanewise `self >= rhs` mask (ordered: NaN lanes give false).
  #[inline]
  #[must_use]
  fn cmp_ge(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ge(rhs.a),
          b : self.b.cmp_ge(rhs.b),
        }
      }
    }
  }
}
283
impl CmpGt for f32x8 {
  type Output = Self;
  /// Lanewise `self > rhs` mask (ordered: NaN lanes give false).
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(GreaterThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}
301
impl CmpNe for f32x8 {
  type Output = Self;
  /// Lanewise `self != rhs` mask. Note this uses the *ordered* not-equal
  /// predicate, so a NaN lane on either side also compares false here.
  #[inline]
  #[must_use]
  fn cmp_ne(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(NotEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_ne(rhs.a),
          b : self.b.cmp_ne(rhs.b),
        }
      }
    }
  }
}
319
impl CmpLe for f32x8 {
  type Output = Self;
  /// Lanewise `self <= rhs` mask (ordered: NaN lanes give false).
  #[inline]
  #[must_use]
  fn cmp_le(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessEqualOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_le(rhs.a),
          b : self.b.cmp_le(rhs.b),
        }
      }
    }
  }
}
337
impl CmpLt for f32x8 {
  type Output = Self;
  /// Lanewise `self < rhs` mask (ordered: NaN lanes give false).
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(LessThanOrdered)}>(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}
355
356impl f32x8 {
357 #[inline]
358 #[must_use]
359 pub fn new(array: [f32; 8]) -> Self {
360 Self::from(array)
361 }
  /// Lanewise select: where a lane of `self` (the mask) is "true" take the
  /// lane from `t`, otherwise from `f`. Mask lanes should be all-ones or
  /// all-zeros, as produced by the `cmp_*` methods.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // blend_varying selects by each mask lane's top (sign) bit.
        Self { avx: blend_varying_m256(f.avx, t.avx, self.avx) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }
  /// Lanewise absolute value, implemented by clearing the sign bit.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // i32::MAX as u32 == 0x7FFF_FFFF: all bits except the sign bit.
        let non_sign_bits = f32x8::from(f32::from_bits(i32::MAX as u32));
        self & non_sign_bits
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
391
  /// Lanewise maximum without NaN handling: uses the raw hardware max, which
  /// returns its second operand (`rhs`) when a lane comparison is unordered.
  /// Prefer [`max`](Self::max) when NaN inputs are possible.
  #[inline]
  #[must_use]
  pub fn fast_max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: max_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_max(rhs.a),
          b : self.b.fast_max(rhs.b),
        }
      }
    }
  }
409
  /// Lanewise maximum with NaN awareness: where a lane of `rhs` is NaN the
  /// lane of `self` is kept, otherwise the hardware max is used.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Hardware max propagates rhs on unordered lanes, so explicitly keep
        // self wherever rhs is NaN.
        rhs.is_nan().blend(self, Self { avx: max_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
430
  /// Lanewise minimum without NaN handling: raw hardware min, which returns
  /// its second operand (`rhs`) on unordered lanes. Prefer
  /// [`min`](Self::min) when NaN inputs are possible.
  #[inline]
  #[must_use]
  pub fn fast_min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: min_m256(self.avx, rhs.avx) }
      } else {
        Self {
          a : self.a.fast_min(rhs.a),
          b : self.b.fast_min(rhs.b),
        }
      }
    }
  }
448
  /// Lanewise minimum with NaN awareness: where a lane of `rhs` is NaN the
  /// lane of `self` is kept, otherwise the hardware min is used.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        rhs.is_nan().blend(self, Self { avx: min_m256(self.avx, rhs.avx) })
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Lanewise NaN test: all-ones mask lane where the value is NaN.
  /// Uses the unordered self-comparison (NaN is the only value where
  /// `x == x` is unordered).
  #[inline]
  #[must_use]
  pub fn is_nan(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: cmp_op_mask_m256::<{cmp_op!(Unordered)}>(self.avx, self.avx) }
      } else {
        Self {
          a : self.a.is_nan(),
          b : self.b.is_nan(),
        }
      }
    }
  }
  /// Lanewise finite test: all-ones mask lane where the value is neither NaN
  /// nor infinite.
  #[inline]
  #[must_use]
  pub fn is_finite(self) -> Self {
    // Shift out the sign bit so the 8 exponent bits sit at the top, then
    // check that the exponent field is not all-ones (all-ones == NaN/inf).
    let shifted_exp_mask = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = !(shift_u & shifted_exp_mask).cmp_eq(shifted_exp_mask);
    cast(out)
  }
  /// Lanewise infinity test (either sign): all-ones mask lane where the
  /// value is +inf or -inf.
  #[inline]
  #[must_use]
  pub fn is_inf(self) -> Self {
    // After shifting out the sign bit, an infinity is exactly an all-ones
    // exponent with a zero mantissa, i.e. the pattern 0xFF000000.
    let shifted_inf = u32x8::from(0xFF000000);
    let u: u32x8 = cast(self);
    let shift_u = u << 1_u64;
    let out = (shift_u).cmp_eq(shifted_inf);
    cast(out)
  }
501
  /// Rounds each lane to the nearest integer-valued float (ties to even on
  /// the AVX path, which uses the `Nearest` rounding mode).
  #[inline]
  #[must_use]
  pub fn round(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: round_m256::<{round_op!(Nearest)}>(self.avx) }
      } else {
        Self {
          a : self.a.round(),
          b : self.b.round(),
        }
      }
    }
  }
517
  /// Rounds each lane to the nearest `i32` without any special handling:
  /// NaN and out-of-range lanes produce whatever the hardware conversion
  /// yields (the i32 "indefinite" value, `i32::MIN`, on x86).
  /// Use [`round_int`](Self::round_int) for defined NaN/overflow behavior.
  #[inline]
  #[must_use]
  pub fn fast_round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        cast(convert_to_i32_m256i_from_m256(self.avx))
      } else {
        cast([
          self.a.fast_round_int(),
          self.b.fast_round_int()])
      }
    }
  }
534
  /// Rounds each lane to the nearest `i32`, with NaN lanes mapped to 0 and
  /// lanes >= 2^31 mapped to `i32::MAX` instead of the hardware's
  /// `i32::MIN` indefinite value.
  #[inline]
  #[must_use]
  pub fn round_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // NaN != NaN, so this mask zeroes NaN lanes before conversion.
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        // Lanes >= 2^31 overflow the conversion to i32::MIN (0x8000_0000);
        // xor with an all-ones mask flips those lanes to i32::MAX.
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.round_int(),
          self.b.round_int(),
        ])
      }
    }
  }
557
558 #[inline]
562 #[must_use]
563 pub fn fast_trunc_int(self) -> i32x8 {
564 pick! {
565 if #[cfg(all(target_feature="avx"))] {
566 cast(convert_truncate_to_i32_m256i_from_m256(self.avx))
567 } else {
568 cast([
569 self.a.fast_trunc_int(),
570 self.b.fast_trunc_int(),
571 ])
572 }
573 }
574 }
575
  /// Truncates each lane toward zero to an `i32`, with NaN lanes mapped to 0
  /// and lanes >= 2^31 mapped to `i32::MAX` (same fix-up scheme as
  /// [`round_int`](Self::round_int)).
  #[inline]
  #[must_use]
  pub fn trunc_int(self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Zero out NaN lanes so the conversion never sees them.
        let non_nan_mask = self.cmp_eq(self);
        let non_nan = self & non_nan_mask;
        // Overflowed lanes become i32::MIN; xor flips them to i32::MAX.
        let flip_to_max: i32x8 = cast(self.cmp_ge(Self::splat(2147483648.0)));
        let cast: i32x8 = cast(convert_truncate_to_i32_m256i_from_m256(non_nan.avx));
        flip_to_max ^ cast
      } else {
        cast([
          self.a.trunc_int(),
          self.b.trunc_int(),
        ])
      }
    }
  }
  /// Lanewise `(self * m) + a`. A true fused multiply-add (single rounding)
  /// only when compiled with both AVX and FMA; otherwise it is an ordinary
  /// multiply followed by an add (two roundings).
  #[inline]
  #[must_use]
  pub fn mul_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) + a
      } else {
        Self {
          a : self.a.mul_add(m.a, a.a),
          b : self.b.mul_add(m.b, a.b),
        }
      }
    }
  }
615
  /// Lanewise `(self * m) - a`, fused when AVX+FMA are available.
  #[inline]
  #[must_use]
  pub fn mul_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        (self * m) - a
      } else {
        Self {
          a : self.a.mul_sub(m.a, a.a),
          b : self.b.mul_sub(m.b, a.b),
        }
      }
    }
  }
633
  /// Lanewise `a - (self * m)` ("negated multiply add"), fused when AVX+FMA
  /// are available.
  #[inline]
  #[must_use]
  pub fn mul_neg_add(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_add_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        a - (self * m)
      } else {
        Self {
          a : self.a.mul_neg_add(m.a, a.a),
          b : self.b.mul_neg_add(m.b, a.b),
        }
      }
    }
  }
651
  /// Lanewise `-(self * m) - a`, fused when AVX+FMA are available.
  #[inline]
  #[must_use]
  pub fn mul_neg_sub(self, m: Self, a: Self) -> Self {
    pick! {
      if #[cfg(all(target_feature="avx",target_feature="fma"))] {
        Self { avx: fused_mul_neg_sub_m256(self.avx, m.avx, a.avx) }
      } else if #[cfg(target_feature="avx")] {
        -(self * m) - a
      } else {
        Self {
          a : self.a.mul_neg_sub(m.a, a.a),
          b : self.b.mul_neg_sub(m.b, a.b),
        }
      }
    }
  }
669
670 #[inline]
671 #[must_use]
672 pub fn flip_signs(self, signs: Self) -> Self {
673 self ^ (signs & Self::from(-0.0))
674 }
675
676 #[inline]
677 #[must_use]
678 pub fn copysign(self, sign: Self) -> Self {
679 let magnitude_mask = Self::from(f32::from_bits(u32::MAX >> 1));
680 (self & magnitude_mask) | (sign & Self::from(-0.0))
681 }
682
  /// Computes `(asin(self), acos(self))` in one pass. Inputs are expected in
  /// `[-1, 1]`; results are in radians.
  #[allow(non_upper_case_globals)]
  #[inline]
  pub fn asin_acos(self) -> (Self, Self) {
    // Minimax polynomial coefficients for asin on the small-argument range
    // (Cephes-style single-precision constants).
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // For |x| >= 0.5 the argument is transformed via
    // asin(x) = pi/2 - 2*asin(sqrt((1-x)/2)) to keep the polynomial accurate.
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // acos: negative inputs reflect around pi.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    // asin: undo the half-angle transform, then restore the input's sign.
    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    let asin = asin.flip_signs(self);

    (asin, acos)
  }
722
  /// Lanewise arcsine in radians. Inputs are expected in `[-1, 1]`.
  /// Same algorithm as [`asin_acos`](Self::asin_acos), asin half only.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn asin(self) -> Self {
    // Minimax polynomial coefficients (Cephes-style single precision).
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5 goes through the half-angle identity for accuracy.
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    let z3 = f32x8::FRAC_PI_2 - z1;
    let asin = big.blend(z3, z);
    // Restore the sign of the original input.
    let asin = asin.flip_signs(self);

    asin
  }
758
  /// Lanewise arccosine in radians. Inputs are expected in `[-1, 1]`.
  /// Same algorithm as [`asin_acos`](Self::asin_acos), acos half only.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn acos(self) -> Self {
    // Minimax polynomial coefficients (Cephes-style single precision).
    const_f32_as_f32x8!(P4asinf, 4.2163199048E-2);
    const_f32_as_f32x8!(P3asinf, 2.4181311049E-2);
    const_f32_as_f32x8!(P2asinf, 4.5470025998E-2);
    const_f32_as_f32x8!(P1asinf, 7.4953002686E-2);
    const_f32_as_f32x8!(P0asinf, 1.6666752422E-1);

    let xa = self.abs();
    // |x| >= 0.5 goes through the half-angle identity for accuracy.
    let big = xa.cmp_ge(f32x8::splat(0.5));

    let x1 = f32x8::splat(0.5) * (f32x8::ONE - xa);
    let x2 = xa * xa;
    let x3 = big.blend(x1, x2);

    let xb = x1.sqrt();

    let x4 = big.blend(xb, xa);

    let z = polynomial_4!(x3, P0asinf, P1asinf, P2asinf, P3asinf, P4asinf);
    let z = z.mul_add(x3 * x4, x4);

    let z1 = z + z;

    // Negative inputs reflect around pi.
    let z3 = self.cmp_lt(f32x8::ZERO).blend(f32x8::PI - z1, z1);
    let z4 = f32x8::FRAC_PI_2 - z.flip_signs(self);
    let acos = big.blend(z3, z4);

    acos
  }
794
  /// Lanewise arctangent in radians, result in `(-pi/2, pi/2)`.
  #[allow(non_upper_case_globals)]
  #[inline]
  pub fn atan(self) -> Self {
    // Minimax polynomial coefficients (Cephes-style single precision).
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let t = self.abs();

    // Range reduction: thresholds sqrt(2)-1 and sqrt(2)+1 split |x| into
    // three intervals, each mapped into the polynomial's accurate range.
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);
    let notbig = t.cmp_le(Self::SQRT_2 + Self::ONE);

    // Offset added back after the polynomial: 0, pi/4, or pi/2 per interval.
    let mut s = notbig.blend(Self::FRAC_PI_4, Self::FRAC_PI_2);
    s = notsmal & s;

    // Reduced argument z = a/b chosen per interval.
    let mut a = notbig & t;
    a = notsmal.blend(a - Self::ONE, a);
    let mut b = notbig & Self::ONE;
    b = notsmal.blend(b + t, b);
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // atan is odd: restore the sign of the input.
    re = (self.sign_bit()).blend(-re, re);

    re
  }
833
  /// Lanewise four-quadrant arctangent `atan2(self, x)` (self is the y
  /// coordinate), result in `(-pi, pi]`.
  #[allow(non_upper_case_globals)]
  #[inline]
  pub fn atan2(self, x: Self) -> Self {
    // Minimax polynomial coefficients (same as `atan`).
    const_f32_as_f32x8!(P3atanf, 8.05374449538E-2);
    const_f32_as_f32x8!(P2atanf, -1.38776856032E-1);
    const_f32_as_f32x8!(P1atanf, 1.99777106478E-1);
    const_f32_as_f32x8!(P0atanf, -3.33329491539E-1);

    let y = self;

    // Work in the first octant: take |x|, |y| and swap so the ratio <= 1.
    let x1 = x.abs();
    let y1 = y.abs();
    let swapxy = y1.cmp_gt(x1);
    let mut x2 = swapxy.blend(y1, x1);
    let mut y2 = swapxy.blend(x1, y1);

    // inf/inf would give NaN; masking with -1.0 bits turns those lanes into
    // equal finite values so the ratio below is well-defined.
    let both_infinite = x.is_inf() & y.is_inf();
    if both_infinite.any() {
      let minus_one = -Self::ONE;
      x2 = both_infinite.blend(x2 & minus_one, x2);
      y2 = both_infinite.blend(y2 & minus_one, y2);
    }

    let t = y2 / x2;

    // Same sqrt(2)-1 reduction threshold as `atan`.
    let notsmal = t.cmp_ge(Self::SQRT_2 - Self::ONE);

    let a = notsmal.blend(t - Self::ONE, t);
    let b = notsmal.blend(t + Self::ONE, Self::ONE);
    let s = notsmal & Self::FRAC_PI_4;
    let z = a / b;

    let zz = z * z;

    let mut re = polynomial_3!(zz, P0atanf, P1atanf, P2atanf, P3atanf);
    re = re.mul_add(zz * z, z) + s;

    // Undo the octant folding: swap, zero-input, and quadrant corrections.
    re = swapxy.blend(Self::FRAC_PI_2 - re, re);
    re = ((x | y).cmp_eq(Self::ZERO)).blend(Self::ZERO, re);
    re = (x.sign_bit()).blend(Self::PI - re, re);

    // Result is odd in y.
    re = (y.sign_bit()).blend(-re, re);

    re
  }
890
  /// Computes `(sin(self), cos(self))` in one pass (radians).
  /// Lanes whose reduced quadrant index exceeds the overflow limit fall back
  /// to `(0.0, 1.0)`.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn sin_cos(self) -> (Self, Self) {
    // Three-part pi/2 split for extended-precision (Cody-Waite style)
    // argument reduction: DP1F + DP2F + DP3F ~= pi/2.
    const_f32_as_f32x8!(DP1F, 0.78515625_f32 * 2.0);
    const_f32_as_f32x8!(DP2F, 2.4187564849853515625E-4_f32 * 2.0);
    const_f32_as_f32x8!(DP3F, 3.77489497744594108E-8_f32 * 2.0);

    // Taylor-like minimax coefficients for sin on the reduced range.
    const_f32_as_f32x8!(P0sinf, -1.6666654611E-1);
    const_f32_as_f32x8!(P1sinf, 8.3321608736E-3);
    const_f32_as_f32x8!(P2sinf, -1.9515295891E-4);

    // ... and for cos.
    const_f32_as_f32x8!(P0cosf, 4.166664568298827E-2);
    const_f32_as_f32x8!(P1cosf, -1.388731625493765E-3);
    const_f32_as_f32x8!(P2cosf, 2.443315711809948E-5);

    const_f32_as_f32x8!(TWO_OVER_PI, 2.0 / core::f32::consts::PI);

    let xa = self.abs();

    // Quadrant count y = round(|x| / (pi/2)), both as float and as int.
    let y = (xa * TWO_OVER_PI).round();
    let q: i32x8 = y.round_int();

    // Subtract y * pi/2 in three steps to keep the remainder accurate.
    let x = y.mul_neg_add(DP3F, y.mul_neg_add(DP2F, y.mul_neg_add(DP1F, xa)));

    // Polynomial approximations of sin and cos on the reduced argument.
    let x2 = x * x;
    let mut s = polynomial_2!(x2, P0sinf, P1sinf, P2sinf) * (x * x2) + x;
    let mut c = polynomial_2!(x2, P0cosf, P1cosf, P2cosf) * (x2 * x2)
      + f32x8::from(0.5).mul_neg_add(x2, f32x8::from(1.0));

    // Odd quadrants swap the roles of sin and cos.
    let swap = !(q & i32x8::from(1)).cmp_eq(i32x8::from(0));

    // Arguments too large for the reduction (or non-finite) fall back to
    // (0, 1).
    let mut overflow: f32x8 = cast(q.cmp_gt(i32x8::from(0x2000000)));
    overflow &= xa.is_finite();
    s = overflow.blend(f32x8::from(0.0), s);
    c = overflow.blend(f32x8::from(1.0), c);

    // Sign of sin: quadrant bit 1 xor the input's own sign bit.
    let mut sin1 = cast::<_, f32x8>(swap).blend(c, s);
    let sign_sin: i32x8 = (q << 30) ^ cast::<_, i32x8>(self);
    sin1 = sin1.flip_signs(cast(sign_sin));

    // Sign of cos depends only on the quadrant: bit 1 of (q + 1).
    let mut cos1 = cast::<_, f32x8>(swap).blend(s, c);
    let sign_cos: i32x8 = ((q + i32x8::from(1)) & i32x8::from(2)) << 30;
    cos1 ^= cast::<_, f32x8>(sign_cos);

    (sin1, cos1)
  }
944 #[inline]
945 #[must_use]
946 pub fn sin(self) -> Self {
947 let (s, _) = self.sin_cos();
948 s
949 }
950 #[inline]
951 #[must_use]
952 pub fn cos(self) -> Self {
953 let (_, c) = self.sin_cos();
954 c
955 }
956 #[inline]
957 #[must_use]
958 pub fn tan(self) -> Self {
959 let (s, c) = self.sin_cos();
960 s / c
961 }
  /// Converts each lane from radians to degrees.
  #[inline]
  #[must_use]
  pub fn to_degrees(self) -> Self {
    const_f32_as_f32x8!(RAD_TO_DEG_RATIO, 180.0_f32 / core::f32::consts::PI);
    self * RAD_TO_DEG_RATIO
  }
  /// Converts each lane from degrees to radians.
  #[inline]
  #[must_use]
  pub fn to_radians(self) -> Self {
    const_f32_as_f32x8!(DEG_TO_RAD_RATIO, core::f32::consts::PI / 180.0_f32);
    self * DEG_TO_RAD_RATIO
  }
  /// Lanewise approximate reciprocal `1/x`. On the AVX path this uses the
  /// hardware reciprocal approximation (low precision, not a full divide).
  #[inline]
  #[must_use]
  pub fn recip(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip(),
          b : self.b.recip(),
        }
      }
    }
  }
  /// Lanewise approximate reciprocal square root `1/sqrt(x)`. On the AVX
  /// path this uses the hardware rsqrt approximation (low precision).
  #[inline]
  #[must_use]
  pub fn recip_sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: reciprocal_sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.recip_sqrt(),
          b : self.b.recip_sqrt(),
        }
      }
    }
  }
  /// Lanewise square root (full precision, unlike `recip_sqrt`).
  #[inline]
  #[must_use]
  pub fn sqrt(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: sqrt_m256(self.avx) }
      } else {
        Self {
          a : self.a.sqrt(),
          b : self.b.sqrt(),
        }
      }
    }
  }
  /// Packs the sign bit of each lane into the low 8 bits of an `i32`
  /// (lane 0 -> bit 0, ... lane 7 -> bit 7).
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx)
      } else {
        // Combine the two 4-lane masks: high half occupies bits 4..=7.
        (self.b.move_mask() << 4) | self.a.move_mask()
      }
    }
  }
  /// `true` if any lane has its sign (mask) bit set.
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        move_mask_m256(self.avx) != 0
      } else {
        self.a.any() || self.b.any()
      }
    }
  }
  /// `true` if every lane has its sign (mask) bit set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx")] {
        // All 8 mask bits set.
        move_mask_m256(self.avx) == 0b11111111
      } else {
        self.a.all() && self.b.all()
      }
    }
  }
1049 #[inline]
1050 #[must_use]
1051 pub fn none(self) -> bool {
1052 !self.any()
1053 }
1054
  /// Computes `2^n` for integer-valued lanes `n`, by adding a bias+2^23
  /// offset so the integer lands in the mantissa, then shifting it up into
  /// the exponent field. Helper for `exp`; assumes `n` is in the exponent's
  /// valid range.
  #[inline]
  #[allow(non_upper_case_globals)]
  fn vm_pow2n(self) -> Self {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = self + (bias + pow2_23);
    let c = cast::<_, i32x8>(a) << 23;
    cast::<_, f32x8>(c)
  }
1064
  /// Lanewise `e^x`. Lanes with |x| >= 87.3 (or non-finite input) return 0
  /// — note this includes large *positive* arguments, which would otherwise
  /// overflow.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn exp(self) -> Self {
    // Taylor coefficients 1/k! for the expm1-style polynomial.
    const_f32_as_f32x8!(P0, 1.0 / 2.0);
    const_f32_as_f32x8!(P1, 1.0 / 6.0);
    const_f32_as_f32x8!(P2, 1. / 24.);
    const_f32_as_f32x8!(P3, 1. / 120.);
    const_f32_as_f32x8!(P4, 1. / 720.);
    const_f32_as_f32x8!(P5, 1. / 5040.);
    // ln(2) split into hi+lo parts for accurate range reduction.
    const_f32_as_f32x8!(LN2D_HI, 0.693359375);
    const_f32_as_f32x8!(LN2D_LO, -2.12194440e-4);
    let max_x = f32x8::from(87.3);
    // r = round(x / ln 2); then x := x - r*ln2 in two steps.
    let r = (self * Self::LOG2_E).round();
    let x = r.mul_neg_add(LN2D_HI, self);
    let x = r.mul_neg_add(LN2D_LO, x);
    // Polynomial for e^x - 1 on the reduced range, then scale by 2^r.
    let z = polynomial_5!(x, P0, P1, P2, P3, P4, P5);
    let x2 = x * x;
    let z = z.mul_add(x2, x);
    let n2 = Self::vm_pow2n(r);
    let z = (z + Self::ONE) * n2;
    let in_range = self.abs().cmp_lt(max_x);
    let in_range = in_range & self.is_finite();
    in_range.blend(z, Self::ZERO)
  }
1092
  /// Extracts each lane's unbiased binary exponent as a float (helper for
  /// `ln`/`pow`). Shifts the exponent bits down, splices them into a known
  /// float (2^23), then subtracts the offset and bias.
  #[inline]
  #[allow(non_upper_case_globals)]
  fn exponent(self) -> f32x8 {
    const_f32_as_f32x8!(pow2_23, 8388608.0);
    const_f32_as_f32x8!(bias, 127.0);
    let a = cast::<_, u32x8>(self);
    let b = a >> 23;
    let c = b | cast::<_, u32x8>(pow2_23);
    let d = cast::<_, f32x8>(c);
    let e = d - (pow2_23 + bias);
    e
  }
1105
  /// Returns each lane's mantissa rescaled into `[0.5, 1.0)` by keeping the
  /// mantissa bits and forcing the exponent of 0.5 (helper for `ln`/`pow`).
  #[inline]
  #[allow(non_upper_case_globals)]
  fn fraction_2(self) -> Self {
    let t1 = cast::<_, u32x8>(self);
    // 0x007FFFFF = mantissa mask; 0x3F000000 = exponent bits of 0.5.
    let t2 = cast::<_, u32x8>(
      (t1 & u32x8::from(0x007FFFFF)) | u32x8::from(0x3F000000),
    );
    cast::<_, f32x8>(t2)
  }
  /// Mask of lanes whose biased exponent field is zero, i.e. the value is
  /// exactly zero or a subnormal.
  #[inline]
  fn is_zero_or_subnormal(self) -> Self {
    let t = cast::<_, i32x8>(self);
    // Keep only the exponent bits.
    let t = t & i32x8::splat(0x7F800000);
    i32x8::round_float(t.cmp_eq(i32x8::splat(0)))
  }
  /// All lanes set to +infinity (bit pattern 0x7F800000).
  #[inline]
  fn infinity() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7F800000))
  }
  /// Quiet NaN returned by `ln` for invalid inputs. Note `&` binds tighter
  /// than `|`, so this is 0x7FC00000 | (0x101 & 0x003FFFFF) = a quiet NaN
  /// carrying payload 0x101.
  #[inline]
  fn nan_log() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Quiet NaN returned by `pow` for invalid inputs (same payload scheme as
  /// [`nan_log`](Self::nan_log)).
  #[inline]
  fn nan_pow() -> Self {
    cast::<_, f32x8>(i32x8::splat(0x7FC00000 | 0x101 & 0x003FFFFF))
  }
  /// Mask of lanes whose sign bit is set (includes -0.0 and negative NaNs).
  #[inline]
  pub fn sign_bit(self) -> Self {
    let t1 = cast::<_, i32x8>(self);
    // Arithmetic shift broadcasts the sign bit: all-ones when negative,
    // all-zeros otherwise.
    let t2 = t1 >> 31;
    // All-ones bits reinterpret as NaN, which compares unequal to 0.0, so
    // the negated equality turns the bit pattern into a proper lane mask.
    !cast::<_, f32x8>(t2).cmp_eq(f32x8::ZERO)
  }
1139
  /// Horizontal sum of all eight lanes.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> f32 {
    pick! {
      if #[cfg(target_feature="avx")]{
        // Fold 256 -> 128 -> 64 -> 32 bits, adding the two halves each step.
        let hi_quad = extract_m128_from_m256::<1>(self.avx);
        let lo_quad = cast_to_m128_from_m256(self.avx);
        let sum_quad = add_m128(lo_quad,hi_quad);
        let lo_dual = sum_quad;
        let hi_dual = move_high_low_m128(sum_quad,sum_quad);
        let sum_dual = add_m128(lo_dual,hi_dual);
        let lo = sum_dual;
        let hi = shuffle_abi_f32_all_m128::<0b_01>(sum_dual, sum_dual);
        let sum = add_m128_s(lo, hi);
        get_f32_from_m128_s(sum)
      } else {
        self.a.reduce_add() + self.b.reduce_add()
      }
    }
  }
1162
  /// Lanewise natural logarithm. Negative/subnormal inputs yield NaN (via
  /// `nan_log`), zero-or-subnormal yields +infinity on the slow path, and
  /// non-finite inputs pass through.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn ln(self) -> Self {
    const_f32_as_f32x8!(HALF, 0.5);
    // Minimax polynomial coefficients for ln(1+x) on the reduced range.
    const_f32_as_f32x8!(P0, 3.3333331174E-1);
    const_f32_as_f32x8!(P1, -2.4999993993E-1);
    const_f32_as_f32x8!(P2, 2.0000714765E-1);
    const_f32_as_f32x8!(P3, -1.6668057665E-1);
    const_f32_as_f32x8!(P4, 1.4249322787E-1);
    const_f32_as_f32x8!(P5, -1.2420140846E-1);
    const_f32_as_f32x8!(P6, 1.1676998740E-1);
    const_f32_as_f32x8!(P7, -1.1514610310E-1);
    const_f32_as_f32x8!(P8, 7.0376836292E-2);
    // ln(2) split hi+lo for accurate reconstruction.
    const_f32_as_f32x8!(LN2F_HI, 0.693359375);
    const_f32_as_f32x8!(LN2F_LO, -2.12194440e-4);
    const_f32_as_f32x8!(VM_SMALLEST_NORMAL, 1.17549435E-38);

    let x1 = self;
    // Split into mantissa in [0.5, 1) and integer exponent; re-center the
    // mantissa around 1 so the polynomial argument is small.
    let x = Self::fraction_2(x1);
    let e = Self::exponent(x1);
    let mask = x.cmp_gt(Self::SQRT_2 * HALF);
    let x = (!mask).blend(x + x, x);
    let fe = mask.blend(e + Self::ONE, e);
    let x = x - Self::ONE;
    let res = polynomial_8!(x, P0, P1, P2, P3, P4, P5, P6, P7, P8);
    let x2 = x * x;
    let res = x2 * x * res;
    // Reconstruct: ln(m) + e*ln(2), adding the lo part first.
    let res = fe.mul_add(LN2F_LO, res);
    let res = res + x2.mul_neg_add(HALF, x);
    let res = fe.mul_add(LN2F_HI, res);
    // Special-case handling only when some lane actually needs it.
    let overflow = !self.is_finite();
    let underflow = x1.cmp_lt(VM_SMALLEST_NORMAL);
    let mask = overflow | underflow;
    if !mask.any() {
      res
    } else {
      let is_zero = self.is_zero_or_subnormal();
      let res = underflow.blend(Self::nan_log(), res);
      let res = is_zero.blend(Self::infinity(), res);
      let res = overflow.blend(self, res);
      res
    }
  }
1208
1209 #[inline]
1210 #[must_use]
1211 pub fn log2(self) -> Self {
1212 Self::ln(self) * Self::LOG2_E
1213 }
1214 #[inline]
1215 #[must_use]
1216 pub fn log10(self) -> Self {
1217 Self::ln(self) * Self::LOG10_E
1218 }
1219
  /// Lanewise `self ^ y` for arbitrary real exponents, computed as
  /// `2^(y * log2(x))` with extended-precision bookkeeping. Handles zero and
  /// negative bases, overflow/underflow, and NaN propagation per lane.
  #[inline]
  #[must_use]
  #[allow(non_upper_case_globals)]
  pub fn pow_f32x8(self, y: Self) -> Self {
    // ln(2) split hi+lo, and log-polynomial coefficients (as in `ln`).
    const_f32_as_f32x8!(ln2f_hi, 0.693359375);
    const_f32_as_f32x8!(ln2f_lo, -2.12194440e-4);
    const_f32_as_f32x8!(P0logf, 3.3333331174E-1);
    const_f32_as_f32x8!(P1logf, -2.4999993993E-1);
    const_f32_as_f32x8!(P2logf, 2.0000714765E-1);
    const_f32_as_f32x8!(P3logf, -1.6668057665E-1);
    const_f32_as_f32x8!(P4logf, 1.4249322787E-1);
    const_f32_as_f32x8!(P5logf, -1.2420140846E-1);
    const_f32_as_f32x8!(P6logf, 1.1676998740E-1);
    const_f32_as_f32x8!(P7logf, -1.1514610310E-1);
    const_f32_as_f32x8!(P8logf, 7.0376836292E-2);

    // Taylor coefficients 1/k! for the exp polynomial.
    const_f32_as_f32x8!(p2expf, 1.0 / 2.0);
    const_f32_as_f32x8!(p3expf, 1.0 / 6.0);
    const_f32_as_f32x8!(p4expf, 1.0 / 24.0);
    const_f32_as_f32x8!(p5expf, 1.0 / 120.0);
    const_f32_as_f32x8!(p6expf, 1.0 / 720.0);
    const_f32_as_f32x8!(p7expf, 1.0 / 5040.0);

    // --- log phase: lg ~= ln(|x|) with an error estimate carried along ---
    let x1 = self.abs();
    let x = x1.fraction_2();
    let mask = x.cmp_gt(f32x8::SQRT_2 * f32x8::HALF);
    let x = (!mask).blend(x + x, x);

    let x = x - f32x8::ONE;
    let x2 = x * x;
    let lg1 = polynomial_8!(
      x, P0logf, P1logf, P2logf, P3logf, P4logf, P5logf, P6logf, P7logf, P8logf
    );
    let lg1 = lg1 * x2 * x;

    let ef = x1.exponent();
    let ef = mask.blend(ef + f32x8::ONE, ef);
    // e1: integer part of exponent*y; yr: its rounding remainder.
    let e1 = (ef * y).round();
    let yr = ef.mul_sub(y, e1);

    let lg = f32x8::HALF.mul_neg_add(x2, x) + lg1;
    // Error terms of the truncated products, folded back in below.
    let x2_err = (f32x8::HALF * x).mul_sub(x, f32x8::HALF * x2);
    let lg_err = f32x8::HALF.mul_add(x2, lg - x) - lg1;

    // --- combine phase: v = y*ln(x) reduced modulo ln(2) ---
    let e2 = (lg * y * f32x8::LOG2_E).round();
    let v = lg.mul_sub(y, e2 * ln2f_hi);
    let v = e2.mul_neg_add(ln2f_lo, v);
    let v = v - (lg_err + x2_err).mul_sub(y, yr * f32x8::LN_2);

    // --- exp phase: z = e^v, one more round of reduction ---
    let x = v;
    let e3 = (x * f32x8::LOG2_E).round();
    let x = e3.mul_neg_add(f32x8::LN_2, x);
    let x2 = x * x;
    let z = x2.mul_add(
      polynomial_5!(x, p2expf, p3expf, p4expf, p5expf, p6expf, p7expf),
      x + f32x8::ONE,
    );

    // Total power-of-two scale, applied by adding to the exponent field.
    let ee = e1 + e2 + e3;
    let ei = cast::<_, i32x8>(ee.round_int());
    let ej = cast::<_, i32x8>(ei + (cast::<_, i32x8>(z) >> 23));

    // Out-of-range exponents: clamp to inf / zero.
    let overflow = cast::<_, f32x8>(ej.cmp_gt(i32x8::splat(0x0FF)))
      | (ee.cmp_gt(f32x8::splat(300.0)));
    let underflow = cast::<_, f32x8>(ej.cmp_lt(i32x8::splat(0x000)))
      | (ee.cmp_lt(f32x8::splat(-300.0)));

    let z = cast::<_, f32x8>(cast::<_, i32x8>(z) + (ei << 23));
    let z = underflow.blend(f32x8::ZERO, z);
    let z = overflow.blend(Self::infinity(), z);

    // 0^y: +inf for y<0, 1 for y==0, 0 for y>0.
    let x_zero = self.is_zero_or_subnormal();
    let z = x_zero.blend(
      y.cmp_lt(f32x8::ZERO).blend(
        Self::infinity(),
        y.cmp_eq(f32x8::ZERO).blend(f32x8::ONE, f32x8::ZERO),
      ),
      z,
    );

    // Negative base: defined only for integer y (sign from y's parity);
    // otherwise NaN.
    let x_sign = self.sign_bit();
    let z = if x_sign.any() {
      let yi = y.cmp_eq(y.round());

      // Sign bit set iff round(y) is odd.
      let y_odd = cast::<_, i32x8>(y.round_int() << 31).round_float();

      let z1 =
        yi.blend(z | y_odd, self.cmp_eq(Self::ZERO).blend(z, Self::nan_pow()));

      x_sign.blend(z1, z)
    } else {
      z
    };

    // Non-finite operands: propagate NaN as self + y, else keep z.
    let x_finite = self.is_finite();
    let y_finite = y.is_finite();
    let e_finite = ee.is_finite();
    if (x_finite & y_finite & (e_finite | x_zero)).all() {
      return z;
    }

    (self.is_nan() | y.is_nan()).blend(self + y, z)
  }
1328 #[inline]
1329 pub fn powf(self, y: f32) -> Self {
1330 Self::pow_f32x8(self, f32x8::splat(y))
1331 }
1332
  /// Treats `data` as an 8x8 row-major matrix (one row per vector) and
  /// returns its transpose.
  #[must_use]
  #[inline]
  pub fn transpose(data: [f32x8; 8]) -> [f32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx")] {
        // Stage 1: interleave adjacent row pairs within each 128-bit half.
        let a0 = unpack_lo_m256(data[0].avx, data[1].avx);
        let a1 = unpack_hi_m256(data[0].avx, data[1].avx);
        let a2 = unpack_lo_m256(data[2].avx, data[3].avx);
        let a3 = unpack_hi_m256(data[2].avx, data[3].avx);
        let a4 = unpack_lo_m256(data[4].avx, data[5].avx);
        let a5 = unpack_hi_m256(data[4].avx, data[5].avx);
        let a6 = unpack_lo_m256(data[6].avx, data[7].avx);
        let a7 = unpack_hi_m256(data[6].avx, data[7].avx);

        // Builds an _MM_SHUFFLE-style immediate from four 2-bit selectors.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: gather matching 64-bit pairs across row groups.
        let b0 = shuffle_m256::<SHUFF_LO>(a0,a2);
        let b1 = shuffle_m256::<SHUFF_HI>(a0,a2);
        let b2 = shuffle_m256::<SHUFF_LO>(a1,a3);
        let b3 = shuffle_m256::<SHUFF_HI>(a1,a3);
        let b4 = shuffle_m256::<SHUFF_LO>(a4,a6);
        let b5 = shuffle_m256::<SHUFF_HI>(a4,a6);
        let b6 = shuffle_m256::<SHUFF_LO>(a5,a7);
        let b7 = shuffle_m256::<SHUFF_HI>(a5,a7);

        // Stage 3: swap the 128-bit halves between the two row groups
        // (0x20 = low halves together, 0x31 = high halves together).
        [
          f32x8 { avx: permute2z_m256::<0x20>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x20>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x20>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x20>(b3, b7) },
          f32x8 { avx: permute2z_m256::<0x31>(b0, b4) },
          f32x8 { avx: permute2z_m256::<0x31>(b1, b5) },
          f32x8 { avx: permute2z_m256::<0x31>(b2, b6) },
          f32x8 { avx: permute2z_m256::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: column `index` of the input becomes row `index`
        // of the output.
        #[inline(always)]
        fn transpose_column(data: &[f32x8; 8], index: usize) -> f32x8 {
          f32x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }
1406
1407 #[inline]
1408 pub fn to_array(self) -> [f32; 8] {
1409 cast(self)
1410 }
1411
  /// Borrows the lanes as a `[f32; 8]` (valid because the type is `Pod`
  /// with the same size and alignment requirements satisfied).
  #[inline]
  pub fn as_array_ref(&self) -> &[f32; 8] {
    cast_ref(self)
  }
1416
  /// Mutably borrows the lanes as a `[f32; 8]`.
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [f32; 8] {
    cast_mut(self)
  }
1421}
1422
impl Not for f32x8 {
  type Output = Self;
  /// Bitwise NOT of the raw lane bits (used to invert comparison masks).
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx")] {
        Self { avx: self.avx.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}