wide/
lib.rs

1#![no_std]
2#![allow(non_camel_case_types)]
3#![warn(clippy::missing_inline_in_public_items)]
4#![allow(clippy::eq_op)]
5#![allow(clippy::excessive_precision)]
6#![allow(clippy::let_and_return)]
7#![allow(clippy::unusual_byte_groupings)]
8#![allow(clippy::misrefactored_assign_op)]
9#![cfg_attr(test, allow(clippy::approx_constant))]
10
11//! A crate to help you go wide.
12//!
13//! This crate provides SIMD-compatible data types.
14//!
15//! When possible, explicit SIMD is used with all the math operations here. As a
16//! fallback, the fact that all the elements of a fixed length array are doing
17//! the same thing will often make LLVM notice that it should use SIMD
18//! instructions to complete the task. In the worst case, the code just becomes
19//! totally scalar (though the math is still correct, at least).
20//!
21//! ## Crate Features
22//!
23//! * `std`: This causes the crate to link to `std`.
24//!   * Currently this just improves the performance of `sqrt` when an explicit
25//!     SIMD `sqrt` isn't available.
26
27// Note(Lokathor): Due to standard library magic, the std-only methods for f32
28// and f64 will automatically be available simply by declaring this.
29#[cfg(feature = "std")]
30extern crate std;
31
32// TODO
33// Add/Sub/Mul/Div with constant
34// Shuffle left/right/by index
35
36use core::{
37  fmt::{
38    Binary, Debug, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
39  },
40  ops::*,
41};
42
43#[allow(unused_imports)]
44use safe_arch::*;
45
46use bytemuck::*;
47
48#[macro_use]
49mod macros;
50
/// Compile-time branch selection in the style of `cfg_if!`.
///
/// Accepts an `if #[cfg(..)] { .. } else if #[cfg(..)] { .. } else { .. }`
/// chain (the trailing `else` is optional). Each branch is emitted under a
/// `cfg` that requires its own predicates and the negation of every predicate
/// seen in the branches before it, so at most one branch survives.
macro_rules! pick {
  // Chain that ends in a plain `else` branch.
  ($(if #[cfg($($cond:meta),*)] {
      $($branch:tt)*
    })else+ else {
      $($fallback:tt)*
    }) => {
    pick!{
      @__forests [ ] ;
      $( [ {$($cond),*} {$($branch)*} ], )*
      [ { } {$($fallback)*} ],
    }
  };
  // Chain without a trailing `else` branch.
  (if #[cfg($($head_cond:meta),*)] {
      $($head_branch:tt)*
    } $(else if #[cfg($($next_cond:meta),*)] {
      $($next_branch:tt)*
    })*) => {
    pick!{
      @__forests [ ] ;
      [ {$($head_cond),*} {$($head_branch)*} ],
      $( [ {$($next_cond),*} {$($next_branch)*} ], )*
    }
  };
  // Internal: no branches left to emit.
  (@__forests [$($seen:meta,)*];) => {
    /* halt expansion */
  };
  // Internal: emit the first remaining branch, gated on its own predicates
  // and on none of the earlier ones holding, then recurse on the rest with
  // this branch's predicates added to the "already seen" list.
  (@__forests [$($seen:meta,)*]; [{$($cond:meta),*} {$($body:tt)*}], $($tail:tt)*) => {
    #[cfg(all( $($cond,)* not(any($($seen),*)) ))]
    pick!{ @__identity $($body)* }
    pick!{ @__forests [ $($seen,)* $($cond,)* ] ; $($tail)* }
  };
  // Internal: re-emit tokens unchanged so one `#[cfg]` can cover them all.
  (@__identity $($body:tt)*) => {
    $($body)*
  };
}
86
87// TODO: make these generic over `mul_add`? Worth it?
88
/// Evaluates `c0 + c1*x + c2*x^2` using `mul_add` steps.
///
/// `$x` is evaluated once; the value type must be `Copy` and provide
/// `mul_add` and `*`.
macro_rules! polynomial_2 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let quadratic = $c2;
    let linear = t.mul_add($c1, $c0);
    t_sq.mul_add(quadratic, linear)
  }};
}
96
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3` using `mul_add` steps.
///
/// The polynomial is split as `(c3*x + c2) * x^2 + (c1*x + c0)`.
macro_rules! polynomial_3 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let upper = $c3.mul_add(t, $c2);
    let lower = $c1.mul_add(t, $c0);
    upper.mul_add(t_sq, lower)
  }};
}
104
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4`.
///
/// The cubic part is computed with `mul_add` steps; the `c4*x^4` term is
/// added afterwards with a plain multiply and add.
macro_rules! polynomial_4 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr ,$c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let upper = $c3.mul_add(t, $c2);
    let lower = $c1.mul_add(t, $c0);
    let cubic = upper.mul_add(t_sq, lower);
    cubic + $c4 * t_quad
  }};
}
113
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + c5*x^5`.
///
/// Grouped as `(c3*x + c2) * x^2 + ((c5*x + c4) * x^4 + (c1*x + c0))`, with
/// every step a `mul_add`.
macro_rules! polynomial_5 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let middle = $c3.mul_add(t, $c2);
    let top = $c5.mul_add(t, $c4);
    let bottom = $c1.mul_add(t, $c0);
    let top_and_bottom = top.mul_add(t_quad, bottom);
    middle.mul_add(t_sq, top_and_bottom)
  }};
}
124
/// Evaluates the degree-5 polynomial whose leading coefficient is one:
/// `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + x^5`.
///
/// The `x^5` term comes from folding `x` into the `x^4` factor as
/// `(c4 + x) * x^4`.
macro_rules! polynomial_5n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let middle = t.mul_add($c3, $c2);
    let top = $c4 + t;
    let bottom = t.mul_add($c1, $c0);
    let top_and_bottom = t_quad.mul_add(top, bottom);
    t_sq.mul_add(middle, top_and_bottom)
  }};
}
133
/// Evaluates `c0 + c1*x + ... + c6*x^6` using `mul_add` steps.
///
/// Grouped as `x^4 * (c6*x^2 + c5*x + c4) + (x^2 * (c3*x + c2) + (c1*x + c0))`.
macro_rules! polynomial_6 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr ,$c6:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let high = t_sq.mul_add($c6, t.mul_add($c5, $c4));
    let middle = t.mul_add($c3, $c2);
    let bottom = t.mul_add($c1, $c0);
    t_quad.mul_add(high, t_sq.mul_add(middle, bottom))
  }};
}
145
/// Evaluates the degree-6 polynomial whose leading coefficient is one:
/// `c0 + c1*x + ... + c5*x^5 + x^6`.
///
/// The `x^6` term comes from `x^4 * (c5*x + (x^2 + c4))`.
macro_rules! polynomial_6n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let high = t.mul_add($c5, t_sq + $c4);
    let middle = t.mul_add($c3, $c2);
    let bottom = t.mul_add($c1, $c0);
    t_quad.mul_add(high, t_sq.mul_add(middle, bottom))
  }};
}
157
/// Evaluates `c0 + c1*x + ... + c8*x^8` using `mul_add` steps.
///
/// Grouped as
/// `x^4 * (x^2 * (c7*x + c6) + (c5*x + c4))
///   + (x^8 * c8 + (x^2 * (c3*x + c2) + (c1*x + c0)))`.
macro_rules! polynomial_8 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr,  $c6:expr, $c7:expr, $c8:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_quad = t_sq * t_sq;
    let t_oct = t_quad * t_quad;
    // Terms of degree 4..=7, still to be multiplied by x^4.
    let upper = t_sq.mul_add($c7.mul_add(t, $c6), t.mul_add($c5, $c4));
    let leading = $c8;
    // Terms of degree 0..=3.
    let lower = t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0));
    t_quad.mul_add(upper, t_oct.mul_add(leading, lower))
  }};
}
170
/// Evaluates `x + c2*x^2 + c3*x^3 + ... + c13*x^13`.
///
/// Only `c2..=c13` are passed in: the constant term is fixed at zero and the
/// linear coefficient at one.
macro_rules! polynomial_13 {
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let t = $x;
    let t2 = t * t;
    let t4 = t2 * t2;
    let t8 = t4 * t4;
    // Degrees 8..=13, expressed relative to x^8.
    let deg_12_13 = t.mul_add($c13, $c12);
    let deg_8_11 = t2.mul_add(t.mul_add($c11, $c10), t.mul_add($c9, $c8));
    let upper = t4.mul_add(deg_12_13, deg_8_11);
    // Degrees 1..=7; the bare `t` is the implied `1 * x` term.
    let deg_4_7 = t2.mul_add(t.mul_add($c7, $c6), t.mul_add($c5, $c4));
    let deg_1_3 = t2.mul_add(t.mul_add($c3, $c2), t);
    let lower = t4.mul_add(deg_4_7, deg_1_3);
    t8.mul_add(upper, lower)
  }};
}
190
/// Degree-13 polynomial with `c0 = 0` and `c1 = 1`; takes `c2..=c13`.
///
/// Returns
/// `((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8
///   + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x))`,
/// with every step a `mul_add`.
macro_rules! polynomial_13m {
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let v = $x;
    let v2 = v * v;
    let v4 = v2 * v2;
    let v8 = v4 * v4;

    // Factor that gets multiplied by x^8.
    let high_half = v4.mul_add(
      v.mul_add($c13, $c12),
      v2.mul_add(v.mul_add($c11, $c10), v.mul_add($c9, $c8)),
    );
    // Everything below x^8, ending in the implied `1 * x` term.
    let low_half = v4.mul_add(
      v2.mul_add(v.mul_add($c7, $c6), v.mul_add($c5, $c4)),
      v2.mul_add(v.mul_add($c3, $c2), v),
    );
    v8.mul_add(high_half, low_half)
  }};
}
212
213mod f32x8_;
214pub use f32x8_::*;
215
216mod f32x4_;
217pub use f32x4_::*;
218
219mod f64x4_;
220pub use f64x4_::*;
221
222mod f64x2_;
223pub use f64x2_::*;
224
225mod i8x16_;
226pub use i8x16_::*;
227
228mod i16x16_;
229pub use i16x16_::*;
230
231mod i8x32_;
232pub use i8x32_::*;
233
234mod i16x8_;
235pub use i16x8_::*;
236
237mod i32x4_;
238pub use i32x4_::*;
239
240mod i32x8_;
241pub use i32x8_::*;
242
243mod i64x2_;
244pub use i64x2_::*;
245
246mod i64x4_;
247pub use i64x4_::*;
248
249mod u8x16_;
250pub use u8x16_::*;
251
252mod u16x8_;
253pub use u16x8_::*;
254
255mod u32x4_;
256pub use u32x4_::*;
257
258mod u32x8_;
259pub use u32x8_::*;
260
261mod u64x2_;
262pub use u64x2_::*;
263
264mod u64x4_;
265pub use u64x4_::*;
266
267#[allow(non_camel_case_types)]
268#[repr(C, align(16))]
269#[rustfmt::skip]
270union ConstUnionHack128bit {
271  f32a4: [f32; 4],
272  f64a2: [f64; 2],
273  i8a16: [i8; 16],
274  i16a8: [i16; 8],
275  i32a4: [i32; 4],
276  i64a2: [i64; 2],
277  u8a16: [u8; 16],
278  u16a8: [u16; 8],
279  u32a4: [u32; 4],
280  u64a2: [u64; 2],
281  f32x4: f32x4,
282  f64x2: f64x2,
283  i8x16: i8x16,
284  i16x8: i16x8,
285  i32x4: i32x4,
286  i64x2: i64x2,
287  u8x16: u8x16,
288  u16x8: u16x8,
289  u32x4: u32x4,
290  u64x2: u64x2,
291  u128:  u128,
292}
293
294#[allow(non_camel_case_types)]
295#[repr(C, align(16))]
296#[rustfmt::skip]
297union ConstUnionHack256bit {
298  f32a8:  [f32; 8],
299  f64a4:  [f64; 4],
300  i8a32:  [i8; 32],
301  i16a16: [i16; 16],
302  i32a8:  [i32; 8],
303  i64a4:  [i64; 4],
304  u8a32:  [u8; 32],
305  u16a16: [u16; 16],
306  u32a8:  [u32; 8],
307  u64a4:  [u64; 4],
308  u128x2: [u128; 2],
309  f32x8:  f32x8,
310  f64x4:  f64x4,
311  i8x32:  i8x32,
312  i16x16: i16x16,
313  i32x8:  i32x8,
314  i64x4:  i64x4,
315  // u8x32:  u8x32,
316  // u16x16: u16x16,
317  u32x8:  u32x8,
318  u64x4:  u64x4,
319}
320
/// Blends `y` and `n` under `mask` using only XOR and AND.
///
/// Computes `n ^ ((n ^ y) & mask)`. For types whose `^` and `&` are plain
/// bitwise operations this picks each bit from `y` where `mask` is set and
/// from `n` where it is clear.
#[allow(dead_code)]
fn generic_bit_blend<T>(mask: T, y: T, n: T) -> T
where
  T: Copy + BitXor<Output = T> + BitAnd<Output = T>,
{
  // Bits in which the two candidates disagree.
  let differing = n ^ y;
  // Of those, the ones the mask says should come from `y`.
  let taken_from_y = differing & mask;
  n ^ taken_from_y
}
328
/// Given `type.op(type)` and that the type is `Copy`, implements
/// `type.op(&type)` by dereferencing the right-hand side and forwarding to
/// the by-value operator.
///
/// Input: one or more `(Trait, method) => [Type, ...]` entries separated by
/// commas; a trailing comma after the last entry is allowed.
macro_rules! bulk_impl_op_ref_self_for {
  ($(($op:ident, $method:ident) => [$($t:ty),+]),+ $(,)?) => {
    $( // do each trait/list matching given
      $( // do the current trait for each type in its list.
        impl $op<&Self> for $t {
          type Output = Self;
          // No `#[must_use]` here: on a function inside a trait
          // implementation the attribute has no effect.
          #[inline]
          fn $method(self, rhs: &Self) -> Self::Output {
            // `*rhs` copies the operand so the by-value impl is selected.
            self.$method(*rhs)
          }
        }
      )+
    )+
  };
}
346
347bulk_impl_op_ref_self_for! {
348  (Add, add) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
349  (Sub, sub) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
350  (Mul, mul) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
351  (Div, div) => [f32x8, f32x4, f64x4, f64x2],
352  (BitAnd, bitand) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
353  (BitOr, bitor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
354  (BitXor, bitxor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
355}
356
/// Given `type.op(rhs)` and that the type is `Copy`, implements
/// `type.op_assign(rhs)` by computing the result and storing it back.
///
/// Input: one or more `(AssignTrait<Rhs>, method, method_assign) => [Type, ...]`
/// entries separated by commas; a trailing comma is allowed.
macro_rules! bulk_impl_op_assign_for {
  ($(($assign_trait:ident<$operand:ty>, $method:ident, $method_assign:ident) => [$($target:ty),+]),+ $(,)?) => {
    $( // one round per assign-trait entry
      $( // one impl per type listed for that entry
        impl $assign_trait<$operand> for $target {
          #[inline]
          fn $method_assign(&mut self, rhs: $operand) {
            let updated = self.$method(rhs);
            *self = updated;
          }
        }
      )+
    )+
  };
}
372
373// Note: remember to update bulk_impl_op_ref_self_for first or this will give
374// weird errors!
375bulk_impl_op_assign_for! {
376  (AddAssign<Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
377  (AddAssign<&Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
378  (SubAssign<Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
379  (SubAssign<&Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
380  (MulAssign<Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
381  (MulAssign<&Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
382  (DivAssign<Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
383  (DivAssign<&Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
384  (BitAndAssign<Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
385  (BitAndAssign<&Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
386  (BitOrAssign<Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
387  (BitOrAssign<&Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
388  (BitXorAssign<Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
389  (BitXorAssign<&Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
390}
391
392macro_rules! impl_simple_neg {
393  ($($t:ty),+ $(,)?) => {
394    $(
395      impl Neg for $t {
396        type Output = Self;
397        #[inline]
398        #[must_use]
399        fn neg(self) -> Self::Output {
400          Self::default() - self
401        }
402      }
403      impl Neg for &'_ $t {
404        type Output = $t;
405        #[inline]
406        #[must_use]
407        fn neg(self) -> Self::Output {
408          <$t>::default() - *self
409        }
410      }
411    )+
412  };
413}
414
415impl_simple_neg! {
416  f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
417}
418
/// Implements `Not` for each listed type, and for references to it, by
/// XOR-ing the value with an all-ones bit pattern.
///
/// The all-ones value is built from `size_of::<T>()` bytes of `u8::MAX`, so
/// the cast always has matching sizes, whatever the width of the type. A
/// fixed `u128` source only matches 16-byte types, and `cast` panics when the
/// two sizes differ.
macro_rules! impl_simple_not {
  ($($t:ty),+ $(,)?) => {
    $(
      impl Not for $t {
        type Output = Self;
        #[inline]
        fn not(self) -> Self::Output {
          self
            ^ cast::<[u8; core::mem::size_of::<$t>()], $t>(
              [u8::MAX; core::mem::size_of::<$t>()],
            )
        }
      }
      impl Not for &'_ $t {
        type Output = $t;
        #[inline]
        fn not(self) -> Self::Output {
          *self
            ^ cast::<[u8; core::mem::size_of::<$t>()], $t>(
              [u8::MAX; core::mem::size_of::<$t>()],
            )
        }
      }
    )+
  };
}
441
442impl_simple_not! {
443  f32x4, i8x32, i8x16, i16x8, i16x16, i32x4, i64x2, u8x16, u16x8, u32x4, u64x2,
444}
445
/// Implements `core::iter::Sum<RHS>` for each listed type, for every `RHS`
/// the type can `+=`.
///
/// The running total starts at `Self::zeroed()` and each item is added with
/// `+=`; an empty iterator yields that starting value.
macro_rules! impl_simple_sum {
  ($($t:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Sum<RHS> for $t where $t: AddAssign<RHS> {
        #[inline]
        fn sum<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::zeroed(), |mut running, item| {
            running += item;
            running
          })
        }
      }
    )+
  };
}
462
463impl_simple_sum! {
464  f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
465}
466
/// Implements `core::iter::Product<RHS>` for each listed floating point type,
/// for every `RHS` the type can `*=`.
///
/// The running product starts at `Self::from(1.0)` and each item is folded in
/// with `*=`; an empty iterator yields that starting value.
macro_rules! impl_floating_product {
  ($($t:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $t where $t: MulAssign<RHS> {
        #[inline]
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1.0), |mut running, factor| {
            running *= factor;
            running
          })
        }
      }
    )+
  };
}
483
484impl_floating_product! {
485  f32x8, f32x4, f64x4, f64x2
486}
487
/// Implements `core::iter::Product<RHS>` for each listed integer type, for
/// every `RHS` the type can `*=`.
///
/// The running product starts at `Self::from(1)` and each item is folded in
/// with `*=`; an empty iterator yields that starting value.
macro_rules! impl_integer_product {
  ($($t:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $t where $t: MulAssign<RHS> {
        #[inline]
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1), |mut running, factor| {
            running *= factor;
            running
          })
        }
      }
    )+
  };
}
504
505impl_integer_product! {
506  i16x8, i32x4, i32x8,
507}
508
/// Implements `From<a> for b` and `From<b> for a` for every `(a, b)` pair by
/// just calling `cast` in each direction.
///
/// Input: one or more `(ArrayType, SimdType)` pairs separated by commas; a
/// trailing comma is allowed. Both types of a pair must be the same size for
/// `cast` to accept them.
macro_rules! impl_from_a_for_b_with_cast {
  ($(($arr:ty, $simd:ty)),+  $(,)?) => {
    $(impl From<$arr> for $simd {
      // No `#[must_use]` on these: on a function inside a trait
      // implementation the attribute has no effect.
      #[inline]
      fn from(arr: $arr) -> Self {
        cast(arr)
      }
    }
    impl From<$simd> for $arr {
      #[inline]
      fn from(simd: $simd) -> Self {
        cast(simd)
      }
    })+
  };
}
528
529impl_from_a_for_b_with_cast! {
530  ([f32;8], f32x8),
531  ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
532  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
533  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
534}
535
/// For every `([elem; len], simd)` pair, implements `From<elem> for simd` and
/// an inherent `simd::splat(elem)`; both fill all `len` lanes with the one
/// value by casting `[elem; len]` to the SIMD type.
///
/// Input: one or more `([ElemType; LEN], SimdType)` pairs separated by commas;
/// a trailing comma is allowed.
macro_rules! impl_from_single_value {
  ($(([$elem:ty;$len:expr], $simd:ty)),+  $(,)?) => {
    $(impl From<$elem> for $simd {
      /// Splats the single value given across all lanes.
      // No `#[must_use]` here: on a function inside a trait implementation
      // the attribute has no effect.
      #[inline]
      fn from(elem: $elem) -> Self {
        cast([elem; $len])
      }
    }
    impl $simd {
      /// Splats the single value given across all lanes.
      #[inline]
      #[must_use]
      pub fn splat(elem: $elem) -> $simd {
        cast([elem; $len])
      }
    })+
  };
}
555
556impl_from_single_value! {
557  ([f32;8], f32x8),
558  ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
559  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
560  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
561}
562
/// formatter => [(arr, simd)+],+
///
/// For each formatting trait and each `(arr, simd)` pair, implements the
/// trait for `simd` by casting the value to `arr` and formatting every
/// element with the same trait and the same `Formatter`. The elements are
/// wrapped in parentheses and separated by `", "`.
macro_rules! impl_formatter_for {
  ($($trait:ident => [$(($arr:ty, $simd:ty)),+]),+ $(,)?) => {
    $( // per formatting trait
      $( // per (array, simd) pair
        impl $trait for $simd {
          #[allow(clippy::missing_inline_in_public_items)]
          fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
            let lanes: $arr = cast(*self);
            let mut remaining = lanes.iter();
            write!(f, "(")?;
            // The first element has no separator in front of it.
            if let Some(first) = remaining.next() {
              $trait::fmt(first, f)?;
              for lane in remaining {
                write!(f, ", ")?;
                $trait::fmt(lane, f)?;
              }
            }
            write!(f, ")")
          }
        }
      )+
    )+
  }
}
586
587impl_formatter_for! {
588  Binary => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
589  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
590  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
591  Debug => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
592  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
593  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
594  Display => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
595  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
596  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
597  LowerExp => [([f32;8], f32x8), ([f32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
598  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
599  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
600  LowerHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
601  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
602  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
603  Octal => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
604  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
605  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
606  UpperExp => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
607  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
608  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
609  UpperHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
610  ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
611  ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
612}
613
614// With const generics this could be simplified I hope
615macro_rules! from_array {
616  ($ty:ty,$dst:ty,$dst_wide:ident,32) => {
617    impl From<&[$ty]> for $dst_wide {
618      #[inline]
619      fn from(src: &[$ty]) -> $dst_wide {
620        match src.len() {
621          32 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst, src[31] as $dst,]),
622          31 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst,0 as $dst,]),
623          30 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst,0 as $dst,0 as $dst,]),
624          29 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
625          28 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
626          27 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
627          26 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
628          25 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
629          24 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
630          23 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
631          22 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
632          21 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
633          20 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
634          19 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
635          18 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
636          17 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
637          16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
638          15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
639          14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
640          13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
641          12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
642          11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
643          10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
644          9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
645          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
646          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
647          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
648          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
649          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
650          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
651          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
652          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
653          _ => panic!(
654            "Converting from an array larger than what can be stored in $dst_wide"
655          ),
656        }
657      }
658    }
659  };
660  ($ty:ty,$dst:ty,$dst_wide:ident,16) => {
661    impl From<&[$ty]> for $dst_wide {
662      #[inline]
663      fn from(src: &[$ty]) -> $dst_wide {
664        match src.len() {
665          16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,]),
666          15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,]),
667          14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,]),
668          13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
669          12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
670          11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
671          10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
672          9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
673          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
674          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
675          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
676          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
677          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
678          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
679          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
680          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
681          _ => panic!(
682            "Converting from an array larger than what can be stored in $dst_wide"
683          ),
684        }
685      }
686    }
687  };
688  ($ty:ty,$dst:ty,$dst_wide:ident,8) => {
689    impl From<&[$ty]> for $dst_wide {
690      #[inline]
691      fn from(src: &[$ty]) -> $dst_wide {
692        match src.len() {
693          8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,]),
694          7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,]),
695          6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,]),
696          5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
697          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
698          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
699          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
700          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
701          0 => $dst_wide::from([0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
702          _ => panic!(
703            "Converting from an array larger than what can be stored in $dst_wide"
704          ),
705        }
706      }
707    }
708  };
709  ($ty:ty,$dst:ty,$dst_wide:ident,4) => {
710    impl From<&[$ty]> for $dst_wide {
711      #[inline]
712      fn from(src: &[$ty]) -> $dst_wide {
713        match src.len() {
714          4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,]),
715          3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,]),
716          2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,]),
717          1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
718          _ => panic!(
719            "Converting from an array larger than what can be stored in $dst_wide"
720          ),
721        }
722      }
723    }
724  };
725}
726
// Slice conversions. Each line below implements `From<&[src]>` for the named
// wide type: arguments are (slice element type, lane type, wide type, lane
// count). Elements are cast to the lane type with `as`, lanes the slice does
// not cover are zero, and a slice longer than the lane count panics.
from_array!(i8, i8, i8x32, 32);
from_array!(i8, i8, i8x16, 16);
from_array!(i8, i32, i32x8, 8);
from_array!(u8, u8, u8x16, 16);
from_array!(i16, i16, i16x16, 16);
from_array!(i32, i32, i32x8, 8);
from_array!(f32, f32, f32x8, 8);
from_array!(f32, f32, f32x4, 4);
from_array!(f64, f64, f64x4, 4);
from_array!(u64, u64, u64x4, 4);
from_array!(i64, i64, i64x4, 4);
738
/// Software square root of an `f64`, computed bit by bit with integer
/// arithmetic only (the classic fdlibm `e_sqrt` scheme).
///
/// The value is split into a high word `ix0` (sign, exponent, top 20 mantissa
/// bits) and a low word `ix1` (bottom 32 mantissa bits).
///
/// * NaN yields NaN, `+inf` yields `+inf`, `-inf` yields NaN.
/// * `+0.0` and `-0.0` are returned unchanged.
/// * Any other negative input yields NaN.
/// * Subnormal inputs are normalised before the digit-by-digit loop runs.
#[allow(unused)]
fn software_sqrt(x: f64) -> f64 {
  const SIGN: u32 = 0x8000_0000;

  /// Shifts the 64-bit quantity `hi:lo` left by one bit, carrying the top bit
  /// of `lo` into `hi`; the bit leaving `lo` is the only one that is dropped.
  fn shift_left_one(hi: &mut i32, lo: &mut u32) {
    *hi += *hi + (*lo >> 31) as i32;
    *lo <<= 1;
  }

  if x.is_nan() {
    return f64::NAN;
  }

  // `to_bits` is endian-independent, so no per-target word ordering is needed.
  let bits = x.to_bits();
  let mut ix0 = (bits >> 32) as i32;
  let mut ix1 = bits as u32;

  // Infinities: +inf stays +inf, -inf becomes inf - inf = NaN.
  if ix0 & 0x7ff0_0000 == 0x7ff0_0000 {
    return x * x + x;
  }

  // Zeros keep their sign; every other negative value has no real root.
  if ix0 <= 0 {
    if ((ix0 & 0x7fff_ffff) as u32 | ix1) == 0 {
      return x;
    } else if ix0 < 0 {
      return (x - x) / (x - x);
    }
  }

  // Normalise: `m` becomes the unbiased exponent, `ix0:ix1` the mantissa with
  // its leading one at bit 20 of `ix0`.
  let mut m = ix0 >> 20;
  if m == 0 {
    // Subnormal. Move whole 21-bit chunks up while the high word is empty.
    while ix0 == 0 {
      m -= 21;
      ix0 |= (ix1 >> 11) as i32;
      ix1 <<= 21;
    }
    // Then shift bit by bit until the leading one reaches bit 20.
    let mut i = 0_i32;
    while ix0 & 0x0010_0000 == 0 {
      ix0 <<= 1;
      i += 1;
    }
    m -= i - 1;
    // The `i` bits vacated at the bottom of `ix0` are refilled from the top
    // `i` bits of `ix1`. With `i == 0` there is nothing to move, and a shift
    // by 32 would be out of range.
    if i > 0 {
      ix0 |= (ix1 >> (32 - i)) as i32;
      ix1 <<= i;
    }
  }
  // Remove the exponent bias and make the implicit leading one explicit.
  m -= 1023;
  ix0 = (ix0 & 0x000f_ffff) | 0x0010_0000;
  if m & 1 != 0 {
    // Odd exponent: double the mantissa so the exponent can be halved exactly.
    shift_left_one(&mut ix0, &mut ix1);
  }
  m >>= 1;

  // Generate the root one bit at a time; `q:q1` accumulates the result and
  // `s0:s1` tracks twice the partial root.
  shift_left_one(&mut ix0, &mut ix1);
  let mut q = 0_i32;
  let mut q1 = 0_u32;
  let mut s0 = 0_i32;
  let mut s1 = 0_u32;

  // High word: the trial bit moves from bit 21 down to bit 0.
  let mut high_bit = 0x0020_0000_i32;
  while high_bit != 0 {
    let t = s0 + high_bit;
    if t <= ix0 {
      s0 = t + high_bit;
      ix0 -= t;
      q += high_bit;
    }
    shift_left_one(&mut ix0, &mut ix1);
    high_bit >>= 1;
  }

  // Low word: same procedure with 64-bit compare/subtract done by hand.
  let mut low_bit = SIGN;
  while low_bit != 0 {
    let t1 = s1.wrapping_add(low_bit);
    let t = s0;
    if t < ix0 || (t == ix0 && t1 <= ix1) {
      s1 = t1.wrapping_add(low_bit);
      // Carry from the low half of the partial root into the high half.
      if (t1 & SIGN) == SIGN && (s1 & SIGN) == 0 {
        s0 += 1;
      }
      ix0 -= t;
      // Borrow for the low-word subtraction that follows.
      if ix1 < t1 {
        ix0 -= 1;
      }
      ix1 = ix1.wrapping_sub(t1);
      q1 = q1.wrapping_add(low_bit);
    }
    shift_left_one(&mut ix0, &mut ix1);
    low_bit >>= 1;
  }

  // A non-zero remainder means the result is inexact; probe the rounding
  // direction with floating-point adds and round `q:q1` accordingly.
  if (ix0 | ix1 as i32) != 0 {
    let z: f64 = 1.0 - 1.0e-300;
    if z >= 1.0 {
      let z: f64 = 1.0 + 1.0e-300;
      if q1 == 0xffff_ffff {
        q1 = 0;
        q += 1;
      } else if z > 1.0 {
        if q1 == 0xffff_fffe {
          q += 1;
        }
        q1 = q1.wrapping_add(2);
      } else {
        q1 = q1.wrapping_add(q1 & 1);
      }
    }
  }

  // Reassemble: drop the extra low bit, restore the bias, apply the exponent.
  ix0 = (q >> 1) + 0x3fe0_0000;
  ix1 = q1 >> 1;
  if q & 1 == 1 {
    ix1 |= SIGN;
  }
  ix0 += m << 20;

  f64::from_bits((u64::from(ix0 as u32) << 32) | u64::from(ix1))
}
906
/// Checks `software_sqrt` on special values, signed zeros and exact squares.
#[test]
fn test_software_sqrt() {
  // NaN, infinities and negative inputs.
  assert!(software_sqrt(f64::NAN).is_nan());
  assert_eq!(software_sqrt(f64::INFINITY), f64::INFINITY);
  assert!(software_sqrt(-1.0).is_nan());
  assert!(software_sqrt(f64::NEG_INFINITY).is_nan());

  // Zeros must come back with their sign intact. `0.0 == -0.0` is true, so
  // an equality assertion alone cannot detect a lost sign.
  let positive_zero = software_sqrt(0.0);
  assert_eq!(positive_zero, 0.0);
  assert!(positive_zero.is_sign_positive());
  let negative_zero = software_sqrt(-0.0);
  assert_eq!(negative_zero, 0.0);
  assert!(negative_zero.is_sign_negative());

  // Perfect squares have exactly representable roots.
  let exact_cases = [
    (0.25, 0.5),
    (1.0, 1.0),
    (4.0, 2.0),
    (9.0, 3.0),
    (16.0, 4.0),
    (25.0, 5.0),
    (5000.0 * 5000.0, 5000.0),
  ];
  for &(input, expected) in exact_cases.iter() {
    assert_eq!(software_sqrt(input), expected);
  }
}
921
/// Comparison trait providing `cmp_eq`; going by the name, an "equal to"
/// test. The shape of the result is chosen by each implementor via `Output`.
pub trait CmpEq<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably for equality; see implementors).
  fn cmp_eq(self, rhs: Rhs) -> Self::Output;
}
926
/// Comparison trait providing `cmp_gt`; going by the name, a "greater than"
/// test. The shape of the result is chosen by each implementor via `Output`.
pub trait CmpGt<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably `self > rhs`; see implementors).
  fn cmp_gt(self, rhs: Rhs) -> Self::Output;
}
931
/// Comparison trait providing `cmp_ge`; going by the name, a "greater than or
/// equal" test. The shape of the result is chosen by each implementor via
/// `Output`.
pub trait CmpGe<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably `self >= rhs`; see implementors).
  fn cmp_ge(self, rhs: Rhs) -> Self::Output;
}
936
/// Comparison trait providing `cmp_ne`; going by the name, a "not equal"
/// test. The shape of the result is chosen by each implementor via `Output`.
pub trait CmpNe<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably `self != rhs`; see implementors).
  fn cmp_ne(self, rhs: Rhs) -> Self::Output;
}
941
/// Comparison trait providing `cmp_lt`; going by the name, a "less than"
/// test. The shape of the result is chosen by each implementor via `Output`.
pub trait CmpLt<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably `self < rhs`; see implementors).
  fn cmp_lt(self, rhs: Rhs) -> Self::Output;
}
946
/// Comparison trait providing `cmp_le`; going by the name, a "less than or
/// equal" test. The shape of the result is chosen by each implementor via
/// `Output`.
pub trait CmpLe<Rhs = Self> {
  /// Type produced by the comparison.
  type Output;
  /// Compares `self` with `rhs` (presumably `self <= rhs`; see implementors).
  fn cmp_le(self, rhs: Rhs) -> Self::Output;
}
951
// Generates `impl Trait<Scalar> for Vector` for every `(Vector, Scalar)` pair
// in the list. The scalar operand is broadcast with `splat` and the call is
// forwarded to the same method taking a vector on the right-hand side.
macro_rules! bulk_impl_const_rhs_op {
  (($trait_name:ident, $func:ident) => [$(($vector:ty, $scalar:ty),)+]) => {
    $(
      impl $trait_name<$scalar> for $vector {
        type Output = Self;

        #[inline]
        #[must_use]
        fn $func(self, rhs: $scalar) -> Self {
          let widened = Self::splat(rhs);
          self.$func(widened)
        }
      }
    )+
  };
}
966
// Scalar right-hand-side comparisons for the float vector types: each line
// implements one comparison trait for every listed (vector, scalar) pair by
// splatting the scalar and forwarding to the vector-vs-vector method.
bulk_impl_const_rhs_op!((CmpEq, cmp_eq) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLt, cmp_lt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGt, cmp_gt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpNe, cmp_ne) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLe, cmp_le) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGe, cmp_ge) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);