wide/
i8x16_.rs

1use super::*;
2
// Choose the backing representation of `i8x16` at compile time. The branches
// are written as an if / else-if chain keyed on the enabled target features;
// the last branch is a plain array used when none of the SIMD features apply.
pick! {
  if #[cfg(target_feature="sse2")] {
    /// Sixteen `i8` lanes, backed by an `m128i` on SSE2 targets.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i8x16 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    /// Sixteen `i8` lanes, backed by a WebAssembly `v128`.
    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i8x16 { pub(crate) simd: v128 }

    impl Default for i8x16 {
      // The default value is `splat(0)`.
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i8x16 {
      // Two vectors are equal only when the lane-wise equality mask is true
      // in every lane.
      fn eq(&self, other: &Self) -> bool {
        u8x16_all_true(i8x16_eq(self.simd, other.simd))
      }
    }

    impl Eq for i8x16 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    /// Sixteen `i8` lanes, backed by a NEON `int8x16_t` on aarch64.
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i8x16 { pub(crate) neon : int8x16_t }

    impl Default for i8x16 {
      // The default value is `splat(0)`.
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i8x16 {
      // `vceqq_s8` produces a per-lane mask (all ones where the lanes match);
      // the minimum over all lanes is `u8::MAX` only if every lane matched.
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u8(vceqq_s8(self.neon, other.neon))==u8::MAX }
      }
    }

    impl Eq for i8x16 { }
  } else {
    /// Sixteen `i8` lanes, backed by a 16-byte-aligned array (portable fallback).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i8x16 { arr: [i8;16] }
  }
}
57
// Expands the crate's shared constant/helper macro for this type.
// NOTE(review): the macro is defined elsewhere in the crate; confirm what it
// generates (and what the trailing `128` means) against its definition.
int_uint_consts!(i8, 16, i8x16, i8x16, i8a16, const_i8_as_i8x16, 128);

// SAFETY (assumed, to be confirmed): every representation declared above is a
// single 16-byte field holding integer lanes, so the all-zero pattern is valid
// and there should be no padding or invalid bit patterns. This is evident for
// the `[i8; 16]` fallback; verify it for `m128i`, `v128` and `int8x16_t`.
unsafe impl Zeroable for i8x16 {}
unsafe impl Pod for i8x16 {}
62
63impl Add for i8x16 {
64  type Output = Self;
65  #[inline]
66  #[must_use]
67  fn add(self, rhs: Self) -> Self::Output {
68    pick! {
69      if #[cfg(target_feature="sse2")] {
70        Self { sse: add_i8_m128i(self.sse, rhs.sse) }
71      } else if #[cfg(target_feature="simd128")] {
72        Self { simd: i8x16_add(self.simd, rhs.simd) }
73      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
74        unsafe { Self { neon: vaddq_s8(self.neon, rhs.neon) } }
75      } else {
76        Self { arr: [
77          self.arr[0].wrapping_add(rhs.arr[0]),
78          self.arr[1].wrapping_add(rhs.arr[1]),
79          self.arr[2].wrapping_add(rhs.arr[2]),
80          self.arr[3].wrapping_add(rhs.arr[3]),
81          self.arr[4].wrapping_add(rhs.arr[4]),
82          self.arr[5].wrapping_add(rhs.arr[5]),
83          self.arr[6].wrapping_add(rhs.arr[6]),
84          self.arr[7].wrapping_add(rhs.arr[7]),
85          self.arr[8].wrapping_add(rhs.arr[8]),
86          self.arr[9].wrapping_add(rhs.arr[9]),
87          self.arr[10].wrapping_add(rhs.arr[10]),
88          self.arr[11].wrapping_add(rhs.arr[11]),
89          self.arr[12].wrapping_add(rhs.arr[12]),
90          self.arr[13].wrapping_add(rhs.arr[13]),
91          self.arr[14].wrapping_add(rhs.arr[14]),
92          self.arr[15].wrapping_add(rhs.arr[15]),
93        ]}
94      }
95    }
96  }
97}
98
99impl Sub for i8x16 {
100  type Output = Self;
101  #[inline]
102  #[must_use]
103  fn sub(self, rhs: Self) -> Self::Output {
104    pick! {
105      if #[cfg(target_feature="sse2")] {
106        Self { sse: sub_i8_m128i(self.sse, rhs.sse) }
107      } else if #[cfg(target_feature="simd128")] {
108        Self { simd: i8x16_sub(self.simd, rhs.simd) }
109      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
110        unsafe {Self { neon: vsubq_s8(self.neon, rhs.neon) }}
111      } else {
112        Self { arr: [
113          self.arr[0].wrapping_sub(rhs.arr[0]),
114          self.arr[1].wrapping_sub(rhs.arr[1]),
115          self.arr[2].wrapping_sub(rhs.arr[2]),
116          self.arr[3].wrapping_sub(rhs.arr[3]),
117          self.arr[4].wrapping_sub(rhs.arr[4]),
118          self.arr[5].wrapping_sub(rhs.arr[5]),
119          self.arr[6].wrapping_sub(rhs.arr[6]),
120          self.arr[7].wrapping_sub(rhs.arr[7]),
121          self.arr[8].wrapping_sub(rhs.arr[8]),
122          self.arr[9].wrapping_sub(rhs.arr[9]),
123          self.arr[10].wrapping_sub(rhs.arr[10]),
124          self.arr[11].wrapping_sub(rhs.arr[11]),
125          self.arr[12].wrapping_sub(rhs.arr[12]),
126          self.arr[13].wrapping_sub(rhs.arr[13]),
127          self.arr[14].wrapping_sub(rhs.arr[14]),
128          self.arr[15].wrapping_sub(rhs.arr[15]),
129        ]}
130      }
131    }
132  }
133}
134
135impl Add<i8> for i8x16 {
136  type Output = Self;
137  #[inline]
138  #[must_use]
139  fn add(self, rhs: i8) -> Self::Output {
140    self.add(Self::splat(rhs))
141  }
142}
143
144impl Sub<i8> for i8x16 {
145  type Output = Self;
146  #[inline]
147  #[must_use]
148  fn sub(self, rhs: i8) -> Self::Output {
149    self.sub(Self::splat(rhs))
150  }
151}
152
153impl Add<i8x16> for i8 {
154  type Output = i8x16;
155  #[inline]
156  #[must_use]
157  fn add(self, rhs: i8x16) -> Self::Output {
158    i8x16::splat(self).add(rhs)
159  }
160}
161
162impl Sub<i8x16> for i8 {
163  type Output = i8x16;
164  #[inline]
165  #[must_use]
166  fn sub(self, rhs: i8x16) -> Self::Output {
167    i8x16::splat(self).sub(rhs)
168  }
169}
170
171impl BitAnd for i8x16 {
172  type Output = Self;
173  #[inline]
174  #[must_use]
175  fn bitand(self, rhs: Self) -> Self::Output {
176    pick! {
177      if #[cfg(target_feature="sse2")] {
178        Self { sse: bitand_m128i(self.sse, rhs.sse) }
179      } else if #[cfg(target_feature="simd128")] {
180        Self { simd: v128_and(self.simd, rhs.simd) }
181      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
182        unsafe {Self { neon: vandq_s8(self.neon, rhs.neon) }}
183      } else {
184        Self { arr: [
185          self.arr[0].bitand(rhs.arr[0]),
186          self.arr[1].bitand(rhs.arr[1]),
187          self.arr[2].bitand(rhs.arr[2]),
188          self.arr[3].bitand(rhs.arr[3]),
189          self.arr[4].bitand(rhs.arr[4]),
190          self.arr[5].bitand(rhs.arr[5]),
191          self.arr[6].bitand(rhs.arr[6]),
192          self.arr[7].bitand(rhs.arr[7]),
193          self.arr[8].bitand(rhs.arr[8]),
194          self.arr[9].bitand(rhs.arr[9]),
195          self.arr[10].bitand(rhs.arr[10]),
196          self.arr[11].bitand(rhs.arr[11]),
197          self.arr[12].bitand(rhs.arr[12]),
198          self.arr[13].bitand(rhs.arr[13]),
199          self.arr[14].bitand(rhs.arr[14]),
200          self.arr[15].bitand(rhs.arr[15]),
201        ]}
202      }
203    }
204  }
205}
206
207impl BitOr for i8x16 {
208  type Output = Self;
209  #[inline]
210  #[must_use]
211  fn bitor(self, rhs: Self) -> Self::Output {
212    pick! {
213      if #[cfg(target_feature="sse2")] {
214        Self { sse: bitor_m128i(self.sse, rhs.sse) }
215      } else if #[cfg(target_feature="simd128")] {
216        Self { simd: v128_or(self.simd, rhs.simd) }
217      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
218        unsafe {Self { neon: vorrq_s8(self.neon, rhs.neon) }}
219      } else {
220        Self { arr: [
221          self.arr[0].bitor(rhs.arr[0]),
222          self.arr[1].bitor(rhs.arr[1]),
223          self.arr[2].bitor(rhs.arr[2]),
224          self.arr[3].bitor(rhs.arr[3]),
225          self.arr[4].bitor(rhs.arr[4]),
226          self.arr[5].bitor(rhs.arr[5]),
227          self.arr[6].bitor(rhs.arr[6]),
228          self.arr[7].bitor(rhs.arr[7]),
229          self.arr[8].bitor(rhs.arr[8]),
230          self.arr[9].bitor(rhs.arr[9]),
231          self.arr[10].bitor(rhs.arr[10]),
232          self.arr[11].bitor(rhs.arr[11]),
233          self.arr[12].bitor(rhs.arr[12]),
234          self.arr[13].bitor(rhs.arr[13]),
235          self.arr[14].bitor(rhs.arr[14]),
236          self.arr[15].bitor(rhs.arr[15]),
237        ]}
238      }
239    }
240  }
241}
242
243impl BitXor for i8x16 {
244  type Output = Self;
245  #[inline]
246  #[must_use]
247  fn bitxor(self, rhs: Self) -> Self::Output {
248    pick! {
249      if #[cfg(target_feature="sse2")] {
250        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
251      } else if #[cfg(target_feature="simd128")] {
252        Self { simd: v128_xor(self.simd, rhs.simd) }
253      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
254        unsafe {Self { neon: veorq_s8(self.neon, rhs.neon) }}
255      } else {
256        Self { arr: [
257          self.arr[0].bitxor(rhs.arr[0]),
258          self.arr[1].bitxor(rhs.arr[1]),
259          self.arr[2].bitxor(rhs.arr[2]),
260          self.arr[3].bitxor(rhs.arr[3]),
261          self.arr[4].bitxor(rhs.arr[4]),
262          self.arr[5].bitxor(rhs.arr[5]),
263          self.arr[6].bitxor(rhs.arr[6]),
264          self.arr[7].bitxor(rhs.arr[7]),
265          self.arr[8].bitxor(rhs.arr[8]),
266          self.arr[9].bitxor(rhs.arr[9]),
267          self.arr[10].bitxor(rhs.arr[10]),
268          self.arr[11].bitxor(rhs.arr[11]),
269          self.arr[12].bitxor(rhs.arr[12]),
270          self.arr[13].bitxor(rhs.arr[13]),
271          self.arr[14].bitxor(rhs.arr[14]),
272          self.arr[15].bitxor(rhs.arr[15]),
273        ]}
274      }
275    }
276  }
277}
278
279impl CmpEq for i8x16 {
280  type Output = Self;
281  #[inline]
282  #[must_use]
283  fn cmp_eq(self, rhs: Self) -> Self::Output {
284    pick! {
285      if #[cfg(target_feature="sse2")] {
286        Self { sse: cmp_eq_mask_i8_m128i(self.sse, rhs.sse) }
287      } else if #[cfg(target_feature="simd128")] {
288        Self { simd: i8x16_eq(self.simd, rhs.simd) }
289      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
290        unsafe {Self { neon: vreinterpretq_s8_u8(vceqq_s8(self.neon, rhs.neon)) }}
291      } else {
292        Self { arr: [
293          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
294          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
295          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
296          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
297          if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
298          if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
299          if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
300          if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
301          if self.arr[8] == rhs.arr[8] { -1 } else { 0 },
302          if self.arr[9] == rhs.arr[9] { -1 } else { 0 },
303          if self.arr[10] == rhs.arr[10] { -1 } else { 0 },
304          if self.arr[11] == rhs.arr[11] { -1 } else { 0 },
305          if self.arr[12] == rhs.arr[12] { -1 } else { 0 },
306          if self.arr[13] == rhs.arr[13] { -1 } else { 0 },
307          if self.arr[14] == rhs.arr[14] { -1 } else { 0 },
308          if self.arr[15] == rhs.arr[15] { -1 } else { 0 },
309        ]}
310      }
311    }
312  }
313}
314
315impl CmpGt for i8x16 {
316  type Output = Self;
317  #[inline]
318  #[must_use]
319  fn cmp_gt(self, rhs: Self) -> Self::Output {
320    pick! {
321      if #[cfg(target_feature="sse2")] {
322        Self { sse: cmp_gt_mask_i8_m128i(self.sse, rhs.sse) }
323      } else if #[cfg(target_feature="simd128")] {
324        Self { simd: i8x16_gt(self.simd, rhs.simd) }
325      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
326        unsafe {Self { neon: vreinterpretq_s8_u8(vcgtq_s8(self.neon, rhs.neon)) }}
327      } else {
328        Self { arr: [
329          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
330          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
331          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
332          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
333          if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
334          if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
335          if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
336          if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
337          if self.arr[8] > rhs.arr[8] { -1 } else { 0 },
338          if self.arr[9] > rhs.arr[9] { -1 } else { 0 },
339          if self.arr[10] > rhs.arr[10] { -1 } else { 0 },
340          if self.arr[11] > rhs.arr[11] { -1 } else { 0 },
341          if self.arr[12] > rhs.arr[12] { -1 } else { 0 },
342          if self.arr[13] > rhs.arr[13] { -1 } else { 0 },
343          if self.arr[14] > rhs.arr[14] { -1 } else { 0 },
344          if self.arr[15] > rhs.arr[15] { -1 } else { 0 },
345        ]}
346      }
347    }
348  }
349}
350
351impl CmpLt for i8x16 {
352  type Output = Self;
353  #[inline]
354  #[must_use]
355  fn cmp_lt(self, rhs: Self) -> Self::Output {
356    pick! {
357      if #[cfg(target_feature="sse2")] {
358        Self { sse: cmp_lt_mask_i8_m128i(self.sse, rhs.sse) }
359      } else if #[cfg(target_feature="simd128")] {
360        Self { simd: i8x16_lt(self.simd, rhs.simd) }
361      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
362        unsafe {Self { neon: vreinterpretq_s8_u8(vcltq_s8(self.neon, rhs.neon)) }}
363      } else {
364        Self { arr: [
365          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
366          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
367          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
368          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
369          if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
370          if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
371          if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
372          if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
373          if self.arr[8] < rhs.arr[8] { -1 } else { 0 },
374          if self.arr[9] < rhs.arr[9] { -1 } else { 0 },
375          if self.arr[10] < rhs.arr[10] { -1 } else { 0 },
376          if self.arr[11] < rhs.arr[11] { -1 } else { 0 },
377          if self.arr[12] < rhs.arr[12] { -1 } else { 0 },
378          if self.arr[13] < rhs.arr[13] { -1 } else { 0 },
379          if self.arr[14] < rhs.arr[14] { -1 } else { 0 },
380          if self.arr[15] < rhs.arr[15] { -1 } else { 0 },
381        ]}
382      }
383    }
384  }
385}
386
387impl i8x16 {
388  #[inline]
389  #[must_use]
390  pub fn new(array: [i8; 16]) -> Self {
391    Self::from(array)
392  }
393
394  /// converts i16 to i8, saturating values that are too large
395  #[inline]
396  #[must_use]
397  pub fn from_i16x16_saturate(v: i16x16) -> i8x16 {
398    pick! {
399      if #[cfg(target_feature="avx2")] {
400        i8x16 { sse: pack_i16_to_i8_m128i( extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2))  }
401      } else if #[cfg(target_feature="sse2")] {
402        i8x16 { sse: pack_i16_to_i8_m128i( v.a.sse, v.b.sse ) }
403      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
404        use core::arch::aarch64::*;
405
406        unsafe {
407          i8x16 { neon: vcombine_s8(vqmovn_s16(v.a.neon), vqmovn_s16(v.b.neon)) }
408        }
409      } else if #[cfg(target_feature="simd128")] {
410        use core::arch::wasm32::*;
411
412        i8x16 { simd: i8x16_narrow_i16x8(v.a.simd, v.b.simd) }
413      } else {
414        fn clamp(a : i16) -> i8 {
415            if a < i8::MIN as i16 {
416              i8::MIN
417            }
418            else if a > i8::MAX as i16 {
419              i8::MAX
420            } else {
421                a as i8
422            }
423        }
424
425        i8x16::new([
426          clamp(v.as_array_ref()[0]),
427          clamp(v.as_array_ref()[1]),
428          clamp(v.as_array_ref()[2]),
429          clamp(v.as_array_ref()[3]),
430          clamp(v.as_array_ref()[4]),
431          clamp(v.as_array_ref()[5]),
432          clamp(v.as_array_ref()[6]),
433          clamp(v.as_array_ref()[7]),
434          clamp(v.as_array_ref()[8]),
435          clamp(v.as_array_ref()[9]),
436          clamp(v.as_array_ref()[10]),
437          clamp(v.as_array_ref()[11]),
438          clamp(v.as_array_ref()[12]),
439          clamp(v.as_array_ref()[13]),
440          clamp(v.as_array_ref()[14]),
441          clamp(v.as_array_ref()[15]),
442        ])
443      }
444    }
445  }
446
447  /// converts i16 to i8, truncating the upper bits if they are set
448  #[inline]
449  #[must_use]
450  pub fn from_i16x16_truncate(v: i16x16) -> i8x16 {
451    pick! {
452      if #[cfg(target_feature="avx2")] {
453        let a = v.avx2.bitand(set_splat_i16_m256i(0xff));
454        i8x16 { sse: pack_i16_to_u8_m128i( extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a))  }
455      } else if #[cfg(target_feature="sse2")] {
456        let mask = set_splat_i16_m128i(0xff);
457        i8x16 { sse: pack_i16_to_u8_m128i( v.a.sse.bitand(mask), v.b.sse.bitand(mask) ) }
458      } else {
459        // no super good intrinsics on other platforms... plain old codegen does a reasonable job
460        i8x16::new([
461          v.as_array_ref()[0] as i8,
462          v.as_array_ref()[1] as i8,
463          v.as_array_ref()[2] as i8,
464          v.as_array_ref()[3] as i8,
465          v.as_array_ref()[4] as i8,
466          v.as_array_ref()[5] as i8,
467          v.as_array_ref()[6] as i8,
468          v.as_array_ref()[7] as i8,
469          v.as_array_ref()[8] as i8,
470          v.as_array_ref()[9] as i8,
471          v.as_array_ref()[10] as i8,
472          v.as_array_ref()[11] as i8,
473          v.as_array_ref()[12] as i8,
474          v.as_array_ref()[13] as i8,
475          v.as_array_ref()[14] as i8,
476          v.as_array_ref()[15] as i8,
477        ])
478      }
479    }
480  }
481
482  #[inline]
483  #[must_use]
484  pub fn blend(self, t: Self, f: Self) -> Self {
485    pick! {
486      if #[cfg(target_feature="sse4.1")] {
487        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
488      } else if #[cfg(target_feature="simd128")] {
489        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
490      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
491        unsafe {Self { neon: vbslq_s8(vreinterpretq_u8_s8(self.neon), t.neon, f.neon) }}
492      } else {
493        generic_bit_blend(self, t, f)
494      }
495    }
496  }
497  #[inline]
498  #[must_use]
499  pub fn abs(self) -> Self {
500    pick! {
501      if #[cfg(target_feature="ssse3")] {
502        Self { sse: abs_i8_m128i(self.sse) }
503      } else if #[cfg(target_feature="simd128")] {
504        Self { simd: i8x16_abs(self.simd) }
505      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
506        unsafe {Self { neon: vabsq_s8(self.neon) }}
507      } else {
508        let arr: [i8; 16] = cast(self);
509        cast([
510          arr[0].wrapping_abs(),
511          arr[1].wrapping_abs(),
512          arr[2].wrapping_abs(),
513          arr[3].wrapping_abs(),
514          arr[4].wrapping_abs(),
515          arr[5].wrapping_abs(),
516          arr[6].wrapping_abs(),
517          arr[7].wrapping_abs(),
518          arr[8].wrapping_abs(),
519          arr[9].wrapping_abs(),
520          arr[10].wrapping_abs(),
521          arr[11].wrapping_abs(),
522          arr[12].wrapping_abs(),
523          arr[13].wrapping_abs(),
524          arr[14].wrapping_abs(),
525          arr[15].wrapping_abs(),
526        ])
527      }
528    }
529  }
530  #[inline]
531  #[must_use]
532  pub fn max(self, rhs: Self) -> Self {
533    pick! {
534      if #[cfg(target_feature="sse4.1")] {
535        Self { sse: max_i8_m128i(self.sse, rhs.sse) }
536      } else if #[cfg(target_feature="simd128")] {
537        Self { simd: i8x16_max(self.simd, rhs.simd) }
538      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
539        unsafe {Self { neon: vmaxq_s8(self.neon, rhs.neon) }}
540      } else {
541        self.cmp_lt(rhs).blend(rhs, self)
542      }
543    }
544  }
545  #[inline]
546  #[must_use]
547  pub fn min(self, rhs: Self) -> Self {
548    pick! {
549      if #[cfg(target_feature="sse4.1")] {
550        Self { sse: min_i8_m128i(self.sse, rhs.sse) }
551      } else if #[cfg(target_feature="simd128")] {
552        Self { simd: i8x16_min(self.simd, rhs.simd) }
553      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
554        unsafe {Self { neon: vminq_s8(self.neon, rhs.neon) }}
555      } else {
556        self.cmp_lt(rhs).blend(self, rhs)
557      }
558    }
559  }
560
561  #[inline]
562  #[must_use]
563  pub fn from_slice_unaligned(input: &[i8]) -> Self {
564    assert!(input.len() >= 16);
565
566    pick! {
567      if #[cfg(target_feature="sse2")] {
568        unsafe { Self { sse: load_unaligned_m128i( &*(input.as_ptr() as * const [u8;16]) ) } }
569      } else if #[cfg(target_feature="simd128")] {
570        unsafe { Self { simd: v128_load(input.as_ptr() as *const v128 ) } }
571      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
572        unsafe { Self { neon: vld1q_s8( input.as_ptr() as *const i8 ) } }
573      } else {
574        // 2018 edition doesn't have try_into
575        unsafe { Self::new( *(input.as_ptr() as * const [i8;16]) ) }
576      }
577    }
578  }
579
580  #[inline]
581  #[must_use]
582  pub fn move_mask(self) -> i32 {
583    pick! {
584      if #[cfg(target_feature="sse2")] {
585        move_mask_i8_m128i(self.sse)
586      } else if #[cfg(target_feature="simd128")] {
587        i8x16_bitmask(self.simd) as i32
588      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
589        unsafe
590        {
591          // set all to 1 if top bit is set, else 0
592          let masked = vcltq_s8(self.neon, vdupq_n_s8(0));
593
594          // select the right bit out of each lane
595          let selectbit : uint8x16_t = core::intrinsics::transmute([1u8, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128]);
596          let out = vandq_u8(masked, selectbit);
597
598          // interleave the lanes so that a 16-bit sum accumulates the bits in the right order
599          let table : uint8x16_t = core::intrinsics::transmute([0u8, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15]);
600          let r = vqtbl1q_u8(out, table);
601
602          // horizontally add the 16-bit lanes
603          vaddvq_u16(vreinterpretq_u16_u8(r)) as i32
604        }
605       } else {
606        ((self.arr[0] < 0) as i32) << 0 |
607        ((self.arr[1] < 0) as i32) << 1 |
608        ((self.arr[2] < 0) as i32) << 2 |
609        ((self.arr[3] < 0) as i32) << 3 |
610        ((self.arr[4] < 0) as i32) << 4 |
611        ((self.arr[5] < 0) as i32) << 5 |
612        ((self.arr[6] < 0) as i32) << 6 |
613        ((self.arr[7] < 0) as i32) << 7 |
614        ((self.arr[8] < 0) as i32) << 8 |
615        ((self.arr[9] < 0) as i32) << 9 |
616        ((self.arr[10] < 0) as i32) << 10 |
617        ((self.arr[11] < 0) as i32) << 11 |
618        ((self.arr[12] < 0) as i32) << 12 |
619        ((self.arr[13] < 0) as i32) << 13 |
620        ((self.arr[14] < 0) as i32) << 14 |
621        ((self.arr[15] < 0) as i32) << 15
622      }
623    }
624  }
625
626  #[inline]
627  #[must_use]
628  pub fn any(self) -> bool {
629    pick! {
630      if #[cfg(target_feature="sse2")] {
631        move_mask_i8_m128i(self.sse) != 0
632      } else if #[cfg(target_feature="simd128")] {
633        u8x16_bitmask(self.simd) != 0
634      } else {
635        let v : [u64;2] = cast(self);
636        ((v[0] | v[1]) & 0x80808080808080) != 0
637      }
638    }
639  }
640  #[inline]
641  #[must_use]
642  pub fn all(self) -> bool {
643    pick! {
644      if #[cfg(target_feature="sse2")] {
645        move_mask_i8_m128i(self.sse) == 0b1111_1111_1111_1111
646      } else if #[cfg(target_feature="simd128")] {
647        u8x16_bitmask(self.simd) == 0b1111_1111_1111_1111
648      } else {
649        let v : [u64;2] = cast(self);
650        (v[0] & v[1] & 0x80808080808080) == 0x80808080808080
651      }
652    }
653  }
654
655  #[inline]
656  #[must_use]
657  pub fn none(self) -> bool {
658    !self.any()
659  }
660
661  #[inline]
662  #[must_use]
663  pub fn saturating_add(self, rhs: Self) -> Self {
664    pick! {
665      if #[cfg(target_feature="sse2")] {
666        Self { sse: add_saturating_i8_m128i(self.sse, rhs.sse) }
667      } else if #[cfg(target_feature="simd128")] {
668        Self { simd: i8x16_add_sat(self.simd, rhs.simd) }
669      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
670        unsafe {Self { neon: vqaddq_s8(self.neon, rhs.neon) }}
671      } else {
672        Self { arr: [
673          self.arr[0].saturating_add(rhs.arr[0]),
674          self.arr[1].saturating_add(rhs.arr[1]),
675          self.arr[2].saturating_add(rhs.arr[2]),
676          self.arr[3].saturating_add(rhs.arr[3]),
677          self.arr[4].saturating_add(rhs.arr[4]),
678          self.arr[5].saturating_add(rhs.arr[5]),
679          self.arr[6].saturating_add(rhs.arr[6]),
680          self.arr[7].saturating_add(rhs.arr[7]),
681          self.arr[8].saturating_add(rhs.arr[8]),
682          self.arr[9].saturating_add(rhs.arr[9]),
683          self.arr[10].saturating_add(rhs.arr[10]),
684          self.arr[11].saturating_add(rhs.arr[11]),
685          self.arr[12].saturating_add(rhs.arr[12]),
686          self.arr[13].saturating_add(rhs.arr[13]),
687          self.arr[14].saturating_add(rhs.arr[14]),
688          self.arr[15].saturating_add(rhs.arr[15]),
689        ]}
690      }
691    }
692  }
693  #[inline]
694  #[must_use]
695  pub fn saturating_sub(self, rhs: Self) -> Self {
696    pick! {
697      if #[cfg(target_feature="sse2")] {
698        Self { sse: sub_saturating_i8_m128i(self.sse, rhs.sse) }
699      } else if #[cfg(target_feature="simd128")] {
700        Self { simd: i8x16_sub_sat(self.simd, rhs.simd) }
701      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
702        unsafe { Self { neon: vqsubq_s8(self.neon, rhs.neon) } }
703      } else {
704        Self { arr: [
705          self.arr[0].saturating_sub(rhs.arr[0]),
706          self.arr[1].saturating_sub(rhs.arr[1]),
707          self.arr[2].saturating_sub(rhs.arr[2]),
708          self.arr[3].saturating_sub(rhs.arr[3]),
709          self.arr[4].saturating_sub(rhs.arr[4]),
710          self.arr[5].saturating_sub(rhs.arr[5]),
711          self.arr[6].saturating_sub(rhs.arr[6]),
712          self.arr[7].saturating_sub(rhs.arr[7]),
713          self.arr[8].saturating_sub(rhs.arr[8]),
714          self.arr[9].saturating_sub(rhs.arr[9]),
715          self.arr[10].saturating_sub(rhs.arr[10]),
716          self.arr[11].saturating_sub(rhs.arr[11]),
717          self.arr[12].saturating_sub(rhs.arr[12]),
718          self.arr[13].saturating_sub(rhs.arr[13]),
719          self.arr[14].saturating_sub(rhs.arr[14]),
720          self.arr[15].saturating_sub(rhs.arr[15]),
721        ]}
722      }
723    }
724  }
725
726  #[inline]
727  pub fn to_array(self) -> [i8; 16] {
728    cast(self)
729  }
730
731  #[inline]
732  pub fn as_array_ref(&self) -> &[i8; 16] {
733    cast_ref(self)
734  }
735
736  #[inline]
737  pub fn as_array_mut(&mut self) -> &mut [i8; 16] {
738    cast_mut(self)
739  }
740}