wide/
i16x16_.rs

use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) avx2: m256i }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i16x16 { pub(crate) a : i16x8, pub(crate) b : i16x8 }
  }
}
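
// Note: both layouts above are 16 lanes in 32 bytes with the same alignment,
// which is what keeps the `Zeroable`/`Pod` impls and the `cast` calls later
// in this file valid in either configuration.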

int_uint_consts!(i16, 16, i16x16, i16x16, i16a16, const_i16_as_i16x16, 256);

unsafe impl Zeroable for i16x16 {}
unsafe impl Pod for i16x16 {}

impl Add for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}

impl Sub for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}

impl Mul for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}

impl Add<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i16> for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).add(rhs)
  }
}

impl Sub<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).sub(rhs)
  }
}

impl Mul<i16x16> for i16 {
  type Output = i16x16;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16x16) -> Self::Output {
    i16x16::splat(self).mul(rhs)
  }
}
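
// A hedged usage sketch of the scalar forms above (`splat` and `to_array`
// are provided elsewhere in this crate):
//
//   let v = i16x16::splat(3) * 2 + 1; // every lane is 3 * 2 + 1 == 7
//   assert_eq!(v.to_array(), [7; 16]);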

impl BitAnd for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}

impl BitOr for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}

impl BitXor for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}

macro_rules! impl_shl_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i16x16 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i16x16 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_i16_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i16x16!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
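
// Note: with a signed lane type, `Shr` is an arithmetic shift, so the sign
// bit is preserved: e.g. `i16x16::splat(-4) >> 1` has every lane equal to -2.
// `Shl` always shifts zeros in from the right.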

impl CmpEq for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}

impl CmpGt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}

impl CmpLt for i16x16 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
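        // AVX2 has no direct "less than" compare; use the identity
        // !(a > b) ^ (a == b) == (a < b), where the eq mask cancels the
        // "equal" lanes that !(a > b) would otherwise report as true.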
        Self { avx2: !cmp_gt_mask_i16_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_lt(rhs.a),
          b : self.b.cmp_lt(rhs.b),
        }
      }
    }
  }
}

impl i16x16 {
  #[inline]
  #[must_use]
  pub fn new(array: [i16; 16]) -> Self {
    Self::from(array)
  }

  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx2")] {
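        // There is no 16-bit move_mask, so pack the i16 lanes to i8 with
        // saturation (which keeps each sign bit); packing `self` against its
        // halves-swapped self lands all 16 packed lanes, in order, in the low
        // 128 bits, so the low 16 bits of the i8 mask are the answer.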
        (move_mask_i8_m256i(pack_i16_to_i8_m256i(self.avx2, shuffle_ai_i64_all_m256i::<0b01_00_11_10>(self.avx2))) & 0xffff) as i32
      } else {
        self.a.move_mask() | (self.b.move_mask() << 8)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
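        // the odd bits of the i8 mask are the sign bits of each i16 lane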
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
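        // every i16 lane's sign bit (the odd bits of the i8 mask) must be set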
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10101010101010101010101010101010) == 0b10101010101010101010101010101010
      } else {
        (self.a & self.b).all()
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// widens and sign extends to i16x16
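  ///
  /// A hedged doctest sketch (`i8x16::from` and `to_array` are provided
  /// elsewhere in this crate):
  /// ```
  /// # use wide::*;
  /// let v = i8x16::from([-1; 16]);
  /// assert_eq!(i16x16::from_i8x16(v).to_array(), [-1; 16]);
  /// ```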
  #[inline]
  #[must_use]
  pub fn from_i8x16(v: i8x16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i16x16 { avx2: convert_to_i16_m256i_from_i8_m128i(v.sse) }
      } else if #[cfg(target_feature="sse4.1")] {
        i16x16 {
          a: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(v.sse) },
          b: i16x8 { sse: convert_to_i16_m128i_from_lower8_i8_m128i(unpack_high_i64_m128i(v.sse, v.sse)) }
        }
      } else if #[cfg(target_feature="sse2")] {
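        // sse2 has no pmovsx: unpacking `v` with itself duplicates each byte
        // into both halves of a 16-bit lane, and the arithmetic shift right
        // by 8 then sign extends the high copy down into the full lane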
        i16x16 {
          a: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_low_i8_m128i(v.sse, v.sse)) },
          b: i16x8 { sse: shr_imm_i16_m128i::<8>(unpack_high_i8_m128i(v.sse, v.sse)) },
        }
      } else {
        i16x16::new([
          v.as_array_ref()[0] as i16,
          v.as_array_ref()[1] as i16,
          v.as_array_ref()[2] as i16,
          v.as_array_ref()[3] as i16,
          v.as_array_ref()[4] as i16,
          v.as_array_ref()[5] as i16,
          v.as_array_ref()[6] as i16,
          v.as_array_ref()[7] as i16,
          v.as_array_ref()[8] as i16,
          v.as_array_ref()[9] as i16,
          v.as_array_ref()[10] as i16,
          v.as_array_ref()[11] as i16,
          v.as_array_ref()[12] as i16,
          v.as_array_ref()[13] as i16,
          v.as_array_ref()[14] as i16,
          v.as_array_ref()[15] as i16,
        ])
      }
    }
  }

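  /// Lanewise select: each output lane comes from `t` where the matching lane
  /// of `self` is all ones and from `f` where it is all zeros. `self` is
  /// expected to be a boolean-style mask, such as the output of the `cmp_*`
  /// methods; results for other values are not specified.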
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b),
        }
      }
    }
  }

  /// horizontal add of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    (arr[0] + arr[1]).reduce_add()
  }

  /// horizontal min of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].min(arr[1]).reduce_min()
  }

  /// horizontal max of all the elements of the vector
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i16 {
    let arr: [i16x8; 2] = cast(self);

    arr[0].max(arr[1]).reduce_max()
  }

  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i16_m256i(self.avx2) }
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_add(rhs.a),
          b : self.b.saturating_add(rhs.b),
        }
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_saturating_i16_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.saturating_sub(rhs.a),
          b : self.b.saturating_sub(rhs.b),
        }
      }
    }
  }

  /// Calculates partial dot product.
  /// Multiplies packed signed 16-bit integers, producing intermediate signed
  /// 32-bit integers, then horizontally adds adjacent pairs of those
  /// intermediate integers.
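  ///
  /// A hedged doctest sketch (`splat` and `i32x8::to_array` come from
  /// elsewhere in this crate):
  /// ```
  /// # use wide::*;
  /// let a = i16x16::from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
  /// let ones = i16x16::splat(1);
  /// // each output lane n is a[2n] * 1 + a[2n + 1] * 1
  /// assert_eq!(a.dot(ones).to_array(), [3, 7, 11, 15, 19, 23, 27, 31]);
  /// ```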
  #[inline]
  #[must_use]
  pub fn dot(self, rhs: Self) -> i32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2: mul_i16_horizontal_add_m256i(self.avx2, rhs.avx2) }
      } else {
        i32x8 {
          a : self.a.dot(rhs.a),
          b : self.b.dot(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale, equivalent to `((self * rhs) + 0x4000) >> 15` on
  /// each lane, effectively multiplying by a 16 bit fixed point number
  /// between -1 and 1. This corresponds to the following instructions:
  /// - vqrdmulhq_s16 instruction on neon
  /// - i16x8_q15mulr_sat on simd128
  /// - _mm256_mulhrs_epi16 on avx2
  /// - emulated via mul_i16_* on sse2
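  ///
  /// A hedged doctest sketch: 0x4000 is 0.5 in Q15 fixed point, so this
  /// halves with rounding (`splat` and `to_array` come from elsewhere in
  /// this crate):
  /// ```
  /// # use wide::*;
  /// let v = i16x16::splat(99);
  /// let half = i16x16::splat(0x4000);
  /// // (99 * 0x4000 + 0x4000) >> 15 == 50
  /// assert_eq!(v.mul_scale_round(half).to_array(), [50; 16]);
  /// ```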
  #[inline]
  #[must_use]
  pub fn mul_scale_round(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul_scale_round(rhs.a),
          b : self.b.mul_scale_round(rhs.b),
        }
      }
    }
  }

  /// Multiply and scale, equivalent to `((self * rhs) + 0x4000) >> 15` on
  /// each lane, effectively multiplying by a 16 bit fixed point number
  /// between -1 and 1. This corresponds to the following instructions:
  /// - vqrdmulhq_n_s16 instruction on neon
  /// - i16x8_q15mulr_sat on simd128
  /// - _mm256_mulhrs_epi16 on avx2
  /// - emulated via mul_i16_* on sse2
  #[inline]
  #[must_use]
  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i16_scale_round_m256i(self.avx2, set_splat_i16_m256i(rhs)) }
      } else {
        Self {
          a : self.a.mul_scale_round_n(rhs),
          b : self.b.mul_scale_round_n(rhs),
        }
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i16; 16] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i16; 16] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i16; 16] {
    cast_mut(self)
  }
}