use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i16x8 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i16x8 { pub(crate) simd: v128 }

    impl Default for i16x8 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i16x8 {
      fn eq(&self, other: &Self) -> bool {
        u16x8_all_true(i16x8_eq(self.simd, other.simd))
      }
    }

    impl Eq for i16x8 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i16x8 { pub(crate) neon : int16x8_t }

    impl Default for i16x8 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i16x8 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u16(vceqq_s16(self.neon, other.neon)) == u16::MAX }
      }
    }

    impl Eq for i16x8 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i16x8 { pub(crate) arr: [i16;8] }
  }
}

int_uint_consts!(i16, 8, i16x8, i16x8, i16a8, const_i16_as_i16x8, 128);

unsafe impl Zeroable for i16x8 {}
unsafe impl Pod for i16x8 {}

impl Add for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
          self.arr[4].wrapping_add(rhs.arr[4]),
          self.arr[5].wrapping_add(rhs.arr[5]),
          self.arr[6].wrapping_add(rhs.arr[6]),
          self.arr[7].wrapping_add(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Sub for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
          self.arr[4].wrapping_sub(rhs.arr[4]),
          self.arr[5].wrapping_sub(rhs.arr[5]),
          self.arr[6].wrapping_sub(rhs.arr[6]),
          self.arr[7].wrapping_sub(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Mul for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: mul_i16_keep_low_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_mul(rhs.arr[0]),
          self.arr[1].wrapping_mul(rhs.arr[1]),
          self.arr[2].wrapping_mul(rhs.arr[2]),
          self.arr[3].wrapping_mul(rhs.arr[3]),
          self.arr[4].wrapping_mul(rhs.arr[4]),
          self.arr[5].wrapping_mul(rhs.arr[5]),
          self.arr[6].wrapping_mul(rhs.arr[6]),
          self.arr[7].wrapping_mul(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Add<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i16> for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn add(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).add(rhs)
  }
}

impl Sub<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).sub(rhs)
  }
}

impl Mul<i16x8> for i16 {
  type Output = i16x8;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i16x8) -> Self::Output {
    i16x8::splat(self).mul(rhs)
  }
}

impl BitAnd for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vandq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
          self.arr[4].bitand(rhs.arr[4]),
          self.arr[5].bitand(rhs.arr[5]),
          self.arr[6].bitand(rhs.arr[6]),
          self.arr[7].bitand(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitOr for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vorrq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
          self.arr[4].bitor(rhs.arr[4]),
          self.arr[5].bitor(rhs.arr[5]),
          self.arr[6].bitor(rhs.arr[6]),
          self.arr[7].bitor(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitXor for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: veorq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
          self.arr[4].bitxor(rhs.arr[4]),
          self.arr[5].bitxor(rhs.arr[5]),
          self.arr[6].bitxor(rhs.arr[6]),
          self.arr[7].bitxor(rhs.arr[7]),
        ]}
      }
    }
  }
}

macro_rules! impl_shl_t_for_i16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i16x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i16x8_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s16(self.neon, vmovq_n_s16(rhs as i16)) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
              self.arr[4] << u,
              self.arr[5] << u,
              self.arr[6] << u,
              self.arr[7] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i16x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i16x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_i16_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i16x8_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s16(self.neon, vmovq_n_s16(-(rhs as i16))) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
              self.arr[4] >> u,
              self.arr[5] >> u,
              self.arr[6] >> u,
              self.arr[7] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i16x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vceqq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpGt for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vcgtq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpLt for i16x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s16_u16(vcltq_s16(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
          if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
          if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
          if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
          if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl i16x8 {
  #[inline]
  #[must_use]
  pub fn new(array: [i16; 8]) -> Self {
    Self::from(array)
  }

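  /// Packs the sign bit of each lane into the low 8 bits of an `i32`:
  /// lane 0 becomes bit 0, lane 7 becomes bit 7.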
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // pack the i16 lanes down to i8 (signs are preserved) and take the byte mask
        move_mask_i8_m128i(pack_i16_to_i8_m128i(self.sse, self.sse)) & 0xff
      } else if #[cfg(target_feature="simd128")] {
        i16x8_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe
        {
          // all ones in a lane if the sign bit is set, else all zeroes
          let masked = vcltq_s16(self.neon, vdupq_n_s16(0));

          // pick out the bit position that corresponds to each lane
          let selectbit: uint16x8_t = core::mem::transmute([1u16, 2, 4, 8, 16, 32, 64, 128]);
          let r = vandq_u16(masked, selectbit);

          // horizontally add to collect the bits into a single value
          vaddvq_u16(r) as i32
        }
      } else {
        ((self.arr[0] < 0) as i32) << 0 |
        ((self.arr[1] < 0) as i32) << 1 |
        ((self.arr[2] < 0) as i32) << 2 |
        ((self.arr[3] < 0) as i32) << 3 |
        ((self.arr[4] < 0) as i32) << 4 |
        ((self.arr[5] < 0) as i32) << 5 |
        ((self.arr[6] < 0) as i32) << 6 |
        ((self.arr[7] < 0) as i32) << 7
      }
    }
  }

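  /// `true` if the sign bit of any lane is set.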
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1010101010101010) != 0
      } else if #[cfg(target_feature="simd128")] {
        u16x8_bitmask(self.simd) != 0
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000800080008000) != 0
      }
    }
  }

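  /// `true` if the sign bit of every lane is set.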
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1010101010101010) == 0b1010101010101010
      } else if #[cfg(target_feature="simd128")] {
        u16x8_bitmask(self.simd) == 0b11111111
      } else {
        let v: [u64; 2] = cast(self);
        (v[0] & v[1] & 0x8000800080008000) == 0x8000800080008000
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

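  /// Widens the low 8 lanes of a `u8x16` to `i16` lanes (zero extension).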
  #[inline]
  pub fn from_u8x16_low(u: u8x16) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: unpack_low_i8_m128i(u.sse, m128i::zeroed()) }
      } else {
        let u_arr: [u8; 16] = cast(u);
        cast([
          u_arr[0] as u16 as i16,
          u_arr[1] as u16 as i16,
          u_arr[2] as u16 as i16,
          u_arr[3] as u16 as i16,
          u_arr[4] as u16 as i16,
          u_arr[5] as u16 as i16,
          u_arr[6] as u16 as i16,
          u_arr[7] as u16 as i16,
        ])
      }
    }
  }

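  /// Converts each `i32` lane to `i16`, saturating values that are out of
  /// range (values above `i16::MAX` clamp to `i16::MAX`, values below
  /// `i16::MIN` clamp to `i16::MIN`).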
  #[inline]
  #[must_use]
  pub fn from_i32x8_saturate(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i16x8 { sse: pack_i32_to_i16_m128i(extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2)) }
      } else if #[cfg(target_feature="sse2")] {
        i16x8 { sse: pack_i32_to_i16_m128i(v.a.sse, v.b.sse) }
      } else if #[cfg(target_feature="simd128")] {
        use core::arch::wasm32::*;

        i16x8 { simd: i16x8_narrow_i32x4(v.a.simd, v.b.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
        use core::arch::aarch64::*;

        unsafe {
          i16x8 { neon: vcombine_s16(vqmovn_s32(v.a.neon), vqmovn_s32(v.b.neon)) }
        }
      } else {
        fn clamp(a: i32) -> i16 {
          if a < i16::MIN as i32 {
            i16::MIN
          } else if a > i16::MAX as i32 {
            i16::MAX
          } else {
            a as i16
          }
        }

        i16x8::new([
          clamp(v.as_array_ref()[0]),
          clamp(v.as_array_ref()[1]),
          clamp(v.as_array_ref()[2]),
          clamp(v.as_array_ref()[3]),
          clamp(v.as_array_ref()[4]),
          clamp(v.as_array_ref()[5]),
          clamp(v.as_array_ref()[6]),
          clamp(v.as_array_ref()[7]),
        ])
      }
    }
  }

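  /// Converts each `i32` lane to `i16`, keeping only the low 16 bits
  /// (a wrapping conversion, the same as `as i16` on each lane).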
  #[inline]
  #[must_use]
  pub fn from_i32x8_truncate(v: i32x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        let a = v.avx2.bitand(set_splat_i32_m256i(0xffff));
        i16x8 { sse: pack_i32_to_u16_m128i(extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a)) }
      } else if #[cfg(target_feature="sse2")] {
        let a = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.a.sse));
        let b = shr_imm_i32_m128i::<16>(shl_imm_u32_m128i::<16>(v.b.sse));

        i16x8 { sse: pack_i32_to_i16_m128i(a, b) }
      } else {
        i16x8::new([
          v.as_array_ref()[0] as i16,
          v.as_array_ref()[1] as i16,
          v.as_array_ref()[2] as i16,
          v.as_array_ref()[3] as i16,
          v.as_array_ref()[4] as i16,
          v.as_array_ref()[5] as i16,
          v.as_array_ref()[6] as i16,
          v.as_array_ref()[7] as i16,
        ])
      }
    }
  }

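  /// Loads the first 8 values from the slice; the slice does not need any
  /// particular alignment.
  ///
  /// ## Panics
  /// * If the slice has fewer than 8 elements.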
  #[inline]
  #[must_use]
  pub fn from_slice_unaligned(input: &[i16]) -> Self {
    assert!(input.len() >= 8);

    pick! {
      if #[cfg(target_feature="sse2")] {
        unsafe { Self { sse: load_unaligned_m128i(&*(input.as_ptr() as *const [u8; 16])) } }
      } else if #[cfg(target_feature="simd128")] {
        unsafe { Self { simd: v128_load(input.as_ptr() as *const v128) } }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vld1q_s16(input.as_ptr() as *const i16) } }
      } else {
        unsafe { Self::new(*(input.as_ptr() as *const [i16; 8])) }
      }
    }
  }

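  /// Lanewise select: where a lane of `self` is all ones the lane comes from
  /// `t`, where it is all zeroes the lane comes from `f`. Intended for use
  /// with the masks produced by `cmp_eq` / `cmp_gt` / `cmp_lt`.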
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vbslq_s16(vreinterpretq_u16_s16(self.neon), t.neon, f.neon) } }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
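
  /// Lanewise `self < 0`: all ones in a lane where the value is negative,
  /// all zeroes otherwise.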
  #[inline]
  #[must_use]
  pub fn is_negative(self) -> Self {
    self.cmp_lt(Self::zeroed())
  }

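  /// Horizontal wrapping add of all 8 lanes into a single `i16`.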
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i16 {
    let arr: [i16; 8] = cast(self);

    (arr[0].wrapping_add(arr[1]).wrapping_add(arr[2].wrapping_add(arr[3])))
      .wrapping_add(
        arr[4].wrapping_add(arr[5]).wrapping_add(arr[6].wrapping_add(arr[7])),
      )
  }

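  /// Minimum value across all 8 lanes.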
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i16 {
    let arr: [i16; 8] = cast(self);

    (arr[0].min(arr[1]).min(arr[2].min(arr[3])))
      .min(arr[4].min(arr[5]).min(arr[6].min(arr[7])))
  }

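  /// Maximum value across all 8 lanes.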
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i16 {
    let arr: [i16; 8] = cast(self);

    (arr[0].max(arr[1]).max(arr[2].max(arr[3])))
      .max(arr[4].max(arr[5]).max(arr[6].max(arr[7])))
  }

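  /// Lanewise absolute value. Note that `i16::MIN` wraps back to `i16::MIN`,
  /// matching the behavior of the underlying SIMD instructions.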
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      // check ssse3 before sse2: ssse3 implies sse2, so the more specific
      // feature has to be tested first or its branch can never be selected.
      if #[cfg(target_feature="ssse3")] {
        Self { sse: abs_i16_m128i(self.sse) }
      } else if #[cfg(target_feature="sse2")] {
        let mask = shr_imm_i16_m128i::<15>(self.sse);
        Self { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vabsq_s16(self.neon) } }
      } else {
        self.is_negative().blend(self.neg(), self)
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: max_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_max(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmaxq_s16(self.neon, rhs.neon) } }
      } else {
        self.cmp_lt(rhs).blend(rhs, self)
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: min_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_s16(self.neon, rhs.neon) } }
      } else {
        self.cmp_lt(rhs).blend(self, rhs)
      }
    }
  }

  #[inline]
  #[must_use]
  pub fn saturating_add(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_saturating_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_add_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqaddq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].saturating_add(rhs.arr[0]),
          self.arr[1].saturating_add(rhs.arr[1]),
          self.arr[2].saturating_add(rhs.arr[2]),
          self.arr[3].saturating_add(rhs.arr[3]),
          self.arr[4].saturating_add(rhs.arr[4]),
          self.arr[5].saturating_add(rhs.arr[5]),
          self.arr[6].saturating_add(rhs.arr[6]),
          self.arr[7].saturating_add(rhs.arr[7]),
        ]}
      }
    }
  }
  #[inline]
  #[must_use]
  pub fn saturating_sub(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_saturating_i16_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_sub_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqsubq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].saturating_sub(rhs.arr[0]),
          self.arr[1].saturating_sub(rhs.arr[1]),
          self.arr[2].saturating_sub(rhs.arr[2]),
          self.arr[3].saturating_sub(rhs.arr[3]),
          self.arr[4].saturating_sub(rhs.arr[4]),
          self.arr[5].saturating_sub(rhs.arr[5]),
          self.arr[6].saturating_sub(rhs.arr[6]),
          self.arr[7].saturating_sub(rhs.arr[7]),
        ]}
      }
    }
  }

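  /// Multiplies corresponding lanes into 32-bit intermediates and then
  /// horizontally adds each adjacent pair, producing an `i32x4`. For example,
  /// `[1, 2, 3, 4, 5, 6, 7, 8]` dotted with a vector of all ones gives
  /// `[3, 7, 11, 15]`.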
  #[inline]
  #[must_use]
  pub fn dot(self, rhs: Self) -> i32x4 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        i32x4 { sse: mul_i16_horizontal_add_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        i32x4 { simd: i32x4_dot_i16x8(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          let pl = vmull_s16(vget_low_s16(self.neon), vget_low_s16(rhs.neon));
          let ph = vmull_high_s16(self.neon, rhs.neon);
          i32x4 { neon: vpaddq_s32(pl, ph) }
        }
      } else {
        i32x4 { arr: [
          (i32::from(self.arr[0]) * i32::from(rhs.arr[0])) + (i32::from(self.arr[1]) * i32::from(rhs.arr[1])),
          (i32::from(self.arr[2]) * i32::from(rhs.arr[2])) + (i32::from(self.arr[3]) * i32::from(rhs.arr[3])),
          (i32::from(self.arr[4]) * i32::from(rhs.arr[4])) + (i32::from(self.arr[5]) * i32::from(rhs.arr[5])),
          (i32::from(self.arr[6]) * i32::from(rhs.arr[6])) + (i32::from(self.arr[7]) * i32::from(rhs.arr[7])),
        ] }
      }
    }
  }

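  /// Q15 fixed-point multiply with rounding: each lane computes
  /// `(i32::from(a) * i32::from(b) + 0x4000) >> 15`. Treating both inputs as
  /// fractions in `[-1, 1)`, this is a rounded fractional multiply.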
  #[inline]
  #[must_use]
  pub fn mul_scale_round(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: mul_i16_scale_round_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="sse2")] {
        // widen the products to i32, add the rounding constant, shift back
        // down to Q15, then re-pack to i16
        let hi = mul_i16_keep_high_m128i(self.sse, rhs.sse);
        let lo = mul_i16_keep_low_m128i(self.sse, rhs.sse);
        let mut v1 = unpack_low_i16_m128i(lo, hi);
        let mut v2 = unpack_high_i16_m128i(lo, hi);
        let a = set_splat_i32_m128i(0x4000);
        v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
        v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
        let s = pack_i32_to_i16_m128i(v1, v2);
        Self { sse: s }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_q15mulr_sat(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqrdmulhq_s16(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          ((i32::from(self.arr[0]) * i32::from(rhs.arr[0]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[1]) * i32::from(rhs.arr[1]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[2]) * i32::from(rhs.arr[2]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[3]) * i32::from(rhs.arr[3]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[4]) * i32::from(rhs.arr[4]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[5]) * i32::from(rhs.arr[5]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[6]) * i32::from(rhs.arr[6]) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[7]) * i32::from(rhs.arr[7]) + 0x4000) >> 15) as i16,
        ]}
      }
    }
  }

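  /// Treats the 8 vectors as the rows of an 8x8 `i16` matrix and returns the
  /// transpose, so `out[r].as_array_ref()[c] == data[c].as_array_ref()[r]`.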
  #[must_use]
  #[inline]
  pub fn transpose(data: [i16x8; 8]) -> [i16x8; 8] {
    pick! {
      if #[cfg(target_feature="sse2")] {
        let a1 = unpack_low_i16_m128i(data[0].sse, data[1].sse);
        let a2 = unpack_high_i16_m128i(data[0].sse, data[1].sse);
        let a3 = unpack_low_i16_m128i(data[2].sse, data[3].sse);
        let a4 = unpack_high_i16_m128i(data[2].sse, data[3].sse);
        let a5 = unpack_low_i16_m128i(data[4].sse, data[5].sse);
        let a6 = unpack_high_i16_m128i(data[4].sse, data[5].sse);
        let a7 = unpack_low_i16_m128i(data[6].sse, data[7].sse);
        let a8 = unpack_high_i16_m128i(data[6].sse, data[7].sse);

        let b1 = unpack_low_i32_m128i(a1, a3);
        let b2 = unpack_high_i32_m128i(a1, a3);
        let b3 = unpack_low_i32_m128i(a2, a4);
        let b4 = unpack_high_i32_m128i(a2, a4);
        let b5 = unpack_low_i32_m128i(a5, a7);
        let b6 = unpack_high_i32_m128i(a5, a7);
        let b7 = unpack_low_i32_m128i(a6, a8);
        let b8 = unpack_high_i32_m128i(a6, a8);

        [
          i16x8 { sse: unpack_low_i64_m128i(b1, b5) },
          i16x8 { sse: unpack_high_i64_m128i(b1, b5) },
          i16x8 { sse: unpack_low_i64_m128i(b2, b6) },
          i16x8 { sse: unpack_high_i64_m128i(b2, b6) },
          i16x8 { sse: unpack_low_i64_m128i(b3, b7) },
          i16x8 { sse: unpack_high_i64_m128i(b3, b7) },
          i16x8 { sse: unpack_low_i64_m128i(b4, b8) },
          i16x8 { sse: unpack_high_i64_m128i(b4, b8) },
        ]
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{

        #[inline] fn vtrq32(a: int16x8_t, b: int16x8_t) -> (int16x8_t, int16x8_t) {
          unsafe {
            let r = vtrnq_s32(vreinterpretq_s32_s16(a), vreinterpretq_s32_s16(b));
            (vreinterpretq_s16_s32(r.0), vreinterpretq_s16_s32(r.1))
          }
        }

        unsafe {
          let (q0, q2) = vtrq32(data[0].neon, data[2].neon);
          let (q1, q3) = vtrq32(data[1].neon, data[3].neon);
          let (q4, q6) = vtrq32(data[4].neon, data[6].neon);
          let (q5, q7) = vtrq32(data[5].neon, data[7].neon);

          let b1 = vtrnq_s16(q0, q1);
          let b2 = vtrnq_s16(q2, q3);
          let b3 = vtrnq_s16(q4, q5);
          let b4 = vtrnq_s16(q6, q7);

          // recombine the 64-bit halves to finish the transpose
          [
            i16x8 { neon: vcombine_s16(vget_low_s16(b1.0), vget_low_s16(b3.0)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b1.1), vget_low_s16(b3.1)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b2.0), vget_low_s16(b4.0)) },
            i16x8 { neon: vcombine_s16(vget_low_s16(b2.1), vget_low_s16(b4.1)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b1.0), vget_high_s16(b3.0)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b1.1), vget_high_s16(b3.1)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b2.0), vget_high_s16(b4.0)) },
            i16x8 { neon: vcombine_s16(vget_high_s16(b2.1), vget_high_s16(b4.1)) },
          ]
        }
      } else if #[cfg(target_feature="simd128")] {
        #[inline] fn lo_i16(a: v128, b: v128) -> v128 { i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b) }
        #[inline] fn hi_i16(a: v128, b: v128) -> v128 { i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a, b) }
        #[inline] fn lo_i32(a: v128, b: v128) -> v128 { i32x4_shuffle::<0, 4, 1, 5>(a, b) }
        #[inline] fn hi_i32(a: v128, b: v128) -> v128 { i32x4_shuffle::<2, 6, 3, 7>(a, b) }
        #[inline] fn lo_i64(a: v128, b: v128) -> v128 { i64x2_shuffle::<0, 2>(a, b) }
        #[inline] fn hi_i64(a: v128, b: v128) -> v128 { i64x2_shuffle::<1, 3>(a, b) }

        let a1 = lo_i16(data[0].simd, data[1].simd);
        let a2 = hi_i16(data[0].simd, data[1].simd);
        let a3 = lo_i16(data[2].simd, data[3].simd);
        let a4 = hi_i16(data[2].simd, data[3].simd);
        let a5 = lo_i16(data[4].simd, data[5].simd);
        let a6 = hi_i16(data[4].simd, data[5].simd);
        let a7 = lo_i16(data[6].simd, data[7].simd);
        let a8 = hi_i16(data[6].simd, data[7].simd);

        let b1 = lo_i32(a1, a3);
        let b2 = hi_i32(a1, a3);
        let b3 = lo_i32(a2, a4);
        let b4 = hi_i32(a2, a4);
        let b5 = lo_i32(a5, a7);
        let b6 = hi_i32(a5, a7);
        let b7 = lo_i32(a6, a8);
        let b8 = hi_i32(a6, a8);

        [
          i16x8 { simd: lo_i64(b1, b5) },
          i16x8 { simd: hi_i64(b1, b5) },
          i16x8 { simd: lo_i64(b2, b6) },
          i16x8 { simd: hi_i64(b2, b6) },
          i16x8 { simd: lo_i64(b3, b7) },
          i16x8 { simd: hi_i64(b3, b7) },
          i16x8 { simd: lo_i64(b4, b8) },
          i16x8 { simd: hi_i64(b4, b8) },
        ]

      } else {
        #[inline(always)]
        fn transpose_column(data: &[i16x8; 8], index: usize) -> i16x8 {
          i16x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

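  /// Same Q15 rounded multiply as `mul_scale_round`, but with a single `i16`
  /// scale factor applied to every lane.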
  #[inline]
  #[must_use]
  pub fn mul_scale_round_n(self, rhs: i16) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: mul_i16_scale_round_m128i(self.sse, set_splat_i16_m128i(rhs)) }
      } else if #[cfg(target_feature="sse2")] {
        let r = set_splat_i16_m128i(rhs);
        let hi = mul_i16_keep_high_m128i(self.sse, r);
        let lo = mul_i16_keep_low_m128i(self.sse, r);
        let mut v1 = unpack_low_i16_m128i(lo, hi);
        let mut v2 = unpack_high_i16_m128i(lo, hi);
        let a = set_splat_i32_m128i(0x4000);
        v1 = shr_imm_i32_m128i::<15>(add_i32_m128i(v1, a));
        v2 = shr_imm_i32_m128i::<15>(add_i32_m128i(v2, a));
        let s = pack_i32_to_i16_m128i(v1, v2);
        Self { sse: s }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i16x8_q15mulr_sat(self.simd, i16x8_splat(rhs)) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vqrdmulhq_n_s16(self.neon, rhs) } }
      } else {
        Self { arr: [
          ((i32::from(self.arr[0]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[1]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[2]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[3]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[4]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[5]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[6]) * i32::from(rhs) + 0x4000) >> 15) as i16,
          ((i32::from(self.arr[7]) * i32::from(rhs) + 0x4000) >> 15) as i16,
        ]}
      }
    }
  }

  #[inline]
  pub fn to_array(self) -> [i16; 8] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i16; 8] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i16; 8] {
    cast_mut(self)
  }
}