use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i32x4 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i32x4 { pub(crate) simd: v128 }

    impl Default for i32x4 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i32x4 {
      fn eq(&self, other: &Self) -> bool {
        u32x4_all_true(i32x4_eq(self.simd, other.simd))
      }
    }

    impl Eq for i32x4 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i32x4 { pub(crate) neon: int32x4_t }

    impl Default for i32x4 {
      #[inline]
      #[must_use]
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i32x4 {
      #[inline]
      #[must_use]
      fn eq(&self, other: &Self) -> bool {
        unsafe { vminvq_u32(vceqq_s32(self.neon, other.neon)) == u32::MAX }
      }
    }

    impl Eq for i32x4 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i32x4 { pub(crate) arr: [i32;4] }
  }
}

int_uint_consts!(i32, 4, i32x4, i32x4, i32a4, const_i32_as_i32x4, 128);

unsafe impl Zeroable for i32x4 {}
unsafe impl Pod for i32x4 {}

impl Add for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Sub for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl Mul for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: mul_32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_mul(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vmulq_s32(self.neon, rhs.neon) } }
      } else {
        let arr1: [i32; 4] = cast(self);
        let arr2: [i32; 4] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
        ])
      }
    }
  }
}

impl Add<i32> for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: i32) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i32> for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i32) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i32> for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i32) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  #[must_use]
  fn add(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).add(rhs)
  }
}

impl Sub<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  #[must_use]
  fn sub(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).sub(rhs)
  }
}

impl Mul<i32x4> for i32 {
  type Output = i32x4;
  #[inline]
  #[must_use]
  fn mul(self, rhs: i32x4) -> Self::Output {
    i32x4::splat(self).mul(rhs)
  }
}

impl BitAnd for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vandq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitOr for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vorrq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
        ]}
      }
    }
  }
}

impl BitXor for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: veorq_s32(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
        ]}
      }
    }
  }
}

macro_rules! impl_shl_t_for_i32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i32x4 {
      type Output = Self;
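      /// Shifts each lane left by the given amount (vacated bits are zero).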
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i32x4_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s32(self.neon, vmovq_n_s32(rhs as i32)) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_i32x4 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i32x4 {
      type Output = Self;
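      /// Shifts each lane right by the given amount (arithmetic shift: the
      /// sign bit is copied into the vacated bits).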
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shr_all_i32_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i32x4_shr(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe { Self { neon: vshlq_s32(self.neon, vmovq_n_s32(-(rhs as i32))) } }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shr_t_for_i32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_eq_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vceqq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpGt for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_gt_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vcgtq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl CmpLt for i32x4 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn cmp_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: cmp_lt_mask_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vreinterpretq_s32_u32(vcltq_s32(self.neon, rhs.neon)) } }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
          if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
          if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
          if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
        ]}
      }
    }
  }
}

impl i32x4 {
  #[inline]
  #[must_use]
  pub fn new(array: [i32; 4]) -> Self {
    Self::from(array)
  }
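  /// Selects lanes from `t` or `f` using `self` as the mask: where a lane of
  /// `self` is all ones the lane of `t` is taken, where it is all zeros the
  /// lane of `f` is taken. Intended for the masks produced by the `cmp_*`
  /// methods; e.g. `a.cmp_gt(b).blend(a, b)` gives the lanewise maximum.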
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vbslq_s32(vreinterpretq_u32_s32(self.neon), t.neon, f.neon) } }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }
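  /// Lanewise absolute value. Wraps on overflow: `i32::MIN` stays `i32::MIN`.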
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="ssse3")] {
        Self { sse: abs_i32_m128i(self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vabsq_s32(self.neon) } }
      } else {
        let arr: [i32; 4] = cast(self);
        cast([
          arr[0].wrapping_abs(),
          arr[1].wrapping_abs(),
          arr[2].wrapping_abs(),
          arr[3].wrapping_abs(),
        ])
      }
    }
  }

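  /// Horizontal (wrapping) sum of all four lanes.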
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // fold the upper two lanes onto the lower two, then the second lane
        // onto the first, and read the total out of lane 0.
        let hi64 = unpack_high_i64_m128i(self.sse, self.sse);
        let sum64 = add_i32_m128i(hi64, self.sse);
        let hi32 = shuffle_ai_f32_all_m128i::<0b10_11_00_01>(sum64);
        let sum32 = add_i32_m128i(sum64, hi32);
        get_i32_from_m128i_s(sum32)
      } else {
        let arr: [i32; 4] = cast(self);
        arr[0].wrapping_add(arr[1]).wrapping_add(
          arr[2].wrapping_add(arr[3]))
      }
    }
  }

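  /// Horizontal maximum of all four lanes.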
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i32 {
    let arr: [i32; 4] = cast(self);
    arr[0].max(arr[1]).max(arr[2].max(arr[3]))
  }

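  /// Horizontal minimum of all four lanes.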
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i32 {
    let arr: [i32; 4] = cast(self);
    arr[0].min(arr[1]).min(arr[2].min(arr[3]))
  }

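  /// Lanewise maximum of `self` and `rhs`.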
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: max_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_max(self.simd, rhs.simd) }
      } else {
        self.cmp_lt(rhs).blend(rhs, self)
      }
    }
  }
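  /// Lanewise minimum of `self` and `rhs`.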
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: min_i32_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i32x4_min(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vminq_s32(self.neon, rhs.neon) } }
      } else {
        self.cmp_lt(rhs).blend(self, rhs)
      }
    }
  }
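  /// Converts each `i32` lane to `f32` (values too large to represent exactly
  /// are rounded).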
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f32x4 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        cast(convert_to_m128_from_i32_m128i(self.sse))
      } else if #[cfg(target_feature="simd128")] {
        cast(Self { simd: f32x4_convert_i32x4(self.simd) })
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        cast(unsafe { Self { neon: vreinterpretq_s32_f32(vcvtq_f32_s32(self.neon)) } })
      } else {
        let arr: [i32; 4] = cast(self);
        cast([
          arr[0] as f32,
          arr[1] as f32,
          arr[2] as f32,
          arr[3] as f32,
        ])
      }
    }
  }

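  /// Packs the sign bit of each lane into the low 4 bits of the result
  /// (lane 0 -> bit 0, ..., lane 3 -> bit 3).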
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // the f32 move mask reads the same 4 sign bits
        move_mask_m128(cast(self.sse))
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) as i32
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {
          // lanes with the sign bit set become all ones, the rest all zeros
          let masked = vcltq_s32(self.neon, vdupq_n_s32(0));

          // keep one distinct bit (1, 2, 4, 8) per masked lane
          let selectbit: uint32x4_t = core::intrinsics::transmute([1u32, 2, 4, 8]);
          let r = vandq_u32(masked, selectbit);

          // the horizontal add packs those bits into the low 4 bits
          vaddvq_u32(r) as i32
        }
      } else {
        (((self.arr[0] as i32) < 0) as i32) << 0 |
        (((self.arr[1] as i32) < 0) as i32) << 1 |
        (((self.arr[2] as i32) < 0) as i32) << 2 |
        (((self.arr[3] as i32) < 0) as i32) << 3
      }
    }
  }

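  /// Returns `true` if any lane has its sign bit set.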
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1000100010001000) != 0
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) != 0
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000000080000000) != 0
      }
    }
  }

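  /// Returns `true` if every lane has its sign bit set.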
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        (move_mask_i8_m128i(self.sse) & 0b1000100010001000) == 0b1000100010001000
      } else if #[cfg(target_feature="simd128")] {
        u32x4_bitmask(self.simd) == 0b1111
      } else {
        let v: [u64; 2] = cast(self);
        (v[0] & v[1] & 0x8000000080000000) == 0x8000000080000000
      }
    }
  }

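  /// Returns `true` if no lane has its sign bit set.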
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [i32; 4] {
    cast(self)
  }

  #[inline]
  pub fn as_array_ref(&self) -> &[i32; 4] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i32; 4] {
    cast_mut(self)
  }
}