1use super::*;
2
pick! {
  if #[cfg(target_feature="avx2")] {
    /// Eight `i32` lanes held in a single 256-bit AVX2 register.
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) avx2: m256i }
  } else {
    /// Eight `i32` lanes emulated as two `i32x4` halves when AVX2 is not
    /// available; size and alignment match the AVX2 layout (32 bytes).
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct i32x8 { pub(crate) a : i32x4, pub(crate) b : i32x4}
  }
}
14
// NOTE(review): macro defined elsewhere in the crate; presumed to emit the
// shared integer constants and const-friendly `i32a8` companion for this
// 256-bit type — verify against the macro's definition.
int_uint_consts!(i32, 8, i32x8, i32x8, i32a8, const_i32_as_i32x8, 256);
16
// SAFETY: `i32x8` is `repr(C)` with only plain-old-data contents (either one
// `m256i` or two `i32x4`), both variants are exactly 32 bytes with no padding,
// and the all-zeros bit pattern is a valid value.
unsafe impl Zeroable for i32x8 {}
unsafe impl Pod for i32x8 {}
19
impl Add for i32x8 {
  type Output = Self;
  /// Lanewise addition (the underlying SIMD adds wrap on overflow).
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // All eight lanes in one AVX2 instruction.
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        // Fallback: add the two 128-bit halves independently.
        Self {
          a : self.a.add(rhs.a),
          b : self.b.add(rhs.b),
        }
      }
    }
  }
}
37
impl Sub for i32x8 {
  type Output = Self;
  /// Lanewise subtraction (the underlying SIMD subtracts wrap on overflow).
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        // Fallback: subtract the two 128-bit halves independently.
        Self {
          a : self.a.sub(rhs.a),
          b : self.b.sub(rhs.b),
        }
      }
    }
  }
}
55
impl Mul for i32x8 {
  type Output = Self;
  /// Lanewise multiplication, keeping the low 32 bits of each product
  /// (i.e. wrapping multiply).
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // `keep_low` discards the upper half of each 64-bit product.
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.mul(rhs.a),
          b : self.b.mul(rhs.b),
        }
      }
    }
  }
}
73
74impl Add<i32> for i32x8 {
75 type Output = Self;
76 #[inline]
77 #[must_use]
78 fn add(self, rhs: i32) -> Self::Output {
79 self.add(Self::splat(rhs))
80 }
81}
82
83impl Sub<i32> for i32x8 {
84 type Output = Self;
85 #[inline]
86 #[must_use]
87 fn sub(self, rhs: i32) -> Self::Output {
88 self.sub(Self::splat(rhs))
89 }
90}
91
92impl Mul<i32> for i32x8 {
93 type Output = Self;
94 #[inline]
95 #[must_use]
96 fn mul(self, rhs: i32) -> Self::Output {
97 self.mul(Self::splat(rhs))
98 }
99}
100
101impl Add<i32x8> for i32 {
102 type Output = i32x8;
103 #[inline]
104 #[must_use]
105 fn add(self, rhs: i32x8) -> Self::Output {
106 i32x8::splat(self) + rhs
107 }
108}
109
110impl Sub<i32x8> for i32 {
111 type Output = i32x8;
112 #[inline]
113 #[must_use]
114 fn sub(self, rhs: i32x8) -> Self::Output {
115 i32x8::splat(self) - rhs
116 }
117}
118
119impl Mul<i32x8> for i32 {
120 type Output = i32x8;
121 #[inline]
122 #[must_use]
123 fn mul(self, rhs: i32x8) -> Self::Output {
124 i32x8::splat(self) * rhs
125 }
126}
127
impl BitAnd for i32x8 {
  type Output = Self;
  /// Bitwise AND across the full 256 bits.
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitand(rhs.a),
          b : self.b.bitand(rhs.b),
        }
      }
    }
  }
}
145
impl BitOr for i32x8 {
  type Output = Self;
  /// Bitwise OR across the full 256 bits.
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitor(rhs.a),
          b : self.b.bitor(rhs.b),
        }
      }
    }
  }
}
162
impl BitXor for i32x8 {
  type Output = Self;
  /// Bitwise XOR across the full 256 bits.
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.bitxor(rhs.a),
          b : self.b.bitxor(rhs.b),
        }
      }
    }
  }
}
180
// Implements `Shl<T>` for every primitive-integer shift-count type `T`;
// all eight lanes are shifted by the same count.
macro_rules! impl_shl_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      /// Shifts each lane left by `rhs` bits.
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // The AVX2 shift reads its count from the low 64 bits of a vector.
            // NOTE(review): counts >= 32 zero every lane on this path; confirm
            // the `i32x4` fallback agrees for out-of-range counts.
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shl(rhs),
              b : self.b.shl(rhs),
            }
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
205
// Implements `Shr<T>` (arithmetic, sign-preserving: `shr_all_i32`) for every
// primitive-integer shift-count type `T`; all lanes use the same count.
macro_rules! impl_shr_t_for_i32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i32x8 {
      type Output = Self;
      #[inline]
      #[must_use]
      /// Shifts each lane right by `rhs` bits, filling with the sign bit.
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            // The AVX2 shift reads its count from the low 64 bits of a vector.
            // NOTE(review): counts >= 32 saturate each lane to its sign on this
            // path; confirm the `i32x4` fallback agrees for such counts.
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_i32_m256i(self.avx2, shift) }
          } else {
            Self {
              a : self.a.shr(rhs),
              b : self.b.shr(rhs),
            }
          }
        }
      }
    })+
  };
}

impl_shr_t_for_i32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
231
impl CmpEq for i32x8 {
  type Output = Self;
  /// Lanewise equality: each lane becomes all-ones (`-1`) where equal,
  /// all-zeros otherwise.
  #[inline]
  #[must_use]
  fn cmp_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_eq(rhs.a),
          b : self.b.cmp_eq(rhs.b),
        }
      }
    }
  }
}
249
impl CmpGt for i32x8 {
  type Output = Self;
  /// Lanewise signed `self > rhs`: each lane becomes all-ones (`-1`) where
  /// true, all-zeros otherwise.
  #[inline]
  #[must_use]
  fn cmp_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_gt_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.cmp_gt(rhs.a),
          b : self.b.cmp_gt(rhs.b),
        }
      }
    }
  }
}
267
268impl CmpLt for i32x8 {
269 type Output = Self;
270 #[inline]
271 #[must_use]
272 fn cmp_lt(self, rhs: Self) -> Self::Output {
273 pick! {
274 if #[cfg(target_feature="avx2")] {
275 Self { avx2: !cmp_gt_mask_i32_m256i(self.avx2, rhs.avx2) ^ cmp_eq_mask_i32_m256i(self.avx2,rhs.avx2) }
276 } else {
277 Self {
278 a : self.a.cmp_lt(rhs.a),
279 b : self.b.cmp_lt(rhs.b),
280 }
281 }
282 }
283 }
284}
impl i32x8 {
  /// Builds a vector from an array of eight lanes (lane 0 first).
  #[inline]
  #[must_use]
  pub fn new(array: [i32; 8]) -> Self {
    Self::from(array)
  }

  /// Sign-extends each of the eight `i16` lanes of `v` into an `i32` lane.
  #[inline]
  #[must_use]
  pub fn from_i16x8(v: i16x8) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        i32x8 { avx2:convert_to_i32_m256i_from_i16_m128i(v.sse) }
      } else if #[cfg(target_feature="sse2")] {
        // Interleaving the register with itself widens each 16-bit lane to
        // 32 bits (value duplicated into the high half); the arithmetic
        // shift by 16 then sign-extends it into place.
        i32x8 {
          a: i32x4 { sse: shr_imm_i32_m128i::<16>( unpack_low_i16_m128i(v.sse, v.sse)) },
          b: i32x4 { sse: shr_imm_i32_m128i::<16>( unpack_high_i16_m128i(v.sse, v.sse)) },
        }
      } else {
        // Portable scalar path: widen lane by lane.
        i32x8::new([
          v.as_array_ref()[0] as i32,
          v.as_array_ref()[1] as i32,
          v.as_array_ref()[2] as i32,
          v.as_array_ref()[3] as i32,
          v.as_array_ref()[4] as i32,
          v.as_array_ref()[5] as i32,
          v.as_array_ref()[6] as i32,
          v.as_array_ref()[7] as i32,
        ])
      }
    }
  }

  /// Lanewise select using `self` as the mask: lanes where the mask is
  /// all-ones take the value from `t`, lanes where it is all-zeros take the
  /// value from `f`. Intended for masks produced by the comparison ops; the
  /// AVX2 path blends per byte on each byte's high bit, so mixed-bit masks
  /// may not match the fallback.
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else {
        Self {
          a : self.a.blend(t.a, f.a),
          b : self.b.blend(t.b, f.b)
        }
      }
    }
  }

  /// Horizontal sum of all eight lanes.
  #[inline]
  #[must_use]
  pub fn reduce_add(self) -> i32 {
    // Fold the two 128-bit halves together, then reduce the remaining four
    // lanes.
    let arr: [i32x4; 2] = cast(self);
    (arr[0] + arr[1]).reduce_add()
  }

  /// Largest lane value.
  #[inline]
  #[must_use]
  pub fn reduce_max(self) -> i32 {
    let arr: [i32x4; 2] = cast(self);
    arr[0].max(arr[1]).reduce_max()
  }

  /// Smallest lane value.
  #[inline]
  #[must_use]
  pub fn reduce_min(self) -> i32 {
    let arr: [i32x4; 2] = cast(self);
    arr[0].min(arr[1]).reduce_min()
  }

  /// Lanewise absolute value.
  /// NOTE(review): as with x86 `pabsd`, `i32::MIN` has no positive
  /// counterpart and maps to itself on the AVX2 path — confirm the scalar
  /// fallback matches.
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: abs_i32_m256i(self.avx2) }
      } else {
        Self {
          a : self.a.abs(),
          b : self.b.abs(),
        }
      }
    }
  }
  /// Lanewise signed maximum of `self` and `rhs`.
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.max(rhs.a),
          b : self.b.max(rhs.b),
        }
      }
    }
  }
  /// Lanewise signed minimum of `self` and `rhs`.
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_i32_m256i(self.avx2, rhs.avx2) }
      } else {
        Self {
          a : self.a.min(rhs.a),
          b : self.b.min(rhs.b),
        }
      }
    }
  }
  /// Converts each lane to `f32` (values beyond `f32`'s 24-bit mantissa are
  /// rounded by the conversion).
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f32x8 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        cast(convert_to_m256_from_i32_m256i(self.avx2))
      } else {
        cast([
          self.a.round_float(),
          self.b.round_float(),
        ])
      }
    }
  }

  /// Packs the sign bit of each lane into the low 8 bits of the result
  /// (lane 0 -> bit 0).
  #[inline]
  #[must_use]
  pub fn move_mask(self) -> i32 {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Reinterpret as floats so `movmskps` can grab one bit per 32-bit lane.
        move_mask_m256(cast(self.avx2)) as i32
      } else {
        // Combine the two 4-bit half masks.
        self.a.move_mask() | (self.b.move_mask() << 4)
      }
    }
  }

  /// `true` if any lane has its sign bit set (i.e. any lane of a boolean
  /// mask is "true").
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // The byte move-mask yields one bit per byte; the pattern keeps only
        // the top byte's bit of each 32-bit lane, i.e. the lane's sign bit.
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) != 0
      } else {
        (self.a | self.b).any()
      }
    }
  }
  /// `true` if every lane has its sign bit set.
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // All eight per-lane sign bits must survive the mask.
        ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) == 0b10001000100010001000100010001000
      } else {
        (self.a & self.b).all()
      }
    }
  }
  /// `true` if no lane has its sign bit set.
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  /// Transposes an 8x8 matrix of lanes: output row `i` holds lane `i` of
  /// every input row.
  #[must_use]
  #[inline]
  pub fn transpose(data: [i32x8; 8]) -> [i32x8; 8] {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // Stage 1: interleave adjacent row pairs 32 bits at a time.
        let a0 = unpack_low_i32_m256i(data[0].avx2, data[1].avx2);
        let a1 = unpack_high_i32_m256i(data[0].avx2, data[1].avx2);
        let a2 = unpack_low_i32_m256i(data[2].avx2, data[3].avx2);
        let a3 = unpack_high_i32_m256i(data[2].avx2, data[3].avx2);
        let a4 = unpack_low_i32_m256i(data[4].avx2, data[5].avx2);
        let a5 = unpack_high_i32_m256i(data[4].avx2, data[5].avx2);
        let a6 = unpack_low_i32_m256i(data[6].avx2, data[7].avx2);
        let a7 = unpack_high_i32_m256i(data[6].avx2, data[7].avx2);

        // Local equivalent of the classic `_MM_SHUFFLE(z, y, x, w)` macro.
        pub const fn mm_shuffle(z: i32, y: i32, x: i32, w: i32) -> i32 {
          (z << 6) | (y << 4) | (x << 2) | w
        }

        const SHUFF_LO : i32 = mm_shuffle(1,0,1,0);
        const SHUFF_HI : i32 = mm_shuffle(3,2,3,2);

        // Stage 2: shuffle the interleaved pairs into 64-bit groups (the
        // float shuffle is used, hence the casts through `m256`).
        let b0 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a0),cast(a2)));
        let b1 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a0),cast(a2)));
        let b2 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a1),cast(a3)));
        let b3 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a1),cast(a3)));
        let b4 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a4),cast(a6)));
        let b5 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a4),cast(a6)));
        let b6 = cast::<m256,m256i>(shuffle_m256::<SHUFF_LO>(cast(a5),cast(a7)));
        let b7 = cast::<m256,m256i>(shuffle_m256::<SHUFF_HI>(cast(a5),cast(a7)));

        // Stage 3: exchange 128-bit halves between register pairs to finish.
        [
          i32x8 { avx2: permute2z_m256i::<0x20>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x20>(b3, b7) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b0, b4) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b1, b5) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b2, b6) },
          i32x8 { avx2: permute2z_m256i::<0x31>(b3, b7) }
        ]
      } else {
        // Scalar fallback: gather column `index` from every row.
        #[inline(always)]
        fn transpose_column(data: &[i32x8; 8], index: usize) -> i32x8 {
          i32x8::new([
            data[0].as_array_ref()[index],
            data[1].as_array_ref()[index],
            data[2].as_array_ref()[index],
            data[3].as_array_ref()[index],
            data[4].as_array_ref()[index],
            data[5].as_array_ref()[index],
            data[6].as_array_ref()[index],
            data[7].as_array_ref()[index],
          ])
        }

        [
          transpose_column(&data, 0),
          transpose_column(&data, 1),
          transpose_column(&data, 2),
          transpose_column(&data, 3),
          transpose_column(&data, 4),
          transpose_column(&data, 5),
          transpose_column(&data, 6),
          transpose_column(&data, 7),
        ]
      }
    }
  }

  /// Copies the lanes out into an array.
  #[inline]
  pub fn to_array(self) -> [i32; 8] {
    cast(self)
  }

  /// Views the lanes as an array without copying.
  #[inline]
  pub fn as_array_ref(&self) -> &[i32; 8] {
    cast_ref(self)
  }

  /// Mutably views the lanes as an array without copying.
  #[inline]
  pub fn as_array_mut(&mut self) -> &mut [i32; 8] {
    cast_mut(self)
  }
}
544
impl Not for i32x8 {
  type Output = Self;
  /// Bitwise NOT of all 256 bits.
  #[inline]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else {
        Self {
          a : self.a.not(),
          b : self.b.not(),
        }
      }
    }
  }
}