@@ -68,12 +68,17 @@ struct avx2_vector<int64_t> {
6868 {
6969 return _mm256_set1_epi64x (type_max ());
7070 } // TODO: this should broadcast bits as is?
71+ static opmask_t knot_opmask (opmask_t x)
72+ {
73+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
74+ return _mm256_xor_si256 (x, allTrue);
75+ }
7176 static opmask_t get_partial_loadmask (uint64_t num_to_read)
7277 {
7378 auto mask = ((0x1ull << num_to_read) - 0x1ull );
7479 return convert_int_to_avx2_mask_64bit (mask);
7580 }
76- static ymmi_t seti (int v1, int v2, int v3, int v4)
81+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
7782 {
7883 return _mm256_set_epi64x (v1, v2, v3, v4);
7984 }
@@ -209,6 +214,9 @@ struct avx2_vector<int64_t> {
209214 {
210215 return v;
211216 }
217+ static bool all_false (opmask_t k){
218+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
219+ }
212220};
213221template <>
214222struct avx2_vector <uint64_t > {
@@ -239,12 +247,17 @@ struct avx2_vector<uint64_t> {
239247 {
240248 return _mm256_set1_epi64x (type_max ());
241249 }
250+ static opmask_t knot_opmask (opmask_t x)
251+ {
252+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
253+ return _mm256_xor_si256 (x, allTrue);
254+ }
242255 static opmask_t get_partial_loadmask (uint64_t num_to_read)
243256 {
244257 auto mask = ((0x1ull << num_to_read) - 0x1ull );
245258 return convert_int_to_avx2_mask_64bit (mask);
246259 }
247- static ymmi_t seti (int v1, int v2, int v3, int v4)
260+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
248261 {
249262 return _mm256_set_epi64x (v1, v2, v3, v4);
250263 }
@@ -378,6 +391,9 @@ struct avx2_vector<uint64_t> {
378391 {
379392 return v;
380393 }
394+ static bool all_false (opmask_t k){
395+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
396+ }
381397};
382398
383399/*
@@ -421,6 +437,11 @@ struct avx2_vector<double> {
421437 {
422438 return _mm256_set1_pd (type_max ());
423439 }
440+ static opmask_t knot_opmask (opmask_t x)
441+ {
442+ auto allTrue = _mm256_set1_epi64x (0xFFFF'FFFF'FFFF'FFFF );
443+ return _mm256_xor_si256 (x, allTrue);
444+ }
424445 static opmask_t get_partial_loadmask (uint64_t num_to_read)
425446 {
426447 auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -440,7 +461,7 @@ struct avx2_vector<double> {
440461 static_assert (type == (0x01 | 0x80 ), " should not reach here" );
441462 }
442463 }
443- static ymmi_t seti (int v1, int v2, int v3, int v4)
464+ static ymmi_t seti (int64_t v1, int64_t v2, int64_t v3, int64_t v4)
444465 {
445466 return _mm256_set_epi64x (v1, v2, v3, v4);
446467 }
@@ -571,6 +592,9 @@ struct avx2_vector<double> {
571592 {
572593 return _mm256_castpd_si256 (v);
573594 }
595+ static bool all_false (opmask_t k){
596+ return _mm256_movemask_pd (_mm256_castsi256_pd (k)) == 0 ;
597+ }
574598};
575599
576600struct avx2_64bit_swizzle_ops {
0 commit comments