@@ -224,21 +224,18 @@ int avx2_double_compressstore32(void *left_addr,
224224 typename avx2_vector<T>::reg_t reg)
225225{
226226 using vtype = avx2_vector<T>;
227- const __m256i oxff = _mm256_set1_epi32 (0xFFFFFFFF );
228227
229228 T *leftStore = (T *)left_addr;
230229 T *rightStore = (T *)right_addr;
231230
232231 int32_t shortMask = convert_avx2_mask_to_int (k);
233232 const __m256i &perm = _mm256_loadu_si256 (
234233 (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data ());
235- const __m256i &left = _mm256_loadu_si256 (
236- (const __m256i *)avx2_compressstore_lut32_left[shortMask].data ());
237234
238235 typename vtype::reg_t temp = vtype::permutevar (reg, perm);
239236
240- vtype::mask_storeu (leftStore, left , temp);
241- vtype::mask_storeu (rightStore, _mm256_xor_si256 (oxff, left) , temp);
237+ vtype::storeu (leftStore, temp);
238+ vtype::storeu (rightStore, temp);
242239
243240 return _mm_popcnt_u32 (shortMask);
244241}
@@ -250,22 +247,19 @@ int32_t avx2_double_compressstore64(void *left_addr,
250247 typename avx2_vector<T>::reg_t reg)
251248{
252249 using vtype = avx2_vector<T>;
253- const __m256i oxff = _mm256_set1_epi32 (0xFFFFFFFF );
254250
255251 T *leftStore = (T *)left_addr;
256252 T *rightStore = (T *)right_addr;
257253
258254 int32_t shortMask = convert_avx2_mask_to_int_64bit (k);
259255 const __m256i &perm = _mm256_loadu_si256 (
260256 (const __m256i *)avx2_compressstore_lut64_perm[shortMask].data ());
261- const __m256i &left = _mm256_loadu_si256 (
262- (const __m256i *)avx2_compressstore_lut64_left[shortMask].data ());
263257
264258 typename vtype::reg_t temp = vtype::cast_from (
265259 _mm256_permutevar8x32_epi32 (vtype::cast_to (reg), perm));
266260
267- vtype::mask_storeu (leftStore, left , temp);
268- vtype::mask_storeu (rightStore, _mm256_xor_si256 (oxff, left) , temp);
261+ vtype::storeu (leftStore, temp);
262+ vtype::storeu (rightStore, temp);
269263
270264 return _mm_popcnt_u32 (shortMask);
271265}
0 commit comments