@@ -384,21 +384,25 @@ X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr,
384384                                            const  int64_t  left,
385385                                            const  int64_t  right)
386386{
387-     //  median of 8 
387+     //  median of 8x8 elements 
388388    int64_t  size = (right - left) / 8 ;
389389    using  zmm_t  = typename  vtype::zmm_t ;
390-     __m512i rand_index = _mm512_set_epi64 (left + size,
391-                                           left + 2  * size,
392-                                           left + 3  * size,
393-                                           left + 4  * size,
394-                                           left + 5  * size,
395-                                           left + 6  * size,
396-                                           left + 7  * size,
397-                                           left + 8  * size);
398-     zmm_t  rand_vec = vtype::template  i64gather<sizeof (type_t )>(rand_index, arr);
390+     zmm_t  v[8 ];
391+     for  (int64_t  ii = 0 ; ii < 8 ; ++ii) {
392+         v[ii] = vtype::loadu (arr + left + ii*size);
393+     }
394+     COEX<vtype>(v[0 ], v[1 ]); COEX<vtype>(v[2 ], v[3 ]); /*  step 1 */ 
395+     COEX<vtype>(v[4 ], v[5 ]); COEX<vtype>(v[6 ], v[7 ]);
396+     COEX<vtype>(v[0 ], v[2 ]); COEX<vtype>(v[1 ], v[3 ]); /*  step 2 */ 
397+     COEX<vtype>(v[4 ], v[6 ]); COEX<vtype>(v[5 ], v[7 ]);
398+     COEX<vtype>(v[0 ], v[4 ]); COEX<vtype>(v[1 ], v[2 ]); /*  step 3 */ 
399+     COEX<vtype>(v[5 ], v[6 ]); COEX<vtype>(v[3 ], v[7 ]);
400+     COEX<vtype>(v[1 ], v[5 ]); COEX<vtype>(v[2 ], v[6 ]); /*  step 4 */ 
401+     COEX<vtype>(v[3 ], v[5 ]); COEX<vtype>(v[2 ], v[4 ]); /*  step 5 */ 
402+     COEX<vtype>(v[3 ], v[4 ]);                   /*  step 6 */ 
399403    //  pivot will never be a nan, since there are no nan's!
400-     zmm_t  sort = sort_zmm_64bit<vtype>(rand_vec );
404+     zmm_t  sort = sort_zmm_64bit<vtype>(v[ 3 ] );
401405    return  ((type_t  *)&sort)[4 ];
402406}
403407
404- #endif 
408+ #endif 
0 commit comments