Skip to content

Commit 9d6c9dc

Browse files
author
Raghuveer Devulapalli
committed
Fix formatting
1 parent 1f101ad commit 9d6c9dc

File tree

2 files changed

+49
-41
lines changed

2 files changed

+49
-41
lines changed

src/avx512-64bit-qsort.hpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,22 +587,26 @@ X86_SIMD_SORT_INLINE void sort_256_64bit(type_t *arr, int32_t N)
587587
uint64_t combined_mask;
588588
if (N < 192) {
589589
combined_mask = (0x1ull << (N - 128)) - 0x1ull;
590-
load_mask1 = (combined_mask) & 0xFF;
590+
load_mask1 = (combined_mask)&0xFF;
591591
load_mask2 = (combined_mask >> 8) & 0xFF;
592592
load_mask3 = (combined_mask >> 16) & 0xFF;
593593
load_mask4 = (combined_mask >> 24) & 0xFF;
594594
load_mask5 = (combined_mask >> 32) & 0xFF;
595595
load_mask6 = (combined_mask >> 40) & 0xFF;
596596
load_mask7 = (combined_mask >> 48) & 0xFF;
597597
load_mask8 = (combined_mask >> 56) & 0xFF;
598-
load_mask9 = 0x00; load_mask10 = 0x0;
599-
load_mask11 = 0x00; load_mask12 = 0x00;
600-
load_mask13 = 0x00; load_mask14 = 0x00;
601-
load_mask15 = 0x00; load_mask16 = 0x00;
598+
load_mask9 = 0x00;
599+
load_mask10 = 0x0;
600+
load_mask11 = 0x00;
601+
load_mask12 = 0x00;
602+
load_mask13 = 0x00;
603+
load_mask14 = 0x00;
604+
load_mask15 = 0x00;
605+
load_mask16 = 0x00;
602606
}
603607
else {
604608
combined_mask = (0x1ull << (N - 192)) - 0x1ull;
605-
load_mask9 = (combined_mask) & 0xFF;
609+
load_mask9 = (combined_mask)&0xFF;
606610
load_mask10 = (combined_mask >> 8) & 0xFF;
607611
load_mask11 = (combined_mask >> 16) & 0xFF;
608612
load_mask12 = (combined_mask >> 24) & 0xFF;
@@ -721,7 +725,6 @@ X86_SIMD_SORT_INLINE void sort_256_64bit(type_t *arr, int32_t N)
721725
vtype::mask_storeu(arr + 240, load_mask15, zmm[30]);
722726
vtype::mask_storeu(arr + 248, load_mask16, zmm[31]);
723727
}
724-
725728
}
726729

727730
template <typename vtype, typename type_t>

src/avx512-common-qsort.h

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,13 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
255255
type_t *biggest)
256256
{
257257
const int num_unroll = 8;
258-
if (right - left <= 2*num_unroll*vtype::numlanes) {
259-
return partition_avx512<vtype>(arr, left, right, pivot, smallest, biggest);
258+
if (right - left <= 2 * num_unroll * vtype::numlanes) {
259+
return partition_avx512<vtype>(
260+
arr, left, right, pivot, smallest, biggest);
260261
}
261262
/* make array length divisible by 8*vtype::numlanes, shortening the array */
262-
for (int32_t i = ((right - left) % (num_unroll*vtype::numlanes)); i > 0; --i) {
263+
for (int32_t i = ((right - left) % (num_unroll * vtype::numlanes)); i > 0;
264+
--i) {
263265
*smallest = std::min(*smallest, arr[left], comparison_func<vtype>);
264266
*biggest = std::max(*biggest, arr[left], comparison_func<vtype>);
265267
if (!comparison_func<vtype>(arr[left], pivot)) {
@@ -281,17 +283,18 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
281283
// We will now have at least 16 registers worth of data to process:
282284
// left and right vtype::numlanes values are partitioned at the end
283285
zmm_t vec_left[num_unroll], vec_right[num_unroll];
284-
#pragma GCC unroll 8
286+
#pragma GCC unroll 8
285287
for (int ii = 0; ii < num_unroll; ++ii) {
286-
vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes*ii);
287-
vec_right[ii] = vtype::loadu(arr + (right - vtype::numlanes*(num_unroll-ii)));
288+
vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
289+
vec_right[ii] = vtype::loadu(
290+
arr + (right - vtype::numlanes * (num_unroll - ii)));
288291
}
289292
// store points of the vectors
290293
int64_t r_store = right - vtype::numlanes;
291294
int64_t l_store = left;
292295
// indices for loading the elements
293-
left += num_unroll*vtype::numlanes;
294-
right -= num_unroll*vtype::numlanes;
296+
left += num_unroll * vtype::numlanes;
297+
right -= num_unroll * vtype::numlanes;
295298
while (right - left != 0) {
296299
zmm_t curr_vec[num_unroll];
297300
/*
@@ -300,57 +303,59 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
300303
* otherwise from the left side
301304
*/
302305
if ((r_store + vtype::numlanes) - right < left - l_store) {
303-
right -= num_unroll*vtype::numlanes;
304-
#pragma GCC unroll 8
306+
right -= num_unroll * vtype::numlanes;
307+
#pragma GCC unroll 8
305308
for (int ii = 0; ii < num_unroll; ++ii) {
306-
curr_vec[ii] = vtype::loadu(arr + right + ii*vtype::numlanes);
309+
curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
307310
}
308311
}
309312
else {
310-
#pragma GCC unroll 8
313+
#pragma GCC unroll 8
311314
for (int ii = 0; ii < num_unroll; ++ii) {
312-
curr_vec[ii] = vtype::loadu(arr + left + ii*vtype::numlanes);
315+
curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
313316
}
314-
left += num_unroll*vtype::numlanes;
317+
left += num_unroll * vtype::numlanes;
315318
}
316-
// partition the current vector and save it on both sides of the array
317-
#pragma GCC unroll 8
319+
// partition the current vector and save it on both sides of the array
320+
#pragma GCC unroll 8
318321
for (int ii = 0; ii < num_unroll; ++ii) {
319322
int32_t amount_ge_pivot
320323
= partition_vec<vtype>(arr,
321324
l_store,
322325
r_store + vtype::numlanes,
323326
curr_vec[ii],
324327
pivot_vec,
325-
&min_vec,pick
328+
&min_vec,
326329
&max_vec);
327330
l_store += (vtype::numlanes - amount_ge_pivot);
328331
r_store -= amount_ge_pivot;
329332
}
330333
}
331334

332-
/* partition and save vec_left[8] and vec_right[8] */
333-
#pragma GCC unroll 8
335+
/* partition and save vec_left[8] and vec_right[8] */
336+
#pragma GCC unroll 8
334337
for (int ii = 0; ii < num_unroll; ++ii) {
335-
int32_t amount_ge_pivot = partition_vec<vtype>(arr,
336-
l_store,
337-
r_store + vtype::numlanes,
338-
vec_left[ii],
339-
pivot_vec,
340-
&min_vec,
341-
&max_vec);
338+
int32_t amount_ge_pivot
339+
= partition_vec<vtype>(arr,
340+
l_store,
341+
r_store + vtype::numlanes,
342+
vec_left[ii],
343+
pivot_vec,
344+
&min_vec,
345+
&max_vec);
342346
l_store += (vtype::numlanes - amount_ge_pivot);
343347
r_store -= amount_ge_pivot;
344348
}
345-
#pragma GCC unroll 8
349+
#pragma GCC unroll 8
346350
for (int ii = 0; ii < num_unroll; ++ii) {
347-
int32_t amount_ge_pivot = partition_vec<vtype>(arr,
348-
l_store,
349-
r_store + vtype::numlanes,
350-
vec_right[ii],
351-
pivot_vec,
352-
&min_vec,
353-
&max_vec);
351+
int32_t amount_ge_pivot
352+
= partition_vec<vtype>(arr,
353+
l_store,
354+
r_store + vtype::numlanes,
355+
vec_right[ii],
356+
pivot_vec,
357+
&min_vec,
358+
&max_vec);
354359
l_store += (vtype::numlanes - amount_ge_pivot);
355360
r_store -= amount_ge_pivot;
356361
}

0 commit comments

Comments
 (0)