From 99209b267a563201ac0ccc2bf03ea334264fc8cc Mon Sep 17 00:00:00 2001 From: Amirreza Ashouri Date: Sun, 17 Dec 2023 00:08:43 +0330 Subject: [PATCH] [libc++] [sort] Improve performance of std::sort Let in a wider variety of element types that are eligible to be processed by branchless sort functions, like POD structs and pointers. This change causes `sort_stability.pass.cpp` to prefer branchless sort for `EqualType`, but not if the comparator is an arbitrary lambda. So the test is adjusted to always pass a simple comparator. `nth_element_stability.pass.cpp` and `partial_sort_stability.pass.cpp` already use a simple comparator all the time, so they don't need to be adjusted. We considered forbidding the branchless version for unintentionally expensive comparators, e.g. double a[10]; std::sort(a, a+10, std::less()); Derived *a[10]; std::sort(a, a+10, std::less()); In both cases, it is safe, but more expensive than the original PR intended (since the comparator will be invoked more often in the branchless version). However, it's invoked only 0.1% more often, and benchmarks show that the branchless version is still (much) faster despite the extra invocations. So we leave this alone. Sample benchmarks: - https://godbolt.org/z/x1Waz3vbY - https://godbolt.org/z/5Ph3hfqbc Sanity-check that "branchless sort" is used only for trivially copyable types It relies on copying the elements, so copying must be at least possible, and ideally no more expensive than move; i.e., trivial. No functional change. --- libcxx/include/__algorithm/sort.h | 8 ++++++-- libcxx/test/libcxx/algorithms/sort_stability.pass.cpp | 3 +-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 1b878c33c7a16..6f6d7bd87a593 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -28,8 +28,8 @@ #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> #include <__type_traits/disjunction.h> -#include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_trivially_copyable.h> #include <__utility/move.h> #include <__utility/pair.h> #include @@ -144,7 +144,7 @@ template ::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; + is_trivially_copyable<_Tp>::value && __is_simple_comparator<_Compare>::value>; namespace __detail { @@ -158,6 +158,8 @@ template inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; + static_assert(is_trivially_copyable::value, + "Copying must have no side effects distinguishable from swapping/moving"); bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; *__y = __r ? *__y : *__x; @@ -171,6 +173,8 @@ inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; + static_assert(is_trivially_copyable::value, + "Copying must have no side effects distinguishable from swapping/moving"); bool __r = __c(*__z, *__x); value_type __tmp = __r ? *__z : *__x; *__z = __r ? *__x : *__z; diff --git a/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp index 712f12c255935..8856ca019f721 100644 --- a/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp +++ b/libcxx/test/libcxx/algorithms/sort_stability.pass.cpp @@ -68,8 +68,7 @@ void test_same() { auto snapshot_custom_v = v; std::sort(v.begin(), v.end()); std::sort(snapshot_v.begin(), snapshot_v.end()); - std::sort(snapshot_custom_v.begin(), snapshot_custom_v.end(), - [](const EqualType&, const EqualType&) { return false; }); + std::sort(snapshot_custom_v.begin(), snapshot_custom_v.end(), std::less()); bool all_equal = true; for (int i = 0; i < kSize; ++i) { if (v[i].value != snapshot_v[i].value || v[i].value != snapshot_custom_v[i].value) {