Skip to content

bpo-28685: Optimize sorted() list.sort() with type-specialized comparisons #582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 50 commits into from
Jan 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
89c278f
Added data-aware optimizations to list.sort()
embg Mar 8, 2017
2ce5e5e
Removed trailing whitespace from listsort_impl
embg Mar 8, 2017
7d2f44a
fixed typo
embg Mar 8, 2017
d752fc7
Added myself to Misc/ACKS
embg Mar 8, 2017
e19728e
Made ISLT comment more in line with the current text
embg Mar 8, 2017
7e74c27
Remove newline
embg Mar 8, 2017
9c566b1
untabify
embg Mar 8, 2017
8876e26
removed newline
embg Mar 8, 2017
8accd71
simplified description of the tuple compare
embg Mar 9, 2017
1567801
grammar
embg Mar 9, 2017
3820cdb
Bugfix -- gcc ignored the error, but clang caught it!
embg Mar 9, 2017
201a468
Bugfix -- assertion in unsafe_long_compare was phrased incorrectly.
embg Mar 9, 2017
c2a9df2
fix typo
embg Mar 9, 2017
37b15b8
Added if (v == w) return 1; to all compares
embg Mar 11, 2017
e402948
Merge branch 'fastsort' of https://github.com/embg/cpython into fastsort
embg Mar 11, 2017
ed9b21f
Added if (v == w) return 0; to all compares, apologies for previous c…
embg Mar 11, 2017
acf4c9d
Folded CompareFuncs into MergeState and added safety check to unsafe_…
embg Mar 12, 2017
395bc7d
formatting
embg Mar 12, 2017
e677586
formatting
embg Mar 12, 2017
6070c72
don't need (v==w) for ints/strings
embg Mar 12, 2017
294aa1c
went back to i=0 for tuples; we can't infer == from < and >, even wit…
embg Mar 12, 2017
ba05b2a
move all declarations to top of their blocks
embg Mar 13, 2017
40ba266
typo
embg Mar 13, 2017
a175939
typo
embg Mar 13, 2017
f0dc847
added Py_NotImplemented check to unsafe_object_compare
embg Mar 13, 2017
15f87a2
ACTUALLY moved declarations to the tops of blocks
embg Mar 13, 2017
15f2f01
fix typo
embg Mar 9, 2017
6afa847
Added if (v == w) return 0; to all compares, apologies for previous c…
embg Mar 11, 2017
af7c027
Folded CompareFuncs into MergeState and added safety check to unsafe_…
embg Mar 12, 2017
20716cb
formatting
embg Mar 12, 2017
5960fbe
formatting
embg Mar 12, 2017
804807b
don't need (v==w) for ints/strings
embg Mar 12, 2017
5db7158
went back to i=0 for tuples; we can't infer == from < and >, even wit…
embg Mar 12, 2017
934d83f
move all declarations to top of their blocks
embg Mar 13, 2017
c536ed3
typo
embg Mar 13, 2017
0b85ac5
typo
embg Mar 13, 2017
a12d784
added Py_NotImplemented check to unsafe_object_compare
embg Mar 13, 2017
a54a4e4
ACTUALLY moved declarations to the tops of blocks
embg Mar 13, 2017
20172fb
Merge branch 'fastsort' of https://github.com/embg/cpython into fastsort
embg Mar 13, 2017
862c761
Made tuple compare precisely consistent with PyObject_RichCompareBool…
embg Mar 13, 2017
dd302b5
Added tests
embg Mar 15, 2017
ab3d520
Implemented all of serhiy-storchaka's changes
embg Mar 15, 2017
dba3f27
Removed braces at the end of unsafe_tuple_compare
embg Mar 15, 2017
c796422
Fixed test_safe_object_compare
embg Mar 16, 2017
fa19903
Fix spacing around PyObject *
rhettinger Jan 28, 2018
014fd8f
Merge in changes to master
rhettinger Jan 28, 2018
e4679e2
Add news blurb
rhettinger Jan 28, 2018
3b3ce52
Update listsort.txt for the optimization
rhettinger Jan 29, 2018
afed812
Fix whitespace
rhettinger Jan 29, 2018
ebb4c1f
Clean-up and fix tests for mutation of __class__.
rhettinger Jan 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions Lib/test/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,120 @@ def my_cmp_reversed(x, y):
self.assertEqual(data, copy2)

#==============================================================================
def check_against_PyObject_RichCompareBool(self, L):
    """Check that optimized sorting of L agrees with a reference ordering.

    `self` is the running test case (only its assertIs() is used) and `L`
    is the collection of objects to sort.  L itself is left untouched: the
    check works on a private, deterministically shuffled copy.
    """
    ## The idea here is to exploit the fact that unsafe_tuple_compare uses
    ## PyObject_RichCompareBool for the second elements of tuples. So we have,
    ## for (most) L, sorted(L) == [y[1] for y in sorted([(0,x) for x in L])]
    ## This will work as long as __eq__ => not __lt__ for all the objects in L,
    ## which holds for all the types used by the callers in this file.
    ##
    ## Testing this way ensures that the optimized implementation remains
    ## consistent with the naive implementation, even if changes are made to
    ## any of the richcompares.
    ##
    ## Three lists are tested, all derived from one shuffled copy of L:
    ##     1. the shuffled copy itself
    ##     2. [(x,) for x in shuffled]
    ##     3. [((x,),) for x in shuffled]

    # Work on a copy so the caller's list is not reordered, and use a private
    # generator so the global random state is not reseeded.  Random(0)
    # produces the same permutation as random.seed(0); random.shuffle(...).
    shuffled = list(L)
    random.Random(0).shuffle(shuffled)

    variants = (
        shuffled,
        [(x,) for x in shuffled],
        [((x,),) for x in shuffled],
    )
    for variant in variants:
        optimized = sorted(variant)
        reference = [pair[1] for pair in sorted([(0, x) for x in variant])]
        for opt, ref in zip(optimized, reference):
            # Note: not assertEqual! We want to ensure *identical* behavior,
            # i.e. the very same objects in the very same positions.
            self.assertIs(opt, ref)

class TestOptimizedCompares(unittest.TestCase):
    """Tests for the type-specialized comparisons used when sorting.

    Most tests delegate to check_against_PyObject_RichCompareBool(), which
    compares the result of sorting a list directly with the result of
    sorting the same objects wrapped as the second element of a tuple.
    """

    def test_safe_object_compare(self):
        # Lists mixing unorderable types must still raise TypeError, both
        # directly and when the elements are wrapped in (nested) tuples.
        heterogeneous_lists = [[0, 'foo'],
                               [0.0, 'foo'],
                               [('foo',), 'foo']]
        for L in heterogeneous_lists:
            self.assertRaises(TypeError, L.sort)
            self.assertRaises(TypeError, [(x,) for x in L].sort)
            self.assertRaises(TypeError, [((x,),) for x in L].sort)

        # Mixed int/float lists are orderable; they must sort exactly as
        # the reference ordering does.
        float_int_lists = [[1, 1.1],
                           [1 << 70, 1.1],
                           [1.1, 1],
                           [1.1, 1 << 70]]
        for L in float_int_lists:
            check_against_PyObject_RichCompareBool(self, L)

    def test_unsafe_object_compare(self):

        # This test is by ppperry. It ensures that unsafe_object_compare is
        # verifying ms->key_richcompare == tp->richcompare before comparing.

        class WackyComparator(int):
            def __lt__(self, other):
                # Swap the class of the last list element in the middle of
                # the sort (elem is rebound by the enclosing test below).
                elem.__class__ = WackyList2
                return int.__lt__(self, other)

        class WackyList1(list):
            pass

        class WackyList2(list):
            def __lt__(self, other):
                raise ValueError

        L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
        elem = L[-1]
        with self.assertRaises(ValueError):
            L.sort()

        L = [WackyList1([WackyComparator(i), i]) for i in range(10)]
        elem = L[-1]
        with self.assertRaises(ValueError):
            [(x,) for x in L].sort()

        # The following test is also by ppperry. It ensures that
        # unsafe_object_compare handles Py_NotImplemented appropriately.
        class PointlessComparator:
            def __lt__(self, other):
                return NotImplemented

        L = [PointlessComparator(), PointlessComparator()]
        self.assertRaises(TypeError, L.sort)
        self.assertRaises(TypeError, [(x,) for x in L].sort)

        # The following tests go through various types that would trigger
        # ms->key_compare = unsafe_object_compare
        lists = [
            list(range(100)) + [(1 << 70)],
            [str(x) for x in range(100)] + ['\uffff'],
            # bytes(x) is x NUL bytes, so the 100 values are all distinct.
            [bytes(x) for x in range(100)],
            # A cmp function must return a negative, zero or positive
            # number; a bare `x < y` never yields a negative value, which
            # would make every key compare as "not less than" and leave the
            # ordering untested.
            [cmp_to_key(lambda x, y: (x > y) - (x < y))(x)
             for x in range(100)],
        ]
        for L in lists:
            check_against_PyObject_RichCompareBool(self, L)

    def test_unsafe_latin_compare(self):
        check_against_PyObject_RichCompareBool(
            self, [str(x) for x in range(100)])

    def test_unsafe_long_compare(self):
        check_against_PyObject_RichCompareBool(self, list(range(100)))

    def test_unsafe_float_compare(self):
        check_against_PyObject_RichCompareBool(
            self, [float(x) for x in range(100)])

    def test_unsafe_tuple_compare(self):
        # This test was suggested by Tim Peters. It verifies that the tuple
        # comparison respects the current tuple compare semantics, which do
        # not guarantee that x < x <=> (x,) < (x,)
        #
        # Note that we don't have to put anything in tuples here, because
        # the check function does a tuple test automatically.

        # One NaN object repeated 100 times ...
        check_against_PyObject_RichCompareBool(self, [float('nan')] * 100)
        # ... versus 100 distinct NaN objects.
        check_against_PyObject_RichCompareBool(
            self, [float('nan') for _ in range(100)])
#==============================================================================

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ Tiago Gonçalves
Chris Gonnerman
Shelley Gooch
David Goodger
Elliot Gorokhovsky
Hans de Graaff
Tim Graham
Kim Gräsman
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Optimize list.sort() and sorted() by using type-specialized comparisons when
possible.
Loading