Skip to content

Commit ec395c8

Browse files
committed
Ignore benchmark (again) and fix docs
1 parent 034bf10 commit ec395c8

File tree

2 files changed

+53
-18
lines changed

2 files changed

+53
-18
lines changed

core/src/main/scala/org/apache/spark/util/collection/Sorter.java

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
import java.util.Comparator;
2323

2424
/**
25-
* A port of the OpenJDK 6 Arrays.sort(Object[]) function, which utilizes a simple merge sort.
26-
* This has been kept in Java with the original style in order to match very closely with the JDK
27-
* source code, and thus be easy to verify correctness.
25+
* A port of the Android Timsort class, which utilizes a "stable, adaptive, iterative mergesort."
26+
* See the method comment on sort() for more details.
27+
*
28+
* This has been kept in Java with the original style in order to match very closely with the
29+
* Android source code, and thus be easy to verify correctness.
2830
*
2931
* The purpose of the port is to generalize the interface to the sort to accept input data formats
3032
* besides simple arrays where every element is sorted individually. For instance, the AppendOnlyMap
@@ -58,6 +60,39 @@ public Sorter(SortDataFormat<K, Buffer> sortDataFormat) {
5860
this.s = sortDataFormat;
5961
}
6062

63+
/**
64+
* A stable, adaptive, iterative mergesort that requires far fewer than
65+
* n lg(n) comparisons when running on partially sorted arrays, while
66+
* offering performance comparable to a traditional mergesort when run
67+
* on random arrays. Like all proper mergesorts, this sort is stable and
68+
* runs in O(n log n) time (worst case). In the worst case, this sort requires
69+
* temporary storage space for n/2 object references; in the best case,
70+
* it requires only a small constant amount of space.
71+
*
72+
* This implementation was adapted from Tim Peters's list sort for
73+
* Python, which is described in detail here:
74+
*
75+
* http://svn.python.org/projects/python/trunk/Objects/listsort.txt
76+
*
77+
* Tim's C code may be found here:
78+
*
79+
* http://svn.python.org/projects/python/trunk/Objects/listobject.c
80+
*
81+
* The underlying techniques are described in this paper (and may have
82+
* even earlier origins):
83+
*
84+
* "Optimistic Sorting and Information Theoretic Complexity"
85+
* Peter McIlroy
86+
* SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
87+
* pp 467-474, Austin, Texas, 25-27 January 1993.
88+
*
89+
* While the API to the original TimSort class consists solely of static methods, it is
90+
* (privately) instantiable; a TimSort instance holds the state of an ongoing
91+
* sort, assuming the input array is large enough to warrant the full-blown
92+
* TimSort. Small arrays are sorted in place, using a binary insertion sort.
93+
*
94+
* @author Josh Bloch
95+
*/
6196
void sort(Buffer a, int lo, int hi, Comparator<? super K> c) {
6297
assert c != null;
6398

core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class SorterSuite extends FunSuite {
7474
* Note that the Java implementation varies tremendously between Java 6 and Java 7, when
7575
* the Java sort changed from merge sort to Timsort.
7676
*/
77-
test("Sorter benchmark") {
77+
ignore("Sorter benchmark") {
7878

7979
/** Runs an experiment several times. */
8080
def runExperiment(name: String)(f: => Unit): Unit = {
@@ -96,21 +96,9 @@ class SorterSuite extends FunSuite {
9696
val numElements = 25000000 // 25 mil
9797
val rand = new XORShiftRandom(123)
9898

99-
// Test primitive sort on float array
100-
val primitiveKeys = Array.tabulate[Float](numElements) { i => rand.nextFloat() }
101-
runExperiment("Java Arrays.sort() on primitive keys") {
102-
Arrays.sort(primitiveKeys)
103-
}
104-
105-
// Test non-primitive sort on float array
10699
val keys = Array.tabulate[JFloat](numElements) { i =>
107100
new JFloat(rand.nextFloat())
108101
}
109-
runExperiment("Java Arrays.sort()") {
110-
Arrays.sort(keys, new Comparator[JFloat] {
111-
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
112-
})
113-
}
114102

115103
// Test our key-value pairs where each element is a Tuple2[Float, Integer]
116104
val kvTupleArray = Array.tabulate[AnyRef](numElements) { i =>
@@ -123,17 +111,29 @@ class SorterSuite extends FunSuite {
123111
})
124112
}
125113

126-
// Test our Sorter where each element alternates between Float and Integer, non-primitive.
114+
// Test our Sorter where each element alternates between Float and Integer, non-primitive
127115
val keyValueArray = Array.tabulate[AnyRef](numElements * 2) { i =>
128116
if (i % 2 == 0) keys(i / 2) else new Integer(i / 2)
129117
}
130-
131118
val sorter = new Sorter(new KVArraySortDataFormat[JFloat, AnyRef])
132119
runExperiment("KV-sort using Sorter") {
133120
sorter.sort(keyValueArray, 0, keys.length, new Comparator[JFloat] {
134121
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
135122
})
136123
}
124+
125+
// Test non-primitive sort on float array
126+
runExperiment("Java Arrays.sort()") {
127+
Arrays.sort(keys, new Comparator[JFloat] {
128+
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
129+
})
130+
}
131+
132+
// Test primitive sort on float array
133+
val primitiveKeys = Array.tabulate[Float](numElements) { i => rand.nextFloat() }
134+
runExperiment("Java Arrays.sort() on primitive keys") {
135+
Arrays.sort(primitiveKeys)
136+
}
137137
}
138138
}
139139

0 commit comments

Comments
 (0)