@@ -131,16 +131,35 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
131131
132132 /**
133133 * Return a subset of this RDD sampled by key (via stratified sampling).
134+ *
135+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
136+ * `fractions`, a map from key to sampling rate.
137+ *
138+ * If `exact` is set to false, create the sample via simple random sampling, with one pass
139+ * over the RDD, to produce a sample whose size is approximately equal to the sum of
140+ * math.ceil(numItems * samplingRate) over all key values; otherwise, use additional passes over
141+ * the RDD to create a sample whose size is exactly equal to the sum of
142+ * math.ceil(numItems * samplingRate) over all key values.
134143 */
135144 def sampleByKey (withReplacement : Boolean ,
136145 fractions : JMap [K , Double ],
137146 exact : Boolean ,
138147 seed : Long ): JavaPairRDD [K , V ] =
139148 new JavaPairRDD [K , V ](rdd.sampleByKey(withReplacement, fractions, exact, seed))
140149
141-
142150 /**
143151 * Return a subset of this RDD sampled by key (via stratified sampling).
152+ *
153+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
154+ * `fractions`, a key to sampling rate map.
155+ *
156+ * If `exact` is set to false, create the sample via simple random sampling, with one pass
157+ * over the RDD, to produce a sample whose size is approximately equal to the sum of
158+ * math.ceil(numItems * samplingRate) over all key values; otherwise, use additional passes over
159+ * the RDD to create a sample whose size is exactly equal to the sum of
160+ * math.ceil(numItems * samplingRate) over all key values.
161+ *
162+ * Use Utils.random.nextLong as the default seed for the random number generator.
144163 */
145164 def sampleByKey (withReplacement : Boolean ,
146165 fractions : JMap [K , Double ],
@@ -149,17 +168,33 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
149168
150169 /**
151170 * Return a subset of this RDD sampled by key (via stratified sampling).
171+ *
172+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
173+ * `fractions`, a map from key to sampling rate.
174+ *
175+ * Produce a sample whose size is approximately equal to the sum of
176+ * math.ceil(numItems * samplingRate) over all key values, using one pass over the RDD via
177+ * simple random sampling.
152178 */
153179 def sampleByKey (withReplacement : Boolean ,
154180 fractions : JMap [K , Double ],
155181 seed : Long ): JavaPairRDD [K , V ] =
156- sampleByKey(withReplacement, fractions, true , seed)
182+ sampleByKey(withReplacement, fractions, false , seed)
157183
158184 /**
159185 * Return a subset of this RDD sampled by key (via stratified sampling).
186+ *
187+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
188+ * `fractions`, a map from key to sampling rate.
189+ *
190+ * Produce a sample whose size is approximately equal to the sum of
191+ * math.ceil(numItems * samplingRate) over all key values, using one pass over the RDD via
192+ * simple random sampling.
193+ *
194+ * Use Utils.random.nextLong as the default seed for the random number generator.
160195 */
161196 def sampleByKey (withReplacement : Boolean , fractions : JMap [K , Double ]): JavaPairRDD [K , V ] =
162- sampleByKey(withReplacement, fractions, true , Utils .random.nextLong)
197+ sampleByKey(withReplacement, fractions, false , Utils .random.nextLong)
163198
164199 /**
165200 * Return the union of this RDD and another one. Any identical elements will appear multiple
0 commit comments