@@ -316,8 +316,9 @@ def _load_stream_without_unbatching(self, stream):
316316 key_batch_stream = self .key_ser ._load_stream_without_unbatching (stream )
317317 val_batch_stream = self .val_ser ._load_stream_without_unbatching (stream )
318318 for (key_batch , val_batch ) in zip (key_batch_stream , val_batch_stream ):
319- # for correctness with repeated cartesian/zip this must be returned as one batch
320- yield product (key_batch , val_batch )
319+ # for correctness with repeated cartesian/zip this must be returned as
320+ # one batch (a list)
321+ yield list (product (key_batch , val_batch ))
321322
def load_stream(self, stream):
    """Return one flat iterator over the items of every batch in *stream*.

    The batches come from ``self._load_stream_without_unbatching``; they
    are chained lazily, so each batch is only consumed once the previous
    one has been exhausted.
    """
    batches = self._load_stream_without_unbatching(stream)
    return chain.from_iterable(batches)
@@ -346,8 +347,9 @@ def _load_stream_without_unbatching(self, stream):
346347 if len (key_batch ) != len (val_batch ):
347348 raise ValueError ("Can not deserialize PairRDD with different number of items"
348349 " in batches: (%d, %d)" % (len (key_batch ), len (val_batch )))
349- # for correctness with repeated cartesian/zip this must be returned as one batch
350- yield zip (key_batch , val_batch )
350+ # for correctness with repeated cartesian/zip this must be returned as
351+ # one batch (a list)
352+ yield list (zip (key_batch , val_batch ))
351353
def load_stream(self, stream):
    """Flatten the batches read from *stream* into a single item iterator.

    Delegates the reading to ``self._load_stream_without_unbatching`` and
    lazily concatenates the batches it yields, in order.
    """
    batched = self._load_stream_without_unbatching(stream)
    return chain.from_iterable(batched)
0 commit comments