@@ -1496,69 +1496,43 @@ setMethod("zipRDD",
14961496 stop(" Can only zip RDDs which have the same number of partitions." )
14971497 }
14981498
1499- if (getSerializedMode(x ) != getSerializedMode(other ) ||
1500- getSerializedMode(x ) == " byte" ) {
1501- # Append the number of elements in each partition to that partition so that we can later
1502- # check if corresponding partitions of both RDDs have the same number of elements.
1503- #
1504- # Note that this appending also serves the purpose of reserialization, because even if
1505- # any RDD is serialized, we need to reserialize it to make sure its partitions are encoded
1506- # as a single byte array. For example, partitions of an RDD generated from partitionBy()
1507- # may be encoded as multiple byte arrays.
1508- appendLength <- function (part ) {
1509- part [[length(part ) + 1 ]] <- length(part ) + 1
1510- part
1511- }
1512- x <- lapplyPartition(x , appendLength )
1513- other <- lapplyPartition(other , appendLength )
1514- }
1515-
1516- zippedJRDD <- callJMethod(getJRDD(x ), " zip" , getJRDD(other ))
1517- # The zippedRDD's elements are of scala Tuple2 type. The serialized
1518- # flag Here is used for the elements inside the tuples.
1519- serializerMode <- getSerializedMode(x )
1520- zippedRDD <- RDD(zippedJRDD , serializerMode )
1521-
1522- partitionFunc <- function (split , part ) {
1523- len <- length(part )
1524- if (len > 0 ) {
1525- if (serializerMode == " byte" ) {
1526- lengthOfValues <- part [[len ]]
1527- lengthOfKeys <- part [[len - lengthOfValues ]]
1528- stopifnot(len == lengthOfKeys + lengthOfValues )
1529-
1530- # check if corresponding partitions of both RDDs have the same number of elements.
1531- if (lengthOfKeys != lengthOfValues ) {
1532- stop(" Can only zip RDDs with same number of elements in each pair of corresponding partitions." )
1533- }
1534-
1535- if (lengthOfKeys > 1 ) {
1536- keys <- part [1 : (lengthOfKeys - 1 )]
1537- values <- part [(lengthOfKeys + 1 ) : (len - 1 )]
1538- } else {
1539- keys <- list ()
1540- values <- list ()
1541- }
1542- } else {
1543- # Keys, values must have same length here, because this has
1544- # been validated inside the JavaRDD.zip() function.
1545- keys <- part [c(TRUE , FALSE )]
1546- values <- part [c(FALSE , TRUE )]
1547- }
1548- mapply(
1549- function (k , v ) {
1550- list (k , v )
1551- },
1552- keys ,
1553- values ,
1554- SIMPLIFY = FALSE ,
1555- USE.NAMES = FALSE )
1556- } else {
1557- part
1558- }
1559- }
1499+ rdds <- appendPartitionLengths(x , other )
1500+ jrdd <- callJMethod(getJRDD(rdds [[1 ]]), " zip" , getJRDD(rdds [[2 ]]))
1501+ # The jrdd's elements are of scala Tuple2 type. The serialized
1502+ # flag here is used for the elements inside the tuples.
1503+ rdd <- RDD(jrdd , getSerializedMode(rdds [[1 ]]))
1504+
1505+ mergePartitions(rdd , TRUE )
1506+ })
15601507
1561- PipelinedRDD(zippedRDD , partitionFunc )
1508+ # ' Cartesian product of this RDD and another one.
1509+ # '
1510+ # ' Return the Cartesian product of this RDD and another one,
1511+ # ' that is, the RDD of all pairs of elements (a, b) where a
1512+ # ' is an element of \code{x} and b is an element of \code{other}.
1513+ # '
1514+ # ' @param x An RDD; supplies the first element of each pair.
1515+ # ' @param other An RDD; supplies the second element of each pair.
1516+ # ' @return A new RDD which is the Cartesian product of these two RDDs.
1517+ # ' @examples
1518+ # '\dontrun{
1519+ # ' sc <- sparkR.init()
1520+ # ' rdd <- parallelize(sc, 1:2)
1521+ # ' sortByKey(cartesian(rdd, rdd))
1522+ # ' # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2))
1523+ # '}
1524+ # ' @rdname cartesian
1525+ # ' @aliases cartesian,RDD,RDD-method
1526+ setMethod ("cartesian ",
1527+ signature(x = " RDD" , other = " RDD" ),
1528+ function (x , other ) {
1529+ rdds <- appendPartitionLengths(x , other ) # returns the pair of RDDs used below as rdds[[1]] and rdds[[2]]; presumably reserialized with partition lengths appended -- TODO confirm against appendPartitionLengths
1530+ jrdd <- callJMethod(getJRDD(rdds [[1 ]]), " cartesian" , getJRDD(rdds [[2 ]]))
1531+ # The jrdd's elements are of Scala Tuple2 type. The serialized mode passed
1532+ # to RDD() is taken from the first input RDD and describes the elements inside the tuples.
1533+ rdd <- RDD(jrdd , getSerializedMode(rdds [[1 ]]))
1534+
1535+ mergePartitions(rdd , FALSE ) # zipRDD passes TRUE here; NOTE(review): the flag presumably toggles a zip-only partition check -- verify in mergePartitions
15621536 })
15631537
15641538# ' Subtract an RDD with another RDD.
0 commit comments