Commit 5496d99

[SPARK-40433][SS][PYTHON] Add toJVMRow in PythonSQLUtils to convert pickled PySpark Row to JVM Row
### What changes were proposed in this pull request?

This PR adds toJVMRow in PythonSQLUtils to convert a pickled PySpark Row to a JVM Row. Co-authored with HyukjinKwon. This is a breakdown PR of #37863.

### Why are the changes needed?

This change will be leveraged in [SPARK-40434](https://issues.apache.org/jira/browse/SPARK-40434).

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

N/A. We will make sure test suites are constructed in an E2E manner under [SPARK-40431](https://issues.apache.org/jira/browse/SPARK-40431).

Closes #37891 from HeartSaVioR/SPARK-40433.

Lead-authored-by: Jungtaek Lim <[email protected]>
Co-authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Jungtaek Lim <[email protected]>
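For orientation, here is a minimal round-trip sketch of the two methods touched by this change. It is an illustration, not part of the patch: it assumes Spark-internal access (PythonSQLUtils is private[sql], so this would have to live under the org.apache.spark.sql package) and the Spark 3.x RowEncoder API; the schema and values are made up.

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.api.python.PythonSQLUtils
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Illustrative schema; any StructType works.
val schema = StructType(Seq(
  StructField("id", IntegerType),
  StructField("name", StringType)))

// Build and bind a Row deserializer for the schema (Spark 3.x RowEncoder API).
val deserializer = RowEncoder(schema).resolveAndBind().createDeserializer()

// Row -> pickled bytes (consumable by PySpark) -> back to a JVM Row.
// toPyRow asserts the input is a GenericRowWithSchema.
val original: Row = new GenericRowWithSchema(Array(1, "a"), schema)
val pickled: Array[Byte] = PythonSQLUtils.toPyRow(original)
val restored: Row = PythonSQLUtils.toJVMRow(pickled, schema, deserializer)
assert(restored == original) // field values round-trip
```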
1 parent 3d14b74 commit 5496d99

File tree

1 file changed: +35 −7

sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala

Lines changed: 35 additions & 7 deletions
@@ -22,27 +22,45 @@ import java.net.Socket
 import java.nio.channels.Channels
 import java.util.Locale
 
-import net.razorvine.pickle.Pickler
+import net.razorvine.pickle.{Pickler, Unpickler}
 
 import org.apache.spark.api.python.DechunkedInputStream
 import org.apache.spark.internal.Logging
 import org.apache.spark.security.SocketAuthServer
 import org.apache.spark.sql.{Column, DataFrame, Row, SparkSession}
-import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
 import org.apache.spark.sql.execution.arrow.ArrowConverters
 import org.apache.spark.sql.execution.python.EvaluatePython
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.types.{DataType, StructType}
 
 private[sql] object PythonSQLUtils extends Logging {
-  private lazy val internalRowPickler = {
+  private def withInternalRowPickler(f: Pickler => Array[Byte]): Array[Byte] = {
     EvaluatePython.registerPicklers()
-    new Pickler(true, false)
+    val pickler = new Pickler(true, false)
+    val ret = try {
+      f(pickler)
+    } finally {
+      pickler.close()
+    }
+    ret
+  }
+
+  private def withInternalRowUnpickler(f: Unpickler => Any): Any = {
+    EvaluatePython.registerPicklers()
+    val unpickler = new Unpickler
+    val ret = try {
+      f(unpickler)
+    } finally {
+      unpickler.close()
+    }
+    ret
   }
 
   def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)
@@ -94,8 +112,18 @@ private[sql] object PythonSQLUtils extends Logging {
 
   def toPyRow(row: Row): Array[Byte] = {
     assert(row.isInstanceOf[GenericRowWithSchema])
-    internalRowPickler.dumps(EvaluatePython.toJava(
-      CatalystTypeConverters.convertToCatalyst(row), row.schema))
+    withInternalRowPickler(_.dumps(EvaluatePython.toJava(
+      CatalystTypeConverters.convertToCatalyst(row), row.schema)))
+  }
+
+  def toJVMRow(
+      arr: Array[Byte],
+      returnType: StructType,
+      deserializer: ExpressionEncoder.Deserializer[Row]): Row = {
+    val fromJava = EvaluatePython.makeFromJava(returnType)
+    val internalRow =
+      fromJava(withInternalRowUnpickler(_.loads(arr))).asInstanceOf[InternalRow]
+    deserializer(internalRow)
   }
 
   def castTimestampNTZToLong(c: Column): Column = Column(CastTimestampNTZToLong(c.expr))
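Design note: the shared lazy val internalRowPickler becomes per-call with* helpers, so the Pickler/Unpickler is closed deterministically even when the body throws. A generic sketch of the same loan pattern (withResource is a hypothetical helper for illustration, not part of this change):

```scala
// Hypothetical generic helper mirroring withInternalRowPickler /
// withInternalRowUnpickler: create the resource per call and close it
// in a finally block, even when f throws.
def withResource[R <: AutoCloseable, T](resource: R)(f: R => T): T =
  try f(resource) finally resource.close()

// Usage sketch:
val bytes = withResource(new java.io.ByteArrayOutputStream()) { out =>
  out.write(42)
  out.toByteArray
}
```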
