From 479d7e40bb99147045add38865e5872e21bb840b Mon Sep 17 00:00:00 2001
From: echo567
Date: Sun, 16 Nov 2025 19:01:30 +0800
Subject: [PATCH] fix(spark): fix arrow batch converter error

---
 .../execution/arrow/KyuubiArrowConverters.scala   | 15 ++++++---------
 .../spark/sql/kyuubi/SparkDatasetHelper.scala     |  2 +-
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala
index e13653b01cb..29eb53cd74e 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala
@@ -275,18 +275,15 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging {
 
     Utils.tryWithSafeFinally {
       // Always write the first row.
-      while (rowIter.hasNext && (
+      while (rowIter.hasNext && (rowCount < limit || limit < 0) && (
         // For maxBatchSize and maxRecordsPerBatch, respect whatever smaller.
         // If the size in bytes is positive (set properly), always write the first row.
-        rowCountInLastBatch == 0 && maxEstimatedBatchSize > 0 ||
+        rowCountInLastBatch == 0 ||
         // If the size in bytes of rows are 0 or negative, unlimit it.
-        estimatedBatchSize <= 0 ||
-        estimatedBatchSize < maxEstimatedBatchSize ||
-        // If the size of rows are 0 or negative, unlimit it.
-        maxRecordsPerBatch <= 0 ||
-        rowCountInLastBatch < maxRecordsPerBatch ||
-        rowCount < limit ||
-        limit < 0)) {
+        ((estimatedBatchSize <= 0 || estimatedBatchSize < maxEstimatedBatchSize) &&
+          // If the size of rows are 0 or negative, unlimit it.
+          (maxRecordsPerBatch <= 0 || rowCountInLastBatch < maxRecordsPerBatch))
+      )) {
         val row = rowIter.next()
         arrowWriter.write(row)
         estimatedBatchSize += (row match {
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala
index 73e7f779934..7e9088c7df5 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala
@@ -165,7 +165,7 @@ object SparkDatasetHelper extends Logging {
       .getConf
       .getOption("spark.connect.grpc.arrow.maxBatchSize")
       .orElse(Option("4m"))
-      .map(JavaUtils.byteStringAs(_, ByteUnit.MiB))
+      .map(JavaUtils.byteStringAs(_, ByteUnit.BYTE))
       .get
   }
 