Commit e0ff530

fenzhu authored and GitHub Enterprise committed
[CARMEL-6280] Data download query stuck in job commit phase (#1084)
* [CARMEL-6280] Data download query stuck in job commit phase
* Add more.
1 parent: aa62055 · commit: e0ff530

File tree: 1 file changed (+51, -23 lines)

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkDownloadDataOperation.scala

Lines changed: 51 additions & 23 deletions
@@ -387,12 +387,14 @@ private[hive] class SparkDownloadDataOperation(
       (output, result.schema)
     }
 
-    result.select(castCols: _*).write
-      .options(writeOptions)
-      .option("header", "false")
-      .format(outputFormat)
-      .mode(SaveMode.Overwrite)
-      .save(step1Path.toString)
+    withCommitAlgorithmV2 {
+      result.select(castCols: _*).write
+        .options(writeOptions)
+        .option("header", "false")
+        .format(outputFormat)
+        .mode(SaveMode.Overwrite)
+        .save(step1Path.toString)
+    }
     val contentSummary = fs.getContentSummary(step1Path)
     val dataSize = contentSummary.getLength
     val fileCount = contentSummary.getFileCount
@@ -419,18 +421,20 @@ private[hive] class SparkDownloadDataOperation(
     }
 
     if (!isSortable && coalesceNum > 0) {
-      sqlContext.read
-        .schema(readSchema)
-        .format(outputFormat)
-        .options(writeOptions)
-        .option("header", "false")
-        .load(step1Path.toString)
-        .coalesce(coalesceNum)
-        .write
-        .options(writeOptions)
-        .format(outputFormat)
-        .mode(SaveMode.Overwrite)
-        .save(step2Path.toString)
+      withCommitAlgorithmV2 {
+        sqlContext.read
+          .schema(readSchema)
+          .format(outputFormat)
+          .options(writeOptions)
+          .option("header", "false")
+          .load(step1Path.toString)
+          .coalesce(coalesceNum)
+          .write
+          .options(writeOptions)
+          .format(outputFormat)
+          .mode(SaveMode.Overwrite)
+          .save(step2Path.toString)
+      }
 
       step2Path
     } else {
@@ -483,11 +487,13 @@ private[hive] class SparkDownloadDataOperation(
       result.repartition()
     }
 
-    writePlan.write
-      .options(writeOptions)
-      .format(outputFormat)
-      .mode(SaveMode.Overwrite)
-      .save(outputPath.toString)
+    withCommitAlgorithmV2 {
+      writePlan.write
+        .options(writeOptions)
+        .format(outputFormat)
+        .mode(SaveMode.Overwrite)
+        .save(outputPath.toString)
+    }
 
     val contentSummary = fs.getContentSummary(outputPath)
     val dataSize = contentSummary.getLength
@@ -663,6 +669,28 @@ private[hive] class SparkDownloadDataOperation(
     }
     sqlContext.sparkContext.closeJobGroup(statementId)
   }
+
+  private def withCommitAlgorithmV2[T](f: => T): T = {
+    val originalFileOutputCommitterAlgorithm = sqlContext.sessionState.conf.getConfString(
+      org.apache.hadoop.mapreduce.lib.output.
+        FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, "1")
+    if (!originalFileOutputCommitterAlgorithm.equals("2")) {
+      sqlContext.sessionState.conf.setConfString(
+        org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.
+          FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, "2")
+      logInfo("Set file output committer algorithm as version 2 for download")
+    }
+    val res = f
+    if (!originalFileOutputCommitterAlgorithm.equals("2")) {
+      sqlContext.sessionState.conf.setConfString(
+        org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.
+          FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
+        originalFileOutputCommitterAlgorithm)
+      logInfo(s"Set file output committer algorithm " +
+        s"back to version $originalFileOutputCommitterAlgorithm")
+    }
+    res
+  }
 }
 
 object SparkDownloadDataOperation {
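
For context, the fix wraps each download write in a helper that temporarily forces the Hadoop FileOutputCommitter to algorithm version 2, where task output is moved to its final location at task commit time so the single-threaded job commit phase has far less to do, and then restores the original setting. The sketch below shows the same pattern against a plain SparkSession; it is a minimal illustration, not code from this commit, and the spark session, the withCommitterV2 name, the sample CSV write, and the try/finally restore are assumptions added for the example.

    import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
    import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

    // Hypothetical helper mirroring the withCommitAlgorithmV2 pattern from this commit:
    // remember the current committer algorithm, force version 2 for the duration of the
    // write, then restore whatever was configured before.
    def withCommitterV2[T](spark: SparkSession)(body: => T): T = {
      val key = FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION
      val original = spark.conf.getOption(key)   // None if the key was never set explicitly
      spark.conf.set(key, "2")
      try {
        body
      } finally {
        original match {
          case Some(v) => spark.conf.set(key, v) // put the previous value back
          case None => spark.conf.unset(key)     // or clear it entirely
        }
      }
    }

    // Example usage: write a DataFrame as headerless CSV with the v2 commit algorithm.
    def writeForDownload(spark: SparkSession, df: DataFrame, outputPath: String): Unit = {
      withCommitterV2(spark) {
        df.write
          .option("header", "false")
          .format("csv")
          .mode(SaveMode.Overwrite)
          .save(outputPath)
      }
    }

Unlike the session-scoped helper in the patch, the same behavior can also be enabled for a whole application by submitting it with --conf spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2; the helper approach keeps the change limited to the download writes and, in the sketch above, puts the original value back even if the write fails.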
