|
17 | 17 |
|
18 | 18 | package org.apache.spark.sql.hive.execution |
19 | 19 |
|
20 | | -import java.io.IOException |
| 20 | +import java.io.{File, IOException} |
21 | 21 | import java.net.URI |
22 | 22 | import java.text.SimpleDateFormat |
23 | 23 | import java.util.{Date, Locale, Random} |
@@ -97,12 +97,24 @@ case class InsertIntoHiveTable( |
97 | 97 | val inputPathUri: URI = inputPath.toUri |
98 | 98 | val inputPathName: String = inputPathUri.getPath |
99 | 99 | val fs: FileSystem = inputPath.getFileSystem(hadoopConf) |
100 | | - val stagingPathName: String = |
| 100 | + var stagingPathName: String = |
101 | 101 | if (inputPathName.indexOf(stagingDir) == -1) { |
102 | 102 | new Path(inputPathName, stagingDir).toString |
103 | 103 | } else { |
104 | 104 | inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length) |
105 | 105 | } |
| 106 | + |
| 107 | + // SPARK-20594: This is a workaround for a Hive bug. The staging |
| 108 | + // directory must not be deleted when users set hive.exec.stagingdir |
| 109 | + // under the table directory. |
| 110 | + if (FileUtils.isSubDir(new Path(stagingPathName), inputPath, fs) && |
| 111 | + !stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) { |
| 112 | + logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " + |
| 113 | + "with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " + |
| 114 | + "directory.") |
| 115 | + stagingPathName = new Path(inputPathName, ".hive-staging").toString |
| 116 | + } |
| 117 | + |
106 | 118 | val dir: Path = |
107 | 119 | fs.makeQualified( |
108 | 120 | new Path(stagingPathName + "_" + executionId + "-" + TaskRunner.getTaskRunnerID)) |
|
0 commit comments