
Commit 51a387a

Mridul Muralidharan authored and mateiz committed
SPARK-1586 Windows build fixes
Unfortunately, this is not exhaustive - particularly Hive tests still fail due to path issues.

Author: Mridul Muralidharan <[email protected]>

This patch had conflicts when merged, resolved by
Committer: Matei Zaharia <[email protected]>

Closes #505 from mridulm/windows_fixes and squashes the following commits:

ef12283 [Mridul Muralidharan] Move to org.apache.commons.lang3 for StringEscapeUtils. Earlier version was buggy, apparently
cdae406 [Mridul Muralidharan] Remove leaked changes from > 2G fix branch
3267f4b [Mridul Muralidharan] Fix build failures
35b277a [Mridul Muralidharan] Fix Scalastyle failures
bc69d14 [Mridul Muralidharan] Change from hardcoded path separator
10c4d78 [Mridul Muralidharan] Use explicit encoding while using getBytes
1337abd [Mridul Muralidharan] fix classpath while running in windows

(cherry picked from commit 968c018)
Signed-off-by: Matei Zaharia <[email protected]>
1 parent 7b6d774 commit 51a387a
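Most of the Scala and Java changes below replace charset-default calls such as getBytes() and new String(bytes) with explicit "utf-8" arguments. As a minimal, hypothetical sketch of the pitfall being avoided (not part of the commit), note how the byte count of the same string depends on the JVM's default charset:

object EncodingPitfall {
  def main(args: Array[String]): Unit = {
    val text = "café"

    // Platform default charset: often windows-1252 on Windows, UTF-8 on most
    // Linux machines, so the encoded bytes can differ from host to host.
    val defaultBytes = text.getBytes()

    // Explicit charset, the style this commit adopts: identical everywhere.
    val utf8Bytes = text.getBytes("utf-8")

    println(s"default charset: ${defaultBytes.length} bytes")  // 4 on windows-1252, 5 on UTF-8
    println(s"explicit utf-8:  ${utf8Bytes.length} bytes")     // always 5
  }
}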

File tree

21 files changed, +185 -116 lines changed


bin/compute-classpath.cmd

Lines changed: 88 additions & 69 deletions
@@ -1,69 +1,88 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements. See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License. You may obtain a copy of the License at
-rem
-rem http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
-rem script and the ExecutorRunner in standalone cluster mode.
-
-set SCALA_VERSION=2.10
-
-rem Figure out where the Spark framework is installed
-set FWDIR=%~dp0..\
-
-rem Load environment variables from conf\spark-env.cmd, if it exists
-if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
-
-rem Build up classpath
-set CLASSPATH=%FWDIR%conf
-if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
-    set ASSEMBLY_JAR=%%d
-  )
-) else (
-  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
-    set ASSEMBLY_JAR=%%d
-  )
-)
-set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
-
-if "x%SPARK_TESTING%"=="x1" (
-  rem Add test clases to path
-  set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
-)
-
-rem Add hadoop conf dir - else FileSystem.*, etc fail
-rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
-rem the configurtion files.
-if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
-set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
-:no_hadoop_conf_dir
-
-if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
-set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
-:no_yarn_conf_dir
-
-rem A bit of a hack to allow calling this script within run2.cmd without seeing output
-if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
-
-echo %CLASSPATH%
-
-:exit
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
+rem script and the ExecutorRunner in standalone cluster mode.
+
+set SCALA_VERSION=2.10
+
+rem Figure out where the Spark framework is installed
+set FWDIR=%~dp0..\
+
+rem Load environment variables from conf\spark-env.cmd, if it exists
+if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
+
+rem Build up classpath
+set CLASSPATH=%FWDIR%conf
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+) else (
+  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+)
+
+set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+
+set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes
+
+set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes
+
+if "x%SPARK_TESTING%"=="x1" (
+  rem Add test clases to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH
+  rem so that local compilation takes precedence over assembled jar
+  set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH%
+)
+
+rem Add hadoop conf dir - else FileSystem.*, etc fail
+rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
+rem the configurtion files.
+if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
+set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
+:no_hadoop_conf_dir
+
+if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
+set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
+:no_yarn_conf_dir
+
+rem A bit of a hack to allow calling this script within run2.cmd without seeing output
+if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
+
+echo %CLASSPATH%
+
+:exit
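The reworked script builds SPARK_CLASSES and SPARK_TEST_CLASSES and, when SPARK_TESTING=1, places them ahead of the assembly jar so locally compiled classes take precedence. That relies on the JVM resolving classes from classpath entries in order; a small, hypothetical Scala sketch of the first-entry-wins behavior (the paths are illustrative, not taken from the commit):

import java.io.File
import java.net.{URL, URLClassLoader}

object ClasspathOrder {
  def main(args: Array[String]): Unit = {
    // Hypothetical entries: a locally compiled classes directory and an assembly jar,
    // both assumed to contain copies of the same classes.
    val localClasses = new File("core/target/scala-2.10/classes").toURI.toURL
    val assemblyJar  = new File("assembly/spark-assembly.jar").toURI.toURL

    // URLClassLoader searches its URLs left to right, so listing the local
    // classes first means they shadow the copies packed inside the assembly jar.
    val loader = new URLClassLoader(Array[URL](localClasses, assemblyJar))
    println(loader.getURLs.mkString(File.pathSeparator))
  }
}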

core/src/main/scala/org/apache/spark/SparkSaslClient.scala

Lines changed: 3 additions & 3 deletions
@@ -111,10 +111,10 @@ private[spark] class SparkSaslClient(securityMgr: SecurityManager) extends Logg
   CallbackHandler {
 
     private val userName: String =
-      SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes())
+      SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes("utf-8"))
     private val secretKey = securityMgr.getSecretKey()
-    private val userPassword: Array[Char] =
-      SparkSaslServer.encodePassword(if (secretKey != null) secretKey.getBytes() else "".getBytes())
+    private val userPassword: Array[Char] = SparkSaslServer.encodePassword(
+      if (secretKey != null) secretKey.getBytes("utf-8") else "".getBytes("utf-8"))
 
     /**
      * Implementation used to respond to SASL request from the server.

core/src/main/scala/org/apache/spark/SparkSaslServer.scala

Lines changed: 4 additions & 4 deletions
@@ -89,7 +89,7 @@ private[spark] class SparkSaslServer(securityMgr: SecurityManager) extends Loggi
   extends CallbackHandler {
 
     private val userName: String =
-      SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes())
+      SparkSaslServer.encodeIdentifier(securityMgr.getSaslUser().getBytes("utf-8"))
 
     override def handle(callbacks: Array[Callback]) {
       logDebug("In the sasl server callback handler")
@@ -101,7 +101,7 @@ private[spark] class SparkSaslServer(securityMgr: SecurityManager) extends Loggi
         case pc: PasswordCallback => {
           logDebug("handle: SASL server callback: setting userPassword")
           val password: Array[Char] =
-            SparkSaslServer.encodePassword(securityMgr.getSecretKey().getBytes())
+            SparkSaslServer.encodePassword(securityMgr.getSecretKey().getBytes("utf-8"))
           pc.setPassword(password)
         }
         case rc: RealmCallback => {
@@ -159,7 +159,7 @@ private[spark] object SparkSaslServer {
   * @return Base64-encoded string
   */
  def encodeIdentifier(identifier: Array[Byte]): String = {
-    new String(Base64.encodeBase64(identifier))
+    new String(Base64.encodeBase64(identifier), "utf-8")
  }
 
  /**
@@ -168,7 +168,7 @@ private[spark] object SparkSaslServer {
   * @return password as a char array.
   */
  def encodePassword(password: Array[Byte]): Array[Char] = {
-    new String(Base64.encodeBase64(password)).toCharArray()
+    new String(Base64.encodeBase64(password), "utf-8").toCharArray()
  }
 }
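The two helpers changed above Base64-encode the SASL user name and secret key. A small, hypothetical round-trip sketch using the same commons-codec Base64 API, with UTF-8 made explicit on both the encode and decode side (the object and method names here are illustrative, not part of the patch):

import org.apache.commons.codec.binary.Base64

object SaslEncodingRoundTrip {
  // Mirrors the encodeIdentifier style above: bytes -> Base64 -> String, utf-8 throughout.
  def encodeIdentifier(identifier: Array[Byte]): String =
    new String(Base64.encodeBase64(identifier), "utf-8")

  def decodeIdentifier(encoded: String): Array[Byte] =
    Base64.decodeBase64(encoded.getBytes("utf-8"))

  def main(args: Array[String]): Unit = {
    val user = "sparkSaslUser"
    val encoded = encodeIdentifier(user.getBytes("utf-8"))
    val decoded = new String(decodeIdentifier(encoded), "utf-8")
    assert(decoded == user)  // round-trips regardless of the JVM's default charset
    println(s"$user -> $encoded -> $decoded")
  }
}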

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ private[spark] class PythonRDD[T: ClassTag](
         val exLength = stream.readInt()
         val obj = new Array[Byte](exLength)
         stream.readFully(obj)
-        throw new PythonException(new String(obj), readerException)
+        throw new PythonException(new String(obj, "utf-8"), readerException)
       case SpecialLengths.END_OF_DATA_SECTION =>
         // We've finished the data section of the output, but we can still
         // read some accumulator updates:

core/src/main/scala/org/apache/spark/network/ReceiverTest.scala

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ private[spark] object ReceiverTest {
 
     manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => {
       /* println("Received [" + msg + "] from [" + id + "] at " + System.currentTimeMillis) */
-      val buffer = ByteBuffer.wrap("response".getBytes)
+      val buffer = ByteBuffer.wrap("response".getBytes("utf-8"))
       Some(Message.createBufferMessage(buffer, msg.id))
     })
     Thread.currentThread.join()

core/src/main/scala/org/apache/spark/network/SenderTest.scala

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ private[spark] object SenderTest {
     val responseStr = manager.sendMessageReliablySync(targetConnectionManagerId, dataMessage)
       .map { response =>
         val buffer = response.asInstanceOf[BufferMessage].buffers(0)
-        new String(buffer.array)
+        new String(buffer.array, "utf-8")
       }.getOrElse("none")
 
     val finishTime = System.currentTimeMillis
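ReceiverTest and SenderTest exchange the literal string "response" as raw bytes wrapped in a ByteBuffer. A tiny, hypothetical sketch of the encode/decode round trip they now perform with a fixed charset:

import java.nio.ByteBuffer

object ByteBufferRoundTrip {
  def main(args: Array[String]): Unit = {
    // Sender side: encode the payload with an explicit charset before wrapping it.
    val buffer = ByteBuffer.wrap("response".getBytes("utf-8"))

    // Receiver side: decode with the same charset instead of the platform default.
    val text = new String(buffer.array, "utf-8")

    assert(text == "response")
    println(text)
  }
}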

core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala

Lines changed: 4 additions & 4 deletions
@@ -87,10 +87,10 @@ private[spark] class PipedRDD[T: ClassTag](
     // When spark.worker.separated.working.directory option is turned on, each
     // task will be run in separate directory. This should be resolve file
     // access conflict issue
-    val taskDirectory = "./tasks/" + java.util.UUID.randomUUID.toString
+    val taskDirectory = "tasks" + File.separator + java.util.UUID.randomUUID.toString
     var workInTaskDirectory = false
     logDebug("taskDirectory = " + taskDirectory)
-    if (separateWorkingDir == true) {
+    if (separateWorkingDir) {
       val currentDir = new File(".")
       logDebug("currentDir = " + currentDir.getAbsolutePath())
       val taskDirFile = new File(taskDirectory)
@@ -106,13 +106,13 @@ private[spark] class PipedRDD[T: ClassTag](
         for (file <- currentDir.list(tasksDirFilter)) {
           val fileWithDir = new File(currentDir, file)
           Utils.symlink(new File(fileWithDir.getAbsolutePath()),
-            new File(taskDirectory + "/" + fileWithDir.getName()))
+            new File(taskDirectory + File.separator + fileWithDir.getName()))
         }
         pb.directory(taskDirFile)
         workInTaskDirectory = true
       } catch {
         case e: Exception => logError("Unable to setup task working directory: " + e.getMessage +
-          " (" + taskDirectory + ")")
+          " (" + taskDirectory + ")", e)
       }
     }
 
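The PipedRDD changes drop the hard-coded "/" in favor of File.separator so task working directories resolve on Windows as well. A short, hypothetical illustration of the portable options (the two-argument File constructor joins parent and child without any explicit separator):

import java.io.File
import java.util.UUID

object PortablePaths {
  def main(args: Array[String]): Unit = {
    // Same pattern as the patched PipedRDD: platform-specific separator constant.
    val taskDirectory = "tasks" + File.separator + UUID.randomUUID.toString

    // Equivalent, and often simpler: let File join the parent and child itself.
    val taskDirFile = new File("tasks", UUID.randomUUID.toString)

    println(taskDirectory)        // tasks\<uuid> on Windows, tasks/<uuid> elsewhere
    println(taskDirFile.getPath)
  }
}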

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 18 additions & 6 deletions
@@ -30,6 +30,7 @@ import scala.io.Source
 import scala.reflect.ClassTag
 
 import com.google.common.io.Files
+import org.apache.commons.lang.SystemUtils
 import com.google.common.util.concurrent.ThreadFactoryBuilder
 import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
 import org.json4s._
@@ -45,10 +46,13 @@ import org.apache.spark.serializer.{DeserializationStream, SerializationStream,
  */
 private[spark] object Utils extends Logging {
 
-  val osName = System.getProperty("os.name")
-
   val random = new Random()
 
+  def sparkBin(sparkHome: String, which: String): File = {
+    val suffix = if (SystemUtils.IS_OS_WINDOWS) ".cmd" else ""
+    new File(sparkHome + File.separator + "bin", which + suffix)
+  }
+
   /** Serialize an object using Java serialization */
   def serialize[T](o: T): Array[Byte] = {
     val bos = new ByteArrayOutputStream()
@@ -605,7 +609,7 @@ private[spark] object Utils extends Logging {
    */
   def isSymlink(file: File): Boolean = {
     if (file == null) throw new NullPointerException("File must not be null")
-    if (osName.startsWith("Windows")) return false
+    if (SystemUtils.IS_OS_WINDOWS) return false
     val fileInCanonicalDir = if (file.getParent() == null) {
       file
     } else {
@@ -1008,10 +1012,18 @@ private[spark] object Utils extends Logging {
     if (dst.isAbsolute()) {
       throw new IOException("Destination must be relative")
     }
-    val linkCmd = if (osName.startsWith("Windows")) "copy" else "ln -sf"
+    var cmdSuffix = ""
+    val linkCmd = if (SystemUtils.IS_OS_WINDOWS) {
+      // refer to http://technet.microsoft.com/en-us/library/cc771254.aspx
+      cmdSuffix = " /s /e /k /h /y /i"
+      "cmd /c xcopy "
+    } else {
+      cmdSuffix = ""
+      "ln -sf "
+    }
     import scala.sys.process._
-    (linkCmd + " " + src.getAbsolutePath() + " " + dst.getPath()) lines_! ProcessLogger(line =>
-      (logInfo(line)))
+    (linkCmd + src.getAbsolutePath() + " " + dst.getPath() + cmdSuffix) lines_!
+      ProcessLogger(line => (logInfo(line)))
   }
 
 
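The Utils changes replace string-matching on os.name with commons-lang's SystemUtils.IS_OS_WINDOWS and swap the Windows fallback for symlinks from copy to xcopy. A minimal, hypothetical sketch of that pattern; copyOrLinkCommand is an illustrative helper, not part of the patch:

import java.io.File
import org.apache.commons.lang.SystemUtils

object OsSpecificCommands {
  // Picks the copy/link command the way the patched Utils.symlink does:
  // xcopy on Windows (which cannot create symlinks), ln -sf elsewhere.
  def copyOrLinkCommand(src: File, dst: File): String = {
    if (SystemUtils.IS_OS_WINDOWS) {
      s"cmd /c xcopy ${src.getAbsolutePath} ${dst.getPath} /s /e /k /h /y /i"
    } else {
      s"ln -sf ${src.getAbsolutePath} ${dst.getPath}"
    }
  }

  def main(args: Array[String]): Unit = {
    println(copyOrLinkCommand(new File("conf"), new File("work/conf")))
  }
}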

core/src/test/java/org/apache/spark/JavaAPISuite.java

Lines changed: 2 additions & 2 deletions
@@ -610,8 +610,8 @@ public void textFiles() throws IOException {
 
   @Test
   public void wholeTextFiles() throws IOException {
-    byte[] content1 = "spark is easy to use.\n".getBytes();
-    byte[] content2 = "spark is also easy to use.\n".getBytes();
+    byte[] content1 = "spark is easy to use.\n".getBytes("utf-8");
+    byte[] content2 = "spark is also easy to use.\n".getBytes("utf-8");
 
     File tempDir = Files.createTempDir();
     String tempDirName = tempDir.getAbsolutePath();

examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ object MQTTPublisher {
     val msg: String = "hello mqtt demo for spark streaming"
 
     while (true) {
-      val message: MqttMessage = new MqttMessage(String.valueOf(msg).getBytes())
+      val message: MqttMessage = new MqttMessage(String.valueOf(msg).getBytes("utf-8"))
       msgtopic.publish(message)
       println("Published data. topic: " + msgtopic.getName() + " Message: " + message)
     }
