From a6b1c60c83902f023e230057a2b0d6ea99a30426 Mon Sep 17 00:00:00 2001 From: qualiu Date: Thu, 25 Aug 2016 18:18:26 +0800 Subject: [PATCH 1/2] Remove double quotes in spark/bin batch files to avoid cutting off arguments that are double-quoted because they contain special characters. A simple way to reproduce (for example, with a MySQL connection string): the following command cannot even start: spark-submit.cmd --jars just-to-start "jdbc:mysql://localhost:3306/lzdb?user=guest&password=abc123" my_table After this fix: (1) Still does not work: the full path contains a space, like "D:\opengit\spark\bin - Copy\spark-submit.cmd" --jars any-jars "jdbc:mysql://localhost:3306/lzdb?user=guest&password=abc123" my_table (2) Keeps working: every case that currently works. (3) Will work with this fix: an argument is quoted and the full path contains no space. By the way, I didn't change the pyspark, R, beeline etc. scripts because they seem to have worked fine for a long time. In addition, here is a tool to quickly change the files in spark/bin if you like: https://github.com/qualiu/lzmw (1) Remove quotes: lzmw -i -t "(^cmd.*?/[VCE])\s+\"+(%~dp0\S+\.cmd)\"+" -o "$1 $2" --nf "pyspark|sparkR|beeline|example" -p . -R (2) Add/Restore: lzmw -it "\"*(%~dp0\S+\.cmd)\"*" -o "\"$1\"" -p . -R (3) Or remove the head: lzmw -f "\.cmd$" -it "^cmd /V /E /C " -o "" -p %CD% -R --- bin/spark-class.cmd | 2 +- bin/spark-shell.cmd | 2 +- bin/spark-submit.cmd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/spark-class.cmd b/bin/spark-class.cmd index 3bf3d20cb57b5..19850db9e1e5d 100644 --- a/bin/spark-class.cmd +++ b/bin/spark-class.cmd @@ -20,4 +20,4 @@ rem rem This is the entry point for running a Spark class. To avoid polluting rem the environment, it just launches a new cmd to do the real work. 
-cmd /V /E /C "%~dp0spark-class2.cmd" %* +cmd /V /E /C %~dp0spark-class2.cmd %* diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd index 991423da6ab99..8f90ba5a0b3b8 100644 --- a/bin/spark-shell.cmd +++ b/bin/spark-shell.cmd @@ -20,4 +20,4 @@ rem rem This is the entry point for running Spark shell. To avoid polluting the rem environment, it just launches a new cmd to do the real work. -cmd /V /E /C "%~dp0spark-shell2.cmd" %* +cmd /V /E /C %~dp0spark-shell2.cmd %* diff --git a/bin/spark-submit.cmd b/bin/spark-submit.cmd index f301606933a95..8f3b84c7b971d 100644 --- a/bin/spark-submit.cmd +++ b/bin/spark-submit.cmd @@ -20,4 +20,4 @@ rem rem This is the entry point for running Spark submit. To avoid polluting the rem environment, it just launches a new cmd to do the real work. -cmd /V /E /C "%~dp0spark-submit2.cmd" %* +cmd /V /E /C %~dp0spark-submit2.cmd %* From 7059639a21138f494e83da5bbfe800dfca7789d8 Mon Sep 17 00:00:00 2001 From: qualiu Date: Fri, 26 Aug 2016 08:17:19 +0800 Subject: [PATCH 2/2] More conservative: check whether the script path contains a space before adding quotes in the spark/bin batch files. In effect this is the same as the last commit, because currently it doesn't work anyway if the full path to spark-submit.cmd contains a space. --- bin/spark-class.cmd | 7 ++++++- bin/spark-shell.cmd | 7 ++++++- bin/spark-submit.cmd | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/bin/spark-class.cmd b/bin/spark-class.cmd index 19850db9e1e5d..6a25838ab531f 100644 --- a/bin/spark-class.cmd +++ b/bin/spark-class.cmd @@ -20,4 +20,9 @@ rem rem This is the entry point for running a Spark class. To avoid polluting rem the environment, it just launches a new cmd to do the real work. 
-cmd /V /E /C %~dp0spark-class2.cmd %* +echo "%~dp0spark-class2.cmd"|findstr /C:" " +if %ERRORLEVEL% EQU 0 ( + cmd /V /E /C "%~dp0spark-class2.cmd" %* +) else ( + cmd /V /E /C %~dp0spark-class2.cmd %* +) diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd index 8f90ba5a0b3b8..47de25fb143e4 100644 --- a/bin/spark-shell.cmd +++ b/bin/spark-shell.cmd @@ -20,4 +20,9 @@ rem rem This is the entry point for running Spark shell. To avoid polluting the rem environment, it just launches a new cmd to do the real work. -cmd /V /E /C %~dp0spark-shell2.cmd %* +echo "%~dp0spark-shell2.cmd"|findstr /C:" " +if %ERRORLEVEL% EQU 0 ( + cmd /V /E /C "%~dp0spark-shell2.cmd" %* +) else ( + cmd /V /E /C %~dp0spark-shell2.cmd %* +) diff --git a/bin/spark-submit.cmd b/bin/spark-submit.cmd index 8f3b84c7b971d..a50692191dd98 100644 --- a/bin/spark-submit.cmd +++ b/bin/spark-submit.cmd @@ -20,4 +20,9 @@ rem rem This is the entry point for running Spark submit. To avoid polluting the rem environment, it just launches a new cmd to do the real work. -cmd /V /E /C %~dp0spark-submit2.cmd %* +echo "%~dp0spark-submit2.cmd"|findstr /C:" " +if %ERRORLEVEL% EQU 0 ( + cmd /V /E /C "%~dp0spark-submit2.cmd" %* +) else ( + cmd /V /E /C %~dp0spark-submit2.cmd %* +)