Commit 623a5f0: merge master
2 parents f06d5ba + 9a5d482
266 files changed, 10861 insertions(+), 3524 deletions(-)


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima-excludes
 /build/
 work/
 out/

.rat-excludes

Lines changed: 7 additions & 0 deletions
@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log
@@ -43,3 +44,9 @@ test.out/*
 .*iml
 service.properties
 db.lck
+build/*
+dist/*
+.*out
+.*ipr
+.*iws
+logs

README.md

Lines changed: 4 additions & 3 deletions
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
 guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
-
 ## Building Spark
 
 Spark is built on Scala 2.10. To build Spark and its example programs, run:
 
     ./sbt/sbt assembly
 
+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell
 
 The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi
 
-will run the Logistic Regression example locally.
+will run the Pi example locally.
 
 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
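
As a concrete illustration of the MASTER variable mentioned in the README text above, the following sketch shows both invocation styles; the spark://host:7077 URL is a placeholder, not a value taken from this commit:

    # Run the Pi example locally (the script picks a default master)
    ./bin/run-example SparkPi

    # Submit the same example to a standalone cluster instead;
    # spark://host:7077 is a placeholder cluster URL
    MASTER=spark://host:7077 ./bin/run-example SparkPi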

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
           <exclude>META-INF/*.SF</exclude>
           <exclude>META-INF/*.DSA</exclude>
           <exclude>META-INF/*.RSA</exclude>
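
A note on this one-line change: in maven-shade-plugin filters, the <artifact> element takes Maven coordinates, while <excludes> patterns match entry paths inside the jar (as the neighboring META-INF patterns do), so org.datanucleus:* in the excludes list would not have matched the datanucleus class files; org/datanucleus/** does. The compute-classpath changes below add the standalone datanucleus jars instead, since their plugin.xml metadata is lost inside an uber jar. One way to verify the result, assuming a locally built assembly whose exact file name may differ:

    # List any org/datanucleus entries that slipped into the shaded jar;
    # no output from grep means the exclude pattern took effect.
    jar tf assembly/target/scala-2.10/spark-assembly-*hadoop*.jar | grep '^org/datanucleus'
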
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Bagel: An implementation of Pregel in Spark. THIS IS DEPRECATED - use Spark's GraphX library.
+ */
+package org.apache.spark.bagel;
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+/**
+ * Bagel: An implementation of Pregel in Spark. THIS IS DEPRECATED - use Spark's GraphX library.
+ */
+package object bagel

bin/compute-classpath.cmd

Lines changed: 23 additions & 1 deletion
@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.
 
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%lib
+) else (
+  set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes

bin/pyspark

Lines changed: 32 additions & 5 deletions
@@ -17,14 +17,20 @@
 # limitations under the License.
 #
 
-# Figure out where the Scala framework is installed
+# Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
 SCALA_VERSION=2.10
 
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  echo "Usage: ./bin/pyspark [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+fi
+
 # Exit if the user hasn't compiled Spark
 if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
@@ -52,13 +58,34 @@ export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
 export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
+# If IPython options are specified, assume user wants to run IPython
 if [ -n "$IPYTHON_OPTS" ]; then
   IPYTHON=1
 fi
 
-# Only use ipython if no command line arguments were provided [SPARK-1134]
-if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
-  exec ipython $IPYTHON_OPTS
+# Build up arguments list manually to preserve quotes and backslashes.
+# We export Spark submit arguments as an environment variable because shell.py must run as a
+# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
+
+PYSPARK_SUBMIT_ARGS=""
+whitespace="[[:space:]]"
+for i in "$@"; do
+  if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
+  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
+  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
+done
+export PYSPARK_SUBMIT_ARGS
+
+# If a python file is provided, directly run spark-submit.
+if [[ "$1" =~ \.py$ ]]; then
+  echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
+  echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
+  exec $FWDIR/bin/spark-submit "$@"
 else
-  exec "$PYSPARK_PYTHON" "$@"
+  # Only use ipython if no command line arguments were provided [SPARK-1134]
+  if [[ "$IPYTHON" = "1" ]]; then
+    exec ipython $IPYTHON_OPTS
+  else
+    exec "$PYSPARK_PYTHON"
+  fi
 fi
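
The quoting loop above is the subtle part of this change, so here is a self-contained sketch of what it does; the sample arguments are invented for illustration:

    #!/usr/bin/env bash
    # Rebuild an argument list the way bin/pyspark now does, so that an
    # argument containing spaces survives the round trip through a single
    # environment variable (PYSPARK_SUBMIT_ARGS in the real script).
    ARGS=""
    whitespace="[[:space:]]"
    for i in --master local --name "my app"; do
      # Escape embedded double quotes first...
      if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
      # ...then re-quote any argument that contains whitespace.
      if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
      ARGS="$ARGS $i"
    done
    echo "$ARGS"   # prints (with a leading space): --master local --name "my app"

The same pattern shows up in pyspark2.cmd below, which simply forwards %* as PYSPARK_SUBMIT_ARGS.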

bin/pyspark2.cmd

Lines changed: 18 additions & 3 deletions
@@ -31,7 +31,7 @@ set FOUND_JAR=0
 for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
   set FOUND_JAR=1
 )
-if "%FOUND_JAR%"=="0" (
+if [%FOUND_JAR%] == [0] (
   echo Failed to find Spark assembly JAR.
   echo You need to build Spark with sbt\sbt assembly before running this program.
   goto exit
@@ -42,15 +42,30 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Figure out which Python to use.
-if "x%PYSPARK_PYTHON%"=="x" set PYSPARK_PYTHON=python
+if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python
 
 set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
 set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py
+set PYSPARK_SUBMIT_ARGS=%*
 
 echo Running %PYSPARK_PYTHON% with PYTHONPATH=%PYTHONPATH%
 
-"%PYSPARK_PYTHON%" %*
+rem Check whether the argument is a file
+for /f %%i in ('echo %1^| findstr /R "\.py"') do (
+  set PYTHON_FILE=%%i
+)
+
+if [%PYTHON_FILE%] == [] (
+  %PYSPARK_PYTHON%
+) else (
+  echo.
+  echo WARNING: Running python applications through ./bin/pyspark.cmd is deprecated as of Spark 1.0.
+  echo Use ./bin/spark-submit ^<python file^>
+  echo.
+  "%FWDIR%\bin\spark-submit.cmd" %PYSPARK_SUBMIT_ARGS%
+)
+
 :exit
bin/run-example

Lines changed: 11 additions & 12 deletions
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo "  - set MASTER=XX to use a specific master"
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,23 +47,12 @@ fi
 
 EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
 ./bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"
