diff --git a/bin/spark-submit b/bin/spark-submit
index 4e9d3614e6370..6793b3eb7a17d 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -24,4 +24,8 @@ fi
 # disable randomized hash for string in Python 3.3+
 export PYTHONHASHSEED=0
 
+# spark-submit ultimately invokes the spark-class script,
+# passing in the class org.apache.spark.deploy.SparkSubmit
+# along with the other arguments it received, such as deploy mode, executor-memory, etc.
+
 exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index d78b9f1b29685..9c6b9a79043bb 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -69,7 +69,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
   }
 
   private[spark] def loadFromSystemProperties(silent: Boolean): SparkConf = {
-    // Load any spark.* system properties
+    // Load any spark.* system properties (i.e., system properties whose keys start with "spark.")
     for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) {
       set(key, value, silent)
     }
diff --git a/pom.xml b/pom.xml
index 49f12703c04df..d9b9aa1baefb2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,6 +112,7 @@
     <module>external/kafka-0-10</module>
     <module>external/kafka-0-10-assembly</module>
     <module>external/kafka-0-10-sql</module>
+    <module>zhangchj</module>
diff --git a/zhangchj/pom.xml b/zhangchj/pom.xml
new file mode 100644
index 0000000000000..968786b774053
--- /dev/null
+++ b/zhangchj/pom.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.11</artifactId>
+    <version>2.1.0</version>
+  </parent>
+
+  <artifactId>zhangchj</artifactId>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.spark-project.spark</groupId>
+      <artifactId>unused</artifactId>
+      <version>1.0.0</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-deploy-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-install-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <outputDirectory>${jars.target.dir}</outputDirectory>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
diff --git a/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
new file mode 100644
index 0000000000000..0a1f6d462b153
--- /dev/null
+++ b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
@@ -0,0 +1,23 @@
+
+package com.jd.zhangchj.core
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+object WordCount {
+
+  def main(args: Array[String]): Unit = {
+    val logFile: String = "/home/zhangchj/sources/spark/README.md" // Should be some file on your system
+    // val conf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
+    val sc = new SparkContext("local[*]", "WordCount")
+    val logData = sc.textFile(logFile, 2).cache()
+    /* val numAs = logData.filter(line => line.contains("a")).count()
+    val numBs = logData.filter(line => line.contains("b")).count()
+    println(s"Lines with a: $numAs, Lines with b: $numBs") */
+    val wordCounts = logData.flatMap(line => line.split(" ")).
+      map(word => (word, 1)).reduceByKey((a, b) => a + b)
+    wordCounts.foreach(println(_))
+
+    sc.stop()
+    // val wordCounts = textFile.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
+  }
+}
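
Note: a minimal sketch of how the zhangchj module added above might be built and its WordCount run locally. The Maven flags and spark-submit options are standard; the exact jar name and location are assumptions (the maven-jar-plugin writes to ${jars.target.dir}, defined in the parent pom), so check the actual build output before running.

    # Build only the zhangchj module (and the modules it depends on), skipping tests
    ./build/mvn -pl zhangchj -am -DskipTests package

    # Submit WordCount locally; as the comments in bin/spark-submit note, this
    # forwards to org.apache.spark.deploy.SparkSubmit via bin/spark-class.
    # The jar path below is an assumption -- use whatever path the build actually produced.
    ./bin/spark-submit \
      --class com.jd.zhangchj.core.WordCount \
      --master "local[*]" \
      zhangchj/target/zhangchj-2.1.0.jar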