From c1b08fb270f1f0f4a7cc2e49dd1e19e1025ba96c Mon Sep 17 00:00:00 2001
From: zhangchj1990 <479224070@qq.com>
Date: Sat, 30 Jun 2018 17:22:48 +0800
Subject: [PATCH 1/2] Add self-test module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bin/spark-submit                                   |  4 ++
 .../scala/org/apache/spark/SparkConf.scala         |  2 +-
 pom.xml                                            |  1 +
 zhangchj/pom.xml                                   | 72 +++++++++++++++++++
 .../com/jd/zhangchj/core/WordCount.scala           | 23 ++++++
 5 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 zhangchj/pom.xml
 create mode 100644 zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala

diff --git a/bin/spark-submit b/bin/spark-submit
index 4e9d3614e6370..6793b3eb7a17d 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -24,4 +24,8 @@ fi
 # disable randomized hash for string in Python 3.3+
 export PYTHONHASHSEED=0
 
+# spark-submit ultimately invokes the spark-class script,
+# passing it the class org.apache.spark.deploy.SparkSubmit
+# along with the remaining arguments, e.g. deploy mode, executor-memory, etc.
+
 exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index d78b9f1b29685..9c6b9a79043bb 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -69,7 +69,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
   }
 
   private[spark] def loadFromSystemProperties(silent: Boolean): SparkConf = {
-    // Load any spark.* system properties
+    // Load any spark.* system properties (i.e. system properties whose keys start with "spark.")
     for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) {
       set(key, value, silent)
     }
diff --git a/pom.xml b/pom.xml
index 49f12703c04df..d9b9aa1baefb2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,6 +112,7 @@
     <module>external/kafka-0-10</module>
     <module>external/kafka-0-10-assembly</module>
     <module>external/kafka-0-10-sql</module>
+    <module>zhangchj</module>
diff --git a/zhangchj/pom.xml b/zhangchj/pom.xml
new file mode 100644
index 0000000000000..968786b774053
--- /dev/null
+++ b/zhangchj/pom.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.11</artifactId>
+    <version>2.1.0</version>
+  </parent>
+  <artifactId>zhangchj</artifactId>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.spark-project.spark</groupId>
+      <artifactId>unused</artifactId>
+      <version>1.0.0</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-deploy-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-install-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <outputDirectory>${jars.target.dir}</outputDirectory>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
diff --git a/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
new file mode 100644
index 0000000000000..c769333ce26ad
--- /dev/null
+++ b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
@@ -0,0 +1,23 @@
+
+package com.jd.zhangchj.core
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+object WordCount {
+
+  def main(args: Array[String]): Unit = {
+    val logFile: String = "/home/zhangchj/sources/spark/README.md" // Should be some file on your system
+    val conf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
+    val sc = new SparkContext(conf)
+    val logData = sc.textFile(logFile, 2).cache()
+    /* val numAs = logData.filter(line => line.contains("a")).count()
+    val numBs = logData.filter(line => line.contains("b")).count()
+    println(s"Lines with a: $numAs, Lines with b: $numBs") */
+    val wordCounts = logData.flatMap(line => line.split(" ")).
+      map(word => (word, 1)).reduceByKey((a, b) => a + b)
+    wordCounts.foreach(println(_))
+
+    sc.stop()
+    //val wordCounts = textFile.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
+  }
+}

From 0ce98171fc025b0c1807b5f4da22695a548beb86 Mon Sep 17 00:00:00 2001
From: zhangchj1990 <479224070@qq.com>
Date: Wed, 1 Aug 2018 11:07:13 +0800
Subject: [PATCH 2/2] Add self-test module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
index c769333ce26ad..0a1f6d462b153 100644
--- a/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
+++ b/zhangchj/src/main/scala/com/jd/zhangchj/core/WordCount.scala
@@ -7,8 +7,8 @@ object WordCount {
 
   def main(args: Array[String]): Unit = {
     val logFile: String = "/home/zhangchj/sources/spark/README.md" // Should be some file on your system
-    val conf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
-    val sc = new SparkContext(conf)
+    //val conf = new SparkConf().setAppName("WordCount").setMaster("local[*]")
+    val sc = new SparkContext("local[*]", "WordCount")
     val logData = sc.textFile(logFile, 2).cache()
     /* val numAs = logData.filter(line => line.contains("a")).count()
     val numBs = logData.filter(line => line.contains("b")).count()
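
Note: below is a minimal sketch of how the WordCount job added above might be
launched through the spark-submit script annotated in the first patch. The jar
path is an assumption based on the module's Maven coordinates (artifactId
zhangchj, version 2.1.0); adjust it to wherever your build places the jar.

    # Hypothetical invocation; the jar path is an assumption:
    ./bin/spark-submit \
      --class com.jd.zhangchj.core.WordCount \
      --master local[*] \
      zhangchj/target/zhangchj-2.1.0.jar

Since the second patch hardcodes "local[*]" in the SparkContext constructor,
that in-code master takes precedence over settings passed on the command line,
so the --master flag here is effectively redundant.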