Commit 0393795

moved Kinesis examples out of examples/ and back into extras/kinesis-asl
updated the build to only include kinesis-asl inside the examples jar when -Pkinesis-asl is specified
1 parent 691a6be commit 0393795
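
In practice this means the Kinesis example classes, and the spark-streaming-kinesis-asl dependency they pull in, only land in the examples jar when the kinesis-asl Maven profile is activated. A minimal sketch of such a build, assuming the standard Spark Maven build (the -DskipTests flag is illustrative; only -Pkinesis-asl matters here):

    mvn -Pkinesis-asl -DskipTests clean package

Without the profile, the examples module builds exactly as before, minus the Kinesis code.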

7 files changed, 112 insertions(+), 99 deletions(-)

assembly/pom.xml

Lines changed: 0 additions & 10 deletions
@@ -185,16 +185,6 @@
         </dependency>
       </dependencies>
     </profile>
-    <profile>
-      <id>kinesis-asl</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-        </dependency>
-      </dependencies>
-    </profile>
     <profile>
       <id>bigtop-dist</id>
       <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop

examples/pom.xml

Lines changed: 13 additions & 5 deletions
@@ -34,6 +34,19 @@
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
+  <profiles>
+    <profile>
+      <id>kinesis-asl</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -96,11 +109,6 @@
       <artifactId>spark-streaming-mqtt_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
      <artifactId>hbase</artifactId>

examples/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java renamed to extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java

Lines changed: 22 additions & 22 deletions
@@ -79,14 +79,14 @@ public final class JavaKinesisWordCountASL {
   private static final Pattern WORD_SEPARATOR = Pattern.compile(" ");
   private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class);
 
-  /**
+  /*
    * Make the constructor private to enforce singleton
    */
   private JavaKinesisWordCountASL() {
   }
 
   public static void main(String[] args) {
-    /**
+    /*
      * Check that all required args were passed in.
      */
     if (args.length < 2) {
@@ -100,41 +100,41 @@ public static void main(String[] args) {
 
     StreamingExamples.setStreamingLogLevels();
 
-    /** Populate the appropriate variables from the given args */
+    /* Populate the appropriate variables from the given args */
     String streamName = args[0];
     String endpointUrl = args[1];
-    /** Set the batch interval to a fixed 2000 millis (2 seconds) */
+    /* Set the batch interval to a fixed 2000 millis (2 seconds) */
     Duration batchInterval = new Duration(2000);
 
-    /** Create a Kinesis client in order to determine the number of shards for the given stream */
+    /* Create a Kinesis client in order to determine the number of shards for the given stream */
     AmazonKinesisClient kinesisClient = new AmazonKinesisClient(
         new DefaultAWSCredentialsProviderChain());
     kinesisClient.setEndpoint(endpointUrl);
 
-    /** Determine the number of shards from the stream */
+    /* Determine the number of shards from the stream */
     int numShards = kinesisClient.describeStream(streamName)
         .getStreamDescription().getShards().size();
 
-    /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */
+    /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */
     int numStreams = numShards;
 
-    /** Must add 1 more thread than the number of receivers or the output won't show properly from the driver */
+    /* Must add 1 more thread than the number of receivers or the output won't show properly from the driver */
     int numSparkThreads = numStreams + 1;
 
-    /** Setup the Spark config. */
+    /* Setup the Spark config. */
     SparkConf sparkConfig = new SparkConf().setAppName("KinesisWordCount").setMaster(
         "local[" + numSparkThreads + "]");
 
-    /** Kinesis checkpoint interval. Same as batchInterval for this example. */
+    /* Kinesis checkpoint interval. Same as batchInterval for this example. */
     Duration checkpointInterval = batchInterval;
 
-    /** Setup the StreamingContext */
+    /* Setup the StreamingContext */
     JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);
 
-    /** Setup the checkpoint directory used by Spark Streaming */
+    /* Setup the checkpoint directory used by Spark Streaming */
     jssc.checkpoint("/tmp/checkpoint");
 
-    /** Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */
+    /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */
     List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams);
     for (int i = 0; i < numStreams; i++) {
       streamsList.add(
@@ -143,27 +143,27 @@ public static void main(String[] args) {
       );
     }
 
-    /** Union all the streams if there is more than 1 stream */
+    /* Union all the streams if there is more than 1 stream */
     JavaDStream<byte[]> unionStreams;
     if (streamsList.size() > 1) {
       unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
     } else {
-      /** Otherwise, just use the 1 stream */
+      /* Otherwise, just use the 1 stream */
       unionStreams = streamsList.get(0);
     }
 
-    /**
-     * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection.
-     * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR.
-     */
+    /*
+     * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection.
+     * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR.
+     */
     JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
       @Override
       public Iterable<String> call(byte[] line) {
         return Lists.newArrayList(WORD_SEPARATOR.split(new String(line)));
       }
     });
 
-    /** Map each word to a (word, 1) tuple, then reduce/aggregate by key. */
+    /* Map each word to a (word, 1) tuple, then reduce/aggregate by key. */
     JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
         new PairFunction<String, String, Integer>() {
           @Override
@@ -177,10 +177,10 @@ public Integer call(Integer i1, Integer i2) {
       }
     });
 
-    /** Print the first 10 wordCounts by key */
+    /* Print the first 10 wordCounts by key */
     wordCounts.print();
 
-    /** Start the streaming context and await termination */
+    /* Start the streaming context and await termination */
     jssc.start();
     jssc.awaitTermination();
   }

examples/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala renamed to extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala

Lines changed: 51 additions & 35 deletions
@@ -18,21 +18,20 @@
 package org.apache.spark.examples.streaming
 
 import java.nio.ByteBuffer
-
 import scala.util.Random
-
 import org.apache.spark.Logging
 import org.apache.spark.SparkConf
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.Milliseconds
 import org.apache.spark.streaming.StreamingContext
 import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions
 import org.apache.spark.streaming.kinesis.KinesisUtils
-
 import com.amazonaws.auth.DefaultAWSCredentialsProviderChain
 import com.amazonaws.services.kinesis.AmazonKinesisClient
 import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
 import com.amazonaws.services.kinesis.model.PutRecordRequest
+import org.apache.log4j.Logger
+import org.apache.log4j.Level
 
 /**
  * Kinesis Spark Streaming WordCount example.
@@ -72,9 +71,7 @@ import com.amazonaws.services.kinesis.model.PutRecordRequest
  */
 object KinesisWordCountASL extends Logging {
   def main(args: Array[String]) {
-    /**
-     * Check that all required args were passed in.
-     */
+    /* Check that all required args were passed in. */
     if (args.length < 2) {
       System.err.println(
         """
@@ -87,57 +84,57 @@ object KinesisWordCountASL extends Logging {
     }
 
     StreamingExamples.setStreamingLogLevels()
-
-    /** Populate the appropriate variables from the given args */
+
+    /* Populate the appropriate variables from the given args */
     val Array(streamName, endpointUrl) = args
 
-    /** Determine the number of shards from the stream */
+    /* Determine the number of shards from the stream */
     val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain())
     kinesisClient.setEndpoint(endpointUrl)
     val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards()
       .size()
 
-    /** In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. */
+    /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. */
     val numStreams = numShards
 
-    /**
+    /*
      * numSparkThreads should be 1 more thread than the number of receivers.
      * This leaves one thread available for actually processing the data.
     */
     val numSparkThreads = numStreams + 1
 
-    /** Setup the and SparkConfig and StreamingContext */
-    /** Spark Streaming batch interval */
+    /* Setup the and SparkConfig and StreamingContext */
+    /* Spark Streaming batch interval */
     val batchInterval = Milliseconds(2000)
     val sparkConfig = new SparkConf().setAppName("KinesisWordCount")
       .setMaster(s"local[$numSparkThreads]")
     val ssc = new StreamingContext(sparkConfig, batchInterval)
-    /** Setup the checkpoint directory used by Spark Streaming */
+    /* Setup the checkpoint directory used by Spark Streaming */
     ssc.checkpoint("/tmp/checkpoint");
 
-    /** Kinesis checkpoint interval. Same as batchInterval for this example. */
+    /* Kinesis checkpoint interval. Same as batchInterval for this example. */
     val kinesisCheckpointInterval = batchInterval
 
-    /** Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */
+    /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */
     val kinesisStreams = (0 until numStreams).map { i =>
       KinesisUtils.createStream(ssc, streamName, endpointUrl, kinesisCheckpointInterval,
         InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2)
     }
 
-    /** Union all the streams */
+    /* Union all the streams */
     val unionStreams = ssc.union(kinesisStreams)
 
-    /** Convert each line of Array[Byte] to String, split into words, and count them */
+    /* Convert each line of Array[Byte] to String, split into words, and count them */
     val words = unionStreams.flatMap(byteArray => new String(byteArray)
       .split(" "))
 
-    /** Map each word to a (word, 1) tuple so we can reduce/aggregate by key. */
+    /* Map each word to a (word, 1) tuple so we can reduce/aggregate by key. */
     val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)
 
-    /** Print the first 10 wordCounts by key */
+    /* Print the first 10 wordCounts by key */
     wordCounts.print()
 
-    /** Start the streaming context and await termination */
+    /* Start the streaming context and await termination */
     ssc.start()
     ssc.awaitTermination()
   }
@@ -169,13 +166,13 @@ object KinesisWordCountProducerASL {
 
     StreamingExamples.setStreamingLogLevels()
 
-    /** Populate the appropriate variables from the given args */
+    /* Populate the appropriate variables from the given args */
     val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args
 
-    /** Generate the records and return the totals */
+    /* Generate the records and return the totals */
    val totals = generate(stream, endpoint, recordsPerSecond.toInt, wordsPerRecord.toInt)
 
-    /** Print the array of (index, total) tuples */
+    /* Print the array of (index, total) tuples */
     println("Totals")
     totals.foreach(total => println(total.toString()))
   }
@@ -187,51 +184,70 @@ object KinesisWordCountProducerASL {
 
     val MaxRandomInts = 10
 
-    /** Create the Kinesis client */
+    /* Create the Kinesis client */
     val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain())
     kinesisClient.setEndpoint(endpoint)
 
     println(s"Putting records onto stream $stream and endpoint $endpoint at a rate of" +
       s" $recordsPerSecond records per second and $wordsPerRecord words per record");
 
     val totals = new Array[Int](MaxRandomInts)
-    /** Put String records onto the stream per the given recordPerSec and wordsPerRecord */
+    /* Put String records onto the stream per the given recordPerSec and wordsPerRecord */
     for (i <- 1 to 5) {
 
-      /** Generate recordsPerSec records to put onto the stream */
+      /* Generate recordsPerSec records to put onto the stream */
       val records = (1 to recordsPerSecond.toInt).map { recordNum =>
-        /**
+        /*
         * Randomly generate each wordsPerRec words between 0 (inclusive)
         * and MAX_RANDOM_INTS (exclusive)
         */
        val data = (1 to wordsPerRecord.toInt).map(x => {
-          /** Generate the random int */
+          /* Generate the random int */
          val randomInt = Random.nextInt(MaxRandomInts)
 
-          /** Keep track of the totals */
+          /* Keep track of the totals */
          totals(randomInt) += 1
 
          randomInt.toString()
        }).mkString(" ")
 
-        /** Create a partitionKey based on recordNum */
+        /* Create a partitionKey based on recordNum */
        val partitionKey = s"partitionKey-$recordNum"
 
-        /** Create a PutRecordRequest with an Array[Byte] version of the data */
+        /* Create a PutRecordRequest with an Array[Byte] version of the data */
        val putRecordRequest = new PutRecordRequest().withStreamName(stream)
          .withPartitionKey(partitionKey)
          .withData(ByteBuffer.wrap(data.getBytes()));
 
-        /** Put the record onto the stream and capture the PutRecordResult */
+        /* Put the record onto the stream and capture the PutRecordResult */
        val putRecordResult = kinesisClient.putRecord(putRecordRequest);
      }
 
-      /** Sleep for a second */
+      /* Sleep for a second */
      Thread.sleep(1000)
      println("Sent " + recordsPerSecond + " records")
    }
 
-    /** Convert the totals to (index, total) tuple */
+    /* Convert the totals to (index, total) tuple */
     (0 to (MaxRandomInts - 1)).zip(totals)
   }
 }
+
+/**
+ * Utility functions for Spark Streaming examples.
+ * This has been lifted from the examples/ project to remove the circular dependency.
+ */
+object StreamingExamples extends Logging {
+
+  /** Set reasonable logging levels for streaming if the user has not configured log4j. */
+  def setStreamingLogLevels() {
+    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
+    if (!log4jInitialized) {
+      // We first log something to initialize Spark's default logging, then we override the
+      // logging level.
+      logInfo("Setting log level to [WARN] for streaming example." +
+        " To override add a custom log4j.properties to the classpath.")
+      Logger.getRootLogger.setLevel(Level.WARN)
+    }
+  }
+}
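
For context, the wiring a downstream application needs once it depends on the spark-streaming-kinesis-asl artifact is small. A minimal Scala sketch reusing the same KinesisUtils.createStream signature shown in the diff above (the app name, stream name, and endpoint URL are placeholders; the master URL is assumed to be supplied by spark-submit):

    import org.apache.spark.SparkConf
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.{Milliseconds, StreamingContext}
    import org.apache.spark.streaming.kinesis.KinesisUtils
    import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

    object MinimalKinesisApp {
      def main(args: Array[String]) {
        val batchInterval = Milliseconds(2000)
        val ssc = new StreamingContext(
          new SparkConf().setAppName("MinimalKinesisApp"), batchInterval)
        ssc.checkpoint("/tmp/checkpoint")

        // Single receiver; the same createStream call the example issues per shard.
        val stream = KinesisUtils.createStream(ssc, "myStreamName",
          "https://kinesis.us-east-1.amazonaws.com", batchInterval,
          InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2)

        // Decode the raw Array[Byte] records and print a sample each batch.
        stream.map(bytes => new String(bytes)).print()

        ssc.start()
        ssc.awaitTermination()
      }
    }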

extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala

Lines changed: 2 additions & 3 deletions
@@ -32,9 +32,8 @@ private[kinesis] class KinesisCheckpointState(
     checkpointInterval: Duration,
     currentClock: Clock = new SystemClock())
   extends Logging {
-  /**
-   * Initialize the checkpoint clock using the given currentClock + checkpointInterval millis
-   */
+
+  /* Initialize the checkpoint clock using the given currentClock + checkpointInterval millis */
   val checkpointClock = new ManualClock()
   checkpointClock.setTime(currentClock.currentTime() + checkpointInterval.milliseconds)
 
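
The two surviving lines prime a manual clock one checkpoint interval ahead of real time; checkpointing logic can then compare that manual clock against the system clock to decide when the next Kinesis checkpoint is due, and push it forward again afterwards. A hypothetical, self-contained Scala sketch of that pattern (these clock classes are illustrative stand-ins, not Spark's internal Clock utilities):

    // Illustrative stand-ins: Spark's real Clock/ManualClock are internal to Spark.
    class SystemClock { def currentTime(): Long = System.currentTimeMillis() }
    class ManualClock {
      private var time = 0L
      def setTime(t: Long): Unit = { time = t }
      def addToTime(delta: Long): Unit = { time += delta }
      def currentTime(): Long = time
    }

    class CheckpointState(intervalMillis: Long, clock: SystemClock = new SystemClock) {
      val checkpointClock = new ManualClock()
      checkpointClock.setTime(clock.currentTime() + intervalMillis)

      // A checkpoint is due once real time catches up to the manual clock.
      def shouldCheckpoint(): Boolean = clock.currentTime() >= checkpointClock.currentTime()

      // After checkpointing, schedule the next one a full interval out.
      def advanceCheckpoint(): Unit = checkpointClock.addToTime(intervalMillis)
    }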
