@@ -31,13 +31,13 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
3131import com .amazonaws .services .dynamodbv2 .document .DynamoDB
3232import com .amazonaws .services .kinesis .AmazonKinesisClient
3333import com .amazonaws .services .kinesis .model ._
34- import com .amazonaws .services .kinesis .producer .{KinesisProducer , KinesisProducerConfiguration , UserRecordResult }
35- import com .google .common .util .concurrent .{FutureCallback , Futures }
3634
3735import org .apache .spark .Logging
3836
/**
 * Shared utility methods for performing Kinesis tests that actually transfer data.
 *
 * PLEASE KEEP THIS FILE UNDER src/main AS PYTHON TESTS NEED ACCESS TO THIS FILE!
 */
4242private [kinesis] class KinesisTestUtils extends Logging {
4343
@@ -54,7 +54,7 @@ private[kinesis] class KinesisTestUtils extends Logging {
5454 @ volatile
5555 private var _streamName : String = _
5656
57- private lazy val kinesisClient = {
57+ protected lazy val kinesisClient = {
5858 val client = new AmazonKinesisClient (KinesisTestUtils .getAWSCredentials())
5959 client.setEndpoint(endpointUrl)
6060 client
@@ -66,14 +66,12 @@ private[kinesis] class KinesisTestUtils extends Logging {
6666 new DynamoDB (dynamoDBClient)
6767 }
6868
69- private lazy val kinesisProducer : KinesisProducer = {
70- val conf = new KinesisProducerConfiguration ()
71- .setRecordMaxBufferedTime(1000 )
72- .setMaxConnections(1 )
73- .setRegion(regionName)
74- .setMetricsLevel(" none" )
75-
76- new KinesisProducer (conf)
69+ protected def getProducer (aggregate : Boolean ): KinesisDataGenerator = {
70+ if (! aggregate) {
71+ new SimpleDataGenerator (kinesisClient)
72+ } else {
73+ throw new UnsupportedOperationException (" Aggregation is not supported through this code path" )
74+ }
7775 }
7876
7977 def streamName : String = {
@@ -104,41 +102,8 @@ private[kinesis] class KinesisTestUtils extends Logging {
104102 */
105103 def pushData (testData : Seq [Int ], aggregate : Boolean ): Map [String , Seq [(Int , String )]] = {
106104 require(streamCreated, " Stream not yet created, call createStream() to create one" )
107- val shardIdToSeqNumbers = new mutable.HashMap [String , ArrayBuffer [(Int , String )]]()
108-
109- testData.foreach { num =>
110- val str = num.toString
111- val data = ByteBuffer .wrap(str.getBytes())
112- if (aggregate) {
113- val future = kinesisProducer.addUserRecord(streamName, str, data)
114- val kinesisCallBack = new FutureCallback [UserRecordResult ]() {
115- override def onFailure (t : Throwable ): Unit = {} // do nothing
116-
117- override def onSuccess (result : UserRecordResult ): Unit = {
118- val shardId = result.getShardId
119- val seqNumber = result.getSequenceNumber()
120- val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId,
121- new ArrayBuffer [(Int , String )]())
122- sentSeqNumbers += ((num, seqNumber))
123- }
124- }
125-
126- Futures .addCallback(future, kinesisCallBack)
127- kinesisProducer.flushSync() // make sure we send all data before returning the map
128- } else {
129- val putRecordRequest = new PutRecordRequest ().withStreamName(streamName)
130- .withData(data)
131- .withPartitionKey(str)
132-
133- val putRecordResult = kinesisClient.putRecord(putRecordRequest)
134- val shardId = putRecordResult.getShardId
135- val seqNumber = putRecordResult.getSequenceNumber()
136- val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId,
137- new ArrayBuffer [(Int , String )]())
138- sentSeqNumbers += ((num, seqNumber))
139- }
140- }
141-
105+ val producer = getProducer(aggregate)
106+ val shardIdToSeqNumbers = producer.sendData(streamName, testData)
142107 logInfo(s " Pushed $testData: \n\t ${shardIdToSeqNumbers.mkString(" \n\t " )}" )
143108 shardIdToSeqNumbers.toMap
144109 }
@@ -264,3 +229,32 @@ private[kinesis] object KinesisTestUtils {
264229 }
265230 }
266231}
232+
233+ /** A wrapper interface that will allow us to consolidate the code for synthetic data generation. */
234+ private [kinesis] trait KinesisDataGenerator {
235+ /** Sends the data to Kinesis and returns the metadata for everything that has been sent. */
236+ def sendData (streamName : String , data : Seq [Int ]): Map [String , Seq [(Int , String )]]
237+ }
238+
239+ private [kinesis] class SimpleDataGenerator (
240+ client : AmazonKinesisClient ) extends KinesisDataGenerator {
241+ override def sendData (streamName : String , data : Seq [Int ]): Map [String , Seq [(Int , String )]] = {
242+ val shardIdToSeqNumbers = new mutable.HashMap [String , ArrayBuffer [(Int , String )]]()
243+ data.foreach { num =>
244+ val str = num.toString
245+ val data = ByteBuffer .wrap(str.getBytes())
246+ val putRecordRequest = new PutRecordRequest ().withStreamName(streamName)
247+ .withData(data)
248+ .withPartitionKey(str)
249+
250+ val putRecordResult = client.putRecord(putRecordRequest)
251+ val shardId = putRecordResult.getShardId
252+ val seqNumber = putRecordResult.getSequenceNumber()
253+ val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId,
254+ new ArrayBuffer [(Int , String )]())
255+ sentSeqNumbers += ((num, seqNumber))
256+ }
257+
258+ shardIdToSeqNumbers.toMap
259+ }
260+ }
0 commit comments