From 4f1d26163edd275a2537ea9ef616d1e262da884a Mon Sep 17 00:00:00 2001 From: Gabriele Nizzoli Date: Fri, 13 Jun 2014 12:06:31 -0700 Subject: [PATCH] Fix for newFilesOnly logic in file DStream The newFilesOnly logic should be inverted: if newFilesOnly==true then only start reading files older than current time. As the code is now if newFilesOnly==true then it will start to read files that are older than 0L (that is: every file in the directory). --- .../org/apache/spark/streaming/dstream/FileInputDStream.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala index e878285f6a85..9eecbfaef363 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala @@ -45,7 +45,7 @@ class FileInputDStream[K: ClassTag, V: ClassTag, F <: NewInputFormat[K,V] : Clas // Files with mod time earlier than this is ignored. This is updated every interval // such that in the current interval, files older than any file found in the // previous interval will be ignored. Obviously this time keeps moving forward. - private var ignoreTime = if (newFilesOnly) 0L else System.currentTimeMillis() + private var ignoreTime = if (newFilesOnly) System.currentTimeMillis() else 0L // Latest file mod time seen till any point of time @transient private var path_ : Path = null