@@ -60,8 +60,6 @@ public class DefaultFileSystemMonitor implements FileSystemMonitor {
60
60
61
61
private static final Logger LOG = LoggerFactory .getLogger (DefaultFileSystemMonitor .class );
62
62
63
- private static final Duration ON_START_READ_END_LOG_TIMEOUT = Duration .ofSeconds (30 );
64
- private static final Duration DEFAULT_READ_END_LOG_TIMEOUT = Duration .ofSeconds (5 );
65
63
private static final int MAX_SCHEDULE_ATTEMPTS = 3 ;
66
64
67
65
private final FileSystemListing <?> fsListing ;
@@ -99,12 +97,16 @@ public class DefaultFileSystemMonitor implements FileSystemMonitor {
99
97
100
98
private final TaskFileOrder taskFileOrder ;
101
99
100
+ private Duration stateInitialReadTimeout = Duration .ofMinutes (5 );
101
+
102
+ private Duration stateDefaultReadTimeout = Duration .ofSeconds (5 );
103
+
102
104
/**
103
105
* Creates a new {@link DefaultFileSystemMonitor} instance.
104
106
*
105
107
* @param allowTasksReconfigurationAfterTimeoutMs {@code true} to allow tasks reconfiguration after a timeout.
106
108
* @param fsListening the {@link FileSystemListing} to be used for listing object files.
107
- * @param cleanPolicy the {@link GenericFileCleanupPolicy} to be used for cleaning object files.
109
+ * @param cleanPolicy the {@link GenericFileCleanupPolicy} to be used for cleaning object files.
108
110
* @param offsetPolicy the {@link SourceOffsetPolicy} to be used computing offset for object fileS.
109
111
* @param store the {@link StateBackingStore} used for storing object file cursor.
110
112
*/
@@ -162,9 +164,9 @@ public void onStateUpdate(final String key, final FileObject object) {
162
164
final FileObjectMeta removed = scheduled .remove (objectId );
163
165
if (removed == null && status .isOneOf (FileObjectStatus .CLEANED )) {
164
166
LOG .debug (
165
- "Received cleaned status but no object-file currently scheduled for: '{}'. " +
166
- "This warn should only occurred during recovering step" ,
167
- key
167
+ "Received cleaned status but no object-file currently scheduled for: '{}'. " +
168
+ "This warn should only occurred during recovering step" ,
169
+ key
168
170
);
169
171
}
170
172
}
@@ -177,23 +179,29 @@ public void onStateUpdate(final String key, final FileObject object) {
177
179
"with tasks processing files is already started. You can ignore that warning if the connector " +
178
180
" is recovering from a crash or resuming after being paused." );
179
181
}
180
- readStatesToEnd (ON_START_READ_END_LOG_TIMEOUT );
181
- recoverPreviouslyCompletedSources ();
182
- // Trigger a cleanup during initialization to ensure that all cleanable
183
- // object-files are eventually removed before scheduling any tasks.
184
- cleanUpCompletedFiles ();
182
+
183
+ if (readStatesToEnd (stateInitialReadTimeout )) {
184
+ recoverPreviouslyCompletedSources ();
185
+ // Trigger a cleanup during initialization to ensure that all cleanable
186
+ // object-files are eventually removed before scheduling any tasks.
187
+ cleanUpCompletedFiles ();
188
+ } else {
189
+ LOG .warn ("Cannot recover completed files from previous execution. State is empty." );
190
+ }
185
191
LOG .info ("Initialized FileSystemMonitor" );
186
192
}
187
193
188
194
private void recoverPreviouslyCompletedSources () {
189
- LOG .info ("Recovering completed files from a previous execution" );
190
- fileState .states ()
191
- .entrySet ()
192
- .stream ()
193
- .map (it -> it .getValue ().withKey (FileObjectKey .of (it .getKey ())))
194
- .filter (it -> cleanablePredicate .test (it .status ()))
195
- .forEach (cleanable ::add );
196
- LOG .info ("Finished recovering previously completed files : {}" , cleanable );
195
+ if (fileState != null && !fileState .states ().isEmpty ()) {
196
+ LOG .info ("Recovering completed files from a previous execution" );
197
+ fileState .states ()
198
+ .entrySet ()
199
+ .stream ()
200
+ .map (it -> it .getValue ().withKey (FileObjectKey .of (it .getKey ())))
201
+ .filter (it -> cleanablePredicate .test (it .status ()))
202
+ .forEach (cleanable ::add );
203
+ LOG .info ("Finished recovering completed files from previous execution: {}" , cleanable );
204
+ }
197
205
}
198
206
199
207
private boolean readStatesToEnd (final Duration timeout ) {
@@ -202,14 +210,23 @@ private boolean readStatesToEnd(final Duration timeout) {
202
210
fileState = store .snapshot ();
203
211
LOG .debug (
204
212
"Finished reading to end of log and updated states snapshot, new states log position: {}" ,
205
- fileState .offset ());
213
+ fileState .offset ()
214
+ );
206
215
return true ;
207
216
} catch (TimeoutException e ) {
208
217
LOG .warn ("Failed to reach end of states log quickly enough" , e );
209
218
return false ;
210
219
}
211
220
}
212
221
222
+ public void setStateInitialReadTimeout (final Duration stateInitialReadTimeout ) {
223
+ this .stateInitialReadTimeout = stateInitialReadTimeout ;
224
+ }
225
+
226
+ public void setStateDefaultReadTimeout (final Duration stateDefaultReadTimeout ) {
227
+ this .stateDefaultReadTimeout = stateDefaultReadTimeout ;
228
+ }
229
+
213
230
/**
214
231
* {@inheritDoc}
215
232
*/
@@ -267,13 +284,13 @@ private synchronized boolean updateFiles() {
267
284
final boolean noScheduledFiles = scheduled .isEmpty ();
268
285
if (!noScheduledFiles && allowTasksReconfigurationAfterTimeoutMs == Long .MAX_VALUE ) {
269
286
LOG .info (
270
- "Scheduled files still being processed: {}. Skip filesystem listing while waiting for tasks completion" ,
271
- scheduled .size ()
287
+ "Scheduled files still being processed: {}. Skip filesystem listing while waiting for tasks completion" ,
288
+ scheduled .size ()
272
289
);
273
290
return false ;
274
291
}
275
292
276
- boolean toEnd = readStatesToEnd (DEFAULT_READ_END_LOG_TIMEOUT );
293
+ boolean toEnd = readStatesToEnd (stateDefaultReadTimeout );
277
294
if (noScheduledFiles && !toEnd ) {
278
295
LOG .warn ("Failed to read state changelog. Skip filesystem listing due to timeout" );
279
296
return false ;
@@ -315,7 +332,7 @@ private synchronized boolean updateFiles() {
315
332
if (timeout > 0 ) {
316
333
LOG .info (
317
334
"Scheduled files still being processed ({}) but new files detected. " +
318
- "Waiting for {} ms before allowing task reconfiguration" ,
335
+ "Waiting for {} ms before allowing task reconfiguration" ,
319
336
scheduled .size (),
320
337
timeout
321
338
);
@@ -372,13 +389,13 @@ public List<FileObjectMeta> listFilesToSchedule(final int maxFilesToSchedule) {
372
389
do {
373
390
changed .set (false );
374
391
LOG .info (
375
- "Preparing next scheduling using the object files found during last iteration (attempt={}/{})." ,
376
- attempts + 1 ,
377
- MAX_SCHEDULE_ATTEMPTS
392
+ "Preparing next scheduling using the object files found during last iteration (attempt={}/{})." ,
393
+ attempts + 1 ,
394
+ MAX_SCHEDULE_ATTEMPTS
378
395
);
379
396
// Try to read states to end to make sure we do not attempt
380
397
// to schedule an object file that has been cleanup.
381
- final boolean toEnd = readStatesToEnd (DEFAULT_READ_END_LOG_TIMEOUT );
398
+ final boolean toEnd = readStatesToEnd (stateDefaultReadTimeout );
382
399
if (!toEnd ) {
383
400
LOG .warn ("Failed to read state changelog while scheduling object files. Timeout." );
384
401
}
@@ -400,8 +417,8 @@ public List<FileObjectMeta> listFilesToSchedule(final int maxFilesToSchedule) {
400
417
if (changed .get ()) {
401
418
if (attempts == MAX_SCHEDULE_ATTEMPTS ) {
402
419
LOG .warn (
403
- "Failed to prepare the object files after attempts: {}." ,
404
- MAX_SCHEDULE_ATTEMPTS
420
+ "Failed to prepare the object files after attempts: {}." ,
421
+ MAX_SCHEDULE_ATTEMPTS
405
422
);
406
423
// Make sure to clear the schedule list before returning.
407
424
scheduled .clear ();
@@ -415,8 +432,8 @@ public List<FileObjectMeta> listFilesToSchedule(final int maxFilesToSchedule) {
415
432
416
433
if (partitions .isEmpty ()) {
417
434
LOG .warn (
418
- "Filesystem could not be scanned quickly enough, " +
419
- "or no object file was detected after starting the connector."
435
+ "Filesystem could not be scanned quickly enough, " +
436
+ "or no object file was detected after starting the connector."
420
437
);
421
438
}
422
439
return taskFileOrder .sort (partitions );
@@ -434,7 +451,7 @@ public void close() {
434
451
if (running .compareAndSet (true , false )) {
435
452
try {
436
453
LOG .info ("Closing FileSystemMonitor resources" );
437
- readStatesToEnd (DEFAULT_READ_END_LOG_TIMEOUT );
454
+ readStatesToEnd (stateDefaultReadTimeout );
438
455
cleanUpCompletedFiles ();
439
456
LOG .info ("Closed FileSystemMonitor resources" );
440
457
} catch (final Exception e ) {
0 commit comments