|
44 | 44 | import java.util.ArrayList;
|
45 | 45 | import java.util.Collection;
|
46 | 46 | import java.util.Collections;
|
| 47 | +import java.util.HashSet; |
47 | 48 | import java.util.List;
|
| 49 | +import java.util.Set; |
48 | 50 |
|
49 | 51 | /**
|
50 | 52 | * Abstract {@link Sink} for file-based output. An implementation of FileBasedSink writes file-based
|
@@ -312,7 +314,7 @@ public void finalize(Iterable<FileResult> writerResults, PipelineOptions options
|
312 | 314 |
|
313 | 315 | // Optionally remove temporary files.
|
314 | 316 | if (temporaryFileRetention == TemporaryFileRetention.REMOVE) {
|
315 |
| - removeTemporaryFiles(options); |
| 317 | + removeTemporaryFiles(files, options); |
316 | 318 | }
|
317 | 319 | }
|
318 | 320 |
|
@@ -369,22 +371,44 @@ protected final List<String> generateDestinationFilenames(int numFiles) {
|
369 | 371 | return destFilenames;
|
370 | 372 | }
|
371 | 373 |
|
| 374 | + /** |
| 375 | + * Use {@link #removeTemporaryFiles(Collection, PipelineOptions)} instead. |
| 376 | + */ |
| 377 | + @Deprecated |
| 378 | + protected final void removeTemporaryFiles(PipelineOptions options) throws IOException { |
| 379 | + removeTemporaryFiles(Collections.<String>emptyList(), options); |
| 380 | + } |
| 381 | + |
372 | 382 | /**
|
373 | 383 | * Removes temporary output files. Uses the temporary filename to find files to remove.
|
374 | 384 | *
|
| 385 | + * <p>Additionally, to partially mitigate the effects of filesystems with eventually-consistent |
| 386 | + * directory matching APIs, takes a list of files that are known to exist - i.e. removes the |
| 387 | + * union of the known files and files that the filesystem says exist in the directory. |
| 388 | + * |
| 389 | + * <p>Assumes that, if globbing had been strongly consistent, it would have matched all |
| 390 | + * of knownFiles - i.e. on a strongly consistent filesystem, knownFiles can be ignored. |
| 391 | + * |
375 | 392 | * <p>Can be called from subclasses that override {@link FileBasedWriteOperation#finalize}.
|
376 | 393 | * <b>Note:</b>If finalize is overridden and does <b>not</b> rename or otherwise finalize
|
377 | 394 | * temporary files, this method will remove them.
|
378 | 395 | */
|
379 |
| - protected final void removeTemporaryFiles(PipelineOptions options) throws IOException { |
| 396 | + protected final void removeTemporaryFiles( |
| 397 | + Collection<String> knownFiles, PipelineOptions options) throws IOException { |
380 | 398 | String pattern = buildTemporaryFilename(baseTemporaryFilename, "*");
|
381 | 399 | LOG.debug("Finding temporary bundle output files matching {}.", pattern);
|
382 | 400 | FileOperations fileOperations = FileOperationsFactory.getFileOperations(pattern, options);
|
383 | 401 | IOChannelFactory factory = IOChannelUtils.getFactory(pattern);
|
384 | 402 | Collection<String> matches = factory.match(pattern);
|
385 |
| - LOG.debug("{} temporary files matched {}", matches.size(), pattern); |
386 |
| - LOG.debug("Removing {} files.", matches.size()); |
387 |
| - fileOperations.remove(matches); |
| 403 | + Set<String> allMatches = new HashSet<>(matches); |
| 404 | + allMatches.addAll(knownFiles); |
| 405 | + LOG.debug( |
| 406 | + "Removing {} temporary files matching {} ({} matched glob, {} additional known files)", |
| 407 | + allMatches.size(), |
| 408 | + pattern, |
| 409 | + matches.size(), |
| 410 | + allMatches.size() - matches.size()); |
| 411 | + fileOperations.remove(allMatches); |
388 | 412 | }
|
389 | 413 |
|
390 | 414 | /**
|
|
0 commit comments