@@ -11,7 +11,12 @@
 
 import numpy as np
 import pandas as pd
-from delphi_utils import read_params, create_export_csv, GeoMapper
+from delphi_utils import (
+    read_params,
+    create_export_csv,
+    S3ArchiveDiffer,
+    GeoMapper
+)
 
 from .geo import geo_map
 from .pull import pull_usafacts_data
@@ -73,6 +78,13 @@ def run_module():
     export_dir = params["export_dir"]
     base_url = params["base_url"]
     static_file_dir = params["static_file_dir"]
+    cache_dir = params["cache_dir"]
+
+    arch_diff = S3ArchiveDiffer(
+        cache_dir, export_dir,
+        params["bucket_name"], "usafacts",
+        params["aws_credentials"])
+    arch_diff.update_cache()
 
     map_df = pd.read_csv(
         join(static_file_dir, "fips_prop_pop.csv"), dtype={"fips": int}
@@ -105,3 +117,19 @@ def run_module():
         geo_res=geo_res,
         sensor=sensor_name,
     )
+
+    # Diff exports, and make incremental versions
+    _, common_diffs, new_files = arch_diff.diff_exports()
+
+    # Archive changed and new files only
+    to_archive = [f for f, diff in common_diffs.items() if diff is not None]
+    to_archive += new_files
+    _, fails = arch_diff.archive_exports(to_archive)
+
+    # Filter existing exports to exclude those that failed to archive
+    succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails}
+    arch_diff.filter_exports(succ_common_diffs)
+
+    # Report failures: someone should probably look at them
+    for exported_file in fails:
+        print(f"Failed to archive '{exported_file}'")
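
For reference, the new code reads three additional keys from the object returned by read_params: "cache_dir", "bucket_name", and "aws_credentials". The minimal sketch below shows one way the corresponding params.json entries might look, rendered here as a Python dict; every value is a placeholder assumption, and the credential field names assume the dict is forwarded to boto3 unchanged.

    # Hypothetical params sketch; only the key names come from the diff above.
    params = {
        "export_dir": "./receiving",
        "cache_dir": "./cache",                   # local copy of previously archived exports
        "bucket_name": "example-archive-bucket",  # assumed S3 bucket name
        "aws_credentials": {
            "aws_access_key_id": "...",           # placeholder
            "aws_secret_access_key": "..."        # placeholder
        },
        # ...plus the existing keys such as base_url and static_file_dir
    }

The call order in the diff suggests that update_cache() refreshes cache_dir from the S3 bucket before the fresh exports are diffed against it, so diff_exports() has a baseline to compare with.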