Skip to content

Commit 37b8911

Browse files
committed
common : simplify etag tracking by removing json
The JSON parser is temporarily kept only for backward compatibility. It reads the etag from old .json files to prevent unnecessary re-downloads for existing users. This legacy code can be removed in a future version. Signed-off-by: Adrien Gallouët <[email protected]>
1 parent 02463ab commit 37b8911

File tree

1 file changed

+64
-111
lines changed

1 file changed

+64
-111
lines changed

common/arg.cpp

Lines changed: 64 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,50 @@ struct common_hf_file_res {
217217
std::string mmprojFile;
218218
};
219219

220+
// Store `etag` in the sidecar file "<path>.etag" that accompanies the file at `path`.
// The actual write is delegated to write_file(); a debug message naming the sidecar
// file is logged once that call returns.
static void write_etag(const std::string & path, const std::string & etag) {
    const std::string sidecar_path = path + ".etag";

    write_file(sidecar_path, etag);
    LOG_DBG("%s: file etag saved: %s\n", __func__, sidecar_path.c_str());
}
225+
226+
static std::string read_etag(const std::string & path) {
227+
std::string none;
228+
const std::string etag_path = path + ".etag";
229+
230+
if (std::filesystem::exists(etag_path)) {
231+
std::ifstream etag_in(etag_path);
232+
if (!etag_in) {
233+
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
234+
return none;
235+
}
236+
std::string etag;
237+
std::getline(etag_in, etag);
238+
return etag;
239+
}
240+
241+
// no etag file, but maybe there is an old .json
242+
// remove this code later
243+
const std::string metadata_path = path + ".json";
244+
245+
if (std::filesystem::exists(metadata_path)) {
246+
std::ifstream metadata_in(metadata_path);
247+
try {
248+
nlohmann::json metadata_json;
249+
metadata_in >> metadata_json;
250+
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
251+
metadata_json.dump().c_str());
252+
if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
253+
std::string etag = metadata_json.at("etag");
254+
write_etag(path, etag);
255+
return etag;
256+
}
257+
} catch (const nlohmann::json::exception & e) {
258+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
259+
}
260+
}
261+
return none;
262+
}
263+
220264
#ifdef LLAMA_USE_CURL
221265

222266
bool common_has_curl() {
@@ -373,36 +417,15 @@ static bool common_download_head(CURL * curl,
373417
static bool common_download_file_single_online(const std::string & url,
374418
const std::string & path,
375419
const std::string & bearer_token) {
376-
// If the file exists, check its JSON metadata companion file.
377-
std::string metadata_path = path + ".json";
378420
static const int max_attempts = 3;
379421
static const int retry_delay_seconds = 2;
380422
for (int i = 0; i < max_attempts; ++i) {
381-
nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
382-
std::string etag;
383-
std::string last_modified;
423+
std::string etag;
384424

385425
// Check if the file already exists locally
386426
const auto file_exists = std::filesystem::exists(path);
387427
if (file_exists) {
388-
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
389-
std::ifstream metadata_in(metadata_path);
390-
if (metadata_in.good()) {
391-
try {
392-
metadata_in >> metadata;
393-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
394-
metadata.dump().c_str());
395-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
396-
etag = metadata.at("etag");
397-
}
398-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
399-
last_modified = metadata.at("lastModified");
400-
}
401-
} catch (const nlohmann::json::exception & e) {
402-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
403-
}
404-
}
405-
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
428+
etag = read_etag(path);
406429
} else {
407430
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
408431
}
@@ -440,11 +463,6 @@ static bool common_download_file_single_online(const std::string & url,
440463
headers.etag.c_str());
441464
should_download = true;
442465
should_download_from_scratch = true;
443-
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
444-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
445-
last_modified.c_str(), headers.last_modified.c_str());
446-
should_download = true;
447-
should_download_from_scratch = true;
448466
}
449467
}
450468

@@ -475,15 +493,9 @@ static bool common_download_file_single_online(const std::string & url,
475493
}
476494
}
477495
}
478-
479-
// Write the updated JSON metadata file.
480-
metadata.update({
481-
{ "url", url },
482-
{ "etag", headers.etag },
483-
{ "lastModified", headers.last_modified }
484-
});
485-
write_file(metadata_path, metadata.dump(4));
486-
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
496+
if (head_request_ok) {
497+
write_etag(path, headers.etag);
498+
}
487499

488500
// start the download
489501
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
@@ -664,51 +676,6 @@ static void print_progress(size_t current, size_t total) { // TODO isatty
664676
std::cout.flush();
665677
}
666678

667-
struct common_file_metadata {
668-
std::string etag;
669-
std::string last_modified;
670-
};
671-
672-
static std::optional<common_file_metadata> read_metadata(const std::string & path) {
673-
if (!std::filesystem::exists(path)) {
674-
return std::nullopt;
675-
}
676-
677-
nlohmann::json metadata_json;
678-
common_file_metadata metadata;
679-
680-
std::ifstream metadata_in(path);
681-
try {
682-
metadata_in >> metadata_json;
683-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, path.c_str(),
684-
metadata_json.dump().c_str());
685-
if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
686-
metadata.etag = metadata_json.at("etag");
687-
}
688-
if (metadata_json.contains("lastModified") && metadata_json.at("lastModified").is_string()) {
689-
metadata.last_modified = metadata_json.at("lastModified");
690-
}
691-
} catch (const nlohmann::json::exception & e) {
692-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, path.c_str(), e.what());
693-
return std::nullopt;
694-
}
695-
696-
return metadata;
697-
}
698-
699-
static void write_metadata(const std::string & path,
700-
const std::string & url,
701-
const common_file_metadata & metadata) {
702-
nlohmann::json metadata_json = {
703-
{ "url", url },
704-
{ "etag", metadata.etag },
705-
{ "lastModified", metadata.last_modified }
706-
};
707-
708-
write_file(path, metadata_json.dump(4));
709-
LOG_DBG("%s: file metadata saved: %s\n", __func__, path.c_str());
710-
}
711-
712679
static bool common_pull_file(httplib::Client & cli,
713680
const std::string & resolve_path,
714681
const std::string & path_tmp,
@@ -775,8 +742,6 @@ static bool common_pull_file(httplib::Client & cli,
775742
static bool common_download_file_single_online(const std::string & url,
776743
const std::string & path,
777744
const std::string & bearer_token) {
778-
// If the file exists, check its JSON metadata companion file.
779-
std::string metadata_path = path + ".json";
780745
static const int max_attempts = 3;
781746
static const int retry_delay_seconds = 2;
782747

@@ -788,12 +753,11 @@ static bool common_download_file_single_online(const std::string & url,
788753
}
789754
cli.set_default_headers(default_headers);
790755

791-
common_file_metadata last;
792756
const bool file_exists = std::filesystem::exists(path);
757+
758+
std::string last_etag;
793759
if (file_exists) {
794-
if (auto opt = read_metadata(metadata_path)) {
795-
last = *opt;
796-
}
760+
last_etag = read_etag(path);
797761
} else {
798762
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
799763
}
@@ -809,14 +773,9 @@ static bool common_download_file_single_online(const std::string & url,
809773
}
810774
}
811775

812-
common_file_metadata current;
813-
if (head_ok) {
814-
if (head->has_header("ETag")) {
815-
current.etag = head->get_header_value("ETag");
816-
}
817-
if (head->has_header("Last-Modified")) {
818-
current.last_modified = head->get_header_value("Last-Modified");
819-
}
776+
std::string etag;
777+
if (head_ok && head->has_header("ETag")) {
778+
etag = head->get_header_value("ETag");
820779
}
821780

822781
size_t total_size = 0;
@@ -834,16 +793,10 @@ static bool common_download_file_single_online(const std::string & url,
834793
}
835794

836795
bool should_download_from_scratch = false;
837-
if (head_ok) {
838-
if (!last.etag.empty() && last.etag != current.etag) {
839-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
840-
last.etag.c_str(), current.etag.c_str());
841-
should_download_from_scratch = true;
842-
} else if (!last.last_modified.empty() && last.last_modified != current.last_modified) {
843-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
844-
last.last_modified.c_str(), current.last_modified.c_str());
845-
should_download_from_scratch = true;
846-
}
796+
if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
797+
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
798+
last_etag.c_str(), etag.c_str());
799+
should_download_from_scratch = true;
847800
}
848801

849802
if (file_exists) {
@@ -871,9 +824,8 @@ static bool common_download_file_single_online(const std::string & url,
871824
}
872825

873826
// start the download
874-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
875-
__func__, show_masked_url(parts).c_str(), path_temporary.c_str(),
876-
current.etag.c_str(), current.last_modified.c_str());
827+
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
828+
__func__, show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
877829
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
878830
if (!was_pull_successful) {
879831
if (i + 1 < max_attempts) {
@@ -883,15 +835,16 @@ static bool common_download_file_single_online(const std::string & url,
883835
} else {
884836
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
885837
}
886-
887838
continue;
888839
}
889840

890841
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
891842
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
892843
return false;
893844
}
894-
write_metadata(metadata_path, url, current);
845+
if (!etag.empty()) {
846+
write_etag(path, etag);
847+
}
895848
break;
896849
}
897850

0 commit comments

Comments
 (0)