Skip to content

Commit 392b9a7

Browse files
committed
v0.0.6:
- Adds support to only scrape repos in `forced_repositories`. - Updates dependencies.
1 parent e81ef5e commit 392b9a7

File tree

8 files changed

+838
-392
lines changed

8 files changed

+838
-392
lines changed

Cargo.lock

Lines changed: 713 additions & 356 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cvmfs_server_scraper"
3-
version = "0.0.5"
3+
version = "0.0.6"
44
edition = "2021"
55
authors = ["Terje Kvernes <[email protected]>"]
66
license = "MIT"
@@ -15,7 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
1515
serde_json = "1"
1616
reqwest = { version = "0", features = ["json"] }
1717
tokio = { version = "1", features = ["full"] }
18-
thiserror = "1"
18+
thiserror = "2"
1919
semver = "1"
2020
chrono = { version = "0.4", features = ["serde"] }
2121
yare = "3"

Changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this will adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) once
77
we reach version 0.1.0, up until then, expect breaking changes.
88

9+
## [0.0.6] - 2025-10-20
10+
11+
### Added
12+
13+
- `scrape` for a server now takes a boolean argument to indicate if only the explicitly listed repositories for that server are to be scraped, overriding `ignored_repositories`.
14+
This parameter is also added to the `scrape_servers` API, in both cases requiring consumers to update their code accordingly. To retain previous behavior, pass `false` to
15+
either function. If using the builder interface, `only_scrape_forced_repositories(true|false)` is available. The default is `false`, retaining previous behavior and requiring no changes.
16+
917
## [0.0.5] - 2024-10-18
1018

1119
### Added

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ async fn main() -> Result<(), CVMFSScraperError> {
4242
let scraped_servers = Scraper::new()
4343
.forced_repositories(repolist)
4444
.ignored_repositories(ignored_repos)
45+
.only_scrape_forced_repositories(false) // Only scrape forced repositories if true, overrides ignored_repositories
4546
.geoapi_servers(DEFAULT_GEOAPI_SERVERS.clone())? // This is the default list
4647
.with_servers(servers) // Transitions to a WithServer state.
4748
.validate()? // Transitions to a ValidatedAndReady state, now immutable.
@@ -74,6 +75,8 @@ For populated servers, the field `backend_detected` will be set to the detected
7475

7576
## What repositories are scraped?
7677

78+
If `only_scrape_forced_repositories` is set to true, only the repositories explicitly passed to the scraper will be scraped, ignoring any ignored repositories. Otherwise, the following rules apply:
79+
7780
- For servers that are set to or detected as CVMFS, the scraper will scrape the union of the detected and configurations explicitly stated repositories.
7881
- For servers that are set to or detected as S3, only the explicitly stated repositories will be scraped (and the scraper will fail if the server type is explicitly set to S3 and no repositories are passed).
7982

src/lib.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
//! let scraped_servers = Scraper::new()
5050
//! .forced_repositories(repolist)
5151
//! .ignored_repositories(ignored_repos)
52+
//! .only_scrape_forced_repositories(false) // Only scrape forced repositories if true, overrides ignored_repositories, default false
5253
//! .geoapi_servers(DEFAULT_GEOAPI_SERVERS.clone())? // This is the default list
5354
//! .with_servers(servers) // Transitions to a WithServer state.
5455
//! .validate()? // Transitions to a ValidatedAndReady state, now immutable.
@@ -117,7 +118,7 @@ mod tests {
117118
let futures = servers.into_iter().map(|server| {
118119
let repolist = repolist.clone();
119120
async move {
120-
match server.scrape(repolist.clone(), vec![], None).await {
121+
match server.scrape(repolist.clone(), vec![], false, None).await {
121122
ScrapedServer::Populated(popserver) => {
122123
for repo in repolist {
123124
assert!(popserver.has_repository(repo));
@@ -143,7 +144,7 @@ mod tests {
143144

144145
let repolist = vec!["software.eessi.io", "dev.eessi.io"];
145146

146-
match server.scrape(repolist.clone(), vec![], None).await {
147+
match server.scrape(repolist.clone(), vec![], false, None).await {
147148
ScrapedServer::Populated(_) => {
148149
panic!("Error, should not have succeeded");
149150
}
@@ -163,7 +164,7 @@ mod tests {
163164

164165
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
165166
let repoparams: Vec<String> = Vec::new();
166-
let servers = server.scrape(repoparams, vec![], None).await;
167+
let servers = server.scrape(repoparams, vec![], false, None).await;
167168
for repo in repolist {
168169
match servers.clone() {
169170
ScrapedServer::Populated(popserver) => {
@@ -186,7 +187,7 @@ mod tests {
186187

187188
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
188189
let popserver = server
189-
.scrape(repolist.clone(), vec![], None)
190+
.scrape(repolist.clone(), vec![], false, None)
190191
.await
191192
.get_populated_server()
192193
.unwrap();
@@ -203,7 +204,7 @@ mod tests {
203204
);
204205

205206
let repoparams: Vec<String> = Vec::new();
206-
let popserver = server.scrape(repoparams, vec![], None).await;
207+
let popserver = server.scrape(repoparams, vec![], false, None).await;
207208
assert!(popserver.is_ok());
208209
let popserver = popserver.get_populated_server().unwrap();
209210
assert_eq!(popserver.backend_type, ServerBackendType::AutoDetect);
@@ -220,7 +221,7 @@ mod tests {
220221

221222
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
222223
let popserver = server
223-
.scrape(repolist.clone(), vec![], None)
224+
.scrape(repolist.clone(), vec![], false, None)
224225
.await
225226
.get_populated_server()
226227
.unwrap();
@@ -238,7 +239,7 @@ mod tests {
238239

239240
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
240241
let popserver = server
241-
.scrape(repolist.clone(), vec![], None)
242+
.scrape(repolist.clone(), vec![], false, None)
242243
.await
243244
.get_populated_server()
244245
.unwrap();
@@ -269,7 +270,7 @@ mod tests {
269270

270271
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
271272
let popserver = server
272-
.scrape(repolist.clone(), vec![], None)
273+
.scrape(repolist.clone(), vec![], false, None)
273274
.await
274275
.get_populated_server()
275276
.unwrap();
@@ -292,7 +293,7 @@ mod tests {
292293
let repolist = vec!["software.eessi.io", "dev.eessi.io", "riscv.eessi.io"];
293294
let ignored_repos = vec!["riscv.eessi.io"];
294295
let popserver = server
295-
.scrape(repolist.clone(), ignored_repos.clone(), None)
296+
.scrape(repolist.clone(), ignored_repos.clone(), false, None)
296297
.await
297298
.get_populated_server()
298299
.unwrap();
@@ -301,6 +302,26 @@ mod tests {
301302
assert!(!popserver.has_repository("riscv.eessi.io"));
302303
}
303304

305+
#[tokio::test]
306+
async fn test_online_cvmfs_server_s1_only_forced_repos() {
307+
let server = Server::new(
308+
ServerType::Stratum1,
309+
ServerBackendType::CVMFS,
310+
Hostname::try_from("aws-eu-central-s1.eessi.science").unwrap(),
311+
);
312+
313+
let repolist = vec!["software.eessi.io", "dev.eessi.io"];
314+
let popserver = server
315+
.scrape(repolist.clone(), vec![], true, None)
316+
.await
317+
.get_populated_server()
318+
.unwrap();
319+
assert!(popserver.has_repository("software.eessi.io"));
320+
assert!(popserver.has_repository("dev.eessi.io"));
321+
322+
assert!(popserver.repositories.len() == 2);
323+
}
324+
304325
#[tokio::test]
305326
async fn test_online_scraping_using_builder_interface() {
306327
let scraper = Scraper::new();

src/models/servers.rs

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -161,16 +161,22 @@ impl Server {
161161
/// also fetch metadata about the server from the repositories.json and meta.json files, if they
162162
/// are available.
163163
///
164-
/// The method takes a list of repositories to scrape (which may be empty unless the backend is S3)
165-
/// and a list of repositories to ignore (this may be empty). It also takes an optional list of
166-
/// geoapi servers to use when fetching geoapi information.
164+
/// ## Arguments
167165
///
168-
/// The method will return a populated server object if the scrape is successful, or a failed server
169-
/// object if the scrape fails.
166+
/// - `repositories`: A list of repositories to scrape. This may be empty unless the backend is S3.
167+
/// - `ignored_repositories`: A list of repositories to ignore. This may be empty.
168+
/// - `only_scrape_forced_repos`: If true, only the repositories provided in the `repositories` argument will be scraped
169+
/// which overrides ignored_repositories. If false, the repositories from repositories.json will be merged with
170+
/// the provided list and then filtered by ignored_repositories.
171+
///
172+
/// ## Returns
173+
///
174+
/// A ScrapedServer enum containing either a PopulatedServer or a FailedServer.
170175
pub async fn scrape<R>(
171176
&self,
172177
repositories: Vec<R>,
173178
ignored_repositories: Vec<R>,
179+
only_scrape_forced_repos: bool,
174180
geoapi_servers: Option<Vec<Hostname>>,
175181
) -> ScrapedServer
176182
where
@@ -228,13 +234,16 @@ impl Server {
228234
}
229235
};
230236
backend_detected = ServerBackendType::CVMFS;
231-
all_repos.extend(
232-
repo_json
233-
.repositories_and_replicas()
234-
.into_iter()
235-
.filter(|r| !ignore.contains(&r.name))
236-
.map(|r| r.name),
237-
);
237+
238+
if !only_scrape_forced_repos {
239+
all_repos.extend(
240+
repo_json
241+
.repositories_and_replicas()
242+
.into_iter()
243+
.filter(|r| !ignore.contains(&r.name))
244+
.map(|r| r.name),
245+
);
246+
};
238247
}
239248
Err(error) => match error {
240249
ScrapeError::FetchError(_) => {
@@ -274,13 +283,15 @@ impl Server {
274283
return ScrapedServer::Failed(self.to_failed_server(error));
275284
}
276285
}
277-
all_repos.extend(
278-
repo_json
279-
.repositories_and_replicas()
280-
.into_iter()
281-
.filter(|r| !ignore.contains(&r.name))
282-
.map(|r| r.name),
283-
);
286+
if !only_scrape_forced_repos {
287+
all_repos.extend(
288+
repo_json
289+
.repositories_and_replicas()
290+
.into_iter()
291+
.filter(|r| !ignore.contains(&r.name))
292+
.map(|r| r.name),
293+
)
294+
};
284295
}
285296
}
286297

0 commit comments

Comments
 (0)