Skip to content

Commit d2fa74a

Browse files
committed
Introduce CIDR filter list to block IP ranges
This change rejects requests from IP ranges that overload the API endpoints by fetching information from many clients at the same time. A recent incident highlighted the issue: a number of `crates?page` requests triggered the ">0.5 failed requests threshold" alert. These requests timed out because of the high number of expensive requests being made, which came in from about 100 different IPs in parallel. Before this change, only individual IPs or specific User Agents could be blocked. Both existing approaches have drawbacks: blocking single IPs would have been ineffective against this behaviour, while blocking a specific (common) User Agent may prevent valid usage of the API. Therefore a list of CIDR blocks is now used to check whether an IP belongs to a range that sends too many requests to expensive API endpoints. **Please note** that this has to be used very carefully; in general, it is a tool that may block whole regions of the internet.
1 parent e75450e commit d2fa74a

File tree

7 files changed

+164
-6
lines changed

7 files changed

+164
-6
lines changed

Cargo.lock

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ cargo-registry-index = { path = "cargo-registry-index" }
3535
cargo-registry-markdown = { path = "cargo-registry-markdown" }
3636
cargo-registry-s3 = { path = "cargo-registry-s3" }
3737
chrono = { version = "=0.4.19", features = ["serde"] }
38+
cidr-utils = "=0.5.6"
3839
clap = { version = "=3.1.18", features = ["derive", "unicode"] }
3940

4041
conduit = "=0.10.0"

src/config.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use cidr_utils::cidr::Ipv4Cidr;
2+
13
use crate::publish_rate_limit::PublishRateLimit;
24
use crate::{env, env_optional, uploaders::Uploader, Env};
35

@@ -25,6 +27,7 @@ pub struct Server {
2527
pub blocked_traffic: Vec<(String, Vec<String>)>,
2628
pub max_allowed_page_offset: u32,
2729
pub page_offset_ua_blocklist: Vec<String>,
30+
pub page_offset_cidr_blocklist: Vec<Ipv4Cidr>,
2831
pub excluded_crate_names: Vec<String>,
2932
pub domain_name: String,
3033
pub allowed_origins: Vec<String>,
@@ -63,6 +66,9 @@ impl Default for Server {
6366
/// be blocked if `WEB_MAX_ALLOWED_PAGE_OFFSET` is exceeded. Including an empty string in the
6467
/// list will block *all* user-agents exceeding the offset. If not set or empty, no blocking
6568
/// will occur.
69+
/// - `WEB_PAGE_OFFSET_CIDR_BLOCKLIST`: A comma separated list of CIDR blocks that will be used
70+
/// to block IP addresses given in the `X-Real-Ip` HTTP header, e.g. `192.168.1.0/24`.
71+
/// If not set or empty, no blocking will occur.
6672
/// - `INSTANCE_METRICS_LOG_EVERY_SECONDS`: How frequently should instance metrics be logged.
6773
/// If the environment variable is not present instance metrics are not logged.
6874
/// - `FORCE_UNCONDITIONAL_REDIRECTS`: Whether to force unconditional redirects in the download
@@ -84,6 +90,13 @@ impl Default for Server {
8490
Some(s) if s.is_empty() => vec![],
8591
Some(s) => s.split(',').map(String::from).collect(),
8692
};
93+
let page_offset_cidr_blocklist =
94+
match env_optional::<String>("WEB_PAGE_OFFSET_CIDR_BLOCKLIST") {
95+
None => vec![],
96+
Some(s) if s.is_empty() => vec![],
97+
Some(s) => s.split(',').map(String::from).collect(),
98+
};
99+
87100
let base = Base::from_environment();
88101
let excluded_crate_names = match env_optional::<String>("EXCLUDED_CRATE_NAMES") {
89102
None => vec![],
@@ -103,6 +116,7 @@ impl Default for Server {
103116
blocked_traffic: blocked_traffic(),
104117
max_allowed_page_offset: env_optional("WEB_MAX_ALLOWED_PAGE_OFFSET").unwrap_or(200),
105118
page_offset_ua_blocklist,
119+
page_offset_cidr_blocklist: parse_cidr_blocks(&page_offset_cidr_blocklist),
106120
excluded_crate_names,
107121
domain_name: domain_name(),
108122
allowed_origins,
@@ -144,6 +158,33 @@ pub(crate) fn domain_name() -> String {
144158
dotenv::var("DOMAIN_NAME").unwrap_or_else(|_| "crates.io".into())
145159
}
146160

161+
/// Parses list of CIDR block strings to valid `Ipv4Cidr` structs.
162+
///
163+
/// The purpose is to be able to block IP ranges that overload the API that contains pagination.
164+
/// A valid CIDR block has the following restriction:
165+
///
166+
/// * Only IPv4 blocks are currently supported.
167+
/// * The minimum number of host prefix bits must be at least 16.
168+
///
169+
fn parse_cidr_blocks(blocks: &[String]) -> Vec<Ipv4Cidr> {
170+
blocks
171+
.iter()
172+
.map(|block| match Ipv4Cidr::from_str(block) {
173+
Ok(cidr) => {
174+
if cidr.get_bits() < 16 {
175+
panic!(
176+
"WEB_PAGE_OFFSET_CIDR_BLOCKLIST must only contain CIDR blocks with \
177+
a host prefix of at least 16 bits."
178+
)
179+
} else {
180+
cidr
181+
}
182+
}
183+
Err(_) => panic!("WEB_PAGE_OFFSET_CIDR_BLOCKLIST only allows IPv4 CIDR blocks"),
184+
})
185+
.collect::<Vec<_>>()
186+
}
187+
147188
fn blocked_traffic() -> Vec<(String, Vec<String>)> {
148189
let pattern_list = dotenv::var("BLOCKED_TRAFFIC").unwrap_or_default();
149190
parse_traffic_patterns(&pattern_list)
@@ -183,3 +224,31 @@ fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() {
183224

184225
assert_none!(parse_traffic_patterns(pattern_string_3).next());
185226
}
227+
228+
/// Well-formed IPv4 blocks with a sufficiently long prefix are parsed in input order.
#[test]
fn parse_cidr_block_list_successfully() {
    let raw = ["127.0.0.1/24", "192.168.0.1/31"];

    let input: Vec<String> = raw.iter().map(|&s| s.to_string()).collect();
    let expected: Vec<Ipv4Cidr> = raw
        .iter()
        .map(|&s| Ipv4Cidr::from_str(s).unwrap())
        .collect();

    assert_eq!(expected, parse_cidr_blocks(&input));
}
241+
242+
/// A `/8` block is below the 16-bit minimum prefix length and must be rejected.
///
/// `expected` pins the panic to the prefix-length check so the test cannot pass because of
/// an unrelated panic (e.g. a parse failure).
#[test]
#[should_panic(expected = "must only contain CIDR blocks")]
fn parse_cidr_blocks_panics_when_host_prefix_is_too_low() {
    let input = vec!["127.0.0.1/8".to_string()];
    parse_cidr_blocks(&input);
}
248+
249+
/// IPv6 blocks are not supported and must be rejected.
///
/// `expected` pins the panic to the "IPv4 only" parse failure so the test cannot pass
/// because of an unrelated panic.
#[test]
#[should_panic(expected = "only allows IPv4 CIDR blocks")]
fn parse_cidr_blocks_panics_when_ipv6_is_given() {
    let input = vec!["2002::1234:abcd:ffff:c0a8:101/64".to_string()];
    parse_cidr_blocks(&input);
}

src/controllers/helpers/pagination.rs

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::config::Server;
12
use crate::controllers::prelude::*;
23
use crate::middleware::log_request::add_custom_metadata;
34
use crate::models::helpers::with_count::*;
@@ -11,6 +12,8 @@ use diesel::query_dsl::LoadQuery;
1112
use diesel::sql_types::BigInt;
1213
use indexmap::IndexMap;
1314
use serde::{Deserialize, Serialize};
15+
use std::net::Ipv4Addr;
16+
use std::str::FromStr;
1417
use std::sync::Arc;
1518

1619
const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10;
@@ -96,14 +99,10 @@ impl PaginationOptionsBuilder {
9699
}
97100

98101
// Block large offsets for known violators of the crawler policy
99-
if let Some(app) = self.limit_page_numbers {
102+
if let Some(ref app) = self.limit_page_numbers {
100103
let config = &app.config;
101-
let user_agent = request_header(req, header::USER_AGENT);
102104
if numeric_page > config.max_allowed_page_offset
103-
&& config
104-
.page_offset_ua_blocklist
105-
.iter()
106-
.any(|blocked| user_agent.contains(blocked))
105+
&& is_useragent_or_ip_blocked(config, req)
107106
{
108107
add_custom_metadata("cause", "large page offset");
109108
return Err(bad_request("requested page offset is too large"));
@@ -257,6 +256,37 @@ impl RawSeekPayload {
257256
}
258257
}
259258

259+
/// Function to check if the request is blocked.
260+
///
261+
/// A request can be blocked if either the User Agent is on the User Agent block list or if the client
262+
/// IP is on the CIDR block list.
263+
fn is_useragent_or_ip_blocked(config: &Server, req: &dyn RequestExt) -> bool {
264+
let user_agent = request_header(req, header::USER_AGENT);
265+
let client_ip = request_header(req, "x-real-ip");
266+
267+
// check if user agent is blocked
268+
if config
269+
.page_offset_ua_blocklist
270+
.iter()
271+
.any(|blocked| user_agent.contains(blocked))
272+
{
273+
return true;
274+
}
275+
276+
// check if client ip is blocked, needs to be an IPv4 address
277+
if let Ok(client_ip) = Ipv4Addr::from_str(client_ip) {
278+
if config
279+
.page_offset_cidr_blocklist
280+
.iter()
281+
.any(|blocked| blocked.contains(client_ip))
282+
{
283+
return true;
284+
}
285+
}
286+
287+
false
288+
}
289+
260290
/// Encode a payload to be used as a seek key.
261291
///
262292
/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no

src/tests/krate/search.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::util::{RequestHelper, TestApp};
33
use crate::{new_category, new_user};
44
use cargo_registry::models::Category;
55
use cargo_registry::schema::crates;
6+
use cidr_utils::cidr::Ipv4Cidr;
67
use diesel::{dsl::*, prelude::*, update};
78
use http::StatusCode;
89

@@ -822,3 +823,27 @@ fn pagination_parameters_only_accept_integers() {
822823
json!({ "errors": [{ "detail": "invalid digit found in string" }] })
823824
);
824825
}
826+
827+
/// A page offset above the configured maximum is rejected when the client IP is inside a
/// configured CIDR block.
#[test]
fn pagination_blocks_ip_from_cidr_block_list() {
    let (app, anon, owner) = TestApp::init()
        .with_config(|server| {
            server.max_allowed_page_offset = 1;
            server.page_offset_cidr_blocklist = vec![Ipv4Cidr::from_str("127.0.0.1/24").unwrap()];
        })
        .with_user();
    let owner = owner.as_model();

    app.db(|conn| {
        for name in [
            "pagination_links_1",
            "pagination_links_2",
            "pagination_links_3",
        ] {
            CrateBuilder::new(name, owner.id).expect_build(conn);
        }
    });

    let response = anon.get_with_query::<()>("/api/v1/crates", "page=2&per_page=1");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);

    let expected_body = json!({ "errors": [{ "detail": "requested page offset is too large" }] });
    assert_eq!(response.into_json(), expected_body);
}

src/tests/util.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ pub trait RequestHelper {
202202
fn req(method: conduit::Method, path: &str) -> MockRequest {
203203
let mut request = MockRequest::new(method, path);
204204
request.header(header::USER_AGENT, "conduit-test");
205+
request.header("x-real-ip", "127.0.0.1");
205206
request
206207
}
207208

src/tests/util/test_app.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ fn simple_config() -> config::Server {
341341
blocked_traffic: Default::default(),
342342
max_allowed_page_offset: 200,
343343
page_offset_ua_blocklist: vec![],
344+
page_offset_cidr_blocklist: vec![],
344345
excluded_crate_names: vec![],
345346
domain_name: "crates.io".into(),
346347
allowed_origins: Vec::new(),

0 commit comments

Comments
 (0)