Skip to content

Commit 3e32561

Browse files
authored
Merge pull request #4806 from ferrous-systems/feature/use-cidr-block-list
Use env var to support blocking IP ranges
2 parents 1b75015 + e0703b0 commit 3e32561

File tree

7 files changed

+158
-6
lines changed

7 files changed

+158
-6
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ hex = "=0.4.3"
6161
http = "=0.2.7"
6262
hyper = { version = "=0.14.18", features = ["client", "http1"] }
6363
indexmap = { version = "=1.8.1", features = ["serde-1"] }
64+
ipnetwork = "=0.19.0"
6465
tikv-jemallocator = { version = "=0.4.3", features = ['unprefixed_malloc_on_supported_platforms', 'profiling'] }
6566
lettre = { version = "=0.10.0-rc.6", default-features = false, features = ["file-transport", "smtp-transport", "native-tls", "hostname", "builder"] }
6667
minijinja = "=0.15.0"

src/config.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use ipnetwork::IpNetwork;
2+
13
use crate::publish_rate_limit::PublishRateLimit;
24
use crate::{env, env_optional, uploaders::Uploader, Env};
35

@@ -25,6 +27,7 @@ pub struct Server {
2527
pub blocked_traffic: Vec<(String, Vec<String>)>,
2628
pub max_allowed_page_offset: u32,
2729
pub page_offset_ua_blocklist: Vec<String>,
30+
pub page_offset_cidr_blocklist: Vec<IpNetwork>,
2831
pub excluded_crate_names: Vec<String>,
2932
pub domain_name: String,
3033
pub allowed_origins: Vec<String>,
@@ -63,6 +66,9 @@ impl Default for Server {
6366
/// be blocked if `WEB_MAX_ALLOWED_PAGE_OFFSET` is exceeded. Including an empty string in the
6467
/// list will block *all* user-agents exceeding the offset. If not set or empty, no blocking
6568
/// will occur.
69+
/// - `WEB_PAGE_OFFSET_CIDR_BLOCKLIST`: A comma separated list of CIDR blocks that will be used
70+
/// to block IP addresses given in the `X-Real-Ip` HTTP header, e.g. `192.168.1.0/24`.
71+
/// If not set or empty, no blocking will occur.
6672
/// - `INSTANCE_METRICS_LOG_EVERY_SECONDS`: How frequently should instance metrics be logged.
6773
/// If the environment variable is not present instance metrics are not logged.
6874
/// - `FORCE_UNCONDITIONAL_REDIRECTS`: Whether to force unconditional redirects in the download
@@ -84,6 +90,13 @@ impl Default for Server {
8490
Some(s) if s.is_empty() => vec![],
8591
Some(s) => s.split(',').map(String::from).collect(),
8692
};
93+
let page_offset_cidr_blocklist =
94+
match env_optional::<String>("WEB_PAGE_OFFSET_CIDR_BLOCKLIST") {
95+
None => vec![],
96+
Some(s) if s.is_empty() => vec![],
97+
Some(s) => s.split(',').map(String::from).collect(),
98+
};
99+
87100
let base = Base::from_environment();
88101
let excluded_crate_names = match env_optional::<String>("EXCLUDED_CRATE_NAMES") {
89102
None => vec![],
@@ -103,6 +116,7 @@ impl Default for Server {
103116
blocked_traffic: blocked_traffic(),
104117
max_allowed_page_offset: env_optional("WEB_MAX_ALLOWED_PAGE_OFFSET").unwrap_or(200),
105118
page_offset_ua_blocklist,
119+
page_offset_cidr_blocklist: parse_cidr_blocks(&page_offset_cidr_blocklist),
106120
excluded_crate_names,
107121
domain_name: domain_name(),
108122
allowed_origins,
@@ -144,6 +158,41 @@ pub(crate) fn domain_name() -> String {
144158
dotenv::var("DOMAIN_NAME").unwrap_or_else(|_| "crates.io".into())
145159
}
146160

161+
/// Parses list of CIDR block strings to valid `IpNetwork` structs.
162+
///
163+
/// The purpose is to be able to block IP ranges that overload the API that uses pagination.
164+
///
165+
/// The minimum number of bits for a host prefix must be
166+
///
167+
/// * at least 16 for IPv4 based CIDRs.
168+
/// * at least 64 for IPv6 based CIDRs
169+
///
170+
fn parse_cidr_blocks(blocks: &[String]) -> Vec<IpNetwork> {
171+
blocks
172+
.iter()
173+
.map(|block| {
174+
let network = block.parse::<IpNetwork>();
175+
match network {
176+
Ok(cidr) => {
177+
let host_prefix = match cidr {
178+
IpNetwork::V4(_) => 16,
179+
IpNetwork::V6(_) => 64,
180+
};
181+
if cidr.prefix() < host_prefix {
182+
panic!(
183+
"WEB_PAGE_OFFSET_CIDR_BLOCKLIST only allows CIDR blocks with a host prefix \
184+
of at least 16 bits (IPv4) or 64 bits (IPv6)."
185+
);
186+
} else {
187+
cidr
188+
}
189+
},
190+
Err(_) => panic!("WEB_PAGE_OFFSET_CIDR_BLOCKLIST must contain IPv4 or IPv6 CIDR blocks."),
191+
}
192+
})
193+
.collect::<Vec<_>>()
194+
}
195+
147196
fn blocked_traffic() -> Vec<(String, Vec<String>)> {
148197
let pattern_list = dotenv::var("BLOCKED_TRAFFIC").unwrap_or_default();
149198
parse_traffic_patterns(&pattern_list)
@@ -183,3 +232,38 @@ fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() {
183232

184233
assert_none!(parse_traffic_patterns(pattern_string_3).next());
185234
}
235+
236+
/// Valid IPv4 CIDR blocks are parsed in order into the matching `IpNetwork` values.
#[test]
fn parse_cidr_block_list_successfully() {
    let raw = ["127.0.0.1/24", "192.168.0.1/31"];

    let input: Vec<String> = raw.iter().map(|block| block.to_string()).collect();
    let expected: Vec<IpNetwork> = raw
        .iter()
        .map(|block| block.parse::<IpNetwork>().unwrap())
        .collect();

    let parsed = parse_cidr_blocks(&input);
    assert_eq!(expected, parsed);
}
249+
250+
/// An IPv4 block with a prefix shorter than 16 bits must be rejected.
///
/// The expected message is pinned so the test only passes for the prefix-length check and
/// not for an unrelated panic such as a parse failure.
#[test]
#[should_panic(expected = "only allows CIDR blocks")]
fn parse_cidr_blocks_panics_when_host_ipv4_prefix_is_too_low() {
    parse_cidr_blocks(&["127.0.0.1/8".to_string()]);
}
255+
256+
/// An IPv6 block with a prefix shorter than 64 bits must be rejected.
///
/// The expected message is pinned so the test only passes for the prefix-length check and
/// not for an unrelated panic such as a parse failure.
#[test]
#[should_panic(expected = "only allows CIDR blocks")]
fn parse_cidr_blocks_panics_when_host_ipv6_prefix_is_too_low() {
    parse_cidr_blocks(&["2001:0db8:0123:4567:89ab:cdef:1234:5678/56".to_string()]);
}
261+
262+
/// IPv6 CIDR blocks with a sufficiently long prefix are accepted and keep their prefix length.
#[test]
fn parse_ipv6_based_cidr_blocks() {
    let input = vec![
        "2002::1234:abcd:ffff:c0a8:101/64".to_string(),
        "2001:0db8:0123:4567:89ab:cdef:1234:5678/92".to_string(),
    ];

    let blocks = parse_cidr_blocks(&input);

    assert_eq!(2, blocks.len());
    // Checking only the count would also pass if the entries were parsed as something else.
    assert!(blocks.iter().all(|block| matches!(block, IpNetwork::V6(_))));
    let prefixes: Vec<u8> = blocks.iter().map(|block| block.prefix()).collect();
    assert_eq!(vec![64, 92], prefixes);
}

src/controllers/helpers/pagination.rs

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::config::Server;
12
use crate::controllers::prelude::*;
23
use crate::middleware::log_request::add_custom_metadata;
34
use crate::models::helpers::with_count::*;
@@ -11,6 +12,8 @@ use diesel::query_dsl::LoadQuery;
1112
use diesel::sql_types::BigInt;
1213
use indexmap::IndexMap;
1314
use serde::{Deserialize, Serialize};
15+
use std::net::IpAddr;
16+
use std::str::FromStr;
1417
use std::sync::Arc;
1518

1619
const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10;
@@ -96,14 +99,10 @@ impl PaginationOptionsBuilder {
9699
}
97100

98101
// Block large offsets for known violators of the crawler policy
99-
if let Some(app) = self.limit_page_numbers {
102+
if let Some(ref app) = self.limit_page_numbers {
100103
let config = &app.config;
101-
let user_agent = request_header(req, header::USER_AGENT);
102104
if numeric_page > config.max_allowed_page_offset
103-
&& config
104-
.page_offset_ua_blocklist
105-
.iter()
106-
.any(|blocked| user_agent.contains(blocked))
105+
&& is_useragent_or_ip_blocked(config, req)
107106
{
108107
add_custom_metadata("cause", "large page offset");
109108
return Err(bad_request("requested page offset is too large"));
@@ -257,6 +256,37 @@ impl RawSeekPayload {
257256
}
258257
}
259258

259+
/// Function to check if the request is blocked.
260+
///
261+
/// A request can be blocked if either the User Agent is on the User Agent block list or if the client
262+
/// IP is on the CIDR block list.
263+
fn is_useragent_or_ip_blocked(config: &Server, req: &dyn RequestExt) -> bool {
264+
let user_agent = request_header(req, header::USER_AGENT);
265+
let client_ip = request_header(req, "x-real-ip");
266+
267+
// check if user agent is blocked
268+
if config
269+
.page_offset_ua_blocklist
270+
.iter()
271+
.any(|blocked| user_agent.contains(blocked))
272+
{
273+
return true;
274+
}
275+
276+
// check if client ip is blocked, needs to be an IPv4 address
277+
if let Ok(client_ip) = IpAddr::from_str(client_ip) {
278+
if config
279+
.page_offset_cidr_blocklist
280+
.iter()
281+
.any(|blocked| blocked.contains(client_ip))
282+
{
283+
return true;
284+
}
285+
}
286+
287+
false
288+
}
289+
260290
/// Encode a payload to be used as a seek key.
261291
///
262292
/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no

src/tests/krate/search.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use cargo_registry::models::Category;
55
use cargo_registry::schema::crates;
66
use diesel::{dsl::*, prelude::*, update};
77
use http::StatusCode;
8+
use ipnetwork::IpNetwork;
89

910
#[test]
1011
fn index() {
@@ -822,3 +823,27 @@ fn pagination_parameters_only_accept_integers() {
822823
json!({ "errors": [{ "detail": "invalid digit found in string" }] })
823824
);
824825
}
826+
827+
/// A request from an IP inside a blocked CIDR range is rejected with `400 Bad Request` once
/// the requested page exceeds `max_allowed_page_offset`.
#[test]
fn pagination_blocks_ip_from_cidr_block_list() {
    // NOTE(review): the `req` helper in `src/tests/util.rs` sets `x-real-ip: 127.0.0.1`;
    // this assumes `get_with_query` builds its request through that helper.
    let (app, anon, user) = TestApp::init()
        .with_config(|config| {
            let blocked_range = "127.0.0.1/24".parse::<IpNetwork>().unwrap();
            config.max_allowed_page_offset = 1;
            config.page_offset_cidr_blocklist = vec![blocked_range];
        })
        .with_user();
    let user = user.as_model();

    // Create enough crates that a second page exists.
    app.db(|conn| {
        let crate_names = [
            "pagination_links_1",
            "pagination_links_2",
            "pagination_links_3",
        ];
        for crate_name in crate_names.iter().copied() {
            CrateBuilder::new(crate_name, user.id).expect_build(conn);
        }
    });

    let response = anon.get_with_query::<()>("/api/v1/crates", "page=2&per_page=1");
    assert_eq!(response.status(), StatusCode::BAD_REQUEST);

    let expected_body = json!({ "errors": [{ "detail": "requested page offset is too large" }] });
    assert_eq!(response.into_json(), expected_body);
}

src/tests/util.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ pub trait RequestHelper {
202202
/// Builds a `MockRequest` for `method` and `path` carrying the default test headers.
///
/// Every request gets the `conduit-test` user agent and an `x-real-ip` of `127.0.0.1`.
fn req(method: conduit::Method, path: &str) -> MockRequest {
    let mut mock = MockRequest::new(method, path);
    mock.header(header::USER_AGENT, "conduit-test");
    mock.header("x-real-ip", "127.0.0.1");
    mock
}
207208

src/tests/util/test_app.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ fn simple_config() -> config::Server {
341341
blocked_traffic: Default::default(),
342342
max_allowed_page_offset: 200,
343343
page_offset_ua_blocklist: vec![],
344+
page_offset_cidr_blocklist: vec![],
344345
excluded_crate_names: vec![],
345346
domain_name: "crates.io".into(),
346347
allowed_origins: Vec::new(),

0 commit comments

Comments
 (0)