Skip to content

Commit befc1f2

Browse files
committed
fix more redirect loops in redirector handler
1 parent c0e6171 commit befc1f2

File tree

2 files changed

+59
-22
lines changed

2 files changed

+59
-22
lines changed

src/web/extractors/rustdoc.rs

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ use std::borrow::Cow;
2020
const INDEX_HTML: &str = "index.html";
2121
const FOLDER_AND_INDEX_HTML: &str = "/index.html";
2222

23+
pub(crate) const ROOT_RUSTDOC_HTML_FILES: &[&str] = &[
24+
"all.html",
25+
"help.html",
26+
"settings.html",
27+
"scrape-examples-help.html",
28+
];
29+
2330
#[derive(Clone, Debug, PartialEq)]
2431
pub(crate) enum PageKind {
2532
Rustdoc,
@@ -723,27 +730,38 @@ fn generate_rustdoc_path_for_url(
723730
&& !path.is_empty()
724731
&& path != INDEX_HTML
725732
{
726-
if let Some(target_name) = target_name
727-
&& path == target_name
728-
{
729-
// Sometimes people add a path that is just the target name.
730-
// This matches our `rustdoc_redirector_handler` route,
731-
// without trailing slash (`/{name}/{version}/{target}`).
732-
//
733-
// Even though the rustdoc html handler would also be capable
734-
// handling this input, we want to redirect to the trailing
735-
// slash version (`/{name}/{version}/{target}/`),
736-
// so they are separate.
737-
format!("{}/", target_name)
738-
} else {
733+
// for none-elements paths we have to guarantee that we have a
734+
// trailing slash, otherwise the rustdoc-url won't hit the html-handler and
735+
// lead to redirect loops.
736+
if path.contains('/') {
739737
// just use the given inner to start, if:
740738
// * it's not empty
741739
// * it's not just "index.html"
740+
// * we have a slash in the path.
742741
path.to_string()
742+
} else if ROOT_RUSTDOC_HTML_FILES.iter().any(|&f| f == path) {
743+
// special case: some files are at the root of the rustdoc output,
744+
// without a trailing slash, and the routes are fine with that.
745+
// e.g. `/help.html`, `/settings.html`, `/all.html`, ...
746+
path.to_string()
747+
} else if let Some(target_name) = target_name {
748+
if target_name == path {
749+
// when we have the target name as path, without a trailing slash,
750+
// just add the slash.
751+
format!("{}/", path)
752+
} else {
753+
// when someone just attaches some path to the URL, like
754+
// `/{krate}/{version}/somefile.html`, we assume they meant
755+
// `/{krate}/{version}/{target_name}/somefile.html`.
756+
format!("{}/{}", target_name, path)
757+
}
758+
} else {
759+
// fallback: just attach a slash and redirect.
760+
format!("{}/", path)
743761
}
744762
} else if let Some(target_name) = target_name {
745763
// after having no usable given path, we generate one with the
746-
// target name, if we have one.
764+
// target name, if we have one/.
747765
format!("{}/", target_name)
748766
} else {
749767
// no usable given path:
@@ -1639,23 +1657,26 @@ mod tests {
16391657
assert_eq!(params.rustdoc_url(), "/krate/latest/krate/");
16401658
}
16411659

1642-
#[test_case(Some(PageKind::Rustdoc))]
1643-
#[test_case(None)]
1644-
fn test_only_target_name_without_slash(page_kind: Option<PageKind>) {
1660+
#[test_case("other_path.html", "/krate/latest/krate/other_path.html")]
1661+
#[test_case("other_path", "/krate/latest/krate/other_path"; "without .html")]
1662+
#[test_case("other_path.html", "/krate/latest/krate/other_path.html"; "with .html")]
1663+
#[test_case("settings.html", "/krate/latest/settings.html"; "static routes")]
1664+
#[test_case(KRATE, "/krate/latest/krate/"; "same as target name, without slash")]
1665+
fn test_redirect_some_odd_paths_we_saw(inner_path: &str, expected_url: &str) {
16451666
// test for https://github.com/rust-lang/docs.rs/issues/2989
16461667
let params = RustdocParams::new(KRATE)
1647-
.with_maybe_page_kind(page_kind)
1648-
.try_with_original_uri("/dummy/latest/dummy")
1668+
.with_page_kind(PageKind::Rustdoc)
1669+
.try_with_original_uri(format!("/{KRATE}/latest/{inner_path}"))
16491670
.unwrap()
16501671
.with_req_version(ReqVersion::Latest)
16511672
.with_maybe_doc_target(None::<String>)
1652-
.with_inner_path(KRATE)
1673+
.with_inner_path(inner_path)
16531674
.with_default_target(DEFAULT_TARGET)
16541675
.with_target_name(KRATE)
16551676
.with_doc_targets(TARGETS.iter().cloned());
16561677

16571678
dbg!(&params);
16581679

1659-
assert_eq!(params.rustdoc_url(), "/krate/latest/krate/");
1680+
assert_eq!(params.rustdoc_url(), expected_url);
16601681
}
16611682
}

src/web/rustdoc.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ pub(crate) async fn rustdoc_redirector_handler(
219219
let params = params.with_page_kind(PageKind::Rustdoc);
220220

221221
fn redirect_to_doc(
222+
original_uri: Option<&Uri>,
222223
url: EscapedURI,
223224
cache_policy: CachePolicy,
224225
path_in_crate: Option<&str>,
@@ -229,6 +230,17 @@ pub(crate) async fn rustdoc_redirector_handler(
229230
url
230231
};
231232

233+
if let Some(original_uri) = original_uri
234+
&& original_uri.path() == original_uri.path()
235+
{
236+
return Err(anyhow!(
237+
"infinite redirect detected, \noriginal_uri = {}, redirect_url = {}",
238+
original_uri,
239+
url
240+
)
241+
.into());
242+
}
243+
232244
trace!(%url, ?cache_policy, path_in_crate, "redirect to doc");
233245
Ok(axum_cached_redirect(url, cache_policy)?)
234246
}
@@ -266,6 +278,7 @@ pub(crate) async fn rustdoc_redirector_handler(
266278
let target_uri =
267279
EscapedURI::from_uri(description.href.clone()).append_raw_query(original_query);
268280
return redirect_to_doc(
281+
params.original_uri(),
269282
target_uri,
270283
CachePolicy::ForeverInCdnAndStaleInBrowser,
271284
path_in_crate.as_deref(),
@@ -329,6 +342,7 @@ pub(crate) async fn rustdoc_redirector_handler(
329342

330343
if matched_release.rustdoc_status() {
331344
Ok(redirect_to_doc(
345+
params.original_uri(),
332346
params.rustdoc_url().append_raw_query(original_query),
333347
if matched_release.is_latest_url() {
334348
CachePolicy::ForeverInCdn
@@ -3345,7 +3359,9 @@ mod test {
33453359
#[tokio::test(flavor = "multi_thread")]
33463360
#[test_case("/dummy/"; "only krate")]
33473361
#[test_case("/dummy/latest/"; "with version")]
3348-
#[test_case("/dummy/latest/dummy"; "full without trailing slash")]
3362+
#[test_case("/dummy/latest/dummy"; "target-name as path, without trailing slash")]
3363+
#[test_case("/dummy/latest/other_path"; "other path, without trailing slash")]
3364+
#[test_case("/dummy/latest/other_path.html"; "other html path, without trailing slash")]
33493365
#[test_case("/dummy/latest/dummy/"; "final target")]
33503366
async fn test_full_latest_url_without_trailing_slash(path: &str) -> Result<()> {
33513367
// test for https://github.com/rust-lang/docs.rs/issues/2989

0 commit comments

Comments
 (0)