From d7a702e3c25ae3af0b170a12ad815a25fdd337e3 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 11:04:35 -0400 Subject: [PATCH 01/13] [WIP] Initial try at LOL HTML rewriter --- Cargo.lock | 105 +++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 +- src/utils/html.rs | 93 ++++++++++++++++++++++++--------------- src/utils/mod.rs | 2 +- src/web/rustdoc.rs | 69 ++++++++++++++--------------- 5 files changed, 199 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7bdb47c5..f4cafeb50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -396,6 +396,7 @@ dependencies = [ "iron 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "kuchiki 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "lol_html 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "mime_guess 2.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "notify 4.0.15 (registry+https://github.com/rust-lang/crates.io-index)", "once_cell 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -538,6 +539,24 @@ dependencies = [ "subtle 2.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cssparser" +version = "0.25.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cssparser-macros 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", + "procedural-masquerade 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + 
"quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cssparser" version = "0.27.2" @@ -554,6 +573,18 @@ dependencies = [ "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cssparser-macros" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", + "procedural-masquerade 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cssparser-macros" version = "0.6.0" @@ -1402,6 +1433,23 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lol_html" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "cssparser 0.25.9 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "safemem 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "selectors 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror 1.0.20 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "mac" version = "0.1.1" @@ -2185,6 +2233,11 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "procedural-masquerade" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "procfs" version = "0.7.9" @@ -2794,6 +2847,24 @@ dependencies = [ "libc 0.2.70 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "selectors" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "cssparser 0.25.9 (registry+https://github.com/rust-lang/crates.io-index)", + "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", + "precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "servo_arc 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "selectors" version = "0.22.0" @@ -2965,6 +3036,14 @@ dependencies = [ "unidecode 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "smallvec" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "maybe-uninit 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "smallvec" version = "1.4.0" @@ -3284,6 +3363,24 @@ name = "thin-slice" version = "0.1.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -3893,7 +3990,9 @@ dependencies = [ "checksum crossbeam-utils 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" "checksum crypto-mac 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0999b4ff4d3446d4ddb19a63e9e00c1876e75cd7000d20e57a693b4b3f08d958" "checksum crypto-mac 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab" +"checksum cssparser 0.25.9 (registry+https://github.com/rust-lang/crates.io-index)" = "fbe18ca4efb9ba3716c6da66cc3d7e673bf59fa576353011f48c4cfddbdd740e" "checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +"checksum cssparser-macros 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "5bb1c84e87c717666564ec056105052331431803d606bd45529b28547b611eef" "checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" "checksum csv 1.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" "checksum 
csv-core 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" @@ -3991,6 +4090,7 @@ dependencies = [ "checksum lock_api 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" "checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" "checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +"checksum lol_html 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "169299b3b58aa5cd8ad25fd8fe984e93748046d24c80f05aaadd9022f95423ec" "checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" "checksum maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" "checksum markup5ever 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" @@ -4079,6 +4179,7 @@ dependencies = [ "checksum proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)" = "0d659fe7c6d27f25e9d80a1a094c223f5246f6a6596453e09d7229bf42750b63" "checksum proc-macro-nested 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e946095f9d3ed29ec38de908c22f95d9ac008e424c7bcae54c75a79c527c694" "checksum proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "53f5ffe53a6b28e37c9c1ce74893477864d64f74778a93a4beb43c8fa167f639" +"checksum procedural-masquerade 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8f1383dff4092fe903ac180e391a8d4121cc48f08ccf850614b0290c6673b69d" "checksum procfs 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)" = 
"c434e93ef69c216e68e4f417c927b4f31502c3560b72cfdb6827e2321c5c6b3e" "checksum prometheus 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5567486d5778e2c6455b1b90ff1c558f29e751fc018130fa182e15828e728af1" "checksum quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" @@ -4139,6 +4240,7 @@ dependencies = [ "checksum scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" "checksum security-framework 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "64808902d7d99f78eaddd2b4e2509713babc3dc3c85ad6f4c447680f3c01e535" "checksum security-framework-sys 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "17bf11d99252f512695eb468de5516e5cf75455521e69dfe343f3b74e4748405" +"checksum selectors 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1b86b100bede4f651059740afc3b6cb83458d7401cb7c1ad96d8a11e91742c86" "checksum selectors 0.22.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" @@ -4159,6 +4261,7 @@ dependencies = [ "checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum slug 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39af1ce888a1253c8b9fcfa36626557650fb487c013620a743262d2769a3e9f3" +"checksum 
smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" "checksum smallvec 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" "checksum socket2 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)" = "03088793f677dce356f3ccc2edb1b314ad191ab702a5de3faf49304f7e104918" "checksum spin 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" @@ -4192,6 +4295,8 @@ dependencies = [ "checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" "checksum thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" +"checksum thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +"checksum thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" "checksum time 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "3a51cadc5b1eec673a685ff7c33192ff7b7603d0b75446fb354939ee615acb15" diff --git a/Cargo.toml b/Cargo.toml index ba3d01646..6e658d7e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,6 @@ backtrace = "0.3" failure 
= { version = "0.1.3", features = ["backtrace"] } comrak = { version = "0.3", default-features = false } toml = "0.5" -kuchiki = "0.8" schemamama = "0.3" schemamama_postgres = "0.2" systemstat = "0.1.4" @@ -41,6 +40,7 @@ path-slash = "0.1.3" once_cell = { version = "1.4.0", features = ["parking_lot"] } base64 = "0.12.1" strum = { version = "0.18.0", features = ["derive"] } +lol_html = "0.2" # Async tokio = { version = "0.2.22", features = ["rt-threaded"] } @@ -82,6 +82,7 @@ procfs = "0.7" [dev-dependencies] criterion = "0.3" +kuchiki = "0.8" rand = "0.7.3" [[bench]] diff --git a/src/utils/html.rs b/src/utils/html.rs index 9a24b4561..cce275a74 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -1,40 +1,64 @@ -use crate::error::Result; -use failure::err_msg; -use kuchiki::traits::TendrilSink; -use kuchiki::NodeRef; - -/// Extracts the contents of the `` and `` tags from an HTML document, as well as the -/// classes on the `` tag, if any. -pub fn extract_head_and_body(html: &str) -> Result<(String, String, String)> { - let dom = kuchiki::parse_html().one(html); - - let head = dom - .select_first("head") - .map_err(|_| err_msg("couldn't find tag in rustdoc output"))?; - let body = dom - .select_first("body") - .map_err(|_| err_msg("couldn't find tag in rustdoc output"))?; - - let class = body - .attributes - .borrow() - .get("class") - .map(|v| v.to_owned()) - .unwrap_or_default(); - - Ok((serialize(head.as_node()), serialize(body.as_node()), class)) -} +use crate::web::page::TemplateData; +use lol_html::errors::RewritingError; +use tera::Context; -fn serialize(v: &NodeRef) -> String { - let mut contents = Vec::new(); - for child in v.children() { - child - .serialize(&mut contents) - .expect("serialization failed"); - } - String::from_utf8(contents).expect("non utf-8 html") +pub(crate) fn rewrite_lol( + html: &str, + ctx: Context, + templates: &TemplateData, +) -> Result { + use lol_html::html_content::{ContentType, Element}; + use 
lol_html::{ElementContentHandlers, RewriteStrSettings}; + + let templates = templates.templates.load(); + let tera_head = templates.render("rustdoc/head.html", &ctx).unwrap(); + let tera_body = templates.render("rustdoc/body.html", &ctx).unwrap(); + + let head_handler = |head: &mut Element| { + head.append(&tera_head, ContentType::Html); + Ok(()) + }; + // Before: ... rustdoc content ... + // After: + // ```html + //
+ // ... rustdoc content ... + //
+ // ``` + let body_handler = |rustdoc_body_class: &mut Element| { + // Add the `rustdoc` classes to the html body + rustdoc_body_class.set_attribute("container-rustdoc", "")?; + rustdoc_body_class.set_attribute("id", "rustdoc_body_wrapper")?; + rustdoc_body_class.set_attribute("tabindex", "-1")?; + // Change the `body` to a `div` + rustdoc_body_class.set_tag_name("div")?; + // Prepend the tera content + rustdoc_body_class.prepend(&tera_body, ContentType::Html); + // Now, make this a full tag + rustdoc_body_class.before("", ContentType::Html); + rustdoc_body_class.after("", ContentType::Html); + + Ok(()) + }; + + let (head_selector, body_selector) = ("head".parse().unwrap(), "body".parse().unwrap()); + let head = ( + &head_selector, + ElementContentHandlers::default().element(head_handler), + ); + let body = ( + &body_selector, + ElementContentHandlers::default().element(body_handler), + ); + let settings = RewriteStrSettings { + element_content_handlers: vec![head, body], + ..RewriteStrSettings::default() + }; + + lol_html::rewrite_str(html, settings) } +/* #[cfg(test)] mod test { #[test] @@ -60,3 +84,4 @@ mod test { assert_eq!(class, "rustdoc struct"); } } +*/ diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 101b3930d..69935dac4 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -4,7 +4,7 @@ pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage} pub(crate) use self::copy::copy_doc_dir; pub use self::daemon::start_daemon; pub use self::github_updater::GithubUpdater; -pub use self::html::extract_head_and_body; +pub(crate) use self::html::rewrite_lol; pub use self::queue::{get_crate_priority, remove_crate_priority, set_crate_priority}; pub use self::queue_builder::queue_builder; pub use self::release_activity_updater::update_release_activity; diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index f4c3f2ae0..63e03cde0 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -2,10 +2,10 @@ use crate::{ db::Pool, - 
impl_webpage, utils, + utils, web::{ crate_details::CrateDetails, error::Nope, file::File, match_version, metrics, - page::WebPage, redirect_base, MatchSemver, + redirect_base, MatchSemver, }, Config, Storage, }; @@ -188,16 +188,9 @@ struct RustdocPage { latest_version: String, inner_path: String, is_latest_version: bool, - rustdoc_head: String, - rustdoc_body: String, - rustdoc_body_class: String, krate: CrateDetails, } -impl_webpage! { - RustdocPage = "rustdoc/page.html", -} - /// Serves documentation generated by rustdoc. /// /// This includes all HTML files for an individual crate, as well as the `search-index.js`, which is @@ -318,21 +311,6 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { return Ok(file.serve()); } - rendering_time.step("parse html"); - - let file_content = ctry!(req, String::from_utf8(file.0.content)); - // Extract the head and body of the rustdoc file so that we can insert it into our own html - let (rustdoc_head, rustdoc_body, mut rustdoc_body_class) = - ctry!(req, utils::extract_head_and_body(&file_content)); - - // Add the `rustdoc` classes to the html body - if rustdoc_body_class.is_empty() { - rustdoc_body_class = "rustdoc container-rustdoc".to_string(); - } else { - // rustdoc adds its own "rustdoc" class to the body - rustdoc_body_class.push_str(" container-rustdoc"); - } - rendering_time.step("find latest path"); let latest_release = krate.latest_release(); @@ -361,8 +339,6 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { format!("/crate/{}/{}", name, latest_version) }; - rendering_time.step("serve html"); - // The path within this crate version's rustdoc output let inner_path = { let mut inner_path = req_path.clone(); @@ -377,18 +353,37 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { inner_path.join("/") }; + rendering_time.step("rewrite html"); + + let file_content = ctry!(req, std::str::from_utf8(&file.0.content)); + let templates = req + .extensions + 
.get::() + .expect("missing TemplateData from the request extensions"); // Build the page of documentation - RustdocPage { - latest_path, - latest_version, - inner_path, - is_latest_version, - rustdoc_head, - rustdoc_body, - rustdoc_body_class, - krate, - } - .into_response(req) + let ctx = ctry!( + req, + tera::Context::from_serialize(RustdocPage { + latest_path, + latest_version, + inner_path, + is_latest_version, + krate, + }) + ); + // Extract the head and body of the rustdoc file so that we can insert it into our own html + let html = ctry!(req, utils::rewrite_lol(file_content, ctx, templates)); + /* + let (rustdoc_head, rustdoc_body, mut rustdoc_body_class) = + ctry!(req, utils::extract_head_and_body(&file_content)); + */ + + rendering_time.step("serve html"); + use iron::{headers::ContentType, status::Status}; + let mut response = Response::with((Status::Ok, html)); + response.headers.set(ContentType::html()); + + Ok(response) } /// Checks whether the given path exists. From dad0b55a97e805b23af48a4dd0e857d33c2eccae Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 11:23:00 -0400 Subject: [PATCH 02/13] Fix broken `class="rustdoc"` handling Before, it was generating code like this: ```html
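<div class="rustdoc" container-rustdoc="" id="rustdoc_body_wrapper" tabindex="-1"> ``` Now it generates code like this: ```html <div class="rustdoc container-rustdoc" id="rustdoc_body_wrapper" tabindex="-1">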
``` --- src/utils/html.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/utils/html.rs b/src/utils/html.rs index cce275a74..e7b8e10c5 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -27,7 +27,15 @@ pub(crate) fn rewrite_lol( // ``` let body_handler = |rustdoc_body_class: &mut Element| { // Add the `rustdoc` classes to the html body - rustdoc_body_class.set_attribute("container-rustdoc", "")?; + let mut tmp; + let klass = if let Some(classes) = rustdoc_body_class.get_attribute("class") { + tmp = classes; + tmp.push_str(" container-rustdoc"); + &tmp + } else { + "container-rustdoc" + }; + rustdoc_body_class.set_attribute("class", klass)?; rustdoc_body_class.set_attribute("id", "rustdoc_body_wrapper")?; rustdoc_body_class.set_attribute("tabindex", "-1")?; // Change the `body` to a `div` From 22b8bc851349812c69b2801e75196186a0c5fe1f Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 11:53:06 -0400 Subject: [PATCH 03/13] Add missing html files --- templates/rustdoc/body.html | 32 ++++++++++++++++++++++++++++++++ templates/rustdoc/head.html | 12 ++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 templates/rustdoc/body.html create mode 100644 templates/rustdoc/head.html diff --git a/templates/rustdoc/body.html b/templates/rustdoc/body.html new file mode 100644 index 000000000..72284be8b --- /dev/null +++ b/templates/rustdoc/body.html @@ -0,0 +1,32 @@ +{%- include "rustdoc/navigation.html" -%} + + diff --git a/templates/rustdoc/head.html b/templates/rustdoc/head.html new file mode 100644 index 000000000..61e38ae5e --- /dev/null +++ b/templates/rustdoc/head.html @@ -0,0 +1,12 @@ +{%- import "macros.html" as macros -%} + + + + + + + {{ macros::doc_title(name=krate.name, version=krate.version) }} + \ No newline at end of file From 7889db9f0da637a190ac1b8772b16851dd5db386 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 11:55:55 -0400 Subject: [PATCH 04/13] Remove unused files - Inline 
`navigation.html` into `body.html` - Remove `page.html` --- templates/rustdoc/body.html | 222 +++++++++++++++++++++++++++++- templates/rustdoc/navigation.html | 220 ----------------------------- templates/rustdoc/page.html | 64 --------- 3 files changed, 221 insertions(+), 285 deletions(-) delete mode 100644 templates/rustdoc/navigation.html delete mode 100644 templates/rustdoc/page.html diff --git a/templates/rustdoc/body.html b/templates/rustdoc/body.html index 72284be8b..e74067451 100644 --- a/templates/rustdoc/body.html +++ b/templates/rustdoc/body.html @@ -1,4 +1,224 @@ -{%- include "rustdoc/navigation.html" -%} +{%- import "macros.html" as macros -%} + +{# The url of the current release, `/crate/:name/:version` #} +{%- set crate_url = "/crate/" ~ krate.name ~ "/" ~ krate.version -%} + + + - - - - - From 452a0888a4707631c9f406f50595b7924e2f9730 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 12:39:32 -0400 Subject: [PATCH 05/13] Use valid HTML for all tests The way the new LOL rewriter works requires a valid `<head>` and `<body>` tag. I think it's pretty safe to assume any HTML generated by rustdoc will have at least those. However, much of the test suite did not, because it was using random content like `b"lah"`. This adds a default HTML content which both makes it easier to write tests and makes the content valid HTML by default. A new function `rustdoc_file_with` was added in case you still need the old behavior. 
--- src/test/fakes.rs | 14 ++++++- src/web/file.rs | 12 +++--- src/web/rustdoc.rs | 100 +++++++++++++++++++++------------------------ 3 files changed, 65 insertions(+), 61 deletions(-) diff --git a/src/test/fakes.rs b/src/test/fakes.rs index c9ecd9f7a..c6f632275 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -27,6 +27,9 @@ pub(crate) struct FakeRelease<'a> { readme: Option<&'a str>, } +const DEFAULT_CONTENT: &[u8] = + b"default content for test/fakes"; + impl<'a> FakeRelease<'a> { pub(super) fn new(db: &'a TestDatabase, storage: Arc) -> Self { FakeRelease { @@ -121,7 +124,14 @@ impl<'a> FakeRelease<'a> { self } - pub(crate) fn rustdoc_file(mut self, path: &'a str, data: &'a [u8]) -> Self { + /// Since we switch to LOL HTML, all data must have a valid and . + /// To avoid duplicating them in every test, this just makes up some content. + pub(crate) fn rustdoc_file(mut self, path: &'a str) -> Self { + self.rustdoc_files.push((path, DEFAULT_CONTENT)); + self + } + + pub(crate) fn rustdoc_file_with(mut self, path: &'a str, data: &'a [u8]) -> Self { self.rustdoc_files.push((path, data)); self } @@ -217,7 +227,7 @@ impl<'a> FakeRelease<'a> { let index = [&package.name, "index.html"].join("/"); let mut rustdoc_files = self.rustdoc_files; if package.is_library() && !rustdoc_files.iter().any(|(path, _)| path == &index) { - rustdoc_files.push((&index, b"default index content")); + rustdoc_files.push((&index, DEFAULT_CONTENT)); } for (source_path, data) in &self.source_files { if source_path.starts_with("src/") { diff --git a/src/web/file.rs b/src/web/file.rs index 985ab5679..96b4ce8f4 100644 --- a/src/web/file.rs +++ b/src/web/file.rs @@ -115,12 +115,12 @@ mod tests { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("small.html", &[b'A'; MAX_HTML_SIZE / 2] as &[u8]) - .rustdoc_file("exact.html", &[b'A'; MAX_HTML_SIZE] as &[u8]) - .rustdoc_file("big.html", &[b'A'; MAX_HTML_SIZE * 2] as &[u8]) - .rustdoc_file("small.js", &[b'A'; MAX_SIZE / 2] as 
&[u8]) - .rustdoc_file("exact.js", &[b'A'; MAX_SIZE] as &[u8]) - .rustdoc_file("big.js", &[b'A'; MAX_SIZE * 2] as &[u8]) + .rustdoc_file_with("small.html", &[b'A'; MAX_HTML_SIZE / 2] as &[u8]) + .rustdoc_file_with("exact.html", &[b'A'; MAX_HTML_SIZE] as &[u8]) + .rustdoc_file_with("big.html", &[b'A'; MAX_HTML_SIZE * 2] as &[u8]) + .rustdoc_file_with("small.js", &[b'A'; MAX_SIZE / 2] as &[u8]) + .rustdoc_file_with("exact.js", &[b'A'; MAX_SIZE] as &[u8]) + .rustdoc_file_with("big.js", &[b'A'; MAX_SIZE * 2] as &[u8]) .create()?; let file = |path| { diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 63e03cde0..885d4976b 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -593,7 +593,7 @@ mod test { ) -> Result, failure::Error> { assert_success(path, web)?; let data = web.get(path).send()?.text()?; - println!("{}", data); + println!("fetched path {} and got content {}", path, data); let dom = kuchiki::parse_html().one(data); if let Some(elem) = dom @@ -610,8 +610,8 @@ mod test { } fn latest_version_redirect(path: &str, web: &TestFrontend) -> Result { - try_latest_version_redirect(path, web) - .and_then(|v| v.ok_or_else(|| failure::format_err!("no redirect found for {}", path))) + try_latest_version_redirect(path, web)? 
+ .ok_or_else(|| failure::format_err!("no redirect found for {}", path)) } #[test] @@ -623,12 +623,12 @@ mod test { .name("buggy") .version("0.1.0") .build_result_successful(true) - .rustdoc_file("settings.html", b"some data") - .rustdoc_file("directory_1/index.html", b"some data 1") - .rustdoc_file("directory_2.html/index.html", b"some data 1") - .rustdoc_file("all.html", b"some data 2") - .rustdoc_file("directory_3/.gitignore", b"*.ext") - .rustdoc_file("directory_4/empty_file_no_ext", b"") + .rustdoc_file("settings.html") + .rustdoc_file("directory_1/index.html") + .rustdoc_file("directory_2.html/index.html") + .rustdoc_file("all.html") + .rustdoc_file("directory_3/.gitignore") + .rustdoc_file("directory_4/empty_file_no_ext") .create()?; env.fake_release() .name("buggy") @@ -654,7 +654,7 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"some content") + .rustdoc_file("dummy/index.html") .create()?; let web = env.frontend(); @@ -668,7 +668,7 @@ mod test { env.fake_release() .name("dummy") .version("0.2.0") - .rustdoc_file("dummy/index.html", b"some content") + .rustdoc_file("dummy/index.html") .default_target(target) .create()?; let base = "/dummy/0.2.0/dummy/"; @@ -681,8 +681,8 @@ mod test { env.fake_release() .name("dummy") .version("0.3.0") - .rustdoc_file("dummy/index.html", b"some content") - .rustdoc_file("all.html", b"html") + .rustdoc_file("dummy/index.html") + .rustdoc_file("all.html") .default_target(target) .create()?; let base = "/dummy/0.3.0/dummy/"; @@ -705,15 +705,15 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/blah/index.html", b"lah") - .rustdoc_file("dummy/blah/blah.html", b"lah") - .rustdoc_file("dummy/struct.will-be-deleted.html", b"lah") + .rustdoc_file("dummy/blah/index.html") + .rustdoc_file("dummy/blah/blah.html") + .rustdoc_file("dummy/struct.will-be-deleted.html") .create()?; env.fake_release() .name("dummy") .version("0.2.0") - 
.rustdoc_file("dummy/blah/index.html", b"lah") - .rustdoc_file("dummy/blah/blah.html", b"lah") + .rustdoc_file("dummy/blah/index.html") + .rustdoc_file("dummy/blah/blah.html") .create()?; let web = env.frontend(); @@ -751,7 +751,7 @@ mod test { .name("dummy") .version("0.1.0") .add_platform("x86_64-pc-windows-msvc") - .rustdoc_file("dummy/struct.Blah.html", b"lah") + .rustdoc_file("dummy/struct.Blah.html") .create()?; env.fake_release() .name("dummy") @@ -797,7 +797,7 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .create()?; env.fake_release() .name("dummy") @@ -819,17 +819,17 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .create()?; env.fake_release() .name("dummy") .version("0.2.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .create()?; env.fake_release() .name("dummy") .version("0.2.1") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .yanked(true) .create()?; @@ -850,19 +850,19 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .yanked(true) .create()?; env.fake_release() .name("dummy") .version("0.2.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .yanked(true) .create()?; env.fake_release() .name("dummy") .version("0.2.1") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .yanked(true) .create()?; @@ -895,7 +895,7 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"lah") + .rustdoc_file("dummy/index.html") .yanked(true) .create()?; @@ -904,7 +904,7 @@ mod test { env.fake_release() .name("dummy") .version("0.2.0") - .rustdoc_file("dummy/index.html", b"lah") + 
.rustdoc_file("dummy/index.html") .yanked(true) .create()?; @@ -938,7 +938,7 @@ mod test { env.fake_release() .name("fake-crate") .version("0.0.1") - .rustdoc_file("fake_crate/index.html", b"some content") + .rustdoc_file("fake_crate/index.html") .create()?; let web = env.frontend(); @@ -964,7 +964,7 @@ mod test { env.fake_release() .name(name) .version(version) - .rustdoc_file(&(name.replace("-", "_") + "/index.html"), b"") + .rustdoc_file(&(name.replace("-", "_") + "/index.html")) .create()?; } @@ -1014,13 +1014,13 @@ mod test { env.fake_release() .name("dummy-dash") .version("0.1.0") - .rustdoc_file("dummy_dash/index.html", b"") + .rustdoc_file("dummy_dash/index.html") .create()?; env.fake_release() .name("dummy_mixed-separators") .version("0.1.0") - .rustdoc_file("dummy_mixed_separators/index.html", b"") + .rustdoc_file("dummy_mixed_separators/index.html") .create()?; let web = env.frontend(); @@ -1126,8 +1126,8 @@ mod test { env.fake_release() .name("dummy") .version("0.1.0") - .rustdoc_file("dummy/index.html", b"some content") - .rustdoc_file("dummy/struct.Dummy.html", b"some other content") + .rustdoc_file("dummy/index.html") + .rustdoc_file("dummy/struct.Dummy.html") .add_target("x86_64-unknown-linux-gnu") .create()?; @@ -1156,8 +1156,8 @@ mod test { env.fake_release() .name("dummy") .version("0.2.0") - .rustdoc_file("dummy/index.html", b"some content") - .rustdoc_file("dummy/struct.Dummy.html", b"some other content") + .rustdoc_file("dummy/index.html") + .rustdoc_file("dummy/struct.Dummy.html") .default_target("x86_64-pc-windows-msvc") .create()?; @@ -1186,8 +1186,8 @@ mod test { env.fake_release() .name("dummy") .version("0.3.0") - .rustdoc_file("dummy/index.html", b"some content") - .rustdoc_file("dummy/struct.Dummy.html", b"some other content") + .rustdoc_file("dummy/index.html") + .rustdoc_file("dummy/struct.Dummy.html") .default_target("x86_64-unknown-linux-gnu") .create()?; @@ -1216,20 +1216,14 @@ mod test { env.fake_release() .name("dummy") 
.version("0.4.0") - .rustdoc_file("settings.html", b"top-level items") - .rustdoc_file("dummy/index.html", b"some content") - .rustdoc_file("dummy/struct.Dummy.html", b"some other content") - .rustdoc_file("dummy/struct.DefaultOnly.html", b"some otter content") - .rustdoc_file("x86_64-pc-windows-msvc/settings.html", b"top-level items") - .rustdoc_file("x86_64-pc-windows-msvc/dummy/index.html", b"some content") - .rustdoc_file( - "x86_64-pc-windows-msvc/dummy/struct.Dummy.html", - b"some other content", - ) - .rustdoc_file( - "x86_64-pc-windows-msvc/dummy/struct.WindowsOnly.html", - b"some otter content", - ) + .rustdoc_file("settings.html") + .rustdoc_file("dummy/index.html") + .rustdoc_file("dummy/struct.Dummy.html") + .rustdoc_file("dummy/struct.DefaultOnly.html") + .rustdoc_file("x86_64-pc-windows-msvc/settings.html") + .rustdoc_file("x86_64-pc-windows-msvc/dummy/index.html") + .rustdoc_file("x86_64-pc-windows-msvc/dummy/struct.Dummy.html") + .rustdoc_file("x86_64-pc-windows-msvc/dummy/struct.WindowsOnly.html") .default_target("x86_64-unknown-linux-gnu") .add_target("x86_64-pc-windows-msvc") .create()?; @@ -1498,7 +1492,7 @@ mod test { env.fake_release() .name("tokio") .version("0.2.21") - .rustdoc_file("tokio/time/index.html", b"content") + .rustdoc_file("tokio/time/index.html") .create()?; assert_redirect( "/tokio/0.2.21/tokio/time", From d9604a21b1d95aed470e43b89c077eaf30b4d602 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 14:30:02 -0400 Subject: [PATCH 06/13] Cleanup - Remove commented-out tests and code - Fix bad comment - Remove trailing whitespace --- src/test/fakes.rs | 2 +- src/utils/html.rs | 28 ---------------------------- src/web/rustdoc.rs | 4 ---- templates/rustdoc/head.html | 1 - 4 files changed, 1 insertion(+), 34 deletions(-) diff --git a/src/test/fakes.rs b/src/test/fakes.rs index c6f632275..c4d88f024 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -124,7 +124,7 @@ impl<'a> FakeRelease<'a> { self } - /// Since we 
switch to LOL HTML, all data must have a valid <head> and <body>. + /// Since we switched to LOL HTML, all data must have a valid <head> and <body>. /// To avoid duplicating them in every test, this just makes up some content. pub(crate) fn rustdoc_file(mut self, path: &'a str) -> Self { self.rustdoc_files.push((path, DEFAULT_CONTENT)); diff --git a/src/utils/html.rs b/src/utils/html.rs index e7b8e10c5..9ba9c0dd1 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -65,31 +65,3 @@ pub(crate) fn rewrite_lol( lol_html::rewrite_str(html, settings) } - -/* -#[cfg(test)] -mod test { - #[test] - fn small_html() { - let (head, body, class) = super::extract_head_and_body( - r#"

hello

"# - ).unwrap(); - assert_eq!(head, r#""#); - assert_eq!(body, "

hello

"); - assert_eq!(class, "rustdoc struct"); - } - - // more of an integration test - #[test] - fn parse_regex_html() { - let original = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap(); - let expected_head = std::fs::read_to_string("tests/regex/head.html").unwrap(); - let expected_body = std::fs::read_to_string("tests/regex/body.html").unwrap(); - let (head, body, class) = super::extract_head_and_body(&original).unwrap(); - - assert_eq!(head, expected_head.trim()); - assert_eq!(&body, &expected_body.trim()); - assert_eq!(class, "rustdoc struct"); - } -} -*/ diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 885d4976b..18efd5a81 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -373,10 +373,6 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { ); // Extract the head and body of the rustdoc file so that we can insert it into our own html let html = ctry!(req, utils::rewrite_lol(file_content, ctx, templates)); - /* - let (rustdoc_head, rustdoc_body, mut rustdoc_body_class) = - ctry!(req, utils::extract_head_and_body(&file_content)); - */ rendering_time.step("serve html"); use iron::{headers::ContentType, status::Status}; diff --git a/templates/rustdoc/head.html b/templates/rustdoc/head.html index 61e38ae5e..3fd4aaa6e 100644 --- a/templates/rustdoc/head.html +++ b/templates/rustdoc/head.html @@ -9,4 +9,3 @@ {{ macros::doc_title(name=krate.name, version=krate.version) }} - \ No newline at end of file From adb8929e9de088433484e20c62f74d20556465c1 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:19:35 -0400 Subject: [PATCH 07/13] Use HtmlRewriter instead of rewrite_str - Add a memory limit for the parser - Abstract most of rendering into a method on `RustdocPage` - Use bytes for parsing to avoid validating UTF8-encoding twice --- src/utils/html.rs | 24 +++++++++++++++----- src/web/rustdoc.rs | 55 ++++++++++++++++++++++++---------------------- 2 files changed, 47 insertions(+), 32 deletions(-) 
diff --git a/src/utils/html.rs b/src/utils/html.rs index 9ba9c0dd1..6c190626e 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -3,12 +3,12 @@ use lol_html::errors::RewritingError; use tera::Context; pub(crate) fn rewrite_lol( - html: &str, + html: &[u8], ctx: Context, templates: &TemplateData, -) -> Result { +) -> Result, RewritingError> { use lol_html::html_content::{ContentType, Element}; - use lol_html::{ElementContentHandlers, RewriteStrSettings}; + use lol_html::{ElementContentHandlers, HtmlRewriter, MemorySettings, Settings}; let templates = templates.templates.load(); let tera_head = templates.render("rustdoc/head.html", &ctx).unwrap(); @@ -58,10 +58,22 @@ pub(crate) fn rewrite_lol( &body_selector, ElementContentHandlers::default().element(body_handler), ); - let settings = RewriteStrSettings { + let settings = Settings { element_content_handlers: vec![head, body], - ..RewriteStrSettings::default() + memory_settings: MemorySettings { + max_allowed_memory_usage: 1024 * 1024 * 350, // 350 MB, about 1.5x as large as our current largest file + ..MemorySettings::default() + }, + ..Settings::default() }; - lol_html::rewrite_str(html, settings) + // The input and output are always strings, we just use `&[u8]` so we only have to validate once. 
+ let mut buffer = Vec::new(); + let mut writer = HtmlRewriter::try_new(settings, |bytes: &[u8]| { + buffer.extend_from_slice(bytes); + }) + .expect("utf8 is a valid encoding"); + writer.write(html)?; + writer.end()?; + Ok(buffer) } diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 18efd5a81..f0b6aa3ac 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -191,6 +191,27 @@ struct RustdocPage { krate: CrateDetails, } +impl RustdocPage { + fn into_response(self, rustdoc_html: &[u8], req: &mut Request) -> IronResult { + use iron::{headers::ContentType, status::Status}; + + let templates = req + .extensions + .get::() + .expect("missing TemplateData from the request extensions"); + + // Build the page of documentation + let ctx = ctry!(req, tera::Context::from_serialize(self),); + // Extract the head and body of the rustdoc file so that we can insert it into our own html + let html = ctry!(req, utils::rewrite_lol(rustdoc_html, ctx, templates)); + + let mut response = Response::with((Status::Ok, html)); + response.headers.set(ContentType::html()); + + Ok(response) + } +} + /// Serves documentation generated by rustdoc. 
/// /// This includes all HTML files for an individual crate, as well as the `search-index.js`, which is @@ -354,32 +375,14 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { }; rendering_time.step("rewrite html"); - - let file_content = ctry!(req, std::str::from_utf8(&file.0.content)); - let templates = req - .extensions - .get::() - .expect("missing TemplateData from the request extensions"); - // Build the page of documentation - let ctx = ctry!( - req, - tera::Context::from_serialize(RustdocPage { - latest_path, - latest_version, - inner_path, - is_latest_version, - krate, - }) - ); - // Extract the head and body of the rustdoc file so that we can insert it into our own html - let html = ctry!(req, utils::rewrite_lol(file_content, ctx, templates)); - - rendering_time.step("serve html"); - use iron::{headers::ContentType, status::Status}; - let mut response = Response::with((Status::Ok, html)); - response.headers.set(ContentType::html()); - - Ok(response) + RustdocPage { + latest_path, + latest_version, + inner_path, + is_latest_version, + krate, + } + .into_response(&file.0.content, req) } /// Checks whether the given path exists. 
From b39d7c3ad5c397836cfd6af8dca86b94fb5a2bc9 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:36:25 -0400 Subject: [PATCH 08/13] Use log instead of println in tests --- src/web/rustdoc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index f0b6aa3ac..c3d4304a1 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -592,7 +592,7 @@ mod test { ) -> Result, failure::Error> { assert_success(path, web)?; let data = web.get(path).send()?.text()?; - println!("fetched path {} and got content {}", path, data); + log::info!("fetched path {} and got content {}", path, data); let dom = kuchiki::parse_html().one(data); if let Some(elem) = dom From b888c84eda3538993b00f544cb794380f48c61a3 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:42:45 -0400 Subject: [PATCH 09/13] Document `rewrite_lol` --- src/utils/html.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/html.rs b/src/utils/html.rs index 6c190626e..fee43eab8 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -2,6 +2,11 @@ use crate::web::page::TemplateData; use lol_html::errors::RewritingError; use tera::Context; +/// Rewrite a rustdoc page to have the docs.rs header +/// +/// Given a rustdoc HTML page and a context to serialize it with, +/// render the `rustdoc/` templates with the `html`. +/// The output is an HTML page which has not yet been UTF-8 validated. 
pub(crate) fn rewrite_lol( html: &[u8], ctx: Context, From cbe962b046e0c5815f0f9d4d12b2b2f07062693c Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:44:05 -0400 Subject: [PATCH 10/13] Add whitespace Co-authored-by: Chase Wilson --- src/utils/html.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/html.rs b/src/utils/html.rs index fee43eab8..f22dff104 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -23,6 +23,7 @@ pub(crate) fn rewrite_lol( head.append(&tera_head, ContentType::Html); Ok(()) }; + // Before: ... rustdoc content ... // After: // ```html From ec5ca480f3ee1cdb1f20c2da6393a2b37a8d6768 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:49:25 -0400 Subject: [PATCH 11/13] Make parse memory configurable and raise HTML size limit - Raise size limit from 5 MB to 50 MB - Use DOCSRS_MAX_PARSE_MEMORY to configure the max memory, defaulting to 350 MB --- src/config.rs | 5 ++++- src/utils/html.rs | 3 ++- src/web/rustdoc.rs | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/config.rs b/src/config.rs index 003f3e9d4..36f226bca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -28,6 +28,8 @@ pub struct Config { // Max size of the files served by the docs.rs frontend pub(crate) max_file_size: usize, pub(crate) max_file_size_html: usize, + // The most memory that can be used to parse an HTML file + pub(crate) max_parse_memory: usize, } impl Config { @@ -55,7 +57,8 @@ impl Config { github_accesstoken: maybe_env("CRATESFYI_GITHUB_ACCESSTOKEN")?, max_file_size: env("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024)?, - max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 5 * 1024 * 1024)?, + max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 50 * 1024 * 1024)?, + max_parse_memory: env("DOCSRS_MAX_PARSE_MEMORY", 350 * 1024 * 1024)?, }) } diff --git a/src/utils/html.rs b/src/utils/html.rs index f22dff104..33c2a4648 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -9,6 +9,7 @@ use 
tera::Context; /// The output is an HTML page which has not yet been UTF-8 validated. pub(crate) fn rewrite_lol( html: &[u8], + max_allowed_memory_usage: usize, ctx: Context, templates: &TemplateData, ) -> Result, RewritingError> { @@ -67,7 +68,7 @@ pub(crate) fn rewrite_lol( let settings = Settings { element_content_handlers: vec![head, body], memory_settings: MemorySettings { - max_allowed_memory_usage: 1024 * 1024 * 350, // 350 MB, about 1.5x as large as our current largest file + max_allowed_memory_usage, ..MemorySettings::default() }, ..Settings::default() diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index c3d4304a1..c47be72b5 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -192,7 +192,7 @@ struct RustdocPage { } impl RustdocPage { - fn into_response(self, rustdoc_html: &[u8], req: &mut Request) -> IronResult { + fn into_response(self, rustdoc_html: &[u8], max_parse_memory: usize, req: &mut Request) -> IronResult { use iron::{headers::ContentType, status::Status}; let templates = req @@ -203,7 +203,7 @@ impl RustdocPage { // Build the page of documentation let ctx = ctry!(req, tera::Context::from_serialize(self),); // Extract the head and body of the rustdoc file so that we can insert it into our own html - let html = ctry!(req, utils::rewrite_lol(rustdoc_html, ctx, templates)); + let html = ctry!(req, utils::rewrite_lol(rustdoc_html, max_parse_memory, ctx, templates)); let mut response = Response::with((Status::Ok, html)); response.headers.set(ContentType::html()); @@ -382,7 +382,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { is_latest_version, krate, } - .into_response(&file.0.content, req) + .into_response(&file.0.content, config.max_parse_memory, req) } /// Checks whether the given path exists. 
From f78472ba2eac382298ab464ce91d4b256594c4cd Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 16:52:07 -0400 Subject: [PATCH 12/13] Improve comment --- src/utils/html.rs | 1 + src/web/rustdoc.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/utils/html.rs b/src/utils/html.rs index 33c2a4648..b194a9cc0 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -7,6 +7,7 @@ use tera::Context; /// Given a rustdoc HTML page and a context to serialize it with, /// render the `rustdoc/` templates with the `html`. /// The output is an HTML page which has not yet been UTF-8 validated. +/// In practice, the output should always be valid UTF-8. pub(crate) fn rewrite_lol( html: &[u8], max_allowed_memory_usage: usize, diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index c47be72b5..4d756a750 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -192,7 +192,12 @@ struct RustdocPage { } impl RustdocPage { - fn into_response(self, rustdoc_html: &[u8], max_parse_memory: usize, req: &mut Request) -> IronResult { + fn into_response( + self, + rustdoc_html: &[u8], + max_parse_memory: usize, + req: &mut Request, + ) -> IronResult { use iron::{headers::ContentType, status::Status}; let templates = req @@ -203,7 +208,10 @@ impl RustdocPage { // Build the page of documentation let ctx = ctry!(req, tera::Context::from_serialize(self),); // Extract the head and body of the rustdoc file so that we can insert it into our own html - let html = ctry!(req, utils::rewrite_lol(rustdoc_html, max_parse_memory, ctx, templates)); + let html = ctry!( + req, + utils::rewrite_lol(rustdoc_html, max_parse_memory, ctx, templates) + ); let mut response = Response::with((Status::Ok, html)); response.headers.set(ContentType::html()); From 2dc045df0f3ee8d2aa7e528780af14f1dd675a48 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 2 Aug 2020 22:54:06 -0400 Subject: [PATCH 13/13] Only allot 5 MB for LOL parser --- src/config.rs | 4 +++- 1 
file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index 36f226bca..936018b6a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -58,7 +58,9 @@ impl Config { max_file_size: env("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024)?, max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 50 * 1024 * 1024)?, - max_parse_memory: env("DOCSRS_MAX_PARSE_MEMORY", 350 * 1024 * 1024)?, + // LOL HTML only uses as much memory as the size of the start tag! + // https://github.com/rust-lang/docs.rs/pull/930#issuecomment-667729380 + max_parse_memory: env("DOCSRS_MAX_PARSE_MEMORY", 5 * 1024 * 1024)?, }) }