11use crate :: error:: Result ;
22use failure:: err_msg;
3-
4- use html5ever:: driver:: { parse_document, ParseOpts } ;
5- use html5ever:: rcdom:: { Handle , NodeData , RcDom } ;
6- use html5ever:: serialize:: { serialize, SerializeOpts } ;
7- use html5ever:: tendril:: TendrilSink ;
3+ use kuchiki:: traits:: TendrilSink ;
4+ use kuchiki:: NodeRef ;
85
96/// Extracts the contents of the `<head>` and `<body>` tags from an HTML document, as well as the
107/// classes on the `<body>` tag, if any.
118pub fn extract_head_and_body ( html : & str ) -> Result < ( String , String , String ) > {
// Diff view of the function body: lines marked `-` are the previous html5ever-based
// implementation, lines marked `+` are the kuchiki-based implementation replacing it.
//
// Previous body: parse `html` into an RcDom, locate the <head> and <body> handles with
// extract_from_rcdom, read the body class with extract_class, then stringify both handles.
12- let parser = parse_document ( RcDom :: default ( ) , ParseOpts :: default ( ) ) ;
13- let dom = parser. one ( html) ;
14-
15- let ( head, body) = extract_from_rcdom ( & dom) ?;
16- let class = extract_class ( & body) ;
17-
18- Ok ( ( stringify ( head) , stringify ( body) , class) )
// New body: parse `html` with kuchiki in a single step.
9+ let dom = kuchiki:: parse_html ( ) . one ( html) ;
10+
// Select the first <head> and the first <body>; a failed selection is converted into an
// error carrying the message below and returned to the caller via `?`.
11+ let head = dom
12+ . select_first ( "head" )
13+ . map_err ( |_| err_msg ( "couldn't find <head> tag in rustdoc output" ) ) ?;
14+ let body = dom
15+ . select_first ( "body" )
16+ . map_err ( |_| err_msg ( "couldn't find <body> tag in rustdoc output" ) ) ?;
17+
// Copy the `class` attribute of <body> into an owned String; when the attribute is
// absent, unwrap_or_default yields an empty String.
18+ let class = body
19+ . attributes
20+ . borrow ( )
21+ . get ( "class" )
22+ . map ( |v| v. to_owned ( ) )
23+ . unwrap_or_default ( ) ;
24+
// Result tuple order: serialized head, serialized body, body class.
25+ Ok ( ( serialize ( head. as_node ( ) ) , serialize ( body. as_node ( ) ) , class) )
1926}
2027
// Removed by this change (every line carries the `-` marker).
// Walks the RcDom looking for the <head> and <body> elements and returns their handles as
// (head, body). Returns an error if either tag occurs more than once or is not found.
21- fn extract_from_rcdom ( dom : & RcDom ) -> Result < ( Handle , Handle ) > {
// Explicit worklist seeded with the document node, used instead of recursion.
22- let mut worklist = vec ! [ dom. document. clone( ) ] ;
23- let ( mut head, mut body) = ( None , None ) ;
24-
25- while let Some ( handle) = worklist. pop ( ) {
// Only element nodes are inspected; other node kinds are only traversed through.
26- if let NodeData :: Element { ref name, .. } = handle. data {
27- match name. local . as_ref ( ) {
28- "head" => {
// A second <head> anywhere in the tree is reported as an error.
29- if head. is_some ( ) {
30- return Err ( err_msg ( "duplicate <head> tag" ) ) ;
31- } else {
32- head = Some ( handle. clone ( ) ) ;
33- }
34- }
35-
36- "body" => {
// A second <body> anywhere in the tree is reported as an error.
37- if body. is_some ( ) {
38- return Err ( err_msg ( "duplicate <body> tag" ) ) ;
39- } else {
40- body = Some ( handle. clone ( ) ) ;
41- }
42- }
43-
44- _ => { } // do nothing
45- }
46- }
47-
// Queue every child of the current node so the whole tree is eventually visited.
48- worklist. extend ( handle. children . borrow ( ) . iter ( ) . cloned ( ) ) ;
49- }
50-
// Both tags are required; a missing one becomes an error.
51- let head = head. ok_or_else ( || err_msg ( "couldn't find <head> tag in rustdoc output" ) ) ?;
52- let body = body. ok_or_else ( || err_msg ( "couldn't find <body> tag in rustdoc output" ) ) ?;
53- Ok ( ( head, body) )
54- }
55-
// Removed by this change (every line carries the `-` marker).
// Serializes `node` with default SerializeOpts into an in-memory byte buffer and returns
// the bytes as a String. Panics (via expect) if serialization fails or if the produced
// bytes are not valid UTF-8.
56- fn stringify ( node : Handle ) -> String {
57- let mut vec = Vec :: new ( ) ;
58- serialize ( & mut vec, & node, SerializeOpts :: default ( ) ) . expect ( "serializing into buffer failed" ) ;
59-
60- String :: from_utf8 ( vec) . expect ( "html5ever returned non-utf8 data" )
61- }
62-
// Diff view: the `-` lines are the removed extract_class helper, the `+` lines are the new
// serialize helper; the two unmarked closing-brace lines are shared by both versions.
//
// Removed extract_class: for an element node, returns the value of the first attribute
// whose local name is "class", or an empty String when there is none; for any other node
// kind it returns an empty String.
63- fn extract_class ( node : & Handle ) -> String {
64- match node. data {
65- NodeData :: Element { ref attrs, .. } => {
66- let attrs = attrs. borrow ( ) ;
67-
68- attrs
69- . iter ( )
70- . find ( |a| & a. name . local == "class" )
71- . map_or ( String :: new ( ) , |a| a. value . to_string ( ) )
72- }
73-
74- _ => String :: new ( ) ,
// Added serialize: writes every child of `v` into one byte buffer and returns the buffer
// as a String. Only the children are serialized; `v` itself is never written to the buffer.
// Panics (via expect) if a child fails to serialize or the bytes are not valid UTF-8.
28+ fn serialize ( v : & NodeRef ) -> String {
29+ let mut contents = Vec :: new ( ) ;
30+ for child in v. children ( ) {
31+ child
32+ . serialize ( & mut contents)
33+ . expect ( "serialization failed" ) ;
7534 }
35+ String :: from_utf8 ( contents) . expect ( "non utf-8 html" )
7636}
7737
7838#[ cfg( test) ]
@@ -82,8 +42,7 @@ mod test {
8242 let ( head, body, class) = super :: extract_head_and_body (
8343 r#"<head><meta name="generator" content="rustdoc"></head><body class="rustdoc struct"><p>hello</p>"#
8444 ) . unwrap ( ) ;
85-
86- assert_eq ! ( head, r#"<meta name="generator" content="rustdoc">"# ) ;
45+ assert_eq ! ( head, r#"<meta content="rustdoc" name="generator">"# ) ;
8746 assert_eq ! ( body, "<p>hello</p>" ) ;
8847 assert_eq ! ( class, "rustdoc struct" ) ;
8948 }
0 commit comments