Skip to content

Commit 24bd183

Browse files
author
bors-servo
authored
Auto merge of #329 - derekdreery:docs, r=jdm
Add more documentation. I recently submitted a PR to add documentation; this PR adds some more. Note that it won't merge as-is because it relies on PR #199 in servo/string-cache (servo/string-cache#199).
2 parents de1b2af + 0db8e21 commit 24bd183

File tree

4 files changed

+143
-28
lines changed

4 files changed

+143
-28
lines changed

markup5ever/build.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,26 +40,36 @@ fn main() {
4040
&Path::new(&manifest_dir).join("data").join("entities.json"),
4141
&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"));
4242

43+
// Create a string cache for local names
4344
let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt");
4445
let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!");
4546
for line in BufReader::new(File::open(&local_names).unwrap()).lines() {
4647
let local_name = line.unwrap();
4748
local_names_atom.atom(&local_name);
4849
local_names_atom.atom(&local_name.to_ascii_lowercase());
4950
}
50-
local_names_atom.write_to(&mut generated).unwrap();
51+
local_names_atom
52+
.with_macro_doc("Takes a local name as a string and returns its key in the string cache.")
53+
.write_to(&mut generated).unwrap();
5154

55+
// Create a string cache for namespace prefixes
5256
string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!")
57+
.with_macro_doc("Takes a namespace prefix string and returns its key in a string cache.")
5358
.atoms(NAMESPACES.iter().map(|&(prefix, _url)| prefix))
5459
.write_to(&mut generated)
5560
.unwrap();
5661

62+
// Create a string cache for namespace urls
5763
string_cache_codegen::AtomType::new("Namespace", "namespace_url!")
64+
.with_macro_doc("Takes a namespace url string and returns its key in a string cache.")
5865
.atoms(NAMESPACES.iter().map(|&(_prefix, url)| url))
5966
.write_to(&mut generated)
6067
.unwrap();
6168

62-
writeln!(generated, "#[macro_export] macro_rules! ns {{").unwrap();
69+
writeln!(generated, r#"
70+
/// Maps the input of `namespace_prefix!` to the output of `namespace_url!`.
71+
#[macro_export] macro_rules! ns {{
72+
"#).unwrap();
6373
for &(prefix, url) in NAMESPACES {
6474
writeln!(generated, "({}) => {{ namespace_url!({:?}) }};", prefix, url).unwrap();
6575
}

markup5ever/interface/mod.rs

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
9+
//! Types for tag and attribute names, and tree-builder functionality.
910
1011
use std::fmt;
1112
use tendril::StrTendril;
@@ -15,7 +16,9 @@ pub use self::tree_builder::{NodeOrText, AppendNode, AppendText, create_element,
1516
pub use self::tree_builder::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
1617
pub use self::tree_builder::{TreeSink, Tracer, NextParserState};
1718

18-
/// https://www.w3.org/TR/REC-xml-names/#dt-expname
19+
/// An [expanded name], containing the tag and the namespace.
20+
///
21+
/// [expanded name]: https://www.w3.org/TR/REC-xml-names/#dt-expname
1922
#[derive(Copy, Clone, Eq, Hash)]
2023
pub struct ExpandedName<'a> {
2124
pub ns: &'a Namespace,
@@ -38,6 +41,24 @@ impl<'a> fmt::Debug for ExpandedName<'a> {
3841
}
3942
}
4043

44+
/// Helper to quickly create an expanded name.
45+
///
46+
/// # Examples
47+
///
48+
/// ```
49+
/// # #[macro_use] extern crate markup5ever;
50+
///
51+
/// # fn main() {
52+
/// use markup5ever::ExpandedName;
53+
///
54+
/// assert_eq!(
55+
/// expanded_name!("", "div"),
56+
/// ExpandedName {
57+
/// ns: &ns!(),
58+
/// local: &local_name!("div")
59+
/// }
60+
/// )
61+
/// # }
/// ```
4162
#[macro_export]
4263
macro_rules! expanded_name {
4364
("", $local: tt) => {
@@ -56,13 +77,11 @@ macro_rules! expanded_name {
5677

5778
pub mod tree_builder;
5879

59-
/// A name with a namespace.
60-
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)]
61-
/// Fully qualified name. Used to depict names of tags and attributes.
80+
/// A fully qualified name (with a namespace), used to depict names of tags and attributes.
6281
///
63-
/// Used to differentiate between similar XML fragments. For example:
82+
/// Namespaces can be used to differentiate between similar XML fragments. For example:
6483
///
65-
/// ```ignore
84+
/// ```text
6685
/// // HTML
6786
/// <table>
6887
/// <tr>
@@ -82,7 +101,7 @@ pub mod tree_builder;
82101
/// Without XML namespaces, we can't use those two fragments in the same document
83102
/// at the same time. However if we declare a namespace we could instead say:
84103
///
85-
/// ```ignore
104+
/// ```text
86105
/// // Furniture XML
87106
/// <furn:table>
88107
/// <furn:name>African Coffee Table</furn:name>
@@ -91,24 +110,30 @@ pub mod tree_builder;
91110
/// </furn:table>
92111
/// ```
93112
///
94-
/// and bind it to a different name.
113+
/// and bind the prefix `furn` to a different namespace.
95114
///
96115
/// For this reason we parse names that contain a colon in the following way:
97116
///
98-
/// ```ignore
117+
/// ```text
99118
/// <furn:table>
100119
/// | |
101120
/// | +- local name
102121
/// |
103122
/// prefix (when resolved gives namespace_url)
104123
/// ```
124+
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)]
125+
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
105126
pub struct QualName {
127+
/// The namespace before resolution (e.g. `furn` in `<furn:table>` above).
106128
pub prefix: Option<Prefix>,
129+
/// The namespace after resolution.
107130
pub ns: Namespace,
131+
/// The local name (e.g. `table` in `<furn:table>` above).
108132
pub local: LocalName,
109133
}
110134

111135
impl QualName {
136+
/// Simple constructor function.
112137
#[inline]
113138
pub fn new(prefix: Option<Prefix>, ns: Namespace, local: LocalName) -> QualName {
114139
QualName {
@@ -118,6 +143,7 @@ impl QualName {
118143
}
119144
}
120145

146+
/// Take a reference of `self` as an `ExpandedName`, dropping the unresolved prefix.
121147
#[inline]
122148
pub fn expanded(&self) -> ExpandedName {
123149
ExpandedName {
@@ -127,15 +153,17 @@ impl QualName {
127153
}
128154
}
129155

130-
/// A tag attribute.
156+
/// A tag attribute, e.g. `class="test"` in `<div class="test" ...>`.
131157
///
132158
/// The namespace on the attribute name is almost always ns!("").
133159
/// The tokenizer creates all attributes this way, but the tree
134160
/// builder will adjust certain attribute names inside foreign
135161
/// content (MathML, SVG).
136162
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
137163
pub struct Attribute {
164+
/// The name of the attribute (e.g. the `class` in `<div class="test">`)
138165
pub name: QualName,
166+
/// The value of the attribute (e.g. the `"test"` in `<div class="test">`)
139167
pub value: StrTendril,
140168
}
141169

markup5ever/interface/tree_builder.rs

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@
77
// option. This file may not be copied, modified, or distributed
88
// except according to those terms.
99

10-
/// Something which can be inserted into the DOM.
11-
///
12-
/// Adjacent sibling text nodes are merged into a single node, so
13-
/// the sink may not want to allocate a `Handle` for each.
10+
//! This module contains functionality for managing the DOM, including adding/removing nodes.
11+
//!
12+
//! It can be used by a parser to create the DOM graph structure in memory.
1413
1514
use std::borrow::Cow;
1615
use tendril::StrTendril;
@@ -28,11 +27,17 @@ pub enum NodeOrText<Handle> {
2827
AppendText(StrTendril),
2928
}
3029

31-
/// A document's quirks mode.
30+
/// A document's quirks mode, for compatibility with old browsers. See [quirks mode on wikipedia]
31+
/// for more information.
32+
///
33+
/// [quirks mode on wikipedia]: https://en.wikipedia.org/wiki/Quirks_mode
3234
#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)]
3335
pub enum QuirksMode {
36+
/// Full quirks mode
3437
Quirks,
38+
/// Almost standards mode
3539
LimitedQuirks,
40+
/// Standards mode
3641
NoQuirks,
3742
}
3843

@@ -47,23 +52,59 @@ pub enum NextParserState {
4752
Continue,
4853
}
4954

55+
/// Special properties of an element, useful for tagging elements with this information.
5056
#[derive(Default)]
5157
pub struct ElementFlags {
5258
/// A document fragment should be created, associated with the element,
53-
/// and returned in TreeSink::get_template_contents
59+
/// and returned in TreeSink::get_template_contents.
60+
///
61+
/// See [template-contents in the whatwg spec][whatwg template-contents].
5462
///
55-
/// https://html.spec.whatwg.org/multipage/#template-contents
63+
/// [whatwg template-contents]: https://html.spec.whatwg.org/multipage/#template-contents
5664
pub template: bool,
5765

5866
/// This boolean should be recorded with the element and returned
5967
/// in TreeSink::is_mathml_annotation_xml_integration_point
6068
///
61-
/// https://html.spec.whatwg.org/multipage/#html-integration-point
69+
/// See [html-integration-point in the whatwg spec][whatwg integration-point].
70+
///
71+
/// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point
6272
pub mathml_annotation_xml_integration_point: bool,
6373

74+
// Prevent construction from outside module
6475
_private: ()
6576
}
6677

78+
/// A constructor for an element.
79+
///
80+
/// # Examples
81+
///
82+
/// Create an element like `<div class="test-class-name"></div>`:
83+
///
84+
/// ```
85+
/// # #[macro_use] extern crate markup5ever;
86+
///
87+
/// # fn main() {
88+
/// use markup5ever::{rcdom, QualName, Attribute};
89+
/// use markup5ever::interface::create_element;
90+
///
91+
/// let mut dom = rcdom::RcDom::default();
92+
/// let el = create_element(&mut dom,
93+
/// // Namespaces and localnames use precomputed interned strings for
94+
/// // speed. Use the macros ns! and local_name! to fetch them.
95+
/// QualName::new(None, ns!(), local_name!("div")),
96+
/// vec![
97+
/// Attribute {
98+
/// name: QualName::new(None, ns!(), local_name!("class")),
99+
/// // In real scenarios, you would use a view onto an existing
100+
/// // string if possible to avoid allocation. Tendrils have utilities
101+
/// // for avoiding allocation & copying wherever possible.
102+
/// value: String::from("test-class-name").into()
103+
/// }
104+
/// ]);
105+
/// # }
106+
///
107+
/// ```
67108
pub fn create_element<Sink>(sink: &mut Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle
68109
where Sink: TreeSink {
69110
let mut flags = ElementFlags::default();
@@ -84,6 +125,10 @@ where Sink: TreeSink {
84125
sink.create_element(name, attrs, flags)
85126
}
86127

128+
/// Methods a parser can use to create the DOM. The DOM provider implements this trait.
129+
///
130+
/// Having this as a trait potentially allows multiple implementations of the DOM to be used with
131+
/// the same parser.
87132
pub trait TreeSink {
88133
/// `Handle` is a reference to a DOM node. The tree builder requires
89134
/// that a `Handle` implements `Clone` to get another reference to
@@ -93,14 +138,14 @@ pub trait TreeSink {
93138
/// The overall result of parsing.
94139
///
95140
/// This should default to Self, but default associated types are not stable yet.
96-
/// (https://github.com/rust-lang/rust/issues/29661)
141+
/// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661)
97142
type Output;
98143

99144
/// Consume this sink and return the overall result of parsing.
100145
///
101146
/// TODO:This should default to `fn finish(self) -> Self::Output { self }`,
102147
/// but default associated types are not stable yet.
103-
/// (https://github.com/rust-lang/rust/issues/29661)
148+
/// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661)
104149
fn finish(self) -> Self::Output;
105150

106151
/// Signal a parse error.
@@ -121,7 +166,9 @@ pub trait TreeSink {
121166
/// an associated document fragment called the "template contents" should
122167
/// also be created. Later calls to self.get_template_contents() with that
123168
/// given element return it.
124-
/// https://html.spec.whatwg.org/multipage/#the-template-element
169+
/// See [the template element in the whatwg spec][whatwg template].
170+
///
171+
/// [whatwg template]: https://html.spec.whatwg.org/multipage/#the-template-element
125172
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags)
126173
-> Self::Handle;
127174

markup5ever/rcdom.rs

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,30 @@
1111
//!
1212
//! This is sufficient as a static parse tree, but don't build a
1313
//! web browser using it. :)
14+
//!
15+
//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
16+
//! format. For example, the following graph
17+
//!
18+
//! ```text
19+
//! div
20+
//! +- "text node"
21+
//! +- span
22+
//! ```
23+
//! in HTML would be serialized as
24+
//!
25+
//! ```html
26+
//! <div>text node<span></span></div>
27+
//! ```
28+
//!
29+
//! See the [document object model article on wikipedia][dom wiki] for more information.
30+
//!
31+
//! This implementation stores the information associated with each node once, and then hands out
32+
//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
33+
//! a new ref and then a node will outlive the document. Nodes own their children, but only have
34+
//! weak references to their parents.
35+
//!
36+
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
37+
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
1438
1539
use std::cell::{RefCell, Cell};
1640
use std::collections::HashSet;
@@ -33,10 +57,13 @@ use serialize::TraversalScope::{IncludeNode, ChildrenOnly};
3357

3458
/// The different kinds of nodes in the DOM.
3559
pub enum NodeData {
36-
/// The `Document` itself.
60+
/// The `Document` itself - the root node of an HTML document.
3761
Document,
3862

39-
/// A `DOCTYPE` with name, public id, and system id.
63+
/// A `DOCTYPE` with name, public id, and system id. See
64+
/// [document type declaration on wikipedia][dtd wiki].
65+
///
66+
/// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
4067
Doctype {
4168
name: StrTendril,
4269
public_id: StrTendril,
@@ -58,11 +85,14 @@ pub enum NodeData {
5885
name: QualName,
5986
attrs: RefCell<Vec<Attribute>>,
6087

61-
/// For HTML <template> elements, the template contents
62-
/// https://html.spec.whatwg.org/multipage/#template-contents
88+
/// For HTML \<template\> elements, the [template contents].
89+
///
90+
/// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
6391
template_contents: Option<Handle>,
6492

65-
/// https://html.spec.whatwg.org/multipage/#html-integration-point
93+
/// Whether the node is an [HTML integration point].
94+
///
95+
/// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
6696
mathml_annotation_xml_integration_point: bool,
6797
},
6898

0 commit comments

Comments
 (0)