From a4f0a759a7af61344759b842360d264a5f6dcd13 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 22 Nov 2017 16:39:12 +0000 Subject: [PATCH 1/2] Add more documentation --- markup5ever/build.rs | 15 +++++- markup5ever/interface/mod.rs | 48 +++++++++++++++---- markup5ever/interface/tree_builder.rs | 69 ++++++++++++++++++++++----- markup5ever/rcdom.rs | 40 ++++++++++++++-- 4 files changed, 144 insertions(+), 28 deletions(-) diff --git a/markup5ever/build.rs b/markup5ever/build.rs index e98908b2..ed50132a 100644 --- a/markup5ever/build.rs +++ b/markup5ever/build.rs @@ -40,6 +40,7 @@ fn main() { &Path::new(&manifest_dir).join("data").join("entities.json"), &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")); + // Create a string cache for local names let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt"); let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!"); for line in BufReader::new(File::open(&local_names).unwrap()).lines() { @@ -47,19 +48,29 @@ fn main() { local_names_atom.atom(&local_name); local_names_atom.atom(&local_name.to_ascii_lowercase()); } - local_names_atom.write_to(&mut generated).unwrap(); + local_names_atom + .with_macro_doc("Takes a local name as a string and returns its key in the a string cache.") + .write_to(&mut generated).unwrap(); + // Create a string cache for namespace prefixes string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!") + .with_macro_doc("Takes a namespace prefix string and returns its key in a string cache.") .atoms(NAMESPACES.iter().map(|&(prefix, _url)| prefix)) .write_to(&mut generated) .unwrap(); + // Create a string cache for namespace urls string_cache_codegen::AtomType::new("Namespace", "namespace_url!") + .with_macro_doc("Takes a namespace url string and returns its key in a string cache.") .atoms(NAMESPACES.iter().map(|&(_prefix, url)| url)) .write_to(&mut generated) .unwrap(); - writeln!(generated, 
"#[macro_export] macro_rules! ns {{").unwrap(); + // T + writeln!(generated, r#" + /// Maps the input of `namespace_prefix!` to the output of `namespace_url!`. + #[macro_export] macro_rules! ns {{ + "#).unwrap(); for &(prefix, url) in NAMESPACES { writeln!(generated, "({}) => {{ namespace_url!({:?}) }};", prefix, url).unwrap(); } diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs index 381b90b0..cb4d51d1 100644 --- a/markup5ever/interface/mod.rs +++ b/markup5ever/interface/mod.rs @@ -6,6 +6,7 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +//! Types for tag and attribute names, and tree-builder functionality. use std::fmt; use tendril::StrTendril; @@ -15,7 +16,9 @@ pub use self::tree_builder::{NodeOrText, AppendNode, AppendText, create_element, pub use self::tree_builder::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; pub use self::tree_builder::{TreeSink, Tracer, NextParserState}; -/// https://www.w3.org/TR/REC-xml-names/#dt-expname +/// An [expanded name], containing the tag and the namespace. +/// +/// [expanded name]: https://www.w3.org/TR/REC-xml-names/#dt-expname #[derive(Copy, Clone, Eq, Hash)] pub struct ExpandedName<'a> { pub ns: &'a Namespace, @@ -38,6 +41,24 @@ impl<'a> fmt::Debug for ExpandedName<'a> { } } +/// Helper to quickly create an expanded name. +/// +/// # Examples +/// +/// ``` +/// # #[macro_use] extern crate markup5ever; +/// +/// # fn main() { +/// use markup5ever::ExpandedName; +/// +/// assert_eq!( +/// expanded_name!("", "div"), +/// ExpandedName { +/// ns: &ns!(), +/// local: &local_name!("div") +/// } +/// ) +/// # } #[macro_export] macro_rules! expanded_name { ("", $local: tt) => { @@ -56,13 +77,11 @@ macro_rules! expanded_name { pub mod tree_builder; -/// A name with a namespace. -#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] -/// Fully qualified name. Used to depict names of tags and attributes. 
+/// A fully qualified name (with a namespace), used to depict names of tags and attributes. /// -/// Used to differentiate between similar XML fragments. For example: +/// Namespaces can be used to differentiate between similar XML fragments. For example: /// -/// ```ignore +/// ```text /// // HTML /// /// @@ -82,7 +101,7 @@ pub mod tree_builder; /// Without XML namespaces, we can't use those two fragments in the same document /// at the same time. However if we declare a namespace we could instead say: /// -/// ```ignore +/// ```text /// // Furniture XML /// /// African Coffee Table @@ -91,24 +110,30 @@ pub mod tree_builder; /// /// ``` /// -/// and bind it to a different name. +/// and bind the prefix `furn` to a different namespace. /// /// For this reason we parse names that contain a colon in the following way: /// -/// ```ignore +/// ```text /// /// | | /// | +- local name /// | /// prefix (when resolved gives namespace_url) /// ``` +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)] +#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))] pub struct QualName { + /// The namespace before resolution (e.g. `furn` in `` above). pub prefix: Option, + /// The namespace after resolution. pub ns: Namespace, + /// The local name (e.g. `table` in `` above). pub local: LocalName, } impl QualName { + /// Simple constructor function. #[inline] pub fn new(prefix: Option, ns: Namespace, local: LocalName) -> QualName { QualName { @@ -118,6 +143,7 @@ impl QualName { } } + /// Take a reference as an `ExpandedName`, dropping the unresolved prefix. #[inline] pub fn expanded(&self) -> ExpandedName { ExpandedName { @@ -127,7 +153,7 @@ impl QualName { } } -/// A tag attribute. +/// A tag attribute, e.g. `class="test"` in `
<div class="test">`. /// The namespace on the attribute name is almost always ns!(""). /// The tokenizer creates all attributes this way, but the tree @@ -135,7 +161,9 @@ impl QualName { /// content (MathML, SVG). #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)] pub struct Attribute { + /// The name of the attribute (e.g. the `class` in `<div class="test">
`) pub name: QualName, + /// The value of the attribute (e.g. the `"test"` in `<div class="test">
`) pub value: StrTendril, } diff --git a/markup5ever/interface/tree_builder.rs b/markup5ever/interface/tree_builder.rs index 7df650c5..ad8462ff 100644 --- a/markup5ever/interface/tree_builder.rs +++ b/markup5ever/interface/tree_builder.rs @@ -7,10 +7,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/// Something which can be inserted into the DOM. -/// -/// Adjacent sibling text nodes are merged into a single node, so -/// the sink may not want to allocate a `Handle` for each. +//! This module contains functionality for managing the DOM, including adding/removing nodes. +//! +//! It can be used by a parser to create the DOM graph structure in memory. use std::borrow::Cow; use tendril::StrTendril; @@ -28,11 +27,17 @@ pub enum NodeOrText { AppendText(StrTendril), } -/// A document's quirks mode. +/// A document's quirks mode, for compatibility with old browsers. See [quirks mode on wikipedia] +/// for more information. +/// +/// [quirks mode on wikipedia]: https://en.wikipedia.org/wiki/Quirks_mode #[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] pub enum QuirksMode { + /// Full quirks mode Quirks, + /// Almost standards mode LimitedQuirks, + /// Standards mode NoQuirks, } @@ -47,23 +52,59 @@ pub enum NextParserState { Continue, } +/// Special properties of an element, useful for tagging elements with this information. #[derive(Default)] pub struct ElementFlags { /// A document fragment should be created, associated with the element, - /// and returned in TreeSink::get_template_contents + /// and returned in TreeSink::get_template_contents. + /// + /// See [template-contents in the whatwg spec][whatwg template-contents]. 
/// - /// https://html.spec.whatwg.org/multipage/#template-contents + /// [whatwg template-contents]: https://html.spec.whatwg.org/multipage/#template-contents pub template: bool, /// This boolean should be recorded with the element and returned /// in TreeSink::is_mathml_annotation_xml_integration_point /// - /// https://html.spec.whatwg.org/multipage/#html-integration-point + /// See [html-integration-point in the whatwg spec][whatwg integration-point]. + /// + /// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point pub mathml_annotation_xml_integration_point: bool, + // Prevent construction from outside module _private: () } +/// A constructor for an element. +/// +/// # Examples +/// +/// Create an element like `<div class="test-class-name"></div>
`: +/// +/// ``` +/// # #[macro_use] extern crate markup5ever; +/// +/// # fn main() { +/// use markup5ever::{rcdom, QualName, Attribute}; +/// use markup5ever::interface::create_element; +/// +/// let mut dom = rcdom::RcDom::default(); +/// let el = create_element(&mut dom, +/// // Namespaces and localnames use precomputed interned strings for +/// // speed. Use the macros ns! and local_name! to fetch them. +/// QualName::new(None, ns!(), local_name!("div")), +/// vec![ +/// Attribute { +/// name: QualName::new(None, ns!(), local_name!("class")), +/// // In real scenarios, you would use a view onto an existing +/// // string if possible to avoid allocation. Tendrils have utilities +/// // for avoiding allocation & copying wherever possible. +/// value: String::from("test-class-name").into() +/// } +/// ]); +/// # } +/// +/// ``` pub fn create_element(sink: &mut Sink, name: QualName, attrs: Vec) -> Sink::Handle where Sink: TreeSink { let mut flags = ElementFlags::default(); @@ -84,6 +125,10 @@ where Sink: TreeSink { sink.create_element(name, attrs, flags) } +/// Methods a parser can use to create the DOM. The DOM provider implements this trait. +/// +/// Having this as a trait potentially allows multiple implementations of the DOM to be used with +/// the same parser. pub trait TreeSink { /// `Handle` is a reference to a DOM node. The tree builder requires /// that a `Handle` implements `Clone` to get another reference to @@ -93,14 +138,14 @@ pub trait TreeSink { /// The overall result of parsing. /// /// This should default to Self, but default associated types are not stable yet. - /// (https://github.com/rust-lang/rust/issues/29661) + /// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661) type Output; /// Consume this sink and return the overall result of parsing. /// /// TODO:This should default to `fn finish(self) -> Self::Output { self }`, /// but default associated types are not stable yet. 
- /// (https://github.com/rust-lang/rust/issues/29661) + /// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661) fn finish(self) -> Self::Output; /// Signal a parse error. @@ -121,7 +166,9 @@ pub trait TreeSink { /// an associated document fragment called the "template contents" should /// also be created. Later calls to self.get_template_contents() with that /// given element return it. - /// https://html.spec.whatwg.org/multipage/#the-template-element + /// See [the template element in the whatwg spec][whatwg template]. + /// + /// [whatwg template]: https://html.spec.whatwg.org/multipage/#the-template-element fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Self::Handle; diff --git a/markup5ever/rcdom.rs b/markup5ever/rcdom.rs index bad332cd..671e7485 100644 --- a/markup5ever/rcdom.rs +++ b/markup5ever/rcdom.rs @@ -11,6 +11,30 @@ //! //! This is sufficient as a static parse tree, but don't build a //! web browser using it. :) +//! +//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like +//! format. For example, the following graph +//! +//! ```text +//! div +//! +- "text node" +//! +- span +//! ``` +//! in HTML would be serialized as +//! +//! ```html +//!
<div>text node<span></span></div>
+//! ``` +//! +//! See the [document object model article on wikipedia][dom wiki] for more information. +//! +//! This implementation stores the information associated with each node once, and then hands out +//! refs to children. The nodes themselves are reference-counted to avoid copy - you can create a +//! new ref and then a node will outlive the document. Nodes own their children, but only have weak +//! references to their parents. +//! +//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) +//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model use std::cell::{RefCell, Cell}; use std::collections::HashSet; @@ -33,10 +57,13 @@ use serialize::TraversalScope::{IncludeNode, ChildrenOnly}; /// The different kinds of nodes in the DOM. pub enum NodeData { - /// The `Document` itself. + /// The `Document` itself - the root node of a HTML document. Document, - /// A `DOCTYPE` with name, public id, and system id. + /// A `DOCTYPE` with name, public id, and system id. See + /// [document type declaration on wikipedia][dtd wiki]. + /// + /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration Doctype { name: StrTendril, public_id: StrTendril, @@ -58,11 +85,14 @@ pub enum NodeData { name: QualName, attrs: RefCell>, - /// For HTML