Skip to content

Add more documentation #329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions markup5ever/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,36 @@ fn main() {
&Path::new(&manifest_dir).join("data").join("entities.json"),
&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"));

// Create a string cache for local names
let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt");
let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!");
for line in BufReader::new(File::open(&local_names).unwrap()).lines() {
let local_name = line.unwrap();
local_names_atom.atom(&local_name);
local_names_atom.atom(&local_name.to_ascii_lowercase());
}
local_names_atom.write_to(&mut generated).unwrap();
local_names_atom
.with_macro_doc("Takes a local name as a string and returns its key in the string cache.")
.write_to(&mut generated).unwrap();

// Create a string cache for namespace prefixes
string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!")
.with_macro_doc("Takes a namespace prefix string and returns its key in a string cache.")
.atoms(NAMESPACES.iter().map(|&(prefix, _url)| prefix))
.write_to(&mut generated)
.unwrap();

// Create a string cache for namespace urls
string_cache_codegen::AtomType::new("Namespace", "namespace_url!")
.with_macro_doc("Takes a namespace url string and returns its key in a string cache.")
.atoms(NAMESPACES.iter().map(|&(_prefix, url)| url))
.write_to(&mut generated)
.unwrap();

writeln!(generated, "#[macro_export] macro_rules! ns {{").unwrap();
writeln!(generated, r#"
/// Maps the input of `namespace_prefix!` to the output of `namespace_url!`.
#[macro_export] macro_rules! ns {{
"#).unwrap();
for &(prefix, url) in NAMESPACES {
writeln!(generated, "({}) => {{ namespace_url!({:?}) }};", prefix, url).unwrap();
}
Expand Down
48 changes: 38 additions & 10 deletions markup5ever/interface/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Types for tag and attribute names, and tree-builder functionality.

use std::fmt;
use tendril::StrTendril;
Expand All @@ -15,7 +16,9 @@ pub use self::tree_builder::{NodeOrText, AppendNode, AppendText, create_element,
pub use self::tree_builder::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
pub use self::tree_builder::{TreeSink, Tracer, NextParserState};

/// https://www.w3.org/TR/REC-xml-names/#dt-expname
/// An [expanded name], containing the tag and the namespace.
///
/// [expanded name]: https://www.w3.org/TR/REC-xml-names/#dt-expname
#[derive(Copy, Clone, Eq, Hash)]
pub struct ExpandedName<'a> {
pub ns: &'a Namespace,
Expand All @@ -38,6 +41,24 @@ impl<'a> fmt::Debug for ExpandedName<'a> {
}
}

/// Helper to quickly create an expanded name.
///
/// # Examples
///
/// ```
/// # #[macro_use] extern crate markup5ever;
///
/// # fn main() {
/// use markup5ever::ExpandedName;
///
/// assert_eq!(
/// expanded_name!("", "div"),
/// ExpandedName {
/// ns: &ns!(),
/// local: &local_name!("div")
/// }
/// )
/// # }
/// ```
#[macro_export]
macro_rules! expanded_name {
("", $local: tt) => {
Expand All @@ -56,13 +77,11 @@ macro_rules! expanded_name {

pub mod tree_builder;

/// A name with a namespace.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)]
/// Fully qualified name. Used to depict names of tags and attributes.
/// A fully qualified name (with a namespace), used to depict names of tags and attributes.
///
/// Used to differentiate between similar XML fragments. For example:
/// Namespaces can be used to differentiate between similar XML fragments. For example:
///
/// ```ignore
/// ```text
/// // HTML
/// <table>
/// <tr>
Expand All @@ -82,7 +101,7 @@ pub mod tree_builder;
/// Without XML namespaces, we can't use those two fragments in the same document
/// at the same time. However if we declare a namespace we could instead say:
///
/// ```ignore
/// ```text
/// // Furniture XML
/// <furn:table>
/// <furn:name>African Coffee Table</furn:name>
Expand All @@ -91,24 +110,30 @@ pub mod tree_builder;
/// </furn:table>
/// ```
///
/// and bind it to a different name.
/// and bind the prefix `furn` to a different namespace.
///
/// For this reason we parse names that contain a colon in the following way:
///
/// ```ignore
/// ```text
/// <furn:table>
/// | |
/// | +- local name
/// |
/// prefix (when resolved gives namespace_url)
/// ```
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
pub struct QualName {
/// The namespace prefix before resolution (e.g. `furn` in `<furn:table>` above).
pub prefix: Option<Prefix>,
/// The namespace after resolution.
pub ns: Namespace,
/// The local name (e.g. `table` in `<furn:table>` above).
pub local: LocalName,
}

impl QualName {
/// Simple constructor function.
#[inline]
pub fn new(prefix: Option<Prefix>, ns: Namespace, local: LocalName) -> QualName {
QualName {
Expand All @@ -118,6 +143,7 @@ impl QualName {
}
}

/// Take a reference of `self` as an `ExpandedName`, dropping the unresolved prefix.
#[inline]
pub fn expanded(&self) -> ExpandedName {
ExpandedName {
Expand All @@ -127,15 +153,17 @@ impl QualName {
}
}

/// A tag attribute.
/// A tag attribute, e.g. `class="test"` in `<div class="test" ...>`.
///
/// The namespace on the attribute name is almost always the empty namespace, `ns!()`.
/// The tokenizer creates all attributes this way, but the tree
/// builder will adjust certain attribute names inside foreign
/// content (MathML, SVG).
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub struct Attribute {
/// The name of the attribute (e.g. the `class` in `<div class="test">`).
pub name: QualName,
/// The value of the attribute (e.g. the `"test"` in `<div class="test">`).
pub value: StrTendril,
}

Expand Down
69 changes: 58 additions & 11 deletions markup5ever/interface/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/// Something which can be inserted into the DOM.
///
/// Adjacent sibling text nodes are merged into a single node, so
/// the sink may not want to allocate a `Handle` for each.
//! This module contains functionality for managing the DOM, including adding/removing nodes.
//!
//! It can be used by a parser to create the DOM graph structure in memory.

use std::borrow::Cow;
use tendril::StrTendril;
Expand All @@ -28,11 +27,17 @@ pub enum NodeOrText<Handle> {
AppendText(StrTendril),
}

/// A document's quirks mode.
/// A document's quirks mode, for compatibility with old browsers. See [quirks mode on Wikipedia]
/// for more information.
///
/// [quirks mode on Wikipedia]: https://en.wikipedia.org/wiki/Quirks_mode
#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)]
pub enum QuirksMode {
/// Full quirks mode.
Quirks,
/// Almost standards mode.
LimitedQuirks,
/// Standards mode.
NoQuirks,
}

Expand All @@ -47,23 +52,59 @@ pub enum NextParserState {
Continue,
}

/// Special properties of an element, useful for tagging elements with this information.
#[derive(Default)]
pub struct ElementFlags {
/// A document fragment should be created, associated with the element,
/// and returned in TreeSink::get_template_contents
/// and returned in TreeSink::get_template_contents.
///
/// See [template-contents in the whatwg spec][whatwg template-contents].
///
/// https://html.spec.whatwg.org/multipage/#template-contents
/// [whatwg template-contents]: https://html.spec.whatwg.org/multipage/#template-contents
pub template: bool,

/// This boolean should be recorded with the element and returned
/// in TreeSink::is_mathml_annotation_xml_integration_point
///
/// https://html.spec.whatwg.org/multipage/#html-integration-point
/// See [html-integration-point in the whatwg spec][whatwg integration-point].
///
/// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point
pub mathml_annotation_xml_integration_point: bool,

// Prevent construction from outside module
_private: ()
}

/// A constructor for an element.
///
/// # Examples
///
/// Create an element like `<div class="test-class-name"></div>`:
///
/// ```
/// # #[macro_use] extern crate markup5ever;
///
/// # fn main() {
/// use markup5ever::{rcdom, QualName, Attribute};
/// use markup5ever::interface::create_element;
///
/// let mut dom = rcdom::RcDom::default();
/// let el = create_element(&mut dom,
/// // Namespaces and localnames use precomputed interned strings for
/// // speed. Use the macros ns! and local_name! to fetch them.
/// QualName::new(None, ns!(), local_name!("div")),
/// vec![
/// Attribute {
/// name: QualName::new(None, ns!(), local_name!("class")),
/// // In real scenarios, you would use a view onto an existing
/// // string if possible to avoid allocation. Tendrils have utilities
/// // for avoiding allocation & copying wherever possible.
/// value: String::from("test-class-name").into()
/// }
/// ]);
/// # }
///
/// ```
pub fn create_element<Sink>(sink: &mut Sink, name: QualName, attrs: Vec<Attribute>) -> Sink::Handle
where Sink: TreeSink {
let mut flags = ElementFlags::default();
Expand All @@ -84,6 +125,10 @@ where Sink: TreeSink {
sink.create_element(name, attrs, flags)
}

/// Methods a parser can use to create the DOM. The DOM provider implements this trait.
///
/// Having this as a trait potentially allows multiple implementations of the DOM to be used with
/// the same parser.
pub trait TreeSink {
/// `Handle` is a reference to a DOM node. The tree builder requires
/// that a `Handle` implements `Clone` to get another reference to
Expand All @@ -93,14 +138,14 @@ pub trait TreeSink {
/// The overall result of parsing.
///
/// This should default to Self, but default associated types are not stable yet.
/// (https://github.com/rust-lang/rust/issues/29661)
/// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661)
type Output;

/// Consume this sink and return the overall result of parsing.
///
/// TODO: This should default to `fn finish(self) -> Self::Output { self }`,
/// but default associated types are not stable yet.
/// (https://github.com/rust-lang/rust/issues/29661)
/// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661)
fn finish(self) -> Self::Output;

/// Signal a parse error.
Expand All @@ -121,7 +166,9 @@ pub trait TreeSink {
/// an associated document fragment called the "template contents" should
/// also be created. Later calls to self.get_template_contents() with that
/// given element return it.
/// https://html.spec.whatwg.org/multipage/#the-template-element
/// See [the template element in the whatwg spec][whatwg template].
///
/// [whatwg template]: https://html.spec.whatwg.org/multipage/#the-template-element
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags)
-> Self::Handle;

Expand Down
40 changes: 35 additions & 5 deletions markup5ever/rcdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,30 @@
//!
//! This is sufficient as a static parse tree, but don't build a
//! web browser using it. :)
//!
//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
//! format. For example, the following graph
//!
//! ```text
//! div
//! +- "text node"
//! +- span
//! ```
//! in HTML would be serialized as
//!
//! ```html
//! <div>text node<span></span></div>
//! ```
//!
//! See the [document object model article on wikipedia][dom wiki] for more information.
//!
//! This implementation stores the information associated with each node once, and then hands out
//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
//! a new ref and then a node will outlive the document. Nodes own their children, but only have
//! weak references to their parents.
//!
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model

use std::cell::{RefCell, Cell};
use std::collections::HashSet;
Expand All @@ -33,10 +57,13 @@ use serialize::TraversalScope::{IncludeNode, ChildrenOnly};

/// The different kinds of nodes in the DOM.
pub enum NodeData {
/// The `Document` itself.
/// The `Document` itself - the root node of an HTML document.
Document,

/// A `DOCTYPE` with name, public id, and system id.
/// A `DOCTYPE` with name, public id, and system id. See
/// [document type declaration on wikipedia][dtd wiki].
///
/// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
Doctype {
name: StrTendril,
public_id: StrTendril,
Expand All @@ -58,11 +85,14 @@ pub enum NodeData {
name: QualName,
attrs: RefCell<Vec<Attribute>>,

/// For HTML <template> elements, the template contents
/// https://html.spec.whatwg.org/multipage/#template-contents
/// For HTML \<template\> elements, the [template contents].
///
/// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
template_contents: Option<Handle>,

/// https://html.spec.whatwg.org/multipage/#html-integration-point
/// Whether the node is an [HTML integration point].
///
/// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
mathml_annotation_xml_integration_point: bool,
},

Expand Down