Skip to content

Initialize logger for rcdom examples, cleanup html5ever a bit #596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion html5ever/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1616,7 +1616,7 @@ mod test {
}

fn finish_str(&self) {
if self.current_str.borrow().len() > 0 {
if !self.current_str.borrow().is_empty() {
let s = self.current_str.take();
self.tokens.borrow_mut().push(CharacterTokens(s));
}
Expand Down
51 changes: 40 additions & 11 deletions html5ever/src/tree_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -947,29 +947,51 @@ where
}
}

/// Reconstruct the active formatting elements.
fn reconstruct_formatting(&self) {
/// <https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements>
fn reconstruct_active_formatting_elements(&self) {
{
let active_formatting = self.active_formatting.borrow();
let last = unwrap_or_return!(active_formatting.last());

// Step 1. If there are no entries in the list of active formatting elements,
// then there is nothing to reconstruct; stop this algorithm.
let Some(last) = active_formatting.last() else {
return;
};

// Step 2. If the last (most recently added) entry in the list of active formatting elements is a marker,
// or if it is an element that is in the stack of open elements, then there is nothing to reconstruct;
// stop this algorithm.
if self.is_marker_or_open(last) {
return;
}
}

// Step 3. Let entry be the last (most recently added) element in the list of active formatting elements.
// NOTE: We track the index of the entry in the list rather than the entry itself.
let mut entry_index = self.active_formatting.borrow().len() - 1;
loop {
// Step 4. Rewind: If there are no entries before entry in the list of active formatting elements,
// then jump to the step labeled create.
if entry_index == 0 {
break;
}

// Step 5. Let entry be the entry one earlier than entry in the list of active formatting elements.
entry_index -= 1;

// Step 6. If entry is neither a marker nor an element that is also in the stack of open elements,
// go to the step labeled rewind.
// Step 7. Advance: Let entry be the element one later than entry in the list
// of active formatting elements.
if self.is_marker_or_open(&self.active_formatting.borrow()[entry_index]) {
entry_index += 1;
break;
}
}

loop {
// Step 8. Create: Insert an HTML element for the token for which the element entry was created,
// to obtain new element.
let tag = match self.active_formatting.borrow()[entry_index] {
FormatEntry::Element(_, ref t) => t.clone(),
FormatEntry::Marker => {
Expand All @@ -985,8 +1007,13 @@ where
tag.name.clone(),
tag.attrs.clone(),
);

// Step 9. Replace the entry for entry in the list with an entry for new element.
self.active_formatting.borrow_mut()[entry_index] =
FormatEntry::Element(new_element, tag);

// Step 10. If the entry for new element in the list of active formatting elements is
// not the last entry in the list, return to the step labeled advance.
if entry_index == self.active_formatting.borrow().len() - 1 {
break;
}
Expand Down Expand Up @@ -1091,15 +1118,17 @@ where
self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone()))
}

//§ closing-elements-that-have-implied-end-tags
fn generate_implied_end<TagSet>(&self, set: TagSet)
/// <https://html.spec.whatwg.org/#generate-implied-end-tags>
fn generate_implied_end_tags<TagSet>(&self, set: TagSet)
where
TagSet: Fn(ExpandedName) -> bool,
{
loop {
{
let open_elems = self.open_elems.borrow();
let elem = unwrap_or_return!(open_elems.last());
let Some(elem) = open_elems.last() else {
return;
};
let elem_name = self.sink.elem_name(elem);
if !set(elem_name.expanded()) {
return;
Expand All @@ -1110,7 +1139,7 @@ where
}

fn generate_implied_end_except(&self, except: LocalName) {
self.generate_implied_end(|p| {
self.generate_implied_end_tags(|p| {
if *p.ns == ns!(html) && *p.local == except {
false
} else {
Expand Down Expand Up @@ -1155,8 +1184,8 @@ where
self.pop_until(|p| *p.ns == ns!(html) && *p.local == name)
}

// Pop elements until one with the specified name has been popped.
// Signal an error if it was not the first one.
/// Pop elements until one with the specified name has been popped.
/// Signal an error if it was not the first one.
fn expect_to_close(&self, name: LocalName) {
if self.pop_until_named(name.clone()) != 1 {
self.sink.parse_error(format_if!(
Expand All @@ -1170,7 +1199,7 @@ where

fn close_p_element(&self) {
declare_tag_set!(implied = [cursory_implied_end] - "p");
self.generate_implied_end(implied);
self.generate_implied_end_tags(implied);
self.expect_to_close(local_name!("p"));
}

Expand Down Expand Up @@ -1278,7 +1307,7 @@ where
}

fn close_the_cell(&self) {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
if self.pop_until(td_th) != 1 {
self.sink
.parse_error(Borrowed("expected to close <td> or <th> with cell"));
Expand Down
44 changes: 22 additions & 22 deletions html5ever/src/tree_builder/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ where
if !self.in_html_elem_named(local_name!("template")) {
self.unexpected(&tag);
} else {
self.generate_implied_end(thorough_implied_end);
self.generate_implied_end_tags(thorough_implied_end);
self.expect_to_close(local_name!("template"));
self.clear_active_formatting_to_marker();
self.template_modes.borrow_mut().pop();
Expand Down Expand Up @@ -287,7 +287,7 @@ where
Token::NullCharacter => self.unexpected(&token),

Token::Characters(_, text) => {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
if any_not_whitespace(&text) {
self.frameset_ok.set(false);
}
Expand Down Expand Up @@ -464,10 +464,10 @@ where
tag @ <button> => {
if self.in_scope_named(default_scope, local_name!("button")) {
self.sink.parse_error(Borrowed("nested buttons"));
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
self.pop_until_named(local_name!("button"));
}
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_element_for(tag);
self.frameset_ok.set(false);
ProcessResult::Done
Expand All @@ -480,7 +480,7 @@ where
if !self.in_scope_named(default_scope, tag.name.clone()) {
self.unexpected(&tag);
} else {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
self.expect_to_close(tag.name);
}
ProcessResult::Done
Expand All @@ -500,7 +500,7 @@ where
self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
return ProcessResult::Done;
}
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
let current = self.current_node().clone();
self.remove_from_stack(&node);
if !self.sink.same_node(&current, &node) {
Expand All @@ -511,7 +511,7 @@ where
self.sink.parse_error(Borrowed("Form element not in scope on </form>"));
return ProcessResult::Done;
}
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
if !self.current_node_named(local_name!("form")) {
self.sink.parse_error(Borrowed("Bad open element on </form>"));
}
Expand Down Expand Up @@ -546,7 +546,7 @@ where

tag @ </h1> </h2> </h3> </h4> </h5> </h6> => {
if self.in_scope(default_scope, |n| self.elem_in(&n, heading_tag)) {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
if !self.current_node_named(tag.name) {
self.sink.parse_error(Borrowed("Closing wrong heading tag"));
}
Expand All @@ -559,23 +559,23 @@ where

tag @ <a> => {
self.handle_misnested_a_tags(&tag);
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.create_formatting_element_for(tag);
ProcessResult::Done
}

tag @ <b> <big> <code> <em> <font> <i> <s> <small> <strike> <strong> <tt> <u> => {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.create_formatting_element_for(tag);
ProcessResult::Done
}

tag @ <nobr> => {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
if self.in_scope_named(default_scope, local_name!("nobr")) {
self.sink.parse_error(Borrowed("Nested <nobr>"));
self.adoption_agency(local_name!("nobr"));
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
}
self.create_formatting_element_for(tag);
ProcessResult::Done
Expand All @@ -588,7 +588,7 @@ where
}

tag @ <applet> <marquee> <object> => {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_element_for(tag);
self.active_formatting.borrow_mut().push(FormatEntry::Marker);
self.frameset_ok.set(false);
Expand All @@ -599,7 +599,7 @@ where
if !self.in_scope_named(default_scope, tag.name.clone()) {
self.unexpected(&tag);
} else {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
self.expect_to_close(tag.name);
self.clear_active_formatting_to_marker();
}
Expand Down Expand Up @@ -630,7 +630,7 @@ where
local_name!("input") => self.is_type_hidden(&tag),
_ => false,
};
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_and_pop_element_for(tag);
if !keep_frameset_ok {
self.frameset_ok.set(false);
Expand Down Expand Up @@ -666,7 +666,7 @@ where

tag @ <xmp> => {
self.close_p_element_in_button_scope();
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.frameset_ok.set(false);
self.parse_raw_data(tag, Rawtext)
}
Expand All @@ -683,7 +683,7 @@ where
// <noscript> handled in wildcard case below

tag @ <select> => {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_element_for(tag);
self.frameset_ok.set(false);
// NB: mode == InBody but possibly self.mode != mode, if
Expand All @@ -700,14 +700,14 @@ where
if self.current_node_named(local_name!("option")) {
self.pop();
}
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_element_for(tag);
ProcessResult::Done
}

tag @ <rb> <rtc> => {
if self.in_scope_named(default_scope, local_name!("ruby")) {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
}
if !self.current_node_named(local_name!("ruby")) {
self.unexpected(&tag);
Expand Down Expand Up @@ -741,7 +741,7 @@ where
if self.opts.scripting_enabled && tag.name == local_name!("noscript") {
self.parse_raw_data(tag, Rawtext)
} else {
self.reconstruct_formatting();
self.reconstruct_active_formatting_elements();
self.insert_element_for(tag);
ProcessResult::Done
}
Expand Down Expand Up @@ -924,7 +924,7 @@ where
tag @ <caption> <col> <colgroup> <tbody> <td> <tfoot>
<th> <thead> <tr> </table> </caption> => {
if self.in_scope_named(table_scope, local_name!("caption")) {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
self.expect_to_close(local_name!("caption"));
self.clear_active_formatting_to_marker();
match tag {
Expand Down Expand Up @@ -1087,7 +1087,7 @@ where
InsertionMode::InCell => match_token!(token {
tag @ </td> </th> => {
if self.in_scope_named(table_scope, tag.name.clone()) {
self.generate_implied_end(cursory_implied_end);
self.generate_implied_end_tags(cursory_implied_end);
self.expect_to_close(tag.name);
self.clear_active_formatting_to_marker();
self.mode.set(InsertionMode::InRow);
Expand Down
1 change: 1 addition & 0 deletions rcdom/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ xml5ever = { version = "0.22", path = "../xml5ever" }
[dev-dependencies]
libtest-mimic = "0.8.1"
serde_json = "1.0"
env_logger = "0.10"

[[test]]
name = "html-tokenizer"
Expand Down
2 changes: 2 additions & 0 deletions rcdom/examples/hello_xml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use xml5ever::tendril::TendrilSink;
use xml5ever::tree_builder::TreeSink;

fn main() {
env_logger::init();

// To parse a string into a tree of nodes, we need to invoke
// `parse_document` and supply it with a TreeSink implementation (RcDom).
let dom: RcDom = parse_document(RcDom::default(), Default::default()).one("<hello>XML</hello>");
Expand Down
2 changes: 2 additions & 0 deletions rcdom/examples/html2html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use html5ever::{parse_document, serialize};
use rcdom::{RcDom, SerializableHandle};

fn main() {
env_logger::init();

let opts = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
Expand Down
2 changes: 2 additions & 0 deletions rcdom/examples/xml_tree_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ fn walk(prefix: &str, handle: &Handle) {
}

fn main() {
env_logger::init();

let stdin = io::stdin();

// To parse XML into a tree form, we need a TreeSink
Expand Down
2 changes: 1 addition & 1 deletion rcdom/tests/html-tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl TokenLogger {
}

fn finish_str(&self) {
if self.current_str.borrow().len() > 0 {
if !self.current_str.borrow().is_empty() {
let s = self.current_str.take();
self.tokens.borrow_mut().push(CharacterTokens(s));
}
Expand Down
6 changes: 3 additions & 3 deletions rcdom/tests/html-tree-builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String

fn serialize(buf: &mut String, indent: usize, handle: Handle) {
buf.push('|');
buf.extend(iter::repeat(" ").take(indent));
buf.extend(iter::repeat_n(" ", indent));

let node = handle;
match node.data {
Expand Down Expand Up @@ -127,7 +127,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {

for attr in attrs.into_iter() {
buf.push('|');
buf.extend(iter::repeat(" ").take(indent + 2));
buf.extend(iter::repeat_n(" ", indent + 2));
match attr.name.ns {
ns!(xlink) => buf.push_str("xlink "),
ns!(xml) => buf.push_str("xml "),
Expand All @@ -152,7 +152,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
{
if let Some(ref content) = &*template_contents.borrow() {
buf.push('|');
buf.extend(iter::repeat(" ").take(indent + 2));
buf.extend(iter::repeat_n(" ", indent + 2));
buf.push_str("content\n");
for child in content.children.borrow().iter() {
serialize(buf, indent + 4, child.clone());
Expand Down
Loading