Don't re-export xml5ever Token variants #619

Merged: 1 commit, Jun 13, 2025
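
This change removes the module-level re-exports of the `Token` variants from `xml5ever::tokenizer` (`CharacterTokens`, `TagToken`, `PIToken`, `EOFToken`, and friends) and renames the variants so they read naturally through the enum path instead: `Token::Characters`, `Token::Tag`, `Token::ProcessingInstruction`, `Token::EndOfFile`. This follows the usual Rust convention of matching enum variants through their type, and the shorter names no longer collide with the `Doctype`, `Tag`, and `Pi` structs once the variants stop being re-exported at module level.

For downstream users the migration is mechanical: qualify every variant in imports and match arms. Below is a sketch of an updated sink; it is not part of this diff. The variant names are taken from the changes on this page, and the unit `Handle` associated type is an assumption modeled on the sinks in the touched examples.

use xml5ever::tokenizer::{ProcessResult, Token, TokenSink};

struct VariantNamer;

impl TokenSink for VariantNamer {
    type Handle = ();

    fn process_token(&self, token: Token) -> ProcessResult<()> {
        // Each arm now spells out the enum path; the old bare names
        // (CharacterTokens, TagToken, EOFToken, ...) no longer resolve.
        match token {
            Token::Characters(text) => println!("characters: {text}"),
            Token::NullCharacter => println!("null character"),
            Token::Tag(tag) => println!("{:?} tag: {}", tag.kind, &*tag.name.local),
            Token::ProcessingInstruction(pi) => println!("pi: <?{} {}?>", pi.target, pi.data),
            Token::Comment(comment) => println!("comment: {comment}"),
            Token::Doctype(doctype) => println!("doctype: {:?}", doctype.name),
            Token::EndOfFile => println!("end of file"),
            Token::ParseError(err) => eprintln!("parse error: {err}"),
        }
        ProcessResult::Continue
    }
}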
48 changes: 22 additions & 26 deletions rcdom/tests/xml-tokenizer.rs
@@ -21,11 +21,10 @@ use util::runner::{run_all, Test};
 
 use markup5ever::buffer_queue::BufferQueue;
 use xml5ever::tendril::{SliceExt, StrTendril};
-use xml5ever::tokenizer::{CharacterTokens, Token, TokenSink};
-use xml5ever::tokenizer::{CommentToken, EmptyTag, EndTag, ShortTag, StartTag, Tag};
-use xml5ever::tokenizer::{Doctype, DoctypeToken, PIToken, Pi};
-use xml5ever::tokenizer::{EOFToken, XmlTokenizer, XmlTokenizerOpts};
-use xml5ever::tokenizer::{NullCharacterToken, ParseError, TagToken};
+use xml5ever::tokenizer::{
+    Doctype, EmptyTag, EndTag, Pi, ShortTag, StartTag, Tag, Token, TokenSink, XmlTokenizer,
+    XmlTokenizerOpts,
+};
 use xml5ever::{ns, Attribute, LocalName, QualName};
 
 mod util {
@@ -81,7 +80,7 @@ impl TokenLogger {
     fn finish_str(&self) {
         if !self.current_str.borrow().is_empty() {
             let s = self.current_str.take();
-            self.tokens.borrow_mut().push(CharacterTokens(s));
+            self.tokens.borrow_mut().push(Token::Characters(s));
         }
     }
 
@@ -96,21 +95,20 @@ impl TokenSink for TokenLogger {
 
     fn process_token(&self, token: Token) -> ProcessResult<()> {
         match token {
-            CharacterTokens(b) => {
-                self.current_str.borrow_mut().push_slice(&b);
+            Token::Characters(characters) => {
+                self.current_str.borrow_mut().push_slice(&characters);
             },
 
-            NullCharacterToken => {
+            Token::NullCharacter => {
                 self.current_str.borrow_mut().push_char('\0');
             },
 
-            ParseError(_) => {
+            Token::ParseError(_) => {
                 if self.exact_errors {
-                    self.push(ParseError(Borrowed("")));
+                    self.push(Token::ParseError(Borrowed("")));
                 }
             },
-
-            TagToken(mut t) => {
+            Token::Tag(mut t) => {
                 // The spec seems to indicate that one can emit
                 // erroneous end tags with attrs, but the test
                 // cases don't contain them.
@@ -120,11 +118,9 @@ impl TokenSink for TokenLogger {
                     },
                     _ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
                 }
-                self.push(TagToken(t));
+                self.push(Token::Tag(t));
             },
-
-            EOFToken => (),
-
+            Token::EndOfFile => (),
             _ => self.push(token),
         };
         ProcessResult::Continue
@@ -211,7 +207,7 @@ fn json_to_token(js: &Value) -> Token {
     // Collect refs here so we don't have to use "ref" in all the patterns below.
     let args: Vec<&Value> = parts[1..].iter().collect();
     match &*parts[0].get_str() {
-        "StartTag" => TagToken(Tag {
+        "StartTag" => Token::Tag(Tag {
             kind: StartTag,
             name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
             attrs: args[1]
@@ -224,19 +220,19 @@ fn json_to_token(js: &Value) -> Token {
                 .collect(),
         }),
 
-        "EndTag" => TagToken(Tag {
+        "EndTag" => Token::Tag(Tag {
             kind: EndTag,
             name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
             attrs: vec![],
         }),
 
-        "ShortTag" => TagToken(Tag {
+        "ShortTag" => Token::Tag(Tag {
             kind: ShortTag,
             name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
             attrs: vec![],
         }),
 
-        "EmptyTag" => TagToken(Tag {
+        "EmptyTag" => Token::Tag(Tag {
             kind: EmptyTag,
             name: QualName::new(None, ns!(), LocalName::from(args[0].get_str())),
             attrs: args[1]
@@ -249,16 +245,16 @@ fn json_to_token(js: &Value) -> Token {
                 .collect(),
         }),
 
-        "Comment" => CommentToken(args[0].get_tendril()),
+        "Comment" => Token::Comment(args[0].get_tendril()),
 
-        "Character" => CharacterTokens(args[0].get_tendril()),
+        "Character" => Token::Characters(args[0].get_tendril()),
 
-        "PI" => PIToken(Pi {
+        "PI" => Token::ProcessingInstruction(Pi {
             target: args[0].get_tendril(),
             data: args[1].get_tendril(),
         }),
 
-        "DOCTYPE" => DoctypeToken(Doctype {
+        "DOCTYPE" => Token::Doctype(Doctype {
             name: args[0].get_nullable_tendril(),
             public_id: args[1].get_nullable_tendril(),
             system_id: args[2].get_nullable_tendril(),
@@ -278,7 +274,7 @@ fn json_to_tokens(js: &Value, exact_errors: bool) -> Vec<Token> {
     for tok in js.as_array().unwrap().iter() {
         match *tok {
             Value::String(ref s) if &s[..] == "ParseError" => {
-                let _ = sink.process_token(ParseError(Borrowed("")));
+                let _ = sink.process_token(Token::ParseError(Borrowed("")));
             },
             _ => {
                 let _ = sink.process_token(json_to_token(tok));
21 changes: 9 additions & 12 deletions xml5ever/examples/simple_xml_tokenizer.rs
@@ -16,10 +16,7 @@ use std::io;
 
 use markup5ever::buffer_queue::BufferQueue;
 use xml5ever::tendril::{ByteTendril, ReadExt};
-use xml5ever::tokenizer::{CharacterTokens, NullCharacterToken, ProcessResult, TagToken};
-use xml5ever::tokenizer::{CommentToken, PIToken, Pi};
-use xml5ever::tokenizer::{Doctype, DoctypeToken, EOFToken};
-use xml5ever::tokenizer::{ParseError, Token, TokenSink, XmlTokenizer};
+use xml5ever::tokenizer::{Doctype, Pi, ProcessResult, Token, TokenSink, XmlTokenizer};
 
 struct SimpleTokenPrinter;
 
@@ -28,29 +25,29 @@ impl TokenSink for SimpleTokenPrinter {
 
     fn process_token(&self, token: Token) -> ProcessResult<()> {
         match token {
-            CharacterTokens(b) => {
+            Token::Characters(b) => {
                 println!("TEXT: {}", &*b);
             },
-            NullCharacterToken => print!("NULL"),
-            TagToken(tag) => {
+            Token::NullCharacter => print!("NULL"),
+            Token::Tag(tag) => {
                 println!("{:?} {} ", tag.kind, &*tag.name.local);
             },
-            ParseError(err) => {
+            Token::ParseError(err) => {
                 println!("ERROR: {err}");
             },
-            PIToken(Pi {
+            Token::ProcessingInstruction(Pi {
                 ref target,
                 ref data,
             }) => {
                 println!("PI : <?{target} {data}?>");
             },
-            CommentToken(ref comment) => {
+            Token::Comment(ref comment) => {
                 println!("<!--{comment:?}-->");
             },
-            EOFToken => {
+            Token::EndOfFile => {
                 println!("EOF");
             },
-            DoctypeToken(Doctype {
+            Token::Doctype(Doctype {
                 ref name,
                 ref public_id,
                 ..
18 changes: 9 additions & 9 deletions xml5ever/examples/xml_tokenizer.rs
@@ -17,10 +17,10 @@ use std::io;
 
 use markup5ever::buffer_queue::BufferQueue;
 use xml5ever::tendril::{ByteTendril, ReadExt};
-use xml5ever::tokenizer::{CharacterTokens, NullCharacterToken, ProcessResult, TagToken};
-use xml5ever::tokenizer::{EmptyTag, EndTag, ShortTag, StartTag};
-use xml5ever::tokenizer::{PIToken, Pi};
-use xml5ever::tokenizer::{ParseError, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts};
+use xml5ever::tokenizer::{
+    EmptyTag, EndTag, Pi, ProcessResult, ShortTag, StartTag, Token, TokenSink, XmlTokenizer,
+    XmlTokenizerOpts,
+};
 
 #[derive(Clone)]
 struct TokenPrinter {
@@ -48,13 +48,13 @@ impl TokenSink for TokenPrinter {
 
     fn process_token(&self, token: Token) -> ProcessResult<()> {
         match token {
-            CharacterTokens(b) => {
+            Token::Characters(b) => {
                 for c in b.chars() {
                     self.do_char(c);
                 }
             },
-            NullCharacterToken => self.do_char('\0'),
-            TagToken(tag) => {
+            Token::NullCharacter => self.do_char('\0'),
+            Token::Tag(tag) => {
                 self.is_char(false);
                 // This is not proper HTML serialization, of course.
                 match tag.kind {
@@ -74,11 +74,11 @@ impl TokenSink for TokenPrinter {
                 }
                 println!(">");
             },
-            ParseError(err) => {
+            Token::ParseError(err) => {
                 self.is_char(false);
                 println!("ERROR: {err}");
             },
-            PIToken(Pi { target, data }) => {
+            Token::ProcessingInstruction(Pi { target, data }) => {
                 self.is_char(false);
                 println!("PI : <?{target:?} {data:?}?>");
             },
16 changes: 7 additions & 9 deletions xml5ever/src/tokenizer/interface.rs
@@ -14,8 +14,6 @@ use crate::tokenizer::ProcessResult;
 use crate::{Attribute, QualName};
 
 pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag};
-pub use self::Token::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
-pub use self::Token::{CommentToken, DoctypeToken, PIToken, TagToken};
 
 /// Tag kind denotes which kind of tag did we encounter.
 #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
@@ -87,20 +85,20 @@ pub struct Pi {
 #[derive(PartialEq, Eq, Debug)]
 pub enum Token {
     /// Doctype token
-    DoctypeToken(Doctype),
+    Doctype(Doctype),
     /// Token tag founds. This token applies to all
     /// possible kinds of tags (like start, end, empty tag, etc.).
-    TagToken(Tag),
+    Tag(Tag),
     /// Processing Instruction token
-    PIToken(Pi),
+    ProcessingInstruction(Pi),
     /// Comment token.
-    CommentToken(StrTendril),
+    Comment(StrTendril),
     /// Token that represents a series of characters.
-    CharacterTokens(StrTendril),
+    Characters(StrTendril),
     /// End of File found.
-    EOFToken,
+    EndOfFile,
     /// NullCharacter encountered.
-    NullCharacterToken,
+    NullCharacter,
     /// Error happened
     ParseError(Cow<'static, str>),
 }
24 changes: 11 additions & 13 deletions xml5ever/src/tokenizer/mod.rs
@@ -12,11 +12,9 @@ mod interface;
 mod qname;
 pub mod states;
 
-pub use self::interface::{CharacterTokens, EOFToken, NullCharacterToken};
-pub use self::interface::{CommentToken, DoctypeToken, PIToken, TagToken};
-pub use self::interface::{Doctype, Pi};
-pub use self::interface::{EmptyTag, EndTag, ShortTag, StartTag};
-pub use self::interface::{ParseError, Tag, TagKind, Token, TokenSink};
+pub use self::interface::{
+    Doctype, EmptyTag, EndTag, Pi, ShortTag, StartTag, Tag, TagKind, Token, TokenSink,
+};
 pub use crate::{LocalName, Namespace, Prefix};
 
 use crate::tendril::StrTendril;
@@ -397,7 +395,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
     }
 
     fn emit_char(&self, c: char) {
-        self.process_token(CharacterTokens(StrTendril::from_char(match c {
+        self.process_token(Token::Characters(StrTendril::from_char(match c {
             '\0' => '\u{FFFD}',
             c => c,
         })));
@@ -445,7 +443,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
             },
         }
 
-        let token = TagToken(Tag {
+        let token = Token::Tag(Tag {
             kind: self.current_tag_kind.get(),
             name: qname,
             attrs: self.current_tag_attrs.take(),
@@ -456,12 +454,12 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
 
     // The string must not contain '\0'!
     fn emit_chars(&self, b: StrTendril) {
-        self.process_token(CharacterTokens(b));
+        self.process_token(Token::Characters(b));
     }
 
     // Emits the current Processing Instruction
     fn emit_pi(&self) -> ProcessResult<<Sink as TokenSink>::Handle> {
-        let token = PIToken(Pi {
+        let token = Token::ProcessingInstruction(Pi {
             target: replace(&mut *self.current_pi_target.borrow_mut(), StrTendril::new()),
             data: replace(&mut *self.current_pi_data.borrow_mut(), StrTendril::new()),
         });
@@ -476,21 +474,21 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
     }
 
     fn emit_eof(&self) {
-        self.process_token(EOFToken);
+        self.process_token(Token::EndOfFile);
     }
 
     fn emit_error(&self, error: Cow<'static, str>) {
-        self.process_token(ParseError(error));
+        self.process_token(Token::ParseError(error));
     }
 
     fn emit_current_comment(&self) {
         let comment = self.current_comment.take();
-        self.process_token(CommentToken(comment));
+        self.process_token(Token::Comment(comment));
     }
 
     fn emit_current_doctype(&self) {
         let doctype = self.current_doctype.take();
-        self.process_token(DoctypeToken(doctype));
+        self.process_token(Token::Doctype(doctype));
     }
 
     fn doctype_id(&self, kind: DoctypeKind) -> RefMut<'_, Option<StrTendril>> {
16 changes: 8 additions & 8 deletions xml5ever/src/tree_builder/mod.rs
@@ -410,18 +410,18 @@ where
     fn process_token(&self, token: tokenizer::Token) -> ProcessResult<Self::Handle> {
         // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type.
         let token = match token {
-            tokenizer::ParseError(e) => {
+            tokenizer::Token::ParseError(e) => {
                 self.sink.parse_error(e);
                 return ProcessResult::Done;
             },
 
-            tokenizer::DoctypeToken(d) => Token::Doctype(d),
-            tokenizer::PIToken(instruction) => Token::Pi(instruction),
-            tokenizer::TagToken(x) => Token::Tag(x),
-            tokenizer::CommentToken(x) => Token::Comment(x),
-            tokenizer::NullCharacterToken => Token::NullCharacter,
-            tokenizer::EOFToken => Token::Eof,
-            tokenizer::CharacterTokens(x) => Token::Characters(x),
+            tokenizer::Token::Doctype(d) => Token::Doctype(d),
+            tokenizer::Token::ProcessingInstruction(instruction) => Token::Pi(instruction),
+            tokenizer::Token::Tag(x) => Token::Tag(x),
+            tokenizer::Token::Comment(x) => Token::Comment(x),
+            tokenizer::Token::NullCharacter => Token::NullCharacter,
+            tokenizer::Token::EndOfFile => Token::Eof,
+            tokenizer::Token::Characters(x) => Token::Characters(x),
         };
 
         self.process_to_completion(token)
2 changes: 1 addition & 1 deletion xml5ever/src/tree_builder/types.rs
@@ -17,7 +17,7 @@ pub enum XmlPhase {
     End,
 }
 
-/// A subset/refinement of `tokenizer::XToken`. Everything else is handled
+/// A subset/refinement of `tokenizer::Token`. Everything else is handled
 /// specially at the beginning of `process_token`.
 #[derive(PartialEq, Eq, Clone, Debug)]
 pub enum Token {
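
For reference, here is a minimal end-to-end driver against the renamed variants, modeled on xml5ever/examples/simple_xml_tokenizer.rs above. The sink, the sample input, and the exact BufferQueue/feed/end calls are assumptions based on that example and the collapsed portions of this diff, not part of the change itself.

use markup5ever::buffer_queue::BufferQueue;
use xml5ever::tendril::SliceExt;
use xml5ever::tokenizer::{ProcessResult, Token, TokenSink, XmlTokenizer};

// Minimal sink: prints character data and EOF, Debug-prints everything else.
struct KindPrinter;

impl TokenSink for KindPrinter {
    type Handle = ();

    fn process_token(&self, token: Token) -> ProcessResult<()> {
        match token {
            Token::Characters(text) => println!("characters: {text}"),
            Token::EndOfFile => println!("end of file"),
            other => println!("{other:?}"),
        }
        ProcessResult::Continue
    }
}

fn main() {
    let tokenizer = XmlTokenizer::new(KindPrinter, Default::default());

    // BufferQueue uses interior mutability, so pushing input takes &self.
    let input = BufferQueue::default();
    input.push_back("<doc id='a'>hello<?pi data?></doc>".to_tendril());
    let _ = tokenizer.feed(&input);
    tokenizer.end();
}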