|
1 | 1 | from __future__ import absolute_import, division, unicode_literals |
2 | 2 |
|
| 3 | +from genshi.core import QName |
3 | 4 | from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT |
4 | 5 | from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT |
5 | 6 | from genshi.output import NamespaceFlattener |
6 | 7 |
|
7 | 8 | from . import _base |
8 | 9 |
|
9 | | -from html5lib.constants import voidElements |
| 10 | +from html5lib.constants import voidElements, namespaces |
10 | 11 |
|
11 | 12 | class TreeWalker(_base.TreeWalker): |
12 | 13 | def __iter__(self): |
13 | | - depth = 0 |
14 | | - ignore_until = None |
| 14 | + # Buffer the events so we can pass in the following one |
15 | 15 | previous = None |
16 | 16 | for event in self.tree: |
17 | 17 | if previous is not None: |
18 | | - if previous[0] == START: |
19 | | - depth += 1 |
20 | | - if ignore_until <= depth: |
21 | | - ignore_until = None |
22 | | - if ignore_until is None: |
23 | | - for token in self.tokens(previous, event): |
24 | | - yield token |
25 | | - if token["type"] == "EmptyTag": |
26 | | - ignore_until = depth |
27 | | - if previous[0] == END: |
28 | | - depth -= 1 |
| 18 | + for token in self.tokens(previous, event): |
| 19 | + yield token |
29 | 20 | previous = event |
| 21 | + |
| 22 | + # Don't forget the final event! |
30 | 23 | if previous is not None: |
31 | | - if ignore_until is None or ignore_until <= depth: |
32 | | - for token in self.tokens(previous, None): |
33 | | - yield token |
34 | | - elif ignore_until is not None: |
35 | | - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") |
| 24 | + for token in self.tokens(previous, None): |
| 25 | + yield token |
36 | 26 |
|
37 | 27 | def tokens(self, event, next): |
38 | 28 | kind, data, pos = event |
39 | 29 | if kind == START: |
40 | | - tag, attrib = data |
| 30 | + tag, attribs = data |
41 | 31 | name = tag.localname |
42 | 32 | namespace = tag.namespace |
43 | | - if tag in voidElements: |
44 | | - for token in self.emptyTag(namespace, name, list(attrib), |
| 33 | + converted_attribs = {} |
| 34 | + for k, v in attribs: |
| 35 | + if isinstance(k, QName): |
| 36 | + converted_attribs[(k.namespace, k.localname)] = v |
| 37 | + else: |
| 38 | + converted_attribs[(None, k)] = v |
| 39 | + |
| 40 | + if namespace == namespaces["html"] and name in voidElements: |
| 41 | + for token in self.emptyTag(namespace, name, converted_attribs, |
45 | 42 | not next or next[0] != END |
46 | 43 | or next[1] != tag): |
47 | 44 | yield token |
48 | 45 | else: |
49 | | - yield self.startTag(namespace, name, list(attrib)) |
| 46 | + yield self.startTag(namespace, name, converted_attribs) |
50 | 47 |
|
51 | 48 | elif kind == END: |
52 | 49 | name = data.localname |
|
0 commit comments