From 75b58bd6209f253bf9045a4de9cc1348205b726e Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 5 Jun 2023 21:56:51 +0200
Subject: [PATCH 1/4] Use a prepending strategy instead of appending in
dom_set_old_ns()
Looping to the end of the list is wasteful. We can just put the new
nodes at the front of the list. I don't believe we can fully prepend,
because libxml2 may assume that the xml namespace is the first one, so
we'll put the new ones as the second one.
---
ext/dom/php_dom.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 6867e5acf128e..144a93fd40530 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1369,11 +1369,16 @@ void dom_normalize (xmlNodePtr nodep)
/* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */
void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) {
- xmlNs *cur;
-
if (doc == NULL)
return;
+ ZEND_ASSERT(ns->next == NULL);
+
+ /* Note: we'll use a prepend strategy instead of append to
+ * make sure we don't lose performance when the list is long.
+ * As libxml2 could assume the xml node is the first one, we'll place our
+ * new entries after the first one. */
+
if (doc->oldNs == NULL) {
doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
if (doc->oldNs == NULL) {
@@ -1383,13 +1388,10 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) {
doc->oldNs->type = XML_LOCAL_NAMESPACE;
doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE);
doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml");
+ } else {
+ ns->next = doc->oldNs->next;
}
-
- cur = doc->oldNs;
- while (cur->next != NULL) {
- cur = cur->next;
- }
- cur->next = ns;
+ doc->oldNs->next = ns;
}
/* }}} end dom_set_old_ns */
From 84284a49e3e63d136bd8206525f0362020a58dad Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 5 Jun 2023 21:58:04 +0200
Subject: [PATCH 2/4] Reuse namespaces from doc->oldNs if possible in
dom_get_ns()
---
ext/dom/php_dom.c | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 144a93fd40530..13c193d88bc13 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1511,22 +1511,38 @@ NAMESPACE_ERR: Raised if
/* {{{ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) */
xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) {
- xmlNsPtr nsptr = NULL;
-
- *errorcode = 0;
+ xmlNsPtr nsptr;
if (! ((prefix && !strcmp (prefix, "xml") && strcmp(uri, (char *)XML_XML_NAMESPACE)) ||
(prefix && !strcmp (prefix, "xmlns") && strcmp(uri, (char *)DOM_XMLNS_NAMESPACE)) ||
(prefix && !strcmp(uri, (char *)DOM_XMLNS_NAMESPACE) && strcmp (prefix, "xmlns")))) {
+ /* Reuse the old namespaces from doc->oldNs if possible, before creating a new one.
+ * This will prevent the oldNs list from growing with duplicates. */
+ xmlDocPtr doc = nodep->doc;
+ if (doc && doc->oldNs != NULL) {
+ nsptr = doc->oldNs;
+ do {
+ if (xmlStrEqual(nsptr->prefix, (xmlChar *)prefix) && xmlStrEqual(nsptr->href, (xmlChar *)uri)) {
+ goto out;
+ }
+ nsptr = nsptr->next;
+ } while (nsptr);
+ }
+ /* Couldn't reuse one, create a new one. */
nsptr = xmlNewNs(nodep, (xmlChar *)uri, (xmlChar *)prefix);
+ if (UNEXPECTED(nsptr == NULL)) {
+ goto err;
+ }
+ } else {
+ goto err;
}
- if (nsptr == NULL) {
- *errorcode = NAMESPACE_ERR;
- }
-
+out:
+ *errorcode = 0;
return nsptr;
-
+err:
+ *errorcode = NAMESPACE_ERR;
+ return NULL;
}
/* }}} end dom_get_ns */
From 726beeb80317cbbe2d95b4d93c6f3086822e9d32 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 7 Jun 2023 20:02:10 +0200
Subject: [PATCH 3/4] Add a test for reconciling a reused namespace
---
ext/dom/tests/reconcile_reused_namespace.phpt | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 ext/dom/tests/reconcile_reused_namespace.phpt
diff --git a/ext/dom/tests/reconcile_reused_namespace.phpt b/ext/dom/tests/reconcile_reused_namespace.phpt
new file mode 100644
index 0000000000000..5f9ab6c0d80fa
--- /dev/null
+++ b/ext/dom/tests/reconcile_reused_namespace.phpt
@@ -0,0 +1,42 @@
+--TEST--
+Reconcile a reused namespace from doc->oldNs
+--EXTENSIONS--
+dom
+--FILE--
+createElementNS('http://www.w3.org/2000/xhtml', 'html');
+
+$dom->loadXML(<<
+
+XML);
+$root = $dom->firstElementChild;
+
+echo "Add first\n";
+$element = $dom->createElementNS('http://example.com/B', 'p', 'Hello World');
+$root->appendChild($element);
+
+echo "Add second\n";
+$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World');
+$root->appendChild($element);
+
+echo "Add third\n";
+$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World');
+$root->appendChild($element);
+
+var_dump($dom->saveXML());
+
+?>
+--EXPECT--
+Add first
+Add second
+Add third
+string(201) "
+Hello WorldHello WorldHello World
+"
From 4547312cd9792818c9e3b00cab2044718da539b8 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 7 Jun 2023 20:30:24 +0200
Subject: [PATCH 4/4] Explain why there can't be a cycle between oldNs and
nsDef
---
ext/dom/php_dom.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 13c193d88bc13..d3401f007dc49 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1413,6 +1413,9 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep)
} else {
prevns->next = nsdftptr;
}
+ /* Note: we can't get here if the ns is already on the oldNs list.
+ * This is because in that case the definition won't be on the node, and
+ * therefore won't be in the nodep->nsDef list. */
dom_set_old_ns(doc, curns);
curns = prevns;
}