From 75b58bd6209f253bf9045a4de9cc1348205b726e Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:56:51 +0200 Subject: [PATCH 1/4] Use a prepending strategy instead of appending in dom_set_old_ns() Looping to the end of the list is wasteful. We can just put the new nodes at the front of the list. I don't believe we can fully prepend, because libxml2 may assume that the xml namespace is the first one, so we'll put the new ones as the second one. --- ext/dom/php_dom.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 6867e5acf128e..144a93fd40530 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1369,11 +1369,16 @@ void dom_normalize (xmlNodePtr nodep) /* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { - xmlNs *cur; - if (doc == NULL) return; + ZEND_ASSERT(ns->next == NULL); + + /* Note: we'll use a prepend strategy instead of append to + * make sure we don't lose performance when the list is long. + * As libxml2 could assume the xml node is the first one, we'll place our + * new entries after the first one. */ + if (doc->oldNs == NULL) { doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); if (doc->oldNs == NULL) { @@ -1383,13 +1388,10 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { doc->oldNs->type = XML_LOCAL_NAMESPACE; doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); + } else { + ns->next = doc->oldNs->next; } - - cur = doc->oldNs; - while (cur->next != NULL) { - cur = cur->next; - } - cur->next = ns; + doc->oldNs->next = ns; } /* }}} end dom_set_old_ns */ From 84284a49e3e63d136bd8206525f0362020a58dad Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:58:04 +0200 Subject: [PATCH 2/4] Reuse namespaces from doc->oldNs if possible in dom_get_ns() --- ext/dom/php_dom.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 144a93fd40530..13c193d88bc13 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1511,22 +1511,38 @@ NAMESPACE_ERR: Raised if /* {{{ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) */ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) { - xmlNsPtr nsptr = NULL; - - *errorcode = 0; + xmlNsPtr nsptr; if (! ((prefix && !strcmp (prefix, "xml") && strcmp(uri, (char *)XML_XML_NAMESPACE)) || (prefix && !strcmp (prefix, "xmlns") && strcmp(uri, (char *)DOM_XMLNS_NAMESPACE)) || (prefix && !strcmp(uri, (char *)DOM_XMLNS_NAMESPACE) && strcmp (prefix, "xmlns")))) { + /* Reuse the old namespaces from doc->oldNs if possible, before creating a new one. + * This will prevent the oldNs list from growing with duplicates. */ + xmlDocPtr doc = nodep->doc; + if (doc && doc->oldNs != NULL) { + nsptr = doc->oldNs; + do { + if (xmlStrEqual(nsptr->prefix, (xmlChar *)prefix) && xmlStrEqual(nsptr->href, (xmlChar *)uri)) { + goto out; + } + nsptr = nsptr->next; + } while (nsptr); + } + /* Couldn't reuse one, create a new one. */ nsptr = xmlNewNs(nodep, (xmlChar *)uri, (xmlChar *)prefix); + if (UNEXPECTED(nsptr == NULL)) { + goto err; + } + } else { + goto err; } - if (nsptr == NULL) { - *errorcode = NAMESPACE_ERR; - } - +out: + *errorcode = 0; return nsptr; - +err: + *errorcode = NAMESPACE_ERR; + return NULL; } /* }}} end dom_get_ns */ From 726beeb80317cbbe2d95b4d93c6f3086822e9d32 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Wed, 7 Jun 2023 20:02:10 +0200 Subject: [PATCH 3/4] Add a test for reconciling a reused namespace --- ext/dom/tests/reconcile_reused_namespace.phpt | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 ext/dom/tests/reconcile_reused_namespace.phpt diff --git a/ext/dom/tests/reconcile_reused_namespace.phpt b/ext/dom/tests/reconcile_reused_namespace.phpt new file mode 100644 index 0000000000000..5f9ab6c0d80fa --- /dev/null +++ b/ext/dom/tests/reconcile_reused_namespace.phpt @@ -0,0 +1,42 @@ +--TEST-- +Reconcile a reused namespace from doc->oldNs +--EXTENSIONS-- +dom +--FILE-- +createElementNS('http://www.w3.org/2000/xhtml', 'html'); + +$dom->loadXML(<< + +XML); +$root = $dom->firstElementChild; + +echo "Add first\n"; +$element = $dom->createElementNS('http://example.com/B', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add second\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add third\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +var_dump($dom->saveXML()); + +?> +--EXPECT-- +Add first +Add second +Add third +string(201) " +Hello WorldHello WorldHello World +" From 4547312cd9792818c9e3b00cab2044718da539b8 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Wed, 7 Jun 2023 20:30:24 +0200 Subject: [PATCH 4/4] Explain why there can't be a cycle between oldNs and nsDef --- ext/dom/php_dom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 13c193d88bc13..d3401f007dc49 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1413,6 +1413,9 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) } else { prevns->next = nsdftptr; } + /* Note: we can't get here if the ns is already on the oldNs list. + * This is because in that case the definition won't be on the node, and + * therefore won't be in the nodep->nsDef list. */ dom_set_old_ns(doc, curns); curns = prevns; }