Skip to content

Commit 4776d12

Browse files
authored
XInclude by xml:id (#198)
1 parent 7edcafd commit 4776d12

File tree

2 files changed

+165
-34
lines changed

2 files changed

+165
-34
lines changed

configure.php

Lines changed: 148 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
| Authors: Dave Barr <[email protected]> |
1717
| Hannes Magnusson <[email protected]> |
1818
| Gwynne Raskind <[email protected]> |
19+
| André L F S Bacci <[email protected]> |
1920
+----------------------------------------------------------------------+
2021
*/
2122

@@ -782,13 +783,15 @@ function dom_load( DOMDocument $dom , string $filename ) : bool
782783
return $dom->load( $filename , $options );
783784
}
784785

785-
function dom_saveload( DOMDocument $dom , string $filename = "" )
786+
function dom_saveload( DOMDocument $dom , string $filename = "" ) : string
786787
{
787788
if ( $filename == "" )
788789
$filename = __DIR__ . "/temp/manual.xml";
789790

790791
$dom->save( $filename );
791792
dom_load( $dom , $filename );
793+
794+
return $filename;
792795
}
793796

794797
echo "Loading and parsing {$ac["INPUT_FILENAME"]}... ";
@@ -807,46 +810,140 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
807810

808811

809812
echo "Running XInclude/XPointer... ";
810-
$total = 0;
811-
$maxrun = 10; //LIBXML_VERSION >= 21100 ? 1 : 10;
812-
for( $run = 0 ; $run < $maxrun ; $run++ )
813+
814+
$total = xinclude_run_byid( $dom );
815+
$total += xinclude_run_xpointer( $dom );
816+
817+
if ( $total == 0 )
818+
echo "failed.\n";
819+
else
820+
echo "done. Performed $total XIncludes.\n";
821+
822+
xinclude_report();
823+
xinclude_residual( $dom );
824+
825+
function xinclude_run_byid( DOMDocument $dom )
813826
{
814-
if ( $run > 0 )
815-
echo "$run ";
816-
libxml_clear_errors();
817-
$status = (int) $dom->xinclude();
818-
if ( $status <= 0 )
819-
break;
820-
$total += $status;
821-
if ( $maxrun > 1 && $run + 1 >= $maxrun )
827+
$total = 0;
828+
$maxrun = 10; //LIBXML_VERSION >= 21100 ? 1 : 10;
829+
for( $run = 0 ; $run < $maxrun ; $run++ )
822830
{
823-
echo "Recursive XInclude is too deep.\n";
824-
errors_are_bad(-1);
831+
echo "$run ";
832+
$xpath = new DOMXPath( $dom );
833+
$xpath->registerNamespace( "xi" , "http://www.w3.org/2001/XInclude" );
834+
$xincludes = $xpath->query( "//xi:include" );
835+
836+
$changed = false;
837+
foreach( $xincludes as $xinclude )
838+
{
839+
$xpointer = $xinclude->getAttribute( "xpointer" );
840+
$target = $xinclude->ownerDocument->getElementById( $xpointer );
841+
842+
if ( $target == null )
843+
continue;
844+
845+
$other = new DOMDocument( '1.0' , 'utf8' );
846+
$frags = $other->createDocumentFragment();
847+
$other->append( $frags );
848+
$frags->append( $other->importNode( $target , true ) ); // dup add
849+
850+
// "attributes in xml: namespace are not copied"
851+
852+
$oxpth = new DOMXPath( $other );
853+
$attribs = $oxpth->query( "//@*" );
854+
855+
foreach( $attribs as $attrib )
856+
if ( $attrib->prefix == "xml" )
857+
$attrib->parentNode->removeAttribute( $attrib->nodeName );
858+
859+
$insert = $dom->importNode( $frags , true ); // dup
860+
$xinclude->parentNode->insertBefore( $insert , $xinclude ); // add
861+
$xinclude->parentNode->removeChild( $xinclude ); // del
862+
863+
$total++;
864+
$changed = true;
865+
libxml_clear_errors();
866+
}
867+
868+
if ( ! $changed )
869+
return $total;
825870
}
871+
echo "XInclude nested too deeply (xml:id).\n";
872+
errors_are_bad( 1 );
826873
}
827874

828-
if ($total == 0) {
829-
echo "failed.\n";
830-
} else {
831-
echo "done. Performed $total XIncludes.\n";
875+
function xinclude_run_xpointer( DOMDocument $dom ) : int
876+
{
877+
$total = 0;
878+
$maxrun = 10; //LIBXML_VERSION >= 21100 ? 1 : 10;
879+
for( $run = 0 ; $run < $maxrun ; $run++ )
880+
{
881+
echo "$run ";
882+
$status = (int) $dom->xinclude();
883+
884+
if ( $status <= 0 )
885+
{
886+
return $total;
887+
}
888+
$total += $status;
889+
libxml_clear_errors();
890+
}
891+
echo "XInclude nested too deeply (xpointer).\n";
892+
errors_are_bad( 1 );
832893
}
833-
flush();
834894

835-
if ( $ac['XPOINTER_REPORTING'] == 'yes' || $ac['LANG'] == 'en' )
895+
function xinclude_report()
836896
{
897+
global $ac;
898+
899+
$report = $ac['XPOINTER_REPORTING'] == 'yes' || $ac['LANG'] == 'en';
900+
$output = $ac['STDERR_TO_STDOUT'] == 'yes' ? STDOUT : STDERR;
901+
$fatal = $ac['LANG'] == 'en';
902+
837903
$errors = libxml_get_errors();
838-
$output = ( $ac['STDERR_TO_STDOUT'] == 'yes' ) ? STDOUT : STDERR;
839-
if ( count( $errors ) > 0 )
904+
libxml_clear_errors();
905+
906+
if ( ! $report )
907+
return;
908+
909+
$count = 0;
910+
$prefix = realpath( __DIR__ );
911+
912+
$prevLine = -1;
913+
$prevClmn = -1;
914+
915+
foreach( $errors as $error )
840916
{
841-
fprintf( $output , "\n");
842-
foreach( $errors as $error )
843-
fprintf( $output , "{$error->message}\n");
844-
if ( $ac['LANG'] == 'en' )
845-
errors_are_bad(1);
917+
$msg = $error->message;
918+
$file = $error->file;
919+
$line = $error->line;
920+
$clmn = $error->column;
921+
922+
if ( $prevLine == $line && $prevClmn == $clmn )
923+
continue; // XPointer failures double reports sometimes
924+
$prevLine = $line;
925+
$prevClmn = $clmn;
926+
927+
$msg = rtrim( $msg );
928+
if ( str_starts_with( $file , $prefix ) )
929+
$file = substr( $file , strlen( $prefix ) + 1 );
930+
931+
if ( $count == 0 )
932+
fprintf( $output , "\n" );
933+
934+
fprintf( $output , "[{$file} {$line}:{$clmn}] $msg\n" );
935+
$count++;
936+
}
937+
938+
if ( $count > 0 )
939+
{
940+
fprintf( $output , "\n" );
941+
if ( $fatal )
942+
errors_are_bad( 1 );
846943
}
847944
}
848945

849-
if ( $ac['LANG'] != 'en' )
946+
function xinclude_residual( DOMDocument $dom )
850947
{
851948
// XInclude failures are soft errors on translations, so remove
852949
// residual XInclude tags on translations to keep them building.
@@ -872,11 +969,11 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
872969
case "tbody":
873970
$fixup = "<row><entry></entry></row>";
874971
break;
875-
// case "variablelist":
876-
// $fixup = "<varlistentry><term>></term><listitem><simpara></simpara></listitem></varlistentry>";
877-
// break;
972+
case "variablelist":
973+
$fixup = "<varlistentry><term></term><listitem><simpara></simpara></listitem></varlistentry>";
974+
break;
878975
default:
879-
echo "Unknown parent element of failed XInclude: $tagName\n";
976+
echo "Unknown parent of failed XInclude: $tagName\n";
880977
$explain = true;
881978
continue 2;
882979
}
@@ -899,7 +996,25 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
899996
state. Please report any "Unknown parent" messages on the doc-base
900997
repository, and focus on fixing XInclude/XPointers failures above.\n\n
901998
MSG;
902-
exit(-1); // stop here, do not let more messages further confuse the matter
999+
exit(1); // stop here, do not let more messages further confuse the matter
1000+
}
1001+
1002+
// XInclude by xml:id never duplicates xml:id, however, also using
1003+
// XInclude by XPath/XPointer may start causing duplications
1004+
// (see docs/structure.md). Crude and ugly fixup ahead, beware!
1005+
1006+
$list = array();
1007+
$nodes = $xpath->query( "//*[@xml:id]" );
1008+
foreach( $nodes as $node )
1009+
{
1010+
$id = $node->getAttribute( "xml:id" );
1011+
if ( isset( $list[ $id ] ) )
1012+
{
1013+
if ( ! str_contains( $id , '..' ) )
1014+
echo " Random removing duplicated xml:id: $id\n";
1015+
$node->removeAttribute( "xml:id" );
1016+
}
1017+
$list[ $id ] = $id;
9031018
}
9041019
}
9051020

@@ -1001,4 +1116,3 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
10011116

10021117
errors_are_bad(1); // Tell the shell that this script finished with an error.
10031118
}
1004-
?>

docs/structure.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ The PHP Manual sources are stored in Git repositories.
55
To checkout the PHP Manual sources, follow the steps in [Setting up a documentation environment](local-setup.md)
66

77
## File structure
8+
89
**Note for translators:** if any of the source files don't exist in your translation, the English content will be used
910
during the building process. This means that you *must not* place untranslated files in your translation tree. Otherwise,
1011
it will lead to a mess, confusion and may break some tools.
@@ -42,3 +43,19 @@ There are some other important files:
4243
Including common warnings, notes, etc.
4344
- *translation.xml* - this file is used to store all central translation info, like a small
4445
intro text for translators and the persons list. This file is not present in the English tree.
46+
47+
## `xml:id` structure
48+
49+
The PHP Manual is complex, and uses `xml:id` extensively for chunking,
50+
linking and XInclude purposes. So some care is necessary to avoid
51+
collisions. There are two pseudo-types of IDs used in manuals.
52+
53+
* **Structural IDs:** IDs that are present on structural elements of
54+
DocBook XML (like `<chapter>`, `<section>` and so on);
55+
56+
* **XInclude IDs:** IDs that are used as target of XIncludes.
57+
58+
Structural IDs are in the pattern `id.id`, while XInclude IDs use the
59+
pattern `structural.id..local.name`. That is, in Structural IDs, the
60+
name parts are separated with a single dot, while XInclude IDs start
61+
with a Structural ID, a `..` separator, and a local path suffix.

0 commit comments

Comments
 (0)