1- /* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6 +)
1+ /* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7 +)
22 __ __ _
33 ___\ \/ /_ __ __ _| |_
44 / _ \\ /| '_ \ / _` | __|
3434 Copyright (c) 2019 Vadim Zeitlin <[email protected] > 3535 Copyright (c) 2021 Dong-hee Na <[email protected] > 3636 Copyright (c) 2022 Samanta Navarro <[email protected] > 37+ Copyright (c) 2022 Jeffrey Walton <[email protected] > 3738 Licensed under the MIT license:
3839
3940 Permission is hereby granted, free of charge, to any person obtaining
133134 * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
134135 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
135136 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
136- * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
137+ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
137138 * Windows >=Vista (rand_s): _WIN32. \
138139 \
139140 If insist on not using any of these, bypass this error by defining \
@@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
722723 return XML_ParserCreate_MM (encodingName , NULL , tmp );
723724}
724725
726+ // "xml=http://www.w3.org/XML/1998/namespace"
725727static const XML_Char implicitContext []
726728 = {ASCII_x , ASCII_m , ASCII_l , ASCII_EQUALS , ASCII_h ,
727729 ASCII_t , ASCII_t , ASCII_p , ASCII_COLON , ASCII_SLASH ,
@@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
37043706 return XML_ERROR_NONE ;
37053707}
37063708
3709+ static XML_Bool
3710+ is_rfc3986_uri_char (XML_Char candidate ) {
3711+ // For the RFC 3986 ANBF grammar see
3712+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3713+
3714+ switch (candidate ) {
3715+ // From rule "ALPHA" (uppercase half)
3716+ case 'A' :
3717+ case 'B' :
3718+ case 'C' :
3719+ case 'D' :
3720+ case 'E' :
3721+ case 'F' :
3722+ case 'G' :
3723+ case 'H' :
3724+ case 'I' :
3725+ case 'J' :
3726+ case 'K' :
3727+ case 'L' :
3728+ case 'M' :
3729+ case 'N' :
3730+ case 'O' :
3731+ case 'P' :
3732+ case 'Q' :
3733+ case 'R' :
3734+ case 'S' :
3735+ case 'T' :
3736+ case 'U' :
3737+ case 'V' :
3738+ case 'W' :
3739+ case 'X' :
3740+ case 'Y' :
3741+ case 'Z' :
3742+
3743+ // From rule "ALPHA" (lowercase half)
3744+ case 'a' :
3745+ case 'b' :
3746+ case 'c' :
3747+ case 'd' :
3748+ case 'e' :
3749+ case 'f' :
3750+ case 'g' :
3751+ case 'h' :
3752+ case 'i' :
3753+ case 'j' :
3754+ case 'k' :
3755+ case 'l' :
3756+ case 'm' :
3757+ case 'n' :
3758+ case 'o' :
3759+ case 'p' :
3760+ case 'q' :
3761+ case 'r' :
3762+ case 's' :
3763+ case 't' :
3764+ case 'u' :
3765+ case 'v' :
3766+ case 'w' :
3767+ case 'x' :
3768+ case 'y' :
3769+ case 'z' :
3770+
3771+ // From rule "DIGIT"
3772+ case '0' :
3773+ case '1' :
3774+ case '2' :
3775+ case '3' :
3776+ case '4' :
3777+ case '5' :
3778+ case '6' :
3779+ case '7' :
3780+ case '8' :
3781+ case '9' :
3782+
3783+ // From rule "pct-encoded"
3784+ case '%' :
3785+
3786+ // From rule "unreserved"
3787+ case '-' :
3788+ case '.' :
3789+ case '_' :
3790+ case '~' :
3791+
3792+ // From rule "gen-delims"
3793+ case ':' :
3794+ case '/' :
3795+ case '?' :
3796+ case '#' :
3797+ case '[' :
3798+ case ']' :
3799+ case '@' :
3800+
3801+ // From rule "sub-delims"
3802+ case '!' :
3803+ case '$' :
3804+ case '&' :
3805+ case '\'' :
3806+ case '(' :
3807+ case ')' :
3808+ case '*' :
3809+ case '+' :
3810+ case ',' :
3811+ case ';' :
3812+ case '=' :
3813+ return XML_TRUE ;
3814+
3815+ default :
3816+ return XML_FALSE ;
3817+ }
3818+ }
3819+
37073820/* addBinding() overwrites the value of prefix->binding without checking.
37083821 Therefore one must keep track of the old value outside of addBinding().
37093822*/
37103823static enum XML_Error
37113824addBinding (XML_Parser parser , PREFIX * prefix , const ATTRIBUTE_ID * attId ,
37123825 const XML_Char * uri , BINDING * * bindingsPtr ) {
3826+ // "http://www.w3.org/XML/1998/namespace"
37133827 static const XML_Char xmlNamespace []
37143828 = {ASCII_h , ASCII_t , ASCII_t , ASCII_p , ASCII_COLON ,
37153829 ASCII_SLASH , ASCII_SLASH , ASCII_w , ASCII_w , ASCII_w ,
@@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37203834 ASCII_e , ASCII_s , ASCII_p , ASCII_a , ASCII_c ,
37213835 ASCII_e , '\0' };
37223836 static const int xmlLen = (int )sizeof (xmlNamespace ) / sizeof (XML_Char ) - 1 ;
3837+ // "http://www.w3.org/2000/xmlns/"
37233838 static const XML_Char xmlnsNamespace []
37243839 = {ASCII_h , ASCII_t , ASCII_t , ASCII_p , ASCII_COLON , ASCII_SLASH ,
37253840 ASCII_SLASH , ASCII_w , ASCII_w , ASCII_w , ASCII_PERIOD , ASCII_w ,
@@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
37603875 && (len > xmlnsLen || uri [len ] != xmlnsNamespace [len ]))
37613876 isXMLNS = XML_FALSE ;
37623877
3763- // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764- // we have to at least make sure that the XML processor on top of
3765- // Expat (that is splitting tag names by namespace separator into
3766- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767- // by an attacker putting additional namespace separator characters
3768- // into namespace declarations. That would be ambiguous and not to
3769- // be expected.
3770- if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )) {
3878+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
3879+ // today (and is not REQUIRED to do so with regard to the XML 1.0
3880+ // namespaces specification) we have to at least make sure, that
3881+ // the application on top of Expat (that is likely splitting expanded
3882+ // element names ("qualified names") of form
3883+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3884+ // in its element handler code) cannot be confused by an attacker
3885+ // putting additional namespace separator characters into namespace
3886+ // declarations. That would be ambiguous and not to be expected.
3887+ //
3888+ // While the HTML API docs of function XML_ParserCreateNS have been
3889+ // advising against use of a namespace separator character that can
3890+ // appear in a URI for >20 years now, some widespread applications
3891+ // are using URI characters (':' (colon) in particular) for a
3892+ // namespace separator, in practice. To keep these applications
3893+ // functional, we only reject namespace URIs containing the
3894+ // application-chosen namespace separator if the chosen separator
3895+ // is a non-URI character with regard to RFC 3986.
3896+ if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )
3897+ && ! is_rfc3986_uri_char (uri [len ])) {
37713898 return XML_ERROR_SYNTAX ;
37723899 }
37733900 }
0 commit comments