@@ -864,108 +864,107 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
864864
865865PyObject * PyCodec_BackslashReplaceErrors (PyObject * exc )
866866{
867- PyObject * object ;
868- Py_ssize_t i ;
869- Py_ssize_t start ;
870- Py_ssize_t end ;
871- PyObject * res ;
872- Py_UCS1 * outp ;
873- int ressize ;
874- Py_UCS4 c ;
875-
867+ PyObject * obj ;
868+ Py_ssize_t objlen , start , end , slen ;
876869 if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
877- const unsigned char * p ;
878- if (PyUnicodeDecodeError_GetStart (exc , & start ))
879- return NULL ;
880- if (PyUnicodeDecodeError_GetEnd (exc , & end ))
881- return NULL ;
882- if (!(object = PyUnicodeDecodeError_GetObject (exc )))
870+ if (_PyUnicodeError_GetParams (exc ,
871+ & obj , & objlen ,
872+ & start , & end , & slen , true) < 0 )
873+ {
883874 return NULL ;
884- p = ( const unsigned char * ) PyBytes_AS_STRING ( object );
885- res = PyUnicode_New (4 * ( end - start ) , 127 );
875+ }
876+ PyObject * res = PyUnicode_New (4 * slen , 127 );
886877 if (res == NULL ) {
887- Py_DECREF (object );
878+ Py_DECREF (obj );
888879 return NULL ;
889880 }
890- outp = PyUnicode_1BYTE_DATA (res );
891- for (i = start ; i < end ; i ++ , outp += 4 ) {
892- unsigned char c = p [i ];
881+ Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
882+ const unsigned char * p = (const unsigned char * )PyBytes_AS_STRING (obj );
883+ for (Py_ssize_t i = start ; i < end ; i ++ , outp += 4 ) {
884+ const unsigned char ch = p [i ];
893885 outp [0 ] = '\\' ;
894886 outp [1 ] = 'x' ;
895- outp [2 ] = Py_hexdigits [(c >> 4 ) & 0xf ];
896- outp [3 ] = Py_hexdigits [c & 0xf ];
887+ outp [2 ] = Py_hexdigits [(ch >> 4 ) & 0xf ];
888+ outp [3 ] = Py_hexdigits [ch & 0xf ];
897889 }
898-
899890 assert (_PyUnicode_CheckConsistency (res , 1 ));
900- Py_DECREF (object );
891+ Py_DECREF (obj );
901892 return Py_BuildValue ("(Nn)" , res , end );
902893 }
903- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
904- if (PyUnicodeEncodeError_GetStart (exc , & start ))
905- return NULL ;
906- if (PyUnicodeEncodeError_GetEnd (exc , & end ))
907- return NULL ;
908- if (!(object = PyUnicodeEncodeError_GetObject (exc )))
909- return NULL ;
910- }
911- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeTranslateError )) {
912- if (PyUnicodeTranslateError_GetStart (exc , & start ))
913- return NULL ;
914- if (PyUnicodeTranslateError_GetEnd (exc , & end ))
915- return NULL ;
916- if (!(object = PyUnicodeTranslateError_GetObject (exc )))
894+
895+ if (
896+ PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )
897+ || PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeTranslateError )
898+ ) {
899+ if (_PyUnicodeError_GetParams (exc ,
900+ & obj , & objlen ,
901+ & start , & end , & slen , false) < 0 )
902+ {
917903 return NULL ;
904+ }
918905 }
919906 else {
920907 wrong_exception_type (exc );
921908 return NULL ;
922909 }
923910
924- if (end - start > PY_SSIZE_T_MAX / (1 + 1 + 8 ))
925- end = start + PY_SSIZE_T_MAX / (1 + 1 + 8 );
926- for (i = start , ressize = 0 ; i < end ; ++ i ) {
911+ // The number of characters that each character 'ch' contributes
912+ // in the result is 1 + 1 + k, where k >= min{t >= 1 | 16^t > ch}
913+ // and will be formatted as "\\" + ('U'|'u'|'x') + HEXDIGITS,
914+ // where the number of hexdigits is either 2, 4, or 8 (not 6).
915+ // Since the Unicode range is below 10^7, we choose k = 8 whence
916+ // each "block" requires at most 1 + 1 + 8 characters.
917+ if (slen > PY_SSIZE_T_MAX / (1 + 1 + 8 )) {
918+ end = start + PY_SSIZE_T_MAX / (1 + 1 + 8 );
919+ end = Py_MIN (end , objlen );
920+ slen = Py_MAX (0 , end - start );
921+ }
922+
923+ Py_ssize_t ressize = 0 ;
924+ for (Py_ssize_t i = start ; i < end ; ++ i ) {
927925 /* object is guaranteed to be "ready" */
928- c = PyUnicode_READ_CHAR (object , i );
926+ Py_UCS4 c = PyUnicode_READ_CHAR (obj , i );
929927 if (c >= 0x10000 ) {
930- ressize += 1 + 1 + 8 ;
928+ ressize += 1 + 1 + 8 ;
931929 }
932930 else if (c >= 0x100 ) {
933- ressize += 1 + 1 + 4 ;
931+ ressize += 1 + 1 + 4 ;
932+ }
933+ else {
934+ ressize += 1 + 1 + 2 ;
934935 }
935- else
936- ressize += 1 + 1 + 2 ;
937936 }
938- res = PyUnicode_New (ressize , 127 );
937+ PyObject * res = PyUnicode_New (ressize , 127 );
939938 if (res == NULL ) {
940- Py_DECREF (object );
939+ Py_DECREF (obj );
941940 return NULL ;
942941 }
943- outp = PyUnicode_1BYTE_DATA (res );
944- for (i = start ; i < end ; ++ i ) {
945- c = PyUnicode_READ_CHAR (object , i );
942+ Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
943+ for (Py_ssize_t i = start ; i < end ; ++ i ) {
944+ Py_UCS4 c = PyUnicode_READ_CHAR (obj , i );
946945 * outp ++ = '\\' ;
947946 if (c >= 0x00010000 ) {
948947 * outp ++ = 'U' ;
949- * outp ++ = Py_hexdigits [(c >> 28 )& 0xf ];
950- * outp ++ = Py_hexdigits [(c >> 24 )& 0xf ];
951- * outp ++ = Py_hexdigits [(c >> 20 )& 0xf ];
952- * outp ++ = Py_hexdigits [(c >> 16 )& 0xf ];
953- * outp ++ = Py_hexdigits [(c >> 12 )& 0xf ];
954- * outp ++ = Py_hexdigits [(c >> 8 ) & 0xf ];
948+ * outp ++ = Py_hexdigits [(c >> 28 ) & 0xf ];
949+ * outp ++ = Py_hexdigits [(c >> 24 ) & 0xf ];
950+ * outp ++ = Py_hexdigits [(c >> 20 ) & 0xf ];
951+ * outp ++ = Py_hexdigits [(c >> 16 ) & 0xf ];
952+ * outp ++ = Py_hexdigits [(c >> 12 ) & 0xf ];
953+ * outp ++ = Py_hexdigits [(c >> 8 ) & 0xf ];
955954 }
956955 else if (c >= 0x100 ) {
957956 * outp ++ = 'u' ;
958- * outp ++ = Py_hexdigits [(c >> 12 )& 0xf ];
959- * outp ++ = Py_hexdigits [(c >> 8 ) & 0xf ];
957+ * outp ++ = Py_hexdigits [(c >> 12 ) & 0xf ];
958+ * outp ++ = Py_hexdigits [(c >> 8 ) & 0xf ];
960959 }
961- else
960+ else {
962961 * outp ++ = 'x' ;
963- * outp ++ = Py_hexdigits [(c >>4 )& 0xf ];
964- * outp ++ = Py_hexdigits [c & 0xf ];
962+ }
963+ * outp ++ = Py_hexdigits [(c >> 4 ) & 0xf ];
964+ * outp ++ = Py_hexdigits [c & 0xf ];
965965 }
966-
967966 assert (_PyUnicode_CheckConsistency (res , 1 ));
968- Py_DECREF (object );
967+ Py_DECREF (obj );
969968 return Py_BuildValue ("(Nn)" , res , end );
970969}
971970
0 commit comments