@@ -2961,46 +2961,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
29612961static PyObject *
29622962UnicodeEncodeError_str (PyObject * self )
29632963{
2964- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
2964+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
29652965 PyObject * result = NULL ;
29662966 PyObject * reason_str = NULL ;
29672967 PyObject * encoding_str = NULL ;
29682968
2969- if (! uself -> object )
2969+ if (exc -> object == NULL ) {
29702970 /* Not properly initialized. */
29712971 return PyUnicode_FromString ("" );
2972+ }
29722973
29732974 /* Get reason and encoding as strings, which they might not be if
29742975 they've been modified after we were constructed. */
2975- reason_str = PyObject_Str (uself -> reason );
2976- if (reason_str == NULL )
2976+ reason_str = PyObject_Str (exc -> reason );
2977+ if (reason_str == NULL ) {
29772978 goto done ;
2978- encoding_str = PyObject_Str (uself -> encoding );
2979- if (encoding_str == NULL )
2979+ }
2980+ encoding_str = PyObject_Str (exc -> encoding );
2981+ if (encoding_str == NULL ) {
29802982 goto done ;
2983+ }
2984+
2985+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
2986+ Py_ssize_t start = exc -> start , end = exc -> end ;
29812987
2982- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
2983- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
2988+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
2989+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
29842990 const char * fmt ;
2985- if (badchar <= 0xff )
2991+ if (badchar <= 0xff ) {
29862992 fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U" ;
2987- else if (badchar <= 0xffff )
2993+ }
2994+ else if (badchar <= 0xffff ) {
29882995 fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U" ;
2989- else
2996+ }
2997+ else {
29902998 fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U" ;
2999+ }
29913000 result = PyUnicode_FromFormat (
29923001 fmt ,
29933002 encoding_str ,
29943003 (int )badchar ,
2995- uself -> start ,
3004+ start ,
29963005 reason_str );
29973006 }
29983007 else {
29993008 result = PyUnicode_FromFormat (
30003009 "'%U' codec can't encode characters in position %zd-%zd: %U" ,
30013010 encoding_str ,
3002- uself -> start ,
3003- uself -> end - 1 ,
3011+ start ,
3012+ end - 1 ,
30043013 reason_str );
30053014 }
30063015done :
@@ -3074,41 +3083,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
30743083static PyObject *
30753084UnicodeDecodeError_str (PyObject * self )
30763085{
3077- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3086+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
30783087 PyObject * result = NULL ;
30793088 PyObject * reason_str = NULL ;
30803089 PyObject * encoding_str = NULL ;
30813090
3082- if (! uself -> object )
3091+ if (exc -> object == NULL ) {
30833092 /* Not properly initialized. */
30843093 return PyUnicode_FromString ("" );
3094+ }
30853095
30863096 /* Get reason and encoding as strings, which they might not be if
30873097 they've been modified after we were constructed. */
3088- reason_str = PyObject_Str (uself -> reason );
3089- if (reason_str == NULL )
3098+ reason_str = PyObject_Str (exc -> reason );
3099+ if (reason_str == NULL ) {
30903100 goto done ;
3091- encoding_str = PyObject_Str (uself -> encoding );
3092- if (encoding_str == NULL )
3101+ }
3102+ encoding_str = PyObject_Str (exc -> encoding );
3103+ if (encoding_str == NULL ) {
30933104 goto done ;
3105+ }
3106+
3107+ Py_ssize_t len = PyBytes_GET_SIZE (exc -> object );
3108+ Py_ssize_t start = exc -> start , end = exc -> end ;
30943109
3095- if (uself -> start < PyBytes_GET_SIZE ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3096- int byte = (int )(PyBytes_AS_STRING ((( PyUnicodeErrorObject * ) self ) -> object )[uself -> start ]& 0xff );
3110+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3111+ int badbyte = (int )(PyBytes_AS_STRING (exc -> object )[start ] & 0xff );
30973112 result = PyUnicode_FromFormat (
30983113 "'%U' codec can't decode byte 0x%02x in position %zd: %U" ,
30993114 encoding_str ,
3100- byte ,
3101- uself -> start ,
3115+ badbyte ,
3116+ start ,
31023117 reason_str );
31033118 }
31043119 else {
31053120 result = PyUnicode_FromFormat (
31063121 "'%U' codec can't decode bytes in position %zd-%zd: %U" ,
31073122 encoding_str ,
3108- uself -> start ,
3109- uself -> end - 1 ,
3110- reason_str
3111- );
3123+ start ,
3124+ end - 1 ,
3125+ reason_str );
31123126 }
31133127done :
31143128 Py_XDECREF (reason_str );
@@ -3171,42 +3185,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
31713185static PyObject *
31723186UnicodeTranslateError_str (PyObject * self )
31733187{
3174- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3188+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
31753189 PyObject * result = NULL ;
31763190 PyObject * reason_str = NULL ;
31773191
3178- if (! uself -> object )
3192+ if (exc -> object == NULL ) {
31793193 /* Not properly initialized. */
31803194 return PyUnicode_FromString ("" );
3195+ }
31813196
31823197 /* Get reason as a string, which it might not be if it's been
31833198 modified after we were constructed. */
3184- reason_str = PyObject_Str (uself -> reason );
3185- if (reason_str == NULL )
3199+ reason_str = PyObject_Str (exc -> reason );
3200+ if (reason_str == NULL ) {
31863201 goto done ;
3202+ }
3203+
3204+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3205+ Py_ssize_t start = exc -> start , end = exc -> end ;
31873206
3188- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3189- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3207+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3208+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
31903209 const char * fmt ;
3191- if (badchar <= 0xff )
3210+ if (badchar <= 0xff ) {
31923211 fmt = "can't translate character '\\x%02x' in position %zd: %U" ;
3193- else if (badchar <= 0xffff )
3212+ }
3213+ else if (badchar <= 0xffff ) {
31943214 fmt = "can't translate character '\\u%04x' in position %zd: %U" ;
3195- else
3215+ }
3216+ else {
31963217 fmt = "can't translate character '\\U%08x' in position %zd: %U" ;
3218+ }
31973219 result = PyUnicode_FromFormat (
31983220 fmt ,
31993221 (int )badchar ,
3200- uself -> start ,
3201- reason_str
3202- );
3203- } else {
3222+ start ,
3223+ reason_str );
3224+ }
3225+ else {
32043226 result = PyUnicode_FromFormat (
32053227 "can't translate characters in position %zd-%zd: %U" ,
3206- uself -> start ,
3207- uself -> end - 1 ,
3208- reason_str
3209- );
3228+ start ,
3229+ end - 1 ,
3230+ reason_str );
32103231 }
32113232done :
32123233 Py_XDECREF (reason_str );
0 commit comments