@@ -2959,46 +2959,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
29592959static PyObject *
29602960UnicodeEncodeError_str (PyObject * self )
29612961{
2962- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
2962+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
29632963 PyObject * result = NULL ;
29642964 PyObject * reason_str = NULL ;
29652965 PyObject * encoding_str = NULL ;
29662966
2967- if (! uself -> object )
2967+ if (exc -> object == NULL ) {
29682968 /* Not properly initialized. */
29692969 return PyUnicode_FromString ("" );
2970+ }
29702971
29712972 /* Get reason and encoding as strings, which they might not be if
29722973 they've been modified after we were constructed. */
2973- reason_str = PyObject_Str (uself -> reason );
2974- if (reason_str == NULL )
2974+ reason_str = PyObject_Str (exc -> reason );
2975+ if (reason_str == NULL ) {
29752976 goto done ;
2976- encoding_str = PyObject_Str (uself -> encoding );
2977- if (encoding_str == NULL )
2977+ }
2978+ encoding_str = PyObject_Str (exc -> encoding );
2979+ if (encoding_str == NULL ) {
29782980 goto done ;
2981+ }
2982+
2983+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
2984+ Py_ssize_t start = exc -> start , end = exc -> end ;
29792985
2980- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
2981- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
2986+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
2987+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
29822988 const char * fmt ;
2983- if (badchar <= 0xff )
2989+ if (badchar <= 0xff ) {
29842990 fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U" ;
2985- else if (badchar <= 0xffff )
2991+ }
2992+ else if (badchar <= 0xffff ) {
29862993 fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U" ;
2987- else
2994+ }
2995+ else {
29882996 fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U" ;
2997+ }
29892998 result = PyUnicode_FromFormat (
29902999 fmt ,
29913000 encoding_str ,
29923001 (int )badchar ,
2993- uself -> start ,
3002+ start ,
29943003 reason_str );
29953004 }
29963005 else {
29973006 result = PyUnicode_FromFormat (
29983007 "'%U' codec can't encode characters in position %zd-%zd: %U" ,
29993008 encoding_str ,
3000- uself -> start ,
3001- uself -> end - 1 ,
3009+ start ,
3010+ end - 1 ,
30023011 reason_str );
30033012 }
30043013done :
@@ -3072,41 +3081,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
30723081static PyObject *
30733082UnicodeDecodeError_str (PyObject * self )
30743083{
3075- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3084+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
30763085 PyObject * result = NULL ;
30773086 PyObject * reason_str = NULL ;
30783087 PyObject * encoding_str = NULL ;
30793088
3080- if (! uself -> object )
3089+ if (exc -> object == NULL ) {
30813090 /* Not properly initialized. */
30823091 return PyUnicode_FromString ("" );
3092+ }
30833093
30843094 /* Get reason and encoding as strings, which they might not be if
30853095 they've been modified after we were constructed. */
3086- reason_str = PyObject_Str (uself -> reason );
3087- if (reason_str == NULL )
3096+ reason_str = PyObject_Str (exc -> reason );
3097+ if (reason_str == NULL ) {
30883098 goto done ;
3089- encoding_str = PyObject_Str (uself -> encoding );
3090- if (encoding_str == NULL )
3099+ }
3100+ encoding_str = PyObject_Str (exc -> encoding );
3101+ if (encoding_str == NULL ) {
30913102 goto done ;
3103+ }
3104+
3105+ Py_ssize_t len = PyBytes_GET_SIZE (exc -> object );
3106+ Py_ssize_t start = exc -> start , end = exc -> end ;
30923107
3093- if (uself -> start < PyBytes_GET_SIZE ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3094- int byte = (int )(PyBytes_AS_STRING ((( PyUnicodeErrorObject * ) self ) -> object )[uself -> start ]& 0xff );
3108+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3109+ int badbyte = (int )(PyBytes_AS_STRING (exc -> object )[start ] & 0xff );
30953110 result = PyUnicode_FromFormat (
30963111 "'%U' codec can't decode byte 0x%02x in position %zd: %U" ,
30973112 encoding_str ,
3098- byte ,
3099- uself -> start ,
3113+ badbyte ,
3114+ start ,
31003115 reason_str );
31013116 }
31023117 else {
31033118 result = PyUnicode_FromFormat (
31043119 "'%U' codec can't decode bytes in position %zd-%zd: %U" ,
31053120 encoding_str ,
3106- uself -> start ,
3107- uself -> end - 1 ,
3108- reason_str
3109- );
3121+ start ,
3122+ end - 1 ,
3123+ reason_str );
31103124 }
31113125done :
31123126 Py_XDECREF (reason_str );
@@ -3169,42 +3183,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
31693183static PyObject *
31703184UnicodeTranslateError_str (PyObject * self )
31713185{
3172- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3186+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
31733187 PyObject * result = NULL ;
31743188 PyObject * reason_str = NULL ;
31753189
3176- if (! uself -> object )
3190+ if (exc -> object == NULL ) {
31773191 /* Not properly initialized. */
31783192 return PyUnicode_FromString ("" );
3193+ }
31793194
31803195 /* Get reason as a string, which it might not be if it's been
31813196 modified after we were constructed. */
3182- reason_str = PyObject_Str (uself -> reason );
3183- if (reason_str == NULL )
3197+ reason_str = PyObject_Str (exc -> reason );
3198+ if (reason_str == NULL ) {
31843199 goto done ;
3200+ }
3201+
3202+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3203+ Py_ssize_t start = exc -> start , end = exc -> end ;
31853204
3186- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3187- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3205+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3206+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
31883207 const char * fmt ;
3189- if (badchar <= 0xff )
3208+ if (badchar <= 0xff ) {
31903209 fmt = "can't translate character '\\x%02x' in position %zd: %U" ;
3191- else if (badchar <= 0xffff )
3210+ }
3211+ else if (badchar <= 0xffff ) {
31923212 fmt = "can't translate character '\\u%04x' in position %zd: %U" ;
3193- else
3213+ }
3214+ else {
31943215 fmt = "can't translate character '\\U%08x' in position %zd: %U" ;
3216+ }
31953217 result = PyUnicode_FromFormat (
31963218 fmt ,
31973219 (int )badchar ,
3198- uself -> start ,
3199- reason_str
3200- );
3201- } else {
3220+ start ,
3221+ reason_str );
3222+ }
3223+ else {
32023224 result = PyUnicode_FromFormat (
32033225 "can't translate characters in position %zd-%zd: %U" ,
3204- uself -> start ,
3205- uself -> end - 1 ,
3206- reason_str
3207- );
3226+ start ,
3227+ end - 1 ,
3228+ reason_str );
32083229 }
32093230done :
32103231 Py_XDECREF (reason_str );
0 commit comments