@@ -2994,46 +2994,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
29942994static PyObject *
29952995UnicodeEncodeError_str (PyObject * self )
29962996{
2997- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
2997+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
29982998 PyObject * result = NULL ;
29992999 PyObject * reason_str = NULL ;
30003000 PyObject * encoding_str = NULL ;
30013001
3002- if (! uself -> object )
3002+ if (exc -> object == NULL ) {
30033003 /* Not properly initialized. */
30043004 return PyUnicode_FromString ("" );
3005+ }
30053006
30063007 /* Get reason and encoding as strings, which they might not be if
30073008 they've been modified after we were constructed. */
3008- reason_str = PyObject_Str (uself -> reason );
3009- if (reason_str == NULL )
3009+ reason_str = PyObject_Str (exc -> reason );
3010+ if (reason_str == NULL ) {
30103011 goto done ;
3011- encoding_str = PyObject_Str (uself -> encoding );
3012- if (encoding_str == NULL )
3012+ }
3013+ encoding_str = PyObject_Str (exc -> encoding );
3014+ if (encoding_str == NULL ) {
30133015 goto done ;
3016+ }
3017+
3018+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3019+ Py_ssize_t start = exc -> start , end = exc -> end ;
30143020
3015- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3016- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3021+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3022+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
30173023 const char * fmt ;
3018- if (badchar <= 0xff )
3024+ if (badchar <= 0xff ) {
30193025 fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U" ;
3020- else if (badchar <= 0xffff )
3026+ }
3027+ else if (badchar <= 0xffff ) {
30213028 fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U" ;
3022- else
3029+ }
3030+ else {
30233031 fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U" ;
3032+ }
30243033 result = PyUnicode_FromFormat (
30253034 fmt ,
30263035 encoding_str ,
30273036 (int )badchar ,
3028- uself -> start ,
3037+ start ,
30293038 reason_str );
30303039 }
30313040 else {
30323041 result = PyUnicode_FromFormat (
30333042 "'%U' codec can't encode characters in position %zd-%zd: %U" ,
30343043 encoding_str ,
3035- uself -> start ,
3036- uself -> end - 1 ,
3044+ start ,
3045+ end - 1 ,
30373046 reason_str );
30383047 }
30393048done :
@@ -3107,41 +3116,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
31073116static PyObject *
31083117UnicodeDecodeError_str (PyObject * self )
31093118{
3110- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3119+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
31113120 PyObject * result = NULL ;
31123121 PyObject * reason_str = NULL ;
31133122 PyObject * encoding_str = NULL ;
31143123
3115- if (! uself -> object )
3124+ if (exc -> object == NULL ) {
31163125 /* Not properly initialized. */
31173126 return PyUnicode_FromString ("" );
3127+ }
31183128
31193129 /* Get reason and encoding as strings, which they might not be if
31203130 they've been modified after we were constructed. */
3121- reason_str = PyObject_Str (uself -> reason );
3122- if (reason_str == NULL )
3131+ reason_str = PyObject_Str (exc -> reason );
3132+ if (reason_str == NULL ) {
31233133 goto done ;
3124- encoding_str = PyObject_Str (uself -> encoding );
3125- if (encoding_str == NULL )
3134+ }
3135+ encoding_str = PyObject_Str (exc -> encoding );
3136+ if (encoding_str == NULL ) {
31263137 goto done ;
3138+ }
3139+
3140+ Py_ssize_t len = PyBytes_GET_SIZE (exc -> object );
3141+ Py_ssize_t start = exc -> start , end = exc -> end ;
31273142
3128- if (uself -> start < PyBytes_GET_SIZE ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3129- int byte = (int )(PyBytes_AS_STRING ((( PyUnicodeErrorObject * ) self ) -> object )[uself -> start ]& 0xff );
3143+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3144+ int badbyte = (int )(PyBytes_AS_STRING (exc -> object )[start ] & 0xff );
31303145 result = PyUnicode_FromFormat (
31313146 "'%U' codec can't decode byte 0x%02x in position %zd: %U" ,
31323147 encoding_str ,
3133- byte ,
3134- uself -> start ,
3148+ badbyte ,
3149+ start ,
31353150 reason_str );
31363151 }
31373152 else {
31383153 result = PyUnicode_FromFormat (
31393154 "'%U' codec can't decode bytes in position %zd-%zd: %U" ,
31403155 encoding_str ,
3141- uself -> start ,
3142- uself -> end - 1 ,
3143- reason_str
3144- );
3156+ start ,
3157+ end - 1 ,
3158+ reason_str );
31453159 }
31463160done :
31473161 Py_XDECREF (reason_str );
@@ -3204,42 +3218,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
32043218static PyObject *
32053219UnicodeTranslateError_str (PyObject * self )
32063220{
3207- PyUnicodeErrorObject * uself = (PyUnicodeErrorObject * )self ;
3221+ PyUnicodeErrorObject * exc = (PyUnicodeErrorObject * )self ;
32083222 PyObject * result = NULL ;
32093223 PyObject * reason_str = NULL ;
32103224
3211- if (! uself -> object )
3225+ if (exc -> object == NULL ) {
32123226 /* Not properly initialized. */
32133227 return PyUnicode_FromString ("" );
3228+ }
32143229
32153230 /* Get reason as a string, which it might not be if it's been
32163231 modified after we were constructed. */
3217- reason_str = PyObject_Str (uself -> reason );
3218- if (reason_str == NULL )
3232+ reason_str = PyObject_Str (exc -> reason );
3233+ if (reason_str == NULL ) {
32193234 goto done ;
3235+ }
3236+
3237+ Py_ssize_t len = PyUnicode_GET_LENGTH (exc -> object );
3238+ Py_ssize_t start = exc -> start , end = exc -> end ;
32203239
3221- if (uself -> start < PyUnicode_GET_LENGTH ( uself -> object ) && uself -> end == uself -> start + 1 ) {
3222- Py_UCS4 badchar = PyUnicode_ReadChar (uself -> object , uself -> start );
3240+ if (( start >= 0 && start < len ) && ( end >= 0 && end <= len ) && end == start + 1 ) {
3241+ Py_UCS4 badchar = PyUnicode_ReadChar (exc -> object , start );
32233242 const char * fmt ;
3224- if (badchar <= 0xff )
3243+ if (badchar <= 0xff ) {
32253244 fmt = "can't translate character '\\x%02x' in position %zd: %U" ;
3226- else if (badchar <= 0xffff )
3245+ }
3246+ else if (badchar <= 0xffff ) {
32273247 fmt = "can't translate character '\\u%04x' in position %zd: %U" ;
3228- else
3248+ }
3249+ else {
32293250 fmt = "can't translate character '\\U%08x' in position %zd: %U" ;
3251+ }
32303252 result = PyUnicode_FromFormat (
32313253 fmt ,
32323254 (int )badchar ,
3233- uself -> start ,
3234- reason_str
3235- );
3236- } else {
3255+ start ,
3256+ reason_str );
3257+ }
3258+ else {
32373259 result = PyUnicode_FromFormat (
32383260 "can't translate characters in position %zd-%zd: %U" ,
3239- uself -> start ,
3240- uself -> end - 1 ,
3241- reason_str
3242- );
3261+ start ,
3262+ end - 1 ,
3263+ reason_str );
32433264 }
32443265done :
32453266 Py_XDECREF (reason_str );
0 commit comments