@@ -1359,76 +1359,91 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
13591359}
13601360
13611361
1362+ // --- handler: 'surrogateescape' ---------------------------------------------
1363+
13621364static PyObject *
1363- PyCodec_SurrogateEscapeErrors (PyObject * exc )
1365+ _PyCodec_SurrogateEscapeUnicodeEncodeError (PyObject * exc )
13641366{
1365- PyObject * restuple ;
1366- PyObject * object ;
1367- Py_ssize_t i ;
1368- Py_ssize_t start ;
1369- Py_ssize_t end ;
1370- PyObject * res ;
1367+ PyObject * obj ;
1368+ Py_ssize_t start , end , slen ;
1369+ if (_PyUnicodeError_GetParams (exc ,
1370+ & obj , NULL ,
1371+ & start , & end , & slen , false) < 0 )
1372+ {
1373+ return NULL ;
1374+ }
13711375
1372- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
1373- char * outp ;
1374- if (PyUnicodeEncodeError_GetStart (exc , & start ))
1375- return NULL ;
1376- if (PyUnicodeEncodeError_GetEnd (exc , & end ))
1377- return NULL ;
1378- if (!(object = PyUnicodeEncodeError_GetObject (exc )))
1379- return NULL ;
1380- res = PyBytes_FromStringAndSize (NULL , end - start );
1381- if (!res ) {
1382- Py_DECREF (object );
1383- return NULL ;
1384- }
1385- outp = PyBytes_AsString (res );
1386- for (i = start ; i < end ; i ++ ) {
1387- /* object is guaranteed to be "ready" */
1388- Py_UCS4 ch = PyUnicode_READ_CHAR (object , i );
1389- if (ch < 0xdc80 || ch > 0xdcff ) {
1390- /* Not a UTF-8b surrogate, fail with original exception */
1391- PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1392- Py_DECREF (res );
1393- Py_DECREF (object );
1394- return NULL ;
1395- }
1396- * outp ++ = ch - 0xdc00 ;
1397- }
1398- restuple = Py_BuildValue ("(On)" , res , end );
1399- Py_DECREF (res );
1400- Py_DECREF (object );
1401- return restuple ;
1376+ PyObject * res = PyBytes_FromStringAndSize (NULL , slen );
1377+ if (res == NULL ) {
1378+ Py_DECREF (obj );
1379+ return NULL ;
14021380 }
1403- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
1404- PyObject * str ;
1405- const unsigned char * p ;
1406- Py_UCS2 ch [4 ]; /* decode up to 4 bad bytes. */
1407- int consumed = 0 ;
1408- if (PyUnicodeDecodeError_GetStart (exc , & start ))
1409- return NULL ;
1410- if (PyUnicodeDecodeError_GetEnd (exc , & end ))
1411- return NULL ;
1412- if (!(object = PyUnicodeDecodeError_GetObject (exc )))
1413- return NULL ;
1414- p = (const unsigned char * )PyBytes_AS_STRING (object );
1415- while (consumed < 4 && consumed < end - start ) {
1416- /* Refuse to escape ASCII bytes. */
1417- if (p [start + consumed ] < 128 )
1418- break ;
1419- ch [consumed ] = 0xdc00 + p [start + consumed ];
1420- consumed ++ ;
1421- }
1422- Py_DECREF (object );
1423- if (!consumed ) {
1424- /* codec complained about ASCII byte. */
1381+
1382+ char * outp = PyBytes_AsString (res );
1383+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
1384+ Py_UCS4 ch = PyUnicode_READ_CHAR (obj , i );
1385+ if (ch < 0xdc80 || ch > 0xdcff ) {
1386+ /* Not a UTF-8b surrogate, fail with original exception. */
1387+ Py_DECREF (obj );
1388+ Py_DECREF (res );
14251389 PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
14261390 return NULL ;
14271391 }
1428- str = PyUnicode_FromKindAndData (PyUnicode_2BYTE_KIND , ch , consumed );
1429- if (str == NULL )
1430- return NULL ;
1431- return Py_BuildValue ("(Nn)" , str , start + consumed );
1392+ * outp ++ = ch - 0xdc00 ;
1393+ }
1394+ Py_DECREF (obj );
1395+
1396+ return Py_BuildValue ("(Nn)" , res , end );
1397+ }
1398+
1399+
1400+ static PyObject *
1401+ _PyCodec_SurrogateEscapeUnicodeDecodeError (PyObject * exc )
1402+ {
1403+ PyObject * obj ;
1404+ Py_ssize_t start , end , slen ;
1405+ if (_PyUnicodeError_GetParams (exc ,
1406+ & obj , NULL ,
1407+ & start , & end , & slen , true) < 0 )
1408+ {
1409+ return NULL ;
1410+ }
1411+
1412+ Py_UCS2 ch [4 ]; /* decode up to 4 bad bytes. */
1413+ int consumed = 0 ;
1414+ const unsigned char * p = (const unsigned char * )PyBytes_AS_STRING (obj );
1415+ while (consumed < 4 && consumed < slen ) {
1416+ /* Refuse to escape ASCII bytes. */
1417+ if (p [start + consumed ] < 128 ) {
1418+ break ;
1419+ }
1420+ ch [consumed ] = 0xdc00 + p [start + consumed ];
1421+ consumed ++ ;
1422+ }
1423+ Py_DECREF (obj );
1424+
1425+ if (consumed == 0 ) {
1426+ /* Codec complained about ASCII byte. */
1427+ PyErr_SetObject (PyExceptionInstance_Class (exc ), exc );
1428+ return NULL ;
1429+ }
1430+
1431+ PyObject * str = PyUnicode_FromKindAndData (PyUnicode_2BYTE_KIND , ch , consumed );
1432+ if (str == NULL ) {
1433+ return NULL ;
1434+ }
1435+ return Py_BuildValue ("(Nn)" , str , start + consumed );
1436+ }
1437+
1438+
1439+ static PyObject *
1440+ PyCodec_SurrogateEscapeErrors (PyObject * exc )
1441+ {
1442+ if (_PyIsUnicodeEncodeError (exc )) {
1443+ return _PyCodec_SurrogateEscapeUnicodeEncodeError (exc );
1444+ }
1445+ else if (_PyIsUnicodeDecodeError (exc )) {
1446+ return _PyCodec_SurrogateEscapeUnicodeDecodeError (exc );
14321447 }
14331448 else {
14341449 wrong_exception_type (exc );
@@ -1485,11 +1500,13 @@ surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14851500}
14861501
14871502
1488- static PyObject * surrogateescape_errors (PyObject * self , PyObject * exc )
1503+ static inline PyObject *
1504+ surrogateescape_errors (PyObject * Py_UNUSED (self ), PyObject * exc )
14891505{
14901506 return PyCodec_SurrogateEscapeErrors (exc );
14911507}
14921508
1509+
14931510PyStatus
14941511_PyCodec_InitRegistry (PyInterpreterState * interp )
14951512{
0 commit comments