@@ -840,9 +840,6 @@ ensure_unicode(PyObject *obj)
840840
841841/* --- Unicode Object ----------------------------------------------------- */
842842
843- static PyObject *
844- fixup (PyObject * self , Py_UCS4 (* fixfct )(PyObject * s ));
845-
846843static inline Py_ssize_t
847844findchar (const void * s , int kind ,
848845 Py_ssize_t size , Py_UCS4 ch ,
@@ -9062,42 +9059,6 @@ PyUnicode_Translate(PyObject *str,
90629059 return _PyUnicode_TranslateCharmap (str , mapping , errors );
90639060}
90649061
/* Fix callback that _PyUnicode_TransformDecimalAndSpaceToASCII() handed to
   fixup().  Rewrites `self` in place: every code point above 127 that is
   whitespace becomes ' ', and every code point above 127 that has a decimal
   value becomes the matching ASCII digit.  Code points <= 127, and code
   points above 127 that are neither, are left as they are.

   Returns 0 when nothing was rewritten.  Otherwise returns the maximum of
   127 and every code point above 127 that remains after the rewrite. */
9065- static Py_UCS4
9066- fix_decimal_and_space_to_ascii (PyObject * self )
9067- {
9068- /* No need to call PyUnicode_READY(self) because this function is only
9069- called as a callback from fixup() which does it already. */
9070- const Py_ssize_t len = PyUnicode_GET_LENGTH (self );
9071- const int kind = PyUnicode_KIND (self );
9072- void * data = PyUnicode_DATA (self );
/* maxchar starts at 127, so a non-zero return value is never below 127. */
9073- Py_UCS4 maxchar = 127 , ch , fixed ;
9074- int modified = 0 ;
9075- Py_ssize_t i ;
9076-
9077- for (i = 0 ; i < len ; ++ i ) {
9078- ch = PyUnicode_READ (kind , data , i );
/* 0 doubles as the "no replacement found" marker for this character. */
9079- fixed = 0 ;
/* Characters up to 127 are never rewritten and never affect maxchar. */
9080- if (ch > 127 ) {
9081- if (Py_UNICODE_ISSPACE (ch ))
9082- fixed = ' ' ;
9083- else {
9084- const int decimal = Py_UNICODE_TODECIMAL (ch );
9085- if (decimal >= 0 )
9086- fixed = '0' + decimal ;
9087- }
9088- if (fixed != 0 ) {
9089- modified = 1 ;
9090- maxchar = Py_MAX (maxchar , fixed );
/* The replacement is written straight into the buffer that was read. */
9091- PyUnicode_WRITE (kind , data , i , fixed );
9092- }
9093- else
9094- maxchar = Py_MAX (maxchar , ch );
9095- }
9096- }
9097-
/* fixup() treats a return value of 0 as "no changes". */
9098- return (modified ) ? maxchar : 0 ;
9099- }
9100-
91019062PyObject *
91029063_PyUnicode_TransformDecimalAndSpaceToASCII (PyObject * unicode )
91039064{
@@ -9107,12 +9068,42 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode)
91079068 }
91089069 if (PyUnicode_READY (unicode ) == -1 )
91099070 return NULL ;
9110- if (PyUnicode_MAX_CHAR_VALUE (unicode ) <= 127 ) {
9071+ if (PyUnicode_IS_ASCII (unicode )) {
91119072 /* If the string is already ASCII, just return the same string */
91129073 Py_INCREF (unicode );
91139074 return unicode ;
91149075 }
9115- return fixup (unicode , fix_decimal_and_space_to_ascii );
9076+
9077+ Py_ssize_t len = PyUnicode_GET_LENGTH (unicode );
9078+ PyObject * result = PyUnicode_New (len , 127 );
9079+ if (result == NULL ) {
9080+ return NULL ;
9081+ }
9082+
9083+ Py_UCS1 * out = PyUnicode_1BYTE_DATA (result );
9084+ int kind = PyUnicode_KIND (unicode );
9085+ const void * data = PyUnicode_DATA (unicode );
9086+ Py_ssize_t i ;
9087+ for (i = 0 ; i < len ; ++ i ) {
9088+ Py_UCS4 ch = PyUnicode_READ (kind , data , i );
9089+ if (ch < 127 ) {
9090+ out [i ] = ch ;
9091+ }
9092+ else if (Py_UNICODE_ISSPACE (ch )) {
9093+ out [i ] = ' ' ;
9094+ }
9095+ else {
9096+ int decimal = Py_UNICODE_TODECIMAL (ch );
9097+ if (decimal < 0 ) {
9098+ out [i ] = '?' ;
9099+ _PyUnicode_LENGTH (result ) = i + 1 ;
9100+ break ;
9101+ }
9102+ out [i ] = '0' + decimal ;
9103+ }
9104+ }
9105+
9106+ return result ;
91169107}
91179108
91189109PyObject *
@@ -9588,69 +9579,6 @@ PyUnicode_Tailmatch(PyObject *str,
95889579 return tailmatch (str , substr , start , end , direction );
95899580}
95909581
9591- /* Apply fixfct filter to the Unicode object self and return a
9592- reference to the modified object */
9593-
/* Runs the fix callback `fixfct` on a copy of `self` and returns a reference
   to the result, or NULL if the copy or the allocation of a re-sized string
   fails.

   `fixfct` edits the string it is given in place.  It returns 0 when it
   changed nothing, otherwise the new maximum character of the string.

   Possible results:
     - `self` itself (with its reference count incremented) when nothing
       changed and `self` is an exact str;
     - the copy `u` when nothing changed but `self` is not an exact str, or
       when the aligned maximum character is unchanged;
     - a newly allocated string `v` sized for the new maximum character
       otherwise. */
9594- static PyObject *
9595- fixup (PyObject * self ,
9596- Py_UCS4 (* fixfct )(PyObject * s ))
9597- {
9598- PyObject * u ;
9599- Py_UCS4 maxchar_old , maxchar_new = 0 ;
9600- PyObject * v ;
9601-
/* Work on a copy so that `self` is never modified by the callback. */
9602- u = _PyUnicode_Copy (self );
9603- if (u == NULL )
9604- return NULL ;
9605- maxchar_old = PyUnicode_MAX_CHAR_VALUE (u );
9606-
9607- /* fix functions return the new maximum character in a string,
9608- if the kind of the resulting unicode object does not change,
9609- everything is fine. Otherwise we need to change the string kind
9610- and re-run the fix function. */
9611- maxchar_new = fixfct (u );
9612-
9613- if (maxchar_new == 0 ) {
9614- /* no changes */ ;
/* For an exact str the copy is dropped and `self` is handed back. */
9615- if (PyUnicode_CheckExact (self )) {
9616- Py_DECREF (u );
9617- Py_INCREF (self );
9618- return self ;
9619- }
9620- else
9621- return u ;
9622- }
9623-
/* NOTE(review): align_maxchar() is not visible here; presumably it rounds
   the value up to the limit of its storage kind so that it can be compared
   with PyUnicode_MAX_CHAR_VALUE() -- confirm. */
9624- maxchar_new = align_maxchar (maxchar_new );
9625-
9626- if (maxchar_new == maxchar_old )
9627- return u ;
9628-
9629- /* In case the maximum character changed, we need to
9630- convert the string to the new category. */
9631- v = PyUnicode_New (PyUnicode_GET_LENGTH (self ), maxchar_new );
9632- if (v == NULL ) {
9633- Py_DECREF (u );
9634- return NULL ;
9635- }
9636- if (maxchar_new > maxchar_old ) {
9637- /* If the maxchar increased so that the kind changed, not all
9638- characters are representable anymore and we need to fix the
9639- string again. This only happens in very few cases. */
/* Restart from the untouched `self` and run the callback a second time. */
9640- _PyUnicode_FastCopyCharacters (v , 0 ,
9641- self , 0 , PyUnicode_GET_LENGTH (self ));
9642- maxchar_old = fixfct (v );
9643- assert (maxchar_old > 0 && maxchar_old <= maxchar_new );
9644- }
9645- else {
/* The already-fixed copy is transferred into the new string as is. */
9646- _PyUnicode_FastCopyCharacters (v , 0 ,
9647- u , 0 , PyUnicode_GET_LENGTH (self ));
9648- }
9649- Py_DECREF (u );
9650- assert (_PyUnicode_CheckConsistency (v , 1 ));
9651- return v ;
9652- }
9653-
96549582static PyObject *
96559583ascii_upper_or_lower (PyObject * self , int lower )
96569584{
0 commit comments