@@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1427014270 return Py_BuildValue ("(N)" , copy );
1427114271}
1427214272
/*
This function searches for the longest leading whitespace (spaces and tabs)
that is common to all lines in [src, end).

Lines that are empty or consist only of spaces/tabs are ignored.  A line
that starts with a non-whitespace character makes the result 0 immediately.

It returns the length (in bytes) of the common leading whitespace.  If the
length is > 0, `*output` is set to point to the beginning of one occurrence
of that whitespace inside the source buffer; if the length is 0, `*output`
is left untouched.
*/
static Py_ssize_t
search_longest_common_leading_whitespace(
    const char *const src,
    const char *const end,
    const char **output)
{
    // [_start, _start + _len)
    // describes the current longest common leading whitespace
    const char *_start = NULL;
    Py_ssize_t _len = 0;

    const char *iter = src;
    while (iter < end) {
        const char *line_start = iter;
        const char *leading_whitespace_end = NULL;

        // scan the whole line
        while (iter < end && *iter != '\n') {
            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
                /* `iter` points to the first non-whitespace character
                   in this line */
                if (iter == line_start) {
                    // some line has no indent, fast exit!
                    return 0;
                }
                leading_whitespace_end = iter;
            }
            ++iter;
        }

        /* Step over the '\n' only when there is one.  Incrementing
           unconditionally would move `iter` past `end`, which is not a
           valid pointer when `end` is one-past-the-end of the buffer. */
        if (iter < end) {
            ++iter;
        }

        // if this line has all white space, skip it
        if (!leading_whitespace_end) {
            continue;
        }

        if (!_start) {
            // first indented line seen: it defines the initial candidate
            _start = line_start;
            _len = leading_whitespace_end - line_start;
            assert(_len > 0);
        }
        else {
            /* We then compare with the current longest leading whitespace.

               [line_start, leading_whitespace_end) is the leading
               whitespace of this line,

               [_start, _start + _len) is the leading whitespace of the
               current longest leading whitespace. */
            Py_ssize_t new_len = 0;
            const char *_iter = _start, *line_iter = line_start;

            while (_iter < _start + _len && line_iter < leading_whitespace_end
                   && *_iter == *line_iter)
            {
                ++_iter;
                ++line_iter;
                ++new_len;
            }

            _len = new_len;
            if (_len == 0) {
                // No common things now, fast exit!
                return 0;
            }
        }
    }

    assert(_len >= 0);
    if (_len > 0) {
        *output = _start;
    }
    return _len;
}
14352+
14353+ /* Dedent a string.
14354+ Behaviour is expected to be an exact match of `textwrap.dedent`.
14355+ Return a new reference on success, NULL with exception set on error.
14356+ */
14357+ PyObject *
14358+ _PyUnicode_Dedent (PyObject * unicode )
14359+ {
14360+ Py_ssize_t src_len = 0 ;
14361+ const char * src = PyUnicode_AsUTF8AndSize (unicode , & src_len );
14362+ if (!src ) {
14363+ return NULL ;
14364+ }
14365+ assert (src_len >= 0 );
14366+ if (src_len == 0 ) {
14367+ return Py_NewRef (unicode );
14368+ }
14369+
14370+ const char * const end = src + src_len ;
14371+
14372+ // [whitespace_start, whitespace_start + whitespace_len)
14373+ // describes the current longest common leading whitespace
14374+ const char * whitespace_start = NULL ;
14375+ Py_ssize_t whitespace_len = search_longest_common_leading_whitespace (
14376+ src , end , & whitespace_start );
14377+
14378+ if (whitespace_len == 0 ) {
14379+ return Py_NewRef (unicode );
14380+ }
14381+
14382+ // now we should trigger a dedent
14383+ char * dest = PyMem_Malloc (src_len );
14384+ if (!dest ) {
14385+ PyErr_NoMemory ();
14386+ return NULL ;
14387+ }
14388+ char * dest_iter = dest ;
14389+
14390+ for (const char * iter = src ; iter < end ; ++ iter ) {
14391+ const char * line_start = iter ;
14392+ bool in_leading_space = true;
14393+
14394+ // iterate over a line to find the end of a line
14395+ while (iter < end && * iter != '\n' ) {
14396+ if (in_leading_space && * iter != ' ' && * iter != '\t' ) {
14397+ in_leading_space = false;
14398+ }
14399+ ++ iter ;
14400+ }
14401+
14402+ // invariant: *iter == '\n' or iter == end
14403+ bool append_newline = iter < end ;
14404+
14405+ // if this line has all white space, write '\n' and continue
14406+ if (in_leading_space && append_newline ) {
14407+ * dest_iter ++ = '\n' ;
14408+ continue ;
14409+ }
14410+
14411+ /* copy [new_line_start + whitespace_len, iter) to buffer, then
14412+ conditionally append '\n' */
14413+
14414+ Py_ssize_t new_line_len = iter - line_start - whitespace_len ;
14415+ assert (new_line_len >= 0 );
14416+ memcpy (dest_iter , line_start + whitespace_len , new_line_len );
14417+
14418+ dest_iter += new_line_len ;
14419+
14420+ if (append_newline ) {
14421+ * dest_iter ++ = '\n' ;
14422+ }
14423+ }
14424+
14425+ PyObject * res = PyUnicode_FromStringAndSize (dest , dest_iter - dest );
14426+ PyMem_Free (dest );
14427+ return res ;
14428+ }
14429+
1427314430static PyMethodDef unicode_methods [] = {
1427414431 UNICODE_ENCODE_METHODDEF
1427514432 UNICODE_REPLACE_METHODDEF
0 commit comments