@@ -228,187 +228,149 @@ pymain_import_readline(const PyConfig *config)
228228 }
229229}
230230
231+ /* Strip common leading whitespace, just as textwrap.dedent.
232+ It stoles 1 reference from bytes if succeeded, else it will return NULL. */
233+ static PyObject * dedent_utf8_bytes (PyObject * bytes ) {
234+ if (bytes == NULL || !PyBytes_CheckExact (bytes )) {
235+ return NULL ;
236+ }
231237
232- /* Strip common leading whitespace utf encoded string */
233- static PyObject *
234- _utf_8_bytes_dedent (PyObject * bytes ){
235- char * input_data ;
238+ char * start ;
236239 Py_ssize_t nchars ;
237- bool curr_reading_whitespace = true;
238- int curr_num_leading_spaces = 0 ;
239- int curr_num_leading_tabs = 0 ;
240- int num_common_leading_chars ;
241- char c ;
242-
243- PyBytes_AsStringAndSize (bytes , & input_data , & nchars );
244-
245- int num_common_leading_spaces = nchars + 1 ;
246- int num_common_leading_tabs = nchars + 1 ;
247-
248- char * data_iter ;
249-
250- data_iter = input_data ;
251- while ( (c = * data_iter ++ ) ){
252- if (c == '\n' ) {
253- // Finished reading the line
254- if (!curr_reading_whitespace ) {
255- // If the line had some non-whitespace characters
256- // update the current common leading tab/space count
257- if (curr_num_leading_spaces < num_common_leading_spaces ) {
258- num_common_leading_spaces = curr_num_leading_spaces ;
259- }
260- if (curr_num_leading_tabs < num_common_leading_tabs ) {
261- num_common_leading_tabs = curr_num_leading_tabs ;
240+
241+ if (PyBytes_AsStringAndSize (bytes , & start , & nchars ) != 0 ) {
242+ return NULL ;
243+ }
244+
245+ char * end = start + nchars ;
246+ assert (start < end );
247+
248+ char * candidate_start = NULL ;
249+ Py_ssize_t candidate_len = 0 ;
250+
251+ for (char * iter = start ; iter < end ; ++ iter ) {
252+ char * line_start = iter ;
253+ char * leading_whitespace_end = NULL ;
254+
255+ // scan the whole line
256+ char c = 0 ;
257+ while (iter < end && (c = * iter ) != '\n' ) {
258+ if (!leading_whitespace_end && c != ' ' && c != '\t' ) {
259+ if (iter == line_start ) {
260+ // some line has no indent, fast exit!
261+ return bytes ;
262262 }
263+ leading_whitespace_end = iter ;
263264 }
264- // About to start reading a new line
265- curr_reading_whitespace = true;
266- curr_num_leading_spaces = 0 ;
267- curr_num_leading_tabs = 0 ;
268- }
269- else if (curr_reading_whitespace ){
270- if (c == ' ' && curr_num_leading_tabs == 0 ) {
271- curr_num_leading_spaces ++ ;
272- }
273- else if (c == '\t' && curr_num_leading_spaces == 0 ) {
274- curr_num_leading_tabs ++ ;
275- }
276- else {
277- // Encountered a non-whitespace character
278- curr_reading_whitespace = false;
279- }
265+ ++ iter ;
280266 }
281- }
282- if (num_common_leading_spaces > num_common_leading_tabs ){
283- num_common_leading_chars = num_common_leading_spaces ;
284- }
285- else {
286- num_common_leading_chars = num_common_leading_tabs ;
287- }
288-
289- if (num_common_leading_chars > 0 ) {
290- // We need to trigger a dedent
291- char * new_data ;
292- char * curr_line_ptr = input_data ;
293- char * next_line_ptr ;
294- char * new_start_loc ;
295- Py_ssize_t new_line_len ;
296- _PyBytesWriter writer ;
297- _PyBytesWriter_Init (& writer );
298- new_data = _PyBytesWriter_Alloc (& writer , nchars );
299- if (new_data == NULL ) {
300- return NULL ;
267+
268+ // we reach the end of a line
269+
270+ // if this line has all white space, skip it
271+ if (!leading_whitespace_end ) {
272+ continue ;
301273 }
302274
303- data_iter = input_data ;
304- c = * data_iter ;
305- while (c ) {
306- // Find the end of the current line.
307- while ( (c = * data_iter ++ ) != '\n' ){
308- if (c == NULL ) {
275+ if (!candidate_start ) {
276+ candidate_start = line_start ;
277+ candidate_len = leading_whitespace_end - line_start ;
278+ assert (candidate_len > 0 );
279+ } else {
280+ /* We then compare with the current longest leading whitespace.
281+
282+ [line_start, leading_whitespace_end) is the leading whitespace of
283+ this line,
284+
285+ [candidate_start, candidate_start + candidate_len)
286+ is the leading whitespace of the current longest leading
287+ whitespace. */
288+ Py_ssize_t new_candidate_len = 0 ;
289+
290+ for (char * candidate_iter = candidate_start ,
291+ * line_iter = line_start ;
292+ candidate_iter < candidate_start + candidate_len &&
293+ line_iter < leading_whitespace_end ;
294+ ++ candidate_iter , ++ line_iter ) {
295+ if (* candidate_iter != * line_iter ) {
309296 break ;
310297 }
298+ ++ new_candidate_len ;
311299 }
312- next_line_ptr = data_iter ;
313- Py_ssize_t line_len = next_line_ptr - curr_line_ptr ;
314- if (line_len > num_common_leading_chars ){
315- new_start_loc = curr_line_ptr + num_common_leading_chars ;
316- new_line_len = line_len - num_common_leading_chars ;
317- }
318- else {
319- new_start_loc = curr_line_ptr ;
320- new_line_len = line_len ;
300+
301+ candidate_len = new_candidate_len ;
302+ if (candidate_len == 0 ) {
303+ return bytes ;
321304 }
322- // Copy this line over to the new buffer (removing common
323- // leading chars)
324- new_data = _PyBytesWriter_WriteBytes (& writer , new_data , new_start_loc , new_line_len );
325- curr_line_ptr = next_line_ptr ;
326305 }
327- PyObject * new_bytes = _PyBytesWriter_Finish (& writer , new_data );
328- return new_bytes ;
329- }
330- else {
331- // No leading chars, no work to be done.
332- return bytes ;
333- }
334-
335- #if 0
336- // Allocate new data for the output as a copy of the input
337- PyBytesObject * new_bytes = PyBytes_FromStringAndSize (input_data , nchars );
338- if (new_bytes == NULL ) {
339- return NULL ;
340306 }
341- char * new_data = PyBytes_AsString (new_bytes );
342307
343- // Step 1: Find N = the common number leading whitespace chars
344- Py_ssize_t num_common_leading_spaces = nchars + 1 ;
308+ assert (candidate_len > 0 );
345309
346- // Count the number of leading spaces on each line
347- // Use the output array as a temporary buffer (we will repopulate it later)
348- char * line = strtok (new_data , "\n" );
349- while (line ) {
350- // Move the pointer up to the first non-space character
351- char * first_nonspace = line ;
352- while (* first_nonspace == ' ' ){
353- first_nonspace ++ ;
354- }
355- // Only check lines that contain non-whitespace characters
356- if (* first_nonspace != '\0' ) {
310+ // trigger a dedent
311+ char * p ;
312+ PyObject * new_bytes ;
313+ char * line_start ;
314+ Py_ssize_t new_line_len ;
315+ bool in_leading_space ;
316+ _PyBytesWriter writer ;
357317
358- Py_ssize_t num_leading_spaces = first_nonspace - line ;
359- if (num_leading_spaces < num_common_leading_spaces ) {
360- num_common_leading_spaces = num_leading_spaces ;
361- }
362- }
363- line = strtok (NULL , "\n" );
318+ _PyBytesWriter_Init (& writer );
319+ p = _PyBytesWriter_Alloc (& writer , nchars );
320+ if (p == NULL ) {
321+ goto error ;
364322 }
365323
366- char * end_ptr = input_data + nchars ;
367- char * curr_line_ptr = input_data ;
368- char * next_line_ptr ;
369- char * new_start_loc ;
370- Py_ssize_t new_line_len ;
324+ for (char * iter = start ; iter < end ; ++ iter ) {
325+ line_start = iter ;
371326
372- // Step 2: Remove N leading whitespace chars from each line by copying data
373- // (except leading spaces) from the input buffer to the output buffer one
374- // line at a time.
375-
376- char * curr_dst = new_data ;
377- while (curr_line_ptr < end_ptr ) {
378- // Find the end of the current line.
379- next_line_ptr = strstr (curr_line_ptr , "\n" );
380- if (next_line_ptr == NULL ) {
381- next_line_ptr = end_ptr ;
382- }
383- else {
384- next_line_ptr ++ ;
327+ // iterate over a line
328+ while (iter < end && * iter != '\n' ) {
329+ if (in_leading_space && * iter != ' ' && * iter != '\t' ) {
330+ in_leading_space = false;
331+ }
332+ ++ iter ;
385333 }
386334
387- Py_ssize_t line_len = next_line_ptr - curr_line_ptr ;
335+ // invariant: *iter == '\n' or iter == end
388336
389- if (line_len > num_common_leading_spaces ){
390- new_start_loc = curr_line_ptr + num_common_leading_spaces ;
391- new_line_len = line_len - num_common_leading_spaces ;
337+ // if this line has all white space, write '\n'
338+ if (in_leading_space ) {
339+ p = _PyBytesWriter_Prepare (& writer , p , 1 );
340+ if (p == NULL ) {
341+ goto error ;
342+ }
343+ * p ++ = '\n' ;
344+ continue ;
392345 }
393- else {
394- new_start_loc = curr_line_ptr ;
395- new_line_len = line_len ;
346+
347+ // copy [new_line_start + candidate_len, iter) to buffer, then append
348+ // '\n'
349+ new_line_len = iter - line_start - candidate_len ;
350+ assert (new_line_len >= 0 );
351+ p = _PyBytesWriter_Prepare (& writer , p , new_line_len + 1 );
352+ if (p == NULL ) {
353+ goto error ;
396354 }
355+ memcpy (p , line_start + candidate_len , new_line_len );
397356
398- // Copy the part of the line we want to keep to the new location
399- strncpy (curr_dst , new_start_loc , new_line_len );
400- curr_dst += new_line_len ;
357+ p += new_line_len ;
401358
402- curr_line_ptr = next_line_ptr ;
359+ // this may always append '\n' at the end of the input
360+ * p ++ = '\n' ;
361+ }
362+
363+ new_bytes = _PyBytesWriter_Finish (& writer , p );
364+ if (new_bytes == NULL ) {
365+ goto error ;
403366 }
404- // null terminate the string (is this sufficient?)
405- (* curr_dst ) = NULL ;
406367 return new_bytes ;
407- #endif
408368
369+ error :
370+ _PyBytesWriter_Dealloc (& writer );
371+ return NULL ;
409372}
410373
411-
412374static int
413375pymain_run_command (wchar_t * command )
414376{
@@ -430,17 +392,9 @@ pymain_run_command(wchar_t *command)
430392 goto error ;
431393 }
432394
433- // Only perform auto-dedent if the string starts with a newline
434- if (* PyBytes_AsString (bytes ) == '\n' ) {
435- PyObject * new_bytes = _utf_8_bytes_dedent (bytes );
436- if (new_bytes == NULL ) {
437- goto error ;
438- }
439- if (new_bytes != bytes ) {
440- // dedent allocated new bytes, replace the old with the new
441- Py_DECREF (bytes );
442- }
443- bytes = new_bytes ;
395+ bytes = dedent_utf8_bytes (bytes );
396+ if (bytes == NULL ) {
397+ goto error ;
444398 }
445399
446400 PyCompilerFlags cf = _PyCompilerFlags_INIT ;
0 commit comments