Skip to content

Commit d336ac7

Browse files
committed
reimplement it to imitate textwrap.dedent
1 parent 5e7983f commit d336ac7

File tree

1 file changed

+115
-161
lines changed

1 file changed

+115
-161
lines changed

Modules/main.c

Lines changed: 115 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -228,187 +228,149 @@ pymain_import_readline(const PyConfig *config)
228228
}
229229
}
230230

231+
/* Strip common leading whitespace, just as textwrap.dedent does.
232+
It steals one reference to bytes on success; on failure it returns NULL. */
233+
static PyObject *dedent_utf8_bytes(PyObject *bytes) {
234+
if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
235+
return NULL;
236+
}
231237

232-
/* Strip common leading whitespace utf encoded string */
233-
static PyObject*
234-
_utf_8_bytes_dedent(PyObject *bytes){
235-
char *input_data;
238+
char *start;
236239
Py_ssize_t nchars;
237-
bool curr_reading_whitespace = true;
238-
int curr_num_leading_spaces = 0;
239-
int curr_num_leading_tabs = 0;
240-
int num_common_leading_chars;
241-
char c;
242-
243-
PyBytes_AsStringAndSize(bytes, &input_data, &nchars);
244-
245-
int num_common_leading_spaces = nchars + 1;
246-
int num_common_leading_tabs = nchars + 1;
247-
248-
char *data_iter;
249-
250-
data_iter = input_data;
251-
while ( (c = *data_iter++) ){
252-
if (c == '\n') {
253-
// Finished reading the line
254-
if (!curr_reading_whitespace) {
255-
// If the line had some non-whitespace characters
256-
// update the current common leading tab/space count
257-
if (curr_num_leading_spaces < num_common_leading_spaces) {
258-
num_common_leading_spaces = curr_num_leading_spaces;
259-
}
260-
if (curr_num_leading_tabs < num_common_leading_tabs) {
261-
num_common_leading_tabs = curr_num_leading_tabs;
240+
241+
if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
242+
return NULL;
243+
}
244+
245+
char *end = start + nchars;
246+
assert(start < end);
247+
248+
char *candidate_start = NULL;
249+
Py_ssize_t candidate_len = 0;
250+
251+
for (char *iter = start; iter < end; ++iter) {
252+
char *line_start = iter;
253+
char *leading_whitespace_end = NULL;
254+
255+
// scan the whole line
256+
char c = 0;
257+
while (iter < end && (c = *iter) != '\n') {
258+
if (!leading_whitespace_end && c != ' ' && c != '\t') {
259+
if (iter == line_start) {
260+
// some line has no indent, fast exit!
261+
return bytes;
262262
}
263+
leading_whitespace_end = iter;
263264
}
264-
// About to start reading a new line
265-
curr_reading_whitespace = true;
266-
curr_num_leading_spaces = 0;
267-
curr_num_leading_tabs = 0;
268-
}
269-
else if (curr_reading_whitespace){
270-
if (c == ' ' && curr_num_leading_tabs == 0) {
271-
curr_num_leading_spaces++;
272-
}
273-
else if (c == '\t' && curr_num_leading_spaces == 0) {
274-
curr_num_leading_tabs++;
275-
}
276-
else {
277-
// Encountered a non-whitespace character
278-
curr_reading_whitespace = false;
279-
}
265+
++iter;
280266
}
281-
}
282-
if (num_common_leading_spaces > num_common_leading_tabs){
283-
num_common_leading_chars = num_common_leading_spaces;
284-
}
285-
else {
286-
num_common_leading_chars = num_common_leading_tabs;
287-
}
288-
289-
if (num_common_leading_chars > 0) {
290-
// We need to trigger a dedent
291-
char *new_data;
292-
char *curr_line_ptr = input_data;
293-
char *next_line_ptr;
294-
char *new_start_loc;
295-
Py_ssize_t new_line_len;
296-
_PyBytesWriter writer;
297-
_PyBytesWriter_Init(&writer);
298-
new_data = _PyBytesWriter_Alloc(&writer, nchars);
299-
if (new_data == NULL) {
300-
return NULL;
267+
268+
// we reach the end of a line
269+
270+
// if this line is all whitespace, skip it
271+
if (!leading_whitespace_end) {
272+
continue;
301273
}
302274

303-
data_iter = input_data;
304-
c = *data_iter;
305-
while (c) {
306-
// Find the end of the current line.
307-
while ( (c = *data_iter++) != '\n' ){
308-
if (c == NULL) {
275+
if (!candidate_start) {
276+
candidate_start = line_start;
277+
candidate_len = leading_whitespace_end - line_start;
278+
assert(candidate_len > 0);
279+
} else {
280+
/* We then compare with the current longest common leading whitespace.
281+
282+
[line_start, leading_whitespace_end) is the leading whitespace of
283+
this line,
284+
285+
[candidate_start, candidate_start + candidate_len)
286+
is the current longest common leading
287+
whitespace. */
288+
Py_ssize_t new_candidate_len = 0;
289+
290+
for (char *candidate_iter = candidate_start,
291+
*line_iter = line_start;
292+
candidate_iter < candidate_start + candidate_len &&
293+
line_iter < leading_whitespace_end;
294+
++candidate_iter, ++line_iter) {
295+
if (*candidate_iter != *line_iter) {
309296
break;
310297
}
298+
++new_candidate_len;
311299
}
312-
next_line_ptr = data_iter;
313-
Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
314-
if (line_len > num_common_leading_chars){
315-
new_start_loc = curr_line_ptr + num_common_leading_chars;
316-
new_line_len = line_len - num_common_leading_chars;
317-
}
318-
else {
319-
new_start_loc = curr_line_ptr;
320-
new_line_len = line_len;
300+
301+
candidate_len = new_candidate_len;
302+
if (candidate_len == 0) {
303+
return bytes;
321304
}
322-
// Copy this line over to the new buffer (removing common
323-
// leading chars)
324-
new_data = _PyBytesWriter_WriteBytes(&writer, new_data, new_start_loc, new_line_len);
325-
curr_line_ptr = next_line_ptr;
326305
}
327-
PyObject *new_bytes = _PyBytesWriter_Finish(&writer, new_data);
328-
return new_bytes;
329-
}
330-
else {
331-
// No leading chars, no work to be done.
332-
return bytes;
333-
}
334-
335-
#if 0
336-
// Allocate new data for the output as a copy of the input
337-
PyBytesObject *new_bytes = PyBytes_FromStringAndSize(input_data, nchars);
338-
if (new_bytes == NULL) {
339-
return NULL;
340306
}
341-
char *new_data = PyBytes_AsString(new_bytes);
342307

343-
// Step 1: Find N = the common number leading whitespace chars
344-
Py_ssize_t num_common_leading_spaces = nchars + 1;
308+
assert(candidate_len > 0);
345309

346-
// Count the number of leading spaces on each line
347-
// Use the output array as a temporary buffer (we will repopulate it later)
348-
char *line = strtok(new_data, "\n");
349-
while (line) {
350-
// Move the pointer up to the first non-space character
351-
char *first_nonspace = line;
352-
while (*first_nonspace == ' '){
353-
first_nonspace++;
354-
}
355-
// Only check lines that contain non-whitespace characters
356-
if (*first_nonspace != '\0') {
310+
// trigger a dedent
311+
char *p;
312+
PyObject *new_bytes;
313+
char *line_start;
314+
Py_ssize_t new_line_len;
315+
bool in_leading_space;
316+
_PyBytesWriter writer;
357317

358-
Py_ssize_t num_leading_spaces = first_nonspace - line;
359-
if (num_leading_spaces < num_common_leading_spaces) {
360-
num_common_leading_spaces = num_leading_spaces;
361-
}
362-
}
363-
line = strtok(NULL, "\n");
318+
_PyBytesWriter_Init(&writer);
319+
p = _PyBytesWriter_Alloc(&writer, nchars);
320+
if (p == NULL) {
321+
goto error;
364322
}
365323

366-
char *end_ptr = input_data + nchars;
367-
char *curr_line_ptr = input_data;
368-
char *next_line_ptr;
369-
char *new_start_loc;
370-
Py_ssize_t new_line_len;
324+
for (char *iter = start; iter < end; ++iter) {
325+
line_start = iter;
371326

372-
// Step 2: Remove N leading whitespace chars from each line by copying data
373-
// (except leading spaces) from the input buffer to the output buffer one
374-
// line at a time.
375-
376-
char *curr_dst = new_data;
377-
while (curr_line_ptr < end_ptr) {
378-
// Find the end of the current line.
379-
next_line_ptr = strstr(curr_line_ptr, "\n");
380-
if (next_line_ptr == NULL) {
381-
next_line_ptr = end_ptr;
382-
}
383-
else {
384-
next_line_ptr++;
327+
// iterate over a line
328+
while (iter < end && *iter != '\n') {
329+
if (in_leading_space && *iter != ' ' && *iter != '\t') {
330+
in_leading_space = false;
331+
}
332+
++iter;
385333
}
386334

387-
Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
335+
// invariant: *iter == '\n' or iter == end
388336

389-
if (line_len > num_common_leading_spaces){
390-
new_start_loc = curr_line_ptr + num_common_leading_spaces;
391-
new_line_len = line_len - num_common_leading_spaces;
337+
// if this line is all whitespace, write only '\n'
338+
if (in_leading_space) {
339+
p = _PyBytesWriter_Prepare(&writer, p, 1);
340+
if (p == NULL) {
341+
goto error;
342+
}
343+
*p++ = '\n';
344+
continue;
392345
}
393-
else {
394-
new_start_loc = curr_line_ptr;
395-
new_line_len = line_len;
346+
347+
// copy [line_start + candidate_len, iter) to buffer, then append
348+
// '\n'
349+
new_line_len = iter - line_start - candidate_len;
350+
assert(new_line_len >= 0);
351+
p = _PyBytesWriter_Prepare(&writer, p, new_line_len + 1);
352+
if (p == NULL) {
353+
goto error;
396354
}
355+
memcpy(p, line_start + candidate_len, new_line_len);
397356

398-
// Copy the part of the line we want to keep to the new location
399-
strncpy(curr_dst, new_start_loc, new_line_len);
400-
curr_dst += new_line_len;
357+
p += new_line_len;
401358

402-
curr_line_ptr = next_line_ptr;
359+
// note: this appends '\n' even when the last input line has no trailing newline
360+
*p++ = '\n';
361+
}
362+
363+
new_bytes = _PyBytesWriter_Finish(&writer, p);
364+
if (new_bytes == NULL) {
365+
goto error;
403366
}
404-
// null terminate the string (is this sufficient?)
405-
(*curr_dst) = NULL;
406367
return new_bytes;
407-
#endif
408368

369+
error:
370+
_PyBytesWriter_Dealloc(&writer);
371+
return NULL;
409372
}
410373

411-
412374
static int
413375
pymain_run_command(wchar_t *command)
414376
{
@@ -430,17 +392,9 @@ pymain_run_command(wchar_t *command)
430392
goto error;
431393
}
432394

433-
// Only perform auto-dedent if the string starts with a newline
434-
if (*PyBytes_AsString(bytes) == '\n') {
435-
PyObject *new_bytes = _utf_8_bytes_dedent(bytes);
436-
if (new_bytes == NULL) {
437-
goto error;
438-
}
439-
if (new_bytes != bytes) {
440-
// dedent allocated new bytes, replace the old with the new
441-
Py_DECREF(bytes);
442-
}
443-
bytes = new_bytes;
395+
bytes = dedent_utf8_bytes(bytes);
396+
if (bytes == NULL) {
397+
goto error;
444398
}
445399

446400
PyCompilerFlags cf = _PyCompilerFlags_INIT;

0 commit comments

Comments
 (0)