Skip to content

Commit 5e7983f

Browse files
committed
Handwritten char iter and _PyBytesWriter_
1 parent 6205c05 commit 5e7983f

File tree

1 file changed

+102
-4
lines changed

1 file changed

+102
-4
lines changed

Modules/main.c

Lines changed: 102 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,105 @@ static PyObject*
234234
_utf_8_bytes_dedent(PyObject *bytes)
{
    /*
     * Remove the indentation shared by every non-blank line of `bytes`.
     *
     * A line's indentation is its leading run of spaces OR its leading run
     * of tabs (whichever character comes first; the run stops as soon as the
     * other character, or anything else, is seen).  Lines made up solely of
     * such a run are "blank" and do not influence the common indentation.
     *
     * Returns:
     *   - `bytes` itself (the same pointer, no new reference is taken) when
     *     there is nothing to strip; the caller compares pointers to tell
     *     the two cases apart,
     *   - a new bytes object holding the dedented text otherwise,
     *   - NULL with an exception set on failure.
     */
    char *input_data;
    Py_ssize_t nchars;

    if (PyBytes_AsStringAndSize(bytes, &input_data, &nchars) < 0) {
        return NULL;
    }
    /* All scanning is bounded by the explicit length, never by a NUL byte,
       so embedded NULs cannot truncate the text or be copied as content. */
    const char *input_end = input_data + nchars;

    /* Pass 1: find the smallest space-run and tab-run over non-blank lines.
       nchars + 1 is a sentinel larger than any possible run. */
    Py_ssize_t min_spaces = nchars + 1;
    Py_ssize_t min_tabs = nchars + 1;

    const char *cursor = input_data;
    while (cursor < input_end) {
        Py_ssize_t spaces = 0;
        Py_ssize_t tabs = 0;

        /* Count the homogeneous leading run of this line. */
        while (cursor < input_end) {
            if (*cursor == ' ' && tabs == 0) {
                spaces++;
            }
            else if (*cursor == '\t' && spaces == 0) {
                tabs++;
            }
            else {
                break;
            }
            cursor++;
        }

        /* The line is non-blank if the run stopped on real content, i.e.
           not on a newline and not on the end of the input.  This also
           covers a final line that has no trailing newline. */
        if (cursor < input_end && *cursor != '\n') {
            if (spaces < min_spaces) {
                min_spaces = spaces;
            }
            if (tabs < min_tabs) {
                min_tabs = tabs;
            }
        }

        /* Advance to the first byte after this line's newline (or to the
           end of the input). */
        while (cursor < input_end) {
            if (*cursor++ == '\n') {
                break;
            }
        }
    }

    /* Every non-blank line has zero spaces or zero tabs, so once at least
       one such line was seen one of the minima is 0 and the larger value is
       the common indentation.  If no non-blank line was seen both minima
       still hold the sentinel. */
    Py_ssize_t num_common_leading_chars =
        (min_spaces > min_tabs) ? min_spaces : min_tabs;

    if (num_common_leading_chars == 0 || num_common_leading_chars > nchars) {
        /* No common indentation (or no non-blank line): nothing to do. */
        return bytes;
    }

    /* Pass 2: copy each line, dropping the common prefix. */
    _PyBytesWriter writer;
    _PyBytesWriter_Init(&writer);
    char *new_data = _PyBytesWriter_Alloc(&writer, nchars);
    if (new_data == NULL) {
        return NULL;
    }

    const char *line_start = input_data;
    while (line_start < input_end) {
        /* Find the end of the current line (newline included). */
        const char *line_end = line_start;
        while (line_end < input_end) {
            if (*line_end++ == '\n') {
                break;
            }
        }

        const char *copy_from = line_start;
        Py_ssize_t copy_len = line_end - line_start;
        /* Lines no longer than the prefix are blank lines; copy them
           untouched so their newline is never stripped. */
        if (copy_len > num_common_leading_chars) {
            copy_from += num_common_leading_chars;
            copy_len -= num_common_leading_chars;
        }

        new_data = _PyBytesWriter_WriteBytes(&writer, new_data,
                                             copy_from, copy_len);
        if (new_data == NULL) {
            _PyBytesWriter_Dealloc(&writer);
            return NULL;
        }
        line_start = line_end;
    }

    return _PyBytesWriter_Finish(&writer, new_data);
}
315410

316411

@@ -341,7 +436,10 @@ pymain_run_command(wchar_t *command)
341436
if (new_bytes == NULL) {
342437
goto error;
343438
}
344-
Py_DECREF(bytes);
439+
if (new_bytes != bytes) {
440+
// dedent allocated new bytes, replace the old with the new
441+
Py_DECREF(bytes);
442+
}
345443
bytes = new_bytes;
346444
}
347445

0 commit comments

Comments
 (0)