@@ -1087,6 +1087,44 @@ def string_array_replace_from_nan_rep(
1087
1087
return arr
1088
1088
1089
1089
1090
@cython.boundscheck(False)
@cython.wraparound(False)
def convert_json_to_lines(object arr):
    """
    Replace the commas separating top-level JSON records with line feeds.

    The input is scanned byte by byte after UTF-8 encoding. A comma is
    turned into a newline only when it is outside every double-quoted
    string and outside every ``{...}`` object; all other bytes are left
    unchanged. Only curly braces are counted as brackets.

    Parameters
    ----------
    arr : str
        Comma separated JSON records, e.g. ``'{"a":1},{"a":2}'``. Must
        provide ``.encode('utf-8')``.

    Returns
    -------
    str
        The same text with each record-separating comma replaced by
        ``'\\n'``.
    """
    cdef:
        Py_ssize_t i = 0, num_open_brackets_seen = 0, length
        bint in_quotes = False, is_escaping = False
        ndarray[uint8_t] narr
        unsigned char v, comma, left_bracket, right_bracket, newline
        unsigned char quote, backslash

    newline = ord('\n')
    comma = ord(',')
    left_bracket = ord('{')
    right_bracket = ord('}')
    quote = ord('"')
    backslash = ord('\\')

    # frombuffer yields a read-only view of the encoded bytes; copy so the
    # commas can be overwritten in place.
    narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
    length = narr.shape[0]
    for i in range(length):
        v = narr[i]
        # A quote toggles the string state unless it is escaped by an
        # *unescaped* backslash. Tracking the escape state (instead of
        # peeking at narr[i - 1]) handles strings ending in '\\\\' and a
        # quote at index 0 without reading out of bounds.
        if v == quote and not is_escaping:
            in_quotes = not in_quotes
        # A backslash starts an escape; whatever byte follows ends it.
        if v == backslash or is_escaping:
            is_escaping = not is_escaping
        if v == comma:  # commas that should be \n
            if num_open_brackets_seen == 0 and not in_quotes:
                narr[i] = newline
        elif v == left_bracket:
            if not in_quotes:
                num_open_brackets_seen += 1
        elif v == right_bracket:
            if not in_quotes:
                num_open_brackets_seen -= 1

    # tobytes() is the supported spelling of the deprecated tostring().
    return narr.tobytes().decode('utf-8')
1126
+
1127
+
1090
1128
@ cython.boundscheck (False )
1091
1129
@ cython.wraparound (False )
1092
1130
def write_csv_rows (list data , ndarray data_index ,
0 commit comments