|
53 | 53 | DatetimeIndex, |
54 | 54 | NaT, |
55 | 55 | Timestamp, |
56 | | - concat, |
57 | 56 | isna, |
58 | 57 | to_datetime, |
59 | 58 | to_timedelta, |
@@ -1663,7 +1662,7 @@ def read( |
1663 | 1662 | # restarting at 0 for each chunk. |
1664 | 1663 | if index_col is None: |
1665 | 1664 | ix = np.arange(self._lines_read - read_lines, self._lines_read) |
1666 | | - data = data.set_index(ix) |
| 1665 | + data.index = ix # set attr instead of set_index to avoid copy |
1667 | 1666 |
|
1668 | 1667 | if columns is not None: |
1669 | 1668 | try: |
@@ -1779,19 +1778,18 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra |
1779 | 1778 | if dtype not in (np.float32, np.float64): |
1780 | 1779 | dtype = np.float64 |
1781 | 1780 | replacement = Series(series, dtype=dtype) |
| 1781 | + if not replacement._values.flags["WRITEABLE"]: |
| 1782 | + # only relevant for ArrayManager; construction |
| 1783 | + # path for BlockManager ensures writeability |
| 1784 | + replacement = replacement.copy() |
1782 | 1785 | # Note: operating on ._values is much faster than directly |
1783 | 1786 | # TODO: can we fix that? |
1784 | 1787 | replacement._values[missing] = np.nan |
1785 | 1788 | replacements[colname] = replacement |
| 1789 | + |
1786 | 1790 | if replacements: |
1787 | | - columns = data.columns |
1788 | | - replacement_df = DataFrame(replacements, copy=False) |
1789 | | - replaced = concat( |
1790 | | - [data.drop(replacement_df.columns, axis=1), replacement_df], |
1791 | | - axis=1, |
1792 | | - copy=False, |
1793 | | - ) |
1794 | | - data = replaced[columns] |
| 1791 | + for col in replacements: |
| 1792 | + data[col] = replacements[col] |
1795 | 1793 | return data |
1796 | 1794 |
|
1797 | 1795 | def _insert_strls(self, data: DataFrame) -> DataFrame: |
|
0 commit comments