diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0e58a5d1b3591..4db4239ea1b54 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6423,7 +6423,44 @@ def _append_list_of_frames(self, other, *args, **kwargs): from pandas.core.indexes.api import _normalize_dataframes from pandas.core.reshape.concat import concat - # TODO: sorting behavior when sort=None + # sorting behavior when sort=None + # TODO: remove when kwarg value changes + if sort is None: + # establish desired behavior + if _obj_type in (dict, Series): + # dict/ser + + sort = False + warn = False + elif _item_type in (dict, Series): + # [dict]/[ser] + + if (self.columns.get_indexer(other[0].columns) >= 0).all(): + # self.columns >= other[0].columns + sort = False + warn = False + else: + sort = True + types = [df.columns.dtype for df in [self] + other] + common = find_common_type(types) + warn = (common == object) + else: + # frame/[frame] + + if all(self.columns.equals(df.columns) for df in other): + # all values the same + sort = False + warn = False + else: + sort = True + types = [df.columns.dtype for df in [self] + other] + common = find_common_type(types) + warn = (common == object) + + # warn if necessary + if warn: + from pandas.core.indexes.api import _sort_msg + warnings.warn(_sort_msg, FutureWarning) # The behavior of concat is a bit problematic as it is. To get around, # we prepare the DataFrames before feeding them into concat. 
diff --git a/pandas/tests/reshape/test_append.py b/pandas/tests/reshape/test_append.py index 17e02fdffcc72..d2e07ec8d92ff 100644 --- a/pandas/tests/reshape/test_append.py +++ b/pandas/tests/reshape/test_append.py @@ -4,6 +4,7 @@ import pytest import pandas as pd +from pandas import DataFrame, Index, Series from pandas.core.indexes.base import InvalidIndexError from pandas.util.testing import assert_frame_equal @@ -328,6 +329,148 @@ def test_no_unecessary_upcast(self, sort): assert_frame_equal(result, expected) +class TestAppendSortNone(object): + """Regression tests to preserve the behavior of sort=None + """ + + def generate_frames(self, compare, special): + if compare == 'lt': + if special: + df1 = DataFrame([[11, 12]], columns=[2, 1]) + df2 = DataFrame([[13, 14, 15]], columns=[3, 2, 1]) + else: + df1 = DataFrame([[11, 12]], columns=list('ba')) + df2 = DataFrame([[13, 14, 15]], columns=list('cba')) + elif compare == 'eq': + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1]) + df2 = DataFrame([[14, 15, 16]], columns=[3, 2, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cba')) + df2 = DataFrame([[14, 15, 16]], columns=list('cba')) + elif compare == 'gt': + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1]) + df2 = DataFrame([[14, 15]], columns=[2, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cba')) + df2 = DataFrame([[14, 15]], columns=list('ba')) + elif compare == 'dups': + # special category for duplicates + # assumes compare = 'eq' + if special: + df1 = DataFrame([[11, 12, 13]], columns=[3, 3, 1]) + df2 = DataFrame([[14, 15, 16]], columns=[3, 3, 1]) + else: + df1 = DataFrame([[11, 12, 13]], columns=list('cca')) + df2 = DataFrame([[14, 15, 16]], columns=list('cca')) + + # avoid upcasting problems + df1 = df1.astype('float64') + df2 = df2.astype('float64') + + return df1, df2 + + def merge_indexes(self, idx1, idx2, sort): + len1 = idx1.size + len2 = idx2.size + + if len1 < len2: + # match 'lt' in 
self.generate_frames + vals1 = idx1.tolist() + vals2 = [idx2.tolist()[0]] + result = Index(vals1 + vals2) + else: + result = idx1.copy() + + return result.sort_values() if sort else result + + def merge_frames(self, df1, df2, sort): + new_index = self.merge_indexes(df1.columns, df2.columns, sort) + df1 = df1.reindex(new_index, axis=1) + df2 = df2.reindex(new_index, axis=1) + + values = np.vstack([df1.values[0, :], df2.values[0, :]]) + result = DataFrame(values, columns=new_index) + return result + + @pytest.mark.parametrize('input_type', ['series', 'dict']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups']) + def test_append_series_dict(self, compare, special, input_type): + # When appending a Series or dict, the resulting columns come unsorted + # and no warning is raised. + + sorts = False + warns = False + + df1, df2 = self.generate_frames(compare, special) + if input_type == 'series': + other = df2.loc[0] + else: + other = df2.loc[0].to_dict() + if compare == 'dups': + return + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('input_type', ['[series]', '[dict]']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt']) # dups won't work + def test_append_list_of_series_dict(self, compare, special, input_type): + # When appending a list of Series or list of dicts, the behavior is + # as specified below. 
+ + if compare in ('gt', 'eq'): + sorts = False + warns = False + else: + sorts = True + warns = not special + + df1, df2 = self.generate_frames(compare, special) + if input_type == '[series]': + other = [df2.loc[0]] + else: + other = [df2.loc[0].to_dict()] + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('input_type', ['dataframe', '[dataframe]']) + @pytest.mark.parametrize('special', [True, False]) + @pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups']) + def test_append_dframe_list_of_dframe(self, compare, special, input_type): + # When appenindg a DataFrame of list of DataFrames, the behavior is as + # specified below. + + if compare in ('dups', 'eq'): + sorts = False + warns = False + else: + sorts = True + warns = not special + + df1, df2 = self.generate_frames(compare, special) + if input_type == 'dataframe': + other = df2 + else: + other = [df2] + + ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None) + expected = self.merge_frames(df1, df2, sorts) + with ctx: + result = df1.append(other, ignore_index=True, sort=None) + assert_frame_equal(result, expected) + + class TestAppendColumnsIndex(object): @pytest.mark.parametrize('idx_name3', [None, 'foo', 'bar', 'baz']) @pytest.mark.parametrize('idx_name2', [None, 'foo', 'bar', 'baz'])