|
20 | 20 | import pandas as pd |
21 | 21 | import pandas.core.nanops as nanops |
22 | 22 | import pandas.core.algorithms as algorithms |
23 | | -import pandas.io.formats.printing as printing |
24 | 23 |
|
25 | 24 | import pandas.util.testing as tm |
26 | 25 | import pandas.util._test_decorators as td |
@@ -841,87 +840,62 @@ def wrapper(x): |
841 | 840 | expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) |
842 | 841 | tm.assert_series_equal(r1, expected) |
843 | 842 |
|
844 | | - def test_mode(self): |
845 | | - df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], |
846 | | - "B": [10, 10, 10, np.nan, 3, 4], |
847 | | - "C": [8, 8, 8, 9, 9, 9], |
848 | | - "D": np.arange(6, dtype='int64'), |
849 | | - "E": [8, 8, 1, 1, 3, 3]}) |
850 | | - tm.assert_frame_equal(df[["A"]].mode(), |
851 | | - pd.DataFrame({"A": [12]})) |
852 | | - expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ |
853 | | - to_frame() |
854 | | - tm.assert_frame_equal(df[["D"]].mode(), expected) |
855 | | - expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() |
856 | | - tm.assert_frame_equal(df[["E"]].mode(), expected) |
857 | | - tm.assert_frame_equal(df[["A", "B"]].mode(), |
858 | | - pd.DataFrame({"A": [12], "B": [10.]})) |
859 | | - tm.assert_frame_equal(df.mode(), |
860 | | - pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, |
861 | | - np.nan, np.nan], |
862 | | - "B": [10, np.nan, np.nan, np.nan, |
863 | | - np.nan, np.nan], |
864 | | - "C": [8, 9, np.nan, np.nan, np.nan, |
865 | | - np.nan], |
866 | | - "D": [0, 1, 2, 3, 4, 5], |
867 | | - "E": [1, 3, 8, np.nan, np.nan, |
868 | | - np.nan]})) |
869 | | - |
870 | | - # outputs in sorted order |
871 | | - df["C"] = list(reversed(df["C"])) |
872 | | - printing.pprint_thing(df["C"]) |
873 | | - printing.pprint_thing(df["C"].mode()) |
874 | | - a, b = (df[["A", "B", "C"]].mode(), |
875 | | - pd.DataFrame({"A": [12, np.nan], |
876 | | - "B": [10, np.nan], |
877 | | - "C": [8, 9]})) |
878 | | - printing.pprint_thing(a) |
879 | | - printing.pprint_thing(b) |
880 | | - tm.assert_frame_equal(a, b) |
881 | | - # should work with heterogeneous types |
882 | | - df = pd.DataFrame({"A": np.arange(6, dtype='int64'), |
883 | | - "B": pd.date_range('2011', periods=6), |
884 | | - "C": list('abcdef')}) |
885 | | - exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), |
886 | | - dtype=df["A"].dtype), |
887 | | - "B": pd.Series(pd.date_range('2011', periods=6), |
888 | | - dtype=df["B"].dtype), |
889 | | - "C": pd.Series(list('abcdef'), |
890 | | - dtype=df["C"].dtype)}) |
891 | | - tm.assert_frame_equal(df.mode(), exp) |
892 | | - |
893 | | - def test_mode_dropna(self): |
894 | | - # GH 17534 |
895 | | - # Test the dropna=False parameter for mode |
896 | | - |
897 | | - df = pd.DataFrame({"A": [1, np.nan, np.nan, np.nan], |
898 | | - "B": [np.nan, np.nan, 'a', np.nan], |
899 | | - "C": Categorical([np.nan, np.nan, 'a', np.nan]), |
900 | | - "D": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
901 | | - "E": to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
902 | | - "F": [1, 1, np.nan, np.nan], |
903 | | - "G": [np.nan, np.nan, 'a', 'a'], |
904 | | - "H": Categorical(['a', np.nan, 'a', np.nan]), |
905 | | - "I": to_datetime(['2000-1-2', '2000-1-2', |
| 843 | + @pytest.mark.parametrize("dropna, expected", [ |
| 844 | + (True, {'A': [12], |
| 845 | + 'B': [10.0], |
| 846 | + 'C': [1.0], |
| 847 | + 'D': ['a'], |
| 848 | + 'E': Categorical(['a'], categories=['a']), |
| 849 | + 'F': to_datetime(['2000-1-2']), |
| 850 | + 'G': to_timedelta(['1 days'])}), |
| 851 | + (False, {'A': [12], |
| 852 | + 'B': [10.0], |
| 853 | + 'C': [np.nan], |
| 854 | + 'D': np.array([np.nan], dtype=object), |
| 855 | + 'E': Categorical([np.nan], categories=['a']), |
| 856 | + 'F': [pd.NaT], |
| 857 | + 'G': to_timedelta([pd.NaT])}), |
| 858 | + (True, {'H': [8, 9, np.nan, np.nan], |
| 859 | + 'I': [8, 9, np.nan, np.nan], |
| 860 | + 'J': [1, np.nan, np.nan, np.nan], |
| 861 | + 'K': ['a', np.nan, np.nan, np.nan], |
| 862 | + 'L': Categorical(['a', np.nan, np.nan, np.nan], |
| 863 | + categories=['a']), |
| 864 | + 'M': to_datetime(['2000-1-2', 'NaT', 'NaT', 'NaT']), |
| 865 | + 'N': to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 866 | + 'O': [0, 1, 2, 3]}), |
| 867 | + (False, {'H': [8, 9, np.nan, np.nan], |
| 868 | + 'I': [8, 9, np.nan, np.nan], |
| 869 | + 'J': [1, np.nan, np.nan, np.nan], |
| 870 | + 'K': [np.nan, 'a', np.nan, np.nan], |
| 871 | + 'L': Categorical([np.nan, 'a', np.nan, np.nan], |
| 872 | + categories=['a']), |
| 873 | + 'M': to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 874 | + 'N': to_timedelta(['nan', '1 days', 'nan', 'nan']), |
| 875 | + 'O': [0, 1, 2, 3]}) |
| 876 | + ]) |
| 877 | + def test_mode_dropna(self, dropna, expected): |
| 878 | + |
| 879 | + df = pd.DataFrame({"A": [12, 12, 19, 11], |
| 880 | + "B": [10, 10, np.nan, 3], |
| 881 | + "C": [1, np.nan, np.nan, np.nan], |
| 882 | + "D": [np.nan, np.nan, 'a', np.nan], |
| 883 | + "E": Categorical([np.nan, np.nan, 'a', np.nan]), |
| 884 | + "F": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 885 | + "G": to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 886 | + "H": [8, 8, 9, 9], |
| 887 | + "I": [9, 9, 8, 8], |
| 888 | + "J": [1, 1, np.nan, np.nan], |
| 889 | + "K": [np.nan, np.nan, 'a', 'a'], |
| 890 | + "L": Categorical(['a', np.nan, 'a', np.nan]), |
| 891 | + "M": to_datetime(['2000-1-2', '2000-1-2', |
906 | 892 | 'NaT', 'NaT']), |
907 | | - "J": to_timedelta(['1 days', 'nan', |
908 | | - '1 days', 'nan'])}) |
909 | | - |
910 | | - result = df.loc[:, 'A':'E'].mode(dropna=False) |
911 | | - expected = pd.DataFrame({'A': [np.nan], |
912 | | - 'B': np.array([np.nan], dtype=object), |
913 | | - 'C': Categorical([np.nan], categories=['a']), |
914 | | - 'D': [pd.NaT], |
915 | | - 'E': to_timedelta([pd.NaT])}) |
916 | | - tm.assert_frame_equal(result, expected) |
917 | | - |
918 | | - result = df.loc[:, 'F':'J'].mode(dropna=False) |
919 | | - expected = pd.DataFrame({'F': [1, np.nan], |
920 | | - 'G': [np.nan, 'a'], |
921 | | - 'H': Categorical([np.nan, 'a'], |
922 | | - categories=['a']), |
923 | | - 'I': to_datetime(['NaT', '2000-1-2']), |
924 | | - 'J': to_timedelta(['nan', '1 days'])}) |
| 893 | + "N": to_timedelta(['1 days', 'nan', |
| 894 | + '1 days', 'nan']), |
| 895 | + "O": np.arange(4, dtype='int64')}) |
| 896 | + |
| 897 | + result = df[sorted(list(expected.keys()))].mode(dropna=dropna) |
| 898 | + expected = pd.DataFrame(expected) |
925 | 899 | tm.assert_frame_equal(result, expected) |
926 | 900 |
|
927 | 901 | def test_operators_timedelta64(self): |
|
0 commit comments