| 
12 | 12 | # pylint: disable=E1101,E1103  | 
13 | 13 | # pylint: disable=W0212,W0231,W0703,W0622  | 
14 | 14 | 
 
  | 
 | 15 | +import functools  | 
15 | 16 | import collections  | 
16 | 17 | import itertools  | 
17 | 18 | import sys  | 
 | 
25 | 26 | from pandas.core.common import (isnull, notnull, PandasError, _try_sort,  | 
26 | 27 |                                 _default_index, _maybe_upcast, _is_sequence,  | 
27 | 28 |                                 _infer_dtype_from_scalar, _values_from_object,  | 
28 |  | -                                is_list_like)  | 
 | 29 | +                                is_list_like, _get_dtype)  | 
29 | 30 | from pandas.core.generic import NDFrame, _shared_docs  | 
30 | 31 | from pandas.core.index import Index, MultiIndex, _ensure_index  | 
31 | 32 | from pandas.core.indexing import (_maybe_droplevels,  | 
32 | 33 |                                   _convert_to_index_sliceable,  | 
33 |  | -                                  _check_bool_indexer, _maybe_convert_indices)  | 
 | 34 | +                                  _check_bool_indexer)  | 
34 | 35 | from pandas.core.internals import (BlockManager,  | 
35 | 36 |                                    create_block_manager_from_arrays,  | 
36 | 37 |                                    create_block_manager_from_blocks)  | 
37 | 38 | from pandas.core.series import Series  | 
38 | 39 | import pandas.computation.expressions as expressions  | 
39 | 40 | from pandas.computation.eval import eval as _eval  | 
40 |  | -from pandas.computation.scope import _ensure_scope  | 
41 | 41 | from numpy import percentile as _quantile  | 
42 | 42 | from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u,  | 
43 | 43 |                           OrderedDict, raise_with_traceback)  | 
@@ -1867,6 +1867,118 @@ def eval(self, expr, **kwargs):  | 
1867 | 1867 |         kwargs['resolvers'] = kwargs.get('resolvers', ()) + resolvers  | 
1868 | 1868 |         return _eval(expr, **kwargs)  | 
1869 | 1869 | 
 
  | 
 | 1870 | +    def select_dtypes(self, include=None, exclude=None):  | 
 | 1871 | +        """Return a subset of a DataFrame including/excluding columns based on  | 
 | 1872 | +        their ``dtype``.  | 
 | 1873 | +
  | 
 | 1874 | +        Parameters  | 
 | 1875 | +        ----------  | 
 | 1876 | +        include, exclude : list-like  | 
 | 1877 | +            A list of dtypes or strings to be included/excluded. You must pass  | 
 | 1878 | +            in a non-empty sequence for at least one of these.  | 
 | 1879 | +
  | 
 | 1880 | +        Raises  | 
 | 1881 | +        ------  | 
 | 1882 | +        ValueError  | 
 | 1883 | +            * If both of ``include`` and ``exclude`` are empty  | 
 | 1884 | +            * If ``include`` and ``exclude`` have overlapping elements  | 
 | 1885 | +            * If any kind of string dtype is passed in.  | 
 | 1886 | +        TypeError  | 
 | 1887 | +            * If either of ``include`` or ``exclude`` is not a sequence  | 
 | 1888 | +
  | 
 | 1889 | +        Returns  | 
 | 1890 | +        -------  | 
 | 1891 | +        subset : DataFrame  | 
 | 1892 | +            The subset of the frame including the dtypes in ``include`` and  | 
 | 1893 | +            excluding the dtypes in ``exclude``.  | 
 | 1894 | +
  | 
 | 1895 | +        Notes  | 
 | 1896 | +        -----  | 
 | 1897 | +        * To select all *numeric* types use the numpy dtype ``numpy.number``  | 
 | 1898 | +        * To select strings you must use the ``object`` dtype, but note that  | 
 | 1899 | +          this will return *all* object dtype columns  | 
 | 1900 | +        * See the `numpy dtype hierarchy  | 
 | 1901 | +        <http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`__  | 
 | 1902 | +
  | 
 | 1903 | +        Examples  | 
 | 1904 | +        --------  | 
 | 1905 | +        >>> df = pd.DataFrame({'a': np.random.randn(6).astype('f4'),  | 
 | 1906 | +        ...                    'b': [True, False] * 3,  | 
 | 1907 | +        ...                    'c': [1.0, 2.0] * 3})  | 
 | 1908 | +        >>> df  | 
 | 1909 | +                a      b  c  | 
 | 1910 | +        0  0.3962   True  1  | 
 | 1911 | +        1  0.1459  False  2  | 
 | 1912 | +        2  0.2623   True  1  | 
 | 1913 | +        3  0.0764  False  2  | 
 | 1914 | +        4 -0.9703   True  1  | 
 | 1915 | +        5 -1.2094  False  2  | 
 | 1916 | +        >>> df.select_dtypes(include=['float64'])  | 
 | 1917 | +           c  | 
 | 1918 | +        0  1  | 
 | 1919 | +        1  2  | 
 | 1920 | +        2  1  | 
 | 1921 | +        3  2  | 
 | 1922 | +        4  1  | 
 | 1923 | +        5  2  | 
 | 1924 | +        >>> df.select_dtypes(exclude=['floating'])  | 
 | 1925 | +               b  | 
 | 1926 | +        0   True  | 
 | 1927 | +        1  False  | 
 | 1928 | +        2   True  | 
 | 1929 | +        3  False  | 
 | 1930 | +        4   True  | 
 | 1931 | +        5  False  | 
 | 1932 | +        """  | 
 | 1933 | +        include, exclude = include or (), exclude or ()  | 
 | 1934 | +        if not (com.is_list_like(include) and com.is_list_like(exclude)):  | 
 | 1935 | +            raise TypeError('include and exclude must both be non-string'  | 
 | 1936 | +                            ' sequences')  | 
 | 1937 | +        selection = tuple(map(frozenset, (include, exclude)))  | 
 | 1938 | + | 
 | 1939 | +        if not any(selection):  | 
 | 1940 | +            raise ValueError('at least one of include or exclude must be '  | 
 | 1941 | +                             'nonempty')  | 
 | 1942 | + | 
 | 1943 | +        # convert the myriad valid dtypes object to a single representation  | 
 | 1944 | +        include, exclude = map(lambda x:  | 
 | 1945 | +                               frozenset(map(com._get_dtype_from_object, x)),  | 
 | 1946 | +                               selection)  | 
 | 1947 | +        for dtypes in (include, exclude):  | 
 | 1948 | +            com._invalidate_string_dtypes(dtypes)  | 
 | 1949 | + | 
 | 1950 | +        # can't both include AND exclude!  | 
 | 1951 | +        if not include.isdisjoint(exclude):  | 
 | 1952 | +            raise ValueError('include and exclude overlap on %s'  | 
 | 1953 | +                             % (include & exclude))  | 
 | 1954 | + | 
 | 1955 | +        # empty include/exclude -> defaults to True  | 
 | 1956 | +        # three cases (we've already raised if both are empty)  | 
 | 1957 | +        # case 1: empty include, nonempty exclude  | 
 | 1958 | +        # we have True, True, ... True for include, same for exclude  | 
 | 1959 | +        # in the loop below we get the excluded  | 
 | 1960 | +        # and when we call '&' below we get only the excluded  | 
 | 1961 | +        # case 2: nonempty include, empty exclude  | 
 | 1962 | +        # same as case 1, but with include  | 
 | 1963 | +        # case 3: both nonempty  | 
 | 1964 | +        # the "union" of the logic of case 1 and case 2:  | 
 | 1965 | +        # we get the included and excluded, and return their logical and  | 
 | 1966 | +        include_these = Series(not bool(include), index=self.columns)  | 
 | 1967 | +        exclude_these = Series(not bool(exclude), index=self.columns)  | 
 | 1968 | + | 
 | 1969 | +        def is_dtype_instance_mapper(column, dtype):  | 
 | 1970 | +            return column, functools.partial(issubclass, dtype.type)  | 
 | 1971 | + | 
 | 1972 | +        for column, f in itertools.starmap(is_dtype_instance_mapper,  | 
 | 1973 | +                                           self.dtypes.iteritems()):  | 
 | 1974 | +            if include:  # checks for the case of empty include or exclude  | 
 | 1975 | +                include_these[column] = any(map(f, include))  | 
 | 1976 | +            if exclude:  | 
 | 1977 | +                exclude_these[column] = not any(map(f, exclude))  | 
 | 1978 | + | 
 | 1979 | +        dtype_indexer = include_these & exclude_these  | 
 | 1980 | +        return self.loc[com._get_info_slice(self, dtype_indexer)]  | 
 | 1981 | + | 
1870 | 1982 |     def _box_item_values(self, key, values):  | 
1871 | 1983 |         items = self.columns[self.columns.get_loc(key)]  | 
1872 | 1984 |         if values.ndim == 2:  | 
 | 
0 commit comments