pandas-dev
diff --git a/‎.travis.yml‎
Lines changed: 11 additions & 1 deletion b/‎.travis.yml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/algorithms.py‎
Lines changed: 14 additions & 3 deletions b/‎asv_bench/benchmarks/algorithms.py‎
Lines changed: 14 additions & 3 deletions
diff --git a/‎ci/build39.sh‎
Lines changed: 21 additions & 0 deletions b/‎ci/build39.sh‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎ci/setup_env.sh‎
Lines changed: 5 additions & 0 deletions b/‎ci/setup_env.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎doc/source/reference/extensions.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/source/reference/extensions.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/user_guide/groupby.rst‎
Lines changed: 27 additions & 0 deletions b/‎doc/source/user_guide/groupby.rst‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎doc/source/user_guide/timeseries.rst‎
Lines changed: 73 additions & 6 deletions b/‎doc/source/user_guide/timeseries.rst‎
Lines changed: 73 additions & 6 deletions
@@ -27,6 +27,11 @@ matrix:
   fast_finish: true
 
   include:
+    # In allowed failures
+    - dist: bionic
+      python: 3.9-dev
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
 
@@ -53,6 +58,11 @@ matrix:
       services:
         - mysql
         - postgresql
+  allow_failures:  # each entry must equal a matrix.include job key-for-key (including env) to take effect
+  - dist: bionic
+    python: 3.9-dev
+    env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
 
 before_install:
   - echo "before_install"
@@ -83,7 +93,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - source activate pandas-dev
+  - if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
   - ci/run_tests.sh
 
 after_script:
 
@@ -34,7 +34,16 @@ class Factorize:
     params = [
         [True, False],
         [True, False],
-        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+        [
+            "int",
+            "uint",
+            "float",
+            "string",
+            "datetime64[ns]",
+            "datetime64[ns, tz]",
+            "Int64",
+            "boolean",
+        ],
     ]
     param_names = ["unique", "sort", "dtype"]
 
@@ -49,13 +58,15 @@ def setup(self, unique, sort, dtype):
             "datetime64[ns, tz]": pd.date_range(
                 "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
             ),
+            "Int64": pd.array(np.arange(N), dtype="Int64"),
+            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
         }[dtype]
         if not unique:
             data = data.repeat(5)
-        self.idx = data
+        self.data = data
 
     def time_factorize(self, unique, sort, dtype):
-        self.idx.factorize(sort=sort)
+        pd.factorize(self.data, sort=sort)
 
 
 class Duplicated:
 
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Special build for Python 3.9 until NumPy publishes its own wheels: NumPy and pandas are built from source
+set -e  # the shebang's -e is ignored when the script is passed to bash as an argument, so enable it explicitly
+sudo apt-get install -y build-essential gcc xvfb  # -y: never stop at a confirmation prompt in CI
+pip install --no-deps -U pip wheel setuptools
+pip install python-dateutil pytz pytest pytest-xdist hypothesis
+pip install cython --pre # https://github.com/cython/cython/issues/3395
+
+git clone https://github.com/numpy/numpy  # build and install NumPy from a checkout of its repository
+cd numpy
+python setup.py build_ext --inplace
+python setup.py install
+cd ..
+rm -rf numpy
+
+python setup.py build_ext --inplace  # compile the extension modules of the current checkout in place
+python -m pip install --no-build-isolation -e .
+
+python -c "import sys; print(sys.version_info)"  # smoke checks: interpreter version, then key imports
+python -c "import pandas as pd"
+python -c "import hypothesis"
@@ -1,5 +1,10 @@
 #!/bin/bash -e
 
+if [ "$JOB" == "3.9-dev" ]; then
+    /bin/bash ci/build39.sh
+    exit 0
+fi
+
 # edit the locale file if needed
 if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
     echo "Adding locale to the first line of pandas/__init__.py"
 
@@ -45,6 +45,7 @@ objects.
       api.extensions.ExtensionArray.copy
       api.extensions.ExtensionArray.view
       api.extensions.ExtensionArray.dropna
+      api.extensions.ExtensionArray.equals
       api.extensions.ExtensionArray.factorize
       api.extensions.ExtensionArray.fillna
       api.extensions.ExtensionArray.isna
 
@@ -199,6 +199,33 @@ For example, the groups created by ``groupby()`` below are in the order they app
    df3.groupby(['X']).get_group('B')
 
 
+.. _groupby.dropna:
+
+GroupBy dropna
+^^^^^^^^^^^^^^
+
+.. versionadded:: 1.1.0
+
+By default, ``NA`` values are excluded from the group keys during a ``groupby`` operation.
+To keep ``NA`` values as group keys, pass ``dropna=False``.
+
+.. ipython:: python
+
+    df_list = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
+    df_dropna = pd.DataFrame(df_list, columns=["a", "b", "c"])
+
+    df_dropna
+
+.. ipython:: python
+
+    # Default `dropna` is set to True, which will exclude NaNs in keys
+    df_dropna.groupby(by=["b"], dropna=True).sum()
+
+    # In order to allow NaN in keys, set `dropna` to False
+    df_dropna.groupby(by=["b"], dropna=False).sum()
+
+The default value of the ``dropna`` argument is ``True``, which means ``NA`` values are not included in the group keys.
+
 
 .. _groupby.attributes:
 
 
@@ -1572,19 +1572,16 @@ end of the interval is closed:
 
    ts.resample('5Min', closed='left').mean()
 
-Parameters like ``label`` and ``loffset`` are used to manipulate the resulting
-labels. ``label`` specifies whether the result is labeled with the beginning or
-the end of the interval. ``loffset`` performs a time adjustment on the output
-labels.
+Parameters like ``label`` are used to manipulate the resulting labels.
+``label`` specifies whether the result is labeled with the beginning or
+the end of the interval.
 
 .. ipython:: python
 
    ts.resample('5Min').mean()  # by default label='left'
 
    ts.resample('5Min', label='left').mean()
 
-   ts.resample('5Min', label='left', loffset='1s').mean()
-
 .. warning::
 
     The default values for ``label`` and ``closed`` is '**left**' for all
@@ -1789,6 +1786,58 @@ natural and functions similarly to :py:func:`itertools.groupby`:
 
 See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more.
 
+.. _timeseries.adjust-the-start-of-the-bins:
+
+Use ``origin`` or ``offset`` to adjust the start of the bins
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1.0
+
+The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like ``30D``) or that divide a day evenly (like ``90s`` or ``1min``). It can create inconsistencies with frequencies that do not meet this criterion. To change this behavior, you can specify a fixed Timestamp with the ``origin`` argument.
+
+For example:
+
+.. ipython:: python
+
+    start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
+    middle = '2000-10-02 00:00:00'
+    rng = pd.date_range(start, end, freq='7min')
+    ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
+    ts
+
+Here we can see that, when using ``origin`` with its default value (``'start_day'``), the results after ``'2000-10-02 00:00:00'`` differ depending on where the time series starts:
+
+.. ipython:: python
+
+    ts.resample('17min', origin='start_day').sum()
+    ts[middle:end].resample('17min', origin='start_day').sum()
+
+
+Here we can see that, when setting ``origin`` to ``'epoch'``, the results after ``'2000-10-02 00:00:00'`` are identical regardless of where the time series starts:
+
+.. ipython:: python
+
+   ts.resample('17min', origin='epoch').sum()
+   ts[middle:end].resample('17min', origin='epoch').sum()
+
+
+If needed you can use a custom timestamp for ``origin``:
+
+.. ipython:: python
+
+   ts.resample('17min', origin='2001-01-01').sum()
+   ts[middle:end].resample('17min', origin=pd.Timestamp('2001-01-01')).sum()
+
+If needed, you can simply shift the bins with an ``offset`` Timedelta that is added to the default ``origin``.
+The following two examples are equivalent for this time series:
+
+.. ipython:: python
+
+    ts.resample('17min', origin='start').sum()
+    ts.resample('17min', offset='23h30min').sum()
+
+
+Note the use of ``'start'`` for ``origin`` in the example above. In that case, ``origin`` is set to the first value of the time series.
 
 .. _timeseries.periods:
 
@@ -2265,6 +2314,24 @@ you can use the ``tz_convert`` method.
     Instead, the datetime needs to be localized using the ``localize`` method
     on the ``pytz`` time zone object.
 
+.. warning::
+
+    If you are using dates beyond 2038-01-18, due to current deficiencies
+    in the underlying libraries caused by the year 2038 problem, daylight saving time (DST) adjustments
+    to timezone-aware dates will not be applied. If and when the underlying libraries are fixed,
+    the DST transitions will be applied. Note, though, that time zone data for dates far in the future
+    is likely to be inaccurate, as it is a simple extrapolation of the current set of (regularly revised) rules.
+
+    For example, for two dates that are in British Summer Time (and so would normally be GMT+1), both the following asserts evaluate as true:
+
+    .. ipython:: python
+
+       d_2037 = '2037-03-31T010101'
+       d_2038 = '2038-03-31T010101'
+       DST = 'Europe/London'
+       assert pd.Timestamp(d_2037, tz=DST) != pd.Timestamp(d_2037, tz='GMT')
+       assert pd.Timestamp(d_2038, tz=DST) == pd.Timestamp(d_2038, tz='GMT')
+
 Under the hood, all timestamps are stored in UTC. Values from a time zone aware
 :class:`DatetimeIndex` or :class:`Timestamp` will have their fields (day, hour, minute, etc.)
 localized to the time zone. However, timestamps with the same UTC value are