Skip to content
4 changes: 1 addition & 3 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -449,10 +449,7 @@ Other Enhancements
- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` function and a ``DataFrame.to_feather()`` method; see :ref:`here <io.feather>`.
- ``Series.str.replace()`` now accepts a callable as the replacement, which is passed to ``re.sub`` (:issue:`15055`)
- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback let me know if you want me to revert the deletion of these blank lines.


- ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`)

- ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`).
- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`).
- ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`).
Expand Down Expand Up @@ -1302,6 +1299,7 @@ Other API Changes
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`)
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)
- ``Series.map()`` now respects default values of dictionary subclasses with a ``__missing__`` method, such as ``collections.Counter`` (:issue:`15999`)
- ``.loc`` is now compatible with ``.ix`` in accepting iterators and NamedTuples (:issue:`15120`)
- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`)
- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
Expand Down
41 changes: 33 additions & 8 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2079,8 +2079,8 @@ def map(self, arg, na_action=None):
two bar
three baz

Mapping a dictionary keys on the index labels works similar as
with a `Series`:
If `arg` is a dictionary, return a new Series with values converted
according to the dictionary's mapping:

>>> z = {1: 'A', 2: 'B', 3: 'C'}

Expand All @@ -2094,16 +2094,14 @@ def map(self, arg, na_action=None):

>>> s = pd.Series([1, 2, 3, np.nan])

>>> s2 = s.map(lambda x: 'this is a string {}'.format(x),
na_action=None)
>>> s2 = s.map('this is a string {}'.format, na_action=None)
0 this is a string 1.0
1 this is a string 2.0
2 this is a string 3.0
3 this is a string nan
dtype: object

>>> s3 = s.map(lambda x: 'this is a string {}'.format(x),
na_action='ignore')
>>> s3 = s.map('this is a string {}'.format, na_action='ignore')
0 this is a string 1.0
1 this is a string 2.0
2 this is a string 3.0
Expand All @@ -2115,6 +2113,23 @@ def map(self, arg, na_action=None):
Series.apply: For applying more complex functions on a Series
DataFrame.apply: Apply a function row-/column-wise
DataFrame.applymap: Apply a function elementwise on a whole DataFrame

Notes
-----
When `arg` is a dictionary, values in Series that are not in the
dictionary (as keys) are converted to ``NaN``. However, if the
dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
provides a method for default values), then this default is used
rather than ``NaN``:

>>> from collections import Counter
>>> counter = Counter()
>>> counter['bar'] += 1
>>> y.map(counter)
1 0
2 1
3 0
dtype: int64
"""

if is_extension_type(self.dtype):
Expand All @@ -2132,13 +2147,23 @@ def map_f(values, f):
else:
map_f = lib.map_infer

if isinstance(arg, (dict, Series)):
if isinstance(arg, dict):
if isinstance(arg, dict):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a 1-line comment here on what you are doing

if hasattr(arg, '__missing__'):
# If a dictionary subclass defines a default value method,
# convert arg to a lookup function (GH #15999).
dict_with_default = arg
arg = lambda x: dict_with_default[x]
else:
# Dictionary does not have a default. Thus it's safe to
# convert to an indexed series for efficiency.
arg = self._constructor(arg, index=arg.keys())

if isinstance(arg, Series):
# arg is a Series
indexer = arg.index.get_indexer(values)
new_values = algorithms.take_1d(arg._values, indexer)
else:
# arg is a function
new_values = map_f(values, arg)

return self._constructor(new_values,
Expand Down
42 changes: 41 additions & 1 deletion pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612

from collections import OrderedDict
from collections import Counter, defaultdict, OrderedDict
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -411,6 +411,46 @@ def test_map_dict_with_tuple_keys(self):
tm.assert_series_equal(df['labels'], df['expected_labels'],
check_names=False)

def test_map_counter(self):
s = Series(['a', 'b', 'c'], index=[1, 2, 3])
counter = Counter()
counter['b'] = 5
counter['c'] += 1
result = s.map(counter)
expected = Series([0, 5, 1], index=[1, 2, 3])
assert_series_equal(result, expected)

def test_map_defaultdict(self):
s = Series([1, 2, 3], index=['a', 'b', 'c'])
default_dict = defaultdict(lambda: 'blank')
default_dict[1] = 'stuff'
result = s.map(default_dict)
expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
assert_series_equal(result, expected)

def test_map_dict_subclass_with_missing(self):
"""
Test Series.map with a dictionary subclass that defines __missing__,
i.e. sets a default value (GH #15999).
"""
class DictWithMissing(dict):
def __missing__(self, key):
return 'missing'
s = Series([1, 2, 3])
dictionary = DictWithMissing({3: 'three'})
result = s.map(dictionary)
expected = Series(['missing', 'missing', 'three'])
assert_series_equal(result, expected)

def test_map_dict_subclass_without_missing(self):
class DictWithoutMissing(dict):
pass
s = Series([1, 2, 3])
dictionary = DictWithoutMissing({3: 'three'})
result = s.map(dictionary)
expected = Series([np.nan, np.nan, 'three'])
assert_series_equal(result, expected)

def test_map_box(self):
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
s = pd.Series(vals)
Expand Down