Skip to content

Commit ddc42b2

Browse files
committed
* fix #225 : added axis argument to LArray.sort_values.
* fix #478 : implemented LArray.sort_values() (i.e. no argument passed).
1 parent 6bdd8be commit ddc42b2

File tree

3 files changed

+153
-49
lines changed

3 files changed

+153
-49
lines changed

doc/source/changes/version_0_27.rst.inc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,42 @@ New features
5757
Miscellaneous improvements
5858
--------------------------
5959

60+
* added an `axis` argument to `LArray.sort_values` to sort the values of an array along an axis (closes :issue:`225`):
61+
>>> a = LArray([[10, 2, 4], [3, 7, 1]], "sex=M,F; nat=EU,FO,BE")
62+
>>> a
63+
sex\nat EU FO BE
64+
M 10 2 4
65+
F 3 7 1
66+
>>> a.sort_values(axis='sex')
67+
sex*\nat EU FO BE
68+
0 3 2 1
69+
1 10 7 4
70+
>>> a.sort_values(axis='nat')
71+
sex\nat* 0 1 2
72+
M 2 4 10
73+
F 1 3 7
74+
75+
* the `LArray.sort_values` method can now be called without any argument (closes :issue:`478`):
76+
77+
>>> arr = LArray([0, 1, 6, 3, -1], "a=a0..a4")
78+
>>> arr
79+
a a0 a1 a2 a3 a4
80+
0 1 6 3 -1
81+
>>> arr.sort_values()
82+
a a4 a0 a1 a3 a2
83+
-1 0 1 3 6
84+
85+
If the array has more than one dimension, all its axes are first combined into a single axis:
86+
87+
>>> a = LArray([[10, 2, 4], [3, 7, 1]], "sex=M,F; nat=EU,FO,BE")
88+
>>> a
89+
sex\nat EU FO BE
90+
M 10 2 4
91+
F 3 7 1
92+
>>> a.sort_values()
93+
sex_nat F_BE M_FO F_EU M_BE F_FO M_EU
94+
1 2 3 4 7 10
95+
6096
* made the editor more responsive when switching to or changing the filter of large arrays (closes :issue:`93`).
6197

6298
* added support for coloring numeric values for object arrays (e.g. arrays containing both strings and numbers).

larray/core/array.py

Lines changed: 102 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,13 +1586,19 @@ def align(self, other, join='outer', fill_value=nan, axes=None):
15861586
left_axes, right_axes = self.axes.align(other.axes, join=join, axes=axes)
15871587
return self.reindex(left_axes, fill_value=fill_value), other.reindex(right_axes, fill_value=fill_value)
15881588

1589-
def sort_values(self, key, reverse=False):
1589+
def sort_values(self, key=None, axis=None, reverse=False):
15901590
"""Sorts values of the array.
15911591
15921592
Parameters
15931593
----------
15941594
key : scalar or tuple or Group, optional
15951595
Key along which to sort. Must have exactly one dimension less than ndim.
1596+
Cannot be used in combination with the `axis` argument.
1597+
If both `key` and `axis` are None, sort array with all axes combined.
1598+
Defaults to None.
1599+
axis : int or str or Axis, optional
1600+
Axis along which to sort. Cannot be used in combination with the `key` argument.
1601+
Defaults to None.
15961602
reverse : bool, optional
15971603
Sort values in descending order. Defaults to False (ascending order).
15981604
@@ -1603,55 +1609,102 @@ def sort_values(self, key, reverse=False):
16031609
16041610
Examples
16051611
--------
1606-
>>> sex = Axis('sex=M,F')
1607-
>>> nat = Axis('nat=EU,FO,BE')
1608-
>>> xtype = Axis('type=type1,type2')
1609-
>>> a = LArray([[10, 2, 4], [3, 7, 1]], [sex, nat])
1610-
>>> a
1611-
sex\\nat EU FO BE
1612-
M 10 2 4
1613-
F 3 7 1
1614-
>>> a.sort_values('F')
1615-
sex\\nat BE EU FO
1616-
M 4 10 2
1617-
F 1 3 7
1618-
>>> a.sort_values('F', reverse=True)
1619-
sex\\nat FO EU BE
1620-
M 2 10 4
1621-
F 7 3 1
1622-
>>> b = LArray([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]],
1623-
... [sex, xtype, nat])
1624-
>>> b
1625-
sex type\\nat EU FO BE
1626-
M type1 10 2 4
1627-
M type2 3 7 1
1628-
F type1 5 1 6
1629-
F type2 2 8 9
1630-
>>> b.sort_values(('M', 'type2'))
1631-
sex type\\nat BE EU FO
1632-
M type1 4 10 2
1633-
M type2 1 3 7
1634-
F type1 6 5 1
1635-
F type2 9 2 8
1612+
Sort the whole array (no key or axis given)
1613+
1614+
>>> arr_1D = LArray([10, 2, 4], 'a=a0..a2')
1615+
>>> arr_1D
1616+
a a0 a1 a2
1617+
10 2 4
1618+
>>> arr_1D.sort_values()
1619+
a a1 a2 a0
1620+
2 4 10
1621+
>>> arr_2D = LArray([[10, 2, 4], [3, 7, 1]], 'a=a0,a1; b=b0..b2')
1622+
>>> arr_2D
1623+
a\\b b0 b1 b2
1624+
a0 10 2 4
1625+
a1 3 7 1
1626+
>>> # if the array has more than one dimension, sort array with all axes combined
1627+
>>> arr_2D.sort_values()
1628+
a_b a1_b2 a0_b1 a1_b0 a0_b2 a1_b1 a0_b0
1629+
1 2 3 4 7 10
1630+
1631+
Sort along a given key
1632+
1633+
>>> # sort columns according to the values of the row associated with the label 'a1'
1634+
>>> arr_2D.sort_values('a1')
1635+
a\\b b2 b0 b1
1636+
a0 4 10 2
1637+
a1 1 3 7
1638+
>>> arr_2D.sort_values('a1', reverse=True)
1639+
a\\b b1 b0 b2
1640+
a0 2 10 4
1641+
a1 7 3 1
1642+
>>> arr_3D = LArray([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]],
1643+
... 'a=a0,a1; b=b0,b1; c=c0..c2')
1644+
>>> arr_3D
1645+
a b\\c c0 c1 c2
1646+
a0 b0 10 2 4
1647+
a0 b1 3 7 1
1648+
a1 b0 5 1 6
1649+
a1 b1 2 8 9
1650+
>>> # sort columns according to the values of the row associated with the labels 'a0' and 'b1'
1651+
>>> arr_3D.sort_values(('a0', 'b1'))
1652+
a b\\c c2 c0 c1
1653+
a0 b0 4 10 2
1654+
a0 b1 1 3 7
1655+
a1 b0 6 5 1
1656+
a1 b1 9 2 8
1657+
1658+
Sort along an axis
1659+
1660+
>>> arr_2D
1661+
a\\b b0 b1 b2
1662+
a0 10 2 4
1663+
a1 3 7 1
1664+
>>> # sort values along axis 'a'
1665+
>>> # equivalent to sorting the values of each column of the array
1666+
>>> arr_2D.sort_values(axis='a')
1667+
a*\\b b0 b1 b2
1668+
0 3 2 1
1669+
1 10 7 4
1670+
>>> # sort values along axis 'b'
1671+
>>> # equivalent to sorting the values of each row of the array
1672+
>>> arr_2D.sort_values(axis='b')
1673+
a\\b* 0 1 2
1674+
a0 2 4 10
1675+
a1 1 3 7
16361676
"""
1637-
subset = self[key]
1638-
if subset.ndim > 1:
1639-
raise NotImplementedError("sort_values key must have one dimension less than array.ndim")
1640-
assert subset.ndim == 1
1641-
axis = subset.axes[0]
1642-
indicesofsorted = subset.indicesofsorted()
1643-
1644-
# FIXME: .data shouldn't be necessary, but currently, if we do not do it, we get
1645-
# IGroup(nat EU FO BE
1646-
# 1 2 0, axis='nat')
1647-
# which sorts the *data* correctly, but the labels on the nat axis are not sorted (because the __getitem__ in
1648-
# that case reuse the key axis as-is -- like it should).
1649-
# Both use cases have value, but I think reordering the ticks should be the default. Now, I am unsure where to
1650-
# change this. Probably in IGroupMaker.__getitem__, but then how do I get the "not reordering labels" behavior
1651-
# that I have now?
1652-
# FWIW, using .data, I get IGroup([1, 2, 0], axis='nat'), which works.
1653-
sorter = axis.i[indicesofsorted.data]
1654-
res = self[sorter]
1677+
if key is not None and axis is not None:
1678+
raise ValueError("Arguments key and axis are exclusive and cannot be used in combination")
1679+
if axis is not None:
1680+
axis = self.axes[axis]
1681+
axis_idx = self.axes.index(axis)
1682+
data = np.sort(self.data, axis_idx)
1683+
new_axes = self.axes.replace(axis_idx, Axis(len(axis), axis.name))
1684+
res = LArray(data, new_axes)
1685+
elif key is not None:
1686+
subset = self[key]
1687+
if subset.ndim > 1:
1688+
raise NotImplementedError("sort_values key must have one dimension less than array.ndim")
1689+
assert subset.ndim == 1
1690+
axis = subset.axes[0]
1691+
indicesofsorted = subset.indicesofsorted()
1692+
1693+
# FIXME: .data shouldn't be necessary, but currently, if we do not do it, we get
1694+
# IGroup(nat EU FO BE
1695+
# 1 2 0, axis='nat')
1696+
# which sorts the *data* correctly, but the labels on the nat axis are not sorted (because the __getitem__ in
1697+
# that case reuse the key axis as-is -- like it should).
1698+
# Both use cases have value, but I think reordering the ticks should be the default. Now, I am unsure where to
1699+
# change this. Probably in IGroupMaker.__getitem__, but then how do I get the "not reordering labels" behavior
1700+
# that I have now?
1701+
# FWIW, using .data, I get IGroup([1, 2, 0], axis='nat'), which works.
1702+
sorter = axis.i[indicesofsorted.data]
1703+
res = self[sorter]
1704+
else:
1705+
res = self.combine_axes()
1706+
indicesofsorted = np.argsort(res.data)
1707+
res = res.i[indicesofsorted]
16551708
return res[axis[::-1]] if reverse else res
16561709

16571710
def sort_axes(self, axes=None, reverse=False):

larray/tests/test_array.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2285,6 +2285,21 @@ def test_sequence(self):
22852285
res = sequence('b=b0..b2', ndtest(3) * 3, 1.0)
22862286
assert_array_equal(ndtest((3, 3), dtype=float), res)
22872287

2288+
def test_sort_values(self):
2289+
# 1D arrays
2290+
arr = LArray([0, 1, 6, 3, -1], "a=a0..a4")
2291+
res = arr.sort_values()
2292+
expected = LArray([-1, 0, 1, 3, 6], "a=a4,a0,a1,a3,a2")
2293+
assert_array_equal(res, expected)
2294+
2295+
# 3D arrays
2296+
arr = LArray([[[10, 2, 4], [3, 7, 1]], [[5, 1, 6], [2, 8, 9]]],
2297+
'a=a0,a1; b=b0,b1; c=c0..c2')
2298+
res = arr.sort_values(axis='c')
2299+
expected = LArray([[[2, 4, 10], [1, 3, 7]], [[1, 5, 6], [2, 8, 9]]],
2300+
[Axis('a=a0,a1'), Axis('b=b0,b1'), Axis(3, 'c')])
2301+
assert_array_equal(res, expected)
2302+
22882303
def test_set_labels(self):
22892304
la = self.small.copy()
22902305
la.set_labels(X.sex, ['Man', 'Woman'], inplace=True)

0 commit comments

Comments
 (0)