Skip to content

Commit 058102a

Browse files
wkzkuba-moo
authored and committed
net: dsa: Link aggregation support
Monitor the following events and notify the driver when: - A DSA port joins/leaves a LAG. - A LAG, made up of DSA ports, joins/leaves a bridge. - A DSA port in a LAG is enabled/disabled (enabled meaning "distributing" in 802.3ad LACP terms). When a LAG joins a bridge, the DSA subsystem will treat that as each individual port joining the bridge. The driver may look at the port's LAG device pointer to see if it is associated with any LAG, if that is required. This is analogous to how switchdev events are replicated out to all lower devices when reaching e.g. a LAG. Drivers can optionally request that DSA maintain a linear mapping from a LAG ID to the corresponding netdev by setting ds->num_lag_ids to the desired size. In the event that the hardware is not capable of offloading a particular LAG for any reason (the typical case being use of exotic modes like broadcast), DSA will take a hands-off approach, allowing the LAG to be formed as a pure software construct. This is reported back through the extended ACK, but is otherwise transparent to the user. Signed-off-by: Tobias Waldekranz <[email protected]> Reviewed-by: Vladimir Oltean <[email protected]> Tested-by: Vladimir Oltean <[email protected]> Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 5696c8a commit 058102a

File tree

6 files changed

+381
-7
lines changed

6 files changed

+381
-7
lines changed

include/net/dsa.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,41 @@ struct dsa_switch_tree {
149149

150150
/* List of DSA links composing the routing table */
151151
struct list_head rtable;
152+
153+
/* Maps offloaded LAG netdevs to a zero-based linear ID for
154+
* drivers that need it.
155+
*/
156+
struct net_device **lags;
157+
unsigned int lags_len;
152158
};
153159

160+
/* Loop head: step _id from 0 up to (_dst)->lags_len - 1 and run the
 * statement that follows only for IDs whose (_dst)->lags[] slot is
 * non-NULL. The expansion ends in a bare "if", so an "else" written
 * directly after the loop body would attach to that "if".
 */
#define dsa_lags_foreach_id(_id, _dst) \
161+
for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \
162+
if ((_dst)->lags[(_id)])
163+
164+
/* Loop head: walk every port on (_dst)->ports (linked through the
 * "list" member) and run the statement that follows only for ports
 * whose lag_dev pointer equals _lag. The expansion ends in a bare
 * "if", so an "else" written directly after the loop body would
 * attach to that "if".
 */
#define dsa_lag_foreach_port(_dp, _dst, _lag) \
165+
list_for_each_entry((_dp), &(_dst)->ports, list) \
166+
if ((_dp)->lag_dev == (_lag))
167+
168+
static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst,
169+
unsigned int id)
170+
{
171+
return dst->lags[id];
172+
}
173+
174+
static inline int dsa_lag_id(struct dsa_switch_tree *dst,
175+
struct net_device *lag)
176+
{
177+
unsigned int id;
178+
179+
dsa_lags_foreach_id(id, dst) {
180+
if (dsa_lag_dev(dst, id) == lag)
181+
return id;
182+
}
183+
184+
return -ENODEV;
185+
}
186+
154187
/* TC matchall action types */
155188
enum dsa_port_mall_action_type {
156189
DSA_PORT_MALL_MIRROR,
@@ -220,6 +253,8 @@ struct dsa_port {
220253
bool devlink_port_setup;
221254
struct phylink *pl;
222255
struct phylink_config pl_config;
256+
struct net_device *lag_dev;
257+
bool lag_tx_enabled;
223258

224259
struct list_head list;
225260

@@ -340,6 +375,14 @@ struct dsa_switch {
340375
*/
341376
bool mtu_enforcement_ingress;
342377

378+
/* Drivers that benefit from having an ID associated with each
379+
* offloaded LAG should set this to the maximum number of
380+
* supported IDs. DSA will then maintain a mapping of _at
381+
* least_ these many IDs, accessible to drivers via
382+
* dsa_lag_id().
383+
*/
384+
unsigned int num_lag_ids;
385+
343386
size_t num_ports;
344387
};
345388

@@ -626,6 +669,13 @@ struct dsa_switch_ops {
626669
void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index,
627670
int sw_index, int port,
628671
struct net_device *br);
672+
int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index,
673+
int port);
674+
int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index,
675+
int port, struct net_device *lag,
676+
struct netdev_lag_upper_info *info);
677+
int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index,
678+
int port, struct net_device *lag);
629679

630680
/*
631681
* PTP functionality
@@ -657,6 +707,16 @@ struct dsa_switch_ops {
657707
int (*port_change_mtu)(struct dsa_switch *ds, int port,
658708
int new_mtu);
659709
int (*port_max_mtu)(struct dsa_switch *ds, int port);
710+
711+
/*
712+
* LAG integration
713+
*/
714+
int (*port_lag_change)(struct dsa_switch *ds, int port);
715+
int (*port_lag_join)(struct dsa_switch *ds, int port,
716+
struct net_device *lag,
717+
struct netdev_lag_upper_info *info);
718+
int (*port_lag_leave)(struct dsa_switch *ds, int port,
719+
struct net_device *lag);
660720
};
661721

662722
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \

net/dsa/dsa2.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,65 @@
2121
static DEFINE_MUTEX(dsa2_mutex);
2222
LIST_HEAD(dsa_tree_list);
2323

24+
/**
25+
* dsa_lag_map() - Map LAG netdev to a linear LAG ID
26+
* @dst: Tree in which to record the mapping.
27+
* @lag: Netdev that is to be mapped to an ID.
28+
*
29+
* dsa_lag_id/dsa_lag_dev can then be used to translate between the
30+
* two spaces. The size of the mapping space is determined by the
31+
* driver by setting ds->num_lag_ids. It is perfectly legal to leave
32+
* it unset if it is not needed, in which case these functions become
33+
* no-ops.
34+
*/
35+
void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
36+
{
37+
unsigned int id;
38+
39+
if (dsa_lag_id(dst, lag) >= 0)
40+
/* Already mapped */
41+
return;
42+
43+
for (id = 0; id < dst->lags_len; id++) {
44+
if (!dsa_lag_dev(dst, id)) {
45+
dst->lags[id] = lag;
46+
return;
47+
}
48+
}
49+
50+
/* No IDs left, which is OK. Some drivers do not need it. The
51+
* ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
52+
* returns an error for this device when joining the LAG. The
53+
* driver can then return -EOPNOTSUPP back to DSA, which will
54+
* fall back to a software LAG.
55+
*/
56+
}
57+
58+
/**
59+
* dsa_lag_unmap() - Remove a LAG ID mapping
60+
* @dst: Tree in which the mapping is recorded.
61+
* @lag: Netdev that was mapped.
62+
*
63+
* As there may be multiple users of the mapping, it is only removed
64+
* if there are no other references to it.
65+
*/
66+
void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
67+
{
68+
struct dsa_port *dp;
69+
unsigned int id;
70+
71+
dsa_lag_foreach_port(dp, dst, lag)
72+
/* There are remaining users of this mapping */
73+
return;
74+
75+
dsa_lags_foreach_id(id, dst) {
76+
if (dsa_lag_dev(dst, id) == lag) {
77+
dst->lags[id] = NULL;
78+
break;
79+
}
80+
}
81+
}
82+
2483
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
2584
{
2685
struct dsa_switch_tree *dst;
@@ -578,6 +637,32 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
578637
dsa_master_teardown(dp->master);
579638
}
580639

640+
static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
641+
{
642+
unsigned int len = 0;
643+
struct dsa_port *dp;
644+
645+
list_for_each_entry(dp, &dst->ports, list) {
646+
if (dp->ds->num_lag_ids > len)
647+
len = dp->ds->num_lag_ids;
648+
}
649+
650+
if (!len)
651+
return 0;
652+
653+
dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
654+
if (!dst->lags)
655+
return -ENOMEM;
656+
657+
dst->lags_len = len;
658+
return 0;
659+
}
660+
661+
static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
662+
{
663+
kfree(dst->lags);
664+
}
665+
581666
static int dsa_tree_setup(struct dsa_switch_tree *dst)
582667
{
583668
bool complete;
@@ -605,12 +690,18 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
605690
if (err)
606691
goto teardown_switches;
607692

693+
err = dsa_tree_setup_lags(dst);
694+
if (err)
695+
goto teardown_master;
696+
608697
dst->setup = true;
609698

610699
pr_info("DSA: tree %d setup\n", dst->index);
611700

612701
return 0;
613702

703+
teardown_master:
704+
dsa_tree_teardown_master(dst);
614705
teardown_switches:
615706
dsa_tree_teardown_switches(dst);
616707
teardown_default_cpu:
@@ -626,6 +717,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
626717
if (!dst->setup)
627718
return;
628719

720+
dsa_tree_teardown_lags(dst);
721+
629722
dsa_tree_teardown_master(dst);
630723

631724
dsa_tree_teardown_switches(dst);

net/dsa/dsa_priv.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ enum {
2020
DSA_NOTIFIER_BRIDGE_LEAVE,
2121
DSA_NOTIFIER_FDB_ADD,
2222
DSA_NOTIFIER_FDB_DEL,
23+
DSA_NOTIFIER_LAG_CHANGE,
24+
DSA_NOTIFIER_LAG_JOIN,
25+
DSA_NOTIFIER_LAG_LEAVE,
2326
DSA_NOTIFIER_MDB_ADD,
2427
DSA_NOTIFIER_MDB_DEL,
2528
DSA_NOTIFIER_VLAN_ADD,
@@ -55,6 +58,15 @@ struct dsa_notifier_mdb_info {
5558
int port;
5659
};
5760

61+
/* Payload passed along with the DSA_NOTIFIER_LAG_* events */
struct dsa_notifier_lag_info {
	/* LAG netdev being joined or left; not filled in for LAG_CHANGE */
	struct net_device *lag;
	/* Index of the switch that owns the originating port */
	int sw_index;
	/* Index of the originating port within that switch */
	int port;

	/* Upper info of the LAG; only filled in for LAG_JOIN */
	struct netdev_lag_upper_info *info;
};
69+
5870
/* DSA_NOTIFIER_VLAN_* */
5971
struct dsa_notifier_vlan_info {
6072
const struct switchdev_obj_port_vlan *vlan;
@@ -134,6 +146,11 @@ void dsa_port_disable_rt(struct dsa_port *dp);
134146
void dsa_port_disable(struct dsa_port *dp);
135147
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
136148
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
149+
int dsa_port_lag_change(struct dsa_port *dp,
150+
struct netdev_lag_lower_state_info *linfo);
151+
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
152+
struct netdev_lag_upper_info *uinfo);
153+
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
137154
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering);
138155
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
139156
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
@@ -159,6 +176,22 @@ int dsa_port_link_register_of(struct dsa_port *dp);
159176
void dsa_port_link_unregister_of(struct dsa_port *dp);
160177
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
161178

179+
static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
180+
struct net_device *dev)
181+
{
182+
/* Switchdev offloading can be configured on: */
183+
184+
if (dev == dp->slave)
185+
/* DSA ports directly connected to a bridge. */
186+
return true;
187+
188+
if (dp->lag_dev == dev)
189+
/* DSA ports connected to a bridge via a LAG */
190+
return true;
191+
192+
return false;
193+
}
194+
162195
/* slave.c */
163196
extern const struct dsa_device_ops notag_netdev_ops;
164197
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -248,6 +281,9 @@ int dsa_switch_register_notifier(struct dsa_switch *ds);
248281
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
249282

250283
/* dsa2.c */
284+
void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag);
285+
void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag);
286+
251287
extern struct list_head dsa_tree_list;
252288

253289
#endif

net/dsa/port.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,85 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
191191
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
192192
}
193193

194+
int dsa_port_lag_change(struct dsa_port *dp,
195+
struct netdev_lag_lower_state_info *linfo)
196+
{
197+
struct dsa_notifier_lag_info info = {
198+
.sw_index = dp->ds->index,
199+
.port = dp->index,
200+
};
201+
bool tx_enabled;
202+
203+
if (!dp->lag_dev)
204+
return 0;
205+
206+
/* On statically configured aggregates (e.g. loadbalance
207+
* without LACP) ports will always be tx_enabled, even if the
208+
* link is down. Thus we require both link_up and tx_enabled
209+
* in order to include it in the tx set.
210+
*/
211+
tx_enabled = linfo->link_up && linfo->tx_enabled;
212+
213+
if (tx_enabled == dp->lag_tx_enabled)
214+
return 0;
215+
216+
dp->lag_tx_enabled = tx_enabled;
217+
218+
return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info);
219+
}
220+
221+
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
222+
struct netdev_lag_upper_info *uinfo)
223+
{
224+
struct dsa_notifier_lag_info info = {
225+
.sw_index = dp->ds->index,
226+
.port = dp->index,
227+
.lag = lag,
228+
.info = uinfo,
229+
};
230+
int err;
231+
232+
dsa_lag_map(dp->ds->dst, lag);
233+
dp->lag_dev = lag;
234+
235+
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
236+
if (err) {
237+
dp->lag_dev = NULL;
238+
dsa_lag_unmap(dp->ds->dst, lag);
239+
}
240+
241+
return err;
242+
}
243+
244+
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
245+
{
246+
struct dsa_notifier_lag_info info = {
247+
.sw_index = dp->ds->index,
248+
.port = dp->index,
249+
.lag = lag,
250+
};
251+
int err;
252+
253+
if (!dp->lag_dev)
254+
return;
255+
256+
/* Port might have been part of a LAG that in turn was
257+
* attached to a bridge.
258+
*/
259+
if (dp->bridge_dev)
260+
dsa_port_bridge_leave(dp, dp->bridge_dev);
261+
262+
dp->lag_tx_enabled = false;
263+
dp->lag_dev = NULL;
264+
265+
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
266+
if (err)
267+
pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
268+
err);
269+
270+
dsa_lag_unmap(dp->ds->dst, lag);
271+
}
272+
194273
/* Must be called under rcu_read_lock() */
195274
static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
196275
bool vlan_filtering)

0 commit comments

Comments
 (0)