Skip to content

Commit 920767a

Browse files
committed
Merge branch 'liquidio-improve-soft-command-response-handling'
Weilin Chang says: ==================== liquidio: improve soft command/response handling Change soft command handling to fix the possible race condition when the process handles a response of a soft command that was already freed by an application which got timeout for this request. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 0927f71 + 64fecd3 commit 920767a

File tree

14 files changed

+627
-802
lines changed

14 files changed

+627
-802
lines changed

drivers/net/ethernet/cavium/liquidio/lio_core.c

Lines changed: 54 additions & 178 deletions
Large diffs are not rendered by default.

drivers/net/ethernet/cavium/liquidio/lio_ethtool.c

Lines changed: 77 additions & 179 deletions
Large diffs are not rendered by default.

drivers/net/ethernet/cavium/liquidio/lio_main.c

Lines changed: 138 additions & 169 deletions
Large diffs are not rendered by default.

drivers/net/ethernet/cavium/liquidio/lio_vf_main.c

Lines changed: 83 additions & 111 deletions
Large diffs are not rendered by default.

drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c

Lines changed: 16 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -49,44 +49,25 @@ static const struct net_device_ops lio_vf_rep_ndev_ops = {
4949
.ndo_change_mtu = lio_vf_rep_change_mtu,
5050
};
5151

52-
static void
53-
lio_vf_rep_send_sc_complete(struct octeon_device *oct,
54-
u32 status, void *ptr)
55-
{
56-
struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
57-
struct lio_vf_rep_sc_ctx *ctx =
58-
(struct lio_vf_rep_sc_ctx *)sc->ctxptr;
59-
struct lio_vf_rep_resp *resp =
60-
(struct lio_vf_rep_resp *)sc->virtrptr;
61-
62-
if (status != OCTEON_REQUEST_TIMEOUT && READ_ONCE(resp->status))
63-
WRITE_ONCE(resp->status, 0);
64-
65-
complete(&ctx->complete);
66-
}
67-
6852
static int
6953
lio_vf_rep_send_soft_command(struct octeon_device *oct,
7054
void *req, int req_size,
7155
void *resp, int resp_size)
7256
{
7357
int tot_resp_size = sizeof(struct lio_vf_rep_resp) + resp_size;
74-
int ctx_size = sizeof(struct lio_vf_rep_sc_ctx);
7558
struct octeon_soft_command *sc = NULL;
7659
struct lio_vf_rep_resp *rep_resp;
77-
struct lio_vf_rep_sc_ctx *ctx;
7860
void *sc_req;
7961
int err;
8062

8163
sc = (struct octeon_soft_command *)
8264
octeon_alloc_soft_command(oct, req_size,
83-
tot_resp_size, ctx_size);
65+
tot_resp_size, 0);
8466
if (!sc)
8567
return -ENOMEM;
8668

87-
ctx = (struct lio_vf_rep_sc_ctx *)sc->ctxptr;
88-
memset(ctx, 0, ctx_size);
89-
init_completion(&ctx->complete);
69+
init_completion(&sc->complete);
70+
sc->sc_status = OCTEON_REQUEST_PENDING;
9071

9172
sc_req = (struct lio_vf_rep_req *)sc->virtdptr;
9273
memcpy(sc_req, req, req_size);
@@ -98,23 +79,24 @@ lio_vf_rep_send_soft_command(struct octeon_device *oct,
9879
sc->iq_no = 0;
9980
octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
10081
OPCODE_NIC_VF_REP_CMD, 0, 0, 0);
101-
sc->callback = lio_vf_rep_send_sc_complete;
102-
sc->callback_arg = sc;
103-
sc->wait_time = LIO_VF_REP_REQ_TMO_MS;
10482

10583
err = octeon_send_soft_command(oct, sc);
10684
if (err == IQ_SEND_FAILED)
10785
goto free_buff;
10886

109-
wait_for_completion_timeout(&ctx->complete,
110-
msecs_to_jiffies
111-
(2 * LIO_VF_REP_REQ_TMO_MS));
87+
err = wait_for_sc_completion_timeout(oct, sc, 0);
88+
if (err)
89+
return err;
90+
11291
err = READ_ONCE(rep_resp->status) ? -EBUSY : 0;
11392
if (err)
11493
dev_err(&oct->pci_dev->dev, "VF rep send config failed\n");
115-
116-
if (resp)
94+
else if (resp)
11795
memcpy(resp, (rep_resp + 1), resp_size);
96+
97+
WRITE_ONCE(sc->caller_is_done, true);
98+
return err;
99+
118100
free_buff:
119101
octeon_free_soft_command(oct, sc);
120102

@@ -404,7 +386,7 @@ lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev)
404386
}
405387

406388
sc = (struct octeon_soft_command *)
407-
octeon_alloc_soft_command(oct, 0, 0, 0);
389+
octeon_alloc_soft_command(oct, 0, 16, 0);
408390
if (!sc) {
409391
dev_err(&oct->pci_dev->dev, "VF rep: Soft command alloc failed\n");
410392
goto xmit_failed;
@@ -413,13 +395,15 @@ lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev)
413395
/* Multiple buffers are not used for vf_rep packets. */
414396
if (skb_shinfo(skb)->nr_frags != 0) {
415397
dev_err(&oct->pci_dev->dev, "VF rep: nr_frags != 0. Dropping packet\n");
398+
octeon_free_soft_command(oct, sc);
416399
goto xmit_failed;
417400
}
418401

419402
sc->dmadptr = dma_map_single(&oct->pci_dev->dev,
420403
skb->data, skb->len, DMA_TO_DEVICE);
421404
if (dma_mapping_error(&oct->pci_dev->dev, sc->dmadptr)) {
422405
dev_err(&oct->pci_dev->dev, "VF rep: DMA mapping failed\n");
406+
octeon_free_soft_command(oct, sc);
423407
goto xmit_failed;
424408
}
425409

@@ -440,6 +424,7 @@ lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev)
440424
if (status == IQ_SEND_FAILED) {
441425
dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr,
442426
sc->datasize, DMA_TO_DEVICE);
427+
octeon_free_soft_command(oct, sc);
443428
goto xmit_failed;
444429
}
445430

drivers/net/ethernet/cavium/liquidio/octeon_config.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,10 @@ struct octeon_config {
438438
#define MAX_BAR1_IOREMAP_SIZE (16 * OCTEON_BAR1_ENTRY_SIZE)
439439

440440
/* Response lists - 1 ordered, 1 unordered-blocking, 1 unordered-nonblocking
441+
* 1 process done list, 1 zombie lists(timeouted sc list)
441442
* NoResponse Lists are now maintained with each IQ. (Dec' 2007).
442443
*/
443-
#define MAX_RESPONSE_LISTS 4
444+
#define MAX_RESPONSE_LISTS 6
444445

445446
/* Opcode hash bits. The opcode is hashed on the lower 6-bits to lookup the
446447
* dispatch table.

drivers/net/ethernet/cavium/liquidio/octeon_iq.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,13 +292,19 @@ struct octeon_soft_command {
292292
u32 ctxsize;
293293

294294
/** Time out and callback */
295-
size_t wait_time;
296-
size_t timeout;
295+
size_t expiry_time;
297296
u32 iq_no;
298297
void (*callback)(struct octeon_device *, u32, void *);
299298
void *callback_arg;
299+
300+
int caller_is_done;
301+
u32 sc_status;
302+
struct completion complete;
300303
};
301304

305+
/* max timeout (in milli sec) for soft request */
306+
#define LIO_SC_MAX_TMO_MS 60000
307+
302308
/** Maximum number of buffers to allocate into soft command buffer pool
303309
*/
304310
#define MAX_SOFT_COMMAND_BUFFERS 256
@@ -319,6 +325,8 @@ struct octeon_sc_buffer_pool {
319325
(((octeon_dev_ptr)->instr_queue[iq_no]->stats.field) += count)
320326

321327
int octeon_setup_sc_buffer_pool(struct octeon_device *oct);
328+
int octeon_free_sc_done_list(struct octeon_device *oct);
329+
int octeon_free_sc_zombie_list(struct octeon_device *oct);
322330
int octeon_free_sc_buffer_pool(struct octeon_device *oct);
323331
struct octeon_soft_command *
324332
octeon_alloc_soft_command(struct octeon_device *oct,

drivers/net/ethernet/cavium/liquidio/octeon_main.h

Lines changed: 59 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -146,46 +146,70 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
146146
return 1;
147147
}
148148

149+
/* input parameter:
150+
* sc: pointer to a soft request
151+
* timeout: milli sec which an application wants to wait for the
152+
response of the request.
153+
* 0: the request will wait until its response gets back
154+
* from the firmware within LIO_SC_MAX_TMO_MS milli sec.
155+
* It the response does not return within
156+
* LIO_SC_MAX_TMO_MS milli sec, lio_process_ordered_list()
157+
* will move the request to zombie response list.
158+
*
159+
* return value:
160+
* 0: got the response from firmware for the sc request.
161+
* errno -EINTR: user abort the command.
162+
* errno -ETIME: user spefified timeout value has been expired.
163+
* errno -EBUSY: the response of the request does not return in
164+
* resonable time (LIO_SC_MAX_TMO_MS).
165+
* the sc wll be move to zombie response list by
166+
* lio_process_ordered_list()
167+
*
168+
* A request with non-zero return value, the sc->caller_is_done
169+
* will be marked 1.
170+
* When getting a request with zero return value, the requestor
171+
* should mark sc->caller_is_done with 1 after examing the
172+
* response of sc.
173+
* lio_process_ordered_list() will free the soft command on behalf
174+
* of the soft command requestor.
175+
* This is to fix the possible race condition of both timeout process
176+
* and lio_process_ordered_list()/callback function to free a
177+
* sc strucutre.
178+
*/
149179
static inline int
150-
sleep_cond(wait_queue_head_t *wait_queue, int *condition)
180+
wait_for_sc_completion_timeout(struct octeon_device *oct_dev,
181+
struct octeon_soft_command *sc,
182+
unsigned long timeout)
151183
{
152184
int errno = 0;
153-
wait_queue_entry_t we;
154-
155-
init_waitqueue_entry(&we, current);
156-
add_wait_queue(wait_queue, &we);
157-
while (!(READ_ONCE(*condition))) {
158-
set_current_state(TASK_INTERRUPTIBLE);
159-
if (signal_pending(current)) {
160-
errno = -EINTR;
161-
goto out;
162-
}
163-
schedule();
185+
long timeout_jiff;
186+
187+
if (timeout)
188+
timeout_jiff = msecs_to_jiffies(timeout);
189+
else
190+
timeout_jiff = MAX_SCHEDULE_TIMEOUT;
191+
192+
timeout_jiff =
193+
wait_for_completion_interruptible_timeout(&sc->complete,
194+
timeout_jiff);
195+
if (timeout_jiff == 0) {
196+
dev_err(&oct_dev->pci_dev->dev, "%s: sc is timeout\n",
197+
__func__);
198+
WRITE_ONCE(sc->caller_is_done, true);
199+
errno = -ETIME;
200+
} else if (timeout_jiff == -ERESTARTSYS) {
201+
dev_err(&oct_dev->pci_dev->dev, "%s: sc is interrupted\n",
202+
__func__);
203+
WRITE_ONCE(sc->caller_is_done, true);
204+
errno = -EINTR;
205+
} else if (sc->sc_status == OCTEON_REQUEST_TIMEOUT) {
206+
dev_err(&oct_dev->pci_dev->dev, "%s: sc has fatal timeout\n",
207+
__func__);
208+
WRITE_ONCE(sc->caller_is_done, true);
209+
errno = -EBUSY;
164210
}
165-
out:
166-
set_current_state(TASK_RUNNING);
167-
remove_wait_queue(wait_queue, &we);
168-
return errno;
169-
}
170211

171-
/* Gives up the CPU for a timeout period.
172-
* Check that the condition is not true before we go to sleep for a
173-
* timeout period.
174-
*/
175-
static inline void
176-
sleep_timeout_cond(wait_queue_head_t *wait_queue,
177-
int *condition,
178-
int timeout)
179-
{
180-
wait_queue_entry_t we;
181-
182-
init_waitqueue_entry(&we, current);
183-
add_wait_queue(wait_queue, &we);
184-
set_current_state(TASK_INTERRUPTIBLE);
185-
if (!(*condition))
186-
schedule_timeout(timeout);
187-
set_current_state(TASK_RUNNING);
188-
remove_wait_queue(wait_queue, &we);
212+
return errno;
189213
}
190214

191215
#ifndef ROUNDUP4

drivers/net/ethernet/cavium/liquidio/octeon_network.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,6 @@
3535
#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
3636
#define LIO_IFSTATE_RESETTING 0x10
3737

38-
struct liquidio_if_cfg_context {
39-
u32 octeon_id;
40-
wait_queue_head_t wc;
41-
int cond;
42-
};
43-
4438
struct liquidio_if_cfg_resp {
4539
u64 rh;
4640
struct liquidio_if_cfg_info cfg_info;
@@ -87,12 +81,6 @@ struct oct_nic_seapi_resp {
8781
u64 status;
8882
};
8983

90-
struct liquidio_nic_seapi_ctl_context {
91-
int octeon_id;
92-
u32 status;
93-
struct completion complete;
94-
};
95-
9684
/** LiquidIO per-interface network private data */
9785
struct lio {
9886
/** State of the interface. Rx/Tx happens only in the RUNNING state. */
@@ -234,10 +222,6 @@ int lio_wait_for_clean_oq(struct octeon_device *oct);
234222
*/
235223
void liquidio_set_ethtool_ops(struct net_device *netdev);
236224

237-
void lio_if_cfg_callback(struct octeon_device *oct,
238-
u32 status __attribute__((unused)),
239-
void *buf);
240-
241225
void lio_delete_glists(struct lio *lio);
242226

243227
int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs);

drivers/net/ethernet/cavium/liquidio/octeon_nic.c

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct,
7575
else
7676
sc->cmd.cmd2.rptr = sc->dmarptr;
7777

78-
sc->wait_time = 1000;
79-
sc->timeout = jiffies + sc->wait_time;
78+
sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
8079

8180
return sc;
8281
}
@@ -92,29 +91,6 @@ int octnet_send_nic_data_pkt(struct octeon_device *oct,
9291
ndata->reqtype);
9392
}
9493

95-
static void octnet_link_ctrl_callback(struct octeon_device *oct,
96-
u32 status,
97-
void *sc_ptr)
98-
{
99-
struct octeon_soft_command *sc = (struct octeon_soft_command *)sc_ptr;
100-
struct octnic_ctrl_pkt *nctrl;
101-
102-
nctrl = (struct octnic_ctrl_pkt *)sc->ctxptr;
103-
104-
/* Call the callback function if status is zero (meaning OK) or status
105-
* contains a firmware status code bigger than zero (meaning the
106-
* firmware is reporting an error).
107-
* If no response was expected, status is OK if the command was posted
108-
* successfully.
109-
*/
110-
if ((!status || status > FIRMWARE_STATUS_CODE(0)) && nctrl->cb_fn) {
111-
nctrl->status = status;
112-
nctrl->cb_fn(nctrl);
113-
}
114-
115-
octeon_free_soft_command(oct, sc);
116-
}
117-
11894
static inline struct octeon_soft_command
11995
*octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct,
12096
struct octnic_ctrl_pkt *nctrl)
@@ -127,17 +103,14 @@ static inline struct octeon_soft_command
127103
uddsize = (u32)(nctrl->ncmd.s.more * 8);
128104

129105
datasize = OCTNET_CMD_SIZE + uddsize;
130-
rdatasize = (nctrl->wait_time) ? 16 : 0;
106+
rdatasize = 16;
131107

132108
sc = (struct octeon_soft_command *)
133-
octeon_alloc_soft_command(oct, datasize, rdatasize,
134-
sizeof(struct octnic_ctrl_pkt));
109+
octeon_alloc_soft_command(oct, datasize, rdatasize, 0);
135110

136111
if (!sc)
137112
return NULL;
138113

139-
memcpy(sc->ctxptr, nctrl, sizeof(struct octnic_ctrl_pkt));
140-
141114
data = (u8 *)sc->virtdptr;
142115

143116
memcpy(data, &nctrl->ncmd, OCTNET_CMD_SIZE);
@@ -154,9 +127,8 @@ static inline struct octeon_soft_command
154127
octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_CMD,
155128
0, 0, 0);
156129

157-
sc->callback = octnet_link_ctrl_callback;
158-
sc->callback_arg = sc;
159-
sc->wait_time = nctrl->wait_time;
130+
init_completion(&sc->complete);
131+
sc->sc_status = OCTEON_REQUEST_PENDING;
160132

161133
return sc;
162134
}
@@ -199,5 +171,26 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
199171
}
200172

201173
spin_unlock_bh(&oct->cmd_resp_wqlock);
174+
175+
switch (nctrl->ncmd.s.cmd) {
176+
/* caller holds lock, can not sleep */
177+
case OCTNET_CMD_CHANGE_DEVFLAGS:
178+
case OCTNET_CMD_SET_MULTI_LIST:
179+
case OCTNET_CMD_SET_UC_LIST:
180+
WRITE_ONCE(sc->caller_is_done, true);
181+
return retval;
182+
}
183+
184+
retval = wait_for_sc_completion_timeout(oct, sc, 0);
185+
if (retval)
186+
return (retval);
187+
188+
nctrl->sc_status = sc->sc_status;
189+
retval = nctrl->sc_status;
190+
if (nctrl->cb_fn)
191+
nctrl->cb_fn(nctrl);
192+
193+
WRITE_ONCE(sc->caller_is_done, true);
194+
202195
return retval;
203196
}

0 commit comments

Comments
 (0)