Commit 128abd0

Rajkumar Manoharan authored and Kalle Valo (kvalo) committed
ath10k: reuse copy engine 5 (htt rx) descriptors
Whenever an HTT Rx indication, i.e. a target-to-host message, is received on the Rx copy engine (CE5), the message is freed after the response has been processed, and CE5 is then refilled with new descriptors during post-Rx processing. These memory alloc and free operations can be avoided by reusing the same descriptors.

During CE pipe allocation the full ring is not initialized, i.e. only n - 1 entries are filled up, so for CE5 the full ring has to be filled up to reuse descriptors. Moreover, the CE5 write index is updated in a single shot instead of by incremental access, which avoids multiple pci_write and ce_ring accesses. In experiments this improves CPU usage by ~3% on the IPQ4019 platform.

Signed-off-by: Rajkumar Manoharan <[email protected]>
Signed-off-by: Kalle Valo <[email protected]>
1 parent 24d9ef5 · commit 128abd0
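To picture the "single shot" write index update described above, here is a hedged userspace sketch, not driver code: RING_SIZE, post_one(), post_batch() and reg_writes are invented stand-ins. It contrasts one simulated register write per posted buffer with a single masked add per batch.

/* Toy model: reusing ring slots and batching the write index update. */
#include <stdio.h>

#define RING_SIZE 8			/* CE rings are powers of two */
#define MASK	  (RING_SIZE - 1)

static unsigned int write_index;	/* models dest_ring->write_index */
static unsigned int reg_writes;		/* counts simulated register writes */

static void post_one(void)		/* old style: one write per buffer */
{
	write_index = (write_index + 1) & MASK;
	reg_writes++;
}

static void post_batch(unsigned int n)	/* new style: one write per batch */
{
	write_index = (write_index + n) & MASK;
	reg_writes++;
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		post_one();
	printf("incremental: idx=%u, register writes=%u\n",
	       write_index, reg_writes);

	write_index = reg_writes = 0;
	post_batch(6);
	printf("batched:     idx=%u, register writes=%u\n",
	       write_index, reg_writes);
	return 0;
}

With n buffers completed per interrupt, this replaces n MMIO writes with a single one.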

3 files changed (+84, -5 lines)

drivers/net/wireless/ath/ath10k/ce.c (20 additions, 3 deletions)

@@ -411,7 +411,8 @@ int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 
 	lockdep_assert_held(&ar_pci->ce_lock);
 
-	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
+	if ((pipe->id != 5) &&
+	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
 		return -ENOSPC;
 
 	desc->addr = __cpu_to_le32(paddr);
@@ -425,6 +426,19 @@ int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 	return 0;
 }
 
+void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
+{
+	struct ath10k *ar = pipe->ar;
+	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
+	unsigned int nentries_mask = dest_ring->nentries_mask;
+	unsigned int write_index = dest_ring->write_index;
+	u32 ctrl_addr = pipe->ctrl_addr;
+
+	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
+	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
+	dest_ring->write_index = write_index;
+}
+
 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
 {
 	struct ath10k *ar = pipe->ar;
@@ -478,8 +492,11 @@ int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
 	*per_transfer_contextp =
 		dest_ring->per_transfer_context[sw_index];
 
-	/* sanity */
-	dest_ring->per_transfer_context[sw_index] = NULL;
+	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
+	 * So update the transfer context for all CEs except CE5.
+	 */
+	if (ce_state->id != 5)
+		dest_ring->per_transfer_context[sw_index] = NULL;
 
 	/* Update sw_index */
 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
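A note on why the (pipe->id != 5) bypass is safe to add: in a masked ring, write_index == sw_index reads as "empty", so a producer normally stops one slot short of the consumer, which is exactly what the delta check against sw_index - 1 enforces and why only n - 1 entries are filled at init. CE5's refill is serialized with its completion processing, so it may occupy that last slot as well. A small standalone demo of the arithmetic (the macro body is the one from ce.h; the ring size and indices are illustrative):

#include <stdio.h>

/* Same definition as in ce.h. */
#define CE_RING_DELTA(nentries_mask, fromidx, toidx) \
	(((int)(toidx)-(int)(fromidx)) & (nentries_mask))

int main(void)
{
	unsigned int mask = 7;		/* 8-entry ring */
	unsigned int sw_index = 2;	/* consumer position */

	/* Producer at 0: one slot of headroom before sw_index - 1. */
	printf("%d\n", CE_RING_DELTA(mask, 0, sw_index - 1));	/* 1 */
	/* Producer at 1: delta is 0, so the normal path returns -ENOSPC
	 * even though slot 1 itself is still unused.
	 */
	printf("%d\n", CE_RING_DELTA(mask, 1, sw_index - 1));	/* 0 */
	return 0;
}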

drivers/net/wireless/ath/ath10k/ce.h (3 additions, 0 deletions)

@@ -166,6 +166,7 @@ int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe);
 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe);
 int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr);
 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr);
+void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries);
 
 /* recv flags */
 /* Data is byte-swapped */
@@ -410,6 +411,8 @@ static inline u32 ath10k_ce_base_address(struct ath10k *ar, unsigned int ce_id)
 	(((int)(toidx)-(int)(fromidx)) & (nentries_mask))
 
 #define CE_RING_IDX_INCR(nentries_mask, idx) (((idx) + 1) & (nentries_mask))
+#define CE_RING_IDX_ADD(nentries_mask, idx, num) \
+	(((idx) + (num)) & (nentries_mask))
 
 #define CE_WRAPPER_INTERRUPT_SUMMARY_HOST_MSI_LSB \
 	ar->regs->ce_wrap_intr_sum_host_msi_lsb
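CE ring sizes are powers of two, so nentries_mask = nentries - 1 and the bitwise AND implements the modulo; CE_RING_IDX_ADD therefore wraps correctly across the end of the ring. A quick standalone check of the wrap (the 512-entry ring is just an example value):

#include <stdio.h>

#define CE_RING_IDX_ADD(nentries_mask, idx, num) \
	(((idx) + (num)) & (nentries_mask))

int main(void)
{
	/* 512-entry ring, mask 511: (510 + 5) mod 512 == 3. */
	printf("%u\n", CE_RING_IDX_ADD(511u, 510u, 5u));
	return 0;
}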

drivers/net/wireless/ath/ath10k/pci.c (61 additions, 2 deletions)

@@ -809,7 +809,8 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
 	spin_lock_bh(&ar_pci->ce_lock);
 	num = __ath10k_ce_rx_num_free_bufs(ce_pipe);
 	spin_unlock_bh(&ar_pci->ce_lock);
-	while (num--) {
+
+	while (num >= 0) {
 		ret = __ath10k_pci_rx_post_buf(pipe);
 		if (ret) {
 			if (ret == -ENOSPC)
@@ -819,6 +820,7 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
 				  ATH10K_PCI_RX_POST_RETRY_MS);
 			break;
 		}
+		num--;
 	}
 }
 
@@ -1212,6 +1214,63 @@ static void ath10k_pci_process_rx_cb(struct ath10k_ce_pipe *ce_state,
 	ath10k_pci_rx_post_pipe(pipe_info);
 }
 
+static void ath10k_pci_process_htt_rx_cb(struct ath10k_ce_pipe *ce_state,
+					 void (*callback)(struct ath10k *ar,
+							  struct sk_buff *skb))
+{
+	struct ath10k *ar = ce_state->ar;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k_pci_pipe *pipe_info = &ar_pci->pipe_info[ce_state->id];
+	struct ath10k_ce_pipe *ce_pipe = pipe_info->ce_hdl;
+	struct sk_buff *skb;
+	struct sk_buff_head list;
+	void *transfer_context;
+	unsigned int nbytes, max_nbytes, nentries;
+	int orig_len;
+
+	/* No need to acquire ce_lock for CE5, since this is the only place CE5
+	 * is processed other than init and deinit. Before releasing CE5
+	 * buffers, interrupts are disabled. Thus CE5 access is serialized.
+	 */
+	__skb_queue_head_init(&list);
+	while (ath10k_ce_completed_recv_next_nolock(ce_state, &transfer_context,
+						    &nbytes) == 0) {
+		skb = transfer_context;
+		max_nbytes = skb->len + skb_tailroom(skb);
+
+		if (unlikely(max_nbytes < nbytes)) {
+			ath10k_warn(ar, "rxed more than expected (nbytes %d, max %d)",
+				    nbytes, max_nbytes);
+			continue;
+		}
+
+		dma_sync_single_for_cpu(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+					max_nbytes, DMA_FROM_DEVICE);
+		skb_put(skb, nbytes);
+		__skb_queue_tail(&list, skb);
+	}
+
+	nentries = skb_queue_len(&list);
+	while ((skb = __skb_dequeue(&list))) {
+		ath10k_dbg(ar, ATH10K_DBG_PCI, "pci rx ce pipe %d len %d\n",
+			   ce_state->id, skb->len);
+		ath10k_dbg_dump(ar, ATH10K_DBG_PCI_DUMP, NULL, "pci rx: ",
+				skb->data, skb->len);
+
+		orig_len = skb->len;
+		callback(ar, skb);
+		skb_push(skb, orig_len - skb->len);
+		skb_reset_tail_pointer(skb);
+		skb_trim(skb, 0);
+
+		/* Let the device gain the buffer again. */
+		dma_sync_single_for_device(ar->dev, ATH10K_SKB_RXCB(skb)->paddr,
+					   skb->len + skb_tailroom(skb),
+					   DMA_FROM_DEVICE);
+	}
+	ath10k_ce_rx_update_write_idx(ce_pipe, nentries);
+}
+
 /* Called by lower (CE) layer when data is received from the Target. */
 static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
 {
@@ -1268,7 +1327,7 @@ static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state)
 	 */
 	ath10k_ce_per_engine_service(ce_state->ar, 4);
 
-	ath10k_pci_process_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
+	ath10k_pci_process_htt_rx_cb(ce_state, ath10k_pci_htt_rx_deliver);
 }
 
 int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
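The reset sequence in ath10k_pci_process_htt_rx_cb(), i.e. skb_push() followed by skb_reset_tail_pointer() and skb_trim(), returns each skb to an empty state spanning its full original buffer before it is synced back to the device. Below is a hedged userspace model of that pointer arithmetic; struct toy_skb and toy_reuse_reset() are invented stand-ins for the relevant sk_buff fields and calls:

#include <assert.h>
#include <stddef.h>

/* Toy stand-in for the sk_buff fields the reuse reset touches. */
struct toy_skb {
	size_t data;	/* offset of skb->data within the buffer */
	size_t len;	/* bytes currently in the payload */
};

static void toy_reuse_reset(struct toy_skb *skb, size_t orig_len)
{
	/* skb_push(skb, orig_len - skb->len): undo any header pull the
	 * callback performed, moving skb->data back to where it started.
	 */
	skb->data -= orig_len - skb->len;
	/* skb_reset_tail_pointer() + skb_trim(skb, 0): drop the payload
	 * so the whole buffer is writable by the device again.
	 */
	skb->len = 0;
}

int main(void)
{
	struct toy_skb skb = { .data = 64, .len = 400 };	/* as received */
	size_t orig_len = skb.len;

	skb.data += 24;		/* callback consumes a 24-byte header */
	skb.len -= 24;

	toy_reuse_reset(&skb, orig_len);
	assert(skb.data == 64 && skb.len == 0);	/* pristine again */
	return 0;
}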
