Commit 6703a60

Merge branch 'net-tls-small-TX-offload-optimizations'
Jakub Kicinski says:

====================
net/tls: small TX offload optimizations

This set brings small TLS TX device optimizations. The biggest gain
comes from fixing a misuse of non temporal copy instructions. On a
synthetic workload modelled after a customer's RFC application I see
a 3-5% gain.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents: fcd8c62 + e681cc6
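The "misuse of non temporal copy instructions" the cover letter refers to is issuing non-temporal (cache-bypassing) stores on buffers that are not cache-line aligned; mixing non-temporal stores and regular accesses to the same cache line is expensive, so the new tls_device_copy_data() in the diff below routes only whole, aligned cache lines through the non-temporal primitive. What follows is a minimal, hypothetical userspace sketch of that split: memcpy() stands in for both copy_from_iter() and copy_from_iter_nocache(), and CACHE_LINE stands in for SMP_CACHE_BYTES.

/* Hypothetical sketch of the head/middle/tail split performed by
 * tls_device_copy_data(); memcpy() stands in for both the cached and
 * the non-temporal copy primitives used in the kernel.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CACHE_LINE 64

static void copy_split(void *dst, const void *src, size_t bytes)
{
        /* bytes to the next cache-line boundary (0 if already aligned) */
        size_t head = -(uintptr_t)dst & (CACHE_LINE - 1);
        size_t mid;

        if (head) {
                if (head > bytes)
                        head = bytes;
                memcpy(dst, src, head);         /* cached copy for the head */
                dst = (char *)dst + head;
                src = (const char *)src + head;
                bytes -= head;
        }

        mid = bytes & ~(size_t)(CACHE_LINE - 1);
        memcpy(dst, src, mid);                  /* non-temporal copy goes here */
        dst = (char *)dst + mid;
        src = (const char *)src + mid;
        bytes -= mid;

        if (bytes)
                memcpy(dst, src, bytes);        /* cached copy for the tail */
}

int main(void)
{
        char src[300], dst[300];

        memset(src, 'x', sizeof(src));
        /* deliberately misaligned destination to exercise all three legs */
        copy_split(dst + 3, src, 250);
        printf("copied 250 bytes via head/mid/tail split\n");
        return 0;
}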


net/tls/tls_device.c

Lines changed: 91 additions & 39 deletions
@@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
 
 static void destroy_record(struct tls_record_info *record)
 {
-	int nr_frags = record->num_frags;
-	skb_frag_t *frag;
+	int i;
 
-	while (nr_frags-- > 0) {
-		frag = &record->frags[nr_frags];
-		__skb_frag_unref(frag);
-	}
+	for (i = 0; i < record->num_frags; i++)
+		__skb_frag_unref(&record->frags[i]);
 	kfree(record);
 }
 
@@ -259,33 +256,15 @@ static int tls_push_record(struct sock *sk,
 			   struct tls_context *ctx,
 			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
-			   struct page_frag *pfrag,
-			   int flags,
-			   unsigned char record_type)
+			   int flags)
 {
 	struct tls_prot_info *prot = &ctx->prot_info;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct page_frag dummy_tag_frag;
 	skb_frag_t *frag;
 	int i;
 
-	/* fill prepend */
-	frag = &record->frags[0];
-	tls_fill_prepend(ctx,
-			 skb_frag_address(frag),
-			 record->len - prot->prepend_size,
-			 record_type,
-			 prot->version);
-
-	/* HW doesn't care about the data in the tag, because it fills it. */
-	dummy_tag_frag.page = skb_frag_page(frag);
-	dummy_tag_frag.offset = 0;
-
-	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
 	record->end_seq = tp->write_seq + record->len;
-	spin_lock_irq(&offload_ctx->lock);
-	list_add_tail(&record->list, &offload_ctx->records_list);
-	spin_unlock_irq(&offload_ctx->lock);
+	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
 	offload_ctx->open_record = NULL;
 
 	if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
@@ -307,6 +286,38 @@ static int tls_push_record(struct sock *sk,
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
+static int tls_device_record_close(struct sock *sk,
+				   struct tls_context *ctx,
+				   struct tls_record_info *record,
+				   struct page_frag *pfrag,
+				   unsigned char record_type)
+{
+	struct tls_prot_info *prot = &ctx->prot_info;
+	int ret;
+
+	/* append tag
+	 * device will fill in the tag, we just need to append a placeholder
+	 * use socket memory to improve coalescing (re-using a single buffer
+	 * increases frag count)
+	 * if we can't allocate memory now, steal some back from data
+	 */
+	if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+					sk->sk_allocation))) {
+		ret = 0;
+		tls_append_frag(record, pfrag, prot->tag_size);
+	} else {
+		ret = prot->tag_size;
+		if (record->len <= prot->overhead_size)
+			return -ENOMEM;
+	}
+
+	/* fill prepend */
+	tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+			 record->len - prot->overhead_size,
+			 record_type, prot->version);
+	return ret;
+}
+
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
@@ -361,6 +372,31 @@ static int tls_do_allocation(struct sock *sk,
 	return 0;
 }
 
+static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
+{
+	size_t pre_copy, nocache;
+
+	pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
+	if (pre_copy) {
+		pre_copy = min(pre_copy, bytes);
+		if (copy_from_iter(addr, pre_copy, i) != pre_copy)
+			return -EFAULT;
+		bytes -= pre_copy;
+		addr += pre_copy;
+	}
+
+	nocache = round_down(bytes, SMP_CACHE_BYTES);
+	if (copy_from_iter_nocache(addr, nocache, i) != nocache)
+		return -EFAULT;
+	bytes -= nocache;
+	addr += nocache;
+
+	if (bytes && copy_from_iter(addr, bytes, i) != bytes)
+		return -EFAULT;
+
+	return 0;
+}
+
 static int tls_push_data(struct sock *sk,
 			 struct iov_iter *msg_iter,
 			 size_t size, int flags,
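The pre_copy expression above relies on the two's-complement identity ~(addr - 1) == -addr, so masking with SMP_CACHE_BYTES - 1 yields the distance from addr to the next cache-line boundary (zero when addr is already aligned). A small standalone check of that arithmetic, assuming SMP_CACHE_BYTES is 64:

/* Hypothetical standalone check of the pre_copy mask used above. */
#include <assert.h>
#include <stdio.h>

#define SMP_CACHE_BYTES 64UL

int main(void)
{
        unsigned long addr;

        for (addr = 0x1000; addr < 0x1000 + 2 * SMP_CACHE_BYTES; addr++) {
                unsigned long pre_copy = ~(addr - 1) & (SMP_CACHE_BYTES - 1);

                /* distance to next boundary, computed the obvious way */
                assert(pre_copy == (SMP_CACHE_BYTES - addr % SMP_CACHE_BYTES)
                                   % SMP_CACHE_BYTES);
        }
        printf("addr 0x1234 -> pre_copy %lu\n",
               ~(0x1234UL - 1) & (SMP_CACHE_BYTES - 1));       /* prints 12 */
        return 0;
}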
@@ -434,12 +470,10 @@ static int tls_push_data(struct sock *sk,
 		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
 		copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-		if (copy_from_iter_nocache(page_address(pfrag->page) +
-					   pfrag->offset,
-					   copy, msg_iter) != copy) {
-			rc = -EFAULT;
+		rc = tls_device_copy_data(page_address(pfrag->page) +
+					  pfrag->offset, copy, msg_iter);
+		if (rc)
 			goto handle_error;
-		}
 		tls_append_frag(record, pfrag, copy);
 
 		size -= copy;
@@ -457,13 +491,24 @@ static int tls_push_data(struct sock *sk,
 
 		if (done || record->len >= max_open_record_len ||
 		    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+			rc = tls_device_record_close(sk, tls_ctx, record,
+						     pfrag, record_type);
+			if (rc) {
+				if (rc > 0) {
+					size += rc;
+				} else {
+					size = orig_size;
+					destroy_record(record);
+					ctx->open_record = NULL;
+					break;
+				}
+			}
+
 			rc = tls_push_record(sk,
 					     tls_ctx,
 					     ctx,
 					     record,
-					     pfrag,
-					     tls_push_record_flags,
-					     record_type);
+					     tls_push_record_flags);
 			if (rc < 0)
 				break;
 		}
@@ -538,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 		/* if retransmit_hint is irrelevant start
 		 * from the beggining of the list
 		 */
-		info = list_first_entry(&context->records_list,
-					struct tls_record_info, list);
+		info = list_first_entry_or_null(&context->records_list,
+						struct tls_record_info, list);
+		if (!info)
+			return NULL;
 		record_sn = context->unacked_record_sn;
 	}
 
-	list_for_each_entry_from(info, &context->records_list, list) {
+	/* We just need the _rcu for the READ_ONCE() */
+	rcu_read_lock();
+	list_for_each_entry_from_rcu(info, &context->records_list, list) {
 		if (before(seq, info->end_seq)) {
 			if (!context->retransmit_hint ||
 			    after(info->end_seq,
@@ -552,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				context->retransmit_hint = info;
 			}
 			*p_record_sn = record_sn;
-			return info;
+			goto exit_rcu_unlock;
 		}
 		record_sn++;
 	}
+	info = NULL;
 
-	return NULL;
+exit_rcu_unlock:
+	rcu_read_unlock();
+	return info;
 }
 EXPORT_SYMBOL(tls_get_record);
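The last three hunks pair list_add_tail_rcu() on the record-producing side with an rcu_read_lock()ed walk in tls_get_record(), replacing the spin_lock_irq() that previously guarded list insertion; as the in-line comment notes, the _rcu accessors are wanted chiefly for their READ_ONCE() semantics, since a lookup may race with a record being appended at the tail. A condensed kernel-style sketch of the pattern follows (not a standalone program; my_ctx and my_record are hypothetical names, not taken from tls_device.c):

/* Illustrative kernel-style sketch of the lockless-reader pattern. */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/types.h>

struct my_record {
        struct list_head list;
        u64 end_seq;
};

struct my_ctx {
        struct list_head records;       /* append-mostly, walked by lookups */
};

/* producer (TX path; in tls_device.c this is serialized by the socket lock) */
static void my_publish(struct my_ctx *ctx, struct my_record *rec)
{
        list_add_tail_rcu(&rec->list, &ctx->records);
}

/* lookup: may observe a tail entry being linked in concurrently */
static struct my_record *my_lookup(struct my_ctx *ctx, u64 seq)
{
        struct my_record *rec, *found = NULL;

        rcu_read_lock();
        list_for_each_entry_rcu(rec, &ctx->records, list) {
                if (seq < rec->end_seq) {
                        found = rec;
                        break;
                }
        }
        rcu_read_unlock();
        return found;
}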
