@@ -407,16 +407,42 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
407407 mptcp_sk (sk )-> timer_ival = tout > 0 ? tout : TCP_RTO_MIN ;
408408}
409409
410- static void mptcp_send_ack (struct mptcp_sock * msk )
410+ static bool mptcp_subflow_active (struct mptcp_subflow_context * subflow )
411+ {
412+ struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
413+
414+ /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
415+ if (subflow -> request_join && !subflow -> fully_established )
416+ return false;
417+
418+ /* only send if our side has not closed yet */
419+ return ((1 << ssk -> sk_state ) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT ));
420+ }
421+
422+ static void mptcp_send_ack (struct mptcp_sock * msk , bool force )
411423{
412424 struct mptcp_subflow_context * subflow ;
425+ struct sock * pick = NULL ;
413426
414427 mptcp_for_each_subflow (msk , subflow ) {
415428 struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
416429
417- lock_sock (ssk );
418- tcp_send_ack (ssk );
419- release_sock (ssk );
430+ if (force ) {
431+ lock_sock (ssk );
432+ tcp_send_ack (ssk );
433+ release_sock (ssk );
434+ continue ;
435+ }
436+
437+ /* if the hintes ssk is still active, use it */
438+ pick = ssk ;
439+ if (ssk == msk -> ack_hint )
440+ break ;
441+ }
442+ if (!force && pick ) {
443+ lock_sock (pick );
444+ tcp_cleanup_rbuf (pick , 1 );
445+ release_sock (pick );
420446 }
421447}
422448
@@ -468,7 +494,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
468494
469495 ret = true;
470496 mptcp_set_timeout (sk , NULL );
471- mptcp_send_ack (msk );
497+ mptcp_send_ack (msk , true );
472498 mptcp_close_wake_up (sk );
473499 }
474500 return ret ;
@@ -483,7 +509,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
483509 unsigned int moved = 0 ;
484510 bool more_data_avail ;
485511 struct tcp_sock * tp ;
486- u32 old_copied_seq ;
487512 bool done = false;
488513 int sk_rbuf ;
489514
@@ -500,7 +525,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
500525
501526 pr_debug ("msk=%p ssk=%p" , msk , ssk );
502527 tp = tcp_sk (ssk );
503- old_copied_seq = tp -> copied_seq ;
504528 do {
505529 u32 map_remaining , offset ;
506530 u32 seq = tp -> copied_seq ;
@@ -564,11 +588,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
564588 break ;
565589 }
566590 } while (more_data_avail );
591+ msk -> ack_hint = ssk ;
567592
568593 * bytes += moved ;
569- if (tp -> copied_seq != old_copied_seq )
570- tcp_cleanup_rbuf (ssk , 1 );
571-
572594 return done ;
573595}
574596
@@ -672,19 +694,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
672694 if (atomic_read (& sk -> sk_rmem_alloc ) > sk_rbuf )
673695 goto wake ;
674696
675- if (move_skbs_to_msk (msk , ssk ))
676- goto wake ;
697+ move_skbs_to_msk (msk , ssk );
677698
678- /* mptcp socket is owned, release_cb should retry */
679- if (!test_and_set_bit (TCP_DELACK_TIMER_DEFERRED ,
680- & sk -> sk_tsq_flags )) {
681- sock_hold (sk );
682-
683- /* need to try again, its possible release_cb() has already
684- * been called after the test_and_set_bit() above.
685- */
686- move_skbs_to_msk (msk , ssk );
687- }
688699wake :
689700 if (wake )
690701 sk -> sk_data_ready (sk );
@@ -1095,18 +1106,6 @@ static void mptcp_nospace(struct mptcp_sock *msk)
10951106 mptcp_clean_una ((struct sock * )msk );
10961107}
10971108
1098- static bool mptcp_subflow_active (struct mptcp_subflow_context * subflow )
1099- {
1100- struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
1101-
1102- /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
1103- if (subflow -> request_join && !subflow -> fully_established )
1104- return false;
1105-
1106- /* only send if our side has not closed yet */
1107- return ((1 << ssk -> sk_state ) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT ));
1108- }
1109-
11101109#define MPTCP_SEND_BURST_SIZE ((1 << 16) - \
11111110 sizeof(struct tcphdr) - \
11121111 MAX_TCP_OPTION_SPACE - \
@@ -1534,7 +1533,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
15341533 msk -> rcvq_space .time = mstamp ;
15351534}
15361535
1537- static bool __mptcp_move_skbs (struct mptcp_sock * msk )
1536+ static bool __mptcp_move_skbs (struct mptcp_sock * msk , unsigned int rcv )
15381537{
15391538 unsigned int moved = 0 ;
15401539 bool done ;
@@ -1553,12 +1552,16 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
15531552
15541553 slowpath = lock_sock_fast (ssk );
15551554 done = __mptcp_move_skbs_from_subflow (msk , ssk , & moved );
1555+ if (moved && rcv ) {
1556+ WRITE_ONCE (msk -> rmem_pending , min (rcv , moved ));
1557+ tcp_cleanup_rbuf (ssk , 1 );
1558+ WRITE_ONCE (msk -> rmem_pending , 0 );
1559+ }
15561560 unlock_sock_fast (ssk , slowpath );
15571561 } while (!done );
15581562
15591563 if (mptcp_ofo_queue (msk ) || moved > 0 ) {
1560- if (!mptcp_check_data_fin ((struct sock * )msk ))
1561- mptcp_send_ack (msk );
1564+ mptcp_check_data_fin ((struct sock * )msk );
15621565 return true;
15631566 }
15641567 return false;
@@ -1582,8 +1585,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
15821585 target = sock_rcvlowat (sk , flags & MSG_WAITALL , len );
15831586 __mptcp_flush_join_list (msk );
15841587
1585- while ( len > ( size_t ) copied ) {
1586- int bytes_read ;
1588+ for (;; ) {
1589+ int bytes_read , old_space ;
15871590
15881591 bytes_read = __mptcp_recvmsg_mskq (msk , msg , len - copied );
15891592 if (unlikely (bytes_read < 0 )) {
@@ -1595,9 +1598,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
15951598 copied += bytes_read ;
15961599
15971600 if (skb_queue_empty (& sk -> sk_receive_queue ) &&
1598- __mptcp_move_skbs (msk ))
1601+ __mptcp_move_skbs (msk , len - copied ))
15991602 continue ;
16001603
1604+ /* be sure to advertise window change */
1605+ old_space = READ_ONCE (msk -> old_wspace );
1606+ if ((tcp_space (sk ) - old_space ) >= old_space )
1607+ mptcp_send_ack (msk , false);
1608+
16011609 /* only the master socket status is relevant here. The exit
16021610 * conditions mirror closely tcp_recvmsg()
16031611 */
@@ -1650,7 +1658,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
16501658 /* .. race-breaker: ssk might have gotten new data
16511659 * after last __mptcp_move_skbs() returned false.
16521660 */
1653- if (unlikely (__mptcp_move_skbs (msk )))
1661+ if (unlikely (__mptcp_move_skbs (msk , 0 )))
16541662 set_bit (MPTCP_DATA_READY , & msk -> flags );
16551663 } else if (unlikely (!test_bit (MPTCP_DATA_READY , & msk -> flags ))) {
16561664 /* data to read but mptcp_wait_data() cleared DATA_READY */
@@ -1881,7 +1889,6 @@ static void mptcp_worker(struct work_struct *work)
18811889 if (test_and_clear_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags ))
18821890 __mptcp_close_subflow (msk );
18831891
1884- __mptcp_move_skbs (msk );
18851892 if (mptcp_send_head (sk ))
18861893 mptcp_push_pending (sk , 0 );
18871894
@@ -1965,6 +1972,7 @@ static int __mptcp_init_sock(struct sock *sk)
19651972 msk -> out_of_order_queue = RB_ROOT ;
19661973 msk -> first_pending = NULL ;
19671974
1975+ msk -> ack_hint = NULL ;
19681976 msk -> first = NULL ;
19691977 inet_csk (sk )-> icsk_sync_mss = mptcp_sync_mss ;
19701978
@@ -2500,8 +2508,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
25002508 return - EOPNOTSUPP ;
25012509}
25022510
2503- #define MPTCP_DEFERRED_ALL (TCPF_DELACK_TIMER_DEFERRED | \
2504- TCPF_WRITE_TIMER_DEFERRED)
2511+ #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
25052512
25062513/* this is very alike tcp_release_cb() but we must handle differently a
25072514 * different set of events
@@ -2519,16 +2526,6 @@ static void mptcp_release_cb(struct sock *sk)
25192526
25202527 sock_release_ownership (sk );
25212528
2522- if (flags & TCPF_DELACK_TIMER_DEFERRED ) {
2523- struct mptcp_sock * msk = mptcp_sk (sk );
2524- struct sock * ssk ;
2525-
2526- ssk = mptcp_subflow_recv_lookup (msk );
2527- if (!ssk || sk -> sk_state == TCP_CLOSE ||
2528- !schedule_work (& msk -> work ))
2529- __sock_put (sk );
2530- }
2531-
25322529 if (flags & TCPF_WRITE_TIMER_DEFERRED ) {
25332530 mptcp_retransmit_handler (sk );
25342531 __sock_put (sk );
0 commit comments