@@ -662,19 +662,19 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
662662
663663 skb = skb_peek (& ssk -> sk_receive_queue );
664664 if (!skb ) {
665- /* if no data is found, a racing workqueue/recvmsg
666- * already processed the new data, stop here or we
667- * can enter an infinite loop
665+ /* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
666+ * a different CPU may have already processed the pending
667+ * data; stop here or we can enter an infinite loop.
668668 */
669669 if (!moved )
670670 done = true;
671671 break ;
672672 }
673673
674674 if (__mptcp_check_fallback (msk )) {
675- /* if we are running under the workqueue, TCP could have
676- * collapsed skbs between dummy map creation and now
677- * be sure to adjust the size
675+ /* Under fallback, skbs have no MPTCP extension and TCP could
676+ * collapse them between the dummy map creation and the
677+ * current dequeue. Be sure to adjust the map size.
678678 */
679679 map_remaining = skb -> len ;
680680 subflow -> map_data_len = skb -> len ;
@@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17071707 goto out ;
17081708 } else if (ret ) {
17091709 release_sock (ssk );
1710- goto out ;
1710+ goto do_error ;
17111711 }
17121712 release_sock (ssk );
17131713 }
@@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17171717 if ((1 << sk -> sk_state ) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT )) {
17181718 ret = sk_stream_wait_connect (sk , & timeo );
17191719 if (ret )
1720- goto out ;
1720+ goto do_error ;
17211721 }
17221722
1723+ ret = - EPIPE ;
1724+ if (unlikely (sk -> sk_err || (sk -> sk_shutdown & SEND_SHUTDOWN )))
1725+ goto do_error ;
1726+
17231727 pfrag = sk_page_frag (sk );
17241728
17251729 while (msg_data_left (msg )) {
@@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17281732 bool dfrag_collapsed ;
17291733 size_t psize , offset ;
17301734
1731- if (sk -> sk_err || (sk -> sk_shutdown & SEND_SHUTDOWN )) {
1732- ret = - EPIPE ;
1733- goto out ;
1734- }
1735-
17361735 /* reuse tail pfrag, if possible, or carve a new one from the
17371736 * page allocator
17381737 */
@@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17641763 if (copy_page_from_iter (dfrag -> page , offset , psize ,
17651764 & msg -> msg_iter ) != psize ) {
17661765 ret = - EFAULT ;
1767- goto out ;
1766+ goto do_error ;
17681767 }
17691768
17701769 /* data successfully copied into the write queue */
@@ -1796,15 +1795,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
17961795 __mptcp_push_pending (sk , msg -> msg_flags );
17971796 ret = sk_stream_wait_memory (sk , & timeo );
17981797 if (ret )
1799- goto out ;
1798+ goto do_error ;
18001799 }
18011800
18021801 if (copied )
18031802 __mptcp_push_pending (sk , msg -> msg_flags );
18041803
18051804out :
18061805 release_sock (sk );
1807- return copied ? : ret ;
1806+ return copied ;
1807+
1808+ do_error :
1809+ if (copied )
1810+ goto out ;
1811+
1812+ copied = sk_stream_error (sk , msg -> msg_flags , ret );
1813+ goto out ;
18081814}
18091815
18101816static int __mptcp_recvmsg_mskq (struct mptcp_sock * msk ,
@@ -2307,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23072313
23082314 lock_sock_nested (ssk , SINGLE_DEPTH_NESTING );
23092315
2310- if (flags & MPTCP_CF_FASTCLOSE )
2316+ if (flags & MPTCP_CF_FASTCLOSE ) {
2317+ /* be sure to force the tcp_disconnect() path,
2318+ * to generate the egress reset
2319+ */
2320+ ssk -> sk_lingertime = 0 ;
2321+ sock_set_flag (ssk , SOCK_LINGER );
23112322 subflow -> send_fastclose = 1 ;
2323+ }
23122324
23132325 need_push = (flags & MPTCP_CF_PUSH ) && __mptcp_retransmit_pending_data (sk );
23142326 if (!dispose_it ) {
@@ -2441,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
24412453 unlock_sock_fast (tcp_sk , slow );
24422454 }
24432455
2456+ /* Mirror the tcp_reset() error propagation */
2457+ switch (sk -> sk_state ) {
2458+ case TCP_SYN_SENT :
2459+ sk -> sk_err = ECONNREFUSED ;
2460+ break ;
2461+ case TCP_CLOSE_WAIT :
2462+ sk -> sk_err = EPIPE ;
2463+ break ;
2464+ case TCP_CLOSE :
2465+ return ;
2466+ default :
2467+ sk -> sk_err = ECONNRESET ;
2468+ }
2469+
24442470 inet_sk_state_store (sk , TCP_CLOSE );
24452471 sk -> sk_shutdown = SHUTDOWN_MASK ;
24462472 smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
24472473 set_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags );
24482474
2449- mptcp_close_wake_up (sk );
2475+ /* the calling mptcp_worker will properly destroy the socket */
2476+ if (sock_flag (sk , SOCK_DEAD ))
2477+ return ;
2478+
2479+ sk -> sk_state_change (sk );
2480+ sk_error_report (sk );
24502481}
24512482
24522483static void __mptcp_retrans (struct sock * sk )
@@ -2552,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
25522583 mptcp_reset_timeout (msk , 0 );
25532584}
25542585
2586+ static void mptcp_do_fastclose (struct sock * sk )
2587+ {
2588+ struct mptcp_subflow_context * subflow , * tmp ;
2589+ struct mptcp_sock * msk = mptcp_sk (sk );
2590+
2591+ mptcp_for_each_subflow_safe (msk , subflow , tmp )
2592+ __mptcp_close_ssk (sk , mptcp_subflow_tcp_sock (subflow ),
2593+ subflow , MPTCP_CF_FASTCLOSE );
2594+ }
2595+
25552596static void mptcp_worker (struct work_struct * work )
25562597{
25572598 struct mptcp_sock * msk = container_of (work , struct mptcp_sock , work );
@@ -2580,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work)
25802621 * closed, but we need the msk around to reply to incoming DATA_FIN,
25812622 * even if it is orphaned and in FIN_WAIT2 state
25822623 */
2583- if (sock_flag (sk , SOCK_DEAD ) &&
2584- (mptcp_check_close_timeout (sk ) || sk -> sk_state == TCP_CLOSE )) {
2585- inet_sk_state_store (sk , TCP_CLOSE );
2586- __mptcp_destroy_sock (sk );
2587- goto unlock ;
2624+ if (sock_flag (sk , SOCK_DEAD )) {
2625+ if (mptcp_check_close_timeout (sk )) {
2626+ inet_sk_state_store (sk , TCP_CLOSE );
2627+ mptcp_do_fastclose (sk );
2628+ }
2629+ if (sk -> sk_state == TCP_CLOSE ) {
2630+ __mptcp_destroy_sock (sk );
2631+ goto unlock ;
2632+ }
25882633 }
25892634
25902635 if (test_and_clear_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags ))
@@ -2825,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk)
28252870 sock_put (sk );
28262871}
28272872
2873+ static __poll_t mptcp_check_readable (struct mptcp_sock * msk )
2874+ {
2875+ /* Concurrent splices from sk_receive_queue into receive_queue will
2876+ * always show at least one non-empty queue when checked in this order.
2877+ */
2878+ if (skb_queue_empty_lockless (& ((struct sock * )msk )-> sk_receive_queue ) &&
2879+ skb_queue_empty_lockless (& msk -> receive_queue ))
2880+ return 0 ;
2881+
2882+ return EPOLLIN | EPOLLRDNORM ;
2883+ }
2884+
28282885bool __mptcp_close (struct sock * sk , long timeout )
28292886{
28302887 struct mptcp_subflow_context * subflow ;
@@ -2838,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
28382895 goto cleanup ;
28392896 }
28402897
2841- if (mptcp_close_state (sk ))
2898+ if (mptcp_check_readable (msk )) {
2899+ /* the msk has unread data, do the MPTCP equivalent of TCP reset */
2900+ inet_sk_state_store (sk , TCP_CLOSE );
2901+ mptcp_do_fastclose (sk );
2902+ } else if (mptcp_close_state (sk )) {
28422903 __mptcp_wr_shutdown (sk );
2904+ }
28432905
28442906 sk_stream_wait_close (sk , timeout );
28452907
@@ -3656,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
36563718 return err ;
36573719}
36583720
3659- static __poll_t mptcp_check_readable (struct mptcp_sock * msk )
3660- {
3661- /* Concurrent splices from sk_receive_queue into receive_queue will
3662- * always show at least one non-empty queue when checked in this order.
3663- */
3664- if (skb_queue_empty_lockless (& ((struct sock * )msk )-> sk_receive_queue ) &&
3665- skb_queue_empty_lockless (& msk -> receive_queue ))
3666- return 0 ;
3667-
3668- return EPOLLIN | EPOLLRDNORM ;
3669- }
3670-
36713721static __poll_t mptcp_check_writeable (struct mptcp_sock * msk )
36723722{
36733723 struct sock * sk = (struct sock * )msk ;
@@ -3718,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
37183768 if (sk -> sk_shutdown & RCV_SHUTDOWN )
37193769 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
37203770
3721- /* This barrier is coupled with smp_wmb() in tcp_reset () */
3771+ /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
37223772 smp_rmb ();
37233773 if (sk -> sk_err )
37243774 mask |= EPOLLERR ;
0 commit comments