@@ -41,6 +41,12 @@ static int smbd_post_recv(
 		struct smbd_response *response);
 
 static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+		struct smbd_connection *info,
+		struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+		struct page *page, unsigned long offset,
+		size_t size, int remaining_data_length);
 
 /* SMBD version number */
 #define SMBD_V1	0x0100
@@ -177,6 +183,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work)
 	log_rdma_event(INFO, "cancelling send immediate work\n");
 	cancel_delayed_work_sync(&info->send_immediate_work);
 
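+	/* drain callers still inside smbd_send() before tearing down */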
+	log_rdma_event(INFO, "wait for all send to finish\n");
+	wait_event(info->wait_smbd_send_pending,
+		info->smbd_send_pending == 0);
+
 	log_rdma_event(INFO, "wait for all recv to finish\n");
 	wake_up_interruptible(&info->wait_reassembly_queue);
 	wait_event(info->wait_smbd_recv_pending,
@@ -1077,6 +1087,24 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
 	return rc;
 }
 
+/*
+ * Send a page
+ * page: the page to send
+ * offset: offset in the page to send
+ * size: length in the page to send
+ * remaining_data_length: remaining data to send in this payload
+ */
+static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
+		unsigned long offset, size_t size, int remaining_data_length)
+{
+	struct scatterlist sgl;
+
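+	/* one scatterlist entry suffices: the payload is a contiguous page range */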
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, page, size, offset);
+
+	return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
+}
+
 /*
  * Send an empty message
  * Empty message is used to extend credits to the peer to keep it alive
@@ -1088,6 +1116,35 @@ static int smbd_post_send_empty(struct smbd_connection *info)
 	return smbd_post_send_sgl(info, NULL, 0, 0);
 }
 
+/*
+ * Send a data buffer
+ * iov: the iov array describing the data buffers
+ * n_vec: number of entries in the iov array
+ * remaining_data_length: remaining data to send following this packet
+ * in the segmented SMBD payload
+ */
+static int smbd_post_send_data(
+	struct smbd_connection *info, struct kvec *iov, int n_vec,
+	int remaining_data_length)
+{
+	int i;
+	u32 data_length = 0;
+	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+
+	if (n_vec > SMBDIRECT_MAX_SGE) {
+		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
+		return -ENOMEM;
+	}
+
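+	/* map each kvec entry to one scatterlist entry, totalling the length */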
+	sg_init_table(sgl, n_vec);
+	for (i = 0; i < n_vec; i++) {
+		data_length += iov[i].iov_len;
+		sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
+	}
+
+	return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
+}
+
 /*
  * Post a receive request to the transport
  * The remote peer can only send data when a receive request is posted
@@ -1652,6 +1709,9 @@ struct smbd_connection *_smbd_get_connection(
 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
 		info->keep_alive_interval*HZ);
 
+	init_waitqueue_head(&info->wait_smbd_send_pending);
+	info->smbd_send_pending = 0;
+
 	init_waitqueue_head(&info->wait_smbd_recv_pending);
 	info->smbd_recv_pending = 0;
 
@@ -1943,3 +2003,189 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 	msg->msg_iter.count = 0;
 	return rc;
 }
+
+/*
+ * Send data to transport
+ * Each rqst is transported as an SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 if successfully written, otherwise error code
+ */
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+{
+	struct kvec vec;
+	int nvecs;
+	int size;
+	int buflen = 0, remaining_data_length;
+	int start, i, j;
+	int max_iov_size =
+		info->max_send_size - sizeof(struct smbd_data_transfer);
+	struct kvec iov[SMBDIRECT_MAX_SGE];
+	int rc;
+
+	info->smbd_send_pending++;
+	if (info->transport_status != SMBD_CONNECTED) {
+		rc = -ENODEV;
+		goto done;
+	}
+
+	/*
+	 * This usually means a configuration error.
+	 * We use RDMA read/write for packet size > rdma_readwrite_threshold;
+	 * as long as it's properly configured we should never get into this
+	 * situation.
+	 */
+	if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
+		log_write(ERR, "maximum send segment %x exceeding %x\n",
+			rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Remove the RFC1002 length defined in MS-SMB2 section 2.1.
+	 * It is used only for the TCP transport.
+	 * In the future we may want to add a transport layer under the
+	 * protocol layer, so that this is only issued to the TCP transport.
+	 */
+	iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+	iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+	buflen += iov[0].iov_len;
+
+	/* total up iov array first */
+	for (i = 1; i < rqst->rq_nvec; i++) {
+		iov[i].iov_base = rqst->rq_iov[i].iov_base;
+		iov[i].iov_len = rqst->rq_iov[i].iov_len;
+		buflen += iov[i].iov_len;
+	}
+
+	/* add in the page array if there is one */
+	if (rqst->rq_npages) {
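+		/* all pages are rq_pagesz bytes except the last, which holds rq_tailsz */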
+		buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+		buflen += rqst->rq_tailsz;
+	}
+
+	if (buflen + sizeof(struct smbd_data_transfer) >
+		info->max_fragmented_send_size) {
+		log_write(ERR, "payload size %d > max size %d\n",
+			buflen, info->max_fragmented_send_size);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	remaining_data_length = buflen;
+
+	log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+		"rq_tailsz=%d buflen=%d\n",
+		rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+		rqst->rq_tailsz, buflen);
+
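+	/* skip iov[0] if stripping the RFC1002 header left it empty */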
+	start = i = iov[0].iov_len ? 0 : 1;
+	buflen = 0;
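+	/*
+	 * Greedily pack consecutive iovs into one send of up to max_iov_size;
+	 * an iov that alone exceeds max_iov_size is split into
+	 * max_iov_size-sized chunks and sent one chunk at a time.
+	 */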
+	while (true) {
+		buflen += iov[i].iov_len;
+		if (buflen > max_iov_size) {
+			if (i > start) {
+				remaining_data_length -=
+					(buflen - iov[i].iov_len);
+				log_write(INFO, "sending iov[] from start=%d "
+					"i=%d nvecs=%d "
+					"remaining_data_length=%d\n",
+					start, i, i - start,
+					remaining_data_length);
+				rc = smbd_post_send_data(
+					info, &iov[start], i - start,
+					remaining_data_length);
+				if (rc)
+					goto done;
+			} else {
+				/* iov[start] is too big, break it */
+				nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+				log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
+					" break to %d vectors\n",
+					start, iov[start].iov_base,
+					buflen, nvecs);
+				for (j = 0; j < nvecs; j++) {
+					vec.iov_base =
+						(char *)iov[start].iov_base +
+						j * max_iov_size;
+					vec.iov_len = max_iov_size;
+					if (j == nvecs - 1)
+						vec.iov_len =
+							buflen -
+							max_iov_size * (nvecs - 1);
+					remaining_data_length -= vec.iov_len;
+					log_write(INFO,
+						"sending vec j=%d iov_base=%p"
+						" iov_len=%zu "
+						"remaining_data_length=%d\n",
+						j, vec.iov_base, vec.iov_len,
+						remaining_data_length);
+					rc = smbd_post_send_data(
+						info, &vec, 1,
+						remaining_data_length);
+					if (rc)
+						goto done;
+				}
+				i++;
+			}
+			start = i;
+			buflen = 0;
+		} else {
+			i++;
+			if (i == rqst->rq_nvec) {
+				/* send out all remaining vecs */
+				remaining_data_length -= buflen;
+				log_write(INFO,
+					"sending iov[] from start=%d i=%d "
+					"nvecs=%d remaining_data_length=%d\n",
+					start, i, i - start,
+					remaining_data_length);
+				rc = smbd_post_send_data(info, &iov[start],
+					i - start, remaining_data_length);
+				if (rc)
+					goto done;
+				break;
+			}
+		}
+		log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
+	}
+
+	/* now sending pages if there are any */
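+	/* a page larger than max_iov_size is likewise sent in max_iov_size chunks */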
+	for (i = 0; i < rqst->rq_npages; i++) {
+		buflen = (i == rqst->rq_npages - 1) ?
+			rqst->rq_tailsz : rqst->rq_pagesz;
+		nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+		log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
+			buflen, nvecs);
+		for (j = 0; j < nvecs; j++) {
+			size = max_iov_size;
+			if (j == nvecs - 1)
+				size = buflen - j * max_iov_size;
+			remaining_data_length -= size;
+			log_write(INFO, "sending pages i=%d offset=%d size=%d"
+				" remaining_data_length=%d\n",
+				i, j * max_iov_size, size, remaining_data_length);
+			rc = smbd_post_send_page(
+				info, rqst->rq_pages[i], j * max_iov_size,
+				size, remaining_data_length);
+			if (rc)
+				goto done;
+		}
+	}
+
+done:
+	/*
+	 * As an optimization, we don't wait for an individual I/O to finish
+	 * before posting the next one. Post them all, then wait for the
+	 * pending send count to drop to 0, which means every I/O has
+	 * completed and we are safe to return.
+	 */
+
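+	/*
+	 * Note: send_payload_pending counts individual posted sends;
+	 * smbd_send_pending counts callers inside smbd_send() and is what
+	 * smbd_destroy_rdma_work() waits on during shutdown.
+	 */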
+	wait_event(info->wait_send_payload_pending,
+		atomic_read(&info->send_payload_pending) == 0);
+
+	info->smbd_send_pending--;
+	wake_up(&info->wait_smbd_send_pending);
+
+	return rc;
+}