
Commit d649e1b

longlimsft authored and smfrench committed
CIFS: SMBD: Implement function to send data via RDMA send
The transport doesn't maintain send buffers or a send queue for transferring payload via RDMA send. There is no data copy in the transport on send.

Signed-off-by: Long Li <[email protected]>
Signed-off-by: Steve French <[email protected]>
Reviewed-by: Pavel Shilovsky <[email protected]>
Reviewed-by: Ronnie Sahlberg <[email protected]>
1 parent 2fef137 commit d649e1b

2 files changed: +251 -0 lines changed
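At a high level, an upper layer hands smbd_send() a fully built smb_rqst and the transport posts each kvec and page in it straight to the RDMA send queue, with no intermediate copy, blocking until every send completes. A minimal caller sketch follows; the wrapper and the server->smbd_conn field are assumptions for illustration, while cifs_rdma_enabled() appears in smbdirect.h below:

/* Hypothetical caller -- not part of this commit. */
static int send_rqst_over_rdma(struct TCP_Server_Info *server,
			       struct smb_rqst *rqst)
{
	if (!cifs_rdma_enabled(server))
		return -ENODEV;	/* caller falls back to TCP */

	/* no copies: rqst's kvecs and pages are posted as-is */
	return smbd_send(server->smbd_conn, rqst);
}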

fs/cifs/smbdirect.c

Lines changed: 246 additions & 0 deletions
@@ -41,6 +41,12 @@ static int smbd_post_recv(
 		struct smbd_response *response);
 
 static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+		struct smbd_connection *info,
+		struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+		struct page *page, unsigned long offset,
+		size_t size, int remaining_data_length);
 
 /* SMBD version number */
 #define SMBD_V1	0x0100
@@ -177,6 +183,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work)
 	log_rdma_event(INFO, "cancelling send immediate work\n");
 	cancel_delayed_work_sync(&info->send_immediate_work);
 
+	log_rdma_event(INFO, "wait for all send to finish\n");
+	wait_event(info->wait_smbd_send_pending,
+		info->smbd_send_pending == 0);
+
 	log_rdma_event(INFO, "wait for all recv to finish\n");
 	wake_up_interruptible(&info->wait_reassembly_queue);
 	wait_event(info->wait_smbd_recv_pending,
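This teardown wait pairs with the accounting added to smbd_send() below: each sender bumps smbd_send_pending on entry and wakes wait_smbd_send_pending on exit. A minimal sketch of the pattern; the claim that a plain int suffices is an assumption that senders are serialized by the caller (in CIFS, sends run under the server mutex):

/* Sender side (see smbd_send() below); no atomics needed when
 * callers are serialized, e.g. by the CIFS server mutex.
 */
info->smbd_send_pending++;
/* ... post all the RDMA sends ... */
info->smbd_send_pending--;
wake_up(&info->wait_smbd_send_pending);

/* Teardown side (the hunk above): sleep until no sender is active */
wait_event(info->wait_smbd_send_pending,
	   info->smbd_send_pending == 0);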
@@ -1077,6 +1087,24 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
 	return rc;
 }
 
+/*
+ * Send a page
+ * page: the page to send
+ * offset: offset in the page to send
+ * size: length in the page to send
+ * remaining_data_length: remaining data to send in this payload
+ */
+static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
+		unsigned long offset, size_t size, int remaining_data_length)
+{
+	struct scatterlist sgl;
+
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, page, size, offset);
+
+	return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
+}
+
 /*
  * Send an empty message
  * Empty message is used to extend credits to peer for keepalive
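smbd_post_send_page() wraps exactly one page in a one-entry scatterlist, so a chunk must fit in a single send. A hedged usage sketch (the wrapper, offset, and lengths are illustrative, not from this commit):

/* Hypothetical call: send the first 1024 bytes of a page while
 * 8192 bytes of the same SMBDirect payload are still to follow.
 * The chunk size must not exceed max_iov_size (see smbd_send()).
 */
static int send_first_chunk(struct smbd_connection *info,
			    struct page *page)
{
	return smbd_post_send_page(info, page, /* offset */ 0,
				   /* size */ 1024,
				   /* remaining_data_length */ 8192);
}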
@@ -1088,6 +1116,35 @@ static int smbd_post_send_empty(struct smbd_connection *info)
 	return smbd_post_send_sgl(info, NULL, 0, 0);
 }
 
+/*
+ * Send a data buffer
+ * iov: the iov array describing the data buffers
+ * n_vec: number of entries in the iov array
+ * remaining_data_length: remaining data to send following this packet
+ * in segmented SMBD packet
+ */
+static int smbd_post_send_data(
+	struct smbd_connection *info, struct kvec *iov, int n_vec,
+	int remaining_data_length)
+{
+	int i;
+	u32 data_length = 0;
+	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+
+	if (n_vec > SMBDIRECT_MAX_SGE) {
+		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
+		return -ENOMEM;
+	}
+
+	sg_init_table(sgl, n_vec);
+	for (i = 0; i < n_vec; i++) {
+		data_length += iov[i].iov_len;
+		sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
+	}
+
+	return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
+}
+
 /*
  * Post a receive request to the transport
  * The remote peer can only send data when a receive request is posted
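smbd_post_send_data() maps up to SMBDIRECT_MAX_SGE kvecs onto one scatterlist and posts them as a single RDMA send. A hedged sketch of a two-vector call (the wrapper and its buffers are illustrative):

/* Illustrative only: two buffers become two scatterlist entries
 * in a single RDMA send. remaining_data_length is 0 because no
 * further packets follow in this SMBDirect payload.
 */
static int send_hdr_and_data(struct smbd_connection *info,
			     void *hdr, size_t hdr_len,
			     void *data, size_t data_len)
{
	struct kvec iov[2] = {
		{ .iov_base = hdr,  .iov_len = hdr_len  },
		{ .iov_base = data, .iov_len = data_len },
	};

	/* both lengths together must stay within max_iov_size */
	return smbd_post_send_data(info, iov, 2, 0);
}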
@@ -1652,6 +1709,9 @@ struct smbd_connection *_smbd_get_connection(
 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
 		info->keep_alive_interval*HZ);
 
+	init_waitqueue_head(&info->wait_smbd_send_pending);
+	info->smbd_send_pending = 0;
+
 	init_waitqueue_head(&info->wait_smbd_recv_pending);
 	info->smbd_recv_pending = 0;
 
@@ -1943,3 +2003,189 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 	msg->msg_iter.count = 0;
 	return rc;
 }
+
+/*
+ * Send data to transport
+ * Each rqst is transported as an SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 on success, otherwise error code
+ */
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+{
+	struct kvec vec;
+	int nvecs;
+	int size;
+	int buflen = 0, remaining_data_length;
+	int start, i, j;
+	int max_iov_size =
+		info->max_send_size - sizeof(struct smbd_data_transfer);
+	struct kvec iov[SMBDIRECT_MAX_SGE];
+	int rc;
+
+	info->smbd_send_pending++;
+	if (info->transport_status != SMBD_CONNECTED) {
+		rc = -ENODEV;
+		goto done;
+	}
+
+	/*
+	 * This usually means a configuration error
+	 * We use RDMA read/write for packet size > rdma_readwrite_threshold
+	 * as long as it's properly configured we should never get into this
+	 * situation
+	 */
+	if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
+		log_write(ERR, "maximum send segment %x exceeding %x\n",
+			rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Remove the RFC1002 length defined in MS-SMB2 section 2.1
+	 * It is used only for TCP transport
+	 * In future we may want to add a transport layer under protocol
+	 * layer so this will only be issued to TCP transport
+	 */
+	iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+	iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+	buflen += iov[0].iov_len;
+
+	/* total up iov array first */
+	for (i = 1; i < rqst->rq_nvec; i++) {
+		iov[i].iov_base = rqst->rq_iov[i].iov_base;
+		iov[i].iov_len = rqst->rq_iov[i].iov_len;
+		buflen += iov[i].iov_len;
+	}
+
+	/* add in the page array if there is one */
+	if (rqst->rq_npages) {
+		buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+		buflen += rqst->rq_tailsz;
+	}
+
+	if (buflen + sizeof(struct smbd_data_transfer) >
+		info->max_fragmented_send_size) {
+		log_write(ERR, "payload size %d > max size %d\n",
+			buflen, info->max_fragmented_send_size);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	remaining_data_length = buflen;
+
+	log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+		"rq_tailsz=%d buflen=%d\n",
+		rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+		rqst->rq_tailsz, buflen);
+
+	start = i = iov[0].iov_len ? 0 : 1;
+	buflen = 0;
+	while (true) {
+		buflen += iov[i].iov_len;
+		if (buflen > max_iov_size) {
+			if (i > start) {
+				remaining_data_length -=
+					(buflen-iov[i].iov_len);
+				log_write(INFO, "sending iov[] from start=%d "
+					"i=%d nvecs=%d "
+					"remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = smbd_post_send_data(
+					info, &iov[start], i-start,
+					remaining_data_length);
+				if (rc)
+					goto done;
+			} else {
+				/* iov[start] is too big, break it */
+				nvecs = (buflen+max_iov_size-1)/max_iov_size;
+				log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
+					" break to %d vectors\n",
+					start, iov[start].iov_base,
+					buflen, nvecs);
+				for (j = 0; j < nvecs; j++) {
+					vec.iov_base =
+						(char *)iov[start].iov_base +
+						j*max_iov_size;
+					vec.iov_len = max_iov_size;
+					if (j == nvecs-1)
+						vec.iov_len =
+							buflen -
+							max_iov_size*(nvecs-1);
+					remaining_data_length -= vec.iov_len;
+					log_write(INFO,
+						"sending vec j=%d iov_base=%p"
+						" iov_len=%zu "
+						"remaining_data_length=%d\n",
+						j, vec.iov_base, vec.iov_len,
+						remaining_data_length);
+					rc = smbd_post_send_data(
+						info, &vec, 1,
+						remaining_data_length);
+					if (rc)
+						goto done;
+				}
+				i++;
+			}
+			start = i;
+			buflen = 0;
+		} else {
+			i++;
+			if (i == rqst->rq_nvec) {
+				/* send out all remaining vecs */
+				remaining_data_length -= buflen;
+				log_write(INFO,
+					"sending iov[] from start=%d i=%d "
+					"nvecs=%d remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = smbd_post_send_data(info, &iov[start],
+					i-start, remaining_data_length);
+				if (rc)
+					goto done;
+				break;
+			}
+		}
+		log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
+	}
+
+	/* now sending pages if there are any */
+	for (i = 0; i < rqst->rq_npages; i++) {
+		buflen = (i == rqst->rq_npages-1) ?
+			rqst->rq_tailsz : rqst->rq_pagesz;
+		nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+		log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
+			buflen, nvecs);
+		for (j = 0; j < nvecs; j++) {
+			size = max_iov_size;
+			if (j == nvecs-1)
+				size = buflen - j*max_iov_size;
+			remaining_data_length -= size;
+			log_write(INFO, "sending pages i=%d offset=%d size=%d"
+				" remaining_data_length=%d\n",
+				i, j*max_iov_size, size, remaining_data_length);
+			rc = smbd_post_send_page(
+				info, rqst->rq_pages[i], j*max_iov_size,
+				size, remaining_data_length);
+			if (rc)
+				goto done;
+		}
+	}
+
+done:
+	/*
+	 * As an optimization, we don't wait for individual I/O to finish
+	 * before sending the next one.
+	 * Send them all and wait for pending send count to get to 0
+	 * which means all the I/Os have been sent out and we are good to return
+	 */
+
+	wait_event(info->wait_send_payload_pending,
+		atomic_read(&info->send_payload_pending) == 0);
+
+	info->smbd_send_pending--;
+	wake_up(&info->wait_smbd_send_pending);
+
+	return rc;
+}
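Both splitting loops above carve anything larger than max_iov_size into ceil(buflen / max_iov_size) sends, where max_iov_size is max_send_size minus the SMBDirect data-transfer header, and only the final chunk comes up short. A standalone sketch of that arithmetic (buflen = 4000 and max_iov_size = 1364 are illustrative values):

/* Worked example of the nvecs computation in smbd_send().
 * With buflen = 4000 and an illustrative max_iov_size = 1364:
 *   nvecs = (4000 + 1363) / 1364 = 3
 * chunks: j=0 -> 1364, j=1 -> 1364, j=2 -> 4000 - 2*1364 = 1272
 */
static void chunk_sizes_example(void)
{
	int buflen = 4000, max_iov_size = 1364;
	int nvecs = (buflen + max_iov_size - 1) / max_iov_size;
	int j, size;

	for (j = 0; j < nvecs; j++) {
		size = max_iov_size;
		if (j == nvecs - 1)
			size = buflen - j * max_iov_size;
		/* each chunk becomes one smbd_post_send_data() call */
	}
}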

fs/cifs/smbdirect.h

Lines changed: 5 additions & 0 deletions
@@ -92,6 +92,9 @@ struct smbd_connection {
 
 	/* Activity accounting */
 	/* Pending requests issued from upper layer */
+	int smbd_send_pending;
+	wait_queue_head_t wait_smbd_send_pending;
+
 	int smbd_recv_pending;
 	wait_queue_head_t wait_smbd_recv_pending;
 
@@ -257,6 +260,7 @@ void smbd_destroy(struct smbd_connection *info);
 
 /* Interface for carrying upper layer I/O through send/recv */
 int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst);
 
 #else
 #define cifs_rdma_enabled(server)	0

@@ -266,6 +270,7 @@ static inline void *smbd_get_connection(
 static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
 static inline void smbd_destroy(struct smbd_connection *info) {}
 static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
+static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; }
 #endif
 
 #endif
