Skip to content

Commit b92ce55

Browse files
author
Jens Axboe
committed
[PATCH] splice: add direct fd <-> fd splicing support
It's more efficient for sendfile() emulation. Basically, we cache an internal private pipe and just use that as the intermediate area for pages.

Direct splicing is not available from sys_splice(); it is only meant to be used for sendfile() emulation.

Additional patch from Ingo Molnar to avoid the PIPE_BUFFERS loop at exit for the normal fast path.

Signed-off-by: Jens Axboe <[email protected]>
1 parent 529565d commit b92ce55

File tree

6 files changed

+150
-21
lines changed

6 files changed

+150
-21
lines changed

fs/pipe.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -691,12 +691,10 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
691691
return info;
692692
}
693693

694-
void free_pipe_info(struct inode *inode)
694+
void __free_pipe_info(struct pipe_inode_info *info)
695695
{
696696
int i;
697-
struct pipe_inode_info *info = inode->i_pipe;
698697

699-
inode->i_pipe = NULL;
700698
for (i = 0; i < PIPE_BUFFERS; i++) {
701699
struct pipe_buffer *buf = info->bufs + i;
702700
if (buf->ops)
@@ -707,6 +705,12 @@ void free_pipe_info(struct inode *inode)
707705
kfree(info);
708706
}
709707

708+
/*
 * Tear down the pipe state attached to an inode: hand inode->i_pipe to
 * __free_pipe_info() and then clear the pointer stored on the inode.
 *
 * NOTE(review): inode->i_pipe is passed on without a NULL check, and
 * __free_pipe_info() dereferences its argument (info->bufs) -- presumably
 * callers only get here with a pipe attached; confirm against callers.
 */
void free_pipe_info(struct inode *inode)
709+
{
710+
__free_pipe_info(inode->i_pipe);
711+
inode->i_pipe = NULL;
712+
}
713+
710714
static struct vfsmount *pipe_mnt __read_mostly;
711715
static int pipefs_delete_dentry(struct dentry *dentry)
712716
{

fs/splice.c

Lines changed: 130 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,7 @@ EXPORT_SYMBOL(generic_splice_sendpage);
680680
* Attempt to initiate a splice from pipe to file.
681681
*/
682682
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
683-
loff_t __user *off_out, size_t len,
684-
unsigned int flags)
683+
size_t len, unsigned int flags)
685684
{
686685
loff_t pos;
687686
int ret;
@@ -692,9 +691,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
692691
if (!(out->f_mode & FMODE_WRITE))
693692
return -EBADF;
694693

695-
if (off_out && copy_from_user(&out->f_pos, off_out, sizeof(loff_t)))
696-
return -EFAULT;
697-
698694
pos = out->f_pos;
699695

700696
ret = rw_verify_area(WRITE, out, &pos, len);
@@ -707,9 +703,8 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
707703
/*
708704
* Attempt to initiate a splice from a file to a pipe.
709705
*/
710-
static long do_splice_to(struct file *in, loff_t __user *off_in,
711-
struct pipe_inode_info *pipe, size_t len,
712-
unsigned int flags)
706+
static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
707+
size_t len, unsigned int flags)
713708
{
714709
loff_t pos, isize, left;
715710
int ret;
@@ -720,9 +715,6 @@ static long do_splice_to(struct file *in, loff_t __user *off_in,
720715
if (!(in->f_mode & FMODE_READ))
721716
return -EBADF;
722717

723-
if (off_in && copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
724-
return -EFAULT;
725-
726718
pos = in->f_pos;
727719

728720
ret = rw_verify_area(READ, in, &pos, len);
@@ -740,6 +732,118 @@ static long do_splice_to(struct file *in, loff_t __user *off_in,
740732
return in->f_op->splice_read(in, pipe, len, flags);
741733
}
742734

735+
/*
 * do_splice_direct - move data from file 'in' to file 'out' through an
 * internal pipe instead of a pipe supplied by the caller.
 *
 * The pipe is taken from current->splice_pipe. The first time a task gets
 * here it is allocated with alloc_pipe_info(NULL), its 'readers' count is
 * set to 1 and it is stored back in current->splice_pipe for later calls.
 *
 * in:    source file; rejected with -EINVAL unless its inode is a regular
 *        file or a block device (the S_ISBLK case is accepted by the check
 *        below even though the comment there only mentions regular files)
 * out:   destination file
 * len:   number of bytes requested
 * flags: splice flags; SPLICE_F_NONBLOCK is passed to the read side only and
 *        masked off for the write side
 *
 * Data moves in rounds of at most PIPE_BUFFERS * PAGE_SIZE bytes: each round
 * fills the pipe with do_splice_to() and drains it with do_splice_from().
 *
 * Returns the sum of the byte counts reported by do_splice_from(). Returns
 * -EINVAL for an unsupported input file type and -ENOMEM if the pipe cannot
 * be allocated. If do_splice_to() or do_splice_from() fails, every pipe
 * buffer that still has 'ops' set is released and the function returns the
 * byte count so far when it is positive, otherwise the negative error from
 * the failing call.
 *
 * NOTE(review): only negative returns from do_splice_to() leave the loop.
 * If it returns 0 and do_splice_from() then also returns 0, 'len' is not
 * reduced and, without SPLICE_F_NONBLOCK, nothing here ends the loop --
 * confirm that a zero-length read cannot make this spin.
 *
 * NOTE(review): 'len' and 'bytes' advance by what do_splice_from() reports,
 * not by read_len, and the success path resets nrbufs/curbuf without
 * walking the buffers -- presumably do_splice_from() always drains the pipe
 * completely; confirm.
 */
long do_splice_direct(struct file *in, struct file *out, size_t len,
736+
unsigned int flags)
737+
{
738+
struct pipe_inode_info *pipe;
739+
long ret, bytes;
740+
umode_t i_mode;
741+
int i;
742+
743+
/*
744+
* We require the input being a regular file, as we don't want to
745+
* randomly drop data for eg socket -> socket splicing. Use the
746+
* piped splicing for that!
747+
*/
748+
i_mode = in->f_dentry->d_inode->i_mode;
749+
if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
750+
return -EINVAL;
751+
752+
/*
753+
* neither in nor out is a pipe, setup an internal pipe attached to
754+
* 'out' and transfer the wanted data from 'in' to 'out' through that
755+
*/
756+
pipe = current->splice_pipe;
757+
if (!pipe) {
758+
pipe = alloc_pipe_info(NULL);
759+
if (!pipe)
760+
return -ENOMEM;
761+
762+
/*
763+
* We don't have an immediate reader, but we'll read the stuff
764+
* out of the pipe right after the move_to_pipe(). So set
765+
* PIPE_READERS appropriately.
766+
*/
767+
pipe->readers = 1;
768+
769+
current->splice_pipe = pipe;
770+
}
771+
772+
/*
773+
* do the splice
774+
*/
775+
ret = 0;
776+
bytes = 0;
777+
778+
while (len) {
779+
size_t read_len, max_read_len;
780+
781+
/*
782+
* Do at most PIPE_BUFFERS pages worth of transfer:
783+
*/
784+
max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
785+
786+
ret = do_splice_to(in, pipe, max_read_len, flags);
787+
if (unlikely(ret < 0))
788+
goto out_release;
789+
790+
read_len = ret;
791+
792+
/*
793+
* NOTE: nonblocking mode only applies to the input. We
794+
* must not do the output in nonblocking mode as then we
795+
* could get stuck data in the internal pipe:
796+
*/
797+
ret = do_splice_from(pipe, out, read_len,
798+
flags & ~SPLICE_F_NONBLOCK);
799+
if (unlikely(ret < 0))
800+
goto out_release;
801+
802+
bytes += ret;
803+
len -= ret;
804+
805+
/*
806+
* In nonblocking mode, if we got back a short read then
807+
* that was due to either an IO error or due to the
808+
* pagecache entry not being there. In the IO error case
809+
* the _next_ splice attempt will produce a clean IO error
810+
* return value (not a short read), so in both cases it's
811+
* correct to break out of the loop here:
812+
*/
813+
if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
814+
break;
815+
}
816+
817+
/* Success path: only the ring indices are reset; no per-buffer release loop. */
pipe->nrbufs = pipe->curbuf = 0;
818+
819+
return bytes;
820+
821+
out_release:
822+
/*
823+
* If we did an incomplete transfer we must release
824+
* the pipe buffers in question:
825+
*/
826+
for (i = 0; i < PIPE_BUFFERS; i++) {
827+
struct pipe_buffer *buf = pipe->bufs + i;
828+
829+
if (buf->ops) {
830+
buf->ops->release(pipe, buf);
831+
buf->ops = NULL;
832+
}
833+
}
834+
pipe->nrbufs = pipe->curbuf = 0;
835+
836+
/*
837+
* If we transferred some data, return the number of bytes:
838+
*/
839+
if (bytes > 0)
840+
return bytes;
841+
842+
return ret;
843+
}
844+
845+
EXPORT_SYMBOL(do_splice_direct);
846+
743847
/*
744848
* Determine where to splice to/from.
745849
*/
@@ -749,25 +853,33 @@ static long do_splice(struct file *in, loff_t __user *off_in,
749853
{
750854
struct pipe_inode_info *pipe;
751855

752-
if (off_out && out->f_op->llseek == no_llseek)
753-
return -EINVAL;
754-
if (off_in && in->f_op->llseek == no_llseek)
755-
return -EINVAL;
756-
757856
pipe = in->f_dentry->d_inode->i_pipe;
758857
if (pipe) {
759858
if (off_in)
760859
return -ESPIPE;
860+
if (off_out) {
861+
if (out->f_op->llseek == no_llseek)
862+
return -EINVAL;
863+
if (copy_from_user(&out->f_pos, off_out,
864+
sizeof(loff_t)))
865+
return -EFAULT;
866+
}
761867

762-
return do_splice_from(pipe, out, off_out, len, flags);
868+
return do_splice_from(pipe, out, len, flags);
763869
}
764870

765871
pipe = out->f_dentry->d_inode->i_pipe;
766872
if (pipe) {
767873
if (off_out)
768874
return -ESPIPE;
875+
if (off_in) {
876+
if (in->f_op->llseek == no_llseek)
877+
return -EINVAL;
878+
if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
879+
return -EFAULT;
880+
}
769881

770-
return do_splice_to(in, off_in, pipe, len, flags);
882+
return do_splice_to(in, pipe, len, flags);
771883
}
772884

773885
return -EINVAL;

include/linux/fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,8 @@ extern void do_generic_mapping_read(struct address_space *mapping,
16131613
loff_t *, read_descriptor_t *, read_actor_t);
16141614
extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int);
16151615
extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int);
1616+
extern long do_splice_direct(struct file *in, struct file *out,
1617+
size_t len, unsigned int flags);
16161618
extern void
16171619
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
16181620
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,

include/linux/pipe_fs_i.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ void pipe_wait(struct pipe_inode_info *pipe);
5858

5959
struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
6060
void free_pipe_info(struct inode * inode);
61+
void __free_pipe_info(struct pipe_inode_info *);
6162

6263
/*
6364
* splice is tied to pipes as a transport (at least for now), so we'll just

include/linux/sched.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
684684

685685
struct audit_context; /* See audit.c */
686686
struct mempolicy;
687+
struct pipe_inode_info;
687688

688689
enum sleep_type {
689690
SLEEP_NORMAL,
@@ -882,6 +883,11 @@ struct task_struct {
882883

883884
atomic_t fs_excl; /* holding fs exclusive resources */
884885
struct rcu_head rcu;
886+
887+
/*
888+
* cache last used pipe for splice
889+
*/
890+
struct pipe_inode_info *splice_pipe;
885891
};
886892

887893
static inline pid_t process_group(struct task_struct *tsk)

kernel/exit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <linux/mutex.h>
3535
#include <linux/futex.h>
3636
#include <linux/compat.h>
37+
#include <linux/pipe_fs_i.h>
3738

3839
#include <asm/uaccess.h>
3940
#include <asm/unistd.h>
@@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code)
941942
if (tsk->io_context)
942943
exit_io_context();
943944

945+
if (tsk->splice_pipe)
946+
__free_pipe_info(tsk->splice_pipe);
947+
944948
/* PF_DEAD causes final put_task_struct after we schedule. */
945949
preempt_disable();
946950
BUG_ON(tsk->flags & PF_DEAD);

0 commit comments

Comments
 (0)