diff --git a/lib/std/c.zig b/lib/std/c.zig index 48a3039f51c1..4eb271c87cdf 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -142,7 +142,7 @@ pub extern "c" fn sendto( buf: *const c_void, len: usize, flags: u32, - dest_addr: *const sockaddr, + dest_addr: ?*const sockaddr, addrlen: socklen_t, ) isize; diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 524c82211e30..d5ecf6bd81e6 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -55,6 +55,22 @@ pub extern "c" fn clock_get_time(clock_serv: clock_serv_t, cur_time: *mach_times pub extern "c" fn host_get_clock_service(host: host_t, clock_id: clock_id_t, clock_serv: ?[*]clock_serv_t) kern_return_t; pub extern "c" fn mach_port_deallocate(task: ipc_space_t, name: mach_port_name_t) kern_return_t; +pub const sf_hdtr = extern struct { + headers: [*]iovec_const, + hdr_cnt: c_int, + trailers: [*]iovec_const, + trl_cnt: c_int, +}; + +pub extern "c" fn sendfile( + out_fd: fd_t, + in_fd: fd_t, + offset: off_t, + len: *off_t, + sf_hdtr: ?*sf_hdtr, + flags: u32, +) c_int; + pub fn sigaddset(set: *sigset_t, signo: u5) void { set.* |= @as(u32, 1) << (signo - 1); } diff --git a/lib/std/c/freebsd.zig b/lib/std/c/freebsd.zig index 4c6614c978b3..dacfa4384dc0 100644 --- a/lib/std/c/freebsd.zig +++ b/lib/std/c/freebsd.zig @@ -8,6 +8,22 @@ pub extern "c" fn getdents(fd: c_int, buf_ptr: [*]u8, nbytes: usize) usize; pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int; pub extern "c" fn getrandom(buf_ptr: [*]u8, buf_len: usize, flags: c_uint) isize; +pub const sf_hdtr = extern struct { + headers: [*]iovec_const, + hdr_cnt: c_int, + trailers: [*]iovec_const, + trl_cnt: c_int, +}; +pub extern "c" fn sendfile( + out_fd: fd_t, + in_fd: fd_t, + offset: ?*off_t, + nbytes: usize, + sf_hdtr: ?*sf_hdtr, + sbytes: ?*off_t, + flags: u32, +) c_int; + pub const dl_iterate_phdr_callback = extern fn (info: *dl_phdr_info, size: usize, data: ?*c_void) c_int; pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*c_void) c_int; diff --git a/lib/std/c/linux.zig b/lib/std/c/linux.zig index be32536d6f5b..7ac5ecd3fe0c 100644 --- a/lib/std/c/linux.zig +++ b/lib/std/c/linux.zig @@ -82,6 +82,13 @@ pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int; pub extern "c" fn memfd_create(name: [*:0]const u8, flags: c_uint) c_int; +pub extern "c" fn sendfile( + out_fd: fd_t, + in_fd: fd_t, + offset: ?*off_t, + count: usize, +) isize; + pub const pthread_attr_t = extern struct { __size: [56]u8, __align: c_long, diff --git a/lib/std/event/loop.zig b/lib/std/event/loop.zig index 80ba5a79b53d..e62f15d59ab5 100644 --- a/lib/std/event/loop.zig +++ b/lib/std/event/loop.zig @@ -236,7 +236,8 @@ pub const Loop = struct { var extra_thread_index: usize = 0; errdefer { // writing 8 bytes to an eventfd cannot fail - noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable; + const amt = noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable; + assert(amt == wakeup_bytes.len); while (extra_thread_index != 0) { extra_thread_index -= 1; self.extra_threads[extra_thread_index].wait(); @@ -682,7 +683,8 @@ pub const Loop = struct { .linux => { self.posixFsRequest(&self.os_data.fs_end_request); // writing 8 bytes to an eventfd cannot fail - noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable; + const amt = noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable; + assert(amt == wakeup_bytes.len); return; }, .macosx, .freebsd, .netbsd, .dragonfly => { @@ -831,7 +833,7 @@ pub const Loop = struct { /// Performs an async `os.write` using a separate thread. /// `fd` must block and not return EAGAIN. - pub fn write(self: *Loop, fd: os.fd_t, bytes: []const u8) os.WriteError!void { + pub fn write(self: *Loop, fd: os.fd_t, bytes: []const u8) os.WriteError!usize { var req_node = Request.Node{ .data = .{ .msg = .{ @@ -852,7 +854,7 @@ pub const Loop = struct { /// Performs an async `os.writev` using a separate thread. /// `fd` must block and not return EAGAIN. - pub fn writev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const) os.WriteError!void { + pub fn writev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const) os.WriteError!usize { var req_node = Request.Node{ .data = .{ .msg = .{ @@ -873,7 +875,7 @@ pub const Loop = struct { /// Performs an async `os.pwritev` using a separate thread. /// `fd` must block and not return EAGAIN. - pub fn pwritev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, offset: u64) os.WriteError!void { + pub fn pwritev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, offset: u64) os.WriteError!usize { var req_node = Request.Node{ .data = .{ .msg = .{ @@ -1137,7 +1139,7 @@ pub const Loop = struct { pub const Write = struct { fd: os.fd_t, bytes: []const u8, - result: Error!void, + result: Error!usize, pub const Error = os.WriteError; }; @@ -1145,7 +1147,7 @@ pub const Loop = struct { pub const WriteV = struct { fd: os.fd_t, iov: []const os.iovec_const, - result: Error!void, + result: Error!usize, pub const Error = os.WriteError; }; @@ -1154,9 +1156,9 @@ pub const Loop = struct { fd: os.fd_t, iov: []const os.iovec_const, offset: usize, - result: Error!void, + result: Error!usize, - pub const Error = os.WriteError; + pub const Error = os.PWriteError; }; pub const PReadV = struct { @@ -1165,7 +1167,7 @@ pub const Loop = struct { offset: usize, result: Error!usize, - pub const Error = os.ReadError; + pub const Error = os.PReadError; }; pub const Open = struct { diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 5077c52cd994..769d4b395c09 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -153,7 +153,7 @@ pub fn updateFileMode(source_path: []const u8, dest_path: []const u8, mode: ?Fil var buf: [mem.page_size * 6]u8 = undefined; while (true) { const amt = try in_stream.readFull(buf[0..]); - try atomic_file.file.write(buf[0..amt]); + try atomic_file.file.writeAll(buf[0..amt]); if (amt != buf.len) { try atomic_file.file.updateTimes(src_stat.atime, src_stat.mtime); try atomic_file.finish(); @@ -1329,7 +1329,7 @@ pub const Dir = struct { pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) !void { var file = try self.createFile(sub_path, .{}); defer file.close(); - try file.write(data); + try file.writeAll(data); } pub const AccessError = os.AccessError; diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index c243eeb62cfb..2715129934e1 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -228,63 +228,169 @@ pub const File = struct { } pub const ReadError = os.ReadError; + pub const PReadError = os.PReadError; pub fn read(self: File, buffer: []u8) ReadError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.read(self.handle, buffer); + } else { + return os.read(self.handle, buffer); } - return os.read(self.handle, buffer); } - pub fn pread(self: File, buffer: []u8, offset: u64) ReadError!usize { + pub fn readAll(self: File, buffer: []u8) ReadError!void { + var index: usize = 0; + while (index < buffer.len) { + index += try self.read(buffer[index..]); + } + } + + pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { - return std.event.Loop.instance.?.pread(self.handle, buffer); + return std.event.Loop.instance.?.pread(self.handle, buffer, offset); + } else { + return os.pread(self.handle, buffer, offset); + } + } + + pub fn preadAll(self: File, buffer: []u8, offset: u64) PReadError!void { + var index: usize = 0; + while (index < buffer.len) { + index += try self.pread(buffer[index..], offset + index); } - return os.pread(self.handle, buffer, offset); } pub fn readv(self: File, iovecs: []const os.iovec) ReadError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.readv(self.handle, iovecs); + } else { + return os.readv(self.handle, iovecs); } - return os.readv(self.handle, iovecs); } - pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) ReadError!usize { + /// The `iovecs` parameter is mutable because this function needs to mutate the fields in + /// order to handle partial reads from the underlying OS layer. + pub fn readvAll(self: File, iovecs: []os.iovec) ReadError!void { + var i: usize = 0; + while (true) { + var amt = try self.readv(iovecs[i..]); + while (amt >= iovecs[i].iov_len) { + amt -= iovecs[i].iov_len; + i += 1; + if (i >= iovecs.len) return; + } + iovecs[i].iov_base += amt; + iovecs[i].iov_len -= amt; + } + } + + pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) PReadError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.preadv(self.handle, iovecs, offset); + } else { + return os.preadv(self.handle, iovecs, offset); + } + } + + /// The `iovecs` parameter is mutable because this function needs to mutate the fields in + /// order to handle partial reads from the underlying OS layer. + pub fn preadvAll(self: File, iovecs: []const os.iovec, offset: u64) PReadError!void { + var i: usize = 0; + var off: usize = 0; + while (true) { + var amt = try self.preadv(iovecs[i..], offset + off); + off += amt; + while (amt >= iovecs[i].iov_len) { + amt -= iovecs[i].iov_len; + i += 1; + if (i >= iovecs.len) return; + } + iovecs[i].iov_base += amt; + iovecs[i].iov_len -= amt; } - return os.preadv(self.handle, iovecs, offset); } pub const WriteError = os.WriteError; + pub const PWriteError = os.PWriteError; - pub fn write(self: File, bytes: []const u8) WriteError!void { + pub fn write(self: File, bytes: []const u8) WriteError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.write(self.handle, bytes); + } else { + return os.write(self.handle, bytes); } - return os.write(self.handle, bytes); } - pub fn pwrite(self: File, bytes: []const u8, offset: u64) WriteError!void { + pub fn writeAll(self: File, bytes: []const u8) WriteError!void { + var index: usize = 0; + while (index < bytes.len) { + index += try self.write(bytes[index..]); + } + } + + pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.pwrite(self.handle, bytes, offset); + } else { + return os.pwrite(self.handle, bytes, offset); } - return os.pwrite(self.handle, bytes, offset); } - pub fn writev(self: File, iovecs: []const os.iovec_const) WriteError!void { + pub fn pwriteAll(self: File, bytes: []const u8, offset: u64) PWriteError!void { + var index: usize = 0; + while (index < bytes.len) { + index += try self.pwrite(bytes[index..], offset + index); + } + } + + pub fn writev(self: File, iovecs: []const os.iovec_const) WriteError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { return std.event.Loop.instance.?.writev(self.handle, iovecs); + } else { + return os.writev(self.handle, iovecs); } - return os.writev(self.handle, iovecs); } - pub fn pwritev(self: File, iovecs: []const os.iovec_const, offset: usize) WriteError!void { + /// The `iovecs` parameter is mutable because this function needs to mutate the fields in + /// order to handle partial writes from the underlying OS layer. + pub fn writevAll(self: File, iovecs: []os.iovec_const) WriteError!void { + var i: usize = 0; + while (true) { + var amt = try self.writev(iovecs[i..]); + while (amt >= iovecs[i].iov_len) { + amt -= iovecs[i].iov_len; + i += 1; + if (i >= iovecs.len) return; + } + iovecs[i].iov_base += amt; + iovecs[i].iov_len -= amt; + } + } + + pub fn pwritev(self: File, iovecs: []os.iovec_const, offset: usize) PWriteError!usize { if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) { - return std.event.Loop.instance.?.pwritev(self.handle, iovecs); + return std.event.Loop.instance.?.pwritev(self.handle, iovecs, offset); + } else { + return os.pwritev(self.handle, iovecs, offset); + } + } + + /// The `iovecs` parameter is mutable because this function needs to mutate the fields in + /// order to handle partial writes from the underlying OS layer. + pub fn pwritevAll(self: File, iovecs: []os.iovec_const, offset: usize) PWriteError!void { + var i: usize = 0; + var off: usize = 0; + while (true) { + var amt = try self.pwritev(iovecs[i..], offset + off); + off += amt; + while (amt >= iovecs[i].iov_len) { + amt -= iovecs[i].iov_len; + i += 1; + if (i >= iovecs.len) return; + } + iovecs[i].iov_base += amt; + iovecs[i].iov_len -= amt; } - return os.pwritev(self.handle, iovecs); } pub fn inStream(file: File) InStream { @@ -335,7 +441,7 @@ pub const File = struct { pub const Error = WriteError; pub const Stream = io.OutStream(Error); - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { const self = @fieldParentPtr(OutStream, "stream", out_stream); return self.file.write(bytes); } diff --git a/lib/std/io.zig b/lib/std/io.zig index d0bf26d54875..99e9391f1d47 100644 --- a/lib/std/io.zig +++ b/lib/std/io.zig @@ -437,10 +437,11 @@ pub fn BitInStream(endian: builtin.Endian, comptime Error: type) type { }; } -/// This is a simple OutStream that writes to a fixed buffer, and returns an error -/// when it runs out of space. +/// This is a simple OutStream that writes to a fixed buffer. If the returned number +/// of bytes written is less than requested, the buffer is full. +/// Returns error.OutOfMemory when no bytes would be written. pub const SliceOutStream = struct { - pub const Error = error{OutOfSpace}; + pub const Error = error{OutOfMemory}; pub const Stream = OutStream(Error); stream: Stream, @@ -464,9 +465,11 @@ pub const SliceOutStream = struct { self.pos = 0; } - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { const self = @fieldParentPtr(SliceOutStream, "stream", out_stream); + if (bytes.len == 0) return 0; + assert(self.pos <= self.slice.len); const n = if (self.pos + bytes.len <= self.slice.len) @@ -477,9 +480,9 @@ pub const SliceOutStream = struct { std.mem.copy(u8, self.slice[self.pos .. self.pos + n], bytes[0..n]); self.pos += n; - if (n < bytes.len) { - return Error.OutOfSpace; - } + if (n == 0) return error.OutOfMemory; + + return n; } }; @@ -508,7 +511,9 @@ pub const NullOutStream = struct { }; } - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {} + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { + return bytes.len; + } }; test "io.NullOutStream" { @@ -536,10 +541,11 @@ pub fn CountingOutStream(comptime OutStreamError: type) type { }; } - fn writeFn(out_stream: *Stream, bytes: []const u8) OutStreamError!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) OutStreamError!usize { const self = @fieldParentPtr(Self, "stream", out_stream); try self.child_stream.write(bytes); self.bytes_written += bytes.len; + return bytes.len; } }; } @@ -588,13 +594,14 @@ pub fn BufferedOutStreamCustom(comptime buffer_size: usize, comptime OutStreamEr } } - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { const self = @fieldParentPtr(Self, "stream", out_stream); if (bytes.len >= self.fifo.writableLength()) { try self.flush(); - return self.unbuffered_out_stream.write(bytes); + return self.unbuffered_out_stream.writeOnce(bytes); } self.fifo.writeAssumeCapacity(bytes); + return bytes.len; } }; } @@ -614,9 +621,10 @@ pub const BufferOutStream = struct { }; } - fn writeFn(out_stream: *Stream, bytes: []const u8) !void { + fn writeFn(out_stream: *Stream, bytes: []const u8) !usize { const self = @fieldParentPtr(BufferOutStream, "stream", out_stream); - return self.buffer.append(bytes); + try self.buffer.append(bytes); + return bytes.len; } }; @@ -734,17 +742,17 @@ pub fn BitOutStream(endian: builtin.Endian, comptime Error: type) type { self.bit_count = 0; } - pub fn write(self_stream: *Stream, buffer: []const u8) Error!void { + pub fn write(self_stream: *Stream, buffer: []const u8) Error!usize { var self = @fieldParentPtr(Self, "stream", self_stream); - //@NOTE: I'm not sure this is a good idea, maybe flushBits should be forced + // TODO: I'm not sure this is a good idea, maybe flushBits should be forced if (self.bit_count > 0) { for (buffer) |b, i| try self.writeBits(b, u8_bit_count); - return; + return buffer.len; } - return self.out_stream.write(buffer); + return self.out_stream.writeOnce(buffer); } }; } diff --git a/lib/std/io/c_out_stream.zig b/lib/std/io/c_out_stream.zig index 8b341e693746..adaa3fcbafdc 100644 --- a/lib/std/io/c_out_stream.zig +++ b/lib/std/io/c_out_stream.zig @@ -20,10 +20,10 @@ pub const COutStream = struct { }; } - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { const self = @fieldParentPtr(COutStream, "stream", out_stream); const amt_written = std.c.fwrite(bytes.ptr, 1, bytes.len, self.c_file); - if (amt_written == bytes.len) return; + if (amt_written >= 0) return amt_written; switch (std.c._errno().*) { 0 => unreachable, os.EINVAL => unreachable, diff --git a/lib/std/io/out_stream.zig b/lib/std/io/out_stream.zig index 7f534865f56e..cb75b27bf11d 100644 --- a/lib/std/io/out_stream.zig +++ b/lib/std/io/out_stream.zig @@ -14,13 +14,13 @@ pub fn OutStream(comptime WriteError: type) type { const Self = @This(); pub const Error = WriteError; pub const WriteFn = if (std.io.is_async) - async fn (self: *Self, bytes: []const u8) Error!void + async fn (self: *Self, bytes: []const u8) Error!usize else - fn (self: *Self, bytes: []const u8) Error!void; + fn (self: *Self, bytes: []const u8) Error!usize; writeFn: WriteFn, - pub fn write(self: *Self, bytes: []const u8) Error!void { + pub fn writeOnce(self: *Self, bytes: []const u8) Error!usize { if (std.io.is_async) { // Let's not be writing 0xaa in safe modes for upwards of 4 MiB for every stream write. @setRuntimeSafety(false); @@ -31,6 +31,13 @@ pub fn OutStream(comptime WriteError: type) type { } } + pub fn write(self: *Self, bytes: []const u8) Error!void { + var index: usize = 0; + while (index != bytes.len) { + index += try self.writeOnce(bytes[index..]); + } + } + pub fn print(self: *Self, comptime format: []const u8, args: var) Error!void { return std.fmt.format(self, Error, write, format, args); } diff --git a/lib/std/io/test.zig b/lib/std/io/test.zig index f1840b49e3cb..1ab0f82313b9 100644 --- a/lib/std/io/test.zig +++ b/lib/std/io/test.zig @@ -134,13 +134,13 @@ test "SliceOutStream" { try ss.stream.write("world"); expect(mem.eql(u8, ss.getWritten(), "Helloworld")); - expectError(error.OutOfSpace, ss.stream.write("!")); + expectError(error.OutOfMemory, ss.stream.write("!")); expect(mem.eql(u8, ss.getWritten(), "Helloworld")); ss.reset(); expect(ss.getWritten().len == 0); - expectError(error.OutOfSpace, ss.stream.write("Hello world!")); + expectError(error.OutOfMemory, ss.stream.write("Hello world!")); expect(mem.eql(u8, ss.getWritten(), "Hello worl")); } @@ -617,7 +617,7 @@ test "File seek ops" { fs.cwd().deleteFile(tmp_file_name) catch {}; } - try file.write(&([_]u8{0x55} ** 8192)); + try file.writeAll(&([_]u8{0x55} ** 8192)); // Seek to the end try file.seekFromEnd(0); diff --git a/lib/std/os.zig b/lib/std/os.zig index 127ada8fe5e7..8913a1599f5f 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -298,6 +298,11 @@ pub const ReadError = error{ /// buf.len. If 0 bytes were read, that means EOF. /// If the application has a global event loop enabled, EAGAIN is handled /// via the event loop. Otherwise EAGAIN results in error.WouldBlock. +/// +/// Linux has a limit on how many bytes may be transferred in one `read` call, which is `0x7ffff000` +/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as +/// well as stuffing the errno codes into the last `4096` values. This is noted on the `read` man page. +/// For POSIX the limit is `math.maxInt(isize)`. pub fn read(fd: fd_t, buf: []u8) ReadError!usize { if (builtin.os.tag == .windows) { return windows.ReadFile(fd, buf, null); @@ -316,8 +321,15 @@ pub fn read(fd: fd_t, buf: []u8) ReadError!usize { } } + // Prevents EINVAL. + const max_count = switch (std.Target.current.os.tag) { + .linux => 0x7ffff000, + else => math.maxInt(isize), + }; + const adjusted_len = math.min(max_count, buf.len); + while (true) { - const rc = system.read(fd, buf.ptr, buf.len); + const rc = system.read(fd, buf.ptr, adjusted_len); switch (errno(rc)) { 0 => return @intCast(usize, rc), EINTR => continue, @@ -352,32 +364,18 @@ pub fn read(fd: fd_t, buf: []u8) ReadError!usize { /// * Windows /// On these systems, the read races with concurrent writes to the same file descriptor. pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize { - if (builtin.os.tag == .windows) { - // TODO batch these into parallel requests - var off: usize = 0; - var iov_i: usize = 0; - var inner_off: usize = 0; - while (true) { - const v = iov[iov_i]; - const amt_read = try read(fd, v.iov_base[inner_off .. v.iov_len - inner_off]); - off += amt_read; - inner_off += amt_read; - if (inner_off == v.len) { - iov_i += 1; - inner_off = 0; - if (iov_i == iov.len) { - return off; - } - } - if (amt_read == 0) return off; // EOF - } else unreachable; // TODO https://github.com/ziglang/zig/issues/707 + if (std.Target.current.os.tag == .windows) { + // TODO does Windows have a way to read an io vector? + if (iov.len == 0) return @as(usize, 0); + const first = iov[0]; + return read(fd, first.iov_base[0..first.iov_len]); } while (true) { // TODO handle the case when iov_len is too large and get rid of this @intCast - const rc = system.readv(fd, iov.ptr, @intCast(u32, iov.len)); + const rc = system.readv(fd, iov.ptr, iov_count); switch (errno(rc)) { - 0 => return @bitCast(usize, rc), + 0 => return @intCast(usize, rc), EINTR => continue, EINVAL => unreachable, EFAULT => unreachable, @@ -397,6 +395,8 @@ pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize { } } +pub const PReadError = ReadError || error{Unseekable}; + /// Number of bytes read is returned. Upon reading end-of-file, zero is returned. /// /// Retries when interrupted by a signal. @@ -405,7 +405,7 @@ pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize { /// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`. /// On Windows, if the application has a global event loop enabled, I/O Completion Ports are /// used to perform the I/O. `error.WouldBlock` is not possible on Windows. -pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize { +pub fn pread(fd: fd_t, buf: []u8, offset: u64) PReadError!usize { if (builtin.os.tag == .windows) { return windows.ReadFile(fd, buf, offset); } @@ -429,6 +429,9 @@ pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize { ENOBUFS => return error.SystemResources, ENOMEM => return error.SystemResources, ECONNRESET => return error.ConnectionResetByPeer, + ENXIO => return error.Unseekable, + ESPIPE => return error.Unseekable, + EOVERFLOW => return error.Unseekable, else => |err| return unexpectedErrno(err), } } @@ -448,75 +451,23 @@ pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize { /// * Darwin /// * Windows /// On these systems, the read races with concurrent writes to the same file descriptor. -pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) ReadError!usize { - if (comptime std.Target.current.isDarwin()) { - // Darwin does not have preadv but it does have pread. - var off: usize = 0; - var iov_i: usize = 0; - var inner_off: usize = 0; - while (true) { - const v = iov[iov_i]; - const rc = darwin.pread(fd, v.iov_base + inner_off, v.iov_len - inner_off, offset + off); - const err = darwin.getErrno(rc); - switch (err) { - 0 => { - const amt_read = @bitCast(usize, rc); - off += amt_read; - inner_off += amt_read; - if (inner_off == v.iov_len) { - iov_i += 1; - inner_off = 0; - if (iov_i == iov.len) { - return off; - } - } - if (rc == 0) return off; // EOF - continue; - }, - EINTR => continue, - EINVAL => unreachable, - EFAULT => unreachable, - ESPIPE => unreachable, // fd is not seekable - EAGAIN => if (std.event.Loop.instance) |loop| { - loop.waitUntilFdReadable(fd); - continue; - } else { - return error.WouldBlock; - }, - EBADF => unreachable, // always a race condition - EIO => return error.InputOutput, - EISDIR => return error.IsDir, - ENOBUFS => return error.SystemResources, - ENOMEM => return error.SystemResources, - else => return unexpectedErrno(err), - } - } +pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) PReadError!usize { + const have_pread_but_not_preadv = switch (std.Target.current.os.tag) { + .windows, .macosx, .ios, .watchos, .tvos => true, + else => false, + }; + if (have_pread_but_not_preadv) { + // We could loop here; but proper usage of `preadv` must handle partial reads anyway. + // So we simply read into the first vector only. + if (iov.len == 0) return @as(usize, 0); + const first = iov[0]; + return pread(fd, first.iov_base[0..first.iov_len], offset); } - if (builtin.os.tag == .windows) { - // TODO batch these into parallel requests - var off: usize = 0; - var iov_i: usize = 0; - var inner_off: usize = 0; - while (true) { - const v = iov[iov_i]; - const amt_read = try pread(fd, v.iov_base[inner_off .. v.iov_len - inner_off], offset + off); - off += amt_read; - inner_off += amt_read; - if (inner_off == v.len) { - iov_i += 1; - inner_off = 0; - if (iov_i == iov.len) { - return off; - } - } - if (amt_read == 0) return off; // EOF - } else unreachable; // TODO https://github.com/ziglang/zig/issues/707 - } + const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31); while (true) { - // TODO handle the case when iov_len is too large and get rid of this @intCast - const rc = system.preadv(fd, iov.ptr, @intCast(u32, iov.len), offset); + const rc = system.preadv(fd, iov.ptr, iov_count, offset); switch (errno(rc)) { 0 => return @bitCast(usize, rc), EINTR => continue, @@ -533,6 +484,9 @@ pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) ReadError!usize { EISDIR => return error.IsDir, ENOBUFS => return error.SystemResources, ENOMEM => return error.SystemResources, + ENXIO => return error.Unseekable, + ESPIPE => return error.Unseekable, + EOVERFLOW => return error.Unseekable, else => |err| return unexpectedErrno(err), } } @@ -553,10 +507,28 @@ pub const WriteError = error{ WouldBlock, } || UnexpectedError; -/// Write to a file descriptor. Keeps trying if it gets interrupted. -/// If the application has a global event loop enabled, EAGAIN is handled -/// via the event loop. Otherwise EAGAIN results in error.WouldBlock. -pub fn write(fd: fd_t, bytes: []const u8) WriteError!void { +/// Write to a file descriptor. +/// Retries when interrupted by a signal. +/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero. +/// +/// Note that a successful write() may transfer fewer than count bytes. Such partial writes can +/// occur for various reasons; for example, because there was insufficient space on the disk +/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or +/// similar was interrupted by a signal handler after it had transferred some, but before it had +/// transferred all of the requested bytes. In the event of a partial write, the caller can make +/// another write() call to transfer the remaining bytes. The subsequent call will either +/// transfer further bytes or may result in an error (e.g., if the disk is now full). +/// +/// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled +/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`. +/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are +/// used to perform the I/O. `error.WouldBlock` is not possible on Windows. +/// +/// Linux has a limit on how many bytes may be transferred in one `write` call, which is `0x7ffff000` +/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as +/// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page. +/// The corresponding POSIX limit is `math.maxInt(isize)`. +pub fn write(fd: fd_t, bytes: []const u8) WriteError!usize { if (builtin.os.tag == .windows) { return windows.WriteFile(fd, bytes, null); } @@ -568,26 +540,21 @@ pub fn write(fd: fd_t, bytes: []const u8) WriteError!void { }}; var nwritten: usize = undefined; switch (wasi.fd_write(fd, &ciovs, ciovs.len, &nwritten)) { - 0 => return, + 0 => return nwritten, else => |err| return unexpectedErrno(err), } } - // Linux can return EINVAL when write amount is > 0x7ffff000 - // See https://github.com/ziglang/zig/pull/743#issuecomment-363165856 - // TODO audit this. Shawn Landden says that this is not actually true. - // if this logic should stay, move it to std.os.linux - const max_bytes_len = 0x7ffff000; + const max_count = switch (std.Target.current.os.tag) { + .linux => 0x7ffff000, + else => math.maxInt(isize), + }; + const adjusted_len = math.min(max_count, bytes.len); - var index: usize = 0; - while (index < bytes.len) { - const amt_to_write = math.min(bytes.len - index, @as(usize, max_bytes_len)); - const rc = system.write(fd, bytes.ptr + index, amt_to_write); + while (true) { + const rc = system.write(fd, bytes.ptr, adjusted_len); switch (errno(rc)) { - 0 => { - index += @intCast(usize, rc); - continue; - }, + 0 => return @intCast(usize, rc), EINTR => continue, EINVAL => unreachable, EFAULT => unreachable, @@ -611,14 +578,36 @@ pub fn write(fd: fd_t, bytes: []const u8) WriteError!void { } /// Write multiple buffers to a file descriptor. -/// If the application has a global event loop enabled, EAGAIN is handled -/// via the event loop. Otherwise EAGAIN results in error.WouldBlock. -pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!void { +/// Retries when interrupted by a signal. +/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero. +/// +/// Note that a successful write() may transfer fewer bytes than supplied. Such partial writes can +/// occur for various reasons; for example, because there was insufficient space on the disk +/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or +/// similar was interrupted by a signal handler after it had transferred some, but before it had +/// transferred all of the requested bytes. In the event of a partial write, the caller can make +/// another write() call to transfer the remaining bytes. The subsequent call will either +/// transfer further bytes or may result in an error (e.g., if the disk is now full). +/// +/// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled +/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`. +/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are +/// used to perform the I/O. `error.WouldBlock` is not possible on Windows. +/// +/// If `iov.len` is larger than will fit in a `u31`, a partial write will occur. +pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!usize { + if (std.Target.current.os.tag == .windows) { + // TODO does Windows have a way to write an io vector? + if (iov.len == 0) return @as(usize, 0); + const first = iov[0]; + return write(fd, first.iov_base[0..first.iov_len]); + } + + const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31); while (true) { - // TODO handle the case when iov_len is too large and get rid of this @intCast - const rc = system.writev(fd, iov.ptr, @intCast(u32, iov.len)); + const rc = system.writev(fd, iov.ptr, iov_count); switch (errno(rc)) { - 0 => return, + 0 => return @intCast(usize, rc), EINTR => continue, EINVAL => unreachable, EFAULT => unreachable, @@ -641,23 +630,45 @@ pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!void { } } +pub const PWriteError = WriteError || error{Unseekable}; + /// Write to a file descriptor, with a position offset. -/// /// Retries when interrupted by a signal. +/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero. +/// +/// Note that a successful write() may transfer fewer bytes than supplied. Such partial writes can +/// occur for various reasons; for example, because there was insufficient space on the disk +/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or +/// similar was interrupted by a signal handler after it had transferred some, but before it had +/// transferred all of the requested bytes. In the event of a partial write, the caller can make +/// another write() call to transfer the remaining bytes. The subsequent call will either +/// transfer further bytes or may result in an error (e.g., if the disk is now full). /// /// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled /// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`. /// On Windows, if the application has a global event loop enabled, I/O Completion Ports are /// used to perform the I/O. `error.WouldBlock` is not possible on Windows. -pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) WriteError!void { +/// +/// Linux has a limit on how many bytes may be transferred in one `pwrite` call, which is `0x7ffff000` +/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as +/// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page. +/// The corresponding POSIX limit is `math.maxInt(isize)`. +pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) PWriteError!usize { if (std.Target.current.os.tag == .windows) { return windows.WriteFile(fd, bytes, offset); } + // Prevent EINVAL. + const max_count = switch (std.Target.current.os.tag) { + .linux => 0x7ffff000, + else => math.maxInt(isize), + }; + const adjusted_len = math.min(max_count, bytes.len); + while (true) { - const rc = system.pwrite(fd, bytes.ptr, bytes.len, offset); + const rc = system.pwrite(fd, bytes.ptr, adjusted_len, offset); switch (errno(rc)) { - 0 => return, + 0 => return @intCast(usize, rc), EINTR => continue, EINVAL => unreachable, EFAULT => unreachable, @@ -675,84 +686,54 @@ pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) WriteError!void { ENOSPC => return error.NoSpaceLeft, EPERM => return error.AccessDenied, EPIPE => return error.BrokenPipe, + ENXIO => return error.Unseekable, + ESPIPE => return error.Unseekable, + EOVERFLOW => return error.Unseekable, else => |err| return unexpectedErrno(err), } } } /// Write multiple buffers to a file descriptor, with a position offset. -/// /// Retries when interrupted by a signal. +/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero. +/// +/// Note that a successful write() may transfer fewer than count bytes. Such partial writes can +/// occur for various reasons; for example, because there was insufficient space on the disk +/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or +/// similar was interrupted by a signal handler after it had transferred some, but before it had +/// transferred all of the requested bytes. In the event of a partial write, the caller can make +/// another write() call to transfer the remaining bytes. The subsequent call will either +/// transfer further bytes or may result in an error (e.g., if the disk is now full). /// /// If the application has a global event loop enabled, EAGAIN is handled /// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`. /// -/// This operation is non-atomic on the following systems: +/// The following systems do not have this syscall, and will return partial writes if more than one +/// vector is provided: /// * Darwin /// * Windows -/// On these systems, the write races with concurrent writes to the same file descriptor, and -/// the file can be in a partially written state when an error occurs. -pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) WriteError!void { - if (comptime std.Target.current.isDarwin()) { - // Darwin does not have pwritev but it does have pwrite. - var off: usize = 0; - var iov_i: usize = 0; - var inner_off: usize = 0; - while (true) { - const v = iov[iov_i]; - const rc = darwin.pwrite(fd, v.iov_base + inner_off, v.iov_len - inner_off, offset + off); - const err = darwin.getErrno(rc); - switch (err) { - 0 => { - const amt_written = @bitCast(usize, rc); - off += amt_written; - inner_off += amt_written; - if (inner_off == v.iov_len) { - iov_i += 1; - inner_off = 0; - if (iov_i == iov.len) { - return; - } - } - continue; - }, - EINTR => continue, - ESPIPE => unreachable, // `fd` is not seekable. - EINVAL => unreachable, - EFAULT => unreachable, - EAGAIN => if (std.event.Loop.instance) |loop| { - loop.waitUntilFdWritable(fd); - continue; - } else { - return error.WouldBlock; - }, - EBADF => unreachable, // Always a race condition. - EDESTADDRREQ => unreachable, // `connect` was never called. - EDQUOT => return error.DiskQuota, - EFBIG => return error.FileTooBig, - EIO => return error.InputOutput, - ENOSPC => return error.NoSpaceLeft, - EPERM => return error.AccessDenied, - EPIPE => return error.BrokenPipe, - else => return unexpectedErrno(err), - } - } - } +/// +/// If `iov.len` is larger than will fit in a `u31`, a partial write will occur. +pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) PWriteError!usize { + const have_pwrite_but_not_pwritev = switch (std.Target.current.os.tag) { + .windows, .macosx, .ios, .watchos, .tvos => true, + else => false, + }; - if (std.Target.current.os.tag == .windows) { - var off = offset; - for (iov) |item| { - try pwrite(fd, item.iov_base[0..item.iov_len], off); - off += buf.len; - } - return; + if (have_pwrite_but_not_pwritev) { + // We could loop here; but proper usage of `pwritev` must handle partial writes anyway. + // So we simply write the first vector only. + if (iov.len == 0) return @as(usize, 0); + const first = iov[0]; + return pwrite(fd, first.iov_base[0..first.iov_len], offset); } + const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31); while (true) { - // TODO handle the case when iov_len is too large and get rid of this @intCast - const rc = system.pwritev(fd, iov.ptr, @intCast(u32, iov.len), offset); + const rc = system.pwritev(fd, iov.ptr, iov_count, offset); switch (errno(rc)) { - 0 => return, + 0 => return @intCast(usize, rc), EINTR => continue, EINVAL => unreachable, EFAULT => unreachable, @@ -770,6 +751,9 @@ pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) WriteError!void ENOSPC => return error.NoSpaceLeft, EPERM => return error.AccessDenied, EPIPE => return error.BrokenPipe, + ENXIO => return error.Unseekable, + ESPIPE => return error.Unseekable, + EOVERFLOW => return error.Unseekable, else => |err| return unexpectedErrno(err), } } @@ -3389,7 +3373,6 @@ pub const SendError = error{ /// The socket type requires that message be sent atomically, and the size of the message /// to be sent made this impossible. The message is not transmitted. - /// MessageTooBig, /// The output queue for a network interface was full. This generally indicates that the @@ -3498,6 +3481,314 @@ pub fn send( return sendto(sockfd, buf, flags, null, 0); } +pub const SendFileError = PReadError || WriteError || SendError; + +fn count_iovec_bytes(iovs: []const iovec_const) usize { + var count: usize = 0; + for (iovs) |iov| { + count += iov.iov_len; + } + return count; +} + +/// Transfer data between file descriptors, with optional headers and trailers. +/// Returns the number of bytes written. This will be zero if `in_offset` falls beyond the end of the file. +/// +/// The `sendfile` call copies `count` bytes from one file descriptor to another. When possible, +/// this is done within the operating system kernel, which can provide better performance +/// characteristics than transferring data from kernel to user space and back, such as with +/// `read` and `write` calls. When `count` is `0`, it means to copy until the end of the input file has been +/// reached. Note, however, that partial writes are still possible in this case. +/// +/// `in_fd` must be a file descriptor opened for reading, and `out_fd` must be a file descriptor +/// opened for writing. They may be any kind of file descriptor; however, if `in_fd` is not a regular +/// file system file, it may cause this function to fall back to calling `read` and `write`, in which case +/// atomicity guarantees no longer apply. +/// +/// Copying begins reading at `in_offset`. The input file descriptor seek position is ignored and not updated. +/// If the output file descriptor has a seek position, it is updated as bytes are written. +/// +/// `flags` has different meanings per operating system; refer to the respective man pages. +/// +/// These systems support atomically sending everything, including headers and trailers: +/// * macOS +/// * FreeBSD +/// +/// These systems support in-kernel data copying, but headers and trailers are not sent atomically: +/// * Linux +/// +/// Other systems fall back to calling `read` / `write`. +/// +/// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000` +/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as +/// well as stuffing the errno codes into the last `4096` values. This is cited on the `sendfile` man page. +/// The corresponding POSIX limit on this is `math.maxInt(isize)`. +pub fn sendfile( + out_fd: fd_t, + in_fd: fd_t, + in_offset: u64, + count: usize, + headers: []const iovec_const, + trailers: []const iovec_const, + flags: u32, +) SendFileError!usize { + var header_done = false; + var total_written: usize = 0; + + // Prevents EOVERFLOW. + const max_count = switch (std.Target.current.os.tag) { + .linux => 0x7ffff000, + else => math.maxInt(isize), + }; + + switch (std.Target.current.os.tag) { + .linux => sf: { + // sendfile() first appeared in Linux 2.2, glibc 2.1. + const call_sf = comptime if (builtin.link_libc) + std.c.versionCheck(.{ .major = 2, .minor = 1 }).ok + else + std.Target.current.os.version_range.linux.range.max.order(.{ .major = 2, .minor = 2 }) != .lt; + if (!call_sf) break :sf; + + if (headers.len != 0) { + const amt = try writev(out_fd, headers); + total_written += amt; + if (amt < count_iovec_bytes(headers)) return total_written; + header_done = true; + } + + // Here we match BSD behavior, making a zero count value send as many bytes as possible. + const adjusted_count = if (count == 0) max_count else math.min(count, max_count); + + while (true) { + var offset: off_t = @bitCast(off_t, in_offset); + const rc = system.sendfile(out_fd, in_fd, &offset, adjusted_count); + switch (errno(rc)) { + 0 => { + const amt = @bitCast(usize, rc); + total_written += amt; + if (count == 0 and amt == 0) { + // We have detected EOF from `in_fd`. + break; + } else if (amt < count) { + return total_written; + } else { + break; + } + }, + + EBADF => unreachable, // Always a race condition. + EFAULT => unreachable, // Segmentation fault. + EOVERFLOW => unreachable, // We avoid passing too large of a `count`. + ENOTCONN => unreachable, // `out_fd` is an unconnected socket. + + EINVAL, ENOSYS => { + // EINVAL could be any of the following situations: + // * Descriptor is not valid or locked + // * an mmap(2)-like operation is not available for in_fd + // * count is negative + // * out_fd has the O_APPEND flag set + // Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write + // manually, the same as ENOSYS. + break :sf; + }, + EAGAIN => if (std.event.Loop.instance) |loop| { + loop.waitUntilFdWritable(out_fd); + continue; + } else { + return error.WouldBlock; + }, + EIO => return error.InputOutput, + EPIPE => return error.BrokenPipe, + ENOMEM => return error.SystemResources, + ENXIO => return error.Unseekable, + ESPIPE => return error.Unseekable, + else => |err| { + const discard = unexpectedErrno(err); + break :sf; + }, + } + } + + if (trailers.len != 0) { + total_written += try writev(out_fd, trailers); + } + + return total_written; + }, + .freebsd => sf: { + var hdtr_data: std.c.sf_hdtr = undefined; + var hdtr: ?*std.c.sf_hdtr = null; + if (headers.len != 0 or trailers.len != 0) { + // Here we carefully avoid `@intCast` by returning partial writes when + // too many io vectors are provided. + const hdr_cnt = math.cast(u31, headers.len) catch math.maxInt(u31); + if (headers.len > hdr_cnt) return writev(out_fd, headers); + + const trl_cnt = math.cast(u31, trailers.len) catch math.maxInt(u31); + + hdtr_data = std.c.sf_hdtr{ + .headers = headers.ptr, + .hdr_cnt = hdr_cnt, + .trailers = trailers.ptr, + .trl_cnt = trl_cnt, + }; + hdtr = &hdtr_data; + } + + const adjusted_count = math.min(count, max_count); + + while (true) { + var sbytes: off_t = undefined; + const err = errno(system.sendfile(out_fd, in_fd, in_offset, adjusted_count, hdtr, &sbytes, flags)); + const amt = @bitCast(usize, sbytes); + switch (err) { + 0 => return amt, + + EBADF => unreachable, // Always a race condition. + EFAULT => unreachable, // Segmentation fault. + ENOTCONN => unreachable, // `out_fd` is an unconnected socket. + + EINVAL, EOPNOTSUPP, ENOTSOCK, ENOSYS => { + // EINVAL could be any of the following situations: + // * The fd argument is not a regular file. + // * The s argument is not a SOCK_STREAM type socket. + // * The offset argument is negative. + // Because of some of these possibilities, we fall back to doing read/write + // manually, the same as ENOSYS. + break :sf; + }, + + EINTR => if (amt != 0) return amt else continue, + + EAGAIN => if (amt != 0) { + return amt; + } else if (std.event.Loop.instance) |loop| { + loop.waitUntilFdWritable(out_fd); + continue; + } else { + return error.WouldBlock; + }, + + EBUSY => if (amt != 0) { + return amt; + } else if (std.event.Loop.instance) |loop| { + loop.waitUntilFdReadable(in_fd); + continue; + } else { + return error.WouldBlock; + }, + + EIO => return error.InputOutput, + ENOBUFS => return error.SystemResources, + EPIPE => return error.BrokenPipe, + + else => { + const discard = unexpectedErrno(err); + if (amt != 0) { + return amt; + } else { + break :sf; + } + }, + } + } + }, + .macosx, .ios, .tvos, .watchos => sf: { + var hdtr_data: std.c.sf_hdtr = undefined; + var hdtr: ?*std.c.sf_hdtr = null; + if (headers.len != 0 or trailers.len != 0) { + // Here we carefully avoid `@intCast` by returning partial writes when + // too many io vectors are provided. + const hdr_cnt = math.cast(u31, headers.len) catch math.maxInt(u31); + if (headers.len > hdr_cnt) return writev(out_fd, headers); + + const trl_cnt = math.cast(u31, trailers.len) catch math.maxInt(u31); + + hdtr_data = std.c.sf_hdtr{ + .headers = headers.ptr, + .hdr_cnt = hdr_cnt, + .trailers = trailers.ptr, + .trl_cnt = trl_cnt, + }; + hdtr = &hdtr_data; + } + + const adjusted_count = math.min(count, max_count); + + while (true) { + var sbytes: off_t = adjusted_count; + const err = errno(system.sendfile(out_fd, in_fd, in_offset, &sbytes, hdtr, flags)); + const amt = @bitCast(usize, sbytes); + switch (err) { + 0 => return amt, + + EBADF => unreachable, // Always a race condition. + EFAULT => unreachable, // Segmentation fault. + EINVAL => unreachable, + ENOTCONN => unreachable, // `out_fd` is an unconnected socket. + + ENOTSUP, ENOTSOCK, ENOSYS => break :sf, + + EINTR => if (amt != 0) return amt else continue, + + EAGAIN => if (amt != 0) { + return amt; + } else if (std.event.Loop.instance) |loop| { + loop.waitUntilFdWritable(out_fd); + continue; + } else { + return error.WouldBlock; + }, + + EIO => return error.InputOutput, + EPIPE => return error.BrokenPipe, + + else => { + _ = unexpectedErrno(err); + if (amt != 0) { + return amt; + } else { + break :sf; + } + }, + } + } + }, + else => {}, // fall back to read/write + } + + if (headers.len != 0 and !header_done) { + const amt = try writev(out_fd, headers); + total_written += amt; + if (amt < count_iovec_bytes(headers)) return total_written; + } + + rw: { + var buf: [8 * 4096]u8 = undefined; + // Here we match BSD behavior, making a zero count value send as many bytes as possible. + const adjusted_count = if (count == 0) buf.len else math.min(buf.len, count); + const amt_read = try pread(in_fd, buf[0..adjusted_count], in_offset); + if (amt_read == 0) { + if (count == 0) { + // We have detected EOF from `in_fd`. + break :rw; + } else { + return total_written; + } + } + const amt_written = try write(out_fd, buf[0..amt_read]); + total_written += amt_written; + if (amt_written < count or count == 0) return total_written; + } + + if (trailers.len != 0) { + total_written += try writev(out_fd, trailers); + } + + return total_written; +} + pub const PollError = error{ /// The kernel had no space to allocate file descriptor tables. SystemResources, diff --git a/lib/std/os/bits/linux/arm64.zig b/lib/std/os/bits/linux/arm64.zig index 8dcebc5ddf00..386e889873ba 100644 --- a/lib/std/os/bits/linux/arm64.zig +++ b/lib/std/os/bits/linux/arm64.zig @@ -82,6 +82,7 @@ pub const SYS_pread64 = 67; pub const SYS_pwrite64 = 68; pub const SYS_preadv = 69; pub const SYS_pwritev = 70; +pub const SYS_sendfile = 71; pub const SYS_pselect6 = 72; pub const SYS_ppoll = 73; pub const SYS_signalfd4 = 74; diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 30dba85e5186..c2fc06bc9b60 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -316,8 +316,19 @@ pub fn symlinkat(existing: [*:0]const u8, newfd: i32, newpath: [*:0]const u8) us return syscall3(SYS_symlinkat, @ptrToInt(existing), @bitCast(usize, @as(isize, newfd)), @ptrToInt(newpath)); } -pub fn pread(fd: i32, buf: [*]u8, count: usize, offset: usize) usize { - return syscall4(SYS_pread, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), count, offset); +pub fn pread(fd: i32, buf: [*]u8, count: usize, offset: u64) usize { + if (@hasDecl(@This(), "SYS_pread64")) { + return syscall5( + SYS_pread64, + @bitCast(usize, @as(isize, fd)), + @ptrToInt(buf), + count, + @truncate(usize, offset), + @truncate(usize, offset >> 32), + ); + } else { + return syscall4(SYS_pread, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), count, offset); + } } pub fn access(path: [*:0]const u8, mode: u32) usize { @@ -846,6 +857,26 @@ pub fn sendto(fd: i32, buf: [*]const u8, len: usize, flags: u32, addr: ?*const s return syscall6(SYS_sendto, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), len, flags, @ptrToInt(addr), @intCast(usize, alen)); } +pub fn sendfile(outfd: i32, infd: i32, offset: ?*i64, count: usize) usize { + if (@hasDecl(@This(), "SYS_sendfile64")) { + return syscall4( + SYS_sendfile64, + @bitCast(usize, @as(isize, outfd)), + @bitCast(usize, @as(isize, infd)), + @ptrToInt(offset), + count, + ); + } else { + return syscall4( + SYS_sendfile, + @bitCast(usize, @as(isize, outfd)), + @bitCast(usize, @as(isize, infd)), + @ptrToInt(offset), + count, + ); + } +} + pub fn socketpair(domain: i32, socket_type: i32, protocol: i32, fd: [2]i32) usize { if (builtin.arch == .i386) { return socketcall(SC_socketpair, &[4]usize{ @intCast(usize, domain), @intCast(usize, socket_type), @intCast(usize, protocol), @ptrToInt(&fd[0]) }); diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index 197edd82c134..717380ea30eb 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -44,6 +44,114 @@ fn testThreadIdFn(thread_id: *Thread.Id) void { thread_id.* = Thread.getCurrentId(); } +test "sendfile" { + try fs.makePath(a, "os_test_tmp"); + defer fs.deleteTree("os_test_tmp") catch {}; + + var dir = try fs.cwd().openDirList("os_test_tmp"); + defer dir.close(); + + const line1 = "line1\n"; + const line2 = "second line\n"; + var vecs = [_]os.iovec_const{ + .{ + .iov_base = line1, + .iov_len = line1.len, + }, + .{ + .iov_base = line2, + .iov_len = line2.len, + }, + }; + + var src_file = try dir.createFileC("sendfile1.txt", .{ .read = true }); + defer src_file.close(); + + try src_file.writevAll(&vecs); + + var dest_file = try dir.createFileC("sendfile2.txt", .{ .read = true }); + defer dest_file.close(); + + const header1 = "header1\n"; + const header2 = "second header\n"; + var headers = [_]os.iovec_const{ + .{ + .iov_base = header1, + .iov_len = header1.len, + }, + .{ + .iov_base = header2, + .iov_len = header2.len, + }, + }; + + const trailer1 = "trailer1\n"; + const trailer2 = "second trailer\n"; + var trailers = [_]os.iovec_const{ + .{ + .iov_base = trailer1, + .iov_len = trailer1.len, + }, + .{ + .iov_base = trailer2, + .iov_len = trailer2.len, + }, + }; + + var written_buf: [header1.len + header2.len + 10 + trailer1.len + trailer2.len]u8 = undefined; + try sendfileAll(dest_file.handle, src_file.handle, 1, 10, &headers, &trailers, 0); + + try dest_file.preadAll(&written_buf, 0); + expect(mem.eql(u8, &written_buf, "header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n")); +} + +fn sendfileAll( + out_fd: os.fd_t, + in_fd: os.fd_t, + offset: u64, + count: usize, + headers: []os.iovec_const, + trailers: []os.iovec_const, + flags: u32, +) os.SendFileError!void { + var amt: usize = undefined; + hdrs: { + var i: usize = 0; + while (i < headers.len) { + amt = try os.sendfile(out_fd, in_fd, offset, count, headers[i..], trailers, flags); + while (amt >= headers[i].iov_len) { + amt -= headers[i].iov_len; + i += 1; + if (i >= headers.len) break :hdrs; + } + headers[i].iov_base += amt; + headers[i].iov_len -= amt; + } + } + var off = amt; + while (off < count) { + amt = try os.sendfile(out_fd, in_fd, offset + off, count - off, &[0]os.iovec_const{}, trailers, flags); + off += amt; + } + amt = off - count; + var i: usize = 0; + while (i < trailers.len) { + while (amt >= headers[i].iov_len) { + amt -= trailers[i].iov_len; + i += 1; + if (i >= trailers.len) return; + } + trailers[i].iov_base += amt; + trailers[i].iov_len -= amt; + if (std.Target.current.os.tag == .windows) { + amt = try os.writev(out_fd, trailers[i..]); + } else { + // Here we must use send because it's the only way to give the flags. + amt = try os.send(out_fd, trailers[i].iov_base[0..trailers[i].iov_len], flags); + } + } +} + test "std.Thread.getCurrentId" { if (builtin.single_threaded) return error.SkipZigTest; @@ -103,7 +211,7 @@ test "AtomicFile" { { var af = try fs.AtomicFile.init(test_out_file, File.default_mode); defer af.deinit(); - try af.file.write(test_content); + try af.file.writeAll(test_content); try af.finish(); } const content = try io.readFileAlloc(testing.allocator, test_out_file); @@ -226,7 +334,7 @@ test "pipe" { return error.SkipZigTest; var fds = try os.pipe(); - try os.write(fds[1], "hello"); + expect((try os.write(fds[1], "hello")) == 5); var buf: [16]u8 = undefined; expect((try os.read(fds[0], buf[0..])) == 5); testing.expectEqualSlices(u8, buf[0..5], "hello"); @@ -248,7 +356,7 @@ test "memfd_create" { else => |e| return e, }; defer std.os.close(fd); - try std.os.write(fd, "test"); + expect((try std.os.write(fd, "test")) == 4); try std.os.lseek_SET(fd, 0); var buf: [10]u8 = undefined; diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index cc0d446b1200..92124511bd3f 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -424,7 +424,7 @@ pub const WriteFileError = error{ Unexpected, }; -pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError!void { +pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError!usize { if (std.event.Loop.instance) |loop| { // TODO support async WriteFile with no offset const off = offset.?; @@ -445,8 +445,8 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError _ = CreateIoCompletionPort(fd, loop.os_data.io_port, undefined, undefined); loop.beginOneEvent(); suspend { - // TODO replace this @intCast with a loop that writes all the bytes - _ = kernel32.WriteFile(fd, bytes.ptr, @intCast(windows.DWORD, bytes.len), null, &resume_node.base.overlapped); + const adjusted_len = math.cast(windows.DWORD, bytes.len) catch maxInt(windows.DWORD); + _ = kernel32.WriteFile(fd, bytes.ptr, adjusted_len, null, &resume_node.base.overlapped); } var bytes_transferred: windows.DWORD = undefined; if (kernel32.GetOverlappedResult(fd, &resume_node.base.overlapped, &bytes_transferred, FALSE) == 0) { @@ -460,6 +460,7 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError else => |err| return windows.unexpectedError(err), } } + return bytes_transferred; } else { var bytes_written: DWORD = undefined; var overlapped_data: OVERLAPPED = undefined; @@ -473,18 +474,19 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError }; break :blk &overlapped_data; } else null; - // TODO replace this @intCast with a loop that writes all the bytes - if (kernel32.WriteFile(handle, bytes.ptr, @intCast(u32, bytes.len), &bytes_written, overlapped) == 0) { + const adjusted_len = math.cast(u32, bytes.len) catch maxInt(u32); + if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) { switch (kernel32.GetLastError()) { .INVALID_USER_BUFFER => return error.SystemResources, .NOT_ENOUGH_MEMORY => return error.SystemResources, .OPERATION_ABORTED => return error.OperationAborted, .NOT_ENOUGH_QUOTA => return error.SystemResources, - .IO_PENDING => unreachable, // this function is for blocking files only + .IO_PENDING => unreachable, .BROKEN_PIPE => return error.BrokenPipe, else => |err| return unexpectedError(err), } } + return bytes_written; } } diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index ee6ee37d5d1a..625aef313197 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -29,7 +29,7 @@ pub fn render(allocator: *mem.Allocator, stream: var, tree: *ast.Tree) (@TypeOf( source_index: usize, source: []const u8, - fn write(iface_stream: *Stream, bytes: []const u8) StreamError!void { + fn write(iface_stream: *Stream, bytes: []const u8) StreamError!usize { const self = @fieldParentPtr(MyStream, "stream", iface_stream); if (!self.anything_changed_ptr.*) { @@ -45,7 +45,7 @@ pub fn render(allocator: *mem.Allocator, stream: var, tree: *ast.Tree) (@TypeOf( } } - try self.child_stream.write(bytes); + return self.child_stream.writeOnce(bytes); } }; var my_stream = MyStream{ @@ -2443,14 +2443,15 @@ const FindByteOutStream = struct { }; } - fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void { + fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize { const self = @fieldParentPtr(Self, "stream", out_stream); - if (self.byte_found) return; + if (self.byte_found) return bytes.len; self.byte_found = blk: { for (bytes) |b| if (b == self.byte) break :blk true; break :blk false; }; + return bytes.len; } }; diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig index 38a90f4c60e4..7f0609c1123e 100644 --- a/lib/std/zig/system.zig +++ b/lib/std/zig/system.zig @@ -796,6 +796,7 @@ pub const NativeTargetInfo = struct { error.SystemResources => return error.SystemResources, error.IsDir => return error.UnableToReadElfFile, error.BrokenPipe => return error.UnableToReadElfFile, + error.Unseekable => return error.UnableToReadElfFile, error.ConnectionResetByPeer => return error.UnableToReadElfFile, error.Unexpected => return error.Unexpected, error.InputOutput => return error.FileSystem, diff --git a/test/standalone/cat/main.zig b/test/standalone/cat/main.zig index 34439f9c242d..8539a0de0f5b 100644 --- a/test/standalone/cat/main.zig +++ b/test/standalone/cat/main.zig @@ -42,6 +42,7 @@ fn usage(exe: []const u8) !void { return error.Invalid; } +// TODO use copy_file_range fn cat_file(stdout: fs.File, file: fs.File) !void { var buf: [1024 * 4]u8 = undefined; @@ -55,7 +56,7 @@ fn cat_file(stdout: fs.File, file: fs.File) !void { break; } - stdout.write(buf[0..bytes_read]) catch |err| { + stdout.writeAll(buf[0..bytes_read]) catch |err| { warn("Unable to write to stdout: {}\n", .{@errorName(err)}); return err; }; diff --git a/test/standalone/hello_world/hello.zig b/test/standalone/hello_world/hello.zig index e3fc5c0e3eab..eabb226eb2cc 100644 --- a/test/standalone/hello_world/hello.zig +++ b/test/standalone/hello_world/hello.zig @@ -1,8 +1,5 @@ const std = @import("std"); pub fn main() !void { - const stdout_file = std.io.getStdOut(); - // If this program encounters pipe failure when printing to stdout, exit - // with an error. - try stdout_file.write("Hello, world!\n"); + try std.io.getStdOut().writeAll("Hello, World!\n"); }