From ff8544daa59fa27b1e5c90e5ccdb6c513d719d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 14:51:51 +0100 Subject: [PATCH 01/29] tar: refactor code to be more testable Split reading/parsing tar file and writing results to the disk in two separate steps. So we can later test parsing part without need to write everyting to the disk. --- lib/std/tar.zig | 242 ++++++++++++++++++++++++++++++------------------ 1 file changed, 153 insertions(+), 89 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index c39cc6e4323e..b41f0d8683c1 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -174,6 +174,144 @@ const Buffer = struct { } }; +fn Iterator(comptime ReaderType: type) type { + return struct { + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + file_name_len: usize = 0, + buffer: Buffer = .{}, + reader: ReaderType, + pad_len: usize = 0, + diagnostics: ?*Options.Diagnostics, + + const Self = @This(); + + const File = struct { + file_name: []const u8, + link_name: []const u8, + size: usize, + file_type: Header.FileType, + iter: *Self, + + pub fn write(self: File, writer: anytype) !void { + const rounded_file_size = std.mem.alignForward(u64, self.size, 512); + var file_off: usize = 0; + while (true) { + const temp = try self.iter.buffer.readChunk(self.iter.reader, @intCast(rounded_file_size + 512 - file_off)); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@intCast(@min(self.size - file_off, temp.len))]; + try writer.writeAll(slice); + + file_off += slice.len; + self.iter.buffer.advance(slice.len); + if (file_off >= self.size) { + return; + // self.iter.buffer.advance(pad_len); + // continue :header; + } + } + } + + pub fn skip(self: File) void { + _ = self; + unreachable; + } + }; + + pub fn next(self: *Self) !?File { + self.buffer.advance(self.pad_len); + self.pad_len = 0; + self.file_name_len = 0; + + while (true) { + const chunk = try self.buffer.readChunk(self.reader, 1024); + 
switch (chunk.len) { + 0 => return null, + 1...511 => return error.UnexpectedEndOfStream, + else => {}, + } + self.buffer.advance(512); + + const header: Header = .{ .bytes = chunk[0..512] }; + const file_size = try header.fileSize(); + const file_type = header.fileType(); + const link_name = header.linkName(); + const rounded_file_size = std.mem.alignForward(u64, file_size, 512); + self.pad_len = @intCast(rounded_file_size - file_size); + const file_name = if (self.file_name_len == 0) + try header.fullFileName(&self.file_name_buffer) + else + self.file_name_buffer[0..self.file_name_len]; + + switch (file_type) { + .directory, .normal, .symbolic_link => { + return File{ + .file_name = file_name, + .link_name = link_name, + .size = file_size, + .file_type = file_type, + .iter = self, + }; + }, + .global_extended_header => { + self.buffer.skip(self.reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; + }, + .extended_header => { + if (file_size == 0) { + self.buffer.advance(@intCast(rounded_file_size)); + continue; + } + + const chunk_size: usize = @intCast(rounded_file_size + 512); + var data_off: usize = 0; + const file_name_override_len = while (data_off < file_size) { + const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off); + if (slice.len == 0) return error.UnexpectedEndOfStream; + const remaining_size: usize = @intCast(file_size - data_off); + const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); + + if (std.mem.eql(u8, attr_info.key, "path")) { + if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; + self.buffer.advance(attr_info.value_off); + data_off += attr_info.value_off; + break attr_info.value_len; + } + + try self.buffer.skip(self.reader, attr_info.size); + data_off += attr_info.size; + } else 0; + + var i: usize = 0; + while (i < file_name_override_len) { + const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off - i); + 
if (slice.len == 0) return error.UnexpectedEndOfStream; + const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); + @memcpy(self.file_name_buffer[i .. i + copy_size], slice[0..copy_size]); + self.buffer.advance(copy_size); + i += copy_size; + } + + try self.buffer.skip(self.reader, @intCast(rounded_file_size - data_off - file_name_override_len)); + self.file_name_len = file_name_override_len; + continue; + }, + .hard_link => return error.TarUnsupportedFileType, + else => { + const d = self.diagnostics orelse return error.TarUnsupportedFileType; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, file_name), + .file_type = file_type, + } }); + }, + } + } + } + }; +} + +pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { + return .{ .reader = reader, .diagnostics = diagnostics }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -186,37 +324,20 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); }, } - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - var file_name_override_len: usize = 0; - var buffer: Buffer = .{}; - header: while (true) { - const chunk = try buffer.readChunk(reader, 1024); - switch (chunk.len) { - 0 => return, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, - } - buffer.advance(512); - - const header: Header = .{ .bytes = chunk[0..512] }; - const file_size = try header.fileSize(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - const pad_len: usize = @intCast(rounded_file_size - file_size); - const unstripped_file_name = if (file_name_override_len > 0) - file_name_buffer[0..file_name_override_len] - else - try header.fullFileName(&file_name_buffer); - file_name_override_len = 0; - switch 
(header.fileType()) { + + var iter = iterator(reader, options.diagnostics); + + while (try iter.next()) |iter_file| { + switch (iter_file.file_type) { .directory => { - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(iter_file.file_name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .normal => { - if (file_size == 0 and unstripped_file_name.len == 0) return; - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + if (iter_file.size == 0 and iter_file.file_name.len == 0) return; + const file_name = try stripComponents(iter_file.file_name, options.strip_components); const file = dir.createFile(file_name, .{}) catch |err| switch (err) { error.FileNotFound => again: { @@ -240,68 +361,17 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }; defer if (file) |f| f.close(); - var file_off: usize = 0; - while (true) { - const temp = try buffer.readChunk(reader, @intCast(rounded_file_size + 512 - file_off)); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@intCast(@min(file_size - file_off, temp.len))]; - if (file) |f| try f.writeAll(slice); - - file_off += slice.len; - buffer.advance(slice.len); - if (file_off >= file_size) { - buffer.advance(pad_len); - continue :header; - } + if (file) |f| { + try iter_file.write(f); + } else { + iter_file.skip(); } }, - .extended_header => { - if (file_size == 0) { - buffer.advance(@intCast(rounded_file_size)); - continue; - } - - const chunk_size: usize = @intCast(rounded_file_size + 512); - var data_off: usize = 0; - file_name_override_len = while (data_off < file_size) { - const slice = try buffer.readChunk(reader, chunk_size - data_off); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = @intCast(file_size - data_off); - const attr_info = 
try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); - - if (std.mem.eql(u8, attr_info.key, "path")) { - if (attr_info.value_len > file_name_buffer.len) return error.NameTooLong; - buffer.advance(attr_info.value_off); - data_off += attr_info.value_off; - break attr_info.value_len; - } - - try buffer.skip(reader, attr_info.size); - data_off += attr_info.size; - } else 0; - - var i: usize = 0; - while (i < file_name_override_len) { - const slice = try buffer.readChunk(reader, chunk_size - data_off - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); - @memcpy(file_name_buffer[i .. i + copy_size], slice[0..copy_size]); - buffer.advance(copy_size); - i += copy_size; - } - - try buffer.skip(reader, @intCast(rounded_file_size - data_off - file_name_override_len)); - continue :header; - }, - .global_extended_header => { - buffer.skip(reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; - }, - .hard_link => return error.TarUnsupportedFileType, .symbolic_link => { // The file system path of the symbolic link. - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(iter_file.file_name, options.strip_components); // The data inside the symbolic link. 
- const link_name = header.linkName(); + const link_name = iter_file.link_name; dir.symLink(link_name, file_name, .{}) catch |err| again: { const code = code: { @@ -323,13 +393,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi } }); }; }, - else => |file_type| { - const d = options.diagnostics orelse return error.TarUnsupportedFileType; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, unstripped_file_name), - .file_type = file_type, - } }); - }, + else => unreachable, } } } From 4381241237fc6ff18ee889571774d929700ce7a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 17:17:28 +0100 Subject: [PATCH 02/29] tar: refactor Buffer Move reader into Buffer and make it BufferedReader. This doesn't introduce any new functionality just grouping similar things. --- lib/std/tar.zig | 174 ++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 80 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index b41f0d8683c1..9fa51bdc81da 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -136,51 +136,90 @@ pub const Header = struct { } }; -const Buffer = struct { - buffer: [512 * 8]u8 = undefined, - start: usize = 0, - end: usize = 0, +fn BufferedReader(comptime ReaderType: type) type { + return struct { + unbuffered_reader: ReaderType, + buffer: [512 * 8]u8 = undefined, + start: usize = 0, + end: usize = 0, - pub fn readChunk(b: *Buffer, reader: anytype, count: usize) ![]const u8 { - b.ensureCapacity(1024); + const Self = @This(); - const ask = @min(b.buffer.len - b.end, count -| (b.end - b.start)); - b.end += try reader.readAtLeast(b.buffer[b.end..], ask); + pub fn readChunk(self: *Self, count: usize) ![]const u8 { + self.ensureCapacity(1024); - return b.buffer[b.start..b.end]; - } + const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); + self.end += try 
self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - pub fn advance(b: *Buffer, count: usize) void { - b.start += count; - assert(b.start <= b.end); - } + return self.buffer[self.start..self.end]; + } - pub fn skip(b: *Buffer, reader: anytype, count: usize) !void { - if (b.start + count > b.end) { - try reader.skipBytes(b.start + count - b.end, .{}); - b.start = b.end; - } else { - b.advance(count); + pub fn advance(self: *Self, count: usize) void { + self.start += count; + assert(self.start <= self.end); + } + + pub fn skip(self: *Self, count: usize) !void { + if (self.start + count > self.end) { + try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); + self.start = self.end; + } else { + self.advance(count); + } } - } - inline fn ensureCapacity(b: *Buffer, count: usize) void { - if (b.buffer.len - b.start < count) { - const dest_end = b.end - b.start; - @memcpy(b.buffer[0..dest_end], b.buffer[b.start..b.end]); - b.end = dest_end; - b.start = 0; + inline fn ensureCapacity(self: *Self, count: usize) void { + if (self.buffer.len - self.start < count) { + const dest_end = self.end - self.start; + @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); + self.end = dest_end; + self.start = 0; + } } - } -}; + + pub fn write(self: *Self, writer: anytype, size: usize) !void { + const rounded_file_size = std.mem.alignForward(usize, size, 512); + const chunk_size = rounded_file_size + 512; + const pad_len: usize = rounded_file_size - size; + + var file_off: usize = 0; + while (true) { + const temp = try self.readChunk(chunk_size - file_off); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@min(size - file_off, temp.len)]; + try writer.writeAll(slice); + + file_off += slice.len; + self.advance(slice.len); + if (file_off >= size) { + self.advance(pad_len); + return; + } + } + } + + pub fn copy(self: *Self, dst_buffer: []u8, size: usize) !void { + const rounded_file_size = 
std.mem.alignForward(usize, size, 512); + const chunk_size = rounded_file_size + 512; + + var i: usize = 0; + while (i < size) { + const slice = try self.readChunk(chunk_size - i); + if (slice.len == 0) return error.UnexpectedEndOfStream; + const copy_size: usize = @min(size - i, slice.len); + @memcpy(dst_buffer[i .. i + copy_size], slice[0..copy_size]); + self.advance(copy_size); + i += copy_size; + } + } + }; +} fn Iterator(comptime ReaderType: type) type { return struct { file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, file_name_len: usize = 0, - buffer: Buffer = .{}, - reader: ReaderType, - pad_len: usize = 0, + reader: BufferedReader(ReaderType), diagnostics: ?*Options.Diagnostics, const Self = @This(); @@ -193,50 +232,32 @@ fn Iterator(comptime ReaderType: type) type { iter: *Self, pub fn write(self: File, writer: anytype) !void { - const rounded_file_size = std.mem.alignForward(u64, self.size, 512); - var file_off: usize = 0; - while (true) { - const temp = try self.iter.buffer.readChunk(self.iter.reader, @intCast(rounded_file_size + 512 - file_off)); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@intCast(@min(self.size - file_off, temp.len))]; - try writer.writeAll(slice); - - file_off += slice.len; - self.iter.buffer.advance(slice.len); - if (file_off >= self.size) { - return; - // self.iter.buffer.advance(pad_len); - // continue :header; - } - } + try self.iter.reader.write(writer, self.size); } - pub fn skip(self: File) void { - _ = self; - unreachable; + pub fn skip(self: File) !void { + const rounded_file_size = std.mem.alignForward(usize, self.size, 512); + try self.iter.reader.skip(rounded_file_size); } }; pub fn next(self: *Self) !?File { - self.buffer.advance(self.pad_len); - self.pad_len = 0; self.file_name_len = 0; - while (true) { - const chunk = try self.buffer.readChunk(self.reader, 1024); + const chunk = try self.reader.readChunk(1024); switch (chunk.len) { 0 => return null, 1...511 => return 
error.UnexpectedEndOfStream, else => {}, } - self.buffer.advance(512); + self.reader.advance(512); const header: Header = .{ .bytes = chunk[0..512] }; const file_size = try header.fileSize(); const file_type = header.fileType(); const link_name = header.linkName(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - self.pad_len = @intCast(rounded_file_size - file_size); + const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); + const file_name = if (self.file_name_len == 0) try header.fullFileName(&self.file_name_buffer) else @@ -253,44 +274,33 @@ fn Iterator(comptime ReaderType: type) type { }; }, .global_extended_header => { - self.buffer.skip(self.reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; + self.reader.skip(rounded_file_size) catch return error.TarHeadersTooBig; }, .extended_header => { - if (file_size == 0) { - self.buffer.advance(@intCast(rounded_file_size)); - continue; - } + if (file_size == 0) continue; - const chunk_size: usize = @intCast(rounded_file_size + 512); + const chunk_size: usize = rounded_file_size + 512; var data_off: usize = 0; const file_name_override_len = while (data_off < file_size) { - const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off); + const slice = try self.reader.readChunk(chunk_size - data_off); if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = @intCast(file_size - data_off); + const remaining_size: usize = file_size - data_off; const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); if (std.mem.eql(u8, attr_info.key, "path")) { if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; - self.buffer.advance(attr_info.value_off); + self.reader.advance(attr_info.value_off); data_off += attr_info.value_off; break attr_info.value_len; } - try self.buffer.skip(self.reader, attr_info.size); + try self.reader.skip(attr_info.size); 
data_off += attr_info.size; } else 0; - var i: usize = 0; - while (i < file_name_override_len) { - const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); - @memcpy(self.file_name_buffer[i .. i + copy_size], slice[0..copy_size]); - self.buffer.advance(copy_size); - i += copy_size; - } + try self.reader.copy(&self.file_name_buffer, file_name_override_len); - try self.buffer.skip(self.reader, @intCast(rounded_file_size - data_off - file_name_override_len)); + try self.reader.skip(rounded_file_size - data_off - file_name_override_len); self.file_name_len = file_name_override_len; continue; }, @@ -309,7 +319,11 @@ fn Iterator(comptime ReaderType: type) type { } pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { - return .{ .reader = reader, .diagnostics = diagnostics }; + const ReaderType = @TypeOf(reader); + return .{ + .reader = BufferedReader(ReaderType){ .unbuffered_reader = reader }, + .diagnostics = diagnostics, + }; } pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { @@ -364,7 +378,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi if (file) |f| { try iter_file.write(f); } else { - iter_file.skip(); + try iter_file.skip(); } }, .symbolic_link => { From 18170633754afb68c5831bfe4534c64af93ba55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 21:37:30 +0100 Subject: [PATCH 03/29] tar: add initial test cases Just adding tests, without changing functionality. 
--- lib/std/tar.zig | 334 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 319 insertions(+), 15 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9fa51bdc81da..48f6f84dfb14 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -134,6 +134,13 @@ pub const Header = struct { } return header.bytes[start..i]; } + + pub fn isZeroBlock(header: Header) bool { + for (header.bytes) |b| { + if (b != 0) return false; + } + return true; + } }; fn BufferedReader(comptime ReaderType: type) type { @@ -225,7 +232,7 @@ fn Iterator(comptime ReaderType: type) type { const Self = @This(); const File = struct { - file_name: []const u8, + name: []const u8, link_name: []const u8, size: usize, file_type: Header.FileType, @@ -239,6 +246,31 @@ fn Iterator(comptime ReaderType: type) type { const rounded_file_size = std.mem.alignForward(usize, self.size, 512); try self.iter.reader.skip(rounded_file_size); } + + fn chksum(self: File) ![16]u8 { + var cs = [_]u8{0} ** 16; + if (self.size == 0) return cs; + + var buffer: [512]u8 = undefined; + var h = std.crypto.hash.Md5.init(.{}); + + var remaining_bytes: usize = self.size; + while (remaining_bytes > 0) { + const copy_size = @min(buffer.len, remaining_bytes); + try self.iter.reader.copy(&buffer, copy_size); + h.update(buffer[0..copy_size]); + remaining_bytes -= copy_size; + } + h.final(&cs); + try self.skipPadding(); + return cs; + } + + fn skipPadding(self: File) !void { + const rounded_file_size = std.mem.alignForward(usize, self.size, 512); + const pad_len: usize = rounded_file_size - self.size; + self.iter.reader.advance(pad_len); + } }; pub fn next(self: *Self) !?File { @@ -253,6 +285,7 @@ fn Iterator(comptime ReaderType: type) type { self.reader.advance(512); const header: Header = .{ .bytes = chunk[0..512] }; + if (header.isZeroBlock()) return null; const file_size = try header.fileSize(); const file_type = header.fileType(); const link_name = header.linkName(); @@ -266,10 +299,10 @@ fn Iterator(comptime 
ReaderType: type) type { switch (file_type) { .directory, .normal, .symbolic_link => { return File{ - .file_name = file_name, - .link_name = link_name, + .name = file_name, .size = file_size, .file_type = file_type, + .link_name = link_name, .iter = self, }; }, @@ -341,19 +374,19 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var iter = iterator(reader, options.diagnostics); - while (try iter.next()) |iter_file| { - switch (iter_file.file_type) { + while (try iter.next()) |file| { + switch (file.file_type) { .directory => { - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .normal => { - if (iter_file.size == 0 and iter_file.file_name.len == 0) return; - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + if (file.size == 0 and file.name.len == 0) return; + const file_name = try stripComponents(file.name, options.strip_components); - const file = dir.createFile(file_name, .{}) catch |err| switch (err) { + const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) { error.FileNotFound => again: { const code = code: { if (std.fs.path.dirname(file_name)) |dir_name| { @@ -373,19 +406,19 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, else => |e| return e, }; - defer if (file) |f| f.close(); + defer if (fs_file) |f| f.close(); - if (file) |f| { - try iter_file.write(f); + if (fs_file) |f| { + try file.write(f); } else { - try iter_file.skip(); + try file.skip(); } }, .symbolic_link => { // The file system path of the symbolic link. - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); // The data inside the symbolic link. 
- const link_name = iter_file.link_name; + const link_name = file.link_name; dir.symLink(link_name, file_name, .{}) catch |err| again: { const code = code: { @@ -473,3 +506,274 @@ test parsePaxAttribute { const std = @import("std.zig"); const assert = std.debug.assert; + +const TestCase = struct { + const File = struct { + const empty_string = &[0]u8{}; + + name: []const u8, + size: usize = 0, + link_name: []const u8 = empty_string, + file_type: Header.FileType = .normal, + }; + + path: []const u8, + files: []const File = &[_]TestCase.File{}, + chksums: []const []const u8 = &[_][]const u8{}, + err: ?anyerror = null, +}; + +test "Go test cases" { + const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); + const cases = [_]TestCase{ + .{ + .path = "gnu.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "sparse-formats.tar", + .err = error.TarUnsupportedFileType, + }, + .{ + .path = "star.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "v7.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "pax.tar", + .files = &[_]TestCase.File{ + .{ + .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .file_type = .normal, + }, + .{ + .name = "a/b", + .size = 0, + .file_type = .symbolic_link, + .link_name = "1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545", + // TODO fix reading link name from pax header + // .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + // TODO: this should fail + // .{ + // .path = "pax-bad-hdr-file.tar", + // .err = error.TarBadHeader, + // }, + // .{ + // .path = "pax-bad-mtime-file.tar", + // .err = error.TarBadHeader, + // }, + // + // TODO: giving wrong result because we are not reading pax size header + // .{ + // .path = "pax-pos-size-file.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "foo", + // .size = 999, + // .file_type = .normal, + // }, + // }, + // .chksums = &[_][]const u8{ + // "0afb597b283fe61b5d4879669a350556", + // }, + // }, + .{ + // has pax records which we are not interested in + .path = "pax-records.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .path = "pax-global-records.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .path = "nil-uid.tar", + .files = &[_]TestCase.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .path = 
"xattrs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "gnu-multi-hdrs.tar", + .err = error.TarUnsupportedFileType, + }, + .{ + .path = "gnu-incremental.tar", + .err = error.TarUnsupportedFileType, + }, + // .{ + // .path = "pax-multi-hdrs.tar", + // }, + // .{ + // .path = "gnu-long-nul.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "012233456789", + // }, + // }, + // }, + // .{ + // .path = "gnu-utf8.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "012233456789", + // }, + // }, + // }, + // + .{ + .path = "gnu-not-utf8.tar", + .files = &[_]TestCase.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + }, + }, + }, + // TODO some files with errors: + // pax-nul-xattrs.tar, pax-nul-path.tar, neg-size.tar, issue10968.tar, issue11169.tar, issue12435.tar + .{ + .path = "trailing-slash.tar", + .files = &[_]TestCase.File{ + .{ + .name = "123456789/" ** 30, + .file_type = .directory, + }, + }, + }, + }; + + for (cases) |case| { + // if (!std.mem.eql(u8, case.path, "pax.tar")) continue; + + var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); + + var iter = iterator(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |actual| { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + + if (case.chksums.len > i) { + var actual_chksum = try actual.chksum(); + var hex_to_bytes_buffer: [16]u8 
= undefined; + const expected_chksum = try std.fmt.hexToBytes(&hex_to_bytes_buffer, case.chksums[i]); + // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); + try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); + } else { + try actual.skip(); // skip file content + } + i += 1; + } + try std.testing.expectEqual(case.files.len, i); + } +} From be5d04ab7922d84b59dad06de3df378b94827d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 22:23:16 +0100 Subject: [PATCH 04/29] tar: add pax linkpath attribute parsing Name of symbolic link can be also found in pax attribute. --- lib/std/tar.zig | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 48f6f84dfb14..9f2fa924406f 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -226,6 +226,9 @@ fn Iterator(comptime ReaderType: type) type { return struct { file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, file_name_len: usize = 0, + link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_len: usize = 0, + reader: BufferedReader(ReaderType), diagnostics: ?*Options.Diagnostics, @@ -275,6 +278,8 @@ fn Iterator(comptime ReaderType: type) type { pub fn next(self: *Self) !?File { self.file_name_len = 0; + self.link_name_len = 0; + while (true) { const chunk = try self.reader.readChunk(1024); switch (chunk.len) { @@ -287,10 +292,12 @@ fn Iterator(comptime ReaderType: type) type { const header: Header = .{ .bytes = chunk[0..512] }; if (header.isZeroBlock()) return null; const file_size = try header.fileSize(); - const file_type = header.fileType(); - const link_name = header.linkName(); const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); - + const file_type = header.fileType(); + const link_name = if (self.link_name_len == 0) + header.linkName() + else + self.link_name_buffer[0..self.link_name_len]; const 
file_name = if (self.file_name_len == 0) try header.fullFileName(&self.file_name_buffer) else @@ -314,7 +321,7 @@ fn Iterator(comptime ReaderType: type) type { const chunk_size: usize = rounded_file_size + 512; var data_off: usize = 0; - const file_name_override_len = while (data_off < file_size) { + while (data_off < file_size) { const slice = try self.reader.readChunk(chunk_size - data_off); if (slice.len == 0) return error.UnexpectedEndOfStream; const remaining_size: usize = file_size - data_off; @@ -323,18 +330,22 @@ fn Iterator(comptime ReaderType: type) type { if (std.mem.eql(u8, attr_info.key, "path")) { if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; self.reader.advance(attr_info.value_off); - data_off += attr_info.value_off; - break attr_info.value_len; + try self.reader.copy(&self.file_name_buffer, attr_info.value_len); + self.file_name_len = attr_info.value_len; + self.reader.advance(1); + } else if (std.mem.eql(u8, attr_info.key, "linkpath")) { + if (attr_info.value_len > self.link_name_buffer.len) return error.NameTooLong; + self.reader.advance(attr_info.value_off); + try self.reader.copy(&self.link_name_buffer, attr_info.value_len); + self.link_name_len = attr_info.value_len; + self.reader.advance(1); + } else { + try self.reader.skip(attr_info.size); } - - try self.reader.skip(attr_info.size); data_off += attr_info.size; - } else 0; - - try self.reader.copy(&self.file_name_buffer, file_name_override_len); + } + try self.reader.skip(rounded_file_size - data_off); - try self.reader.skip(rounded_file_size - data_off - file_name_override_len); - self.file_name_len = file_name_override_len; continue; }, .hard_link => return error.TarUnsupportedFileType, @@ -599,9 +610,7 @@ test "Go test cases" { .name = "a/b", .size = 0, .file_type = .symbolic_link, - .link_name = "1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545", - // TODO fix reading link name from pax header - // 
.link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, }, .chksums = &[_][]const u8{ From 6d5283e83550998953f8784ba2b08a413a41baf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 28 Nov 2023 23:07:37 +0100 Subject: [PATCH 05/29] tar: refactor reader and iterator Make it more readable. --- lib/std/tar.zig | 314 +++++++++++++++++++++++++++--------------------- 1 file changed, 180 insertions(+), 134 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9f2fa924406f..9ea8f1965204 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -62,8 +62,10 @@ pub const Options = struct { }; }; +const block_size = 512; + pub const Header = struct { - bytes: *const [512]u8, + bytes: *const [block_size]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -135,7 +137,7 @@ pub const Header = struct { return header.bytes[start..i]; } - pub fn isZeroBlock(header: Header) bool { + pub fn isZero(header: Header) bool { for (header.bytes) |b| { if (b != 0) return false; } @@ -146,7 +148,7 @@ pub const Header = struct { fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, - buffer: [512 * 8]u8 = undefined, + buffer: [block_size * 8]u8 = undefined, start: usize = 0, end: usize = 0, @@ -161,6 +163,17 @@ fn BufferedReader(comptime ReaderType: type) type { return self.buffer[self.start..self.end]; } + pub fn readBlock(self: *Self) !?[]const u8 { + const block_bytes = try self.readChunk(block_size * 2); + switch (block_bytes.len) { + 0 => return null, + 1...(block_size - 1) => return error.UnexpectedEndOfStream, + else => {}, + } + 
self.advance(block_size); + return block_bytes[0..block_size]; + } + pub fn advance(self: *Self, count: usize) void { self.start += count; assert(self.start <= self.end); @@ -175,6 +188,14 @@ fn BufferedReader(comptime ReaderType: type) type { } } + pub fn skipPadding(self: *Self, file_size: usize) !void { + return self.skip(filePadding(file_size)); + } + + pub fn skipFile(self: *Self, file_size: usize) !void { + return self.skip(roundedFileSize(file_size)); + } + inline fn ensureCapacity(self: *Self, count: usize) void { if (self.buffer.len - self.start < count) { const dest_end = self.end - self.start; @@ -185,179 +206,200 @@ fn BufferedReader(comptime ReaderType: type) type { } pub fn write(self: *Self, writer: anytype, size: usize) !void { - const rounded_file_size = std.mem.alignForward(usize, size, 512); - const chunk_size = rounded_file_size + 512; - const pad_len: usize = rounded_file_size - size; - - var file_off: usize = 0; - while (true) { - const temp = try self.readChunk(chunk_size - file_off); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@min(size - file_off, temp.len)]; + var rdr = self.sliceReader(size, true); + while (try rdr.next()) |slice| { try writer.writeAll(slice); + } + } - file_off += slice.len; - self.advance(slice.len); - if (file_off >= size) { - self.advance(pad_len); - return; - } + // copy dst.len bytes into dst + pub fn copy(self: *Self, dst: []u8) ![]const u8 { + var rdr = self.sliceReader(dst.len, true); + var pos: usize = 0; + while (try rdr.next()) |slice| : (pos += slice.len) { + @memcpy(dst[pos .. 
pos + slice.len], slice); } + return dst; } - pub fn copy(self: *Self, dst_buffer: []u8, size: usize) !void { - const rounded_file_size = std.mem.alignForward(usize, size, 512); - const chunk_size = rounded_file_size + 512; - - var i: usize = 0; - while (i < size) { - const slice = try self.readChunk(chunk_size - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @min(size - i, slice.len); - @memcpy(dst_buffer[i .. i + copy_size], slice[0..copy_size]); - self.advance(copy_size); - i += copy_size; + const SliceReader = struct { + size: usize, + chunk_size: usize, + offset: usize, + reader: *Self, + auto_advance: bool, + + fn next(self: *@This()) !?[]const u8 { + if (self.offset >= self.size) return null; + + const temp = try self.reader.readChunk(self.chunk_size - self.offset); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@min(self.remainingSize(), temp.len)]; + if (self.auto_advance) try self.advance(slice.len); + return slice; + } + + fn advance(self: *@This(), len: usize) !void { + self.offset += len; + try self.reader.skip(len); } + + fn copy(self: *@This(), dst: []u8) ![]const u8 { + _ = try self.reader.copy(dst); + self.offset += dst.len; + return dst; + } + + fn remainingSize(self: *@This()) usize { + return self.size - self.offset; + } + }; + + pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { + return .{ + .size = size, + .chunk_size = roundedFileSize(size) + block_size, + .offset = 0, + .reader = self, + .auto_advance = auto_advance, + }; } }; } +// file_size rouneded to te block boundary +inline fn roundedFileSize(file_size: usize) usize { + return std.mem.alignForward(usize, file_size, block_size); +} + +// number of padding bytes at the last file block +inline fn filePadding(file_size: usize) usize { + return roundedFileSize(file_size) - file_size; +} + fn Iterator(comptime ReaderType: type) type { + const BufferedReaderType = 
BufferedReader(ReaderType); return struct { - file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - file_name_len: usize = 0, - link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - link_name_len: usize = 0, + attrs: struct { + buffer: [std.fs.MAX_PATH_BYTES * 2]u8 = undefined, + tail: usize = 0, + + fn alloc(self: *@This(), size: usize) ![]u8 { + if (size > self.len()) return error.NameTooLong; + const head = self.tail; + self.tail += size; + assert(self.tail <= self.buffer.len); + return self.buffer[head..self.tail]; + } + + fn free(self: *@This()) void { + self.tail = 0; + } + + fn len(self: *@This()) usize { + return self.buffer.len - self.tail; + } + } = .{}, - reader: BufferedReader(ReaderType), + reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, const Self = @This(); const File = struct { - name: []const u8, - link_name: []const u8, - size: usize, - file_type: Header.FileType, - iter: *Self, + name: []const u8 = &[_]u8{}, + link_name: []const u8 = &[_]u8{}, + size: usize = 0, + file_type: Header.FileType = .normal, + reader: *BufferedReaderType, pub fn write(self: File, writer: anytype) !void { - try self.iter.reader.write(writer, self.size); + try self.reader.write(writer, self.size); + try self.skipPadding(); } pub fn skip(self: File) !void { - const rounded_file_size = std.mem.alignForward(usize, self.size, 512); - try self.iter.reader.skip(rounded_file_size); + try self.reader.skip(roundedFileSize(self.size)); + } + + fn skipPadding(self: File) !void { + try self.reader.skip(filePadding(self.size)); } fn chksum(self: File) ![16]u8 { - var cs = [_]u8{0} ** 16; - if (self.size == 0) return cs; + var sum = [_]u8{0} ** 16; + if (self.size == 0) return sum; - var buffer: [512]u8 = undefined; + var rdr = self.reader.sliceReader(self.size, true); var h = std.crypto.hash.Md5.init(.{}); - - var remaining_bytes: usize = self.size; - while (remaining_bytes > 0) { - const copy_size = @min(buffer.len, remaining_bytes); - try 
self.iter.reader.copy(&buffer, copy_size); - h.update(buffer[0..copy_size]); - remaining_bytes -= copy_size; + while (try rdr.next()) |slice| { + h.update(slice); } - h.final(&cs); + h.final(&sum); try self.skipPadding(); - return cs; - } - - fn skipPadding(self: File) !void { - const rounded_file_size = std.mem.alignForward(usize, self.size, 512); - const pad_len: usize = rounded_file_size - self.size; - self.iter.reader.advance(pad_len); + return sum; } }; + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". pub fn next(self: *Self) !?File { - self.file_name_len = 0; - self.link_name_len = 0; - - while (true) { - const chunk = try self.reader.readChunk(1024); - switch (chunk.len) { - 0 => return null, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, - } - self.reader.advance(512); - - const header: Header = .{ .bytes = chunk[0..512] }; - if (header.isZeroBlock()) return null; - const file_size = try header.fileSize(); - const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); - const file_type = header.fileType(); - const link_name = if (self.link_name_len == 0) - header.linkName() - else - self.link_name_buffer[0..self.link_name_len]; - const file_name = if (self.file_name_len == 0) - try header.fullFileName(&self.file_name_buffer) - else - self.file_name_buffer[0..self.file_name_len]; + var file: File = .{ .reader = &self.reader }; + self.attrs.free(); + + while (try self.reader.readBlock()) |block_bytes| { + const block: Header = .{ .bytes = block_bytes[0..block_size] }; + if (block.isZero()) return null; + const file_type = block.fileType(); + const file_size = try block.fileSize(); switch (file_type) { 
.directory, .normal, .symbolic_link => { - return File{ - .name = file_name, - .size = file_size, - .file_type = file_type, - .link_name = link_name, - .iter = self, - }; + if (file.size == 0) file.size = file_size; + if (file.name.len == 0) + file.name = try block.fullFileName((try self.attrs.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); + if (file.link_name.len == 0) file.link_name = block.linkName(); + file.file_type = file_type; + return file; }, .global_extended_header => { - self.reader.skip(rounded_file_size) catch return error.TarHeadersTooBig; + self.reader.skipFile(file_size) catch return error.TarHeadersTooBig; }, .extended_header => { if (file_size == 0) continue; - const chunk_size: usize = rounded_file_size + 512; - var data_off: usize = 0; - while (data_off < file_size) { - const slice = try self.reader.readChunk(chunk_size - data_off); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = file_size - data_off; - const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); - - if (std.mem.eql(u8, attr_info.key, "path")) { - if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; - self.reader.advance(attr_info.value_off); - try self.reader.copy(&self.file_name_buffer, attr_info.value_len); - self.file_name_len = attr_info.value_len; - self.reader.advance(1); - } else if (std.mem.eql(u8, attr_info.key, "linkpath")) { - if (attr_info.value_len > self.link_name_buffer.len) return error.NameTooLong; - self.reader.advance(attr_info.value_off); - try self.reader.copy(&self.link_name_buffer, attr_info.value_len); - self.link_name_len = attr_info.value_len; - self.reader.advance(1); + var rdr = self.reader.sliceReader(file_size, false); + while (try rdr.next()) |slice| { + const attr = try parsePaxAttribute(slice, rdr.remainingSize()); + try rdr.advance(attr.value_off); + if (attr.is("path")) { + file.name = try rdr.copy(try 
self.attrs.alloc(attr.value_len)); + } else if (attr.is("linkpath")) { + file.link_name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + } else if (attr.is("size")) { + var buf = [_]u8{'0'} ** 32; + file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); } else { - try self.reader.skip(attr_info.size); + try rdr.advance(attr.value_len); } - data_off += attr_info.size; + try rdr.advance(1); } - try self.reader.skip(rounded_file_size - data_off); - - continue; + try self.reader.skipPadding(file_size); }, .hard_link => return error.TarUnsupportedFileType, else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, file_name), + .file_name = try d.allocator.dupe(u8, block.name()), .file_type = file_type, } }); }, } } + return null; } }; } @@ -481,6 +523,10 @@ const PaxAttributeInfo = struct { key: []const u8, value_off: usize, value_len: usize, + + inline fn is(self: @This(), key: []const u8) bool { + return (std.mem.eql(u8, self.key, key)); + } }; fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { @@ -515,7 +561,7 @@ test parsePaxAttribute { try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); } -const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; const TestCase = struct { @@ -628,19 +674,19 @@ test "Go test cases" { // }, // // TODO: giving wrong result because we are not reading pax size header - // .{ - // .path = "pax-pos-size-file.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "foo", - // .size = 999, - // .file_type = .normal, - // }, - // }, - // .chksums = &[_][]const u8{ - // "0afb597b283fe61b5d4879669a350556", - // }, - // }, + .{ + .path = "pax-pos-size-file.tar", + .files = &[_]TestCase.File{ + .{ + .name = "foo", + .size = 999, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + 
"0afb597b283fe61b5d4879669a350556", + }, + }, .{ // has pax records which we are not interested in .path = "pax-records.tar", From e1424b84b87903df265cc052f3dac17d1ec1c3be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 15:28:38 +0100 Subject: [PATCH 06/29] tar: add parsing size in gnu extended format Reference: https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions If the leading byte is 0x80 (128), the non-leading bytes of the field are concatenated in big-endian order, with the result being a positive number expressed in binary form. --- lib/std/tar.zig | 75 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9ea8f1965204..16726a1e5c1e 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -62,10 +62,10 @@ pub const Options = struct { }; }; -const block_size = 512; +const BLOCK_SIZE = 512; pub const Header = struct { - bytes: *const [block_size]u8, + bytes: *const [BLOCK_SIZE]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -84,6 +84,19 @@ pub const Header = struct { pub fn fileSize(header: Header) !u64 { const raw = header.bytes[124..][0..12]; + // If the leading byte is 0xff (255), all the bytes of the field + // (including the leading byte) are concatenated in big-endian order, + // with the result being a negative number expressed in two’s + // complement form. + if (raw[0] == 0xff) return error.SizeNegative; + // If the leading byte is 0x80 (128), the non-leading bytes of the + // field are concatenated in big-endian order. + if (raw[0] == 0x80) { + if (raw[1] + raw[2] + raw[3] != 0) return error.SizeTooBig; + return std.mem.readInt(u64, raw[4..12], .big); + } + // Zero-filled octal number in ASCII. 
Each numeric field of width w + // contains w minus 1 digits, and a null const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); if (rtrimmed.len == 0) return 0; @@ -148,7 +161,7 @@ pub const Header = struct { fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, - buffer: [block_size * 8]u8 = undefined, + buffer: [BLOCK_SIZE * 8]u8 = undefined, start: usize = 0, end: usize = 0, @@ -164,14 +177,14 @@ fn BufferedReader(comptime ReaderType: type) type { } pub fn readBlock(self: *Self) !?[]const u8 { - const block_bytes = try self.readChunk(block_size * 2); + const block_bytes = try self.readChunk(BLOCK_SIZE * 2); switch (block_bytes.len) { 0 => return null, - 1...(block_size - 1) => return error.UnexpectedEndOfStream, + 1...(BLOCK_SIZE - 1) => return error.UnexpectedEndOfStream, else => {}, } - self.advance(block_size); - return block_bytes[0..block_size]; + self.advance(BLOCK_SIZE); + return block_bytes[0..BLOCK_SIZE]; } pub fn advance(self: *Self, count: usize) void { @@ -258,7 +271,7 @@ fn BufferedReader(comptime ReaderType: type) type { pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { return .{ .size = size, - .chunk_size = roundedFileSize(size) + block_size, + .chunk_size = roundedFileSize(size) + BLOCK_SIZE, .offset = 0, .reader = self, .auto_advance = auto_advance, @@ -267,12 +280,12 @@ fn BufferedReader(comptime ReaderType: type) type { }; } -// file_size rouneded to te block boundary +// File size rounded to te block boundary. inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, block_size); + return std.mem.alignForward(usize, file_size, BLOCK_SIZE); } -// number of padding bytes at the last file block +// Number of padding bytes in the last file block. 
inline fn filePadding(file_size: usize) usize { return roundedFileSize(file_size) - file_size; } @@ -341,17 +354,18 @@ fn Iterator(comptime ReaderType: type) type { } }; - // Externally, Next iterates through the tar archive as if it is a series of - // files. Internally, the tar format often uses fake "files" to add meta - // data that describes the next file. These meta data "files" should not - // normally be visible to the outside. As such, this loop iterates through - // one or more "header files" until it finds a "normal file". + // Externally, `next` iterates through the tar archive as if it is a + // series of files. Internally, the tar format often uses fake "files" + // to add meta data that describes the next file. These meta data + // "files" should not normally be visible to the outside. As such, this + // loop iterates through one or more "header files" until it finds a + // "normal file". pub fn next(self: *Self) !?File { var file: File = .{ .reader = &self.reader }; self.attrs.free(); while (try self.reader.readBlock()) |block_bytes| { - const block: Header = .{ .bytes = block_bytes[0..block_size] }; + const block: Header = .{ .bytes = block_bytes[0..BLOCK_SIZE] }; if (block.isZero()) return null; const file_type = block.fileType(); const file_size = try block.fileSize(); @@ -572,6 +586,7 @@ const TestCase = struct { size: usize = 0, link_name: []const u8 = empty_string, file_type: Header.FileType = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files }; path: []const u8, @@ -794,10 +809,32 @@ test "Go test cases" { }, }, }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. + .path = "writer-big.tar", + .files = &[_]TestCase.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. 
+ .path = "writer-big-long.tar", + .files = &[_]TestCase.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + }, + }, + }, }; for (cases) |case| { - // if (!std.mem.eql(u8, case.path, "pax.tar")) continue; + //if (!std.mem.eql(u8, case.path, "pax-pos-size-file.tar")) continue; var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); @@ -825,7 +862,7 @@ test "Go test cases" { // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); } else { - try actual.skip(); // skip file content + if (!expected.truncated) try actual.skip(); // skip file content } i += 1; } From 169f28d3e6a908717a0e42323ba1a0ee765976da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 15:31:22 +0100 Subject: [PATCH 07/29] tar: fix import path --- lib/std/tar.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 16726a1e5c1e..6e1390990581 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,3 +1,6 @@ +const std = @import("std.zig"); +const assert = std.debug.assert; + pub const Options = struct { /// Number of directory levels to skip when extracting files. 
strip_components: u32 = 0, From 16c40fc4713c195c7a6b8544c9dffbfc6201dc9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 17:17:20 +0100 Subject: [PATCH 08/29] tar: add header chksum checking --- lib/std/tar.zig | 102 ++++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 6e1390990581..40ca26da7972 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -85,31 +85,6 @@ pub const Header = struct { _, }; - pub fn fileSize(header: Header) !u64 { - const raw = header.bytes[124..][0..12]; - // If the leading byte is 0xff (255), all the bytes of the field - // (including the leading byte) are concatenated in big-endian order, - // with the result being a negative number expressed in two’s - // complement form. - if (raw[0] == 0xff) return error.SizeNegative; - // If the leading byte is 0x80 (128), the non-leading bytes of the - // field are concatenated in big-endian order. - if (raw[0] == 0x80) { - if (raw[1] + raw[2] + raw[3] != 0) return error.SizeTooBig; - return std.mem.readInt(u64, raw[4..12], .big); - } - // Zero-filled octal number in ASCII. Each numeric field of width w - // contains w minus 1 digits, and a null - const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); - const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); - if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); - } - - pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); - } - /// Includes prefix concatenated, if any. /// Return value may point into Header buffer, or might point into the /// argument buffer. 
@@ -128,15 +103,27 @@ pub const Header = struct { } pub fn name(header: Header) []const u8 { - return str(header, 0, 0 + 100); + return header.str(0, 100); + } + + pub fn fileSize(header: Header) !u64 { + return header.numeric(124, 12); + } + + pub fn chksum(header: Header) !u64 { + return header.octal(148, 8); } pub fn linkName(header: Header) []const u8 { - return str(header, 157, 157 + 100); + return header.str(157, 100); + } + + pub fn is_ustar(header: Header) bool { + return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); } pub fn prefix(header: Header) []const u8 { - return str(header, 345, 345 + 155); + return header.str(345, 155); } pub fn fileType(header: Header) FileType { @@ -145,7 +132,8 @@ pub const Header = struct { return result; } - fn str(header: Header, start: usize, end: usize) []const u8 { + fn str(header: Header, start: usize, len: usize) []const u8 { + const end = start + len; var i: usize = start; while (i < end) : (i += 1) { if (header.bytes[i] == 0) break; @@ -153,11 +141,52 @@ pub const Header = struct { return header.bytes[start..i]; } - pub fn isZero(header: Header) bool { - for (header.bytes) |b| { - if (b != 0) return false; + fn numeric(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // If the leading byte is 0xff (255), all the bytes of the field + // (including the leading byte) are concatenated in big-endian order, + // with the result being a negative number expressed in two’s + // complement form. + if (raw[0] == 0xff) return error.TarNumericValueNegative; + // If the leading byte is 0x80 (128), the non-leading bytes of the + // field are concatenated in big-endian order. 
+ if (raw[0] == 0x80) { + if (raw[1] + raw[2] + raw[3] != 0) return error.TarNumericValueTooBig; + return std.mem.readInt(u64, raw[4..12], .big); } - return true; + return try header.octal(start, len); + } + + fn octal(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // Zero-filled octal number in ASCII. Each numeric field of width w + // contains w minus 1 digits, and a null + const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); + const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); + if (rtrimmed.len == 0) return 0; + return std.fmt.parseInt(u64, rtrimmed, 8); + } + + // Sum of all bytes in the header block. The chksum field is treated as if + // it were filled with spaces (ASCII 32). + fn computeChksum(header: Header) u64 { + var sum: u64 = 0; + for (header.bytes, 0..) |b, i| { + if (148 <= i and i < 156) continue; // skip chksum field bytes + sum += b; + } + // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces. + return if (sum > 0) sum + 256 else 0; + } + + // Checks calculated chksum with value of chksum field. + // Returns error or chksum value. + // Zero value indicates empty block. 
+ pub fn checkChksum(header: Header) !u64 { + const field = try header.chksum(); + const computed = header.computeChksum(); + if (field != computed) return error.TarHeaderChksum; + return field; } }; @@ -368,8 +397,8 @@ fn Iterator(comptime ReaderType: type) type { self.attrs.free(); while (try self.reader.readBlock()) |block_bytes| { - const block: Header = .{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (block.isZero()) return null; + const block = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; + if (try block.checkChksum() == 0) return null; // zero block found const file_type = block.fileType(); const file_size = try block.fileSize(); @@ -578,9 +607,6 @@ test parsePaxAttribute { try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); } -const std = @import("std"); -const assert = std.debug.assert; - const TestCase = struct { const File = struct { const empty_string = &[0]u8{}; From 48b160c1bf75f602acabc3b43eca56b8aa4abf4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 20:30:08 +0100 Subject: [PATCH 09/29] tar: handle pax null attrs and pax attr ending --- lib/std/tar.zig | 79 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 40ca26da7972..b6dd517d3f58 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -164,7 +164,7 @@ pub const Header = struct { const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); + return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader; } // Sum of all bytes in the header block. 
The chksum field is treated as if @@ -289,6 +289,10 @@ fn BufferedReader(comptime ReaderType: type) type { try self.reader.skip(len); } + fn byte(self: *@This()) u8 { + return self.reader.buffer[self.reader.start]; + } + fn copy(self: *@This(), dst: []u8) ![]const u8 { _ = try self.reader.copy(dst); self.offset += dst.len; @@ -416,21 +420,25 @@ fn Iterator(comptime ReaderType: type) type { }, .extended_header => { if (file_size == 0) continue; + // TODO: ovo resetiranje je nezgodno + self.attrs.free(); + file = File{ .reader = &self.reader }; var rdr = self.reader.sliceReader(file_size, false); while (try rdr.next()) |slice| { const attr = try parsePaxAttribute(slice, rdr.remainingSize()); try rdr.advance(attr.value_off); if (attr.is("path")) { - file.name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + file.name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); } else if (attr.is("linkpath")) { - file.link_name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + file.link_name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); } else if (attr.is("size")) { var buf = [_]u8{'0'} ** 32; file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); } else { try rdr.advance(attr.value_len); } + if (rdr.byte() != '\n') return error.InvalidPaxAttribute; try rdr.advance(1); } try self.reader.skipPadding(file_size); @@ -582,15 +590,21 @@ fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { if (kv_size > max_size) { return error.InvalidPaxAttribute; } + const key = data[pos_space + 1 .. pos_equals]; return .{ .size = kv_size, - .key = data[pos_space + 1 .. 
pos_equals], + .key = try noNull(key), .value_off = pos_equals + 1, .value_len = kv_size - pos_equals - 2, }; } -test parsePaxAttribute { +fn noNull(str: []const u8) ![]const u8 { + if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; + return str; +} + +test "parsePaxAttribute" { const expectEqual = std.testing.expectEqual; const expectEqualStrings = std.testing.expectEqualStrings; const expectError = std.testing.expectError; @@ -605,6 +619,7 @@ test parsePaxAttribute { try expectEqual(attr_info, try parsePaxAttribute(header, 1012)); try expectError(error.InvalidPaxAttribute, parsePaxAttribute(header, 1010)); try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); + try expectError(error.InvalidPaxAttribute, parsePaxAttribute("13 pa\x00th=abc\n", 1024)); // null in key } const TestCase = struct { @@ -633,12 +648,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -656,12 +669,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -675,12 +686,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -694,7 +703,6 @@ test "Go test cases" { .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, - .file_type = .normal, }, .{ .name = "a/b", @@ -707,18 +715,25 @@ test "Go test cases" { "3c382e8f5b6631aa2db52643912ffd4a", }, }, - // TODO: this should fail - // .{ - // .path = "pax-bad-hdr-file.tar", - // .err = error.TarBadHeader, - // }, + .{ + // pax attribute don't end with \n 
+ .path = "pax-bad-hdr-file.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "PAX1/PAX1/long-path-name", + // .size = 684, + // }, + // }, + .err = error.InvalidPaxAttribute, + }, + // // .{ // .path = "pax-bad-mtime-file.tar", // .err = error.TarBadHeader, // }, // - // TODO: giving wrong result because we are not reading pax size header .{ + // size is in pax attribute .path = "pax-pos-size-file.tar", .files = &[_]TestCase.File{ .{ @@ -799,9 +814,17 @@ test "Go test cases" { .path = "gnu-incremental.tar", .err = error.TarUnsupportedFileType, }, - // .{ - // .path = "pax-multi-hdrs.tar", - // }, + .{ + // should use values only from last pax header + .path = "pax-multi-hdrs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .file_type = .symbolic_link, + }, + }, + }, // .{ // .path = "gnu-long-nul.tar", // .files = &[_]TestCase.File{ @@ -827,8 +850,20 @@ test "Go test cases" { }, }, }, + .{ + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "pax-nul-path.tar", + .err = error.InvalidPaxAttribute, + }, + .{ + .path = "pax-nul-xattrs.tar", + .err = error.InvalidPaxAttribute, + }, // TODO some files with errors: - // pax-nul-xattrs.tar, pax-nul-path.tar, neg-size.tar, issue10968.tar, issue11169.tar, issue12435.tar + // issue10968.tar, issue11169.tar, issue12435.tar .{ .path = "trailing-slash.tar", .files = &[_]TestCase.File{ From c761dfc1761b38be8d1dc72dd4c0cbf07d2c0eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 21:37:13 +0100 Subject: [PATCH 10/29] tar: add gnu path and link extensions handling --- lib/std/tar.zig | 142 +++++++++++++++++++++++++++++++----------------- 1 file changed, 92 insertions(+), 50 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index b6dd517d3f58..f22ee0e73309 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -82,6 +82,10 @@ pub const Header = struct { contiguous = '7', global_extended_header = 'g', 
extended_header = 'x', + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + gnu_long_name = 'L', + gnu_long_link = 'K', _, }; @@ -119,7 +123,8 @@ pub const Header = struct { } pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); + const magic = header.bytes[257..][0..6]; + return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' '); } pub fn prefix(header: Header) []const u8 { @@ -133,12 +138,7 @@ pub const Header = struct { } fn str(header: Header, start: usize, len: usize) []const u8 { - const end = start + len; - var i: usize = start; - while (i < end) : (i += 1) { - if (header.bytes[i] == 0) break; - } - return header.bytes[start..i]; + return nullStr(header.bytes[start .. start + len]); } fn numeric(header: Header, start: usize, len: usize) !u64 { @@ -190,6 +190,14 @@ pub const Header = struct { } }; +// break string on first null char +fn nullStr(str: []const u8) []const u8 { + for (str, 0..) 
|c, i| { + if (c == 0) return str[0..i]; + } + return str; +} + fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, @@ -274,7 +282,7 @@ fn BufferedReader(comptime ReaderType: type) type { reader: *Self, auto_advance: bool, - fn next(self: *@This()) !?[]const u8 { + pub fn next(self: *@This()) !?[]const u8 { if (self.offset >= self.size) return null; const temp = try self.reader.readChunk(self.chunk_size - self.offset); @@ -284,22 +292,22 @@ fn BufferedReader(comptime ReaderType: type) type { return slice; } - fn advance(self: *@This(), len: usize) !void { + pub fn advance(self: *@This(), len: usize) !void { self.offset += len; try self.reader.skip(len); } - fn byte(self: *@This()) u8 { + pub fn byte(self: *@This()) u8 { return self.reader.buffer[self.reader.start]; } - fn copy(self: *@This(), dst: []u8) ![]const u8 { + pub fn copy(self: *@This(), dst: []u8) ![]const u8 { _ = try self.reader.copy(dst); self.offset += dst.len; return dst; } - fn remainingSize(self: *@This()) usize { + pub fn remainingSize(self: *@This()) usize { return self.size - self.offset; } }; @@ -443,6 +451,14 @@ fn Iterator(comptime ReaderType: type) type { } try self.reader.skipPadding(file_size); }, + .gnu_long_name => { + file.name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); + try self.reader.skipPadding(file_size); + }, + .gnu_long_link => { + file.link_name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); + try self.reader.skipPadding(file_size); + }, .hard_link => return error.TarUnsupportedFileType, else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; @@ -624,22 +640,20 @@ test "parsePaxAttribute" { const TestCase = struct { const File = struct { - const empty_string = &[0]u8{}; - name: []const u8, size: usize = 0, - link_name: []const u8 = empty_string, + link_name: []const u8 = &[0]u8{}, file_type: Header.FileType = .normal, truncated: bool = false, // when there is no 
file body, just header, usefull for huge files }; - path: []const u8, - files: []const File = &[_]TestCase.File{}, - chksums: []const []const u8 = &[_][]const u8{}, - err: ?anyerror = null, + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]TestCase.File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error }; -test "Go test cases" { +test "tar: Go test cases" { const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); const cases = [_]TestCase{ .{ @@ -718,12 +732,6 @@ test "Go test cases" { .{ // pax attribute don't end with \n .path = "pax-bad-hdr-file.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "PAX1/PAX1/long-path-name", - // .size = 684, - // }, - // }, .err = error.InvalidPaxAttribute, }, // @@ -808,9 +816,16 @@ test "Go test cases" { }, .{ .path = "gnu-multi-hdrs.tar", - .err = error.TarUnsupportedFileType, + .files = &[_]TestCase.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .file_type = .symbolic_link, + }, + }, }, .{ + // has gnu type D (directory) and S (sparse) blocks .path = "gnu-incremental.tar", .err = error.TarUnsupportedFileType, }, @@ -825,23 +840,22 @@ test "Go test cases" { }, }, }, - // .{ - // .path = "gnu-long-nul.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "012233456789", - // }, - // }, - // }, - // .{ - // .path = "gnu-utf8.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "012233456789", - // }, - // }, - // }, - // + .{ + .path = "gnu-long-nul.tar", + .files = &[_]TestCase.File{ + .{ + .name = "0123456789", + }, + }, + }, + .{ + .path = "gnu-utf8.tar", + .files = &[_]TestCase.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + }, + }, + }, .{ .path = "gnu-not-utf8.tar", .files = &[_]TestCase.File{ @@ -851,19 +865,47 @@ 
test "Go test cases" { }, }, .{ - .path = "neg-size.tar", - .err = error.TarHeader, + // null in pax key + .path = "pax-nul-xattrs.tar", + .err = error.InvalidPaxAttribute, }, .{ .path = "pax-nul-path.tar", .err = error.InvalidPaxAttribute, }, .{ - .path = "pax-nul-xattrs.tar", - .err = error.InvalidPaxAttribute, + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue10968.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue11169.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue12435.tar", + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .path = "invalid-go17.tar", + .files = &[_]TestCase.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .path = "ustar-file-devs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file", + }, + }, }, - // TODO some files with errors: - // issue10968.tar, issue11169.tar, issue12435.tar .{ .path = "trailing-slash.tar", .files = &[_]TestCase.File{ From 6e7a39c935b13dddc9153e534e5af8fe12bc5cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Thu, 30 Nov 2023 21:28:10 +0100 Subject: [PATCH 11/29] tar: refactor reading pax attributes --- lib/std/tar.zig | 303 +++++++++++++++++++++++++++++++----------------- 1 file changed, 197 insertions(+), 106 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index f22ee0e73309..d6a51a94cf11 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -198,6 +198,16 @@ fn nullStr(str: []const u8) []const u8 { return str; } +// File size rounded to te block boundary. +inline fn roundedFileSize(file_size: usize) usize { + return std.mem.alignForward(usize, file_size, BLOCK_SIZE); +} + +// Number of padding bytes in the last file block. 
+inline fn filePadding(file_size: usize) usize { + return roundedFileSize(file_size) - file_size; +} + fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, @@ -207,16 +217,32 @@ fn BufferedReader(comptime ReaderType: type) type { const Self = @This(); - pub fn readChunk(self: *Self, count: usize) ![]const u8 { - self.ensureCapacity(1024); - + fn readChunk(self: *Self, count: usize) ![]const u8 { + self.ensureCapacity(BLOCK_SIZE * 2); const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); self.end += try self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - return self.buffer[self.start..self.end]; } - pub fn readBlock(self: *Self) !?[]const u8 { + // Returns slice of size count or part of it. + pub fn readSlice(self: *Self, count: usize) ![]const u8 { + if (count <= self.end - self.start) { + // fastpath, we have enough bytes in buffer + return self.buffer[self.start .. self.start + count]; + } + + const chunk_size = roundedFileSize(count) + BLOCK_SIZE; + const temp = try self.readChunk(chunk_size); + if (temp.len == 0) return error.UnexpectedEndOfStream; + return temp[0..@min(count, temp.len)]; + } + + // Returns tar header block, 512 bytes. Before reading advances buffer + // for padding of the previous block, to position reader at the start of + // new block. After reading advances for block size, to position reader + // at the start of the file body. + pub fn readBlock(self: *Self, padding: usize) !?[]const u8 { + try self.skip(padding); const block_bytes = try self.readChunk(BLOCK_SIZE * 2); switch (block_bytes.len) { 0 => return null, @@ -227,11 +253,19 @@ fn BufferedReader(comptime ReaderType: type) type { return block_bytes[0..BLOCK_SIZE]; } + // Retruns byte at current position in buffer. + pub fn readByte(self: *@This()) u8 { + return self.buffer[self.start]; + } + + // Advances reader for count bytes, assumes that we have that number of + // bytes in buffer. 
pub fn advance(self: *Self, count: usize) void { self.start += count; assert(self.start <= self.end); } + // Advances reader without assuming that count bytes are in the buffer. pub fn skip(self: *Self, count: usize) !void { if (self.start + count > self.end) { try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); @@ -241,14 +275,6 @@ fn BufferedReader(comptime ReaderType: type) type { } } - pub fn skipPadding(self: *Self, file_size: usize) !void { - return self.skip(filePadding(file_size)); - } - - pub fn skipFile(self: *Self, file_size: usize) !void { - return self.skip(roundedFileSize(file_size)); - } - inline fn ensureCapacity(self: *Self, count: usize) void { if (self.buffer.len - self.start < count) { const dest_end = self.end - self.start; @@ -258,16 +284,26 @@ fn BufferedReader(comptime ReaderType: type) type { } } - pub fn write(self: *Self, writer: anytype, size: usize) !void { - var rdr = self.sliceReader(size, true); + // Write count bytes to the writer. + pub fn write(self: *Self, writer: anytype, count: usize) !void { + if (self.read(count)) |buf| { + try writer.writeAll(buf); + return; + } + var rdr = self.sliceReader(count); while (try rdr.next()) |slice| { try writer.writeAll(slice); } } - // copy dst.len bytes into dst + // Copy dst.len bytes into dst buffer. pub fn copy(self: *Self, dst: []u8) ![]const u8 { - var rdr = self.sliceReader(dst.len, true); + if (self.read(dst.len)) |buf| { + // fastpath we already have enough bytes in buffer + @memcpy(dst, buf); + return dst; + } + var rdr = self.sliceReader(dst.len); var pos: usize = 0; while (try rdr.next()) |slice| : (pos += slice.len) { @memcpy(dst[pos .. pos + slice.len], slice); @@ -275,91 +311,151 @@ fn BufferedReader(comptime ReaderType: type) type { return dst; } + // Retruns count bytes from buffer and advances for that number of + // bytes. If we don't have that much bytes buffered returns null. 
+ fn read(self: *Self, count: usize) ?[]const u8 { + if (count <= self.end - self.start) { + const buf = self.buffer[self.start .. self.start + count]; + self.advance(count); + return buf; + } + return null; + } + const SliceReader = struct { size: usize, - chunk_size: usize, offset: usize, reader: *Self, - auto_advance: bool, - - pub fn next(self: *@This()) !?[]const u8 { - if (self.offset >= self.size) return null; - const temp = try self.reader.readChunk(self.chunk_size - self.offset); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@min(self.remainingSize(), temp.len)]; - if (self.auto_advance) try self.advance(slice.len); + pub fn next(self: *SliceReader) !?[]const u8 { + const remaining_size = self.size - self.offset; + if (remaining_size == 0) return null; + const slice = try self.reader.readSlice(remaining_size); + self.advance(slice.len); return slice; } - pub fn advance(self: *@This(), len: usize) !void { + fn advance(self: *SliceReader, len: usize) void { self.offset += len; - try self.reader.skip(len); - } - - pub fn byte(self: *@This()) u8 { - return self.reader.buffer[self.reader.start]; - } - - pub fn copy(self: *@This(), dst: []u8) ![]const u8 { - _ = try self.reader.copy(dst); - self.offset += dst.len; - return dst; - } - - pub fn remainingSize(self: *@This()) usize { - return self.size - self.offset; + self.reader.advance(len); } }; - pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { + pub fn sliceReader(self: *Self, size: usize) SliceReader { return .{ .size = size, - .chunk_size = roundedFileSize(size) + BLOCK_SIZE, + .reader = self, .offset = 0, + }; + } + + pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { + return .{ + .size = size, .reader = self, - .auto_advance = auto_advance, + .offset = 0, }; } - }; -} -// File size rounded to te block boundary. 
-inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, BLOCK_SIZE); -} + const PaxFileReader = struct { + size: usize, + offset: usize = 0, + reader: *Self, -// Number of padding bytes in the last file block. -inline fn filePadding(file_size: usize) usize { - return roundedFileSize(file_size) - file_size; + const PaxKey = enum { + path, + linkpath, + size, + }; + + const PaxAttribute = struct { + key: PaxKey, + value_len: usize, + parent: *PaxFileReader, + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least value_len. + pub fn value(self: PaxAttribute, dst: []u8) ![]u8 { + assert(dst.len >= self.value_len); + const buf = dst[0..self.value_len]; + _ = try self.parent.reader.copy(buf); + self.parent.offset += buf.len; + try self.parent.checkAttributeEnding(); + return buf; + } + }; + + // Caller of the next has to call value in PaxAttribute, to advance + // reader across value. + pub fn next(self: *PaxFileReader) !?PaxAttribute { + const rdr = self.reader; + _ = rdr; + + while (true) { + const remaining_size = self.size - self.offset; + if (remaining_size == 0) return null; + + const inf = try parsePaxAttribute( + try self.reader.readSlice(remaining_size), + remaining_size, + ); + const key: PaxKey = if (inf.is("path")) + .path + else if (inf.is("linkpath")) + .linkpath + else if (inf.is("size")) + .size + else { + try self.advance(inf.value_off + inf.value_len); + try self.checkAttributeEnding(); + continue; + }; + try self.advance(inf.value_off); // position reader at the start of the value + return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self }; + } + } + + fn checkAttributeEnding(self: *PaxFileReader) !void { + if (self.reader.readByte() != '\n') return error.InvalidPaxAttribute; + try self.advance(1); + } + + fn advance(self: *PaxFileReader, len: usize) !void { + self.offset += len; + try self.reader.skip(len); + } + }; + }; } 
fn Iterator(comptime ReaderType: type) type { const BufferedReaderType = BufferedReader(ReaderType); return struct { - attrs: struct { - buffer: [std.fs.MAX_PATH_BYTES * 2]u8 = undefined, + // scratch buffer for file attributes + scratch: struct { + // size: two paths (name and link_name) and size (24 in pax attribute) + buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, tail: usize = 0, + // Allocate size of the buffer for some attribute. fn alloc(self: *@This(), size: usize) ![]u8 { - if (size > self.len()) return error.NameTooLong; + const free_size = self.buffer.len - self.tail; + if (size > free_size) return error.TarScratchBufferOverflow; const head = self.tail; self.tail += size; assert(self.tail <= self.buffer.len); return self.buffer[head..self.tail]; } + // Free whole buffer. fn free(self: *@This()) void { self.tail = 0; } - - fn len(self: *@This()) usize { - return self.buffer.len - self.tail; - } } = .{}, reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, + padding: usize = 0, // bytes of file padding const Self = @This(); @@ -372,28 +468,22 @@ fn Iterator(comptime ReaderType: type) type { pub fn write(self: File, writer: anytype) !void { try self.reader.write(writer, self.size); - try self.skipPadding(); } pub fn skip(self: File) !void { - try self.reader.skip(roundedFileSize(self.size)); - } - - fn skipPadding(self: File) !void { - try self.reader.skip(filePadding(self.size)); + try self.reader.skip(self.size); } fn chksum(self: File) ![16]u8 { var sum = [_]u8{0} ** 16; if (self.size == 0) return sum; - var rdr = self.reader.sliceReader(self.size, true); + var rdr = self.reader.sliceReader(self.size); var h = std.crypto.hash.Md5.init(.{}); while (try rdr.next()) |slice| { h.update(slice); } h.final(&sum); - try self.skipPadding(); return sum; } }; @@ -406,64 +496,65 @@ fn Iterator(comptime ReaderType: type) type { // "normal file". 
pub fn next(self: *Self) !?File { var file: File = .{ .reader = &self.reader }; - self.attrs.free(); + self.scratch.free(); - while (try self.reader.readBlock()) |block_bytes| { - const block = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (try block.checkChksum() == 0) return null; // zero block found - const file_type = block.fileType(); - const file_size = try block.fileSize(); + while (try self.reader.readBlock(self.padding)) |block_bytes| { + const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; + if (try header.checkChksum() == 0) return null; // zero block found + + const file_type = header.fileType(); + const file_size = try header.fileSize(); + self.padding = filePadding(file_size); switch (file_type) { + // file types to retrun from next .directory, .normal, .symbolic_link => { if (file.size == 0) file.size = file_size; + self.padding = filePadding(file.size); + if (file.name.len == 0) - file.name = try block.fullFileName((try self.attrs.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); - if (file.link_name.len == 0) file.link_name = block.linkName(); + file.name = try header.fullFileName((try self.scratch.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); + if (file.link_name.len == 0) file.link_name = header.linkName(); file.file_type = file_type; return file; }, - .global_extended_header => { - self.reader.skipFile(file_size) catch return error.TarHeadersTooBig; + // prefix header types + .gnu_long_name => { + file.name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + }, + .gnu_long_link => { + file.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); }, .extended_header => { if (file_size == 0) continue; - // TODO: ovo resetiranje je nezgodno - self.attrs.free(); + // use just last extended header data + self.scratch.free(); file = File{ .reader = &self.reader }; - var rdr = self.reader.sliceReader(file_size, false); - while (try rdr.next()) |slice| { - const attr = try 
parsePaxAttribute(slice, rdr.remainingSize()); - try rdr.advance(attr.value_off); - if (attr.is("path")) { - file.name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); - } else if (attr.is("linkpath")) { - file.link_name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); - } else if (attr.is("size")) { - var buf = [_]u8{'0'} ** 32; - file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); - } else { - try rdr.advance(attr.value_len); + var rdr = self.reader.paxFileReader(file_size); + while (try rdr.next()) |attr| { + switch (attr.key) { + .path => { + file.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + }, + .linkpath => { + file.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + }, + .size => { + file.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); + }, } - if (rdr.byte() != '\n') return error.InvalidPaxAttribute; - try rdr.advance(1); } - try self.reader.skipPadding(file_size); - }, - .gnu_long_name => { - file.name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); - try self.reader.skipPadding(file_size); }, - .gnu_long_link => { - file.link_name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); - try self.reader.skipPadding(file_size); + // ignored header types + .global_extended_header => { + self.reader.skip(file_size) catch return error.TarHeadersTooBig; }, - .hard_link => return error.TarUnsupportedFileType, + // unsupported header types else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, block.name()), + .file_name = try d.allocator.dupe(u8, header.name()), .file_type = file_type, } }); }, From 6bfa7bf197634272f30d864a4563f7cddbaf55c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 18:26:31 
+0100 Subject: [PATCH 12/29] tar: use scratch buffer for file names That makes names strings stable during the iteration. Otherwise string buffers can be overwritten while reading file content. --- lib/std/tar.zig | 289 ++++++++++++++++++++++-------------------------- 1 file changed, 130 insertions(+), 159 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index d6a51a94cf11..1a69f113cc98 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -66,6 +66,7 @@ pub const Options = struct { }; const BLOCK_SIZE = 512; +const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) pub const Header = struct { bytes: *const [BLOCK_SIZE]u8, @@ -90,16 +91,14 @@ pub const Header = struct { }; /// Includes prefix concatenated, if any. - /// Return value may point into Header buffer, or might point into the - /// argument buffer. /// TODO: check against "../" and other nefarious things - pub fn fullFileName(header: Header, buffer: *[std.fs.MAX_PATH_BYTES]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 { const n = name(header); - if (!is_ustar(header)) - return n; const p = prefix(header); - if (p.len == 0) - return n; + if (!is_ustar(header) or p.len == 0) { + @memcpy(buffer[0..n.len], n); + return buffer[0..n.len]; + } @memcpy(buffer[0..p.len], p); buffer[p.len] = '/'; @memcpy(buffer[p.len + 1 ..][0..n.len], n); @@ -180,7 +179,7 @@ pub const Header = struct { } // Checks calculated chksum with value of chksum field. - // Returns error or chksum value. + // Returns error or valid chksum value. // Zero value indicates empty block. pub fn checkChksum(header: Header) !u64 { const field = try header.chksum(); @@ -190,7 +189,7 @@ pub const Header = struct { } }; -// break string on first null char +// Breaks string on first null char. fn nullStr(str: []const u8) []const u8 { for (str, 0..) 
|c, i| { if (c == 0) return str[0..i]; @@ -198,14 +197,10 @@ fn nullStr(str: []const u8) []const u8 { return str; } -// File size rounded to te block boundary. -inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, BLOCK_SIZE); -} - // Number of padding bytes in the last file block. -inline fn filePadding(file_size: usize) usize { - return roundedFileSize(file_size) - file_size; +inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, BLOCK_SIZE); // size rounded to te block boundary + return block_rounded - size; } fn BufferedReader(comptime ReaderType: type) type { @@ -217,44 +212,38 @@ fn BufferedReader(comptime ReaderType: type) type { const Self = @This(); - fn readChunk(self: *Self, count: usize) ![]const u8 { - self.ensureCapacity(BLOCK_SIZE * 2); - const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); - self.end += try self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - return self.buffer[self.start..self.end]; + // Fills buffer from underlaying reader. + fn fillBuffer(self: *Self) !void { + self.removeUsed(); + self.end += try self.unbuffered_reader.read(self.buffer[self.end..]); } - // Returns slice of size count or part of it. + // Returns slice of size count or how much fits into buffer. pub fn readSlice(self: *Self, count: usize) ![]const u8 { if (count <= self.end - self.start) { - // fastpath, we have enough bytes in buffer return self.buffer[self.start .. self.start + count]; } - - const chunk_size = roundedFileSize(count) + BLOCK_SIZE; - const temp = try self.readChunk(chunk_size); - if (temp.len == 0) return error.UnexpectedEndOfStream; - return temp[0..@min(count, temp.len)]; + try self.fillBuffer(); + const buf = self.buffer[self.start..self.end]; + if (buf.len == 0) return error.UnexpectedEndOfStream; + return buf[0..@min(count, buf.len)]; } - // Returns tar header block, 512 bytes. 
Before reading advances buffer - // for padding of the previous block, to position reader at the start of - // new block. After reading advances for block size, to position reader - // at the start of the file body. - pub fn readBlock(self: *Self, padding: usize) !?[]const u8 { + // Returns tar header block, 512 bytes, or null if eof. Before reading + // advances buffer for padding of the previous block, to position reader + // at the start of new block. After reading advances for block size, to + // position reader at the start of the file content. + pub fn readHeader(self: *Self, padding: usize) !?[]const u8 { try self.skip(padding); - const block_bytes = try self.readChunk(BLOCK_SIZE * 2); - switch (block_bytes.len) { - 0 => return null, - 1...(BLOCK_SIZE - 1) => return error.UnexpectedEndOfStream, - else => {}, - } + const buf = self.readSlice(BLOCK_SIZE) catch return null; + if (buf.len < BLOCK_SIZE) return error.UnexpectedEndOfStream; self.advance(BLOCK_SIZE); - return block_bytes[0..BLOCK_SIZE]; + return buf[0..BLOCK_SIZE]; } - // Retruns byte at current position in buffer. + // Returns byte at current position in buffer. pub fn readByte(self: *@This()) u8 { + assert(self.start < self.end); return self.buffer[self.start]; } @@ -275,78 +264,36 @@ fn BufferedReader(comptime ReaderType: type) type { } } - inline fn ensureCapacity(self: *Self, count: usize) void { - if (self.buffer.len - self.start < count) { - const dest_end = self.end - self.start; - @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); - self.end = dest_end; - self.start = 0; - } + // Removes used part of the buffer. + inline fn removeUsed(self: *Self) void { + const dest_end = self.end - self.start; + if (self.start == 0 or dest_end > self.start) return; + @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); + self.end = dest_end; + self.start = 0; } - // Write count bytes to the writer. + // Writes count bytes to the writer. Advances reader. 
pub fn write(self: *Self, writer: anytype, count: usize) !void { - if (self.read(count)) |buf| { - try writer.writeAll(buf); - return; - } - var rdr = self.sliceReader(count); - while (try rdr.next()) |slice| { + var pos: usize = 0; + while (pos < count) { + const slice = try self.readSlice(count - pos); try writer.writeAll(slice); + self.advance(slice.len); + pos += slice.len; } } - // Copy dst.len bytes into dst buffer. + // Copies dst.len bytes into dst buffer. Advances reader. pub fn copy(self: *Self, dst: []u8) ![]const u8 { - if (self.read(dst.len)) |buf| { - // fastpath we already have enough bytes in buffer - @memcpy(dst, buf); - return dst; - } - var rdr = self.sliceReader(dst.len); var pos: usize = 0; - while (try rdr.next()) |slice| : (pos += slice.len) { + while (pos < dst.len) { + const slice = try self.readSlice(dst.len - pos); @memcpy(dst[pos .. pos + slice.len], slice); - } - return dst; - } - - // Retruns count bytes from buffer and advances for that number of - // bytes. If we don't have that much bytes buffered returns null. - fn read(self: *Self, count: usize) ?[]const u8 { - if (count <= self.end - self.start) { - const buf = self.buffer[self.start .. 
self.start + count]; - self.advance(count); - return buf; - } - return null; - } - - const SliceReader = struct { - size: usize, - offset: usize, - reader: *Self, - - pub fn next(self: *SliceReader) !?[]const u8 { - const remaining_size = self.size - self.offset; - if (remaining_size == 0) return null; - const slice = try self.reader.readSlice(remaining_size); self.advance(slice.len); - return slice; - } - - fn advance(self: *SliceReader, len: usize) void { - self.offset += len; - self.reader.advance(len); + pos += slice.len; } - }; - - pub fn sliceReader(self: *Self, size: usize) SliceReader { - return .{ - .size = size, - .reader = self, - .offset = 0, - }; + return dst; } pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { @@ -388,9 +335,6 @@ fn BufferedReader(comptime ReaderType: type) type { // Caller of the next has to call value in PaxAttribute, to advance // reader across value. pub fn next(self: *PaxFileReader) !?PaxAttribute { - const rdr = self.reader; - _ = rdr; - while (true) { const remaining_size = self.size - self.offset; if (remaining_size == 0) return null; @@ -433,10 +377,14 @@ fn Iterator(comptime ReaderType: type) type { return struct { // scratch buffer for file attributes scratch: struct { - // size: two paths (name and link_name) and size (24 in pax attribute) + // size: two paths (name and link_name) and files size bytes (24 in pax attribute) buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, tail: usize = 0, + name: []const u8 = undefined, + link_name: []const u8 = undefined, + size: usize = 0, + // Allocate size of the buffer for some attribute. fn alloc(self: *@This(), size: usize) ![]u8 { const free_size = self.buffer.len - self.tail; @@ -447,45 +395,53 @@ fn Iterator(comptime ReaderType: type) type { return self.buffer[head..self.tail]; } - // Free whole buffer. - fn free(self: *@This()) void { + // Reset buffer and all fields. 
+ fn reset(self: *@This()) void { self.tail = 0; + self.name = self.buffer[0..0]; + self.link_name = self.buffer[0..0]; + self.size = 0; + } + + fn append(self: *@This(), header: Header) !void { + if (self.size == 0) self.size = try header.fileSize(); + if (self.link_name.len == 0) { + const link_name = header.linkName(); + if (link_name.len > 0) { + const buf = try self.alloc(link_name.len); + @memcpy(buf, link_name); + self.link_name = buf; + } + } + if (self.name.len == 0) { + self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); + } } } = .{}, reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of file padding + padding: usize = 0, // bytes of padding to the end of the block const Self = @This(); - const File = struct { - name: []const u8 = &[_]u8{}, - link_name: []const u8 = &[_]u8{}, - size: usize = 0, - file_type: Header.FileType = .normal, + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + file_type: Header.FileType, + reader: *BufferedReaderType, + // Writes file content to writer. pub fn write(self: File, writer: anytype) !void { try self.reader.write(writer, self.size); } + // Skips file content. Advances reader. pub fn skip(self: File) !void { try self.reader.skip(self.size); } - - fn chksum(self: File) ![16]u8 { - var sum = [_]u8{0} ** 16; - if (self.size == 0) return sum; - - var rdr = self.reader.sliceReader(self.size); - var h = std.crypto.hash.Md5.init(.{}); - while (try rdr.next()) |slice| { - h.update(slice); - } - h.final(&sum); - return sum; - } }; // Externally, `next` iterates through the tar archive as if it is a @@ -495,62 +451,62 @@ fn Iterator(comptime ReaderType: type) type { // loop iterates through one or more "header files" until it finds a // "normal file". 
pub fn next(self: *Self) !?File { - var file: File = .{ .reader = &self.reader }; - self.scratch.free(); + self.scratch.reset(); - while (try self.reader.readBlock(self.padding)) |block_bytes| { + while (try self.reader.readHeader(self.padding)) |block_bytes| { const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; if (try header.checkChksum() == 0) return null; // zero block found const file_type = header.fileType(); - const file_size = try header.fileSize(); - self.padding = filePadding(file_size); + const size: usize = @intCast(try header.fileSize()); + self.padding = blockPadding(size); switch (file_type) { - // file types to retrun from next + // File types to retrun upstream .directory, .normal, .symbolic_link => { - if (file.size == 0) file.size = file_size; - self.padding = filePadding(file.size); - - if (file.name.len == 0) - file.name = try header.fullFileName((try self.scratch.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); - if (file.link_name.len == 0) file.link_name = header.linkName(); - file.file_type = file_type; + try self.scratch.append(header); + const file = File{ + .file_type = file_type, + .name = self.scratch.name, + .link_name = self.scratch.link_name, + .size = self.scratch.size, + .reader = &self.reader, + }; + self.padding = blockPadding(file.size); return file; }, - // prefix header types + // Prefix header types .gnu_long_name => { - file.name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); }, .gnu_long_link => { - file.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); }, .extended_header => { - if (file_size == 0) continue; - // use just last extended header data - self.scratch.free(); - file = File{ .reader = &self.reader }; + if (size == 0) continue; + // Use just attributes from last extended header. 
+ self.scratch.reset(); - var rdr = self.reader.paxFileReader(file_size); + var rdr = self.reader.paxFileReader(size); while (try rdr.next()) |attr| { switch (attr.key) { .path => { - file.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); }, .linkpath => { - file.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); }, .size => { - file.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); + self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); }, } } }, - // ignored header types + // Ignored header type .global_extended_header => { - self.reader.skip(file_size) catch return error.TarHeadersTooBig; + self.reader.skip(size) catch return error.TarHeadersTooBig; }, - // unsupported header types + // All other are unsupported header types else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ @@ -1053,16 +1009,31 @@ test "tar: Go test cases" { try std.testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { - var actual_chksum = try actual.chksum(); - var hex_to_bytes_buffer: [16]u8 = undefined; - const expected_chksum = try std.fmt.hexToBytes(&hex_to_bytes_buffer, case.chksums[i]); - // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); - try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); + var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + // std.debug.print("actual chksum: {s}\n", .{chksum}); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); } else { if (!expected.truncated) try actual.skip(); // skip 
file content } - i += 1; } try std.testing.expectEqual(case.files.len, i); } } + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; + From 2ed9a276a701cc55eccf4fcbf68476e797f1818b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 18:50:48 +0100 Subject: [PATCH 13/29] tar: use Go test cases path from env variable Skip tests if env is not set. --- lib/std/tar.zig | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 1a69f113cc98..36a6de1292e5 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -701,7 +701,11 @@ const TestCase = struct { }; test "tar: Go test cases" { - const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); + const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| + try std.fs.openDirAbsolute(path, .{}) + else + return error.SkipZigTest; + const cases = [_]TestCase{ .{ .path = "gnu.tar", @@ -781,12 +785,6 @@ test "tar: Go test cases" { .path = "pax-bad-hdr-file.tar", .err = error.InvalidPaxAttribute, }, - // - // .{ - // .path = "pax-bad-mtime-file.tar", - // .err = error.TarBadHeader, - // }, - // .{ // size is in pax attribute .path = "pax-pos-size-file.tar", @@ -987,8 +985,6 @@ test "tar: Go test cases" { }; for (cases) |case| { - //if (!std.mem.eql(u8, case.path, "pax-pos-size-file.tar")) continue; - var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); @@ -1001,7 +997,7 @@ test "tar: Go test cases" { } else { return err; } - }) |actual| { + }) |actual| : (i += 1) { const expected = case.files[i]; try std.testing.expectEqualStrings(expected.name, actual.name); try 
std.testing.expectEqual(expected.size, actual.size); @@ -1012,7 +1008,6 @@ test "tar: Go test cases" { var md5writer = Md5Writer{}; try actual.write(&md5writer); const chksum = md5writer.chksum(); - // std.debug.print("actual chksum: {s}\n", .{chksum}); try std.testing.expectEqualStrings(case.chksums[i], &chksum); } else { if (!expected.truncated) try actual.skip(); // skip file content From 2a432d3008fa1e9af645de96b08cbad57709ffb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 19:03:32 +0100 Subject: [PATCH 14/29] tar: prefix test cases with 'tar' To make it little easier to filter from all stdlib tests. --- lib/std/tar.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 36a6de1292e5..ffc4d69d56a8 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -628,7 +628,7 @@ fn stripComponents(path: []const u8, count: u32) ![]const u8 { return path[i..]; } -test stripComponents { +test "tar stripComponents" { const expectEqualStrings = std.testing.expectEqualStrings; try expectEqualStrings("a/b/c", try stripComponents("a/b/c", 0)); try expectEqualStrings("b/c", try stripComponents("a/b/c", 1)); @@ -667,7 +667,7 @@ fn noNull(str: []const u8) ![]const u8 { return str; } -test "parsePaxAttribute" { +test "tar parsePaxAttribute" { const expectEqual = std.testing.expectEqual; const expectEqualStrings = std.testing.expectEqualStrings; const expectError = std.testing.expectError; @@ -700,7 +700,7 @@ const TestCase = struct { err: ?anyerror = null, // parsing should fail with this error }; -test "tar: Go test cases" { +test "tar run Go test cases" { const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| try std.fs.openDirAbsolute(path, .{}) else @@ -1031,4 +1031,3 @@ const Md5Writer = struct { return std.fmt.bytesToHex(s, .lower); } }; - From 7b0bbc680fa831200653fb0af7cb46a768e0dd93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Dec 2023 
15:00:42 +0100 Subject: [PATCH 15/29] tar: add file mode to result of tarbal iteration So we have information to set executable bit on write to file system. --- lib/std/tar.zig | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index ffc4d69d56a8..51c1c023ae95 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -109,6 +109,10 @@ pub const Header = struct { return header.str(0, 100); } + pub fn mode(header: Header) !u32 { + return @intCast(try header.numeric(100, 8)); + } + pub fn fileSize(header: Header) !u64 { return header.numeric(124, 12); } @@ -429,6 +433,7 @@ fn Iterator(comptime ReaderType: type) type { name: []const u8, // name of file, symlink or directory link_name: []const u8, // target name of symlink size: usize, // size of the file in bytes + mode: u32, file_type: Header.FileType, reader: *BufferedReaderType, @@ -471,6 +476,7 @@ fn Iterator(comptime ReaderType: type) type { .link_name = self.scratch.link_name, .size = self.scratch.size, .reader = &self.reader, + .mode = try header.mode(), }; self.padding = blockPadding(file.size); return file; @@ -689,6 +695,7 @@ const TestCase = struct { const File = struct { name: []const u8, size: usize = 0, + mode: u32 = 0, link_name: []const u8 = &[0]u8{}, file_type: Header.FileType = .normal, truncated: bool = false, // when there is no file body, just header, usefull for huge files @@ -713,10 +720,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o640, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -734,10 +743,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o640, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -751,10 +762,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o444, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o444, }, }, .chksums = 
&[_][]const u8{ @@ -768,11 +781,13 @@ test "tar run Go test cases" { .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, + .mode = 0o664, }, .{ .name = "a/b", .size = 0, .file_type = .symbolic_link, + .mode = 0o777, .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, }, @@ -793,6 +808,7 @@ test "tar run Go test cases" { .name = "foo", .size = 999, .file_type = .normal, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -833,6 +849,7 @@ test "tar run Go test cases" { .name = "P1050238.JPG.log", .size = 14, .file_type = .normal, + .mode = 0o664, }, }, .chksums = &[_][]const u8{ @@ -847,11 +864,13 @@ test "tar run Go test cases" { .name = "small.txt", .size = 5, .file_type = .normal, + .mode = 0o644, }, .{ .name = "small2.txt", .size = 11, .file_type = .normal, + .mode = 0o644, }, }, .chksums = &[_][]const u8{ @@ -890,6 +909,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "0123456789", + .mode = 0o644, }, }, }, @@ -898,6 +918,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, }, }, }, @@ -906,6 +927,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, }, }, }, @@ -948,6 +970,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "file", + .mode = 0o644, }, }, }, @@ -968,6 +991,7 @@ test "tar run Go test cases" { .name = "tmp/16gig.txt", .size = 16 * 1024 * 1024 * 1024, .truncated = true, + .mode = 0o640, }, }, }, @@ -978,6 +1002,7 @@ test "tar run Go test cases" { .{ .name = "longname/" ** 15 ++ "16gig.txt", .size = 16 * 1024 * 1024 * 
1024, + .mode = 0o644, .truncated = true, }, }, @@ -1002,6 +1027,7 @@ test "tar run Go test cases" { try std.testing.expectEqualStrings(expected.name, actual.name); try std.testing.expectEqual(expected.size, actual.size); try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqual(expected.mode, actual.mode); try std.testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { From a3cf8ec71ec17f384608a6df0d41b804f2cfe231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 5 Dec 2023 17:08:45 +0100 Subject: [PATCH 16/29] tar: add pax file reader tests --- lib/std/tar.zig | 121 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 108 insertions(+), 13 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 51c1c023ae95..6d1934d91c54 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -209,17 +209,17 @@ inline fn blockPadding(size: usize) usize { fn BufferedReader(comptime ReaderType: type) type { return struct { - unbuffered_reader: ReaderType, + underlying_reader: ReaderType, buffer: [BLOCK_SIZE * 8]u8 = undefined, start: usize = 0, end: usize = 0, const Self = @This(); - // Fills buffer from underlaying reader. + // Fills buffer from underlying unbuffered reader. fn fillBuffer(self: *Self) !void { self.removeUsed(); - self.end += try self.unbuffered_reader.read(self.buffer[self.end..]); + self.end += try self.underlying_reader.read(self.buffer[self.end..]); } // Returns slice of size count or how much fits into buffer. @@ -261,7 +261,7 @@ fn BufferedReader(comptime ReaderType: type) type { // Advances reader without assuming that count bytes are in the buffer. 
pub fn skip(self: *Self, count: usize) !void { if (self.start + count > self.end) { - try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); + try self.underlying_reader.skipBytes(self.start + count - self.end, .{}); self.start = self.end; } else { self.advance(count); @@ -313,14 +313,14 @@ fn BufferedReader(comptime ReaderType: type) type { offset: usize = 0, reader: *Self, - const PaxKey = enum { + const PaxKeyKind = enum { path, linkpath, size, }; const PaxAttribute = struct { - key: PaxKey, + key: PaxKeyKind, value_len: usize, parent: *PaxFileReader, @@ -347,7 +347,7 @@ fn BufferedReader(comptime ReaderType: type) type { try self.reader.readSlice(remaining_size), remaining_size, ); - const key: PaxKey = if (inf.is("path")) + const key: PaxKeyKind = if (inf.is("path")) .path else if (inf.is("linkpath")) .linkpath @@ -376,8 +376,7 @@ fn BufferedReader(comptime ReaderType: type) type { }; } -fn Iterator(comptime ReaderType: type) type { - const BufferedReaderType = BufferedReader(ReaderType); +fn Iterator(comptime BufferedReaderType: type) type { return struct { // scratch buffer for file attributes scratch: struct { @@ -527,14 +526,19 @@ fn Iterator(comptime ReaderType: type) type { }; } -pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { - const ReaderType = @TypeOf(reader); +pub fn iterator(underlying_reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(BufferedReader(@TypeOf(underlying_reader))) { return .{ - .reader = BufferedReader(ReaderType){ .unbuffered_reader = reader }, + .reader = bufferedReader(underlying_reader), .diagnostics = diagnostics, }; } +fn bufferedReader(underlying_reader: anytype) BufferedReader(@TypeOf(underlying_reader)) { + return BufferedReader(@TypeOf(underlying_reader)){ + .underlying_reader = underlying_reader, + }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -656,7 
+660,7 @@ fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute; const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute; const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10); - if (kv_size > max_size) { + if (kv_size > max_size or kv_size < pos_equals + 2) { return error.InvalidPaxAttribute; } const key = data[pos_space + 1 .. pos_equals]; @@ -1057,3 +1061,94 @@ const Md5Writer = struct { return std.fmt.bytesToHex(s, .lower); } }; + +test "tar PaxFileReader" { + const Attribute = struct { + const PaxKeyKind = enum { + path, + linkpath, + size, + }; + key: PaxKeyKind, + value: []const u8, + }; + const cases = [_]struct { + data: []const u8, + attrs: []const Attribute, + err: ?anyerror = null, + }{ + .{ // valid but unknown keys + .data = + \\30 mtime=1350244992.023960108 + \\6 k=1 + \\13 key1=val1 + \\10 a=name + \\9 a=name + \\ + , + .attrs = &[_]Attribute{}, + }, + .{ // mix of known and unknown keys + .data = + \\6 k=1 + \\13 path=name + \\17 linkpath=link + \\13 key1=val1 + \\12 size=123 + \\13 key2=val2 + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + .{ .key = .linkpath, .value = "link" }, + .{ .key = .size, .value = "123" }, + }, + }, + .{ // too short size of the second key-value pair + .data = + \\13 path=name + \\10 linkpath=value + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + }, + .err = error.InvalidPaxAttribute, + }, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\19 linkpath=value + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + }, + .err = error.InvalidPaxAttribute, + }, + }; + var buffer: [1024]u8 = undefined; + + for (cases) |case| { + var stream = std.io.fixedBufferStream(case.data); + var brdr = bufferedReader(stream.reader()); + + var rdr = 
brdr.paxFileReader(case.data.len); + var i: usize = 0; + while (rdr.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |attr| : (i += 1) { + try std.testing.expectEqualStrings( + case.attrs[i].value, + try attr.value(&buffer), + ); + } + try std.testing.expectEqual(case.attrs.len, i); + try std.testing.expect(case.err == null); + } +} From 58e0e509c6dc8fae77e668ef8ee267dfdb619196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 6 Dec 2023 15:35:29 +0100 Subject: [PATCH 17/29] tar: add module comment and references --- lib/std/tar.zig | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 6d1934d91c54..a5eb7a3ef569 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,4 +1,21 @@ const std = @import("std.zig"); +/// Tar archive is single ordinary file which can contain many files (or +/// directories, symlinks, ...). It's build by series of blocks each size of 512 +/// bytes. First block of each entry is header which defines type, name, size +/// permissions and other attributes. Header is followed by series of blocks of +/// file content, if any that entry has content. Content is padded to the block +/// size, so next header always starts at block boundary. +/// +/// This simple format is extended by GNU and POSIX pax extensions to support +/// file names longer than 256 bytes and additional attributes. +/// +/// This is not comprehensive tar parser. Here we are only file types needed to +/// support Zig package manager; normal file, directory, symbolic link. And +/// subset of attributes: name, size, permissions. 
+/// +/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html +/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 + const assert = std.debug.assert; pub const Options = struct { @@ -193,7 +210,7 @@ pub const Header = struct { } }; -// Breaks string on first null char. +// Breaks string on first null character. fn nullStr(str: []const u8) []const u8 { for (str, 0..) |c, i| { if (c == 0) return str[0..i]; From dbab45cfc6a952aa4ec873d6a33c487cd431bc62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 15:48:43 +0100 Subject: [PATCH 18/29] tar: replace custom buffered reader with std.io --- lib/std/tar.zig | 808 ++++++++++++++++++++++-------------------------- 1 file changed, 366 insertions(+), 442 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index a5eb7a3ef569..e15301589ab0 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,4 +1,3 @@ -const std = @import("std.zig"); /// Tar archive is single ordinary file which can contain many files (or /// directories, symlinks, ...). It's build by series of blocks each size of 512 /// bytes. First block of each entry is header which defines type, name, size @@ -15,7 +14,9 @@ const std = @import("std.zig"); /// /// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html /// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 - +/// +//const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; pub const Options = struct { @@ -224,338 +225,6 @@ inline fn blockPadding(size: usize) usize { return block_rounded - size; } -fn BufferedReader(comptime ReaderType: type) type { - return struct { - underlying_reader: ReaderType, - buffer: [BLOCK_SIZE * 8]u8 = undefined, - start: usize = 0, - end: usize = 0, - - const Self = @This(); - - // Fills buffer from underlying unbuffered reader. 
- fn fillBuffer(self: *Self) !void { - self.removeUsed(); - self.end += try self.underlying_reader.read(self.buffer[self.end..]); - } - - // Returns slice of size count or how much fits into buffer. - pub fn readSlice(self: *Self, count: usize) ![]const u8 { - if (count <= self.end - self.start) { - return self.buffer[self.start .. self.start + count]; - } - try self.fillBuffer(); - const buf = self.buffer[self.start..self.end]; - if (buf.len == 0) return error.UnexpectedEndOfStream; - return buf[0..@min(count, buf.len)]; - } - - // Returns tar header block, 512 bytes, or null if eof. Before reading - // advances buffer for padding of the previous block, to position reader - // at the start of new block. After reading advances for block size, to - // position reader at the start of the file content. - pub fn readHeader(self: *Self, padding: usize) !?[]const u8 { - try self.skip(padding); - const buf = self.readSlice(BLOCK_SIZE) catch return null; - if (buf.len < BLOCK_SIZE) return error.UnexpectedEndOfStream; - self.advance(BLOCK_SIZE); - return buf[0..BLOCK_SIZE]; - } - - // Returns byte at current position in buffer. - pub fn readByte(self: *@This()) u8 { - assert(self.start < self.end); - return self.buffer[self.start]; - } - - // Advances reader for count bytes, assumes that we have that number of - // bytes in buffer. - pub fn advance(self: *Self, count: usize) void { - self.start += count; - assert(self.start <= self.end); - } - - // Advances reader without assuming that count bytes are in the buffer. - pub fn skip(self: *Self, count: usize) !void { - if (self.start + count > self.end) { - try self.underlying_reader.skipBytes(self.start + count - self.end, .{}); - self.start = self.end; - } else { - self.advance(count); - } - } - - // Removes used part of the buffer. 
- inline fn removeUsed(self: *Self) void { - const dest_end = self.end - self.start; - if (self.start == 0 or dest_end > self.start) return; - @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); - self.end = dest_end; - self.start = 0; - } - - // Writes count bytes to the writer. Advances reader. - pub fn write(self: *Self, writer: anytype, count: usize) !void { - var pos: usize = 0; - while (pos < count) { - const slice = try self.readSlice(count - pos); - try writer.writeAll(slice); - self.advance(slice.len); - pos += slice.len; - } - } - - // Copies dst.len bytes into dst buffer. Advances reader. - pub fn copy(self: *Self, dst: []u8) ![]const u8 { - var pos: usize = 0; - while (pos < dst.len) { - const slice = try self.readSlice(dst.len - pos); - @memcpy(dst[pos .. pos + slice.len], slice); - self.advance(slice.len); - pos += slice.len; - } - return dst; - } - - pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { - return .{ - .size = size, - .reader = self, - .offset = 0, - }; - } - - const PaxFileReader = struct { - size: usize, - offset: usize = 0, - reader: *Self, - - const PaxKeyKind = enum { - path, - linkpath, - size, - }; - - const PaxAttribute = struct { - key: PaxKeyKind, - value_len: usize, - parent: *PaxFileReader, - - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least value_len. - pub fn value(self: PaxAttribute, dst: []u8) ![]u8 { - assert(dst.len >= self.value_len); - const buf = dst[0..self.value_len]; - _ = try self.parent.reader.copy(buf); - self.parent.offset += buf.len; - try self.parent.checkAttributeEnding(); - return buf; - } - }; - - // Caller of the next has to call value in PaxAttribute, to advance - // reader across value. 
- pub fn next(self: *PaxFileReader) !?PaxAttribute { - while (true) { - const remaining_size = self.size - self.offset; - if (remaining_size == 0) return null; - - const inf = try parsePaxAttribute( - try self.reader.readSlice(remaining_size), - remaining_size, - ); - const key: PaxKeyKind = if (inf.is("path")) - .path - else if (inf.is("linkpath")) - .linkpath - else if (inf.is("size")) - .size - else { - try self.advance(inf.value_off + inf.value_len); - try self.checkAttributeEnding(); - continue; - }; - try self.advance(inf.value_off); // position reader at the start of the value - return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self }; - } - } - - fn checkAttributeEnding(self: *PaxFileReader) !void { - if (self.reader.readByte() != '\n') return error.InvalidPaxAttribute; - try self.advance(1); - } - - fn advance(self: *PaxFileReader, len: usize) !void { - self.offset += len; - try self.reader.skip(len); - } - }; - }; -} - -fn Iterator(comptime BufferedReaderType: type) type { - return struct { - // scratch buffer for file attributes - scratch: struct { - // size: two paths (name and link_name) and files size bytes (24 in pax attribute) - buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, - tail: usize = 0, - - name: []const u8 = undefined, - link_name: []const u8 = undefined, - size: usize = 0, - - // Allocate size of the buffer for some attribute. - fn alloc(self: *@This(), size: usize) ![]u8 { - const free_size = self.buffer.len - self.tail; - if (size > free_size) return error.TarScratchBufferOverflow; - const head = self.tail; - self.tail += size; - assert(self.tail <= self.buffer.len); - return self.buffer[head..self.tail]; - } - - // Reset buffer and all fields. 
- fn reset(self: *@This()) void { - self.tail = 0; - self.name = self.buffer[0..0]; - self.link_name = self.buffer[0..0]; - self.size = 0; - } - - fn append(self: *@This(), header: Header) !void { - if (self.size == 0) self.size = try header.fileSize(); - if (self.link_name.len == 0) { - const link_name = header.linkName(); - if (link_name.len > 0) { - const buf = try self.alloc(link_name.len); - @memcpy(buf, link_name); - self.link_name = buf; - } - } - if (self.name.len == 0) { - self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); - } - } - } = .{}, - - reader: BufferedReaderType, - diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of padding to the end of the block - - const Self = @This(); - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes - mode: u32, - file_type: Header.FileType, - - reader: *BufferedReaderType, - - // Writes file content to writer. - pub fn write(self: File, writer: anytype) !void { - try self.reader.write(writer, self.size); - } - - // Skips file content. Advances reader. - pub fn skip(self: File) !void { - try self.reader.skip(self.size); - } - }; - - // Externally, `next` iterates through the tar archive as if it is a - // series of files. Internally, the tar format often uses fake "files" - // to add meta data that describes the next file. These meta data - // "files" should not normally be visible to the outside. As such, this - // loop iterates through one or more "header files" until it finds a - // "normal file". 
- pub fn next(self: *Self) !?File { - self.scratch.reset(); - - while (try self.reader.readHeader(self.padding)) |block_bytes| { - const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (try header.checkChksum() == 0) return null; // zero block found - - const file_type = header.fileType(); - const size: usize = @intCast(try header.fileSize()); - self.padding = blockPadding(size); - - switch (file_type) { - // File types to retrun upstream - .directory, .normal, .symbolic_link => { - try self.scratch.append(header); - const file = File{ - .file_type = file_type, - .name = self.scratch.name, - .link_name = self.scratch.link_name, - .size = self.scratch.size, - .reader = &self.reader, - .mode = try header.mode(), - }; - self.padding = blockPadding(file.size); - return file; - }, - // Prefix header types - .gnu_long_name => { - self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); - }, - .gnu_long_link => { - self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); - }, - .extended_header => { - if (size == 0) continue; - // Use just attributes from last extended header. 
- self.scratch.reset(); - - var rdr = self.reader.paxFileReader(size); - while (try rdr.next()) |attr| { - switch (attr.key) { - .path => { - self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); - }, - .linkpath => { - self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); - }, - .size => { - self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skip(size) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedFileType; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = file_type, - } }); - }, - } - } - return null; - } - }; -} - -pub fn iterator(underlying_reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(BufferedReader(@TypeOf(underlying_reader))) { - return .{ - .reader = bufferedReader(underlying_reader), - .diagnostics = diagnostics, - }; -} - -fn bufferedReader(underlying_reader: anytype) BufferedReader(@TypeOf(underlying_reader)) { - return BufferedReader(@TypeOf(underlying_reader)){ - .underlying_reader = underlying_reader, - }; -} - pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -569,7 +238,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, } - var iter = iterator(reader, options.diagnostics); + var iter = tarReader(reader, options.diagnostics); while (try iter.next()) |file| { switch (file.file_type) { @@ -662,82 +331,37 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -const PaxAttributeInfo = struct { - size: usize, - key: []const u8, - value_off: usize, - value_len: usize, - 
- inline fn is(self: @This(), key: []const u8) bool { - return (std.mem.eql(u8, self.key, key)); - } -}; - -fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { - const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute; - const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute; - const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10); - if (kv_size > max_size or kv_size < pos_equals + 2) { - return error.InvalidPaxAttribute; - } - const key = data[pos_space + 1 .. pos_equals]; - return .{ - .size = kv_size, - .key = try noNull(key), - .value_off = pos_equals + 1, - .value_len = kv_size - pos_equals - 2, - }; -} - fn noNull(str: []const u8) ![]const u8 { if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; return str; } -test "tar parsePaxAttribute" { - const expectEqual = std.testing.expectEqual; - const expectEqualStrings = std.testing.expectEqualStrings; - const expectError = std.testing.expectError; - const prefix = "1011 path="; - const file_name = "0123456789" ** 100; - const header = prefix ++ file_name ++ "\n"; - const attr_info = try parsePaxAttribute(header, 1011); - try expectEqual(@as(usize, 1011), attr_info.size); - try expectEqualStrings("path", attr_info.key); - try expectEqual(prefix.len, attr_info.value_off); - try expectEqual(file_name.len, attr_info.value_len); - try expectEqual(attr_info, try parsePaxAttribute(header, 1012)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute(header, 1010)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute("13 pa\x00th=abc\n", 1024)); // null in key -} +test "tar run Go test cases" { + const Case = struct { + const File = struct { + name: []const u8, + size: usize = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + file_type: Header.FileType = .normal, + 
truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; -const TestCase = struct { - const File = struct { - name: []const u8, - size: usize = 0, - mode: u32 = 0, - link_name: []const u8 = &[0]u8{}, - file_type: Header.FileType = .normal, - truncated: bool = false, // when there is no file body, just header, usefull for huge files + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error }; - path: []const u8, // path to the tar archive file on dis - files: []const File = &[_]TestCase.File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content - err: ?anyerror = null, // parsing should fail with this error -}; - -test "tar run Go test cases" { const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| try std.fs.openDirAbsolute(path, .{}) else return error.SkipZigTest; - const cases = [_]TestCase{ + const cases = [_]Case{ .{ .path = "gnu.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -760,7 +384,7 @@ test "tar run Go test cases" { }, .{ .path = "star.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -779,7 +403,7 @@ test "tar run Go test cases" { }, .{ .path = "v7.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -798,7 +422,7 @@ test "tar run Go test cases" { }, .{ .path = "pax.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, @@ -824,7 +448,7 @@ test 
"tar run Go test cases" { .{ // size is in pax attribute .path = "pax-pos-size-file.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "foo", .size = 999, @@ -839,7 +463,7 @@ test "tar run Go test cases" { .{ // has pax records which we are not interested in .path = "pax-records.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file", }, @@ -848,7 +472,7 @@ test "tar run Go test cases" { .{ // has global records which we are ignoring .path = "pax-global-records.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file1", }, @@ -865,7 +489,7 @@ test "tar run Go test cases" { }, .{ .path = "nil-uid.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "P1050238.JPG.log", .size = 14, @@ -880,7 +504,7 @@ test "tar run Go test cases" { .{ // has xattrs and pax records which we are ignoring .path = "xattrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -901,7 +525,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-multi-hdrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "GNU2/GNU2/long-path-name", .link_name = "GNU4/GNU4/long-linkpath-name", @@ -917,7 +541,7 @@ test "tar run Go test cases" { .{ // should use values only from last pax header .path = "pax-multi-hdrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "bar", .link_name = "PAX4/PAX4/long-linkpath-name", @@ -927,7 +551,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-long-nul.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "0123456789", .mode = 0o644, @@ -936,7 +560,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-utf8.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", .mode = 0o644, @@ -945,7 +569,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-not-utf8.tar", - .files = &[_]TestCase.File{ + .files = 
&[_]Case.File{ .{ .name = "hi\x80\x81\x82\x83bye", .mode = 0o644, @@ -980,7 +604,7 @@ test "tar run Go test cases" { .{ // has magic with space at end instead of null .path = "invalid-go17.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", }, @@ -988,7 +612,7 @@ test "tar run Go test cases" { }, .{ .path = "ustar-file-devs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file", .mode = 0o644, @@ -997,7 +621,7 @@ test "tar run Go test cases" { }, .{ .path = "trailing-slash.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, .file_type = .directory, @@ -1007,7 +631,7 @@ test "tar run Go test cases" { .{ // Has size in gnu extended format. To represent size bigger than 8 GB. .path = "writer-big.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "tmp/16gig.txt", .size = 16 * 1024 * 1024 * 1024, @@ -1019,7 +643,7 @@ test "tar run Go test cases" { .{ // Size in gnu extended format, and name in pax attribute. 
.path = "writer-big-long.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "longname/" ** 15 ++ "16gig.txt", .size = 16 * 1024 * 1024 * 1024, @@ -1034,7 +658,8 @@ test "tar run Go test cases" { var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); - var iter = iterator(fs_file.reader(), null); + //var iter = iterator(fs_file.reader(), null); + var iter = tarReader(fs_file.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { @@ -1072,6 +697,10 @@ const Md5Writer = struct { self.h.update(buf); } + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + pub fn chksum(self: *Md5Writer) [32]u8 { var s = [_]u8{0} ** 16; self.h.final(&s); @@ -1079,19 +708,113 @@ const Md5Writer = struct { } }; -test "tar PaxFileReader" { - const Attribute = struct { - const PaxKeyKind = enum { - path, - linkpath, - size, +fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { + return PaxReader(@TypeOf(reader)){ + .reader = reader, + .size = size, + }; +} + +const PaxAttrKind = enum { + path, + linkpath, + size, +}; + +fn PaxReader(comptime ReaderType: type) type { + return struct { + size: usize, + reader: ReaderType, + + const Self = @This(); + + const Attr = struct { + kind: PaxAttrKind, + len: usize, + reader: ReaderType, + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least value_len. + pub fn value(self: Attr, dst: []u8) ![]const u8 { + assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readAll(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try checkRecordEnd(self.reader); + return noNull(buf); + } }; - key: PaxKeyKind, - value: []const u8, + + // Iterates over pax records. Returns known records. Caller has to call + // value in Record, to advance reader across value. 
+ pub fn next(self: *Self) !?Attr { + var buf: [128]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buf); + + // An extended header consists of one or more records, each constructed as follows: + // "%d %s=%s\n", , , + while (self.size > 0) { + fbs.reset(); + // read length + try self.reader.streamUntilDelimiter(fbs.writer(), ' ', null); + const rec_len = try std.fmt.parseInt(usize, fbs.getWritten(), 10); // record len in bytes + var pos = try fbs.getPos() + 1; // bytes used for record len + separator + fbs.reset(); + // read keyword + try self.reader.streamUntilDelimiter(fbs.writer(), '=', null); + const keyword = fbs.getWritten(); + pos += try fbs.getPos() + 1; // keyword bytes + separator + try checkKeyword(keyword); + // get value_len + if (rec_len < pos + 1) return error.InvalidPaxAttribute; + const value_len = rec_len - pos - 1; // pos = start of value, -1 => without \n record terminator + + self.size -= rec_len; + const kind: PaxAttrKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.skipBytes(value_len, .{}); + try checkRecordEnd(self.reader); + continue; + }; + return Attr{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; + } + + return null; + } + + inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); + } + + fn checkKeyword(keyword: []const u8) !void { + if (std.mem.indexOfScalar(u8, keyword, 0)) |_| return error.InvalidPaxAttribute; + } + + // Checks that each record ends with new line. 
+ fn checkRecordEnd(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.InvalidPaxAttribute; + } + }; +} + +test "tar PaxReader" { + const Attr = struct { + kind: PaxAttrKind, + value: []const u8 = undefined, + err: ?anyerror = null, }; const cases = [_]struct { data: []const u8, - attrs: []const Attribute, + attrs: []const Attr, err: ?anyerror = null, }{ .{ // valid but unknown keys @@ -1103,7 +826,7 @@ test "tar PaxFileReader" { \\9 a=name \\ , - .attrs = &[_]Attribute{}, + .attrs = &[_]Attr{}, }, .{ // mix of known and unknown keys .data = @@ -1115,10 +838,10 @@ test "tar PaxFileReader" { \\13 key2=val2 \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, - .{ .key = .linkpath, .value = "link" }, - .{ .key = .size, .value = "123" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .value = "link" }, + .{ .kind = .size, .value = "123" }, }, }, .{ // too short size of the second key-value pair @@ -1127,8 +850,8 @@ test "tar PaxFileReader" { \\10 linkpath=value \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, }, .err = error.InvalidPaxAttribute, }, @@ -1136,36 +859,237 @@ test "tar PaxFileReader" { .data = \\13 path=name \\19 linkpath=value + \\6 k=1 \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .err = error.InvalidPaxAttribute }, + }, + }, + .{ // null in keyword is not valid + .data = "13 path=name\n" ++ "7 k\x00b=1\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, }, .err = error.InvalidPaxAttribute, }, + .{ // null in value is not valid + .data = "23 path=name\x00with null\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .err = error.InvalidPaxAttribute }, + }, + }, + .{ // 1000 characters path + .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n", + .attrs = &[_]Attr{ + .{ .kind = .path, 
.value = "0123456789" ** 100 }, + }, + }, }; var buffer: [1024]u8 = undefined; - for (cases) |case| { + outer: for (cases) |case| { var stream = std.io.fixedBufferStream(case.data); - var brdr = bufferedReader(stream.reader()); + var rdr = paxReader(stream.reader(), case.data.len); - var rdr = brdr.paxFileReader(case.data.len); var i: usize = 0; while (rdr.next() catch |err| { if (case.err) |e| { try std.testing.expectEqual(e, err); continue; - } else { - return err; } + return err; }) |attr| : (i += 1) { - try std.testing.expectEqualStrings( - case.attrs[i].value, - try attr.value(&buffer), - ); + const exp = case.attrs[i]; + try std.testing.expectEqual(exp.kind, attr.kind); + const value = attr.value(&buffer) catch |err| { + if (exp.err) |e| { + try std.testing.expectEqual(e, err); + break :outer; + } + return err; + }; + try std.testing.expectEqualStrings(exp.value, value); } try std.testing.expectEqual(case.attrs.len, i); try std.testing.expect(case.err == null); } } + +pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { + return .{ + .reader = reader, + .diagnostics = diagnostics, + }; +} + +fn TarReader(comptime ReaderType: type) type { + return struct { + // scratch buffer for file attributes + scratch: struct { + // size: two paths (name and link_name) and files size bytes (24 in pax attribute) + buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, + tail: usize = 0, + + name: []const u8 = undefined, + link_name: []const u8 = undefined, + size: usize = 0, + + // Allocate size of the buffer for some attribute. + fn alloc(self: *@This(), size: usize) ![]u8 { + const free_size = self.buffer.len - self.tail; + if (size > free_size) return error.TarScratchBufferOverflow; + const head = self.tail; + self.tail += size; + assert(self.tail <= self.buffer.len); + return self.buffer[head..self.tail]; + } + + // Reset buffer and all fields. 
+ fn reset(self: *@This()) void { + self.tail = 0; + self.name = self.buffer[0..0]; + self.link_name = self.buffer[0..0]; + self.size = 0; + } + + fn append(self: *@This(), header: Header) !void { + if (self.size == 0) self.size = try header.fileSize(); + if (self.link_name.len == 0) { + const link_name = header.linkName(); + if (link_name.len > 0) { + const buf = try self.alloc(link_name.len); + @memcpy(buf, link_name); + self.link_name = buf; + } + } + if (self.name.len == 0) { + self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); + } + } + } = .{}, + + reader: ReaderType, + diagnostics: ?*Options.Diagnostics, + padding: usize = 0, // bytes of padding to the end of the block + header_buffer: [BLOCK_SIZE]u8 = undefined, + + const Self = @This(); + + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + mode: u32, + file_type: Header.FileType, + + reader: *ReaderType, + + // Writes file content to writer. + pub fn write(self: File, writer: anytype) !void { + var n = self.size; + while (n > 0) : (n -= 1) { + const byte: u8 = try self.reader.readByte(); + try writer.writeByte(byte); + } + } + + // Skips file content. Advances reader. 
+ pub fn skip(self: File) !void { + try self.reader.skipBytes(self.size, .{}); + } + }; + + fn readHeader(self: *Self) !?Header { + if (self.padding > 0) { + try self.reader.skipBytes(self.padding, .{}); + } + const n = try self.reader.readAll(&self.header_buffer); + if (n == 0) return null; + if (n < BLOCK_SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..BLOCK_SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + fn readString(self: *Self, size: usize) ![]const u8 { + const buf = try self.scratch.alloc(size); + try self.reader.readNoEof(buf); + return nullStr(buf); + } + + // Externally, `next` iterates through the tar archive as if it is a + // series of files. Internally, the tar format often uses fake "files" + // to add meta data that describes the next file. These meta data + // "files" should not normally be visible to the outside. As such, this + // loop iterates through one or more "header files" until it finds a + // "normal file". + pub fn next(self: *Self) !?File { + self.scratch.reset(); + + while (try self.readHeader()) |header| { + const file_type = header.fileType(); + const size: usize = @intCast(try header.fileSize()); + self.padding = blockPadding(size); + + switch (file_type) { + // File types to retrun upstream + .directory, .normal, .symbolic_link => { + try self.scratch.append(header); + const file = File{ + .file_type = file_type, + .name = self.scratch.name, + .link_name = self.scratch.link_name, + .size = self.scratch.size, + .reader = &self.reader, + .mode = try header.mode(), + }; + self.padding = blockPadding(file.size); + return file; + }, + // Prefix header types + .gnu_long_name => { + self.scratch.name = try self.readString(size); + }, + .gnu_long_link => { + self.scratch.link_name = try self.readString(size); + }, + .extended_header => { + if (size == 0) continue; + // Use just attributes from last extended header. 
+ self.scratch.reset(); + + var rdr = paxReader(self.reader, size); + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + self.scratch.name = try attr.value(try self.scratch.alloc(attr.len)); + }, + .linkpath => { + self.scratch.link_name = try attr.value(try self.scratch.alloc(attr.len)); + }, + .size => { + self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.len)), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedFileType; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = file_type, + } }); + }, + } + } + return null; + } + }; +} From 9f7dd323082941d66c18af3da88a432835c5e3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 17:47:19 +0100 Subject: [PATCH 19/29] tar: refactor pax attribute Make it little readable. 
--- lib/std/tar.zig | 126 ++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index e15301589ab0..2065240858c6 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -331,11 +331,6 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -fn noNull(str: []const u8) ![]const u8 { - if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; - return str; -} - test "tar run Go test cases" { const Case = struct { const File = struct { @@ -443,7 +438,7 @@ test "tar run Go test cases" { .{ // pax attribute don't end with \n .path = "pax-bad-hdr-file.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxInvalidAttributeEnd, }, .{ // size is in pax attribute @@ -579,11 +574,11 @@ test "tar run Go test cases" { .{ // null in pax key .path = "pax-nul-xattrs.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInKeyword, }, .{ .path = "pax-nul-path.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInValue, }, .{ .path = "neg-size.tar", @@ -715,7 +710,7 @@ fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { }; } -const PaxAttrKind = enum { +const PaxAttributeKind = enum { path, linkpath, size, @@ -723,54 +718,50 @@ const PaxAttrKind = enum { fn PaxReader(comptime ReaderType: type) type { return struct { - size: usize, + size: usize, // cumulative size of all pax attributes reader: ReaderType, + // scratch buffer used for reading attribute length and keyword + scratch: [128]u8 = undefined, const Self = @This(); - const Attr = struct { - kind: PaxAttrKind, - len: usize, - reader: ReaderType, + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: ReaderType, // reader positioned at value start // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least value_len. 
- pub fn value(self: Attr, dst: []u8) ![]const u8 { + // Must be called with destination buffer of size at least Attribute.len. + pub fn value(self: Attribute, dst: []u8) ![]const u8 { assert(self.len <= dst.len); const buf = dst[0..self.len]; const n = try self.reader.readAll(buf); if (n < self.len) return error.UnexpectedEndOfStream; - try checkRecordEnd(self.reader); - return noNull(buf); + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; } }; - // Iterates over pax records. Returns known records. Caller has to call - // value in Record, to advance reader across value. - pub fn next(self: *Self) !?Attr { - var buf: [128]u8 = undefined; - var fbs = std.io.fixedBufferStream(&buf); - - // An extended header consists of one or more records, each constructed as follows: + // Iterates over pax attributes. Returns known only known attributes. + // Caller has to call value in Attribute, to advance reader across value. + pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: // "%d %s=%s\n", , , while (self.size > 0) { - fbs.reset(); - // read length - try self.reader.streamUntilDelimiter(fbs.writer(), ' ', null); - const rec_len = try std.fmt.parseInt(usize, fbs.getWritten(), 10); // record len in bytes - var pos = try fbs.getPos() + 1; // bytes used for record len + separator - fbs.reset(); - // read keyword - try self.reader.streamUntilDelimiter(fbs.writer(), '=', null); - const keyword = fbs.getWritten(); - pos += try fbs.getPos() + 1; // keyword bytes + separator - try checkKeyword(keyword); - // get value_len - if (rec_len < pos + 1) return error.InvalidPaxAttribute; - const value_len = rec_len - pos - 1; // pos = start of value, -1 => without \n record terminator - - self.size -= rec_len; - const kind: PaxAttrKind = if (eql(keyword, "path")) + const length_buf = try self.readUntil(' '); + const length = try std.fmt.parseInt(usize, 
length_buf, 10); // record length in bytes + + const keyword = try self.readUntil('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) .path else if (eql(keyword, "linkpath")) .linkpath @@ -778,10 +769,10 @@ fn PaxReader(comptime ReaderType: type) type { .size else { try self.reader.skipBytes(value_len, .{}); - try checkRecordEnd(self.reader); + try validateAttributeEnding(self.reader); continue; }; - return Attr{ + return Attribute{ .kind = kind, .len = value_len, .reader = self.reader, @@ -791,24 +782,30 @@ fn PaxReader(comptime ReaderType: type) type { return null; } + inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { + var fbs = std.io.fixedBufferStream(&self.scratch); + try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); + return fbs.getWritten(); + } + inline fn eql(a: []const u8, b: []const u8) bool { return std.mem.eql(u8, a, b); } - fn checkKeyword(keyword: []const u8) !void { - if (std.mem.indexOfScalar(u8, keyword, 0)) |_| return error.InvalidPaxAttribute; + inline fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; } // Checks that each record ends with new line. 
- fn checkRecordEnd(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.InvalidPaxAttribute; + inline fn validateAttributeEnding(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; } }; } test "tar PaxReader" { const Attr = struct { - kind: PaxAttrKind, + kind: PaxAttributeKind, value: []const u8 = undefined, err: ?anyerror = null, }; @@ -853,8 +850,21 @@ test "tar PaxReader" { .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, }, - .err = error.InvalidPaxAttribute, + .err = error.UnexpectedEndOfStream, + }, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\6 k=1 + \\19 linkpath=value + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + }, + .err = error.UnexpectedEndOfStream, }, + .{ // too long size of the second key-value pair .data = \\13 path=name @@ -864,7 +874,7 @@ test "tar PaxReader" { , .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, - .{ .kind = .linkpath, .err = error.InvalidPaxAttribute }, + .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd }, }, }, .{ // null in keyword is not valid @@ -872,12 +882,12 @@ test "tar PaxReader" { .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, }, - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInKeyword, }, .{ // null in value is not valid .data = "23 path=name\x00with null\n", .attrs = &[_]Attr{ - .{ .kind = .path, .err = error.InvalidPaxAttribute }, + .{ .kind = .path, .err = error.PaxNullInValue }, }, }, .{ // 1000 characters path @@ -1019,6 +1029,16 @@ fn TarReader(comptime ReaderType: type) type { return nullStr(buf); } + fn reset(self: *Self) void { + self.file = File{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + .size = 0, + .file_type = 0xff, + .mode = 0, + }; + } + // Externally, `next` iterates through the tar archive as if it is a // series of files. 
Internally, the tar format often uses fake "files" // to add meta data that describes the next file. These meta data From 4a6d67ab1a26e0c89d55453877c1fd8b03ab1976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 20:18:59 +0100 Subject: [PATCH 20/29] tar: remove stratch from tar reader Use explicit buffers for name, link_name instead. It is cleaner that way. --- lib/std/tar.zig | 166 +++++++++++++++++++++--------------------------- 1 file changed, 74 insertions(+), 92 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 2065240858c6..cc7108e62c73 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -83,11 +83,12 @@ pub const Options = struct { }; }; -const BLOCK_SIZE = 512; -const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) - pub const Header = struct { - bytes: *const [BLOCK_SIZE]u8, + const SIZE = 512; + const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) + const LINK_NAME_SIZE = 100; + + bytes: *const [SIZE]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -110,7 +111,7 @@ pub const Header = struct { /// Includes prefix concatenated, if any. /// TODO: check against "../" and other nefarious things - pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 { const n = name(header); const p = prefix(header); if (!is_ustar(header) or p.len == 0) { @@ -123,6 +124,16 @@ pub const Header = struct { return buffer[0 .. 
p.len + 1 + n.len]; } + pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 { + const link_name = header.str(157, 100); + if (link_name.len == 0) { + return buffer[0..0]; + } + const buf = buffer[0..link_name.len]; + @memcpy(buf, link_name); + return buf; + } + pub fn name(header: Header) []const u8 { return header.str(0, 100); } @@ -139,10 +150,6 @@ pub const Header = struct { return header.octal(148, 8); } - pub fn linkName(header: Header) []const u8 { - return header.str(157, 100); - } - pub fn is_ustar(header: Header) bool { const magic = header.bytes[257..][0..6]; return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' '); @@ -219,12 +226,6 @@ fn nullStr(str: []const u8) []const u8 { return str; } -// Number of padding bytes in the last file block. -inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, BLOCK_SIZE); // size rounded to te block boundary - return block_rounded - size; -} - pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -936,56 +937,18 @@ pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader( fn TarReader(comptime ReaderType: type) type { return struct { - // scratch buffer for file attributes - scratch: struct { - // size: two paths (name and link_name) and files size bytes (24 in pax attribute) - buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, - tail: usize = 0, - - name: []const u8 = undefined, - link_name: []const u8 = undefined, - size: usize = 0, - - // Allocate size of the buffer for some attribute. - fn alloc(self: *@This(), size: usize) ![]u8 { - const free_size = self.buffer.len - self.tail; - if (size > free_size) return error.TarScratchBufferOverflow; - const head = self.tail; - self.tail += size; - assert(self.tail <= self.buffer.len); - return self.buffer[head..self.tail]; - } - - // Reset buffer and all fields. 
- fn reset(self: *@This()) void { - self.tail = 0; - self.name = self.buffer[0..0]; - self.link_name = self.buffer[0..0]; - self.size = 0; - } - - fn append(self: *@This(), header: Header) !void { - if (self.size == 0) self.size = try header.fileSize(); - if (self.link_name.len == 0) { - const link_name = header.linkName(); - if (link_name.len > 0) { - const buf = try self.alloc(link_name.len); - @memcpy(buf, link_name); - self.link_name = buf; - } - } - if (self.name.len == 0) { - self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); - } - } - } = .{}, - reader: ReaderType, diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of padding to the end of the block - header_buffer: [BLOCK_SIZE]u8 = undefined, - const Self = @This(); + // buffers for heeader and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + + // bytes of padding to the end of the block + padding: usize = 0, + // current tar file + file: File = undefined, pub const File = struct { name: []const u8, // name of file, symlink or directory @@ -994,14 +957,18 @@ fn TarReader(comptime ReaderType: type) type { mode: u32, file_type: Header.FileType, - reader: *ReaderType, + reader: ReaderType, // Writes file content to writer. 
pub fn write(self: File, writer: anytype) !void { - var n = self.size; - while (n > 0) : (n -= 1) { - const byte: u8 = try self.reader.readByte(); - try writer.writeByte(byte); + var buffer: [4096]u8 = undefined; + + var n: usize = 0; + while (n < self.size) { + const buf = buffer[0..@min(buffer.len, self.size - n)]; + try self.reader.readNoEof(buf); + try writer.writeAll(buf); + n += buf.len; } } @@ -1011,34 +978,44 @@ fn TarReader(comptime ReaderType: type) type { } }; + const Self = @This(); + fn readHeader(self: *Self) !?Header { if (self.padding > 0) { try self.reader.skipBytes(self.padding, .{}); } const n = try self.reader.readAll(&self.header_buffer); if (n == 0) return null; - if (n < BLOCK_SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..BLOCK_SIZE] }; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; if (try header.checkChksum() == 0) return null; return header; } - fn readString(self: *Self, size: usize) ![]const u8 { - const buf = try self.scratch.alloc(size); + inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { + assert(buffer.len >= size); + const buf = buffer[0..size]; try self.reader.readNoEof(buf); return nullStr(buf); } - fn reset(self: *Self) void { + inline fn initFile(self: *Self) void { self.file = File{ .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], .size = 0, - .file_type = 0xff, + .file_type = .normal, .mode = 0, + .reader = self.reader, }; } + // Number of padding bytes in the last file block. + inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary + return block_rounded - size; + } + // Externally, `next` iterates through the tar archive as if it is a // series of files. 
Internally, the tar format often uses fake "files" // to add meta data that describes the next file. These meta data @@ -1046,7 +1023,7 @@ fn TarReader(comptime ReaderType: type) type { // loop iterates through one or more "header files" until it finds a // "normal file". pub fn next(self: *Self) !?File { - self.scratch.reset(); + self.initFile(); while (try self.readHeader()) |header| { const file_type = header.fileType(); @@ -1056,41 +1033,46 @@ fn TarReader(comptime ReaderType: type) type { switch (file_type) { // File types to retrun upstream .directory, .normal, .symbolic_link => { - try self.scratch.append(header); - const file = File{ - .file_type = file_type, - .name = self.scratch.name, - .link_name = self.scratch.link_name, - .size = self.scratch.size, - .reader = &self.reader, - .mode = try header.mode(), - }; - self.padding = blockPadding(file.size); - return file; + self.file.file_type = file_type; + self.file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (self.file.size == 0) { + self.file.size = size; + } + if (self.file.link_name.len == 0) { + self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); + } + if (self.file.name.len == 0) { + self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); + } + + self.padding = blockPadding(self.file.size); + return self.file; }, // Prefix header types .gnu_long_name => { - self.scratch.name = try self.readString(size); + self.file.name = try self.readString(size, &self.file_name_buffer); }, .gnu_long_link => { - self.scratch.link_name = try self.readString(size); + self.file.link_name = try self.readString(size, &self.link_name_buffer); }, .extended_header => { - if (size == 0) continue; // Use just attributes from last extended header. 
- self.scratch.reset(); + self.initFile(); var rdr = paxReader(self.reader, size); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { - self.scratch.name = try attr.value(try self.scratch.alloc(attr.len)); + self.file.name = try attr.value(&self.file_name_buffer); }, .linkpath => { - self.scratch.link_name = try attr.value(try self.scratch.alloc(attr.len)); + self.file.link_name = try attr.value(&self.link_name_buffer); }, .size => { - self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.len)), 10); + var buf: [64]u8 = undefined; + self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); }, } } From c76abe0e183ef513b9ee651b052c7f99c33c139c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 20:46:27 +0100 Subject: [PATCH 21/29] tar: use file word in less places --- lib/std/tar.zig | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index cc7108e62c73..c83ae70e3b38 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -58,7 +58,7 @@ pub const Options = struct { }, unsupported_file_type: struct { file_name: []const u8, - file_type: Header.FileType, + file_type: Header.Kind, }, }; @@ -90,7 +90,7 @@ pub const Header = struct { bytes: *const [SIZE]u8, - pub const FileType = enum(u8) { + pub const Kind = enum(u8) { normal_alias = 0, normal = '0', hard_link = '1', @@ -142,7 +142,7 @@ pub const Header = struct { return @intCast(try header.numeric(100, 8)); } - pub fn fileSize(header: Header) !u64 { + pub fn size(header: Header) !u64 { return header.numeric(124, 12); } @@ -159,8 +159,8 @@ pub const Header = struct { return header.str(345, 155); } - pub fn fileType(header: Header) FileType { - const result: FileType = @enumFromInt(header.bytes[156]); + pub fn kind(header: Header) Kind { + const result: Kind = @enumFromInt(header.bytes[156]); if (result == .normal_alias) return 
.normal; return result; } @@ -242,7 +242,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var iter = tarReader(reader, options.diagnostics); while (try iter.next()) |file| { - switch (file.file_type) { + switch (file.kind) { .directory => { const file_name = try stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { @@ -339,7 +339,7 @@ test "tar run Go test cases" { size: usize = 0, mode: u32 = 0, link_name: []const u8 = &[0]u8{}, - file_type: Header.FileType = .normal, + kind: Header.Kind = .normal, truncated: bool = false, // when there is no file body, just header, usefull for huge files }; @@ -376,7 +376,7 @@ test "tar run Go test cases" { }, .{ .path = "sparse-formats.tar", - .err = error.TarUnsupportedFileType, + .err = error.TarUnsupportedHeader, }, .{ .path = "star.tar", @@ -427,7 +427,7 @@ test "tar run Go test cases" { .{ .name = "a/b", .size = 0, - .file_type = .symbolic_link, + .kind = .symbolic_link, .mode = 0o777, .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, @@ -448,7 +448,7 @@ test "tar run Go test cases" { .{ .name = "foo", .size = 999, - .file_type = .normal, + .kind = .normal, .mode = 0o640, }, }, @@ -489,7 +489,7 @@ test "tar run Go test cases" { .{ .name = "P1050238.JPG.log", .size = 14, - .file_type = .normal, + .kind = .normal, .mode = 0o664, }, }, @@ -504,13 +504,13 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, + .kind = .normal, .mode = 0o644, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, + .kind = .normal, .mode = 0o644, }, }, @@ -525,14 +525,14 @@ test "tar run Go test cases" { .{ .name = "GNU2/GNU2/long-path-name", .link_name = "GNU4/GNU4/long-linkpath-name", - .file_type = .symbolic_link, + .kind = 
.symbolic_link, }, }, }, .{ // has gnu type D (directory) and S (sparse) blocks .path = "gnu-incremental.tar", - .err = error.TarUnsupportedFileType, + .err = error.TarUnsupportedHeader, }, .{ // should use values only from last pax header @@ -541,7 +541,7 @@ test "tar run Go test cases" { .{ .name = "bar", .link_name = "PAX4/PAX4/long-linkpath-name", - .file_type = .symbolic_link, + .kind = .symbolic_link, }, }, }, @@ -620,7 +620,7 @@ test "tar run Go test cases" { .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, - .file_type = .directory, + .kind = .directory, }, }, }, @@ -668,7 +668,7 @@ test "tar run Go test cases" { const expected = case.files[i]; try std.testing.expectEqualStrings(expected.name, actual.name); try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqual(expected.kind, actual.kind); try std.testing.expectEqual(expected.mode, actual.mode); try std.testing.expectEqualStrings(expected.link_name, actual.link_name); @@ -955,7 +955,7 @@ fn TarReader(comptime ReaderType: type) type { link_name: []const u8, // target name of symlink size: usize, // size of the file in bytes mode: u32, - file_type: Header.FileType, + kind: Header.Kind, reader: ReaderType, @@ -1004,7 +1004,7 @@ fn TarReader(comptime ReaderType: type) type { .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], .size = 0, - .file_type = .normal, + .kind = .normal, .mode = 0, .reader = self.reader, }; @@ -1026,14 +1026,14 @@ fn TarReader(comptime ReaderType: type) type { self.initFile(); while (try self.readHeader()) |header| { - const file_type = header.fileType(); - const size: usize = @intCast(try header.fileSize()); + const kind = header.kind(); + const size: usize = @intCast(try header.size()); self.padding = blockPadding(size); - switch (file_type) { + switch (kind) { // File types to retrun upstream .directory, .normal, .symbolic_link => { - 
self.file.file_type = file_type; + self.file.kind = kind; self.file.mode = try header.mode(); // set file attributes if not already set by prefix/extended headers @@ -1083,10 +1083,10 @@ fn TarReader(comptime ReaderType: type) type { }, // All other are unsupported header types else => { - const d = self.diagnostics orelse return error.TarUnsupportedFileType; + const d = self.diagnostics orelse return error.TarUnsupportedHeader; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = file_type, + .file_type = kind, } }); }, } From c07527abac7a5f56bb9111b42fcbcbf468b4917f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 22:00:49 +0100 Subject: [PATCH 22/29] tar: reorganize file, functions before tests --- lib/std/tar.zig | 651 ++++++++++++++++++++++++------------------------ 1 file changed, 326 insertions(+), 325 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index c83ae70e3b38..21d08c527212 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -15,8 +15,7 @@ /// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html /// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 /// -//const std = @import("std.zig"); -const std = @import("std"); +const std = @import("std.zig"); const assert = std.debug.assert; pub const Options = struct { @@ -226,6 +225,276 @@ fn nullStr(str: []const u8) []const u8 { return str; } +pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { + return .{ + .reader = reader, + .diagnostics = diagnostics, + }; +} + +fn TarReader(comptime ReaderType: type) type { + return struct { + reader: ReaderType, + diagnostics: ?*Options.Diagnostics, + + // buffers for heeader and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_buffer: 
[std.fs.MAX_PATH_BYTES]u8 = undefined, + + // bytes of padding to the end of the block + padding: usize = 0, + // current tar file + file: File = undefined, + + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + mode: u32, + kind: Header.Kind, + + reader: ReaderType, + + // Writes file content to writer. + pub fn write(self: File, writer: anytype) !void { + var buffer: [4096]u8 = undefined; + + var n: usize = 0; + while (n < self.size) { + const buf = buffer[0..@min(buffer.len, self.size - n)]; + try self.reader.readNoEof(buf); + try writer.writeAll(buf); + n += buf.len; + } + } + + // Skips file content. Advances reader. + pub fn skip(self: File) !void { + try self.reader.skipBytes(self.size, .{}); + } + }; + + const Self = @This(); + + fn readHeader(self: *Self) !?Header { + if (self.padding > 0) { + try self.reader.skipBytes(self.padding, .{}); + } + const n = try self.reader.readAll(&self.header_buffer); + if (n == 0) return null; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { + assert(buffer.len >= size); + const buf = buffer[0..size]; + try self.reader.readNoEof(buf); + return nullStr(buf); + } + + inline fn initFile(self: *Self) void { + self.file = File{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + .size = 0, + .kind = .normal, + .mode = 0, + .reader = self.reader, + }; + } + + // Number of padding bytes in the last file block. 
+ inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary + return block_rounded - size; + } + + /// Iterates through the tar archive as if it is a series of files. + /// Internally, the tar format often uses entries (header with optional + /// content) to add meta data that describes the next file. These + /// entries should not normally be visible to the outside. As such, this + /// loop iterates through one or more entries until it collects a all + /// file attributes. + pub fn next(self: *Self) !?File { + self.initFile(); + + while (try self.readHeader()) |header| { + const kind = header.kind(); + const size: usize = @intCast(try header.size()); + self.padding = blockPadding(size); + + switch (kind) { + // File types to retrun upstream + .directory, .normal, .symbolic_link => { + self.file.kind = kind; + self.file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (self.file.size == 0) { + self.file.size = size; + } + if (self.file.link_name.len == 0) { + self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); + } + if (self.file.name.len == 0) { + self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); + } + + self.padding = blockPadding(self.file.size); + return self.file; + }, + // Prefix header types + .gnu_long_name => { + self.file.name = try self.readString(size, &self.file_name_buffer); + }, + .gnu_long_link => { + self.file.link_name = try self.readString(size, &self.link_name_buffer); + }, + .extended_header => { + // Use just attributes from last extended header. 
+ self.initFile(); + + var rdr = paxReader(self.reader, size); + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + self.file.name = try attr.value(&self.file_name_buffer); + }, + .linkpath => { + self.file.link_name = try attr.value(&self.link_name_buffer); + }, + .size => { + var buf: [64]u8 = undefined; + self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedHeader; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = kind, + } }); + }, + } + } + return null; + } + }; +} + +// Pax attributes reader. +// Size is length of pax extended header in reader. +fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { + return PaxReader(@TypeOf(reader)){ + .reader = reader, + .size = size, + }; +} + +const PaxAttributeKind = enum { + path, + linkpath, + size, +}; + +fn PaxReader(comptime ReaderType: type) type { + return struct { + size: usize, // cumulative size of all pax attributes + reader: ReaderType, + // scratch buffer used for reading attribute length and keyword + scratch: [128]u8 = undefined, + + const Self = @This(); + + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: ReaderType, // reader positioned at value start + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least Attribute.len. 
+ pub fn value(self: Attribute, dst: []u8) ![]const u8 { + assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readAll(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; + } + }; + + // Iterates over pax attributes. Returns known only known attributes. + // Caller has to call value in Attribute, to advance reader across value. + pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: + // "%d %s=%s\n", , , + while (self.size > 0) { + const length_buf = try self.readUntil(' '); + const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes + + const keyword = try self.readUntil('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.skipBytes(value_len, .{}); + try validateAttributeEnding(self.reader); + continue; + }; + return Attribute{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; + } + + return null; + } + + inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { + var fbs = std.io.fixedBufferStream(&self.scratch); + try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); + return fbs.getWritten(); + } + + inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); + } + + inline fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; + } + + // Checks that 
each record ends with new line. + inline fn validateAttributeEnding(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; + } + }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -639,170 +908,70 @@ test "tar run Go test cases" { .{ // Size in gnu extended format, and name in pax attribute. .path = "writer-big-long.tar", - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - }; - - for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - defer fs_file.close(); - - //var iter = iterator(fs_file.reader(), null); - var iter = tarReader(fs_file.reader(), null); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try std.testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.write(&md5writer); - const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (!expected.truncated) try actual.skip(); // skip file content - } - } - try std.testing.expectEqual(case.files.len, i); - } -} - -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - 
self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; - -fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { - return PaxReader(@TypeOf(reader)){ - .reader = reader, - .size = size, - }; -} - -const PaxAttributeKind = enum { - path, - linkpath, - size, -}; - -fn PaxReader(comptime ReaderType: type) type { - return struct { - size: usize, // cumulative size of all pax attributes - reader: ReaderType, - // scratch buffer used for reading attribute length and keyword - scratch: [128]u8 = undefined, - - const Self = @This(); - - const Attribute = struct { - kind: PaxAttributeKind, - len: usize, // length of the attribute value - reader: ReaderType, // reader positioned at value start - - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least Attribute.len. - pub fn value(self: Attribute, dst: []u8) ![]const u8 { - assert(self.len <= dst.len); - const buf = dst[0..self.len]; - const n = try self.reader.readAll(buf); - if (n < self.len) return error.UnexpectedEndOfStream; - try validateAttributeEnding(self.reader); - if (hasNull(buf)) return error.PaxNullInValue; - return buf; - } - }; - - // Iterates over pax attributes. Returns known only known attributes. - // Caller has to call value in Attribute, to advance reader across value. 
- pub fn next(self: *Self) !?Attribute { - // Pax extended header consists of one or more attributes, each constructed as follows: - // "%d %s=%s\n", , , - while (self.size > 0) { - const length_buf = try self.readUntil(' '); - const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes - - const keyword = try self.readUntil('='); - if (hasNull(keyword)) return error.PaxNullInKeyword; + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + }; - // calculate value_len - const value_start = length_buf.len + keyword.len + 2; // 2 separators - if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; - const value_len = length - value_start - 1; // \n separator at end - self.size -= length; + for (cases) |case| { + var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); - const kind: PaxAttributeKind = if (eql(keyword, "path")) - .path - else if (eql(keyword, "linkpath")) - .linkpath - else if (eql(keyword, "size")) - .size - else { - try self.reader.skipBytes(value_len, .{}); - try validateAttributeEnding(self.reader); - continue; - }; - return Attribute{ - .kind = kind, - .len = value_len, - .reader = self.reader, - }; + //var iter = iterator(fs_file.reader(), null); + var iter = tarReader(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.kind, actual.kind); + try std.testing.expectEqual(expected.mode, actual.mode); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - return null; + if (case.chksums.len > i) { 
+ var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (!expected.truncated) try actual.skip(); // skip file content + } } + try std.testing.expectEqual(case.files.len, i); + } +} - inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { - var fbs = std.io.fixedBufferStream(&self.scratch); - try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); - return fbs.getWritten(); - } +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - inline fn eql(a: []const u8, b: []const u8) bool { - return std.mem.eql(u8, a, b); - } + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } - inline fn hasNull(str: []const u8) bool { - return (std.mem.indexOfScalar(u8, str, 0)) != null; - } + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } - // Checks that each record ends with new line. 
- inline fn validateAttributeEnding(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; - } - }; -} + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; test "tar PaxReader" { const Attr = struct { @@ -927,171 +1096,3 @@ test "tar PaxReader" { try std.testing.expect(case.err == null); } } - -pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { - return .{ - .reader = reader, - .diagnostics = diagnostics, - }; -} - -fn TarReader(comptime ReaderType: type) type { - return struct { - reader: ReaderType, - diagnostics: ?*Options.Diagnostics, - - // buffers for heeader and file attributes - header_buffer: [Header.SIZE]u8 = undefined, - file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - - // bytes of padding to the end of the block - padding: usize = 0, - // current tar file - file: File = undefined, - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes - mode: u32, - kind: Header.Kind, - - reader: ReaderType, - - // Writes file content to writer. - pub fn write(self: File, writer: anytype) !void { - var buffer: [4096]u8 = undefined; - - var n: usize = 0; - while (n < self.size) { - const buf = buffer[0..@min(buffer.len, self.size - n)]; - try self.reader.readNoEof(buf); - try writer.writeAll(buf); - n += buf.len; - } - } - - // Skips file content. Advances reader. 
- pub fn skip(self: File) !void { - try self.reader.skipBytes(self.size, .{}); - } - }; - - const Self = @This(); - - fn readHeader(self: *Self) !?Header { - if (self.padding > 0) { - try self.reader.skipBytes(self.padding, .{}); - } - const n = try self.reader.readAll(&self.header_buffer); - if (n == 0) return null; - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; - if (try header.checkChksum() == 0) return null; - return header; - } - - inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { - assert(buffer.len >= size); - const buf = buffer[0..size]; - try self.reader.readNoEof(buf); - return nullStr(buf); - } - - inline fn initFile(self: *Self) void { - self.file = File{ - .name = self.file_name_buffer[0..0], - .link_name = self.link_name_buffer[0..0], - .size = 0, - .kind = .normal, - .mode = 0, - .reader = self.reader, - }; - } - - // Number of padding bytes in the last file block. - inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary - return block_rounded - size; - } - - // Externally, `next` iterates through the tar archive as if it is a - // series of files. Internally, the tar format often uses fake "files" - // to add meta data that describes the next file. These meta data - // "files" should not normally be visible to the outside. As such, this - // loop iterates through one or more "header files" until it finds a - // "normal file". 
- pub fn next(self: *Self) !?File { - self.initFile(); - - while (try self.readHeader()) |header| { - const kind = header.kind(); - const size: usize = @intCast(try header.size()); - self.padding = blockPadding(size); - - switch (kind) { - // File types to retrun upstream - .directory, .normal, .symbolic_link => { - self.file.kind = kind; - self.file.mode = try header.mode(); - - // set file attributes if not already set by prefix/extended headers - if (self.file.size == 0) { - self.file.size = size; - } - if (self.file.link_name.len == 0) { - self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); - } - if (self.file.name.len == 0) { - self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); - } - - self.padding = blockPadding(self.file.size); - return self.file; - }, - // Prefix header types - .gnu_long_name => { - self.file.name = try self.readString(size, &self.file_name_buffer); - }, - .gnu_long_link => { - self.file.link_name = try self.readString(size, &self.link_name_buffer); - }, - .extended_header => { - // Use just attributes from last extended header. 
- self.initFile(); - - var rdr = paxReader(self.reader, size); - while (try rdr.next()) |attr| { - switch (attr.kind) { - .path => { - self.file.name = try attr.value(&self.file_name_buffer); - }, - .linkpath => { - self.file.link_name = try attr.value(&self.link_name_buffer); - }, - .size => { - var buf: [64]u8 = undefined; - self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); - }, - } - } - return null; - } - }; -} From f8e42d6b308a2e523d6a32669d0a021a56f70524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 22:17:47 +0100 Subject: [PATCH 23/29] tar: add Go test case files to the project --- lib/std/tar.zig | 8 +++----- test/cases/tar/gnu-incremental.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu-long-nul.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes test/cases/tar/gnu-not-utf8.tar | Bin 0 -> 1536 bytes test/cases/tar/gnu-utf8.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu.tar | Bin 0 -> 3072 bytes test/cases/tar/invalid-go17.tar | Bin 0 -> 1536 bytes test/cases/tar/issue10968.tar | Bin 0 -> 512 bytes test/cases/tar/issue11169.tar | Bin 0 -> 602 bytes test/cases/tar/issue12435.tar | Bin 0 -> 512 bytes test/cases/tar/neg-size.tar | Bin 0 -> 512 bytes test/cases/tar/nil-uid.tar | Bin 0 -> 1024 bytes test/cases/tar/pax-bad-hdr-file.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-global-records.tar | Bin 0 -> 7168 bytes test/cases/tar/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes test/cases/tar/pax-nul-path.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-nul-xattrs.tar | 
Bin 0 -> 2560 bytes test/cases/tar/pax-pos-size-file.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-records.tar | Bin 0 -> 2560 bytes test/cases/tar/pax.tar | Bin 0 -> 10240 bytes test/cases/tar/sparse-formats.tar | Bin 0 -> 17920 bytes test/cases/tar/star.tar | Bin 0 -> 3072 bytes test/cases/tar/trailing-slash.tar | Bin 0 -> 2560 bytes test/cases/tar/ustar-file-devs.tar | Bin 0 -> 1536 bytes test/cases/tar/v7.tar | Bin 0 -> 3584 bytes test/cases/tar/writer-big-long.tar | Bin 0 -> 1536 bytes test/cases/tar/writer-big.tar | Bin 0 -> 512 bytes test/cases/tar/xattrs.tar | Bin 0 -> 5120 bytes 29 files changed, 3 insertions(+), 5 deletions(-) create mode 100644 test/cases/tar/gnu-incremental.tar create mode 100644 test/cases/tar/gnu-long-nul.tar create mode 100644 test/cases/tar/gnu-multi-hdrs.tar create mode 100644 test/cases/tar/gnu-not-utf8.tar create mode 100644 test/cases/tar/gnu-utf8.tar create mode 100644 test/cases/tar/gnu.tar create mode 100644 test/cases/tar/invalid-go17.tar create mode 100644 test/cases/tar/issue10968.tar create mode 100644 test/cases/tar/issue11169.tar create mode 100644 test/cases/tar/issue12435.tar create mode 100644 test/cases/tar/neg-size.tar create mode 100644 test/cases/tar/nil-uid.tar create mode 100644 test/cases/tar/pax-bad-hdr-file.tar create mode 100644 test/cases/tar/pax-global-records.tar create mode 100644 test/cases/tar/pax-multi-hdrs.tar create mode 100644 test/cases/tar/pax-nul-path.tar create mode 100644 test/cases/tar/pax-nul-xattrs.tar create mode 100644 test/cases/tar/pax-pos-size-file.tar create mode 100644 test/cases/tar/pax-records.tar create mode 100644 test/cases/tar/pax.tar create mode 100644 test/cases/tar/sparse-formats.tar create mode 100644 test/cases/tar/star.tar create mode 100644 test/cases/tar/trailing-slash.tar create mode 100644 test/cases/tar/ustar-file-devs.tar create mode 100644 test/cases/tar/v7.tar create mode 100644 test/cases/tar/writer-big-long.tar create mode 100644 test/cases/tar/writer-big.tar 
create mode 100644 test/cases/tar/xattrs.tar diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 21d08c527212..ff8cfd4a36cc 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -618,10 +618,8 @@ test "tar run Go test cases" { err: ?anyerror = null, // parsing should fail with this error }; - const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| - try std.fs.openDirAbsolute(path, .{}) - else - return error.SkipZigTest; + const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; + const test_dir = try std.fs.cwd().openDir(src_path ++ "/../../test/cases/tar", .{}); const cases = [_]Case{ .{ @@ -921,9 +919,9 @@ test "tar run Go test cases" { for (cases) |case| { var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); - //var iter = iterator(fs_file.reader(), null); var iter = tarReader(fs_file.reader(), null); var i: usize = 0; while (iter.next() catch |err| { diff --git a/test/cases/tar/gnu-incremental.tar b/test/cases/tar/gnu-incremental.tar new file mode 100644 index 0000000000000000000000000000000000000000..4c442e5b82d1977231c83167324dc6cbb39f090e GIT binary patch literal 2560 zcmeH}%?`pK41jyyQ}6~BR^SDE15X}Fgm`jhV0e9D|52kCnK9aBtYckin|)=$`XDw? 
zR1gWZlz@m_OI%*lRGwA9h14WT2vq~}S_lHREgIF}{NjUY8H3iu_St#|f3o43!OgQF zA*xBv$!WT=`uOeMH4W_j*|gq%JeYp~t5+a&{O6CLoFGS3L`&|+KG5TjI3dD&{*}_e zuv|#9=O5?a*=X`v4TFi!9!)-~JQx>kr#Lex*}O{T(ROdlh5T#ZSb?7Zvi<)R|EUQ~ p{P+96mNf#~tx?dT{HxUWIL*mD+W*tf(I~k?j`F_TmkZo^y#f_=Y61WN literal 0 HcmV?d00001 diff --git a/test/cases/tar/gnu-long-nul.tar b/test/cases/tar/gnu-long-nul.tar new file mode 100644 index 0000000000000000000000000000000000000000..28bc812aa60e81ea324297c81c738486acffc09c GIT binary patch literal 2560 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/test/cases/tar/gnu-not-utf8.tar b/test/cases/tar/gnu-not-utf8.tar new file mode 100644 index 0000000000000000000000000000000000000000..81cec67d3309502add09e2495ee1bf139389c8cb GIT binary patch literal 1536 zcmd1EY-nt1PO400pdBy(0y7g61|Vf9U5Ztq0(Jv@FdDKtv;8zq|ijXv5BIw^6DOh^2UK)_HbK1>@VQ0c5`qsHR zJrb1zXEcV16&lMRW}rdtKd=NSXg->JkvP|Esp4`g&CK_h+FMmo7oR?iU7RP&svn2t z!9Ke4)upeR_aRYKtT+(g`B!B>fS>t?p7IZ9V9LKWlK+)w+iY|SVQ_tY3I4Ddrx1w) M;($0H4*b6ZFWOBnBLDyZ literal 0 HcmV?d00001 diff --git a/test/cases/tar/invalid-go17.tar b/test/cases/tar/invalid-go17.tar new file mode 100644 index 0000000000000000000000000000000000000000..58f2488e78fb44a10f4e5a7833d5176c4c0ca090 GIT binary patch literal 1536 zcmYex&u5@DFn|Dt29QMx3=A+nlm<~SF$mk((A3NXq>>Jxw74X(NP%*@pq_zwgdvgE SfDU0ZYQ<;>jE2Cl4gmlQRWrx{ literal 0 HcmV?d00001 diff --git a/test/cases/tar/issue10968.tar b/test/cases/tar/issue10968.tar new file mode 100644 index 0000000000000000000000000000000000000000..1cc837bcff14cd822a26e43034955c82e852ab29 GIT binary patch literal 512 zcmbVI!41MN47Ah*kg@;^fX)>lI!AWsgI^V-_Q4}k$6}2x&>iv*cG6Oc`at9n#lG|1 zIi>(iak!RTol#boyD`0c^v(cHJJuvHh-e39;{t(!nc@gWsV;O@FkUc{-h`pC817Ix zgh|QIatu;A!G^JZ7UC1V_vGb4bURuTWAy6SS-Fx(D=wcI#QP1Y#wzX?HAf0_+~lp> yN?iGbw2JFgJjd0vnp9WIo>K3V$tfee6;KE|`1A3J$tp?9B&Y7`+Gwrtzls-lP-;g2 literal 0 HcmV?d00001 diff --git a/test/cases/tar/issue11169.tar b/test/cases/tar/issue11169.tar new file mode 100644 index 
0000000000000000000000000000000000000000..4d71fa15260609ecee0c8c751cfebf49be8763ac GIT binary patch literal 602 zcmdPX4@j)=NKH&hEh^SCG%+zV)=x}KWS}ZA00J`;69y0s1n9JZp|KHzp^>Svp`nSX svAH3G0gzz?R8~P%SKu(Lw74X(2$wLeQpM#*<5EsKtJ9uH<6)~%E3&=F&Kzfi~{+q#!cOa=AzbjSO$1IxR;aN z*g3HiZShf(vs!BvbKPiG1!!GY>l3F=j$kqh!InX?lk@{OAsyh#2%!qzcGBC1;;FXq z6(OUFiyZLnA-?TXGNUXTUA0xix-DD8En}aw{C@tp7XnB4F0(23$NLG-y>mPO&s&?~ literal 0 HcmV?d00001 diff --git a/test/cases/tar/nil-uid.tar b/test/cases/tar/nil-uid.tar new file mode 100644 index 0000000000000000000000000000000000000000..cc9cfaa33cc5de0a28b4183c1705d801f788c96a GIT binary patch literal 1024 zcmWGAG%z(VGPcn33UJrU$xmmX0WbgpGcywmlR@HOU}(l*Xk=(?U}j`)Zf3?{U)I_ zELW9JRmrEpl(4jByA+?w^w&7<2NoZAjuXMp*n7%?6L^l77Eiau7hvcq?jH1?rTX`9 zpMCF(>ILDcd3=FqS4V*TZi>3Pe0#t9R^c-JqBj20{7)M=*Yz)FewT(Uati=tP7$St z*d8bF(D@GlUJ@4%qe!wMbG*3V!1wg0PiykOO!ia%*E*#=&OR(jtILP6KRA&2FJ*Kg zy|A)1mbYF<|BA(6Myl2O)0`aM3Wji&pIh4ClA2$dWoJs58yl;4D?U=(N)W;u%XInf zRTs< z4xd;AU1Jf%`IN4#I124Rb^4Q)!o;hPejoEtY5^qa+2ELA%=<%v`PjLWfatmzU$WMq z0o-nPr8-}Z8wrod+8?IEmrDw+X#Mz zXg5Tql$=bVJ~k+v0GLO)^pr}@?=ab|SNj)8^HW8!blD3B%SS zGI^{kUFR(2?R5l=l)&2sw&#PX@ZqpeYkq_8j?Fe9>W(2A?o+GT0h2`D?X>ODJmvX4 zP~kX>b+(x;9Ro3CH;IaFGqbd&#t{MvQE$!J(C*Cr+>WZOtS-2k`j?wym_80XdBrEb z&A?kq@r`(G3QRVPrgJ7?%^kKYVLESuSIlk5vZfj_byJWIheX`cpD(885)#?nu-$bl zd_2p;+hBpM{R+d!YIE8hxR?tY^UL#n+jl6O_LaKNYd-D1vHp<<`zJM7tNzO*g*mMS z{}PVT{JZ_3frq#ms21`Cr=+Hc0kHIcL{G*ZFA#Efwe*);p(rx?(dkdptIvyaeUIQm zC7-2Q)O+&%G{(pIJ#q(#>i-#uLc#hT`hT40Pta=rFC=*e`!w3WI)BE>e1hozPB`6P z`9Jr6+W!j+!|;>--`D@^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! 
literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-nul-path.tar b/test/cases/tar/pax-nul-path.tar new file mode 100644 index 0000000000000000000000000000000000000000..c78f82b16e85363143404ec50c3e77e5174ba696 GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vIFf=kYF*P%{u%s%>00J0r&<4gv#xOaM5(Wb!BV%(j289X+ zI)KvRlEfmQ^^{tL?m0@$qmzCkLqmmv#F7kKs>V3AQxFN}F>ui-ycfm?#?I`Jb2|8dWnI0;3@?JVO8g0FZP& literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-nul-xattrs.tar b/test/cases/tar/pax-nul-xattrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..881f51768f9872f8d9bfcd2cbb637a64c88b59fb GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vgN=(tsE6vH#E2$`9pdK)Q00tbifq}6(Ob(=k!NAbi+{l1I zp@Mo&z->}#aYE@j+a){BLMtXu_aCah^nR1rE*#&;K|=WmI@H1V%$( H1cU$p$tyMF literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-pos-size-file.tar b/test/cases/tar/pax-pos-size-file.tar new file mode 100644 index 0000000000000000000000000000000000000000..ea5ccf916426a5b6300dd341dffeeb349e51ad90 GIT binary patch literal 2560 zcmeHGO^=&86wSK7V)iVI`DW1tNf<~-Ffb4pb|xV)A%qVbNciYIMU^VD+e5mrD1N`@xy@5P0!5`e$lo>Ayydck>ZnF=Fo-*7$%B*6VQy8OC1ZYQZ0cQwSoN-=7~KHZm75nsv?7#% zxbP~EBdQp}j$fv$ zm0((q`NB@)$O$!<9_$=t{xvb~Lm9~}L?l0&Jl78;-DOxv-8%`1VOZ&@XiXNP^?Flj z(`mnw@sQZt-FOL46N~e{7&mmZ3_7Zeb)X3_s28DRNA*13 zzlx)-aG@0_eZq!piMD!7yu_+Gn2p4s1_%<*fN8?jFA9^o3rL4@o2IU`LoynihG>kD zgC*AcCW%G}r$MQF^{SM2lx>!a?K5KK;Y$2@1m>3MDAg#{VSu51&>@#aQR@?hJl2&j zOTYwY86z_%=ypQwac8MQ)Nk`zSmC?tfF?@1eU$X~vDKZB%VPg_Skq)0%kmy-XqcqB zSWUKpp_GGFrX{zDo7-dKiXlS@wWp$QH`aD)2Tf5ICwv|0^UXD_0EfP^5VFAG&@E^9 wM*KF17Kq~USjbrS`t?E_kE`$n^D8p1nNBRj66M`4lQuLkKmWlt`)ynQ0y{e~UjP6A literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-records.tar b/test/cases/tar/pax-records.tar new file mode 100644 index 0000000000000000000000000000000000000000..276c211baa388cd4857f60be3355dc710c079adf GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vg%gjk-pgu5w00tbifuXS}Ob(=k!NAbaz|4q2p@RBO!8Eb7 zxFoR%Xg#LPh!8QfP;mG6arAT7E67f_1qKC|k*Pv*er|4RUWu(oYEDkRj>3~2)1FW5 
z;W9N)D9uaEO|{L*&r2r;bB%})9NM0wQI78BOoLqCLtvwr=Z5b$i&RT%Er#Y zO+ZjcSVUAHn~8MUp(~vBV+cg7LjpEKCCE6D7E@EwTcMv_>l+&bbg`j1Cv0A776ym5t@+ zSt9MDBFtXbKY&m4pMN0f`l~hhD>#q(-`x$5n+q@eEPmAevA;0XTM8XMYkTvSmQ-t5 zkihVw{(qQ#_JjT})&KMa&-FhG0c8or{CPvw|Jf69WL!B2Wa1KoKYcMW6^2fg(@@ia-%40!5$*6oDd81d2cr_`3;w E2V3|JA^-pY literal 0 HcmV?d00001 diff --git a/test/cases/tar/sparse-formats.tar b/test/cases/tar/sparse-formats.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bd4e74d50f9c8961f80a887ab7d6449e032048b GIT binary patch literal 17920 zcmeHO!BXQ!5M{6a3g^xmb&sU64qQV{sZ?#{1DvdPiv%!*Aw}}_dEEjdi|Nre#)hpG zE{}(KnjXC;wbY|&t*;k1>*dFY-EbwpT`b+-m>u zXfjaoe5W44g1VMFbuvaLV|3acePf?HHj7T34f|}^XTyHz*zDR5hW%jJ$GN!K=dPX7 zuwNSXOT&I?*sl!xm0`a!>{o{UdfWbohf`t0wKm47jd5yYoVY#C#(p&HN5g(h+o$d^ z>C~x6+ovLJp9;gi;Rj^+0U3Tkh98jO2W0pG8Gb;9ACTb()boS>@h8I{`Aj1#H@B=dZfDAvtjWMmW;RoC~7Py0M z`m*5%-1CF}@n^#y*zgB7{DBRBV8b8S@CP>hfen8^_^{DnOAo^z5MmhHr;h_0e!zww zu;B-6_yHS!z=j{N;RkH^0r&ji+3`30fen9P!ynl22R8fx0blw!^!(v@`{T)$#0AMUzUr{%bWEKK}dPBZfAtotM&Q)$6}V4GkAAL?ydIxj}Q`3 zJOATY>UD~$8khO$y?3COY_Ib_T!Mz?cSHC~#(oEVI84ue{e9LR^x69SzvU?x#e`$G z`ReZSkBilxf3HuQYO>v9_2tWYd3#C|uKGRxyy zk#CN0vO|t>vO|t?vO|t@vV)g2dr7mGGDo)W_L8o>q-!tf+DkfmNk=c~=p`M!q@$Pg+)H}y zB|Z0&o_k5py`&p2>BdW1A|f+1N!@lEFX<*ndTZ#%;H1d0PWQ;sPWQ<1PWQ+WPxo*$ zCpU9)GbcB5ax*74^K5XIR5u%)rF*!UXXCT<7;fg-2rW5AHbhJJa5K*aY3VWC%(G!y za*S-8mhRzZo{iMfW4M`TW3}WM*$8{;Fcc4%{&{rCCA9dZs{Iw=Go{iJw}H4J9rlMBkscMKka?4V*dGW zC;x{dmiMq8gvD(vpG{xk(ev}2>9_pg&wuy1`g67#*MIt_+k5+eaQ%mN-{S%QM+!~( zxc)<2*YN+U4#l|sv%B)c7PePszGeL<)ZO)vtHtH=w09GsNfox9d|WQBPwAMB1HKi$ z5#I)1l17qNl4g>25`gi0%mT0gEC34-1PB5I0fGQQfKq@`fKq@`fKq@;fJ%T$fJ%T$ zfLefBfLefBfLeekKolSf5Cw<=%mtVWFc)Ahz+8YvfJT5ufJT5u0A#CaDG)Nzv=opE zMO*%@0IdS81gZg+MP*A>0a;*L*S;zQ^1P%)r9keM))iGXNaa8-mb9xN$g|SAj;op= zlS*1t6=X?iT~QSVc~H`#(jdo4>x!y6$YPQf)rV9dQiVt*BGrggBvO?~WSR`0jpG)F zR$z95<=;=bg;QIfR|BZ{k=7%FW59w-S{C9wpVT}I{Ao4pNVj%vb z{pbH+{)aiAzW>2BZdt7HACK|hLCzZHZZvnf_-l0|IXl~}=T~SgCPR@QPL^Kc(9Lpj 
zv56@U!e<=Br@-+2fA>qk!2KVorji&Q8CK+X>e0g#)6 z@dUuK3}6apYu09*vX znm!5vu=b8Z0L;v^6bQklmI7jCCS}XN6`)n1l|VJX%uKdX6)-c?y7pBeFf)@Dl>##} ztt+Z(U}h#Qst0CfT31vh!CNoVqM~4CrgcSC7r2GAs4|$DXJmbGf$=ejIaDU{qjK;EfgdABYgg9smFU literal 0 HcmV?d00001 diff --git a/test/cases/tar/star.tar b/test/cases/tar/star.tar new file mode 100644 index 0000000000000000000000000000000000000000..59e2d4e604611eeac3e2a0f3d6f71d2623c50449 GIT binary patch literal 3072 zcmeHHT?)e>4DRzz;R#BjQ;)ERouag*479>@u-$$NbG3!-WyoMlUh?xvNIv}HZD&jy zuA!-C5KZlY0Y@bP833Zfm_JQ2M2yBQ2dTK6K{>VDLBV=D{z>IvVF` zUD#xgUGh?F1AikeIW6NnOIrMRGU4UU`62nAWxyx>^STEhN#m{lQEc_E4Gd!2kkq(FP`FW-vLBN(KWX14Anam zFj`@3XrNG#Sdw8&v*@Q?!WuA>xdvlQlIJEmL^~{RXSF)#oEGZPaA5Df(AG_n|kZD?R%Y|fxyKx21-jV~=ONh|_7iBh{jE;leR XgnG`9QsYMTkA}c#2#kgRtwR6+ua*#N literal 0 HcmV?d00001 diff --git a/test/cases/tar/v7.tar b/test/cases/tar/v7.tar new file mode 100644 index 0000000000000000000000000000000000000000..eb65fc9410721efd98cb7c5e274f547ec530252d GIT binary patch literal 3584 zcmeHH&ubGw6n?3{z(^13q0owB@t_Dbv)ODTf|dgec!sXT^AK{$jKc@-kWdUe(&uh-&e0L+xARj zwLzm>LI~3|1sT#R&XkBIzWbfCPrYEK7fr^Q@7vXO;&pw$QCTT3-?&yO+jq(<{6qS`FS_vP zIBhMBjnmsnS~{|C9LMN8#r!W{zj5l&zcE?^U_t*||1zJ{zqInH{-Zy}2$O|c?WSFx zxn8RtM3-UpAJiW`Z@Zar#$ojz)NjtWBfnULUzD=jj5!>iG>O2k{o(=ZAg=$-urC7q zVm{n!{kK`S@p|Vk`q%aFg#nw)bMB-40yAj*%7=F37m@ziFINBH7pTSD@Cfil^^9T6 zxL-iu+Aq)#ev#CF(l2&S@A^eC<`;^e4{ZQ#s9$Y4r}$iP3;;e3V;a&MNN*s$f%FFc H(;N5+1FUK9 literal 0 HcmV?d00001 diff --git a/test/cases/tar/writer-big-long.tar b/test/cases/tar/writer-big-long.tar new file mode 100644 index 0000000000000000000000000000000000000000..09fc5dd3dd7fc5de3b6d22461fa23152fd499a41 GIT binary patch literal 1536 zcmdT@%L>CF5cHg{@C({q<0CobV>|>an6xwspR=A26VBn zaX^Kt?_`f1Z)$wO|DLIY?J3lf^OSK^BIf(k6O@V|Yg?ui2v9Nql1oNtkxU->H^Dlm V!!&06{D$v3HgTJv%r&)bzX73UunhnJ literal 0 HcmV?d00001 diff --git a/test/cases/tar/writer-big.tar b/test/cases/tar/writer-big.tar new file mode 100644 index 
0000000000000000000000000000000000000000..435dcbce6abc74dc5efa1f4dd34129eb7701c697 GIT binary patch literal 512 zcmXTPEzmbKOV3Q#E2$`9pbanp0y7f>1_L8QGgAXY1_J{_Lo-7Ih6adrEFj9z(8$cl z$k@=p6sXS7*wDzBLBRkbPg}CIxFoSiL4hHqxH30CFQpilZK-+bxTL`1AWs4T&~spb JSM4B@1OOWT7M=hA literal 0 HcmV?d00001 diff --git a/test/cases/tar/xattrs.tar b/test/cases/tar/xattrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..9701950edd1f0dc82858b7117136b37391be0b08 GIT binary patch literal 5120 zcmeHJv2KGf5M|~o_yWg1+khiw>d;i}P^nX=$R$ooYd`|ilD{uBAv6g^kxC>6-(uu< zHg^v_-l5r}td>fyRbC(vfcdOQq}Iq(#u+Ja9X?}Dv(|CCVoJF~09ZgF;2a!G7^%~| zYNYoMUQ-rE=5KzzBJ^EKyr-Mx-NQ4gq%k=v3zee}wOxElT`HH-ei(K*xV|_} zC{$GDvDuoW?o>&odUrVuVHkt_w?IH zW3PV_@V!Jxt@A^i>Yrj(>;K=H?5X8!tJS~MYVd#a^`?|QJKb&Uduf~MfN4M7$J!Lr zF40zZMF!9x{tqJ#0F5+;{2!=)=Knre|G(mAKU`hAc#r>!#{V(9d;sW1hxVv7@B_zF ze)#eKF~#1~>@WTI`#+&4`lkel_5U6!N8h^5vRAE8lqGgr9-Ul!p=H1_U>TS&1K)l2 B)fNB% literal 0 HcmV?d00001 From a75fd4ff156abdd131c2b609b9b19573813838a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 23:55:07 +0100 Subject: [PATCH 24/29] tar: move test cases to std/tar/testdata Create std/tar/test.zig for test which uses cases from testdata. 
--- lib/std/tar.zig | 374 +----------------- lib/std/tar/test.zig | 373 +++++++++++++++++ .../std/tar/testdata}/gnu-incremental.tar | Bin .../std/tar/testdata}/gnu-long-nul.tar | Bin .../std/tar/testdata}/gnu-multi-hdrs.tar | Bin .../std/tar/testdata}/gnu-not-utf8.tar | Bin .../tar => lib/std/tar/testdata}/gnu-utf8.tar | Bin .../tar => lib/std/tar/testdata}/gnu.tar | Bin .../std/tar/testdata}/invalid-go17.tar | Bin .../std/tar/testdata}/issue10968.tar | Bin .../std/tar/testdata}/issue11169.tar | Bin .../std/tar/testdata}/issue12435.tar | Bin .../tar => lib/std/tar/testdata}/neg-size.tar | Bin .../tar => lib/std/tar/testdata}/nil-uid.tar | Bin .../std/tar/testdata}/pax-bad-hdr-file.tar | Bin .../std/tar/testdata}/pax-global-records.tar | Bin .../std/tar/testdata}/pax-multi-hdrs.tar | Bin .../std/tar/testdata}/pax-nul-path.tar | Bin .../std/tar/testdata}/pax-nul-xattrs.tar | Bin .../std/tar/testdata}/pax-pos-size-file.tar | Bin .../std/tar/testdata}/pax-records.tar | Bin .../tar => lib/std/tar/testdata}/pax.tar | Bin .../std/tar/testdata}/sparse-formats.tar | Bin .../tar => lib/std/tar/testdata}/star.tar | Bin .../std/tar/testdata}/trailing-slash.tar | Bin .../std/tar/testdata}/ustar-file-devs.tar | Bin .../cases/tar => lib/std/tar/testdata}/v7.tar | Bin .../std/tar/testdata}/writer-big-long.tar | Bin .../std/tar/testdata}/writer-big.tar | Bin .../tar => lib/std/tar/testdata}/xattrs.tar | Bin 30 files changed, 377 insertions(+), 370 deletions(-) create mode 100644 lib/std/tar/test.zig rename {test/cases/tar => lib/std/tar/testdata}/gnu-incremental.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-long-nul.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-multi-hdrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-not-utf8.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-utf8.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/invalid-go17.tar 
(100%) rename {test/cases/tar => lib/std/tar/testdata}/issue10968.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/issue11169.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/issue12435.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/neg-size.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/nil-uid.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-bad-hdr-file.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-global-records.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-multi-hdrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-nul-path.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-nul-xattrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-pos-size-file.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-records.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/sparse-formats.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/star.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/trailing-slash.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/ustar-file-devs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/v7.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/writer-big-long.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/writer-big.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/xattrs.tar (100%) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index ff8cfd4a36cc..4f6824de1fa4 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -601,376 +601,6 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -test "tar run Go test cases" { - const Case = struct { - const File = struct { - name: []const u8, - size: usize = 0, - mode: u32 = 0, - link_name: []const u8 = &[0]u8{}, - kind: Header.Kind = .normal, - truncated: bool = false, // when there is no file 
body, just header, usefull for huge files - }; - - path: []const u8, // path to the tar archive file on dis - files: []const File = &[_]@This().File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content - err: ?anyerror = null, // parsing should fail with this error - }; - - const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; - const test_dir = try std.fs.cwd().openDir(src_path ++ "/../../test/cases/tar", .{}); - - const cases = [_]Case{ - .{ - .path = "gnu.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "sparse-formats.tar", - .err = error.TarUnsupportedHeader, - }, - .{ - .path = "star.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "v7.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o444, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o444, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "pax.tar", - .files = &[_]Case.File{ - .{ - .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - .size = 7, - .mode = 0o664, - }, - .{ - .name = "a/b", - .size = 0, - .kind = .symbolic_link, - .mode = 0o777, - .link_name = 
"123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - }, - }, - .chksums = &[_][]const u8{ - "3c382e8f5b6631aa2db52643912ffd4a", - }, - }, - .{ - // pax attribute don't end with \n - .path = "pax-bad-hdr-file.tar", - .err = error.PaxInvalidAttributeEnd, - }, - .{ - // size is in pax attribute - .path = "pax-pos-size-file.tar", - .files = &[_]Case.File{ - .{ - .name = "foo", - .size = 999, - .kind = .normal, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "0afb597b283fe61b5d4879669a350556", - }, - }, - .{ - // has pax records which we are not interested in - .path = "pax-records.tar", - .files = &[_]Case.File{ - .{ - .name = "file", - }, - }, - }, - .{ - // has global records which we are ignoring - .path = "pax-global-records.tar", - .files = &[_]Case.File{ - .{ - .name = "file1", - }, - .{ - .name = "file2", - }, - .{ - .name = "file3", - }, - .{ - .name = "file4", - }, - }, - }, - .{ - .path = "nil-uid.tar", - .files = &[_]Case.File{ - .{ - .name = "P1050238.JPG.log", - .size = 14, - .kind = .normal, - .mode = 0o664, - }, - }, - .chksums = &[_][]const u8{ - "08d504674115e77a67244beac19668f5", - }, - }, - .{ - // has xattrs and pax records which we are ignoring - .path = "xattrs.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .kind = .normal, - .mode = 0o644, - }, - .{ - .name = "small2.txt", - .size = 11, - .kind = .normal, - .mode = 0o644, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "gnu-multi-hdrs.tar", - .files = &[_]Case.File{ - .{ - .name = "GNU2/GNU2/long-path-name", - .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - // has gnu type D (directory) and S (sparse) blocks - .path = "gnu-incremental.tar", - .err = error.TarUnsupportedHeader, - }, 
- .{ - // should use values only from last pax header - .path = "pax-multi-hdrs.tar", - .files = &[_]Case.File{ - .{ - .name = "bar", - .link_name = "PAX4/PAX4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - .path = "gnu-long-nul.tar", - .files = &[_]Case.File{ - .{ - .name = "0123456789", - .mode = 0o644, - }, - }, - }, - .{ - .path = "gnu-utf8.tar", - .files = &[_]Case.File{ - .{ - .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", - .mode = 0o644, - }, - }, - }, - .{ - .path = "gnu-not-utf8.tar", - .files = &[_]Case.File{ - .{ - .name = "hi\x80\x81\x82\x83bye", - .mode = 0o644, - }, - }, - }, - .{ - // null in pax key - .path = "pax-nul-xattrs.tar", - .err = error.PaxNullInKeyword, - }, - .{ - .path = "pax-nul-path.tar", - .err = error.PaxNullInValue, - }, - .{ - .path = "neg-size.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue10968.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue11169.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue12435.tar", - .err = error.TarHeaderChksum, - }, - .{ - // has magic with space at end instead of null - .path = "invalid-go17.tar", - .files = &[_]Case.File{ - .{ - .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", - }, - }, - }, - .{ - .path = "ustar-file-devs.tar", - .files = &[_]Case.File{ - .{ - .name = "file", - .mode = 0o644, - }, - }, - }, - .{ - .path = "trailing-slash.tar", - .files = &[_]Case.File{ - .{ - .name = "123456789/" ** 30, - .kind = .directory, - }, - }, - }, - .{ - // Has size in gnu extended format. To represent size bigger than 8 GB. - .path = "writer-big.tar", - .files = &[_]Case.File{ - .{ - .name = "tmp/16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .truncated = true, - .mode = 0o640, - }, - }, - }, - .{ - // Size in gnu extended format, and name in pax attribute. 
- .path = "writer-big-long.tar", - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - }; - - for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - - defer fs_file.close(); - - var iter = tarReader(fs_file.reader(), null); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try std.testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.write(&md5writer); - const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (!expected.truncated) try actual.skip(); // skip file content - } - } - try std.testing.expectEqual(case.files.len, i); - } -} - -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; - test "tar PaxReader" { const Attr = struct { kind: PaxAttributeKind, @@ -1094,3 +724,7 @@ test "tar PaxReader" { try std.testing.expect(case.err == null); } } + +test { + _ = @import("tar/test.zig"); +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig new file mode 100644 index 000000000000..f6a9fa1cfdec 
--- /dev/null +++ b/lib/std/tar/test.zig @@ -0,0 +1,373 @@ +const std = @import("../std.zig"); +const tar = std.tar; +const assert = std.debug.assert; + +test "tar run Go test cases" { + const Case = struct { + const File = struct { + name: []const u8, + size: usize = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + kind: tar.Header.Kind = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; + + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error + }; + + const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; + const test_dir = try std.fs.cwd().openDir(src_path ++ "/testdata", .{}); + + const cases = [_]Case{ + .{ + .path = "gnu.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "sparse-formats.tar", + .err = error.TarUnsupportedHeader, + }, + .{ + .path = "star.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "v7.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o444, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o444, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "pax.tar", + .files = &[_]Case.File{ + .{ + .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .mode = 0o664, + }, + .{ + .name = "a/b", + .size = 0, + .kind = .symbolic_link, + .mode = 0o777, + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + .{ + // pax attribute don't end with \n + .path = "pax-bad-hdr-file.tar", + .err = error.PaxInvalidAttributeEnd, + }, + .{ + // size is in pax attribute + .path = "pax-pos-size-file.tar", + .files = &[_]Case.File{ + .{ + .name = "foo", + .size = 999, + .kind = .normal, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "0afb597b283fe61b5d4879669a350556", + }, + }, + .{ + // has pax records which we are not interested in + .path = "pax-records.tar", + .files = &[_]Case.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .path = "pax-global-records.tar", + .files = &[_]Case.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .path = "nil-uid.tar", + .files = &[_]Case.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .kind = .normal, + .mode = 0o664, + }, + }, + .chksums = &[_][]const u8{ + "08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .path = "xattrs.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .kind = .normal, + .mode = 0o644, + }, + .{ + .name = "small2.txt", + .size = 11, + .kind = .normal, + .mode = 0o644, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + 
.{ + .path = "gnu-multi-hdrs.tar", + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + // has gnu type D (directory) and S (sparse) blocks + .path = "gnu-incremental.tar", + .err = error.TarUnsupportedHeader, + }, + .{ + // should use values only from last pax header + .path = "pax-multi-hdrs.tar", + .files = &[_]Case.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + .path = "gnu-long-nul.tar", + .files = &[_]Case.File{ + .{ + .name = "0123456789", + .mode = 0o644, + }, + }, + }, + .{ + .path = "gnu-utf8.tar", + .files = &[_]Case.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, + }, + }, + }, + .{ + .path = "gnu-not-utf8.tar", + .files = &[_]Case.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, + }, + }, + }, + .{ + // null in pax key + .path = "pax-nul-xattrs.tar", + .err = error.PaxNullInKeyword, + }, + .{ + .path = "pax-nul-path.tar", + .err = error.PaxNullInValue, + }, + .{ + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue10968.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue11169.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue12435.tar", + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .path = "invalid-go17.tar", + .files = &[_]Case.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .path = "ustar-file-devs.tar", + .files = &[_]Case.File{ + .{ + .name = "file", + .mode = 0o644, + }, + }, + }, + .{ + .path = "trailing-slash.tar", + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, + }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. 
+ .path = "writer-big.tar", + .files = &[_]Case.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + .mode = 0o640, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. + .path = "writer-big-long.tar", + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + }; + + for (cases) |case| { + var fs_file = try test_dir.openFile(case.path, .{}); + + defer fs_file.close(); + + var iter = tar.tarReader(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.kind, actual.kind); + try std.testing.expectEqual(expected.mode, actual.mode); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + + if (case.chksums.len > i) { + var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (!expected.truncated) try actual.skip(); // skip file content + } + } + try std.testing.expectEqual(case.files.len, i); + } +} + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; diff --git a/test/cases/tar/gnu-incremental.tar 
b/lib/std/tar/testdata/gnu-incremental.tar similarity index 100% rename from test/cases/tar/gnu-incremental.tar rename to lib/std/tar/testdata/gnu-incremental.tar diff --git a/test/cases/tar/gnu-long-nul.tar b/lib/std/tar/testdata/gnu-long-nul.tar similarity index 100% rename from test/cases/tar/gnu-long-nul.tar rename to lib/std/tar/testdata/gnu-long-nul.tar diff --git a/test/cases/tar/gnu-multi-hdrs.tar b/lib/std/tar/testdata/gnu-multi-hdrs.tar similarity index 100% rename from test/cases/tar/gnu-multi-hdrs.tar rename to lib/std/tar/testdata/gnu-multi-hdrs.tar diff --git a/test/cases/tar/gnu-not-utf8.tar b/lib/std/tar/testdata/gnu-not-utf8.tar similarity index 100% rename from test/cases/tar/gnu-not-utf8.tar rename to lib/std/tar/testdata/gnu-not-utf8.tar diff --git a/test/cases/tar/gnu-utf8.tar b/lib/std/tar/testdata/gnu-utf8.tar similarity index 100% rename from test/cases/tar/gnu-utf8.tar rename to lib/std/tar/testdata/gnu-utf8.tar diff --git a/test/cases/tar/gnu.tar b/lib/std/tar/testdata/gnu.tar similarity index 100% rename from test/cases/tar/gnu.tar rename to lib/std/tar/testdata/gnu.tar diff --git a/test/cases/tar/invalid-go17.tar b/lib/std/tar/testdata/invalid-go17.tar similarity index 100% rename from test/cases/tar/invalid-go17.tar rename to lib/std/tar/testdata/invalid-go17.tar diff --git a/test/cases/tar/issue10968.tar b/lib/std/tar/testdata/issue10968.tar similarity index 100% rename from test/cases/tar/issue10968.tar rename to lib/std/tar/testdata/issue10968.tar diff --git a/test/cases/tar/issue11169.tar b/lib/std/tar/testdata/issue11169.tar similarity index 100% rename from test/cases/tar/issue11169.tar rename to lib/std/tar/testdata/issue11169.tar diff --git a/test/cases/tar/issue12435.tar b/lib/std/tar/testdata/issue12435.tar similarity index 100% rename from test/cases/tar/issue12435.tar rename to lib/std/tar/testdata/issue12435.tar diff --git a/test/cases/tar/neg-size.tar b/lib/std/tar/testdata/neg-size.tar similarity index 100% rename from 
test/cases/tar/neg-size.tar rename to lib/std/tar/testdata/neg-size.tar diff --git a/test/cases/tar/nil-uid.tar b/lib/std/tar/testdata/nil-uid.tar similarity index 100% rename from test/cases/tar/nil-uid.tar rename to lib/std/tar/testdata/nil-uid.tar diff --git a/test/cases/tar/pax-bad-hdr-file.tar b/lib/std/tar/testdata/pax-bad-hdr-file.tar similarity index 100% rename from test/cases/tar/pax-bad-hdr-file.tar rename to lib/std/tar/testdata/pax-bad-hdr-file.tar diff --git a/test/cases/tar/pax-global-records.tar b/lib/std/tar/testdata/pax-global-records.tar similarity index 100% rename from test/cases/tar/pax-global-records.tar rename to lib/std/tar/testdata/pax-global-records.tar diff --git a/test/cases/tar/pax-multi-hdrs.tar b/lib/std/tar/testdata/pax-multi-hdrs.tar similarity index 100% rename from test/cases/tar/pax-multi-hdrs.tar rename to lib/std/tar/testdata/pax-multi-hdrs.tar diff --git a/test/cases/tar/pax-nul-path.tar b/lib/std/tar/testdata/pax-nul-path.tar similarity index 100% rename from test/cases/tar/pax-nul-path.tar rename to lib/std/tar/testdata/pax-nul-path.tar diff --git a/test/cases/tar/pax-nul-xattrs.tar b/lib/std/tar/testdata/pax-nul-xattrs.tar similarity index 100% rename from test/cases/tar/pax-nul-xattrs.tar rename to lib/std/tar/testdata/pax-nul-xattrs.tar diff --git a/test/cases/tar/pax-pos-size-file.tar b/lib/std/tar/testdata/pax-pos-size-file.tar similarity index 100% rename from test/cases/tar/pax-pos-size-file.tar rename to lib/std/tar/testdata/pax-pos-size-file.tar diff --git a/test/cases/tar/pax-records.tar b/lib/std/tar/testdata/pax-records.tar similarity index 100% rename from test/cases/tar/pax-records.tar rename to lib/std/tar/testdata/pax-records.tar diff --git a/test/cases/tar/pax.tar b/lib/std/tar/testdata/pax.tar similarity index 100% rename from test/cases/tar/pax.tar rename to lib/std/tar/testdata/pax.tar diff --git a/test/cases/tar/sparse-formats.tar b/lib/std/tar/testdata/sparse-formats.tar similarity index 100% rename 
from test/cases/tar/sparse-formats.tar rename to lib/std/tar/testdata/sparse-formats.tar diff --git a/test/cases/tar/star.tar b/lib/std/tar/testdata/star.tar similarity index 100% rename from test/cases/tar/star.tar rename to lib/std/tar/testdata/star.tar diff --git a/test/cases/tar/trailing-slash.tar b/lib/std/tar/testdata/trailing-slash.tar similarity index 100% rename from test/cases/tar/trailing-slash.tar rename to lib/std/tar/testdata/trailing-slash.tar diff --git a/test/cases/tar/ustar-file-devs.tar b/lib/std/tar/testdata/ustar-file-devs.tar similarity index 100% rename from test/cases/tar/ustar-file-devs.tar rename to lib/std/tar/testdata/ustar-file-devs.tar diff --git a/test/cases/tar/v7.tar b/lib/std/tar/testdata/v7.tar similarity index 100% rename from test/cases/tar/v7.tar rename to lib/std/tar/testdata/v7.tar diff --git a/test/cases/tar/writer-big-long.tar b/lib/std/tar/testdata/writer-big-long.tar similarity index 100% rename from test/cases/tar/writer-big-long.tar rename to lib/std/tar/testdata/writer-big-long.tar diff --git a/test/cases/tar/writer-big.tar b/lib/std/tar/testdata/writer-big.tar similarity index 100% rename from test/cases/tar/writer-big.tar rename to lib/std/tar/testdata/writer-big.tar diff --git a/test/cases/tar/xattrs.tar b/lib/std/tar/testdata/xattrs.tar similarity index 100% rename from test/cases/tar/xattrs.tar rename to lib/std/tar/testdata/xattrs.tar From 76fe1f53d5b9cfae100854afe495e8b378d2dc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 14:18:20 +0100 Subject: [PATCH 25/29] tar: fix tests on 32-bit platforms --- lib/std/tar.zig | 20 ++++++++++---------- lib/std/tar/test.zig | 5 ++++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 4f6824de1fa4..376a6ad23027 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -250,7 +250,7 @@ fn TarReader(comptime ReaderType: type) type { pub const File = struct { name: []const u8, // name of file, 
symlink or directory link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes + size: u64, // size of the file in bytes mode: u32, kind: Header.Kind, @@ -260,7 +260,7 @@ fn TarReader(comptime ReaderType: type) type { pub fn write(self: File, writer: anytype) !void { var buffer: [4096]u8 = undefined; - var n: usize = 0; + var n: u64 = 0; while (n < self.size) { const buf = buffer[0..@min(buffer.len, self.size - n)]; try self.reader.readNoEof(buf); @@ -308,9 +308,9 @@ fn TarReader(comptime ReaderType: type) type { } // Number of padding bytes in the last file block. - inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary - return block_rounded - size; + inline fn blockPadding(size: u64) usize { + const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary + return @intCast(block_rounded - size); } /// Iterates through the tar archive as if it is a series of files. @@ -324,7 +324,7 @@ fn TarReader(comptime ReaderType: type) type { while (try self.readHeader()) |header| { const kind = header.kind(); - const size: usize = @intCast(try header.size()); + const size: u64 = try header.size(); self.padding = blockPadding(size); switch (kind) { @@ -349,16 +349,16 @@ fn TarReader(comptime ReaderType: type) type { }, // Prefix header types .gnu_long_name => { - self.file.name = try self.readString(size, &self.file_name_buffer); + self.file.name = try self.readString(@intCast(size), &self.file_name_buffer); }, .gnu_long_link => { - self.file.link_name = try self.readString(size, &self.link_name_buffer); + self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer); }, .extended_header => { // Use just attributes from last extended header. 
self.initFile(); - var rdr = paxReader(self.reader, size); + var rdr = paxReader(self.reader, @intCast(size)); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { @@ -369,7 +369,7 @@ fn TarReader(comptime ReaderType: type) type { }, .size => { var buf: [64]u8 = undefined; - self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); + self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); }, } } diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index f6a9fa1cfdec..1265050dd2cd 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -1,12 +1,15 @@ const std = @import("../std.zig"); +const builtin = @import("builtin"); const tar = std.tar; const assert = std.debug.assert; test "tar run Go test cases" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + const Case = struct { const File = struct { name: []const u8, - size: usize = 0, + size: u64 = 0, mode: u32 = 0, link_name: []const u8 = &[0]u8{}, kind: tar.Header.Kind = .normal, From e21a12e56b21aee956132dd5f68bc2349ce37c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 18:35:42 +0100 Subject: [PATCH 26/29] tar: use @embedFile in tests Like in other tests which uses testdata files (compress). That enables wasi testing also, was failing because file system operations in tests. 
--- lib/std/tar/test.zig | 72 ++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 1265050dd2cd..16f3b565b015 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -4,8 +4,6 @@ const tar = std.tar; const assert = std.debug.assert; test "tar run Go test cases" { - if (builtin.os.tag == .wasi) return error.SkipZigTest; - const Case = struct { const File = struct { name: []const u8, @@ -16,18 +14,15 @@ test "tar run Go test cases" { truncated: bool = false, // when there is no file body, just header, usefull for huge files }; - path: []const u8, // path to the tar archive file on dis + data: []const u8, // testdata file content files: []const File = &[_]@This().File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content err: ?anyerror = null, // parsing should fail with this error }; - const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; - const test_dir = try std.fs.cwd().openDir(src_path ++ "/testdata", .{}); - const cases = [_]Case{ .{ - .path = "gnu.tar", + .data = @embedFile("testdata/gnu.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -46,11 +41,11 @@ test "tar run Go test cases" { }, }, .{ - .path = "sparse-formats.tar", + .data = @embedFile("testdata/sparse-formats.tar"), .err = error.TarUnsupportedHeader, }, .{ - .path = "star.tar", + .data = @embedFile("testdata/star.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -69,7 +64,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "v7.tar", + .data = @embedFile("testdata/v7.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -88,7 +83,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "pax.tar", + .data = @embedFile("testdata/pax.tar"), .files = &[_]Case.File{ .{ .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", @@ -109,12 +104,12 @@ test "tar run Go test cases" { }, .{ // pax attribute don't end with \n - .path = "pax-bad-hdr-file.tar", + .data = @embedFile("testdata/pax-bad-hdr-file.tar"), .err = error.PaxInvalidAttributeEnd, }, .{ // size is in pax attribute - .path = "pax-pos-size-file.tar", + .data = @embedFile("testdata/pax-pos-size-file.tar"), .files = &[_]Case.File{ .{ .name = "foo", @@ -129,7 +124,7 @@ test "tar run Go test cases" { }, .{ // has pax records which we are not interested in - .path = "pax-records.tar", + .data = @embedFile("testdata/pax-records.tar"), .files = &[_]Case.File{ .{ .name = "file", @@ -138,7 +133,7 @@ test "tar run Go test cases" { }, .{ // has global records which we are ignoring - .path = "pax-global-records.tar", + .data = @embedFile("testdata/pax-global-records.tar"), .files = &[_]Case.File{ .{ .name = "file1", @@ -155,7 +150,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "nil-uid.tar", + .data = @embedFile("testdata/nil-uid.tar"), .files = &[_]Case.File{ .{ .name = "P1050238.JPG.log", @@ -170,7 +165,7 @@ test "tar run Go test cases" { }, .{ // has xattrs and pax records which we are ignoring - .path = "xattrs.tar", + .data = @embedFile("testdata/xattrs.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -191,7 +186,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-multi-hdrs.tar", + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), .files = &[_]Case.File{ .{ .name = "GNU2/GNU2/long-path-name", @@ -202,12 +197,12 @@ test "tar run Go test cases" { }, .{ // has gnu type D (directory) and S (sparse) blocks - .path = "gnu-incremental.tar", + .data = @embedFile("testdata/gnu-incremental.tar"), .err = error.TarUnsupportedHeader, }, .{ // should use values only from last pax header - .path = "pax-multi-hdrs.tar", + .data = 
@embedFile("testdata/pax-multi-hdrs.tar"), .files = &[_]Case.File{ .{ .name = "bar", @@ -217,7 +212,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-long-nul.tar", + .data = @embedFile("testdata/gnu-long-nul.tar"), .files = &[_]Case.File{ .{ .name = "0123456789", @@ -226,7 +221,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-utf8.tar", + .data = @embedFile("testdata/gnu-utf8.tar"), .files = &[_]Case.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", @@ -235,7 +230,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-not-utf8.tar", + .data = @embedFile("testdata/gnu-not-utf8.tar"), .files = &[_]Case.File{ .{ .name = "hi\x80\x81\x82\x83bye", @@ -245,32 +240,32 @@ test "tar run Go test cases" { }, .{ // null in pax key - .path = "pax-nul-xattrs.tar", + .data = @embedFile("testdata/pax-nul-xattrs.tar"), .err = error.PaxNullInKeyword, }, .{ - .path = "pax-nul-path.tar", + .data = @embedFile("testdata/pax-nul-path.tar"), .err = error.PaxNullInValue, }, .{ - .path = "neg-size.tar", + .data = @embedFile("testdata/neg-size.tar"), .err = error.TarHeader, }, .{ - .path = "issue10968.tar", + .data = @embedFile("testdata/issue10968.tar"), .err = error.TarHeader, }, .{ - .path = "issue11169.tar", + .data = @embedFile("testdata/issue11169.tar"), .err = error.TarHeader, }, .{ - .path = "issue12435.tar", + .data = @embedFile("testdata/issue12435.tar"), .err = error.TarHeaderChksum, }, .{ // has magic with space at end instead of null - .path = "invalid-go17.tar", + .data = @embedFile("testdata/invalid-go17.tar"), .files = &[_]Case.File{ .{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", @@ -278,7 +273,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "ustar-file-devs.tar", + .data = @embedFile("testdata/ustar-file-devs.tar"), .files = &[_]Case.File{ .{ .name = "file", @@ -287,7 +282,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "trailing-slash.tar", + 
.data = @embedFile("testdata/trailing-slash.tar"), .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, @@ -297,7 +292,7 @@ test "tar run Go test cases" { }, .{ // Has size in gnu extended format. To represent size bigger than 8 GB. - .path = "writer-big.tar", + .data = @embedFile("testdata/writer-big.tar"), .files = &[_]Case.File{ .{ .name = "tmp/16gig.txt", @@ -309,7 +304,7 @@ test "tar run Go test cases" { }, .{ // Size in gnu extended format, and name in pax attribute. - .path = "writer-big-long.tar", + .data = @embedFile("testdata/writer-big-long.tar"), .files = &[_]Case.File{ .{ .name = "longname/" ** 15 ++ "16gig.txt", @@ -322,11 +317,8 @@ }; for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - - defer fs_file.close(); - - var iter = tar.tarReader(fs_file.reader(), null); + var fsb = std.io.fixedBufferStream(case.data); + var iter = tar.tarReader(fsb.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { From 7923a53996f8d24ad27823db3a45a6dd4a2bf317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 18:50:25 +0100 Subject: [PATCH 27/29] tar: rename reader to iterator Iterator has `next` function, iterates over tar files. When using from outside of module with `tar.` prefix makes more sense. var iter = tar.iterator(reader, null); while (try iter.next()) |file| { ... } --- lib/std/tar.zig | 27 ++++++++++++++------------- lib/std/tar/test.zig | 21 ++++++++++----------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 376a6ad23027..dcd56dec077e 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -225,14 +225,16 @@ fn nullStr(str: []const u8) []const u8 { return str; } -pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { +/// Iterates over files in tar archive. +/// `next` returns each file in `reader` tar archive.
+pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { return .{ .reader = reader, .diagnostics = diagnostics, }; } -fn TarReader(comptime ReaderType: type) type { +fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, diagnostics: ?*Options.Diagnostics, @@ -358,7 +360,7 @@ fn TarReader(comptime ReaderType: type) type { // Use just attributes from last extended header. self.initFile(); - var rdr = paxReader(self.reader, @intCast(size)); + var rdr = paxIterator(self.reader, @intCast(size)); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { @@ -393,10 +395,10 @@ fn TarReader(comptime ReaderType: type) type { }; } -// Pax attributes reader. -// Size is length of pax extended header in reader. -fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { - return PaxReader(@TypeOf(reader)){ +/// Pax attributes iterator. +/// Size is length of pax extended header in reader. +fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) { + return PaxIterator(@TypeOf(reader)){ .reader = reader, .size = size, }; @@ -408,7 +410,7 @@ const PaxAttributeKind = enum { size, }; -fn PaxReader(comptime ReaderType: type) type { +fn PaxIterator(comptime ReaderType: type) type { return struct { size: usize, // cumulative size of all pax attributes reader: ReaderType, @@ -508,8 +510,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, } - var iter = tarReader(reader, options.diagnostics); - + var iter = iterator(reader, options.diagnostics); while (try iter.next()) |file| { switch (file.kind) { .directory => { @@ -601,7 +602,7 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -test "tar PaxReader" { +test "tar PaxIterator" { const Attr = struct { kind: PaxAttributeKind, value: []const u8 = undefined, @@ -699,10 +700,10 @@ test "tar PaxReader" { outer: for (cases) |case| { var stream = 
std.io.fixedBufferStream(case.data); - var rdr = paxReader(stream.reader(), case.data.len); + var iter = paxIterator(stream.reader(), case.data.len); var i: usize = 0; - while (rdr.next() catch |err| { + while (iter.next() catch |err| { if (case.err) |e| { try std.testing.expectEqual(e, err); continue; diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 16f3b565b015..82c73e25466d 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -1,7 +1,6 @@ const std = @import("../std.zig"); -const builtin = @import("builtin"); const tar = std.tar; -const assert = std.debug.assert; +const testing = std.testing; test "tar run Go test cases" { const Case = struct { @@ -318,33 +317,33 @@ test "tar run Go test cases" { for (cases) |case| { var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.tarReader(fsb.reader(), null); + var iter = tar.iterator(fsb.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { - try std.testing.expectEqual(e, err); + try testing.expectEqual(e, err); continue; } else { return err; } }) |actual| : (i += 1) { const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + try testing.expectEqualStrings(expected.name, actual.name); + try testing.expectEqual(expected.size, actual.size); + try testing.expectEqual(expected.kind, actual.kind); + try testing.expectEqual(expected.mode, actual.mode); + try testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { var md5writer = Md5Writer{}; try actual.write(&md5writer); const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); + try testing.expectEqualStrings(case.chksums[i], &chksum); } 
else { if (!expected.truncated) try actual.skip(); // skip file content } } - try std.testing.expectEqual(case.files.len, i); + try testing.expectEqual(case.files.len, i); } } From 7d3a31872eda55438259b54818baaa90b6ecd74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 18 Dec 2023 21:39:07 +0100 Subject: [PATCH 28/29] tar: improve diagnostic reporting Using Python testtar file (mentioned in #14310) to test diagnostic reporting. Added computing checksum by using both unsigned and signed header bytes values. Added skipping gnu extended sparse headers while reporting unsupported header in diagnostic. Note on testing: wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O /tmp/testtar.tar ``` test "Python testtar.tar file" { const file_name = "testtar.tar"; var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{}); defer file.close(); var diag = Options.Diagnostics{ .allocator = std.testing.allocator }; defer diag.deinit(); var iter = iterator(file.reader(), &diag); while (try iter.next()) |f| { std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size }); try f.skip(); } for (diag.errors.items) |e| { switch (e) { .unsupported_file_type => |u| { std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name }); }, else => unreachable, } } } ``` --- lib/std/tar.zig | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index dcd56dec077e..3bdfd93f055b 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -105,6 +105,8 @@ pub const Header = struct { // used to store the path or link name for the next file.
gnu_long_name = 'L', gnu_long_link = 'K', + gnu_sparse = 'S', + solaris_extended_header = 'X', _, }; @@ -194,16 +196,21 @@ pub const Header = struct { return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader; } + const Chksums = struct { + unsigned: u64, + signed: i64, + }; + // Sum of all bytes in the header block. The chksum field is treated as if // it were filled with spaces (ASCII 32). - fn computeChksum(header: Header) u64 { - var sum: u64 = 0; - for (header.bytes, 0..) |b, i| { - if (148 <= i and i < 156) continue; // skip chksum field bytes - sum += b; + fn computeChksum(header: Header) Chksums { + var cs: Chksums = .{ .signed = 0, .unsigned = 0 }; + for (header.bytes, 0..) |v, i| { + const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces. + cs.unsigned += b; + cs.signed += @as(i8, @bitCast(b)); } - // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces. - return if (sum > 0) sum + 256 else 0; + return cs; } // Checks calculated chksum with value of chksum field. @@ -211,8 +218,9 @@ pub const Header = struct { // Zero value indicates empty block. 
pub fn checkChksum(header: Header) !u64 { const field = try header.chksum(); - const computed = header.computeChksum(); - if (field != computed) return error.TarHeaderChksum; + const cs = header.computeChksum(); + if (field == 0 and cs.unsigned == 256) return 0; + if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum; return field; } }; @@ -387,11 +395,25 @@ fn Iterator(comptime ReaderType: type) type { .file_name = try d.allocator.dupe(u8, header.name()), .file_type = kind, } }); + if (kind == .gnu_sparse) { + try self.skipGnuSparseExtendedHeaders(header); + } + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; }, } } return null; } + + fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void { + var is_extended = header.bytes[482] > 0; + while (is_extended) { + var buf: [Header.SIZE]u8 = undefined; + const n = try self.reader.readAll(&buf); + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + is_extended = buf[504] > 0; + } + } }; } From 3f809cbe7ded23a236f98eb1809fc7cda65021e1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 12 Jan 2024 17:51:44 -0700 Subject: [PATCH 29/29] build.zig: don't install testdata --- build.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.zig b/build.zig index 504eb9a386e9..1d44c249cc51 100644 --- a/build.zig +++ b/build.zig @@ -165,6 +165,8 @@ pub fn build(b: *std.Build) !void { ".xz", // exclude files from lib/std/tz/ ".tzif", + // exclude files from lib/std/tar/testdata + ".tar", // others "README.md", },