From ff8544daa59fa27b1e5c90e5ccdb6c513d719d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 14:51:51 +0100 Subject: [PATCH 01/29] tar: refactor code to be more testable Split reading/parsing tar file and writing results to the disk in two separate steps. So we can later test parsing part without need to write everyting to the disk. --- lib/std/tar.zig | 242 ++++++++++++++++++++++++++++++------------------ 1 file changed, 153 insertions(+), 89 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index c39cc6e4323e..b41f0d8683c1 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -174,6 +174,144 @@ const Buffer = struct { } }; +fn Iterator(comptime ReaderType: type) type { + return struct { + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + file_name_len: usize = 0, + buffer: Buffer = .{}, + reader: ReaderType, + pad_len: usize = 0, + diagnostics: ?*Options.Diagnostics, + + const Self = @This(); + + const File = struct { + file_name: []const u8, + link_name: []const u8, + size: usize, + file_type: Header.FileType, + iter: *Self, + + pub fn write(self: File, writer: anytype) !void { + const rounded_file_size = std.mem.alignForward(u64, self.size, 512); + var file_off: usize = 0; + while (true) { + const temp = try self.iter.buffer.readChunk(self.iter.reader, @intCast(rounded_file_size + 512 - file_off)); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@intCast(@min(self.size - file_off, temp.len))]; + try writer.writeAll(slice); + + file_off += slice.len; + self.iter.buffer.advance(slice.len); + if (file_off >= self.size) { + return; + // self.iter.buffer.advance(pad_len); + // continue :header; + } + } + } + + pub fn skip(self: File) void { + _ = self; + unreachable; + } + }; + + pub fn next(self: *Self) !?File { + self.buffer.advance(self.pad_len); + self.pad_len = 0; + self.file_name_len = 0; + + while (true) { + const chunk = try self.buffer.readChunk(self.reader, 1024); + 
switch (chunk.len) { + 0 => return null, + 1...511 => return error.UnexpectedEndOfStream, + else => {}, + } + self.buffer.advance(512); + + const header: Header = .{ .bytes = chunk[0..512] }; + const file_size = try header.fileSize(); + const file_type = header.fileType(); + const link_name = header.linkName(); + const rounded_file_size = std.mem.alignForward(u64, file_size, 512); + self.pad_len = @intCast(rounded_file_size - file_size); + const file_name = if (self.file_name_len == 0) + try header.fullFileName(&self.file_name_buffer) + else + self.file_name_buffer[0..self.file_name_len]; + + switch (file_type) { + .directory, .normal, .symbolic_link => { + return File{ + .file_name = file_name, + .link_name = link_name, + .size = file_size, + .file_type = file_type, + .iter = self, + }; + }, + .global_extended_header => { + self.buffer.skip(self.reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; + }, + .extended_header => { + if (file_size == 0) { + self.buffer.advance(@intCast(rounded_file_size)); + continue; + } + + const chunk_size: usize = @intCast(rounded_file_size + 512); + var data_off: usize = 0; + const file_name_override_len = while (data_off < file_size) { + const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off); + if (slice.len == 0) return error.UnexpectedEndOfStream; + const remaining_size: usize = @intCast(file_size - data_off); + const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); + + if (std.mem.eql(u8, attr_info.key, "path")) { + if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; + self.buffer.advance(attr_info.value_off); + data_off += attr_info.value_off; + break attr_info.value_len; + } + + try self.buffer.skip(self.reader, attr_info.size); + data_off += attr_info.size; + } else 0; + + var i: usize = 0; + while (i < file_name_override_len) { + const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off - i); + 
if (slice.len == 0) return error.UnexpectedEndOfStream; + const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); + @memcpy(self.file_name_buffer[i .. i + copy_size], slice[0..copy_size]); + self.buffer.advance(copy_size); + i += copy_size; + } + + try self.buffer.skip(self.reader, @intCast(rounded_file_size - data_off - file_name_override_len)); + self.file_name_len = file_name_override_len; + continue; + }, + .hard_link => return error.TarUnsupportedFileType, + else => { + const d = self.diagnostics orelse return error.TarUnsupportedFileType; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, file_name), + .file_type = file_type, + } }); + }, + } + } + } + }; +} + +pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { + return .{ .reader = reader, .diagnostics = diagnostics }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -186,37 +324,20 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); }, } - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - var file_name_override_len: usize = 0; - var buffer: Buffer = .{}; - header: while (true) { - const chunk = try buffer.readChunk(reader, 1024); - switch (chunk.len) { - 0 => return, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, - } - buffer.advance(512); - - const header: Header = .{ .bytes = chunk[0..512] }; - const file_size = try header.fileSize(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - const pad_len: usize = @intCast(rounded_file_size - file_size); - const unstripped_file_name = if (file_name_override_len > 0) - file_name_buffer[0..file_name_override_len] - else - try header.fullFileName(&file_name_buffer); - file_name_override_len = 0; - switch 
(header.fileType()) { + + var iter = iterator(reader, options.diagnostics); + + while (try iter.next()) |iter_file| { + switch (iter_file.file_type) { .directory => { - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(iter_file.file_name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .normal => { - if (file_size == 0 and unstripped_file_name.len == 0) return; - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + if (iter_file.size == 0 and iter_file.file_name.len == 0) return; + const file_name = try stripComponents(iter_file.file_name, options.strip_components); const file = dir.createFile(file_name, .{}) catch |err| switch (err) { error.FileNotFound => again: { @@ -240,68 +361,17 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }; defer if (file) |f| f.close(); - var file_off: usize = 0; - while (true) { - const temp = try buffer.readChunk(reader, @intCast(rounded_file_size + 512 - file_off)); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@intCast(@min(file_size - file_off, temp.len))]; - if (file) |f| try f.writeAll(slice); - - file_off += slice.len; - buffer.advance(slice.len); - if (file_off >= file_size) { - buffer.advance(pad_len); - continue :header; - } + if (file) |f| { + try iter_file.write(f); + } else { + iter_file.skip(); } }, - .extended_header => { - if (file_size == 0) { - buffer.advance(@intCast(rounded_file_size)); - continue; - } - - const chunk_size: usize = @intCast(rounded_file_size + 512); - var data_off: usize = 0; - file_name_override_len = while (data_off < file_size) { - const slice = try buffer.readChunk(reader, chunk_size - data_off); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = @intCast(file_size - data_off); - const attr_info = 
try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); - - if (std.mem.eql(u8, attr_info.key, "path")) { - if (attr_info.value_len > file_name_buffer.len) return error.NameTooLong; - buffer.advance(attr_info.value_off); - data_off += attr_info.value_off; - break attr_info.value_len; - } - - try buffer.skip(reader, attr_info.size); - data_off += attr_info.size; - } else 0; - - var i: usize = 0; - while (i < file_name_override_len) { - const slice = try buffer.readChunk(reader, chunk_size - data_off - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); - @memcpy(file_name_buffer[i .. i + copy_size], slice[0..copy_size]); - buffer.advance(copy_size); - i += copy_size; - } - - try buffer.skip(reader, @intCast(rounded_file_size - data_off - file_name_override_len)); - continue :header; - }, - .global_extended_header => { - buffer.skip(reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; - }, - .hard_link => return error.TarUnsupportedFileType, .symbolic_link => { // The file system path of the symbolic link. - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(iter_file.file_name, options.strip_components); // The data inside the symbolic link. 
- const link_name = header.linkName(); + const link_name = iter_file.link_name; dir.symLink(link_name, file_name, .{}) catch |err| again: { const code = code: { @@ -323,13 +393,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi } }); }; }, - else => |file_type| { - const d = options.diagnostics orelse return error.TarUnsupportedFileType; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, unstripped_file_name), - .file_type = file_type, - } }); - }, + else => unreachable, } } } From 4381241237fc6ff18ee889571774d929700ce7a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 17:17:28 +0100 Subject: [PATCH 02/29] tar: refactor Buffer Move reader into Buffer and make it BufferedReader. This doesn't introduce any new functionality just grouping similar things. --- lib/std/tar.zig | 174 ++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 80 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index b41f0d8683c1..9fa51bdc81da 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -136,51 +136,90 @@ pub const Header = struct { } }; -const Buffer = struct { - buffer: [512 * 8]u8 = undefined, - start: usize = 0, - end: usize = 0, +fn BufferedReader(comptime ReaderType: type) type { + return struct { + unbuffered_reader: ReaderType, + buffer: [512 * 8]u8 = undefined, + start: usize = 0, + end: usize = 0, - pub fn readChunk(b: *Buffer, reader: anytype, count: usize) ![]const u8 { - b.ensureCapacity(1024); + const Self = @This(); - const ask = @min(b.buffer.len - b.end, count -| (b.end - b.start)); - b.end += try reader.readAtLeast(b.buffer[b.end..], ask); + pub fn readChunk(self: *Self, count: usize) ![]const u8 { + self.ensureCapacity(1024); - return b.buffer[b.start..b.end]; - } + const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); + self.end += try 
self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - pub fn advance(b: *Buffer, count: usize) void { - b.start += count; - assert(b.start <= b.end); - } + return self.buffer[self.start..self.end]; + } - pub fn skip(b: *Buffer, reader: anytype, count: usize) !void { - if (b.start + count > b.end) { - try reader.skipBytes(b.start + count - b.end, .{}); - b.start = b.end; - } else { - b.advance(count); + pub fn advance(self: *Self, count: usize) void { + self.start += count; + assert(self.start <= self.end); + } + + pub fn skip(self: *Self, count: usize) !void { + if (self.start + count > self.end) { + try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); + self.start = self.end; + } else { + self.advance(count); + } } - } - inline fn ensureCapacity(b: *Buffer, count: usize) void { - if (b.buffer.len - b.start < count) { - const dest_end = b.end - b.start; - @memcpy(b.buffer[0..dest_end], b.buffer[b.start..b.end]); - b.end = dest_end; - b.start = 0; + inline fn ensureCapacity(self: *Self, count: usize) void { + if (self.buffer.len - self.start < count) { + const dest_end = self.end - self.start; + @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); + self.end = dest_end; + self.start = 0; + } } - } -}; + + pub fn write(self: *Self, writer: anytype, size: usize) !void { + const rounded_file_size = std.mem.alignForward(usize, size, 512); + const chunk_size = rounded_file_size + 512; + const pad_len: usize = rounded_file_size - size; + + var file_off: usize = 0; + while (true) { + const temp = try self.readChunk(chunk_size - file_off); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@min(size - file_off, temp.len)]; + try writer.writeAll(slice); + + file_off += slice.len; + self.advance(slice.len); + if (file_off >= size) { + self.advance(pad_len); + return; + } + } + } + + pub fn copy(self: *Self, dst_buffer: []u8, size: usize) !void { + const rounded_file_size = 
std.mem.alignForward(usize, size, 512); + const chunk_size = rounded_file_size + 512; + + var i: usize = 0; + while (i < size) { + const slice = try self.readChunk(chunk_size - i); + if (slice.len == 0) return error.UnexpectedEndOfStream; + const copy_size: usize = @min(size - i, slice.len); + @memcpy(dst_buffer[i .. i + copy_size], slice[0..copy_size]); + self.advance(copy_size); + i += copy_size; + } + } + }; +} fn Iterator(comptime ReaderType: type) type { return struct { file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, file_name_len: usize = 0, - buffer: Buffer = .{}, - reader: ReaderType, - pad_len: usize = 0, + reader: BufferedReader(ReaderType), diagnostics: ?*Options.Diagnostics, const Self = @This(); @@ -193,50 +232,32 @@ fn Iterator(comptime ReaderType: type) type { iter: *Self, pub fn write(self: File, writer: anytype) !void { - const rounded_file_size = std.mem.alignForward(u64, self.size, 512); - var file_off: usize = 0; - while (true) { - const temp = try self.iter.buffer.readChunk(self.iter.reader, @intCast(rounded_file_size + 512 - file_off)); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@intCast(@min(self.size - file_off, temp.len))]; - try writer.writeAll(slice); - - file_off += slice.len; - self.iter.buffer.advance(slice.len); - if (file_off >= self.size) { - return; - // self.iter.buffer.advance(pad_len); - // continue :header; - } - } + try self.iter.reader.write(writer, self.size); } - pub fn skip(self: File) void { - _ = self; - unreachable; + pub fn skip(self: File) !void { + const rounded_file_size = std.mem.alignForward(usize, self.size, 512); + try self.iter.reader.skip(rounded_file_size); } }; pub fn next(self: *Self) !?File { - self.buffer.advance(self.pad_len); - self.pad_len = 0; self.file_name_len = 0; - while (true) { - const chunk = try self.buffer.readChunk(self.reader, 1024); + const chunk = try self.reader.readChunk(1024); switch (chunk.len) { 0 => return null, 1...511 => return 
error.UnexpectedEndOfStream, else => {}, } - self.buffer.advance(512); + self.reader.advance(512); const header: Header = .{ .bytes = chunk[0..512] }; const file_size = try header.fileSize(); const file_type = header.fileType(); const link_name = header.linkName(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - self.pad_len = @intCast(rounded_file_size - file_size); + const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); + const file_name = if (self.file_name_len == 0) try header.fullFileName(&self.file_name_buffer) else @@ -253,44 +274,33 @@ fn Iterator(comptime ReaderType: type) type { }; }, .global_extended_header => { - self.buffer.skip(self.reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; + self.reader.skip(rounded_file_size) catch return error.TarHeadersTooBig; }, .extended_header => { - if (file_size == 0) { - self.buffer.advance(@intCast(rounded_file_size)); - continue; - } + if (file_size == 0) continue; - const chunk_size: usize = @intCast(rounded_file_size + 512); + const chunk_size: usize = rounded_file_size + 512; var data_off: usize = 0; const file_name_override_len = while (data_off < file_size) { - const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off); + const slice = try self.reader.readChunk(chunk_size - data_off); if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = @intCast(file_size - data_off); + const remaining_size: usize = file_size - data_off; const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); if (std.mem.eql(u8, attr_info.key, "path")) { if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; - self.buffer.advance(attr_info.value_off); + self.reader.advance(attr_info.value_off); data_off += attr_info.value_off; break attr_info.value_len; } - try self.buffer.skip(self.reader, attr_info.size); + try self.reader.skip(attr_info.size); 
data_off += attr_info.size; } else 0; - var i: usize = 0; - while (i < file_name_override_len) { - const slice = try self.buffer.readChunk(self.reader, chunk_size - data_off - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); - @memcpy(self.file_name_buffer[i .. i + copy_size], slice[0..copy_size]); - self.buffer.advance(copy_size); - i += copy_size; - } + try self.reader.copy(&self.file_name_buffer, file_name_override_len); - try self.buffer.skip(self.reader, @intCast(rounded_file_size - data_off - file_name_override_len)); + try self.reader.skip(rounded_file_size - data_off - file_name_override_len); self.file_name_len = file_name_override_len; continue; }, @@ -309,7 +319,11 @@ fn Iterator(comptime ReaderType: type) type { } pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { - return .{ .reader = reader, .diagnostics = diagnostics }; + const ReaderType = @TypeOf(reader); + return .{ + .reader = BufferedReader(ReaderType){ .unbuffered_reader = reader }, + .diagnostics = diagnostics, + }; } pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { @@ -364,7 +378,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi if (file) |f| { try iter_file.write(f); } else { - iter_file.skip(); + try iter_file.skip(); } }, .symbolic_link => { From 18170633754afb68c5831bfe4534c64af93ba55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 21:37:30 +0100 Subject: [PATCH 03/29] tar: add initial test cases Just adding tests, without changing functionality. 
--- lib/std/tar.zig | 334 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 319 insertions(+), 15 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9fa51bdc81da..48f6f84dfb14 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -134,6 +134,13 @@ pub const Header = struct { } return header.bytes[start..i]; } + + pub fn isZeroBlock(header: Header) bool { + for (header.bytes) |b| { + if (b != 0) return false; + } + return true; + } }; fn BufferedReader(comptime ReaderType: type) type { @@ -225,7 +232,7 @@ fn Iterator(comptime ReaderType: type) type { const Self = @This(); const File = struct { - file_name: []const u8, + name: []const u8, link_name: []const u8, size: usize, file_type: Header.FileType, @@ -239,6 +246,31 @@ fn Iterator(comptime ReaderType: type) type { const rounded_file_size = std.mem.alignForward(usize, self.size, 512); try self.iter.reader.skip(rounded_file_size); } + + fn chksum(self: File) ![16]u8 { + var cs = [_]u8{0} ** 16; + if (self.size == 0) return cs; + + var buffer: [512]u8 = undefined; + var h = std.crypto.hash.Md5.init(.{}); + + var remaining_bytes: usize = self.size; + while (remaining_bytes > 0) { + const copy_size = @min(buffer.len, remaining_bytes); + try self.iter.reader.copy(&buffer, copy_size); + h.update(buffer[0..copy_size]); + remaining_bytes -= copy_size; + } + h.final(&cs); + try self.skipPadding(); + return cs; + } + + fn skipPadding(self: File) !void { + const rounded_file_size = std.mem.alignForward(usize, self.size, 512); + const pad_len: usize = rounded_file_size - self.size; + self.iter.reader.advance(pad_len); + } }; pub fn next(self: *Self) !?File { @@ -253,6 +285,7 @@ fn Iterator(comptime ReaderType: type) type { self.reader.advance(512); const header: Header = .{ .bytes = chunk[0..512] }; + if (header.isZeroBlock()) return null; const file_size = try header.fileSize(); const file_type = header.fileType(); const link_name = header.linkName(); @@ -266,10 +299,10 @@ fn Iterator(comptime 
ReaderType: type) type { switch (file_type) { .directory, .normal, .symbolic_link => { return File{ - .file_name = file_name, - .link_name = link_name, + .name = file_name, .size = file_size, .file_type = file_type, + .link_name = link_name, .iter = self, }; }, @@ -341,19 +374,19 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var iter = iterator(reader, options.diagnostics); - while (try iter.next()) |iter_file| { - switch (iter_file.file_type) { + while (try iter.next()) |file| { + switch (file.file_type) { .directory => { - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .normal => { - if (iter_file.size == 0 and iter_file.file_name.len == 0) return; - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + if (file.size == 0 and file.name.len == 0) return; + const file_name = try stripComponents(file.name, options.strip_components); - const file = dir.createFile(file_name, .{}) catch |err| switch (err) { + const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) { error.FileNotFound => again: { const code = code: { if (std.fs.path.dirname(file_name)) |dir_name| { @@ -373,19 +406,19 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, else => |e| return e, }; - defer if (file) |f| f.close(); + defer if (fs_file) |f| f.close(); - if (file) |f| { - try iter_file.write(f); + if (fs_file) |f| { + try file.write(f); } else { - try iter_file.skip(); + try file.skip(); } }, .symbolic_link => { // The file system path of the symbolic link. - const file_name = try stripComponents(iter_file.file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); // The data inside the symbolic link. 
- const link_name = iter_file.link_name; + const link_name = file.link_name; dir.symLink(link_name, file_name, .{}) catch |err| again: { const code = code: { @@ -473,3 +506,274 @@ test parsePaxAttribute { const std = @import("std.zig"); const assert = std.debug.assert; + +const TestCase = struct { + const File = struct { + const empty_string = &[0]u8{}; + + name: []const u8, + size: usize = 0, + link_name: []const u8 = empty_string, + file_type: Header.FileType = .normal, + }; + + path: []const u8, + files: []const File = &[_]TestCase.File{}, + chksums: []const []const u8 = &[_][]const u8{}, + err: ?anyerror = null, +}; + +test "Go test cases" { + const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); + const cases = [_]TestCase{ + .{ + .path = "gnu.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "sparse-formats.tar", + .err = error.TarUnsupportedFileType, + }, + .{ + .path = "star.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "v7.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "pax.tar", + .files = &[_]TestCase.File{ + .{ + .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .file_type = .normal, + }, + .{ + .name = "a/b", + .size = 0, + .file_type = .symbolic_link, + .link_name = "1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545", + // TODO fix reading link name from pax header + // .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + // TODO: this should fail + // .{ + // .path = "pax-bad-hdr-file.tar", + // .err = error.TarBadHeader, + // }, + // .{ + // .path = "pax-bad-mtime-file.tar", + // .err = error.TarBadHeader, + // }, + // + // TODO: giving wrong result because we are not reading pax size header + // .{ + // .path = "pax-pos-size-file.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "foo", + // .size = 999, + // .file_type = .normal, + // }, + // }, + // .chksums = &[_][]const u8{ + // "0afb597b283fe61b5d4879669a350556", + // }, + // }, + .{ + // has pax records which we are not interested in + .path = "pax-records.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .path = "pax-global-records.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .path = "nil-uid.tar", + .files = &[_]TestCase.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .path = 
"xattrs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "small.txt", + .size = 5, + .file_type = .normal, + }, + .{ + .name = "small2.txt", + .size = 11, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "gnu-multi-hdrs.tar", + .err = error.TarUnsupportedFileType, + }, + .{ + .path = "gnu-incremental.tar", + .err = error.TarUnsupportedFileType, + }, + // .{ + // .path = "pax-multi-hdrs.tar", + // }, + // .{ + // .path = "gnu-long-nul.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "012233456789", + // }, + // }, + // }, + // .{ + // .path = "gnu-utf8.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "012233456789", + // }, + // }, + // }, + // + .{ + .path = "gnu-not-utf8.tar", + .files = &[_]TestCase.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + }, + }, + }, + // TODO some files with errors: + // pax-nul-xattrs.tar, pax-nul-path.tar, neg-size.tar, issue10968.tar, issue11169.tar, issue12435.tar + .{ + .path = "trailing-slash.tar", + .files = &[_]TestCase.File{ + .{ + .name = "123456789/" ** 30, + .file_type = .directory, + }, + }, + }, + }; + + for (cases) |case| { + // if (!std.mem.eql(u8, case.path, "pax.tar")) continue; + + var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); + + var iter = iterator(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |actual| { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + + if (case.chksums.len > i) { + var actual_chksum = try actual.chksum(); + var hex_to_bytes_buffer: [16]u8 
= undefined; + const expected_chksum = try std.fmt.hexToBytes(&hex_to_bytes_buffer, case.chksums[i]); + // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); + try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); + } else { + try actual.skip(); // skip file content + } + i += 1; + } + try std.testing.expectEqual(case.files.len, i); + } +} From be5d04ab7922d84b59dad06de3df378b94827d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 27 Nov 2023 22:23:16 +0100 Subject: [PATCH 04/29] tar: add pax linkpath attribute parsing Name of symbolic link can be also found in pax attribute. --- lib/std/tar.zig | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 48f6f84dfb14..9f2fa924406f 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -226,6 +226,9 @@ fn Iterator(comptime ReaderType: type) type { return struct { file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, file_name_len: usize = 0, + link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_len: usize = 0, + reader: BufferedReader(ReaderType), diagnostics: ?*Options.Diagnostics, @@ -275,6 +278,8 @@ fn Iterator(comptime ReaderType: type) type { pub fn next(self: *Self) !?File { self.file_name_len = 0; + self.link_name_len = 0; + while (true) { const chunk = try self.reader.readChunk(1024); switch (chunk.len) { @@ -287,10 +292,12 @@ fn Iterator(comptime ReaderType: type) type { const header: Header = .{ .bytes = chunk[0..512] }; if (header.isZeroBlock()) return null; const file_size = try header.fileSize(); - const file_type = header.fileType(); - const link_name = header.linkName(); const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); - + const file_type = header.fileType(); + const link_name = if (self.link_name_len == 0) + header.linkName() + else + self.link_name_buffer[0..self.link_name_len]; const 
file_name = if (self.file_name_len == 0) try header.fullFileName(&self.file_name_buffer) else @@ -314,7 +321,7 @@ fn Iterator(comptime ReaderType: type) type { const chunk_size: usize = rounded_file_size + 512; var data_off: usize = 0; - const file_name_override_len = while (data_off < file_size) { + while (data_off < file_size) { const slice = try self.reader.readChunk(chunk_size - data_off); if (slice.len == 0) return error.UnexpectedEndOfStream; const remaining_size: usize = file_size - data_off; @@ -323,18 +330,22 @@ fn Iterator(comptime ReaderType: type) type { if (std.mem.eql(u8, attr_info.key, "path")) { if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; self.reader.advance(attr_info.value_off); - data_off += attr_info.value_off; - break attr_info.value_len; + try self.reader.copy(&self.file_name_buffer, attr_info.value_len); + self.file_name_len = attr_info.value_len; + self.reader.advance(1); + } else if (std.mem.eql(u8, attr_info.key, "linkpath")) { + if (attr_info.value_len > self.link_name_buffer.len) return error.NameTooLong; + self.reader.advance(attr_info.value_off); + try self.reader.copy(&self.link_name_buffer, attr_info.value_len); + self.link_name_len = attr_info.value_len; + self.reader.advance(1); + } else { + try self.reader.skip(attr_info.size); } - - try self.reader.skip(attr_info.size); data_off += attr_info.size; - } else 0; - - try self.reader.copy(&self.file_name_buffer, file_name_override_len); + } + try self.reader.skip(rounded_file_size - data_off); - try self.reader.skip(rounded_file_size - data_off - file_name_override_len); - self.file_name_len = file_name_override_len; continue; }, .hard_link => return error.TarUnsupportedFileType, @@ -599,9 +610,7 @@ test "Go test cases" { .name = "a/b", .size = 0, .file_type = .symbolic_link, - .link_name = "1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545", - // TODO fix reading link name from pax header - // 
.link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, }, .chksums = &[_][]const u8{ From 6d5283e83550998953f8784ba2b08a413a41baf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 28 Nov 2023 23:07:37 +0100 Subject: [PATCH 05/29] tar: refactor reader and iterator Make it more readable. --- lib/std/tar.zig | 314 +++++++++++++++++++++++++++--------------------- 1 file changed, 180 insertions(+), 134 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9f2fa924406f..9ea8f1965204 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -62,8 +62,10 @@ pub const Options = struct { }; }; +const block_size = 512; + pub const Header = struct { - bytes: *const [512]u8, + bytes: *const [block_size]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -135,7 +137,7 @@ pub const Header = struct { return header.bytes[start..i]; } - pub fn isZeroBlock(header: Header) bool { + pub fn isZero(header: Header) bool { for (header.bytes) |b| { if (b != 0) return false; } @@ -146,7 +148,7 @@ pub const Header = struct { fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, - buffer: [512 * 8]u8 = undefined, + buffer: [block_size * 8]u8 = undefined, start: usize = 0, end: usize = 0, @@ -161,6 +163,17 @@ fn BufferedReader(comptime ReaderType: type) type { return self.buffer[self.start..self.end]; } + pub fn readBlock(self: *Self) !?[]const u8 { + const block_bytes = try self.readChunk(block_size * 2); + switch (block_bytes.len) { + 0 => return null, + 1...(block_size - 1) => return error.UnexpectedEndOfStream, + else => {}, + } + 
self.advance(block_size); + return block_bytes[0..block_size]; + } + pub fn advance(self: *Self, count: usize) void { self.start += count; assert(self.start <= self.end); @@ -175,6 +188,14 @@ fn BufferedReader(comptime ReaderType: type) type { } } + pub fn skipPadding(self: *Self, file_size: usize) !void { + return self.skip(filePadding(file_size)); + } + + pub fn skipFile(self: *Self, file_size: usize) !void { + return self.skip(roundedFileSize(file_size)); + } + inline fn ensureCapacity(self: *Self, count: usize) void { if (self.buffer.len - self.start < count) { const dest_end = self.end - self.start; @@ -185,179 +206,200 @@ fn BufferedReader(comptime ReaderType: type) type { } pub fn write(self: *Self, writer: anytype, size: usize) !void { - const rounded_file_size = std.mem.alignForward(usize, size, 512); - const chunk_size = rounded_file_size + 512; - const pad_len: usize = rounded_file_size - size; - - var file_off: usize = 0; - while (true) { - const temp = try self.readChunk(chunk_size - file_off); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@min(size - file_off, temp.len)]; + var rdr = self.sliceReader(size, true); + while (try rdr.next()) |slice| { try writer.writeAll(slice); + } + } - file_off += slice.len; - self.advance(slice.len); - if (file_off >= size) { - self.advance(pad_len); - return; - } + // copy dst.len bytes into dst + pub fn copy(self: *Self, dst: []u8) ![]const u8 { + var rdr = self.sliceReader(dst.len, true); + var pos: usize = 0; + while (try rdr.next()) |slice| : (pos += slice.len) { + @memcpy(dst[pos .. 
pos + slice.len], slice); } + return dst; } - pub fn copy(self: *Self, dst_buffer: []u8, size: usize) !void { - const rounded_file_size = std.mem.alignForward(usize, size, 512); - const chunk_size = rounded_file_size + 512; - - var i: usize = 0; - while (i < size) { - const slice = try self.readChunk(chunk_size - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @min(size - i, slice.len); - @memcpy(dst_buffer[i .. i + copy_size], slice[0..copy_size]); - self.advance(copy_size); - i += copy_size; + const SliceReader = struct { + size: usize, + chunk_size: usize, + offset: usize, + reader: *Self, + auto_advance: bool, + + fn next(self: *@This()) !?[]const u8 { + if (self.offset >= self.size) return null; + + const temp = try self.reader.readChunk(self.chunk_size - self.offset); + if (temp.len == 0) return error.UnexpectedEndOfStream; + const slice = temp[0..@min(self.remainingSize(), temp.len)]; + if (self.auto_advance) try self.advance(slice.len); + return slice; + } + + fn advance(self: *@This(), len: usize) !void { + self.offset += len; + try self.reader.skip(len); } + + fn copy(self: *@This(), dst: []u8) ![]const u8 { + _ = try self.reader.copy(dst); + self.offset += dst.len; + return dst; + } + + fn remainingSize(self: *@This()) usize { + return self.size - self.offset; + } + }; + + pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { + return .{ + .size = size, + .chunk_size = roundedFileSize(size) + block_size, + .offset = 0, + .reader = self, + .auto_advance = auto_advance, + }; } }; } +// file_size rouneded to te block boundary +inline fn roundedFileSize(file_size: usize) usize { + return std.mem.alignForward(usize, file_size, block_size); +} + +// number of padding bytes at the last file block +inline fn filePadding(file_size: usize) usize { + return roundedFileSize(file_size) - file_size; +} + fn Iterator(comptime ReaderType: type) type { + const BufferedReaderType = 
BufferedReader(ReaderType); return struct { - file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - file_name_len: usize = 0, - link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - link_name_len: usize = 0, + attrs: struct { + buffer: [std.fs.MAX_PATH_BYTES * 2]u8 = undefined, + tail: usize = 0, + + fn alloc(self: *@This(), size: usize) ![]u8 { + if (size > self.len()) return error.NameTooLong; + const head = self.tail; + self.tail += size; + assert(self.tail <= self.buffer.len); + return self.buffer[head..self.tail]; + } + + fn free(self: *@This()) void { + self.tail = 0; + } + + fn len(self: *@This()) usize { + return self.buffer.len - self.tail; + } + } = .{}, - reader: BufferedReader(ReaderType), + reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, const Self = @This(); const File = struct { - name: []const u8, - link_name: []const u8, - size: usize, - file_type: Header.FileType, - iter: *Self, + name: []const u8 = &[_]u8{}, + link_name: []const u8 = &[_]u8{}, + size: usize = 0, + file_type: Header.FileType = .normal, + reader: *BufferedReaderType, pub fn write(self: File, writer: anytype) !void { - try self.iter.reader.write(writer, self.size); + try self.reader.write(writer, self.size); + try self.skipPadding(); } pub fn skip(self: File) !void { - const rounded_file_size = std.mem.alignForward(usize, self.size, 512); - try self.iter.reader.skip(rounded_file_size); + try self.reader.skip(roundedFileSize(self.size)); + } + + fn skipPadding(self: File) !void { + try self.reader.skip(filePadding(self.size)); } fn chksum(self: File) ![16]u8 { - var cs = [_]u8{0} ** 16; - if (self.size == 0) return cs; + var sum = [_]u8{0} ** 16; + if (self.size == 0) return sum; - var buffer: [512]u8 = undefined; + var rdr = self.reader.sliceReader(self.size, true); var h = std.crypto.hash.Md5.init(.{}); - - var remaining_bytes: usize = self.size; - while (remaining_bytes > 0) { - const copy_size = @min(buffer.len, remaining_bytes); - try 
self.iter.reader.copy(&buffer, copy_size); - h.update(buffer[0..copy_size]); - remaining_bytes -= copy_size; + while (try rdr.next()) |slice| { + h.update(slice); } - h.final(&cs); + h.final(&sum); try self.skipPadding(); - return cs; - } - - fn skipPadding(self: File) !void { - const rounded_file_size = std.mem.alignForward(usize, self.size, 512); - const pad_len: usize = rounded_file_size - self.size; - self.iter.reader.advance(pad_len); + return sum; } }; + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". pub fn next(self: *Self) !?File { - self.file_name_len = 0; - self.link_name_len = 0; - - while (true) { - const chunk = try self.reader.readChunk(1024); - switch (chunk.len) { - 0 => return null, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, - } - self.reader.advance(512); - - const header: Header = .{ .bytes = chunk[0..512] }; - if (header.isZeroBlock()) return null; - const file_size = try header.fileSize(); - const rounded_file_size: usize = std.mem.alignForward(usize, file_size, 512); - const file_type = header.fileType(); - const link_name = if (self.link_name_len == 0) - header.linkName() - else - self.link_name_buffer[0..self.link_name_len]; - const file_name = if (self.file_name_len == 0) - try header.fullFileName(&self.file_name_buffer) - else - self.file_name_buffer[0..self.file_name_len]; + var file: File = .{ .reader = &self.reader }; + self.attrs.free(); + + while (try self.reader.readBlock()) |block_bytes| { + const block: Header = .{ .bytes = block_bytes[0..block_size] }; + if (block.isZero()) return null; + const file_type = block.fileType(); + const file_size = try block.fileSize(); switch (file_type) { 
.directory, .normal, .symbolic_link => { - return File{ - .name = file_name, - .size = file_size, - .file_type = file_type, - .link_name = link_name, - .iter = self, - }; + if (file.size == 0) file.size = file_size; + if (file.name.len == 0) + file.name = try block.fullFileName((try self.attrs.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); + if (file.link_name.len == 0) file.link_name = block.linkName(); + file.file_type = file_type; + return file; }, .global_extended_header => { - self.reader.skip(rounded_file_size) catch return error.TarHeadersTooBig; + self.reader.skipFile(file_size) catch return error.TarHeadersTooBig; }, .extended_header => { if (file_size == 0) continue; - const chunk_size: usize = rounded_file_size + 512; - var data_off: usize = 0; - while (data_off < file_size) { - const slice = try self.reader.readChunk(chunk_size - data_off); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = file_size - data_off; - const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); - - if (std.mem.eql(u8, attr_info.key, "path")) { - if (attr_info.value_len > self.file_name_buffer.len) return error.NameTooLong; - self.reader.advance(attr_info.value_off); - try self.reader.copy(&self.file_name_buffer, attr_info.value_len); - self.file_name_len = attr_info.value_len; - self.reader.advance(1); - } else if (std.mem.eql(u8, attr_info.key, "linkpath")) { - if (attr_info.value_len > self.link_name_buffer.len) return error.NameTooLong; - self.reader.advance(attr_info.value_off); - try self.reader.copy(&self.link_name_buffer, attr_info.value_len); - self.link_name_len = attr_info.value_len; - self.reader.advance(1); + var rdr = self.reader.sliceReader(file_size, false); + while (try rdr.next()) |slice| { + const attr = try parsePaxAttribute(slice, rdr.remainingSize()); + try rdr.advance(attr.value_off); + if (attr.is("path")) { + file.name = try rdr.copy(try 
self.attrs.alloc(attr.value_len)); + } else if (attr.is("linkpath")) { + file.link_name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + } else if (attr.is("size")) { + var buf = [_]u8{'0'} ** 32; + file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); } else { - try self.reader.skip(attr_info.size); + try rdr.advance(attr.value_len); } - data_off += attr_info.size; + try rdr.advance(1); } - try self.reader.skip(rounded_file_size - data_off); - - continue; + try self.reader.skipPadding(file_size); }, .hard_link => return error.TarUnsupportedFileType, else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, file_name), + .file_name = try d.allocator.dupe(u8, block.name()), .file_type = file_type, } }); }, } } + return null; } }; } @@ -481,6 +523,10 @@ const PaxAttributeInfo = struct { key: []const u8, value_off: usize, value_len: usize, + + inline fn is(self: @This(), key: []const u8) bool { + return (std.mem.eql(u8, self.key, key)); + } }; fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { @@ -515,7 +561,7 @@ test parsePaxAttribute { try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); } -const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; const TestCase = struct { @@ -628,19 +674,19 @@ test "Go test cases" { // }, // // TODO: giving wrong result because we are not reading pax size header - // .{ - // .path = "pax-pos-size-file.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "foo", - // .size = 999, - // .file_type = .normal, - // }, - // }, - // .chksums = &[_][]const u8{ - // "0afb597b283fe61b5d4879669a350556", - // }, - // }, + .{ + .path = "pax-pos-size-file.tar", + .files = &[_]TestCase.File{ + .{ + .name = "foo", + .size = 999, + .file_type = .normal, + }, + }, + .chksums = &[_][]const u8{ + 
"0afb597b283fe61b5d4879669a350556", + }, + }, .{ // has pax records which we are not interested in .path = "pax-records.tar", From e1424b84b87903df265cc052f3dac17d1ec1c3be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 15:28:38 +0100 Subject: [PATCH 06/29] tar: add parsing size in gnu extended format Reference: https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions If the leading byte is 0x80 (128), the non-leading bytes of the field are concatenated in big-endian order, with the result being a positive number expressed in binary form. --- lib/std/tar.zig | 75 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 9ea8f1965204..16726a1e5c1e 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -62,10 +62,10 @@ pub const Options = struct { }; }; -const block_size = 512; +const BLOCK_SIZE = 512; pub const Header = struct { - bytes: *const [block_size]u8, + bytes: *const [BLOCK_SIZE]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -84,6 +84,19 @@ pub const Header = struct { pub fn fileSize(header: Header) !u64 { const raw = header.bytes[124..][0..12]; + // If the leading byte is 0xff (255), all the bytes of the field + // (including the leading byte) are concatenated in big-endian order, + // with the result being a negative number expressed in two’s + // complement form. + if (raw[0] == 0xff) return error.SizeNegative; + // If the leading byte is 0x80 (128), the non-leading bytes of the + // field are concatenated in big-endian order. + if (raw[0] == 0x80) { + if (raw[1] + raw[2] + raw[3] != 0) return error.SizeTooBig; + return std.mem.readInt(u64, raw[4..12], .big); + } + // Zero-filled octal number in ASCII. 
Each numeric field of width w + // contains w minus 1 digits, and a null const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); if (rtrimmed.len == 0) return 0; @@ -148,7 +161,7 @@ pub const Header = struct { fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, - buffer: [block_size * 8]u8 = undefined, + buffer: [BLOCK_SIZE * 8]u8 = undefined, start: usize = 0, end: usize = 0, @@ -164,14 +177,14 @@ fn BufferedReader(comptime ReaderType: type) type { } pub fn readBlock(self: *Self) !?[]const u8 { - const block_bytes = try self.readChunk(block_size * 2); + const block_bytes = try self.readChunk(BLOCK_SIZE * 2); switch (block_bytes.len) { 0 => return null, - 1...(block_size - 1) => return error.UnexpectedEndOfStream, + 1...(BLOCK_SIZE - 1) => return error.UnexpectedEndOfStream, else => {}, } - self.advance(block_size); - return block_bytes[0..block_size]; + self.advance(BLOCK_SIZE); + return block_bytes[0..BLOCK_SIZE]; } pub fn advance(self: *Self, count: usize) void { @@ -258,7 +271,7 @@ fn BufferedReader(comptime ReaderType: type) type { pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { return .{ .size = size, - .chunk_size = roundedFileSize(size) + block_size, + .chunk_size = roundedFileSize(size) + BLOCK_SIZE, .offset = 0, .reader = self, .auto_advance = auto_advance, @@ -267,12 +280,12 @@ fn BufferedReader(comptime ReaderType: type) type { }; } -// file_size rouneded to te block boundary +// File size rounded to te block boundary. inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, block_size); + return std.mem.alignForward(usize, file_size, BLOCK_SIZE); } -// number of padding bytes at the last file block +// Number of padding bytes in the last file block. 
inline fn filePadding(file_size: usize) usize { return roundedFileSize(file_size) - file_size; } @@ -341,17 +354,18 @@ fn Iterator(comptime ReaderType: type) type { } }; - // Externally, Next iterates through the tar archive as if it is a series of - // files. Internally, the tar format often uses fake "files" to add meta - // data that describes the next file. These meta data "files" should not - // normally be visible to the outside. As such, this loop iterates through - // one or more "header files" until it finds a "normal file". + // Externally, `next` iterates through the tar archive as if it is a + // series of files. Internally, the tar format often uses fake "files" + // to add meta data that describes the next file. These meta data + // "files" should not normally be visible to the outside. As such, this + // loop iterates through one or more "header files" until it finds a + // "normal file". pub fn next(self: *Self) !?File { var file: File = .{ .reader = &self.reader }; self.attrs.free(); while (try self.reader.readBlock()) |block_bytes| { - const block: Header = .{ .bytes = block_bytes[0..block_size] }; + const block: Header = .{ .bytes = block_bytes[0..BLOCK_SIZE] }; if (block.isZero()) return null; const file_type = block.fileType(); const file_size = try block.fileSize(); @@ -572,6 +586,7 @@ const TestCase = struct { size: usize = 0, link_name: []const u8 = empty_string, file_type: Header.FileType = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files }; path: []const u8, @@ -794,10 +809,32 @@ test "Go test cases" { }, }, }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. + .path = "writer-big.tar", + .files = &[_]TestCase.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. 
+ .path = "writer-big-long.tar", + .files = &[_]TestCase.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + }, + }, + }, }; for (cases) |case| { - // if (!std.mem.eql(u8, case.path, "pax.tar")) continue; + //if (!std.mem.eql(u8, case.path, "pax-pos-size-file.tar")) continue; var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); @@ -825,7 +862,7 @@ test "Go test cases" { // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); } else { - try actual.skip(); // skip file content + if (!expected.truncated) try actual.skip(); // skip file content } i += 1; } From 169f28d3e6a908717a0e42323ba1a0ee765976da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 15:31:22 +0100 Subject: [PATCH 07/29] tar: fix import path --- lib/std/tar.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 16726a1e5c1e..6e1390990581 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,3 +1,6 @@ +const std = @import("std.zig"); +const assert = std.debug.assert; + pub const Options = struct { /// Number of directory levels to skip when extracting files. 
strip_components: u32 = 0, From 16c40fc4713c195c7a6b8544c9dffbfc6201dc9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 17:17:20 +0100 Subject: [PATCH 08/29] tar: add header chksum checking --- lib/std/tar.zig | 102 ++++++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 6e1390990581..40ca26da7972 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -85,31 +85,6 @@ pub const Header = struct { _, }; - pub fn fileSize(header: Header) !u64 { - const raw = header.bytes[124..][0..12]; - // If the leading byte is 0xff (255), all the bytes of the field - // (including the leading byte) are concatenated in big-endian order, - // with the result being a negative number expressed in two’s - // complement form. - if (raw[0] == 0xff) return error.SizeNegative; - // If the leading byte is 0x80 (128), the non-leading bytes of the - // field are concatenated in big-endian order. - if (raw[0] == 0x80) { - if (raw[1] + raw[2] + raw[3] != 0) return error.SizeTooBig; - return std.mem.readInt(u64, raw[4..12], .big); - } - // Zero-filled octal number in ASCII. Each numeric field of width w - // contains w minus 1 digits, and a null - const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); - const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); - if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); - } - - pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); - } - /// Includes prefix concatenated, if any. /// Return value may point into Header buffer, or might point into the /// argument buffer. 
@@ -128,15 +103,27 @@ pub const Header = struct { } pub fn name(header: Header) []const u8 { - return str(header, 0, 0 + 100); + return header.str(0, 100); + } + + pub fn fileSize(header: Header) !u64 { + return header.numeric(124, 12); + } + + pub fn chksum(header: Header) !u64 { + return header.octal(148, 8); } pub fn linkName(header: Header) []const u8 { - return str(header, 157, 157 + 100); + return header.str(157, 100); + } + + pub fn is_ustar(header: Header) bool { + return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); } pub fn prefix(header: Header) []const u8 { - return str(header, 345, 345 + 155); + return header.str(345, 155); } pub fn fileType(header: Header) FileType { @@ -145,7 +132,8 @@ pub const Header = struct { return result; } - fn str(header: Header, start: usize, end: usize) []const u8 { + fn str(header: Header, start: usize, len: usize) []const u8 { + const end = start + len; var i: usize = start; while (i < end) : (i += 1) { if (header.bytes[i] == 0) break; @@ -153,11 +141,52 @@ pub const Header = struct { return header.bytes[start..i]; } - pub fn isZero(header: Header) bool { - for (header.bytes) |b| { - if (b != 0) return false; + fn numeric(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // If the leading byte is 0xff (255), all the bytes of the field + // (including the leading byte) are concatenated in big-endian order, + // with the result being a negative number expressed in two’s + // complement form. + if (raw[0] == 0xff) return error.TarNumericValueNegative; + // If the leading byte is 0x80 (128), the non-leading bytes of the + // field are concatenated in big-endian order. 
+ if (raw[0] == 0x80) { + if (raw[1] + raw[2] + raw[3] != 0) return error.TarNumericValueTooBig; + return std.mem.readInt(u64, raw[4..12], .big); } - return true; + return try header.octal(start, len); + } + + fn octal(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // Zero-filled octal number in ASCII. Each numeric field of width w + // contains w minus 1 digits, and a null + const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); + const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); + if (rtrimmed.len == 0) return 0; + return std.fmt.parseInt(u64, rtrimmed, 8); + } + + // Sum of all bytes in the header block. The chksum field is treated as if + // it were filled with spaces (ASCII 32). + fn computeChksum(header: Header) u64 { + var sum: u64 = 0; + for (header.bytes, 0..) |b, i| { + if (148 <= i and i < 156) continue; // skip chksum field bytes + sum += b; + } + // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces. + return if (sum > 0) sum + 256 else 0; + } + + // Checks calculated chksum with value of chksum field. + // Returns error or chksum value. + // Zero value indicates empty block. 
+ pub fn checkChksum(header: Header) !u64 { + const field = try header.chksum(); + const computed = header.computeChksum(); + if (field != computed) return error.TarHeaderChksum; + return field; } }; @@ -368,8 +397,8 @@ fn Iterator(comptime ReaderType: type) type { self.attrs.free(); while (try self.reader.readBlock()) |block_bytes| { - const block: Header = .{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (block.isZero()) return null; + const block = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; + if (try block.checkChksum() == 0) return null; // zero block found const file_type = block.fileType(); const file_size = try block.fileSize(); @@ -578,9 +607,6 @@ test parsePaxAttribute { try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); } -const std = @import("std"); -const assert = std.debug.assert; - const TestCase = struct { const File = struct { const empty_string = &[0]u8{}; From 48b160c1bf75f602acabc3b43eca56b8aa4abf4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 20:30:08 +0100 Subject: [PATCH 09/29] tar: handle pax null attrs and pax attr ending --- lib/std/tar.zig | 79 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 40ca26da7972..b6dd517d3f58 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -164,7 +164,7 @@ pub const Header = struct { const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); + return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader; } // Sum of all bytes in the header block. 
The chksum field is treated as if @@ -289,6 +289,10 @@ fn BufferedReader(comptime ReaderType: type) type { try self.reader.skip(len); } + fn byte(self: *@This()) u8 { + return self.reader.buffer[self.reader.start]; + } + fn copy(self: *@This(), dst: []u8) ![]const u8 { _ = try self.reader.copy(dst); self.offset += dst.len; @@ -416,21 +420,25 @@ fn Iterator(comptime ReaderType: type) type { }, .extended_header => { if (file_size == 0) continue; + // TODO: ovo resetiranje je nezgodno + self.attrs.free(); + file = File{ .reader = &self.reader }; var rdr = self.reader.sliceReader(file_size, false); while (try rdr.next()) |slice| { const attr = try parsePaxAttribute(slice, rdr.remainingSize()); try rdr.advance(attr.value_off); if (attr.is("path")) { - file.name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + file.name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); } else if (attr.is("linkpath")) { - file.link_name = try rdr.copy(try self.attrs.alloc(attr.value_len)); + file.link_name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); } else if (attr.is("size")) { var buf = [_]u8{'0'} ** 32; file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); } else { try rdr.advance(attr.value_len); } + if (rdr.byte() != '\n') return error.InvalidPaxAttribute; try rdr.advance(1); } try self.reader.skipPadding(file_size); @@ -582,15 +590,21 @@ fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { if (kv_size > max_size) { return error.InvalidPaxAttribute; } + const key = data[pos_space + 1 .. pos_equals]; return .{ .size = kv_size, - .key = data[pos_space + 1 .. 
pos_equals], + .key = try noNull(key), .value_off = pos_equals + 1, .value_len = kv_size - pos_equals - 2, }; } -test parsePaxAttribute { +fn noNull(str: []const u8) ![]const u8 { + if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; + return str; +} + +test "parsePaxAttribute" { const expectEqual = std.testing.expectEqual; const expectEqualStrings = std.testing.expectEqualStrings; const expectError = std.testing.expectError; @@ -605,6 +619,7 @@ test parsePaxAttribute { try expectEqual(attr_info, try parsePaxAttribute(header, 1012)); try expectError(error.InvalidPaxAttribute, parsePaxAttribute(header, 1010)); try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); + try expectError(error.InvalidPaxAttribute, parsePaxAttribute("13 pa\x00th=abc\n", 1024)); // null in key } const TestCase = struct { @@ -633,12 +648,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -656,12 +669,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -675,12 +686,10 @@ test "Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, }, }, .chksums = &[_][]const u8{ @@ -694,7 +703,6 @@ test "Go test cases" { .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, - .file_type = .normal, }, .{ .name = "a/b", @@ -707,18 +715,25 @@ test "Go test cases" { "3c382e8f5b6631aa2db52643912ffd4a", }, }, - // TODO: this should fail - // .{ - // .path = "pax-bad-hdr-file.tar", - // .err = error.TarBadHeader, - // }, + .{ + // pax attribute don't end with \n 
+ .path = "pax-bad-hdr-file.tar", + // .files = &[_]TestCase.File{ + // .{ + // .name = "PAX1/PAX1/long-path-name", + // .size = 684, + // }, + // }, + .err = error.InvalidPaxAttribute, + }, + // // .{ // .path = "pax-bad-mtime-file.tar", // .err = error.TarBadHeader, // }, // - // TODO: giving wrong result because we are not reading pax size header .{ + // size is in pax attribute .path = "pax-pos-size-file.tar", .files = &[_]TestCase.File{ .{ @@ -799,9 +814,17 @@ test "Go test cases" { .path = "gnu-incremental.tar", .err = error.TarUnsupportedFileType, }, - // .{ - // .path = "pax-multi-hdrs.tar", - // }, + .{ + // should use values only from last pax header + .path = "pax-multi-hdrs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .file_type = .symbolic_link, + }, + }, + }, // .{ // .path = "gnu-long-nul.tar", // .files = &[_]TestCase.File{ @@ -827,8 +850,20 @@ test "Go test cases" { }, }, }, + .{ + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "pax-nul-path.tar", + .err = error.InvalidPaxAttribute, + }, + .{ + .path = "pax-nul-xattrs.tar", + .err = error.InvalidPaxAttribute, + }, // TODO some files with errors: - // pax-nul-xattrs.tar, pax-nul-path.tar, neg-size.tar, issue10968.tar, issue11169.tar, issue12435.tar + // issue10968.tar, issue11169.tar, issue12435.tar .{ .path = "trailing-slash.tar", .files = &[_]TestCase.File{ From c761dfc1761b38be8d1dc72dd4c0cbf07d2c0eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 29 Nov 2023 21:37:13 +0100 Subject: [PATCH 10/29] tar: add gnu path and link extensions handling --- lib/std/tar.zig | 142 +++++++++++++++++++++++++++++++----------------- 1 file changed, 92 insertions(+), 50 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index b6dd517d3f58..f22ee0e73309 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -82,6 +82,10 @@ pub const Header = struct { contiguous = '7', global_extended_header = 'g', 
extended_header = 'x', + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + gnu_long_name = 'L', + gnu_long_link = 'K', _, }; @@ -119,7 +123,8 @@ pub const Header = struct { } pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); + const magic = header.bytes[257..][0..6]; + return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' '); } pub fn prefix(header: Header) []const u8 { @@ -133,12 +138,7 @@ pub const Header = struct { } fn str(header: Header, start: usize, len: usize) []const u8 { - const end = start + len; - var i: usize = start; - while (i < end) : (i += 1) { - if (header.bytes[i] == 0) break; - } - return header.bytes[start..i]; + return nullStr(header.bytes[start .. start + len]); } fn numeric(header: Header, start: usize, len: usize) !u64 { @@ -190,6 +190,14 @@ pub const Header = struct { } }; +// break string on first null char +fn nullStr(str: []const u8) []const u8 { + for (str, 0..) 
|c, i| { + if (c == 0) return str[0..i]; + } + return str; +} + fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, @@ -274,7 +282,7 @@ fn BufferedReader(comptime ReaderType: type) type { reader: *Self, auto_advance: bool, - fn next(self: *@This()) !?[]const u8 { + pub fn next(self: *@This()) !?[]const u8 { if (self.offset >= self.size) return null; const temp = try self.reader.readChunk(self.chunk_size - self.offset); @@ -284,22 +292,22 @@ fn BufferedReader(comptime ReaderType: type) type { return slice; } - fn advance(self: *@This(), len: usize) !void { + pub fn advance(self: *@This(), len: usize) !void { self.offset += len; try self.reader.skip(len); } - fn byte(self: *@This()) u8 { + pub fn byte(self: *@This()) u8 { return self.reader.buffer[self.reader.start]; } - fn copy(self: *@This(), dst: []u8) ![]const u8 { + pub fn copy(self: *@This(), dst: []u8) ![]const u8 { _ = try self.reader.copy(dst); self.offset += dst.len; return dst; } - fn remainingSize(self: *@This()) usize { + pub fn remainingSize(self: *@This()) usize { return self.size - self.offset; } }; @@ -443,6 +451,14 @@ fn Iterator(comptime ReaderType: type) type { } try self.reader.skipPadding(file_size); }, + .gnu_long_name => { + file.name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); + try self.reader.skipPadding(file_size); + }, + .gnu_long_link => { + file.link_name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); + try self.reader.skipPadding(file_size); + }, .hard_link => return error.TarUnsupportedFileType, else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; @@ -624,22 +640,20 @@ test "parsePaxAttribute" { const TestCase = struct { const File = struct { - const empty_string = &[0]u8{}; - name: []const u8, size: usize = 0, - link_name: []const u8 = empty_string, + link_name: []const u8 = &[0]u8{}, file_type: Header.FileType = .normal, truncated: bool = false, // when there is no 
file body, just header, usefull for huge files }; - path: []const u8, - files: []const File = &[_]TestCase.File{}, - chksums: []const []const u8 = &[_][]const u8{}, - err: ?anyerror = null, + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]TestCase.File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error }; -test "Go test cases" { +test "tar: Go test cases" { const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); const cases = [_]TestCase{ .{ @@ -718,12 +732,6 @@ test "Go test cases" { .{ // pax attribute don't end with \n .path = "pax-bad-hdr-file.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "PAX1/PAX1/long-path-name", - // .size = 684, - // }, - // }, .err = error.InvalidPaxAttribute, }, // @@ -808,9 +816,16 @@ test "Go test cases" { }, .{ .path = "gnu-multi-hdrs.tar", - .err = error.TarUnsupportedFileType, + .files = &[_]TestCase.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .file_type = .symbolic_link, + }, + }, }, .{ + // has gnu type D (directory) and S (sparse) blocks .path = "gnu-incremental.tar", .err = error.TarUnsupportedFileType, }, @@ -825,23 +840,22 @@ test "Go test cases" { }, }, }, - // .{ - // .path = "gnu-long-nul.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "012233456789", - // }, - // }, - // }, - // .{ - // .path = "gnu-utf8.tar", - // .files = &[_]TestCase.File{ - // .{ - // .name = "012233456789", - // }, - // }, - // }, - // + .{ + .path = "gnu-long-nul.tar", + .files = &[_]TestCase.File{ + .{ + .name = "0123456789", + }, + }, + }, + .{ + .path = "gnu-utf8.tar", + .files = &[_]TestCase.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + }, + }, + }, .{ .path = "gnu-not-utf8.tar", .files = &[_]TestCase.File{ @@ -851,19 +865,47 @@ 
test "Go test cases" { }, }, .{ - .path = "neg-size.tar", - .err = error.TarHeader, + // null in pax key + .path = "pax-nul-xattrs.tar", + .err = error.InvalidPaxAttribute, }, .{ .path = "pax-nul-path.tar", .err = error.InvalidPaxAttribute, }, .{ - .path = "pax-nul-xattrs.tar", - .err = error.InvalidPaxAttribute, + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue10968.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue11169.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue12435.tar", + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .path = "invalid-go17.tar", + .files = &[_]TestCase.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .path = "ustar-file-devs.tar", + .files = &[_]TestCase.File{ + .{ + .name = "file", + }, + }, }, - // TODO some files with errors: - // issue10968.tar, issue11169.tar, issue12435.tar .{ .path = "trailing-slash.tar", .files = &[_]TestCase.File{ From 6e7a39c935b13dddc9153e534e5af8fe12bc5cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Thu, 30 Nov 2023 21:28:10 +0100 Subject: [PATCH 11/29] tar: refactor reading pax attributes --- lib/std/tar.zig | 303 +++++++++++++++++++++++++++++++----------------- 1 file changed, 197 insertions(+), 106 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index f22ee0e73309..d6a51a94cf11 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -198,6 +198,16 @@ fn nullStr(str: []const u8) []const u8 { return str; } +// File size rounded to te block boundary. +inline fn roundedFileSize(file_size: usize) usize { + return std.mem.alignForward(usize, file_size, BLOCK_SIZE); +} + +// Number of padding bytes in the last file block. 
+inline fn filePadding(file_size: usize) usize { + return roundedFileSize(file_size) - file_size; +} + fn BufferedReader(comptime ReaderType: type) type { return struct { unbuffered_reader: ReaderType, @@ -207,16 +217,32 @@ fn BufferedReader(comptime ReaderType: type) type { const Self = @This(); - pub fn readChunk(self: *Self, count: usize) ![]const u8 { - self.ensureCapacity(1024); - + fn readChunk(self: *Self, count: usize) ![]const u8 { + self.ensureCapacity(BLOCK_SIZE * 2); const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); self.end += try self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - return self.buffer[self.start..self.end]; } - pub fn readBlock(self: *Self) !?[]const u8 { + // Returns slice of size count or part of it. + pub fn readSlice(self: *Self, count: usize) ![]const u8 { + if (count <= self.end - self.start) { + // fastpath, we have enough bytes in buffer + return self.buffer[self.start .. self.start + count]; + } + + const chunk_size = roundedFileSize(count) + BLOCK_SIZE; + const temp = try self.readChunk(chunk_size); + if (temp.len == 0) return error.UnexpectedEndOfStream; + return temp[0..@min(count, temp.len)]; + } + + // Returns tar header block, 512 bytes. Before reading advances buffer + // for padding of the previous block, to position reader at the start of + // new block. After reading advances for block size, to position reader + // at the start of the file body. + pub fn readBlock(self: *Self, padding: usize) !?[]const u8 { + try self.skip(padding); const block_bytes = try self.readChunk(BLOCK_SIZE * 2); switch (block_bytes.len) { 0 => return null, @@ -227,11 +253,19 @@ fn BufferedReader(comptime ReaderType: type) type { return block_bytes[0..BLOCK_SIZE]; } + // Retruns byte at current position in buffer. + pub fn readByte(self: *@This()) u8 { + return self.buffer[self.start]; + } + + // Advances reader for count bytes, assumes that we have that number of + // bytes in buffer. 
pub fn advance(self: *Self, count: usize) void { self.start += count; assert(self.start <= self.end); } + // Advances reader without assuming that count bytes are in the buffer. pub fn skip(self: *Self, count: usize) !void { if (self.start + count > self.end) { try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); @@ -241,14 +275,6 @@ fn BufferedReader(comptime ReaderType: type) type { } } - pub fn skipPadding(self: *Self, file_size: usize) !void { - return self.skip(filePadding(file_size)); - } - - pub fn skipFile(self: *Self, file_size: usize) !void { - return self.skip(roundedFileSize(file_size)); - } - inline fn ensureCapacity(self: *Self, count: usize) void { if (self.buffer.len - self.start < count) { const dest_end = self.end - self.start; @@ -258,16 +284,26 @@ fn BufferedReader(comptime ReaderType: type) type { } } - pub fn write(self: *Self, writer: anytype, size: usize) !void { - var rdr = self.sliceReader(size, true); + // Write count bytes to the writer. + pub fn write(self: *Self, writer: anytype, count: usize) !void { + if (self.read(count)) |buf| { + try writer.writeAll(buf); + return; + } + var rdr = self.sliceReader(count); while (try rdr.next()) |slice| { try writer.writeAll(slice); } } - // copy dst.len bytes into dst + // Copy dst.len bytes into dst buffer. pub fn copy(self: *Self, dst: []u8) ![]const u8 { - var rdr = self.sliceReader(dst.len, true); + if (self.read(dst.len)) |buf| { + // fastpath we already have enough bytes in buffer + @memcpy(dst, buf); + return dst; + } + var rdr = self.sliceReader(dst.len); var pos: usize = 0; while (try rdr.next()) |slice| : (pos += slice.len) { @memcpy(dst[pos .. pos + slice.len], slice); @@ -275,91 +311,151 @@ fn BufferedReader(comptime ReaderType: type) type { return dst; } + // Retruns count bytes from buffer and advances for that number of + // bytes. If we don't have that much bytes buffered returns null. 
+ fn read(self: *Self, count: usize) ?[]const u8 { + if (count <= self.end - self.start) { + const buf = self.buffer[self.start .. self.start + count]; + self.advance(count); + return buf; + } + return null; + } + const SliceReader = struct { size: usize, - chunk_size: usize, offset: usize, reader: *Self, - auto_advance: bool, - - pub fn next(self: *@This()) !?[]const u8 { - if (self.offset >= self.size) return null; - const temp = try self.reader.readChunk(self.chunk_size - self.offset); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@min(self.remainingSize(), temp.len)]; - if (self.auto_advance) try self.advance(slice.len); + pub fn next(self: *SliceReader) !?[]const u8 { + const remaining_size = self.size - self.offset; + if (remaining_size == 0) return null; + const slice = try self.reader.readSlice(remaining_size); + self.advance(slice.len); return slice; } - pub fn advance(self: *@This(), len: usize) !void { + fn advance(self: *SliceReader, len: usize) void { self.offset += len; - try self.reader.skip(len); - } - - pub fn byte(self: *@This()) u8 { - return self.reader.buffer[self.reader.start]; - } - - pub fn copy(self: *@This(), dst: []u8) ![]const u8 { - _ = try self.reader.copy(dst); - self.offset += dst.len; - return dst; - } - - pub fn remainingSize(self: *@This()) usize { - return self.size - self.offset; + self.reader.advance(len); } }; - pub fn sliceReader(self: *Self, size: usize, auto_advance: bool) Self.SliceReader { + pub fn sliceReader(self: *Self, size: usize) SliceReader { return .{ .size = size, - .chunk_size = roundedFileSize(size) + BLOCK_SIZE, + .reader = self, .offset = 0, + }; + } + + pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { + return .{ + .size = size, .reader = self, - .auto_advance = auto_advance, + .offset = 0, }; } - }; -} -// File size rounded to te block boundary. 
-inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, BLOCK_SIZE); -} + const PaxFileReader = struct { + size: usize, + offset: usize = 0, + reader: *Self, -// Number of padding bytes in the last file block. -inline fn filePadding(file_size: usize) usize { - return roundedFileSize(file_size) - file_size; + const PaxKey = enum { + path, + linkpath, + size, + }; + + const PaxAttribute = struct { + key: PaxKey, + value_len: usize, + parent: *PaxFileReader, + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least value_len. + pub fn value(self: PaxAttribute, dst: []u8) ![]u8 { + assert(dst.len >= self.value_len); + const buf = dst[0..self.value_len]; + _ = try self.parent.reader.copy(buf); + self.parent.offset += buf.len; + try self.parent.checkAttributeEnding(); + return buf; + } + }; + + // Caller of the next has to call value in PaxAttribute, to advance + // reader across value. + pub fn next(self: *PaxFileReader) !?PaxAttribute { + const rdr = self.reader; + _ = rdr; + + while (true) { + const remaining_size = self.size - self.offset; + if (remaining_size == 0) return null; + + const inf = try parsePaxAttribute( + try self.reader.readSlice(remaining_size), + remaining_size, + ); + const key: PaxKey = if (inf.is("path")) + .path + else if (inf.is("linkpath")) + .linkpath + else if (inf.is("size")) + .size + else { + try self.advance(inf.value_off + inf.value_len); + try self.checkAttributeEnding(); + continue; + }; + try self.advance(inf.value_off); // position reader at the start of the value + return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self }; + } + } + + fn checkAttributeEnding(self: *PaxFileReader) !void { + if (self.reader.readByte() != '\n') return error.InvalidPaxAttribute; + try self.advance(1); + } + + fn advance(self: *PaxFileReader, len: usize) !void { + self.offset += len; + try self.reader.skip(len); + } + }; + }; } 
fn Iterator(comptime ReaderType: type) type { const BufferedReaderType = BufferedReader(ReaderType); return struct { - attrs: struct { - buffer: [std.fs.MAX_PATH_BYTES * 2]u8 = undefined, + // scratch buffer for file attributes + scratch: struct { + // size: two paths (name and link_name) and size (24 in pax attribute) + buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, tail: usize = 0, + // Allocate size of the buffer for some attribute. fn alloc(self: *@This(), size: usize) ![]u8 { - if (size > self.len()) return error.NameTooLong; + const free_size = self.buffer.len - self.tail; + if (size > free_size) return error.TarScratchBufferOverflow; const head = self.tail; self.tail += size; assert(self.tail <= self.buffer.len); return self.buffer[head..self.tail]; } + // Free whole buffer. fn free(self: *@This()) void { self.tail = 0; } - - fn len(self: *@This()) usize { - return self.buffer.len - self.tail; - } } = .{}, reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, + padding: usize = 0, // bytes of file padding const Self = @This(); @@ -372,28 +468,22 @@ fn Iterator(comptime ReaderType: type) type { pub fn write(self: File, writer: anytype) !void { try self.reader.write(writer, self.size); - try self.skipPadding(); } pub fn skip(self: File) !void { - try self.reader.skip(roundedFileSize(self.size)); - } - - fn skipPadding(self: File) !void { - try self.reader.skip(filePadding(self.size)); + try self.reader.skip(self.size); } fn chksum(self: File) ![16]u8 { var sum = [_]u8{0} ** 16; if (self.size == 0) return sum; - var rdr = self.reader.sliceReader(self.size, true); + var rdr = self.reader.sliceReader(self.size); var h = std.crypto.hash.Md5.init(.{}); while (try rdr.next()) |slice| { h.update(slice); } h.final(&sum); - try self.skipPadding(); return sum; } }; @@ -406,64 +496,65 @@ fn Iterator(comptime ReaderType: type) type { // "normal file". 
pub fn next(self: *Self) !?File { var file: File = .{ .reader = &self.reader }; - self.attrs.free(); + self.scratch.free(); - while (try self.reader.readBlock()) |block_bytes| { - const block = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (try block.checkChksum() == 0) return null; // zero block found - const file_type = block.fileType(); - const file_size = try block.fileSize(); + while (try self.reader.readBlock(self.padding)) |block_bytes| { + const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; + if (try header.checkChksum() == 0) return null; // zero block found + + const file_type = header.fileType(); + const file_size = try header.fileSize(); + self.padding = filePadding(file_size); switch (file_type) { + // file types to retrun from next .directory, .normal, .symbolic_link => { if (file.size == 0) file.size = file_size; + self.padding = filePadding(file.size); + if (file.name.len == 0) - file.name = try block.fullFileName((try self.attrs.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); - if (file.link_name.len == 0) file.link_name = block.linkName(); + file.name = try header.fullFileName((try self.scratch.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); + if (file.link_name.len == 0) file.link_name = header.linkName(); file.file_type = file_type; return file; }, - .global_extended_header => { - self.reader.skipFile(file_size) catch return error.TarHeadersTooBig; + // prefix header types + .gnu_long_name => { + file.name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + }, + .gnu_long_link => { + file.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); }, .extended_header => { if (file_size == 0) continue; - // TODO: ovo resetiranje je nezgodno - self.attrs.free(); + // use just last extended header data + self.scratch.free(); file = File{ .reader = &self.reader }; - var rdr = self.reader.sliceReader(file_size, false); - while (try rdr.next()) |slice| { - const attr = try 
parsePaxAttribute(slice, rdr.remainingSize()); - try rdr.advance(attr.value_off); - if (attr.is("path")) { - file.name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); - } else if (attr.is("linkpath")) { - file.link_name = try noNull(try rdr.copy(try self.attrs.alloc(attr.value_len))); - } else if (attr.is("size")) { - var buf = [_]u8{'0'} ** 32; - file.size = try std.fmt.parseInt(usize, try rdr.copy(buf[0..attr.value_len]), 10); - } else { - try rdr.advance(attr.value_len); + var rdr = self.reader.paxFileReader(file_size); + while (try rdr.next()) |attr| { + switch (attr.key) { + .path => { + file.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + }, + .linkpath => { + file.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + }, + .size => { + file.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); + }, } - if (rdr.byte() != '\n') return error.InvalidPaxAttribute; - try rdr.advance(1); } - try self.reader.skipPadding(file_size); - }, - .gnu_long_name => { - file.name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); - try self.reader.skipPadding(file_size); }, - .gnu_long_link => { - file.link_name = nullStr(try self.reader.copy(try self.attrs.alloc(file_size))); - try self.reader.skipPadding(file_size); + // ignored header types + .global_extended_header => { + self.reader.skip(file_size) catch return error.TarHeadersTooBig; }, - .hard_link => return error.TarUnsupportedFileType, + // unsupported header types else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, block.name()), + .file_name = try d.allocator.dupe(u8, header.name()), .file_type = file_type, } }); }, From 6bfa7bf197634272f30d864a4563f7cddbaf55c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 18:26:31 
+0100 Subject: [PATCH 12/29] tar: use scratch buffer for file names That makes names strings stable during the iteration. Otherwise string buffers can be overwritten while reading file content. --- lib/std/tar.zig | 289 ++++++++++++++++++++++-------------------------- 1 file changed, 130 insertions(+), 159 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index d6a51a94cf11..1a69f113cc98 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -66,6 +66,7 @@ pub const Options = struct { }; const BLOCK_SIZE = 512; +const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) pub const Header = struct { bytes: *const [BLOCK_SIZE]u8, @@ -90,16 +91,14 @@ pub const Header = struct { }; /// Includes prefix concatenated, if any. - /// Return value may point into Header buffer, or might point into the - /// argument buffer. /// TODO: check against "../" and other nefarious things - pub fn fullFileName(header: Header, buffer: *[std.fs.MAX_PATH_BYTES]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 { const n = name(header); - if (!is_ustar(header)) - return n; const p = prefix(header); - if (p.len == 0) - return n; + if (!is_ustar(header) or p.len == 0) { + @memcpy(buffer[0..n.len], n); + return buffer[0..n.len]; + } @memcpy(buffer[0..p.len], p); buffer[p.len] = '/'; @memcpy(buffer[p.len + 1 ..][0..n.len], n); @@ -180,7 +179,7 @@ pub const Header = struct { } // Checks calculated chksum with value of chksum field. - // Returns error or chksum value. + // Returns error or valid chksum value. // Zero value indicates empty block. pub fn checkChksum(header: Header) !u64 { const field = try header.chksum(); @@ -190,7 +189,7 @@ pub const Header = struct { } }; -// break string on first null char +// Breaks string on first null char. fn nullStr(str: []const u8) []const u8 { for (str, 0..) 
|c, i| { if (c == 0) return str[0..i]; @@ -198,14 +197,10 @@ fn nullStr(str: []const u8) []const u8 { return str; } -// File size rounded to te block boundary. -inline fn roundedFileSize(file_size: usize) usize { - return std.mem.alignForward(usize, file_size, BLOCK_SIZE); -} - // Number of padding bytes in the last file block. -inline fn filePadding(file_size: usize) usize { - return roundedFileSize(file_size) - file_size; +inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, BLOCK_SIZE); // size rounded to te block boundary + return block_rounded - size; } fn BufferedReader(comptime ReaderType: type) type { @@ -217,44 +212,38 @@ fn BufferedReader(comptime ReaderType: type) type { const Self = @This(); - fn readChunk(self: *Self, count: usize) ![]const u8 { - self.ensureCapacity(BLOCK_SIZE * 2); - const ask = @min(self.buffer.len - self.end, count -| (self.end - self.start)); - self.end += try self.unbuffered_reader.readAtLeast(self.buffer[self.end..], ask); - return self.buffer[self.start..self.end]; + // Fills buffer from underlaying reader. + fn fillBuffer(self: *Self) !void { + self.removeUsed(); + self.end += try self.unbuffered_reader.read(self.buffer[self.end..]); } - // Returns slice of size count or part of it. + // Returns slice of size count or how much fits into buffer. pub fn readSlice(self: *Self, count: usize) ![]const u8 { if (count <= self.end - self.start) { - // fastpath, we have enough bytes in buffer return self.buffer[self.start .. self.start + count]; } - - const chunk_size = roundedFileSize(count) + BLOCK_SIZE; - const temp = try self.readChunk(chunk_size); - if (temp.len == 0) return error.UnexpectedEndOfStream; - return temp[0..@min(count, temp.len)]; + try self.fillBuffer(); + const buf = self.buffer[self.start..self.end]; + if (buf.len == 0) return error.UnexpectedEndOfStream; + return buf[0..@min(count, buf.len)]; } - // Returns tar header block, 512 bytes. 
Before reading advances buffer - // for padding of the previous block, to position reader at the start of - // new block. After reading advances for block size, to position reader - // at the start of the file body. - pub fn readBlock(self: *Self, padding: usize) !?[]const u8 { + // Returns tar header block, 512 bytes, or null if eof. Before reading + // advances buffer for padding of the previous block, to position reader + // at the start of new block. After reading advances for block size, to + // position reader at the start of the file content. + pub fn readHeader(self: *Self, padding: usize) !?[]const u8 { try self.skip(padding); - const block_bytes = try self.readChunk(BLOCK_SIZE * 2); - switch (block_bytes.len) { - 0 => return null, - 1...(BLOCK_SIZE - 1) => return error.UnexpectedEndOfStream, - else => {}, - } + const buf = self.readSlice(BLOCK_SIZE) catch return null; + if (buf.len < BLOCK_SIZE) return error.UnexpectedEndOfStream; self.advance(BLOCK_SIZE); - return block_bytes[0..BLOCK_SIZE]; + return buf[0..BLOCK_SIZE]; } - // Retruns byte at current position in buffer. + // Returns byte at current position in buffer. pub fn readByte(self: *@This()) u8 { + assert(self.start < self.end); return self.buffer[self.start]; } @@ -275,78 +264,36 @@ fn BufferedReader(comptime ReaderType: type) type { } } - inline fn ensureCapacity(self: *Self, count: usize) void { - if (self.buffer.len - self.start < count) { - const dest_end = self.end - self.start; - @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); - self.end = dest_end; - self.start = 0; - } + // Removes used part of the buffer. + inline fn removeUsed(self: *Self) void { + const dest_end = self.end - self.start; + if (self.start == 0 or dest_end > self.start) return; + @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); + self.end = dest_end; + self.start = 0; } - // Write count bytes to the writer. + // Writes count bytes to the writer. Advances reader. 
pub fn write(self: *Self, writer: anytype, count: usize) !void { - if (self.read(count)) |buf| { - try writer.writeAll(buf); - return; - } - var rdr = self.sliceReader(count); - while (try rdr.next()) |slice| { + var pos: usize = 0; + while (pos < count) { + const slice = try self.readSlice(count - pos); try writer.writeAll(slice); + self.advance(slice.len); + pos += slice.len; } } - // Copy dst.len bytes into dst buffer. + // Copies dst.len bytes into dst buffer. Advances reader. pub fn copy(self: *Self, dst: []u8) ![]const u8 { - if (self.read(dst.len)) |buf| { - // fastpath we already have enough bytes in buffer - @memcpy(dst, buf); - return dst; - } - var rdr = self.sliceReader(dst.len); var pos: usize = 0; - while (try rdr.next()) |slice| : (pos += slice.len) { + while (pos < dst.len) { + const slice = try self.readSlice(dst.len - pos); @memcpy(dst[pos .. pos + slice.len], slice); - } - return dst; - } - - // Retruns count bytes from buffer and advances for that number of - // bytes. If we don't have that much bytes buffered returns null. - fn read(self: *Self, count: usize) ?[]const u8 { - if (count <= self.end - self.start) { - const buf = self.buffer[self.start .. 
self.start + count]; - self.advance(count); - return buf; - } - return null; - } - - const SliceReader = struct { - size: usize, - offset: usize, - reader: *Self, - - pub fn next(self: *SliceReader) !?[]const u8 { - const remaining_size = self.size - self.offset; - if (remaining_size == 0) return null; - const slice = try self.reader.readSlice(remaining_size); self.advance(slice.len); - return slice; - } - - fn advance(self: *SliceReader, len: usize) void { - self.offset += len; - self.reader.advance(len); + pos += slice.len; } - }; - - pub fn sliceReader(self: *Self, size: usize) SliceReader { - return .{ - .size = size, - .reader = self, - .offset = 0, - }; + return dst; } pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { @@ -388,9 +335,6 @@ fn BufferedReader(comptime ReaderType: type) type { // Caller of the next has to call value in PaxAttribute, to advance // reader across value. pub fn next(self: *PaxFileReader) !?PaxAttribute { - const rdr = self.reader; - _ = rdr; - while (true) { const remaining_size = self.size - self.offset; if (remaining_size == 0) return null; @@ -433,10 +377,14 @@ fn Iterator(comptime ReaderType: type) type { return struct { // scratch buffer for file attributes scratch: struct { - // size: two paths (name and link_name) and size (24 in pax attribute) + // size: two paths (name and link_name) and files size bytes (24 in pax attribute) buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, tail: usize = 0, + name: []const u8 = undefined, + link_name: []const u8 = undefined, + size: usize = 0, + // Allocate size of the buffer for some attribute. fn alloc(self: *@This(), size: usize) ![]u8 { const free_size = self.buffer.len - self.tail; @@ -447,45 +395,53 @@ fn Iterator(comptime ReaderType: type) type { return self.buffer[head..self.tail]; } - // Free whole buffer. - fn free(self: *@This()) void { + // Reset buffer and all fields. 
+ fn reset(self: *@This()) void { self.tail = 0; + self.name = self.buffer[0..0]; + self.link_name = self.buffer[0..0]; + self.size = 0; + } + + fn append(self: *@This(), header: Header) !void { + if (self.size == 0) self.size = try header.fileSize(); + if (self.link_name.len == 0) { + const link_name = header.linkName(); + if (link_name.len > 0) { + const buf = try self.alloc(link_name.len); + @memcpy(buf, link_name); + self.link_name = buf; + } + } + if (self.name.len == 0) { + self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); + } } } = .{}, reader: BufferedReaderType, diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of file padding + padding: usize = 0, // bytes of padding to the end of the block const Self = @This(); - const File = struct { - name: []const u8 = &[_]u8{}, - link_name: []const u8 = &[_]u8{}, - size: usize = 0, - file_type: Header.FileType = .normal, + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + file_type: Header.FileType, + reader: *BufferedReaderType, + // Writes file content to writer. pub fn write(self: File, writer: anytype) !void { try self.reader.write(writer, self.size); } + // Skips file content. Advances reader. pub fn skip(self: File) !void { try self.reader.skip(self.size); } - - fn chksum(self: File) ![16]u8 { - var sum = [_]u8{0} ** 16; - if (self.size == 0) return sum; - - var rdr = self.reader.sliceReader(self.size); - var h = std.crypto.hash.Md5.init(.{}); - while (try rdr.next()) |slice| { - h.update(slice); - } - h.final(&sum); - return sum; - } }; // Externally, `next` iterates through the tar archive as if it is a @@ -495,62 +451,62 @@ fn Iterator(comptime ReaderType: type) type { // loop iterates through one or more "header files" until it finds a // "normal file". 
pub fn next(self: *Self) !?File { - var file: File = .{ .reader = &self.reader }; - self.scratch.free(); + self.scratch.reset(); - while (try self.reader.readBlock(self.padding)) |block_bytes| { + while (try self.reader.readHeader(self.padding)) |block_bytes| { const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; if (try header.checkChksum() == 0) return null; // zero block found const file_type = header.fileType(); - const file_size = try header.fileSize(); - self.padding = filePadding(file_size); + const size: usize = @intCast(try header.fileSize()); + self.padding = blockPadding(size); switch (file_type) { - // file types to retrun from next + // File types to retrun upstream .directory, .normal, .symbolic_link => { - if (file.size == 0) file.size = file_size; - self.padding = filePadding(file.size); - - if (file.name.len == 0) - file.name = try header.fullFileName((try self.scratch.alloc(std.fs.MAX_PATH_BYTES))[0..std.fs.MAX_PATH_BYTES]); - if (file.link_name.len == 0) file.link_name = header.linkName(); - file.file_type = file_type; + try self.scratch.append(header); + const file = File{ + .file_type = file_type, + .name = self.scratch.name, + .link_name = self.scratch.link_name, + .size = self.scratch.size, + .reader = &self.reader, + }; + self.padding = blockPadding(file.size); return file; }, - // prefix header types + // Prefix header types .gnu_long_name => { - file.name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); }, .gnu_long_link => { - file.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(file_size))); + self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); }, .extended_header => { - if (file_size == 0) continue; - // use just last extended header data - self.scratch.free(); - file = File{ .reader = &self.reader }; + if (size == 0) continue; + // Use just attributes from last extended header. 
+ self.scratch.reset(); - var rdr = self.reader.paxFileReader(file_size); + var rdr = self.reader.paxFileReader(size); while (try rdr.next()) |attr| { switch (attr.key) { .path => { - file.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); }, .linkpath => { - file.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); + self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); }, .size => { - file.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); + self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); }, } } }, - // ignored header types + // Ignored header type .global_extended_header => { - self.reader.skip(file_size) catch return error.TarHeadersTooBig; + self.reader.skip(size) catch return error.TarHeadersTooBig; }, - // unsupported header types + // All other are unsupported header types else => { const d = self.diagnostics orelse return error.TarUnsupportedFileType; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ @@ -1053,16 +1009,31 @@ test "tar: Go test cases" { try std.testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { - var actual_chksum = try actual.chksum(); - var hex_to_bytes_buffer: [16]u8 = undefined; - const expected_chksum = try std.fmt.hexToBytes(&hex_to_bytes_buffer, case.chksums[i]); - // std.debug.print("actual chksum: {s}\n", .{std.fmt.fmtSliceHexLower(&actual_chksum)}); - try std.testing.expectEqualStrings(expected_chksum, &actual_chksum); + var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + // std.debug.print("actual chksum: {s}\n", .{chksum}); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); } else { if (!expected.truncated) try actual.skip(); // skip 
file content } - i += 1; } try std.testing.expectEqual(case.files.len, i); } } + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; + From 2ed9a276a701cc55eccf4fcbf68476e797f1818b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 18:50:48 +0100 Subject: [PATCH 13/29] tar: use Go test cases path from env variable Skip tests if env is not set. --- lib/std/tar.zig | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 1a69f113cc98..36a6de1292e5 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -701,7 +701,11 @@ const TestCase = struct { }; test "tar: Go test cases" { - const test_dir = try std.fs.openDirAbsolute("/usr/local/go/src/archive/tar/testdata", .{}); + const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| + try std.fs.openDirAbsolute(path, .{}) + else + return error.SkipZigTest; + const cases = [_]TestCase{ .{ .path = "gnu.tar", @@ -781,12 +785,6 @@ test "tar: Go test cases" { .path = "pax-bad-hdr-file.tar", .err = error.InvalidPaxAttribute, }, - // - // .{ - // .path = "pax-bad-mtime-file.tar", - // .err = error.TarBadHeader, - // }, - // .{ // size is in pax attribute .path = "pax-pos-size-file.tar", @@ -987,8 +985,6 @@ test "tar: Go test cases" { }; for (cases) |case| { - //if (!std.mem.eql(u8, case.path, "pax-pos-size-file.tar")) continue; - var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); @@ -1001,7 +997,7 @@ test "tar: Go test cases" { } else { return err; } - }) |actual| { + }) |actual| : (i += 1) { const expected = case.files[i]; try std.testing.expectEqualStrings(expected.name, actual.name); try 
std.testing.expectEqual(expected.size, actual.size); @@ -1012,7 +1008,6 @@ test "tar: Go test cases" { var md5writer = Md5Writer{}; try actual.write(&md5writer); const chksum = md5writer.chksum(); - // std.debug.print("actual chksum: {s}\n", .{chksum}); try std.testing.expectEqualStrings(case.chksums[i], &chksum); } else { if (!expected.truncated) try actual.skip(); // skip file content From 2a432d3008fa1e9af645de96b08cbad57709ffb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Dec 2023 19:03:32 +0100 Subject: [PATCH 14/29] tar: prefix test cases with 'tar' To make it little easier to filter from all stdlib tests. --- lib/std/tar.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 36a6de1292e5..ffc4d69d56a8 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -628,7 +628,7 @@ fn stripComponents(path: []const u8, count: u32) ![]const u8 { return path[i..]; } -test stripComponents { +test "tar stripComponents" { const expectEqualStrings = std.testing.expectEqualStrings; try expectEqualStrings("a/b/c", try stripComponents("a/b/c", 0)); try expectEqualStrings("b/c", try stripComponents("a/b/c", 1)); @@ -667,7 +667,7 @@ fn noNull(str: []const u8) ![]const u8 { return str; } -test "parsePaxAttribute" { +test "tar parsePaxAttribute" { const expectEqual = std.testing.expectEqual; const expectEqualStrings = std.testing.expectEqualStrings; const expectError = std.testing.expectError; @@ -700,7 +700,7 @@ const TestCase = struct { err: ?anyerror = null, // parsing should fail with this error }; -test "tar: Go test cases" { +test "tar run Go test cases" { const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| try std.fs.openDirAbsolute(path, .{}) else @@ -1031,4 +1031,3 @@ const Md5Writer = struct { return std.fmt.bytesToHex(s, .lower); } }; - From 7b0bbc680fa831200653fb0af7cb46a768e0dd93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Dec 2023 
15:00:42 +0100 Subject: [PATCH 15/29] tar: add file mode to result of tarbal iteration So we have information to set executable bit on write to file system. --- lib/std/tar.zig | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index ffc4d69d56a8..51c1c023ae95 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -109,6 +109,10 @@ pub const Header = struct { return header.str(0, 100); } + pub fn mode(header: Header) !u32 { + return @intCast(try header.numeric(100, 8)); + } + pub fn fileSize(header: Header) !u64 { return header.numeric(124, 12); } @@ -429,6 +433,7 @@ fn Iterator(comptime ReaderType: type) type { name: []const u8, // name of file, symlink or directory link_name: []const u8, // target name of symlink size: usize, // size of the file in bytes + mode: u32, file_type: Header.FileType, reader: *BufferedReaderType, @@ -471,6 +476,7 @@ fn Iterator(comptime ReaderType: type) type { .link_name = self.scratch.link_name, .size = self.scratch.size, .reader = &self.reader, + .mode = try header.mode(), }; self.padding = blockPadding(file.size); return file; @@ -689,6 +695,7 @@ const TestCase = struct { const File = struct { name: []const u8, size: usize = 0, + mode: u32 = 0, link_name: []const u8 = &[0]u8{}, file_type: Header.FileType = .normal, truncated: bool = false, // when there is no file body, just header, usefull for huge files @@ -713,10 +720,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o640, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -734,10 +743,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o640, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -751,10 +762,12 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, + .mode = 0o444, }, .{ .name = "small2.txt", .size = 11, + .mode = 0o444, }, }, .chksums = 
&[_][]const u8{ @@ -768,11 +781,13 @@ test "tar run Go test cases" { .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, + .mode = 0o664, }, .{ .name = "a/b", .size = 0, .file_type = .symbolic_link, + .mode = 0o777, .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, }, @@ -793,6 +808,7 @@ test "tar run Go test cases" { .name = "foo", .size = 999, .file_type = .normal, + .mode = 0o640, }, }, .chksums = &[_][]const u8{ @@ -833,6 +849,7 @@ test "tar run Go test cases" { .name = "P1050238.JPG.log", .size = 14, .file_type = .normal, + .mode = 0o664, }, }, .chksums = &[_][]const u8{ @@ -847,11 +864,13 @@ test "tar run Go test cases" { .name = "small.txt", .size = 5, .file_type = .normal, + .mode = 0o644, }, .{ .name = "small2.txt", .size = 11, .file_type = .normal, + .mode = 0o644, }, }, .chksums = &[_][]const u8{ @@ -890,6 +909,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "0123456789", + .mode = 0o644, }, }, }, @@ -898,6 +918,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, }, }, }, @@ -906,6 +927,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, }, }, }, @@ -948,6 +970,7 @@ test "tar run Go test cases" { .files = &[_]TestCase.File{ .{ .name = "file", + .mode = 0o644, }, }, }, @@ -968,6 +991,7 @@ test "tar run Go test cases" { .name = "tmp/16gig.txt", .size = 16 * 1024 * 1024 * 1024, .truncated = true, + .mode = 0o640, }, }, }, @@ -978,6 +1002,7 @@ test "tar run Go test cases" { .{ .name = "longname/" ** 15 ++ "16gig.txt", .size = 16 * 1024 * 1024 * 
1024, + .mode = 0o644, .truncated = true, }, }, @@ -1002,6 +1027,7 @@ test "tar run Go test cases" { try std.testing.expectEqualStrings(expected.name, actual.name); try std.testing.expectEqual(expected.size, actual.size); try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqual(expected.mode, actual.mode); try std.testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { From a3cf8ec71ec17f384608a6df0d41b804f2cfe231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 5 Dec 2023 17:08:45 +0100 Subject: [PATCH 16/29] tar: add pax file reader tests --- lib/std/tar.zig | 121 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 108 insertions(+), 13 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 51c1c023ae95..6d1934d91c54 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -209,17 +209,17 @@ inline fn blockPadding(size: usize) usize { fn BufferedReader(comptime ReaderType: type) type { return struct { - unbuffered_reader: ReaderType, + underlying_reader: ReaderType, buffer: [BLOCK_SIZE * 8]u8 = undefined, start: usize = 0, end: usize = 0, const Self = @This(); - // Fills buffer from underlaying reader. + // Fills buffer from underlying unbuffered reader. fn fillBuffer(self: *Self) !void { self.removeUsed(); - self.end += try self.unbuffered_reader.read(self.buffer[self.end..]); + self.end += try self.underlying_reader.read(self.buffer[self.end..]); } // Returns slice of size count or how much fits into buffer. @@ -261,7 +261,7 @@ fn BufferedReader(comptime ReaderType: type) type { // Advances reader without assuming that count bytes are in the buffer. 
pub fn skip(self: *Self, count: usize) !void { if (self.start + count > self.end) { - try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{}); + try self.underlying_reader.skipBytes(self.start + count - self.end, .{}); self.start = self.end; } else { self.advance(count); @@ -313,14 +313,14 @@ fn BufferedReader(comptime ReaderType: type) type { offset: usize = 0, reader: *Self, - const PaxKey = enum { + const PaxKeyKind = enum { path, linkpath, size, }; const PaxAttribute = struct { - key: PaxKey, + key: PaxKeyKind, value_len: usize, parent: *PaxFileReader, @@ -347,7 +347,7 @@ fn BufferedReader(comptime ReaderType: type) type { try self.reader.readSlice(remaining_size), remaining_size, ); - const key: PaxKey = if (inf.is("path")) + const key: PaxKeyKind = if (inf.is("path")) .path else if (inf.is("linkpath")) .linkpath @@ -376,8 +376,7 @@ fn BufferedReader(comptime ReaderType: type) type { }; } -fn Iterator(comptime ReaderType: type) type { - const BufferedReaderType = BufferedReader(ReaderType); +fn Iterator(comptime BufferedReaderType: type) type { return struct { // scratch buffer for file attributes scratch: struct { @@ -527,14 +526,19 @@ fn Iterator(comptime ReaderType: type) type { }; } -pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { - const ReaderType = @TypeOf(reader); +pub fn iterator(underlying_reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(BufferedReader(@TypeOf(underlying_reader))) { return .{ - .reader = BufferedReader(ReaderType){ .unbuffered_reader = reader }, + .reader = bufferedReader(underlying_reader), .diagnostics = diagnostics, }; } +fn bufferedReader(underlying_reader: anytype) BufferedReader(@TypeOf(underlying_reader)) { + return BufferedReader(@TypeOf(underlying_reader)){ + .underlying_reader = underlying_reader, + }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -656,7 
+660,7 @@ fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute; const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute; const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10); - if (kv_size > max_size) { + if (kv_size > max_size or kv_size < pos_equals + 2) { return error.InvalidPaxAttribute; } const key = data[pos_space + 1 .. pos_equals]; @@ -1057,3 +1061,94 @@ const Md5Writer = struct { return std.fmt.bytesToHex(s, .lower); } }; + +test "tar PaxFileReader" { + const Attribute = struct { + const PaxKeyKind = enum { + path, + linkpath, + size, + }; + key: PaxKeyKind, + value: []const u8, + }; + const cases = [_]struct { + data: []const u8, + attrs: []const Attribute, + err: ?anyerror = null, + }{ + .{ // valid but unknown keys + .data = + \\30 mtime=1350244992.023960108 + \\6 k=1 + \\13 key1=val1 + \\10 a=name + \\9 a=name + \\ + , + .attrs = &[_]Attribute{}, + }, + .{ // mix of known and unknown keys + .data = + \\6 k=1 + \\13 path=name + \\17 linkpath=link + \\13 key1=val1 + \\12 size=123 + \\13 key2=val2 + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + .{ .key = .linkpath, .value = "link" }, + .{ .key = .size, .value = "123" }, + }, + }, + .{ // too short size of the second key-value pair + .data = + \\13 path=name + \\10 linkpath=value + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + }, + .err = error.InvalidPaxAttribute, + }, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\19 linkpath=value + \\ + , + .attrs = &[_]Attribute{ + .{ .key = .path, .value = "name" }, + }, + .err = error.InvalidPaxAttribute, + }, + }; + var buffer: [1024]u8 = undefined; + + for (cases) |case| { + var stream = std.io.fixedBufferStream(case.data); + var brdr = bufferedReader(stream.reader()); + + var rdr = 
brdr.paxFileReader(case.data.len); + var i: usize = 0; + while (rdr.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |attr| : (i += 1) { + try std.testing.expectEqualStrings( + case.attrs[i].value, + try attr.value(&buffer), + ); + } + try std.testing.expectEqual(case.attrs.len, i); + try std.testing.expect(case.err == null); + } +} From 58e0e509c6dc8fae77e668ef8ee267dfdb619196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 6 Dec 2023 15:35:29 +0100 Subject: [PATCH 17/29] tar: add module comment and references --- lib/std/tar.zig | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 6d1934d91c54..a5eb7a3ef569 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,4 +1,21 @@ const std = @import("std.zig"); +/// Tar archive is single ordinary file which can contain many files (or +/// directories, symlinks, ...). It's build by series of blocks each size of 512 +/// bytes. First block of each entry is header which defines type, name, size +/// permissions and other attributes. Header is followed by series of blocks of +/// file content, if any that entry has content. Content is padded to the block +/// size, so next header always starts at block boundary. +/// +/// This simple format is extended by GNU and POSIX pax extensions to support +/// file names longer than 256 bytes and additional attributes. +/// +/// This is not comprehensive tar parser. Here we are only file types needed to +/// support Zig package manager; normal file, directory, symbolic link. And +/// subset of attributes: name, size, permissions. 
+/// +/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html +/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 + const assert = std.debug.assert; pub const Options = struct { @@ -193,7 +210,7 @@ pub const Header = struct { } }; -// Breaks string on first null char. +// Breaks string on first null character. fn nullStr(str: []const u8) []const u8 { for (str, 0..) |c, i| { if (c == 0) return str[0..i]; From dbab45cfc6a952aa4ec873d6a33c487cd431bc62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 15:48:43 +0100 Subject: [PATCH 18/29] tar: replace custom buffered reader with std.io --- lib/std/tar.zig | 808 ++++++++++++++++++++++-------------------------- 1 file changed, 366 insertions(+), 442 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index a5eb7a3ef569..e15301589ab0 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,4 +1,3 @@ -const std = @import("std.zig"); /// Tar archive is single ordinary file which can contain many files (or /// directories, symlinks, ...). It's build by series of blocks each size of 512 /// bytes. First block of each entry is header which defines type, name, size @@ -15,7 +14,9 @@ const std = @import("std.zig"); /// /// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html /// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 - +/// +//const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; pub const Options = struct { @@ -224,338 +225,6 @@ inline fn blockPadding(size: usize) usize { return block_rounded - size; } -fn BufferedReader(comptime ReaderType: type) type { - return struct { - underlying_reader: ReaderType, - buffer: [BLOCK_SIZE * 8]u8 = undefined, - start: usize = 0, - end: usize = 0, - - const Self = @This(); - - // Fills buffer from underlying unbuffered reader. 
- fn fillBuffer(self: *Self) !void { - self.removeUsed(); - self.end += try self.underlying_reader.read(self.buffer[self.end..]); - } - - // Returns slice of size count or how much fits into buffer. - pub fn readSlice(self: *Self, count: usize) ![]const u8 { - if (count <= self.end - self.start) { - return self.buffer[self.start .. self.start + count]; - } - try self.fillBuffer(); - const buf = self.buffer[self.start..self.end]; - if (buf.len == 0) return error.UnexpectedEndOfStream; - return buf[0..@min(count, buf.len)]; - } - - // Returns tar header block, 512 bytes, or null if eof. Before reading - // advances buffer for padding of the previous block, to position reader - // at the start of new block. After reading advances for block size, to - // position reader at the start of the file content. - pub fn readHeader(self: *Self, padding: usize) !?[]const u8 { - try self.skip(padding); - const buf = self.readSlice(BLOCK_SIZE) catch return null; - if (buf.len < BLOCK_SIZE) return error.UnexpectedEndOfStream; - self.advance(BLOCK_SIZE); - return buf[0..BLOCK_SIZE]; - } - - // Returns byte at current position in buffer. - pub fn readByte(self: *@This()) u8 { - assert(self.start < self.end); - return self.buffer[self.start]; - } - - // Advances reader for count bytes, assumes that we have that number of - // bytes in buffer. - pub fn advance(self: *Self, count: usize) void { - self.start += count; - assert(self.start <= self.end); - } - - // Advances reader without assuming that count bytes are in the buffer. - pub fn skip(self: *Self, count: usize) !void { - if (self.start + count > self.end) { - try self.underlying_reader.skipBytes(self.start + count - self.end, .{}); - self.start = self.end; - } else { - self.advance(count); - } - } - - // Removes used part of the buffer. 
- inline fn removeUsed(self: *Self) void { - const dest_end = self.end - self.start; - if (self.start == 0 or dest_end > self.start) return; - @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]); - self.end = dest_end; - self.start = 0; - } - - // Writes count bytes to the writer. Advances reader. - pub fn write(self: *Self, writer: anytype, count: usize) !void { - var pos: usize = 0; - while (pos < count) { - const slice = try self.readSlice(count - pos); - try writer.writeAll(slice); - self.advance(slice.len); - pos += slice.len; - } - } - - // Copies dst.len bytes into dst buffer. Advances reader. - pub fn copy(self: *Self, dst: []u8) ![]const u8 { - var pos: usize = 0; - while (pos < dst.len) { - const slice = try self.readSlice(dst.len - pos); - @memcpy(dst[pos .. pos + slice.len], slice); - self.advance(slice.len); - pos += slice.len; - } - return dst; - } - - pub fn paxFileReader(self: *Self, size: usize) PaxFileReader { - return .{ - .size = size, - .reader = self, - .offset = 0, - }; - } - - const PaxFileReader = struct { - size: usize, - offset: usize = 0, - reader: *Self, - - const PaxKeyKind = enum { - path, - linkpath, - size, - }; - - const PaxAttribute = struct { - key: PaxKeyKind, - value_len: usize, - parent: *PaxFileReader, - - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least value_len. - pub fn value(self: PaxAttribute, dst: []u8) ![]u8 { - assert(dst.len >= self.value_len); - const buf = dst[0..self.value_len]; - _ = try self.parent.reader.copy(buf); - self.parent.offset += buf.len; - try self.parent.checkAttributeEnding(); - return buf; - } - }; - - // Caller of the next has to call value in PaxAttribute, to advance - // reader across value. 
- pub fn next(self: *PaxFileReader) !?PaxAttribute { - while (true) { - const remaining_size = self.size - self.offset; - if (remaining_size == 0) return null; - - const inf = try parsePaxAttribute( - try self.reader.readSlice(remaining_size), - remaining_size, - ); - const key: PaxKeyKind = if (inf.is("path")) - .path - else if (inf.is("linkpath")) - .linkpath - else if (inf.is("size")) - .size - else { - try self.advance(inf.value_off + inf.value_len); - try self.checkAttributeEnding(); - continue; - }; - try self.advance(inf.value_off); // position reader at the start of the value - return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self }; - } - } - - fn checkAttributeEnding(self: *PaxFileReader) !void { - if (self.reader.readByte() != '\n') return error.InvalidPaxAttribute; - try self.advance(1); - } - - fn advance(self: *PaxFileReader, len: usize) !void { - self.offset += len; - try self.reader.skip(len); - } - }; - }; -} - -fn Iterator(comptime BufferedReaderType: type) type { - return struct { - // scratch buffer for file attributes - scratch: struct { - // size: two paths (name and link_name) and files size bytes (24 in pax attribute) - buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, - tail: usize = 0, - - name: []const u8 = undefined, - link_name: []const u8 = undefined, - size: usize = 0, - - // Allocate size of the buffer for some attribute. - fn alloc(self: *@This(), size: usize) ![]u8 { - const free_size = self.buffer.len - self.tail; - if (size > free_size) return error.TarScratchBufferOverflow; - const head = self.tail; - self.tail += size; - assert(self.tail <= self.buffer.len); - return self.buffer[head..self.tail]; - } - - // Reset buffer and all fields. 
- fn reset(self: *@This()) void { - self.tail = 0; - self.name = self.buffer[0..0]; - self.link_name = self.buffer[0..0]; - self.size = 0; - } - - fn append(self: *@This(), header: Header) !void { - if (self.size == 0) self.size = try header.fileSize(); - if (self.link_name.len == 0) { - const link_name = header.linkName(); - if (link_name.len > 0) { - const buf = try self.alloc(link_name.len); - @memcpy(buf, link_name); - self.link_name = buf; - } - } - if (self.name.len == 0) { - self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); - } - } - } = .{}, - - reader: BufferedReaderType, - diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of padding to the end of the block - - const Self = @This(); - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes - mode: u32, - file_type: Header.FileType, - - reader: *BufferedReaderType, - - // Writes file content to writer. - pub fn write(self: File, writer: anytype) !void { - try self.reader.write(writer, self.size); - } - - // Skips file content. Advances reader. - pub fn skip(self: File) !void { - try self.reader.skip(self.size); - } - }; - - // Externally, `next` iterates through the tar archive as if it is a - // series of files. Internally, the tar format often uses fake "files" - // to add meta data that describes the next file. These meta data - // "files" should not normally be visible to the outside. As such, this - // loop iterates through one or more "header files" until it finds a - // "normal file". 
- pub fn next(self: *Self) !?File { - self.scratch.reset(); - - while (try self.reader.readHeader(self.padding)) |block_bytes| { - const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] }; - if (try header.checkChksum() == 0) return null; // zero block found - - const file_type = header.fileType(); - const size: usize = @intCast(try header.fileSize()); - self.padding = blockPadding(size); - - switch (file_type) { - // File types to retrun upstream - .directory, .normal, .symbolic_link => { - try self.scratch.append(header); - const file = File{ - .file_type = file_type, - .name = self.scratch.name, - .link_name = self.scratch.link_name, - .size = self.scratch.size, - .reader = &self.reader, - .mode = try header.mode(), - }; - self.padding = blockPadding(file.size); - return file; - }, - // Prefix header types - .gnu_long_name => { - self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); - }, - .gnu_long_link => { - self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size))); - }, - .extended_header => { - if (size == 0) continue; - // Use just attributes from last extended header. 
- self.scratch.reset(); - - var rdr = self.reader.paxFileReader(size); - while (try rdr.next()) |attr| { - switch (attr.key) { - .path => { - self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); - }, - .linkpath => { - self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len))); - }, - .size => { - self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skip(size) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedFileType; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = file_type, - } }); - }, - } - } - return null; - } - }; -} - -pub fn iterator(underlying_reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(BufferedReader(@TypeOf(underlying_reader))) { - return .{ - .reader = bufferedReader(underlying_reader), - .diagnostics = diagnostics, - }; -} - -fn bufferedReader(underlying_reader: anytype) BufferedReader(@TypeOf(underlying_reader)) { - return BufferedReader(@TypeOf(underlying_reader)){ - .underlying_reader = underlying_reader, - }; -} - pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -569,7 +238,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, } - var iter = iterator(reader, options.diagnostics); + var iter = tarReader(reader, options.diagnostics); while (try iter.next()) |file| { switch (file.file_type) { @@ -662,82 +331,37 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -const PaxAttributeInfo = struct { - size: usize, - key: []const u8, - value_off: usize, - value_len: usize, - 
- inline fn is(self: @This(), key: []const u8) bool { - return (std.mem.eql(u8, self.key, key)); - } -}; - -fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { - const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute; - const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute; - const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10); - if (kv_size > max_size or kv_size < pos_equals + 2) { - return error.InvalidPaxAttribute; - } - const key = data[pos_space + 1 .. pos_equals]; - return .{ - .size = kv_size, - .key = try noNull(key), - .value_off = pos_equals + 1, - .value_len = kv_size - pos_equals - 2, - }; -} - fn noNull(str: []const u8) ![]const u8 { if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; return str; } -test "tar parsePaxAttribute" { - const expectEqual = std.testing.expectEqual; - const expectEqualStrings = std.testing.expectEqualStrings; - const expectError = std.testing.expectError; - const prefix = "1011 path="; - const file_name = "0123456789" ** 100; - const header = prefix ++ file_name ++ "\n"; - const attr_info = try parsePaxAttribute(header, 1011); - try expectEqual(@as(usize, 1011), attr_info.size); - try expectEqualStrings("path", attr_info.key); - try expectEqual(prefix.len, attr_info.value_off); - try expectEqual(file_name.len, attr_info.value_len); - try expectEqual(attr_info, try parsePaxAttribute(header, 1012)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute(header, 1010)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute("13 pa\x00th=abc\n", 1024)); // null in key -} +test "tar run Go test cases" { + const Case = struct { + const File = struct { + name: []const u8, + size: usize = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + file_type: Header.FileType = .normal, + 
truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; -const TestCase = struct { - const File = struct { - name: []const u8, - size: usize = 0, - mode: u32 = 0, - link_name: []const u8 = &[0]u8{}, - file_type: Header.FileType = .normal, - truncated: bool = false, // when there is no file body, just header, usefull for huge files + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error }; - path: []const u8, // path to the tar archive file on dis - files: []const File = &[_]TestCase.File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content - err: ?anyerror = null, // parsing should fail with this error -}; - -test "tar run Go test cases" { const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| try std.fs.openDirAbsolute(path, .{}) else return error.SkipZigTest; - const cases = [_]TestCase{ + const cases = [_]Case{ .{ .path = "gnu.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -760,7 +384,7 @@ test "tar run Go test cases" { }, .{ .path = "star.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -779,7 +403,7 @@ test "tar run Go test cases" { }, .{ .path = "v7.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -798,7 +422,7 @@ test "tar run Go test cases" { }, .{ .path = "pax.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", .size = 7, @@ -824,7 +448,7 @@ test 
"tar run Go test cases" { .{ // size is in pax attribute .path = "pax-pos-size-file.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "foo", .size = 999, @@ -839,7 +463,7 @@ test "tar run Go test cases" { .{ // has pax records which we are not interested in .path = "pax-records.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file", }, @@ -848,7 +472,7 @@ test "tar run Go test cases" { .{ // has global records which we are ignoring .path = "pax-global-records.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file1", }, @@ -865,7 +489,7 @@ test "tar run Go test cases" { }, .{ .path = "nil-uid.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "P1050238.JPG.log", .size = 14, @@ -880,7 +504,7 @@ test "tar run Go test cases" { .{ // has xattrs and pax records which we are ignoring .path = "xattrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "small.txt", .size = 5, @@ -901,7 +525,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-multi-hdrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "GNU2/GNU2/long-path-name", .link_name = "GNU4/GNU4/long-linkpath-name", @@ -917,7 +541,7 @@ test "tar run Go test cases" { .{ // should use values only from last pax header .path = "pax-multi-hdrs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "bar", .link_name = "PAX4/PAX4/long-linkpath-name", @@ -927,7 +551,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-long-nul.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "0123456789", .mode = 0o644, @@ -936,7 +560,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-utf8.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", .mode = 0o644, @@ -945,7 +569,7 @@ test "tar run Go test cases" { }, .{ .path = "gnu-not-utf8.tar", - .files = &[_]TestCase.File{ + .files = 
&[_]Case.File{ .{ .name = "hi\x80\x81\x82\x83bye", .mode = 0o644, @@ -980,7 +604,7 @@ test "tar run Go test cases" { .{ // has magic with space at end instead of null .path = "invalid-go17.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", }, @@ -988,7 +612,7 @@ test "tar run Go test cases" { }, .{ .path = "ustar-file-devs.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "file", .mode = 0o644, @@ -997,7 +621,7 @@ test "tar run Go test cases" { }, .{ .path = "trailing-slash.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, .file_type = .directory, @@ -1007,7 +631,7 @@ test "tar run Go test cases" { .{ // Has size in gnu extended format. To represent size bigger than 8 GB. .path = "writer-big.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "tmp/16gig.txt", .size = 16 * 1024 * 1024 * 1024, @@ -1019,7 +643,7 @@ test "tar run Go test cases" { .{ // Size in gnu extended format, and name in pax attribute. 
.path = "writer-big-long.tar", - .files = &[_]TestCase.File{ + .files = &[_]Case.File{ .{ .name = "longname/" ** 15 ++ "16gig.txt", .size = 16 * 1024 * 1024 * 1024, @@ -1034,7 +658,8 @@ test "tar run Go test cases" { var fs_file = try test_dir.openFile(case.path, .{}); defer fs_file.close(); - var iter = iterator(fs_file.reader(), null); + //var iter = iterator(fs_file.reader(), null); + var iter = tarReader(fs_file.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { @@ -1072,6 +697,10 @@ const Md5Writer = struct { self.h.update(buf); } + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + pub fn chksum(self: *Md5Writer) [32]u8 { var s = [_]u8{0} ** 16; self.h.final(&s); @@ -1079,19 +708,113 @@ const Md5Writer = struct { } }; -test "tar PaxFileReader" { - const Attribute = struct { - const PaxKeyKind = enum { - path, - linkpath, - size, +fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { + return PaxReader(@TypeOf(reader)){ + .reader = reader, + .size = size, + }; +} + +const PaxAttrKind = enum { + path, + linkpath, + size, +}; + +fn PaxReader(comptime ReaderType: type) type { + return struct { + size: usize, + reader: ReaderType, + + const Self = @This(); + + const Attr = struct { + kind: PaxAttrKind, + len: usize, + reader: ReaderType, + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least value_len. + pub fn value(self: Attr, dst: []u8) ![]const u8 { + assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readAll(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try checkRecordEnd(self.reader); + return noNull(buf); + } }; - key: PaxKeyKind, - value: []const u8, + + // Iterates over pax records. Returns known records. Caller has to call + // value in Record, to advance reader across value. 
+ pub fn next(self: *Self) !?Attr { + var buf: [128]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buf); + + // An extended header consists of one or more records, each constructed as follows: + // "%d %s=%s\n", , , + while (self.size > 0) { + fbs.reset(); + // read length + try self.reader.streamUntilDelimiter(fbs.writer(), ' ', null); + const rec_len = try std.fmt.parseInt(usize, fbs.getWritten(), 10); // record len in bytes + var pos = try fbs.getPos() + 1; // bytes used for record len + separator + fbs.reset(); + // read keyword + try self.reader.streamUntilDelimiter(fbs.writer(), '=', null); + const keyword = fbs.getWritten(); + pos += try fbs.getPos() + 1; // keyword bytes + separator + try checkKeyword(keyword); + // get value_len + if (rec_len < pos + 1) return error.InvalidPaxAttribute; + const value_len = rec_len - pos - 1; // pos = start of value, -1 => without \n record terminator + + self.size -= rec_len; + const kind: PaxAttrKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.skipBytes(value_len, .{}); + try checkRecordEnd(self.reader); + continue; + }; + return Attr{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; + } + + return null; + } + + inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); + } + + fn checkKeyword(keyword: []const u8) !void { + if (std.mem.indexOfScalar(u8, keyword, 0)) |_| return error.InvalidPaxAttribute; + } + + // Checks that each record ends with new line. 
+ fn checkRecordEnd(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.InvalidPaxAttribute; + } + }; +} + +test "tar PaxReader" { + const Attr = struct { + kind: PaxAttrKind, + value: []const u8 = undefined, + err: ?anyerror = null, }; const cases = [_]struct { data: []const u8, - attrs: []const Attribute, + attrs: []const Attr, err: ?anyerror = null, }{ .{ // valid but unknown keys @@ -1103,7 +826,7 @@ test "tar PaxFileReader" { \\9 a=name \\ , - .attrs = &[_]Attribute{}, + .attrs = &[_]Attr{}, }, .{ // mix of known and unknown keys .data = @@ -1115,10 +838,10 @@ test "tar PaxFileReader" { \\13 key2=val2 \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, - .{ .key = .linkpath, .value = "link" }, - .{ .key = .size, .value = "123" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .value = "link" }, + .{ .kind = .size, .value = "123" }, }, }, .{ // too short size of the second key-value pair @@ -1127,8 +850,8 @@ test "tar PaxFileReader" { \\10 linkpath=value \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, }, .err = error.InvalidPaxAttribute, }, @@ -1136,36 +859,237 @@ test "tar PaxFileReader" { .data = \\13 path=name \\19 linkpath=value + \\6 k=1 \\ , - .attrs = &[_]Attribute{ - .{ .key = .path, .value = "name" }, + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .err = error.InvalidPaxAttribute }, + }, + }, + .{ // null in keyword is not valid + .data = "13 path=name\n" ++ "7 k\x00b=1\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, }, .err = error.InvalidPaxAttribute, }, + .{ // null in value is not valid + .data = "23 path=name\x00with null\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .err = error.InvalidPaxAttribute }, + }, + }, + .{ // 1000 characters path + .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n", + .attrs = &[_]Attr{ + .{ .kind = .path, 
.value = "0123456789" ** 100 }, + }, + }, }; var buffer: [1024]u8 = undefined; - for (cases) |case| { + outer: for (cases) |case| { var stream = std.io.fixedBufferStream(case.data); - var brdr = bufferedReader(stream.reader()); + var rdr = paxReader(stream.reader(), case.data.len); - var rdr = brdr.paxFileReader(case.data.len); var i: usize = 0; while (rdr.next() catch |err| { if (case.err) |e| { try std.testing.expectEqual(e, err); continue; - } else { - return err; } + return err; }) |attr| : (i += 1) { - try std.testing.expectEqualStrings( - case.attrs[i].value, - try attr.value(&buffer), - ); + const exp = case.attrs[i]; + try std.testing.expectEqual(exp.kind, attr.kind); + const value = attr.value(&buffer) catch |err| { + if (exp.err) |e| { + try std.testing.expectEqual(e, err); + break :outer; + } + return err; + }; + try std.testing.expectEqualStrings(exp.value, value); } try std.testing.expectEqual(case.attrs.len, i); try std.testing.expect(case.err == null); } } + +pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { + return .{ + .reader = reader, + .diagnostics = diagnostics, + }; +} + +fn TarReader(comptime ReaderType: type) type { + return struct { + // scratch buffer for file attributes + scratch: struct { + // size: two paths (name and link_name) and files size bytes (24 in pax attribute) + buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, + tail: usize = 0, + + name: []const u8 = undefined, + link_name: []const u8 = undefined, + size: usize = 0, + + // Allocate size of the buffer for some attribute. + fn alloc(self: *@This(), size: usize) ![]u8 { + const free_size = self.buffer.len - self.tail; + if (size > free_size) return error.TarScratchBufferOverflow; + const head = self.tail; + self.tail += size; + assert(self.tail <= self.buffer.len); + return self.buffer[head..self.tail]; + } + + // Reset buffer and all fields. 
+ fn reset(self: *@This()) void { + self.tail = 0; + self.name = self.buffer[0..0]; + self.link_name = self.buffer[0..0]; + self.size = 0; + } + + fn append(self: *@This(), header: Header) !void { + if (self.size == 0) self.size = try header.fileSize(); + if (self.link_name.len == 0) { + const link_name = header.linkName(); + if (link_name.len > 0) { + const buf = try self.alloc(link_name.len); + @memcpy(buf, link_name); + self.link_name = buf; + } + } + if (self.name.len == 0) { + self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); + } + } + } = .{}, + + reader: ReaderType, + diagnostics: ?*Options.Diagnostics, + padding: usize = 0, // bytes of padding to the end of the block + header_buffer: [BLOCK_SIZE]u8 = undefined, + + const Self = @This(); + + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + mode: u32, + file_type: Header.FileType, + + reader: *ReaderType, + + // Writes file content to writer. + pub fn write(self: File, writer: anytype) !void { + var n = self.size; + while (n > 0) : (n -= 1) { + const byte: u8 = try self.reader.readByte(); + try writer.writeByte(byte); + } + } + + // Skips file content. Advances reader. 
+ pub fn skip(self: File) !void { + try self.reader.skipBytes(self.size, .{}); + } + }; + + fn readHeader(self: *Self) !?Header { + if (self.padding > 0) { + try self.reader.skipBytes(self.padding, .{}); + } + const n = try self.reader.readAll(&self.header_buffer); + if (n == 0) return null; + if (n < BLOCK_SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..BLOCK_SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + fn readString(self: *Self, size: usize) ![]const u8 { + const buf = try self.scratch.alloc(size); + try self.reader.readNoEof(buf); + return nullStr(buf); + } + + // Externally, `next` iterates through the tar archive as if it is a + // series of files. Internally, the tar format often uses fake "files" + // to add meta data that describes the next file. These meta data + // "files" should not normally be visible to the outside. As such, this + // loop iterates through one or more "header files" until it finds a + // "normal file". + pub fn next(self: *Self) !?File { + self.scratch.reset(); + + while (try self.readHeader()) |header| { + const file_type = header.fileType(); + const size: usize = @intCast(try header.fileSize()); + self.padding = blockPadding(size); + + switch (file_type) { + // File types to retrun upstream + .directory, .normal, .symbolic_link => { + try self.scratch.append(header); + const file = File{ + .file_type = file_type, + .name = self.scratch.name, + .link_name = self.scratch.link_name, + .size = self.scratch.size, + .reader = &self.reader, + .mode = try header.mode(), + }; + self.padding = blockPadding(file.size); + return file; + }, + // Prefix header types + .gnu_long_name => { + self.scratch.name = try self.readString(size); + }, + .gnu_long_link => { + self.scratch.link_name = try self.readString(size); + }, + .extended_header => { + if (size == 0) continue; + // Use just attributes from last extended header. 
+ self.scratch.reset(); + + var rdr = paxReader(self.reader, size); + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + self.scratch.name = try attr.value(try self.scratch.alloc(attr.len)); + }, + .linkpath => { + self.scratch.link_name = try attr.value(try self.scratch.alloc(attr.len)); + }, + .size => { + self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.len)), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedFileType; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = file_type, + } }); + }, + } + } + return null; + } + }; +} From 9f7dd323082941d66c18af3da88a432835c5e3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 17:47:19 +0100 Subject: [PATCH 19/29] tar: refactor pax attribute Make it little readable. 
--- lib/std/tar.zig | 126 ++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index e15301589ab0..2065240858c6 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -331,11 +331,6 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -fn noNull(str: []const u8) ![]const u8 { - if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute; - return str; -} - test "tar run Go test cases" { const Case = struct { const File = struct { @@ -443,7 +438,7 @@ test "tar run Go test cases" { .{ // pax attribute don't end with \n .path = "pax-bad-hdr-file.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxInvalidAttributeEnd, }, .{ // size is in pax attribute @@ -579,11 +574,11 @@ test "tar run Go test cases" { .{ // null in pax key .path = "pax-nul-xattrs.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInKeyword, }, .{ .path = "pax-nul-path.tar", - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInValue, }, .{ .path = "neg-size.tar", @@ -715,7 +710,7 @@ fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { }; } -const PaxAttrKind = enum { +const PaxAttributeKind = enum { path, linkpath, size, @@ -723,54 +718,50 @@ const PaxAttrKind = enum { fn PaxReader(comptime ReaderType: type) type { return struct { - size: usize, + size: usize, // cumulative size of all pax attributes reader: ReaderType, + // scratch buffer used for reading attribute length and keyword + scratch: [128]u8 = undefined, const Self = @This(); - const Attr = struct { - kind: PaxAttrKind, - len: usize, - reader: ReaderType, + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: ReaderType, // reader positioned at value start // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least value_len. 
- pub fn value(self: Attr, dst: []u8) ![]const u8 { + // Must be called with destination buffer of size at least Attribute.len. + pub fn value(self: Attribute, dst: []u8) ![]const u8 { assert(self.len <= dst.len); const buf = dst[0..self.len]; const n = try self.reader.readAll(buf); if (n < self.len) return error.UnexpectedEndOfStream; - try checkRecordEnd(self.reader); - return noNull(buf); + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; } }; - // Iterates over pax records. Returns known records. Caller has to call - // value in Record, to advance reader across value. - pub fn next(self: *Self) !?Attr { - var buf: [128]u8 = undefined; - var fbs = std.io.fixedBufferStream(&buf); - - // An extended header consists of one or more records, each constructed as follows: + // Iterates over pax attributes. Returns known only known attributes. + // Caller has to call value in Attribute, to advance reader across value. + pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: // "%d %s=%s\n", , , while (self.size > 0) { - fbs.reset(); - // read length - try self.reader.streamUntilDelimiter(fbs.writer(), ' ', null); - const rec_len = try std.fmt.parseInt(usize, fbs.getWritten(), 10); // record len in bytes - var pos = try fbs.getPos() + 1; // bytes used for record len + separator - fbs.reset(); - // read keyword - try self.reader.streamUntilDelimiter(fbs.writer(), '=', null); - const keyword = fbs.getWritten(); - pos += try fbs.getPos() + 1; // keyword bytes + separator - try checkKeyword(keyword); - // get value_len - if (rec_len < pos + 1) return error.InvalidPaxAttribute; - const value_len = rec_len - pos - 1; // pos = start of value, -1 => without \n record terminator - - self.size -= rec_len; - const kind: PaxAttrKind = if (eql(keyword, "path")) + const length_buf = try self.readUntil(' '); + const length = try std.fmt.parseInt(usize, 
length_buf, 10); // record length in bytes + + const keyword = try self.readUntil('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) .path else if (eql(keyword, "linkpath")) .linkpath @@ -778,10 +769,10 @@ fn PaxReader(comptime ReaderType: type) type { .size else { try self.reader.skipBytes(value_len, .{}); - try checkRecordEnd(self.reader); + try validateAttributeEnding(self.reader); continue; }; - return Attr{ + return Attribute{ .kind = kind, .len = value_len, .reader = self.reader, @@ -791,24 +782,30 @@ fn PaxReader(comptime ReaderType: type) type { return null; } + inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { + var fbs = std.io.fixedBufferStream(&self.scratch); + try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); + return fbs.getWritten(); + } + inline fn eql(a: []const u8, b: []const u8) bool { return std.mem.eql(u8, a, b); } - fn checkKeyword(keyword: []const u8) !void { - if (std.mem.indexOfScalar(u8, keyword, 0)) |_| return error.InvalidPaxAttribute; + inline fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; } // Checks that each record ends with new line. 
- fn checkRecordEnd(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.InvalidPaxAttribute; + inline fn validateAttributeEnding(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; } }; } test "tar PaxReader" { const Attr = struct { - kind: PaxAttrKind, + kind: PaxAttributeKind, value: []const u8 = undefined, err: ?anyerror = null, }; @@ -853,8 +850,21 @@ test "tar PaxReader" { .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, }, - .err = error.InvalidPaxAttribute, + .err = error.UnexpectedEndOfStream, + }, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\6 k=1 + \\19 linkpath=value + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + }, + .err = error.UnexpectedEndOfStream, }, + .{ // too long size of the second key-value pair .data = \\13 path=name @@ -864,7 +874,7 @@ test "tar PaxReader" { , .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, - .{ .kind = .linkpath, .err = error.InvalidPaxAttribute }, + .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd }, }, }, .{ // null in keyword is not valid @@ -872,12 +882,12 @@ test "tar PaxReader" { .attrs = &[_]Attr{ .{ .kind = .path, .value = "name" }, }, - .err = error.InvalidPaxAttribute, + .err = error.PaxNullInKeyword, }, .{ // null in value is not valid .data = "23 path=name\x00with null\n", .attrs = &[_]Attr{ - .{ .kind = .path, .err = error.InvalidPaxAttribute }, + .{ .kind = .path, .err = error.PaxNullInValue }, }, }, .{ // 1000 characters path @@ -1019,6 +1029,16 @@ fn TarReader(comptime ReaderType: type) type { return nullStr(buf); } + fn reset(self: *Self) void { + self.file = File{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + .size = 0, + .file_type = 0xff, + .mode = 0, + }; + } + // Externally, `next` iterates through the tar archive as if it is a // series of files. 
Internally, the tar format often uses fake "files" // to add meta data that describes the next file. These meta data From 4a6d67ab1a26e0c89d55453877c1fd8b03ab1976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 20:18:59 +0100 Subject: [PATCH 20/29] tar: remove stratch from tar reader Use explicit buffers for name, link_name instead. It is cleaner that way. --- lib/std/tar.zig | 166 +++++++++++++++++++++--------------------------- 1 file changed, 74 insertions(+), 92 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 2065240858c6..cc7108e62c73 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -83,11 +83,12 @@ pub const Options = struct { }; }; -const BLOCK_SIZE = 512; -const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) - pub const Header = struct { - bytes: *const [BLOCK_SIZE]u8, + const SIZE = 512; + const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) + const LINK_NAME_SIZE = 100; + + bytes: *const [SIZE]u8, pub const FileType = enum(u8) { normal_alias = 0, @@ -110,7 +111,7 @@ pub const Header = struct { /// Includes prefix concatenated, if any. /// TODO: check against "../" and other nefarious things - pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 { const n = name(header); const p = prefix(header); if (!is_ustar(header) or p.len == 0) { @@ -123,6 +124,16 @@ pub const Header = struct { return buffer[0 .. 
p.len + 1 + n.len]; } + pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 { + const link_name = header.str(157, 100); + if (link_name.len == 0) { + return buffer[0..0]; + } + const buf = buffer[0..link_name.len]; + @memcpy(buf, link_name); + return buf; + } + pub fn name(header: Header) []const u8 { return header.str(0, 100); } @@ -139,10 +150,6 @@ pub const Header = struct { return header.octal(148, 8); } - pub fn linkName(header: Header) []const u8 { - return header.str(157, 100); - } - pub fn is_ustar(header: Header) bool { const magic = header.bytes[257..][0..6]; return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' '); @@ -219,12 +226,6 @@ fn nullStr(str: []const u8) []const u8 { return str; } -// Number of padding bytes in the last file block. -inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, BLOCK_SIZE); // size rounded to te block boundary - return block_rounded - size; -} - pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -936,56 +937,18 @@ pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader( fn TarReader(comptime ReaderType: type) type { return struct { - // scratch buffer for file attributes - scratch: struct { - // size: two paths (name and link_name) and files size bytes (24 in pax attribute) - buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined, - tail: usize = 0, - - name: []const u8 = undefined, - link_name: []const u8 = undefined, - size: usize = 0, - - // Allocate size of the buffer for some attribute. - fn alloc(self: *@This(), size: usize) ![]u8 { - const free_size = self.buffer.len - self.tail; - if (size > free_size) return error.TarScratchBufferOverflow; - const head = self.tail; - self.tail += size; - assert(self.tail <= self.buffer.len); - return self.buffer[head..self.tail]; - } - - // Reset buffer and all fields. 
- fn reset(self: *@This()) void { - self.tail = 0; - self.name = self.buffer[0..0]; - self.link_name = self.buffer[0..0]; - self.size = 0; - } - - fn append(self: *@This(), header: Header) !void { - if (self.size == 0) self.size = try header.fileSize(); - if (self.link_name.len == 0) { - const link_name = header.linkName(); - if (link_name.len > 0) { - const buf = try self.alloc(link_name.len); - @memcpy(buf, link_name); - self.link_name = buf; - } - } - if (self.name.len == 0) { - self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]); - } - } - } = .{}, - reader: ReaderType, diagnostics: ?*Options.Diagnostics, - padding: usize = 0, // bytes of padding to the end of the block - header_buffer: [BLOCK_SIZE]u8 = undefined, - const Self = @This(); + // buffers for heeader and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + + // bytes of padding to the end of the block + padding: usize = 0, + // current tar file + file: File = undefined, pub const File = struct { name: []const u8, // name of file, symlink or directory @@ -994,14 +957,18 @@ fn TarReader(comptime ReaderType: type) type { mode: u32, file_type: Header.FileType, - reader: *ReaderType, + reader: ReaderType, // Writes file content to writer. 
pub fn write(self: File, writer: anytype) !void { - var n = self.size; - while (n > 0) : (n -= 1) { - const byte: u8 = try self.reader.readByte(); - try writer.writeByte(byte); + var buffer: [4096]u8 = undefined; + + var n: usize = 0; + while (n < self.size) { + const buf = buffer[0..@min(buffer.len, self.size - n)]; + try self.reader.readNoEof(buf); + try writer.writeAll(buf); + n += buf.len; } } @@ -1011,34 +978,44 @@ fn TarReader(comptime ReaderType: type) type { } }; + const Self = @This(); + fn readHeader(self: *Self) !?Header { if (self.padding > 0) { try self.reader.skipBytes(self.padding, .{}); } const n = try self.reader.readAll(&self.header_buffer); if (n == 0) return null; - if (n < BLOCK_SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..BLOCK_SIZE] }; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; if (try header.checkChksum() == 0) return null; return header; } - fn readString(self: *Self, size: usize) ![]const u8 { - const buf = try self.scratch.alloc(size); + inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { + assert(buffer.len >= size); + const buf = buffer[0..size]; try self.reader.readNoEof(buf); return nullStr(buf); } - fn reset(self: *Self) void { + inline fn initFile(self: *Self) void { self.file = File{ .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], .size = 0, - .file_type = 0xff, + .file_type = .normal, .mode = 0, + .reader = self.reader, }; } + // Number of padding bytes in the last file block. + inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary + return block_rounded - size; + } + // Externally, `next` iterates through the tar archive as if it is a // series of files. 
Internally, the tar format often uses fake "files" // to add meta data that describes the next file. These meta data @@ -1046,7 +1023,7 @@ fn TarReader(comptime ReaderType: type) type { // loop iterates through one or more "header files" until it finds a // "normal file". pub fn next(self: *Self) !?File { - self.scratch.reset(); + self.initFile(); while (try self.readHeader()) |header| { const file_type = header.fileType(); @@ -1056,41 +1033,46 @@ fn TarReader(comptime ReaderType: type) type { switch (file_type) { // File types to retrun upstream .directory, .normal, .symbolic_link => { - try self.scratch.append(header); - const file = File{ - .file_type = file_type, - .name = self.scratch.name, - .link_name = self.scratch.link_name, - .size = self.scratch.size, - .reader = &self.reader, - .mode = try header.mode(), - }; - self.padding = blockPadding(file.size); - return file; + self.file.file_type = file_type; + self.file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (self.file.size == 0) { + self.file.size = size; + } + if (self.file.link_name.len == 0) { + self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); + } + if (self.file.name.len == 0) { + self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); + } + + self.padding = blockPadding(self.file.size); + return self.file; }, // Prefix header types .gnu_long_name => { - self.scratch.name = try self.readString(size); + self.file.name = try self.readString(size, &self.file_name_buffer); }, .gnu_long_link => { - self.scratch.link_name = try self.readString(size); + self.file.link_name = try self.readString(size, &self.link_name_buffer); }, .extended_header => { - if (size == 0) continue; // Use just attributes from last extended header. 
- self.scratch.reset(); + self.initFile(); var rdr = paxReader(self.reader, size); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { - self.scratch.name = try attr.value(try self.scratch.alloc(attr.len)); + self.file.name = try attr.value(&self.file_name_buffer); }, .linkpath => { - self.scratch.link_name = try attr.value(try self.scratch.alloc(attr.len)); + self.file.link_name = try attr.value(&self.link_name_buffer); }, .size => { - self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.len)), 10); + var buf: [64]u8 = undefined; + self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); }, } } From c76abe0e183ef513b9ee651b052c7f99c33c139c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 20:46:27 +0100 Subject: [PATCH 21/29] tar: use file word in less places --- lib/std/tar.zig | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index cc7108e62c73..c83ae70e3b38 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -58,7 +58,7 @@ pub const Options = struct { }, unsupported_file_type: struct { file_name: []const u8, - file_type: Header.FileType, + file_type: Header.Kind, }, }; @@ -90,7 +90,7 @@ pub const Header = struct { bytes: *const [SIZE]u8, - pub const FileType = enum(u8) { + pub const Kind = enum(u8) { normal_alias = 0, normal = '0', hard_link = '1', @@ -142,7 +142,7 @@ pub const Header = struct { return @intCast(try header.numeric(100, 8)); } - pub fn fileSize(header: Header) !u64 { + pub fn size(header: Header) !u64 { return header.numeric(124, 12); } @@ -159,8 +159,8 @@ pub const Header = struct { return header.str(345, 155); } - pub fn fileType(header: Header) FileType { - const result: FileType = @enumFromInt(header.bytes[156]); + pub fn kind(header: Header) Kind { + const result: Kind = @enumFromInt(header.bytes[156]); if (result == .normal_alias) return 
.normal; return result; } @@ -242,7 +242,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var iter = tarReader(reader, options.diagnostics); while (try iter.next()) |file| { - switch (file.file_type) { + switch (file.kind) { .directory => { const file_name = try stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { @@ -339,7 +339,7 @@ test "tar run Go test cases" { size: usize = 0, mode: u32 = 0, link_name: []const u8 = &[0]u8{}, - file_type: Header.FileType = .normal, + kind: Header.Kind = .normal, truncated: bool = false, // when there is no file body, just header, usefull for huge files }; @@ -376,7 +376,7 @@ test "tar run Go test cases" { }, .{ .path = "sparse-formats.tar", - .err = error.TarUnsupportedFileType, + .err = error.TarUnsupportedHeader, }, .{ .path = "star.tar", @@ -427,7 +427,7 @@ test "tar run Go test cases" { .{ .name = "a/b", .size = 0, - .file_type = .symbolic_link, + .kind = .symbolic_link, .mode = 0o777, .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, @@ -448,7 +448,7 @@ test "tar run Go test cases" { .{ .name = "foo", .size = 999, - .file_type = .normal, + .kind = .normal, .mode = 0o640, }, }, @@ -489,7 +489,7 @@ test "tar run Go test cases" { .{ .name = "P1050238.JPG.log", .size = 14, - .file_type = .normal, + .kind = .normal, .mode = 0o664, }, }, @@ -504,13 +504,13 @@ test "tar run Go test cases" { .{ .name = "small.txt", .size = 5, - .file_type = .normal, + .kind = .normal, .mode = 0o644, }, .{ .name = "small2.txt", .size = 11, - .file_type = .normal, + .kind = .normal, .mode = 0o644, }, }, @@ -525,14 +525,14 @@ test "tar run Go test cases" { .{ .name = "GNU2/GNU2/long-path-name", .link_name = "GNU4/GNU4/long-linkpath-name", - .file_type = .symbolic_link, + .kind = 
.symbolic_link, }, }, }, .{ // has gnu type D (directory) and S (sparse) blocks .path = "gnu-incremental.tar", - .err = error.TarUnsupportedFileType, + .err = error.TarUnsupportedHeader, }, .{ // should use values only from last pax header @@ -541,7 +541,7 @@ test "tar run Go test cases" { .{ .name = "bar", .link_name = "PAX4/PAX4/long-linkpath-name", - .file_type = .symbolic_link, + .kind = .symbolic_link, }, }, }, @@ -620,7 +620,7 @@ test "tar run Go test cases" { .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, - .file_type = .directory, + .kind = .directory, }, }, }, @@ -668,7 +668,7 @@ test "tar run Go test cases" { const expected = case.files[i]; try std.testing.expectEqualStrings(expected.name, actual.name); try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.file_type, actual.file_type); + try std.testing.expectEqual(expected.kind, actual.kind); try std.testing.expectEqual(expected.mode, actual.mode); try std.testing.expectEqualStrings(expected.link_name, actual.link_name); @@ -955,7 +955,7 @@ fn TarReader(comptime ReaderType: type) type { link_name: []const u8, // target name of symlink size: usize, // size of the file in bytes mode: u32, - file_type: Header.FileType, + kind: Header.Kind, reader: ReaderType, @@ -1004,7 +1004,7 @@ fn TarReader(comptime ReaderType: type) type { .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], .size = 0, - .file_type = .normal, + .kind = .normal, .mode = 0, .reader = self.reader, }; @@ -1026,14 +1026,14 @@ fn TarReader(comptime ReaderType: type) type { self.initFile(); while (try self.readHeader()) |header| { - const file_type = header.fileType(); - const size: usize = @intCast(try header.fileSize()); + const kind = header.kind(); + const size: usize = @intCast(try header.size()); self.padding = blockPadding(size); - switch (file_type) { + switch (kind) { // File types to retrun upstream .directory, .normal, .symbolic_link => { - 
self.file.file_type = file_type; + self.file.kind = kind; self.file.mode = try header.mode(); // set file attributes if not already set by prefix/extended headers @@ -1083,10 +1083,10 @@ fn TarReader(comptime ReaderType: type) type { }, // All other are unsupported header types else => { - const d = self.diagnostics orelse return error.TarUnsupportedFileType; + const d = self.diagnostics orelse return error.TarUnsupportedHeader; try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = file_type, + .file_type = kind, } }); }, } From c07527abac7a5f56bb9111b42fcbcbf468b4917f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 22:00:49 +0100 Subject: [PATCH 22/29] tar: reorganize file, functions before tests --- lib/std/tar.zig | 651 ++++++++++++++++++++++++------------------------ 1 file changed, 326 insertions(+), 325 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index c83ae70e3b38..21d08c527212 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -15,8 +15,7 @@ /// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html /// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 /// -//const std = @import("std.zig"); -const std = @import("std"); +const std = @import("std.zig"); const assert = std.debug.assert; pub const Options = struct { @@ -226,6 +225,276 @@ fn nullStr(str: []const u8) []const u8 { return str; } +pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { + return .{ + .reader = reader, + .diagnostics = diagnostics, + }; +} + +fn TarReader(comptime ReaderType: type) type { + return struct { + reader: ReaderType, + diagnostics: ?*Options.Diagnostics, + + // buffers for heeader and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, + link_name_buffer: 
[std.fs.MAX_PATH_BYTES]u8 = undefined, + + // bytes of padding to the end of the block + padding: usize = 0, + // current tar file + file: File = undefined, + + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: usize, // size of the file in bytes + mode: u32, + kind: Header.Kind, + + reader: ReaderType, + + // Writes file content to writer. + pub fn write(self: File, writer: anytype) !void { + var buffer: [4096]u8 = undefined; + + var n: usize = 0; + while (n < self.size) { + const buf = buffer[0..@min(buffer.len, self.size - n)]; + try self.reader.readNoEof(buf); + try writer.writeAll(buf); + n += buf.len; + } + } + + // Skips file content. Advances reader. + pub fn skip(self: File) !void { + try self.reader.skipBytes(self.size, .{}); + } + }; + + const Self = @This(); + + fn readHeader(self: *Self) !?Header { + if (self.padding > 0) { + try self.reader.skipBytes(self.padding, .{}); + } + const n = try self.reader.readAll(&self.header_buffer); + if (n == 0) return null; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { + assert(buffer.len >= size); + const buf = buffer[0..size]; + try self.reader.readNoEof(buf); + return nullStr(buf); + } + + inline fn initFile(self: *Self) void { + self.file = File{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + .size = 0, + .kind = .normal, + .mode = 0, + .reader = self.reader, + }; + } + + // Number of padding bytes in the last file block. 
+ inline fn blockPadding(size: usize) usize { + const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary + return block_rounded - size; + } + + /// Iterates through the tar archive as if it is a series of files. + /// Internally, the tar format often uses entries (header with optional + /// content) to add meta data that describes the next file. These + /// entries should not normally be visible to the outside. As such, this + /// loop iterates through one or more entries until it collects a all + /// file attributes. + pub fn next(self: *Self) !?File { + self.initFile(); + + while (try self.readHeader()) |header| { + const kind = header.kind(); + const size: usize = @intCast(try header.size()); + self.padding = blockPadding(size); + + switch (kind) { + // File types to retrun upstream + .directory, .normal, .symbolic_link => { + self.file.kind = kind; + self.file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (self.file.size == 0) { + self.file.size = size; + } + if (self.file.link_name.len == 0) { + self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); + } + if (self.file.name.len == 0) { + self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); + } + + self.padding = blockPadding(self.file.size); + return self.file; + }, + // Prefix header types + .gnu_long_name => { + self.file.name = try self.readString(size, &self.file_name_buffer); + }, + .gnu_long_link => { + self.file.link_name = try self.readString(size, &self.link_name_buffer); + }, + .extended_header => { + // Use just attributes from last extended header. 
+ self.initFile(); + + var rdr = paxReader(self.reader, size); + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + self.file.name = try attr.value(&self.file_name_buffer); + }, + .linkpath => { + self.file.link_name = try attr.value(&self.link_name_buffer); + }, + .size => { + var buf: [64]u8 = undefined; + self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedHeader; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = kind, + } }); + }, + } + } + return null; + } + }; +} + +// Pax attributes reader. +// Size is length of pax extended header in reader. +fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { + return PaxReader(@TypeOf(reader)){ + .reader = reader, + .size = size, + }; +} + +const PaxAttributeKind = enum { + path, + linkpath, + size, +}; + +fn PaxReader(comptime ReaderType: type) type { + return struct { + size: usize, // cumulative size of all pax attributes + reader: ReaderType, + // scratch buffer used for reading attribute length and keyword + scratch: [128]u8 = undefined, + + const Self = @This(); + + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: ReaderType, // reader positioned at value start + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least Attribute.len. 
+ pub fn value(self: Attribute, dst: []u8) ![]const u8 { + assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readAll(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; + } + }; + + // Iterates over pax attributes. Returns known only known attributes. + // Caller has to call value in Attribute, to advance reader across value. + pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: + // "%d %s=%s\n", , , + while (self.size > 0) { + const length_buf = try self.readUntil(' '); + const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes + + const keyword = try self.readUntil('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.skipBytes(value_len, .{}); + try validateAttributeEnding(self.reader); + continue; + }; + return Attribute{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; + } + + return null; + } + + inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { + var fbs = std.io.fixedBufferStream(&self.scratch); + try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); + return fbs.getWritten(); + } + + inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); + } + + inline fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; + } + + // Checks that 
each record ends with new line. + inline fn validateAttributeEnding(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; + } + }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -639,170 +908,70 @@ test "tar run Go test cases" { .{ // Size in gnu extended format, and name in pax attribute. .path = "writer-big-long.tar", - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - }; - - for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - defer fs_file.close(); - - //var iter = iterator(fs_file.reader(), null); - var iter = tarReader(fs_file.reader(), null); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try std.testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.write(&md5writer); - const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (!expected.truncated) try actual.skip(); // skip file content - } - } - try std.testing.expectEqual(case.files.len, i); - } -} - -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - 
self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; - -fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { - return PaxReader(@TypeOf(reader)){ - .reader = reader, - .size = size, - }; -} - -const PaxAttributeKind = enum { - path, - linkpath, - size, -}; - -fn PaxReader(comptime ReaderType: type) type { - return struct { - size: usize, // cumulative size of all pax attributes - reader: ReaderType, - // scratch buffer used for reading attribute length and keyword - scratch: [128]u8 = undefined, - - const Self = @This(); - - const Attribute = struct { - kind: PaxAttributeKind, - len: usize, // length of the attribute value - reader: ReaderType, // reader positioned at value start - - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least Attribute.len. - pub fn value(self: Attribute, dst: []u8) ![]const u8 { - assert(self.len <= dst.len); - const buf = dst[0..self.len]; - const n = try self.reader.readAll(buf); - if (n < self.len) return error.UnexpectedEndOfStream; - try validateAttributeEnding(self.reader); - if (hasNull(buf)) return error.PaxNullInValue; - return buf; - } - }; - - // Iterates over pax attributes. Returns known only known attributes. - // Caller has to call value in Attribute, to advance reader across value. 
- pub fn next(self: *Self) !?Attribute { - // Pax extended header consists of one or more attributes, each constructed as follows: - // "%d %s=%s\n", , , - while (self.size > 0) { - const length_buf = try self.readUntil(' '); - const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes - - const keyword = try self.readUntil('='); - if (hasNull(keyword)) return error.PaxNullInKeyword; + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + }; - // calculate value_len - const value_start = length_buf.len + keyword.len + 2; // 2 separators - if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; - const value_len = length - value_start - 1; // \n separator at end - self.size -= length; + for (cases) |case| { + var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); - const kind: PaxAttributeKind = if (eql(keyword, "path")) - .path - else if (eql(keyword, "linkpath")) - .linkpath - else if (eql(keyword, "size")) - .size - else { - try self.reader.skipBytes(value_len, .{}); - try validateAttributeEnding(self.reader); - continue; - }; - return Attribute{ - .kind = kind, - .len = value_len, - .reader = self.reader, - }; + //var iter = iterator(fs_file.reader(), null); + var iter = tarReader(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.kind, actual.kind); + try std.testing.expectEqual(expected.mode, actual.mode); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - return null; + if (case.chksums.len > i) { 
+ var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (!expected.truncated) try actual.skip(); // skip file content + } } + try std.testing.expectEqual(case.files.len, i); + } +} - inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 { - var fbs = std.io.fixedBufferStream(&self.scratch); - try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); - return fbs.getWritten(); - } +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - inline fn eql(a: []const u8, b: []const u8) bool { - return std.mem.eql(u8, a, b); - } + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } - inline fn hasNull(str: []const u8) bool { - return (std.mem.indexOfScalar(u8, str, 0)) != null; - } + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } - // Checks that each record ends with new line. 
- inline fn validateAttributeEnding(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; - } - }; -} + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; test "tar PaxReader" { const Attr = struct { @@ -927,171 +1096,3 @@ test "tar PaxReader" { try std.testing.expect(case.err == null); } } - -pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { - return .{ - .reader = reader, - .diagnostics = diagnostics, - }; -} - -fn TarReader(comptime ReaderType: type) type { - return struct { - reader: ReaderType, - diagnostics: ?*Options.Diagnostics, - - // buffers for heeader and file attributes - header_buffer: [Header.SIZE]u8 = undefined, - file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined, - - // bytes of padding to the end of the block - padding: usize = 0, - // current tar file - file: File = undefined, - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes - mode: u32, - kind: Header.Kind, - - reader: ReaderType, - - // Writes file content to writer. - pub fn write(self: File, writer: anytype) !void { - var buffer: [4096]u8 = undefined; - - var n: usize = 0; - while (n < self.size) { - const buf = buffer[0..@min(buffer.len, self.size - n)]; - try self.reader.readNoEof(buf); - try writer.writeAll(buf); - n += buf.len; - } - } - - // Skips file content. Advances reader. 
- pub fn skip(self: File) !void { - try self.reader.skipBytes(self.size, .{}); - } - }; - - const Self = @This(); - - fn readHeader(self: *Self) !?Header { - if (self.padding > 0) { - try self.reader.skipBytes(self.padding, .{}); - } - const n = try self.reader.readAll(&self.header_buffer); - if (n == 0) return null; - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; - if (try header.checkChksum() == 0) return null; - return header; - } - - inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { - assert(buffer.len >= size); - const buf = buffer[0..size]; - try self.reader.readNoEof(buf); - return nullStr(buf); - } - - inline fn initFile(self: *Self) void { - self.file = File{ - .name = self.file_name_buffer[0..0], - .link_name = self.link_name_buffer[0..0], - .size = 0, - .kind = .normal, - .mode = 0, - .reader = self.reader, - }; - } - - // Number of padding bytes in the last file block. - inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary - return block_rounded - size; - } - - // Externally, `next` iterates through the tar archive as if it is a - // series of files. Internally, the tar format often uses fake "files" - // to add meta data that describes the next file. These meta data - // "files" should not normally be visible to the outside. As such, this - // loop iterates through one or more "header files" until it finds a - // "normal file". 
- pub fn next(self: *Self) !?File { - self.initFile(); - - while (try self.readHeader()) |header| { - const kind = header.kind(); - const size: usize = @intCast(try header.size()); - self.padding = blockPadding(size); - - switch (kind) { - // File types to retrun upstream - .directory, .normal, .symbolic_link => { - self.file.kind = kind; - self.file.mode = try header.mode(); - - // set file attributes if not already set by prefix/extended headers - if (self.file.size == 0) { - self.file.size = size; - } - if (self.file.link_name.len == 0) { - self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); - } - if (self.file.name.len == 0) { - self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); - } - - self.padding = blockPadding(self.file.size); - return self.file; - }, - // Prefix header types - .gnu_long_name => { - self.file.name = try self.readString(size, &self.file_name_buffer); - }, - .gnu_long_link => { - self.file.link_name = try self.readString(size, &self.link_name_buffer); - }, - .extended_header => { - // Use just attributes from last extended header. 
- self.initFile(); - - var rdr = paxReader(self.reader, size); - while (try rdr.next()) |attr| { - switch (attr.kind) { - .path => { - self.file.name = try attr.value(&self.file_name_buffer); - }, - .linkpath => { - self.file.link_name = try attr.value(&self.link_name_buffer); - }, - .size => { - var buf: [64]u8 = undefined; - self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); - }, - } - } - return null; - } - }; -} From f8e42d6b308a2e523d6a32669d0a021a56f70524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 22:17:47 +0100 Subject: [PATCH 23/29] tar: add Go test case files to the project --- lib/std/tar.zig | 8 +++----- test/cases/tar/gnu-incremental.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu-long-nul.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu-multi-hdrs.tar | Bin 0 -> 4608 bytes test/cases/tar/gnu-not-utf8.tar | Bin 0 -> 1536 bytes test/cases/tar/gnu-utf8.tar | Bin 0 -> 2560 bytes test/cases/tar/gnu.tar | Bin 0 -> 3072 bytes test/cases/tar/invalid-go17.tar | Bin 0 -> 1536 bytes test/cases/tar/issue10968.tar | Bin 0 -> 512 bytes test/cases/tar/issue11169.tar | Bin 0 -> 602 bytes test/cases/tar/issue12435.tar | Bin 0 -> 512 bytes test/cases/tar/neg-size.tar | Bin 0 -> 512 bytes test/cases/tar/nil-uid.tar | Bin 0 -> 1024 bytes test/cases/tar/pax-bad-hdr-file.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-global-records.tar | Bin 0 -> 7168 bytes test/cases/tar/pax-multi-hdrs.tar | Bin 0 -> 4608 bytes test/cases/tar/pax-nul-path.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-nul-xattrs.tar | 
Bin 0 -> 2560 bytes test/cases/tar/pax-pos-size-file.tar | Bin 0 -> 2560 bytes test/cases/tar/pax-records.tar | Bin 0 -> 2560 bytes test/cases/tar/pax.tar | Bin 0 -> 10240 bytes test/cases/tar/sparse-formats.tar | Bin 0 -> 17920 bytes test/cases/tar/star.tar | Bin 0 -> 3072 bytes test/cases/tar/trailing-slash.tar | Bin 0 -> 2560 bytes test/cases/tar/ustar-file-devs.tar | Bin 0 -> 1536 bytes test/cases/tar/v7.tar | Bin 0 -> 3584 bytes test/cases/tar/writer-big-long.tar | Bin 0 -> 1536 bytes test/cases/tar/writer-big.tar | Bin 0 -> 512 bytes test/cases/tar/xattrs.tar | Bin 0 -> 5120 bytes 29 files changed, 3 insertions(+), 5 deletions(-) create mode 100644 test/cases/tar/gnu-incremental.tar create mode 100644 test/cases/tar/gnu-long-nul.tar create mode 100644 test/cases/tar/gnu-multi-hdrs.tar create mode 100644 test/cases/tar/gnu-not-utf8.tar create mode 100644 test/cases/tar/gnu-utf8.tar create mode 100644 test/cases/tar/gnu.tar create mode 100644 test/cases/tar/invalid-go17.tar create mode 100644 test/cases/tar/issue10968.tar create mode 100644 test/cases/tar/issue11169.tar create mode 100644 test/cases/tar/issue12435.tar create mode 100644 test/cases/tar/neg-size.tar create mode 100644 test/cases/tar/nil-uid.tar create mode 100644 test/cases/tar/pax-bad-hdr-file.tar create mode 100644 test/cases/tar/pax-global-records.tar create mode 100644 test/cases/tar/pax-multi-hdrs.tar create mode 100644 test/cases/tar/pax-nul-path.tar create mode 100644 test/cases/tar/pax-nul-xattrs.tar create mode 100644 test/cases/tar/pax-pos-size-file.tar create mode 100644 test/cases/tar/pax-records.tar create mode 100644 test/cases/tar/pax.tar create mode 100644 test/cases/tar/sparse-formats.tar create mode 100644 test/cases/tar/star.tar create mode 100644 test/cases/tar/trailing-slash.tar create mode 100644 test/cases/tar/ustar-file-devs.tar create mode 100644 test/cases/tar/v7.tar create mode 100644 test/cases/tar/writer-big-long.tar create mode 100644 test/cases/tar/writer-big.tar 
create mode 100644 test/cases/tar/xattrs.tar diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 21d08c527212..ff8cfd4a36cc 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -618,10 +618,8 @@ test "tar run Go test cases" { err: ?anyerror = null, // parsing should fail with this error }; - const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path| - try std.fs.openDirAbsolute(path, .{}) - else - return error.SkipZigTest; + const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; + const test_dir = try std.fs.cwd().openDir(src_path ++ "/../../test/cases/tar", .{}); const cases = [_]Case{ .{ @@ -921,9 +919,9 @@ test "tar run Go test cases" { for (cases) |case| { var fs_file = try test_dir.openFile(case.path, .{}); + defer fs_file.close(); - //var iter = iterator(fs_file.reader(), null); var iter = tarReader(fs_file.reader(), null); var i: usize = 0; while (iter.next() catch |err| { diff --git a/test/cases/tar/gnu-incremental.tar b/test/cases/tar/gnu-incremental.tar new file mode 100644 index 0000000000000000000000000000000000000000..4c442e5b82d1977231c83167324dc6cbb39f090e GIT binary patch literal 2560 zcmeH}%?`pK41jyyQ}6~BR^SDE15X}Fgm`jhV0e9D|52kCnK9aBtYckin|)=$`XDw? 
zR1gWZlz@m_OI%*lRGwA9h14WT2vq~}S_lHREgIF}{NjUY8H3iu_St#|f3o43!OgQF zA*xBv$!WT=`uOeMH4W_j*|gq%JeYp~t5+a&{O6CLoFGS3L`&|+KG5TjI3dD&{*}_e zuv|#9=O5?a*=X`v4TFi!9!)-~JQx>kr#Lex*}O{T(ROdlh5T#ZSb?7Zvi<)R|EUQ~ p{P+96mNf#~tx?dT{HxUWIL*mD+W*tf(I~k?j`F_TmkZo^y#f_=Y61WN literal 0 HcmV?d00001 diff --git a/test/cases/tar/gnu-long-nul.tar b/test/cases/tar/gnu-long-nul.tar new file mode 100644 index 0000000000000000000000000000000000000000..28bc812aa60e81ea324297c81c738486acffc09c GIT binary patch literal 2560 zcmdPX*VA|K$%Afaj^QI J<`xZ33jpEZfW-g+ literal 0 HcmV?d00001 diff --git a/test/cases/tar/gnu-not-utf8.tar b/test/cases/tar/gnu-not-utf8.tar new file mode 100644 index 0000000000000000000000000000000000000000..81cec67d3309502add09e2495ee1bf139389c8cb GIT binary patch literal 1536 zcmd1EY-nt1PO400pdBy(0y7g61|Vf9U5Ztq0(Jv@FdDKtv;8zq|ijXv5BIw^6DOh^2UK)_HbK1>@VQ0c5`qsHR zJrb1zXEcV16&lMRW}rdtKd=NSXg->JkvP|Esp4`g&CK_h+FMmo7oR?iU7RP&svn2t z!9Ke4)upeR_aRYKtT+(g`B!B>fS>t?p7IZ9V9LKWlK+)w+iY|SVQ_tY3I4Ddrx1w) M;($0H4*b6ZFWOBnBLDyZ literal 0 HcmV?d00001 diff --git a/test/cases/tar/invalid-go17.tar b/test/cases/tar/invalid-go17.tar new file mode 100644 index 0000000000000000000000000000000000000000..58f2488e78fb44a10f4e5a7833d5176c4c0ca090 GIT binary patch literal 1536 zcmYex&u5@DFn|Dt29QMx3=A+nlm<~SF$mk((A3NXq>>Jxw74X(NP%*@pq_zwgdvgE SfDU0ZYQ<;>jE2Cl4gmlQRWrx{ literal 0 HcmV?d00001 diff --git a/test/cases/tar/issue10968.tar b/test/cases/tar/issue10968.tar new file mode 100644 index 0000000000000000000000000000000000000000..1cc837bcff14cd822a26e43034955c82e852ab29 GIT binary patch literal 512 zcmbVI!41MN47Ah*kg@;^fX)>lI!AWsgI^V-_Q4}k$6}2x&>iv*cG6Oc`at9n#lG|1 zIi>(iak!RTol#boyD`0c^v(cHJJuvHh-e39;{t(!nc@gWsV;O@FkUc{-h`pC817Ix zgh|QIatu;A!G^JZ7UC1V_vGb4bURuTWAy6SS-Fx(D=wcI#QP1Y#wzX?HAf0_+~lp> yN?iGbw2JFgJjd0vnp9WIo>K3V$tfee6;KE|`1A3J$tp?9B&Y7`+Gwrtzls-lP-;g2 literal 0 HcmV?d00001 diff --git a/test/cases/tar/issue11169.tar b/test/cases/tar/issue11169.tar new file mode 100644 index 
0000000000000000000000000000000000000000..4d71fa15260609ecee0c8c751cfebf49be8763ac GIT binary patch literal 602 zcmdPX4@j)=NKH&hEh^SCG%+zV)=x}KWS}ZA00J`;69y0s1n9JZp|KHzp^>Svp`nSX svAH3G0gzz?R8~P%SKu(Lw74X(2$wLeQpM#*<5EsKtJ9uH<6)~%E3&=F&Kzfi~{+q#!cOa=AzbjSO$1IxR;aN z*g3HiZShf(vs!BvbKPiG1!!GY>l3F=j$kqh!InX?lk@{OAsyh#2%!qzcGBC1;;FXq z6(OUFiyZLnA-?TXGNUXTUA0xix-DD8En}aw{C@tp7XnB4F0(23$NLG-y>mPO&s&?~ literal 0 HcmV?d00001 diff --git a/test/cases/tar/nil-uid.tar b/test/cases/tar/nil-uid.tar new file mode 100644 index 0000000000000000000000000000000000000000..cc9cfaa33cc5de0a28b4183c1705d801f788c96a GIT binary patch literal 1024 zcmWGAG%z(VGPcn33UJrU$xmmX0WbgpGcywmlR@HOU}(l*Xk=(?U}j`)Zf3?{U)I_ zELW9JRmrEpl(4jByA+?w^w&7<2NoZAjuXMp*n7%?6L^l77Eiau7hvcq?jH1?rTX`9 zpMCF(>ILDcd3=FqS4V*TZi>3Pe0#t9R^c-JqBj20{7)M=*Yz)FewT(Uati=tP7$St z*d8bF(D@GlUJ@4%qe!wMbG*3V!1wg0PiykOO!ia%*E*#=&OR(jtILP6KRA&2FJ*Kg zy|A)1mbYF<|BA(6Myl2O)0`aM3Wji&pIh4ClA2$dWoJs58yl;4D?U=(N)W;u%XInf zRTs< z4xd;AU1Jf%`IN4#I124Rb^4Q)!o;hPejoEtY5^qa+2ELA%=<%v`PjLWfatmzU$WMq z0o-nPr8-}Z8wrod+8?IEmrDw+X#Mz zXg5Tql$=bVJ~k+v0GLO)^pr}@?=ab|SNj)8^HW8!blD3B%SS zGI^{kUFR(2?R5l=l)&2sw&#PX@ZqpeYkq_8j?Fe9>W(2A?o+GT0h2`D?X>ODJmvX4 zP~kX>b+(x;9Ro3CH;IaFGqbd&#t{MvQE$!J(C*Cr+>WZOtS-2k`j?wym_80XdBrEb z&A?kq@r`(G3QRVPrgJ7?%^kKYVLESuSIlk5vZfj_byJWIheX`cpD(885)#?nu-$bl zd_2p;+hBpM{R+d!YIE8hxR?tY^UL#n+jl6O_LaKNYd-D1vHp<<`zJM7tNzO*g*mMS z{}PVT{JZ_3frq#ms21`Cr=+Hc0kHIcL{G*ZFA#Efwe*);p(rx?(dkdptIvyaeUIQm zC7-2Q)O+&%G{(pIJ#q(#>i-#uLc#hT`hT40Pta=rFC=*e`!w3WI)BE>e1hozPB`6P z`9Jr6+W!j+!|;>--`D@^3;ld-=Ii;m6ILFVT3VXy>=Udb`;P z_AF}Ko(kwITGLdEM1G)5o<9H!jr=439(@V?ONRXCAu*5YPw-#9x&N1V|EJgyTG0B^ zUZ)s9!5N^&Ghph+I3UGBWYR$X|2zH<_}9R{M*cI=m|k}8li%1Drp7@Vny>jkUkM=z Sl}Br1`$oQ%|3`N;j=%%-SrC5! 
literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-nul-path.tar b/test/cases/tar/pax-nul-path.tar new file mode 100644 index 0000000000000000000000000000000000000000..c78f82b16e85363143404ec50c3e77e5174ba696 GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vIFf=kYF*P%{u%s%>00J0r&<4gv#xOaM5(Wb!BV%(j289X+ zI)KvRlEfmQ^^{tL?m0@$qmzCkLqmmv#F7kKs>V3AQxFN}F>ui-ycfm?#?I`Jb2|8dWnI0;3@?JVO8g0FZP& literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-nul-xattrs.tar b/test/cases/tar/pax-nul-xattrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..881f51768f9872f8d9bfcd2cbb637a64c88b59fb GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vgN=(tsE6vH#E2$`9pdK)Q00tbifq}6(Ob(=k!NAbi+{l1I zp@Mo&z->}#aYE@j+a){BLMtXu_aCah^nR1rE*#&;K|=WmI@H1V%$( H1cU$p$tyMF literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-pos-size-file.tar b/test/cases/tar/pax-pos-size-file.tar new file mode 100644 index 0000000000000000000000000000000000000000..ea5ccf916426a5b6300dd341dffeeb349e51ad90 GIT binary patch literal 2560 zcmeHGO^=&86wSK7V)iVI`DW1tNf<~-Ffb4pb|xV)A%qVbNciYIMU^VD+e5mrD1N`@xy@5P0!5`e$lo>Ayydck>ZnF=Fo-*7$%B*6VQy8OC1ZYQZ0cQwSoN-=7~KHZm75nsv?7#% zxbP~EBdQp}j$fv$ zm0((q`NB@)$O$!<9_$=t{xvb~Lm9~}L?l0&Jl78;-DOxv-8%`1VOZ&@XiXNP^?Flj z(`mnw@sQZt-FOL46N~e{7&mmZ3_7Zeb)X3_s28DRNA*13 zzlx)-aG@0_eZq!piMD!7yu_+Gn2p4s1_%<*fN8?jFA9^o3rL4@o2IU`LoynihG>kD zgC*AcCW%G}r$MQF^{SM2lx>!a?K5KK;Y$2@1m>3MDAg#{VSu51&>@#aQR@?hJl2&j zOTYwY86z_%=ypQwac8MQ)Nk`zSmC?tfF?@1eU$X~vDKZB%VPg_Skq)0%kmy-XqcqB zSWUKpp_GGFrX{zDo7-dKiXlS@wWp$QH`aD)2Tf5ICwv|0^UXD_0EfP^5VFAG&@E^9 wM*KF17Kq~USjbrS`t?E_kE`$n^D8p1nNBRj66M`4lQuLkKmWlt`)ynQ0y{e~UjP6A literal 0 HcmV?d00001 diff --git a/test/cases/tar/pax-records.tar b/test/cases/tar/pax-records.tar new file mode 100644 index 0000000000000000000000000000000000000000..276c211baa388cd4857f60be3355dc710c079adf GIT binary patch literal 2560 zcmWGYtnf%pOi3*&)-%vg%gjk-pgu5w00tbifuXS}Ob(=k!NAbaz|4q2p@RBO!8Eb7 zxFoR%Xg#LPh!8QfP;mG6arAT7E67f_1qKC|k*Pv*er|4RUWu(oYEDkRj>3~2)1FW5 
z;W9N)D9uaEO|{L*&r2r;bB%})9NM0wQI78BOoLqCLtvwr=Z5b$i&RT%Er#Y zO+ZjcSVUAHn~8MUp(~vBV+cg7LjpEKCCE6D7E@EwTcMv_>l+&bbg`j1Cv0A776ym5t@+ zSt9MDBFtXbKY&m4pMN0f`l~hhD>#q(-`x$5n+q@eEPmAevA;0XTM8XMYkTvSmQ-t5 zkihVw{(qQ#_JjT})&KMa&-FhG0c8or{CPvw|Jf69WL!B2Wa1KoKYcMW6^2fg(@@ia-%40!5$*6oDd81d2cr_`3;w E2V3|JA^-pY literal 0 HcmV?d00001 diff --git a/test/cases/tar/sparse-formats.tar b/test/cases/tar/sparse-formats.tar new file mode 100644 index 0000000000000000000000000000000000000000..8bd4e74d50f9c8961f80a887ab7d6449e032048b GIT binary patch literal 17920 zcmeHO!BXQ!5M{6a3g^xmb&sU64qQV{sZ?#{1DvdPiv%!*Aw}}_dEEjdi|Nre#)hpG zE{}(KnjXC;wbY|&t*;k1>*dFY-EbwpT`b+-m>u zXfjaoe5W44g1VMFbuvaLV|3acePf?HHj7T34f|}^XTyHz*zDR5hW%jJ$GN!K=dPX7 zuwNSXOT&I?*sl!xm0`a!>{o{UdfWbohf`t0wKm47jd5yYoVY#C#(p&HN5g(h+o$d^ z>C~x6+ovLJp9;gi;Rj^+0U3Tkh98jO2W0pG8Gb;9ACTb()boS>@h8I{`Aj1#H@B=dZfDAvtjWMmW;RoC~7Py0M z`m*5%-1CF}@n^#y*zgB7{DBRBV8b8S@CP>hfen8^_^{DnOAo^z5MmhHr;h_0e!zww zu;B-6_yHS!z=j{N;RkH^0r&ji+3`30fen9P!ynl22R8fx0blw!^!(v@`{T)$#0AMUzUr{%bWEKK}dPBZfAtotM&Q)$6}V4GkAAL?ydIxj}Q`3 zJOATY>UD~$8khO$y?3COY_Ib_T!Mz?cSHC~#(oEVI84ue{e9LR^x69SzvU?x#e`$G z`ReZSkBilxf3HuQYO>v9_2tWYd3#C|uKGRxyy zk#CN0vO|t>vO|t?vO|t@vV)g2dr7mGGDo)W_L8o>q-!tf+DkfmNk=c~=p`M!q@$Pg+)H}y zB|Z0&o_k5py`&p2>BdW1A|f+1N!@lEFX<*ndTZ#%;H1d0PWQ;sPWQ<1PWQ+WPxo*$ zCpU9)GbcB5ax*74^K5XIR5u%)rF*!UXXCT<7;fg-2rW5AHbhJJa5K*aY3VWC%(G!y za*S-8mhRzZo{iMfW4M`TW3}WM*$8{;Fcc4%{&{rCCA9dZs{Iw=Go{iJw}H4J9rlMBkscMKka?4V*dGW zC;x{dmiMq8gvD(vpG{xk(ev}2>9_pg&wuy1`g67#*MIt_+k5+eaQ%mN-{S%QM+!~( zxc)<2*YN+U4#l|sv%B)c7PePszGeL<)ZO)vtHtH=w09GsNfox9d|WQBPwAMB1HKi$ z5#I)1l17qNl4g>25`gi0%mT0gEC34-1PB5I0fGQQfKq@`fKq@`fKq@;fJ%T$fJ%T$ zfLefBfLefBfLeekKolSf5Cw<=%mtVWFc)Ahz+8YvfJT5ufJT5u0A#CaDG)Nzv=opE zMO*%@0IdS81gZg+MP*A>0a;*L*S;zQ^1P%)r9keM))iGXNaa8-mb9xN$g|SAj;op= zlS*1t6=X?iT~QSVc~H`#(jdo4>x!y6$YPQf)rV9dQiVt*BGrggBvO?~WSR`0jpG)F zR$z95<=;=bg;QIfR|BZ{k=7%FW59w-S{C9wpVT}I{Ao4pNVj%vb z{pbH+{)aiAzW>2BZdt7HACK|hLCzZHZZvnf_-l0|IXl~}=T~SgCPR@QPL^Kc(9Lpj 
zv56@U!e<=Br@-+2fA>qk!2KVorji&Q8CK+X>e0g#)6 z@dUuK3}6apYu09*vX znm!5vu=b8Z0L;v^6bQklmI7jCCS}XN6`)n1l|VJX%uKdX6)-c?y7pBeFf)@Dl>##} ztt+Z(U}h#Qst0CfT31vh!CNoVqM~4CrgcSC7r2GAs4|$DXJmbGf$=ejIaDU{qjK;EfgdABYgg9smFU literal 0 HcmV?d00001 diff --git a/test/cases/tar/star.tar b/test/cases/tar/star.tar new file mode 100644 index 0000000000000000000000000000000000000000..59e2d4e604611eeac3e2a0f3d6f71d2623c50449 GIT binary patch literal 3072 zcmeHHT?)e>4DRzz;R#BjQ;)ERouag*479>@u-$$NbG3!-WyoMlUh?xvNIv}HZD&jy zuA!-C5KZlY0Y@bP833Zfm_JQ2M2yBQ2dTK6K{>VDLBV=D{z>IvVF` zUD#xgUGh?F1AikeIW6NnOIrMRGU4UU`62nAWxyx>^STEhN#m{lQEc_E4Gd!2kkq(FP`FW-vLBN(KWX14Anam zFj`@3XrNG#Sdw8&v*@Q?!WuA>xdvlQlIJEmL^~{RXSF)#oEGZPaA5Df(AG_n|kZD?R%Y|fxyKx21-jV~=ONh|_7iBh{jE;leR XgnG`9QsYMTkA}c#2#kgRtwR6+ua*#N literal 0 HcmV?d00001 diff --git a/test/cases/tar/v7.tar b/test/cases/tar/v7.tar new file mode 100644 index 0000000000000000000000000000000000000000..eb65fc9410721efd98cb7c5e274f547ec530252d GIT binary patch literal 3584 zcmeHH&ubGw6n?3{z(^13q0owB@t_Dbv)ODTf|dgec!sXT^AK{$jKc@-kWdUe(&uh-&e0L+xARj zwLzm>LI~3|1sT#R&XkBIzWbfCPrYEK7fr^Q@7vXO;&pw$QCTT3-?&yO+jq(<{6qS`FS_vP zIBhMBjnmsnS~{|C9LMN8#r!W{zj5l&zcE?^U_t*||1zJ{zqInH{-Zy}2$O|c?WSFx zxn8RtM3-UpAJiW`Z@Zar#$ojz)NjtWBfnULUzD=jj5!>iG>O2k{o(=ZAg=$-urC7q zVm{n!{kK`S@p|Vk`q%aFg#nw)bMB-40yAj*%7=F37m@ziFINBH7pTSD@Cfil^^9T6 zxL-iu+Aq)#ev#CF(l2&S@A^eC<`;^e4{ZQ#s9$Y4r}$iP3;;e3V;a&MNN*s$f%FFc H(;N5+1FUK9 literal 0 HcmV?d00001 diff --git a/test/cases/tar/writer-big-long.tar b/test/cases/tar/writer-big-long.tar new file mode 100644 index 0000000000000000000000000000000000000000..09fc5dd3dd7fc5de3b6d22461fa23152fd499a41 GIT binary patch literal 1536 zcmdT@%L>CF5cHg{@C({q<0CobV>|>an6xwspR=A26VBn zaX^Kt?_`f1Z)$wO|DLIY?J3lf^OSK^BIf(k6O@V|Yg?ui2v9Nql1oNtkxU->H^Dlm V!!&06{D$v3HgTJv%r&)bzX73UunhnJ literal 0 HcmV?d00001 diff --git a/test/cases/tar/writer-big.tar b/test/cases/tar/writer-big.tar new file mode 100644 index 
0000000000000000000000000000000000000000..435dcbce6abc74dc5efa1f4dd34129eb7701c697 GIT binary patch literal 512 zcmXTPEzmbKOV3Q#E2$`9pbanp0y7f>1_L8QGgAXY1_J{_Lo-7Ih6adrEFj9z(8$cl z$k@=p6sXS7*wDzBLBRkbPg}CIxFoSiL4hHqxH30CFQpilZK-+bxTL`1AWs4T&~spb JSM4B@1OOWT7M=hA literal 0 HcmV?d00001 diff --git a/test/cases/tar/xattrs.tar b/test/cases/tar/xattrs.tar new file mode 100644 index 0000000000000000000000000000000000000000..9701950edd1f0dc82858b7117136b37391be0b08 GIT binary patch literal 5120 zcmeHJv2KGf5M|~o_yWg1+khiw>d;i}P^nX=$R$ooYd`|ilD{uBAv6g^kxC>6-(uu< zHg^v_-l5r}td>fyRbC(vfcdOQq}Iq(#u+Ja9X?}Dv(|CCVoJF~09ZgF;2a!G7^%~| zYNYoMUQ-rE=5KzzBJ^EKyr-Mx-NQ4gq%k=v3zee}wOxElT`HH-ei(K*xV|_} zC{$GDvDuoW?o>&odUrVuVHkt_w?IH zW3PV_@V!Jxt@A^i>Yrj(>;K=H?5X8!tJS~MYVd#a^`?|QJKb&Uduf~MfN4M7$J!Lr zF40zZMF!9x{tqJ#0F5+;{2!=)=Knre|G(mAKU`hAc#r>!#{V(9d;sW1hxVv7@B_zF ze)#eKF~#1~>@WTI`#+&4`lkel_5U6!N8h^5vRAE8lqGgr9-Ul!p=H1_U>TS&1K)l2 B)fNB% literal 0 HcmV?d00001 From a75fd4ff156abdd131c2b609b9b19573813838a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Dec 2023 23:55:07 +0100 Subject: [PATCH 24/29] tar: move test cases to std/tar/testdata Create std/tar/test.zig for test which uses cases from testdata. 
--- lib/std/tar.zig | 374 +----------------- lib/std/tar/test.zig | 373 +++++++++++++++++ .../std/tar/testdata}/gnu-incremental.tar | Bin .../std/tar/testdata}/gnu-long-nul.tar | Bin .../std/tar/testdata}/gnu-multi-hdrs.tar | Bin .../std/tar/testdata}/gnu-not-utf8.tar | Bin .../tar => lib/std/tar/testdata}/gnu-utf8.tar | Bin .../tar => lib/std/tar/testdata}/gnu.tar | Bin .../std/tar/testdata}/invalid-go17.tar | Bin .../std/tar/testdata}/issue10968.tar | Bin .../std/tar/testdata}/issue11169.tar | Bin .../std/tar/testdata}/issue12435.tar | Bin .../tar => lib/std/tar/testdata}/neg-size.tar | Bin .../tar => lib/std/tar/testdata}/nil-uid.tar | Bin .../std/tar/testdata}/pax-bad-hdr-file.tar | Bin .../std/tar/testdata}/pax-global-records.tar | Bin .../std/tar/testdata}/pax-multi-hdrs.tar | Bin .../std/tar/testdata}/pax-nul-path.tar | Bin .../std/tar/testdata}/pax-nul-xattrs.tar | Bin .../std/tar/testdata}/pax-pos-size-file.tar | Bin .../std/tar/testdata}/pax-records.tar | Bin .../tar => lib/std/tar/testdata}/pax.tar | Bin .../std/tar/testdata}/sparse-formats.tar | Bin .../tar => lib/std/tar/testdata}/star.tar | Bin .../std/tar/testdata}/trailing-slash.tar | Bin .../std/tar/testdata}/ustar-file-devs.tar | Bin .../cases/tar => lib/std/tar/testdata}/v7.tar | Bin .../std/tar/testdata}/writer-big-long.tar | Bin .../std/tar/testdata}/writer-big.tar | Bin .../tar => lib/std/tar/testdata}/xattrs.tar | Bin 30 files changed, 377 insertions(+), 370 deletions(-) create mode 100644 lib/std/tar/test.zig rename {test/cases/tar => lib/std/tar/testdata}/gnu-incremental.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-long-nul.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-multi-hdrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-not-utf8.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu-utf8.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/gnu.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/invalid-go17.tar 
(100%) rename {test/cases/tar => lib/std/tar/testdata}/issue10968.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/issue11169.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/issue12435.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/neg-size.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/nil-uid.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-bad-hdr-file.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-global-records.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-multi-hdrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-nul-path.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-nul-xattrs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-pos-size-file.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax-records.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/pax.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/sparse-formats.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/star.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/trailing-slash.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/ustar-file-devs.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/v7.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/writer-big-long.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/writer-big.tar (100%) rename {test/cases/tar => lib/std/tar/testdata}/xattrs.tar (100%) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index ff8cfd4a36cc..4f6824de1fa4 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -601,376 +601,6 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -test "tar run Go test cases" { - const Case = struct { - const File = struct { - name: []const u8, - size: usize = 0, - mode: u32 = 0, - link_name: []const u8 = &[0]u8{}, - kind: Header.Kind = .normal, - truncated: bool = false, // when there is no file 
body, just header, usefull for huge files - }; - - path: []const u8, // path to the tar archive file on dis - files: []const File = &[_]@This().File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content - err: ?anyerror = null, // parsing should fail with this error - }; - - const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; - const test_dir = try std.fs.cwd().openDir(src_path ++ "/../../test/cases/tar", .{}); - - const cases = [_]Case{ - .{ - .path = "gnu.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "sparse-formats.tar", - .err = error.TarUnsupportedHeader, - }, - .{ - .path = "star.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "v7.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o444, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o444, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "pax.tar", - .files = &[_]Case.File{ - .{ - .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - .size = 7, - .mode = 0o664, - }, - .{ - .name = "a/b", - .size = 0, - .kind = .symbolic_link, - .mode = 0o777, - .link_name = 
"123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - }, - }, - .chksums = &[_][]const u8{ - "3c382e8f5b6631aa2db52643912ffd4a", - }, - }, - .{ - // pax attribute don't end with \n - .path = "pax-bad-hdr-file.tar", - .err = error.PaxInvalidAttributeEnd, - }, - .{ - // size is in pax attribute - .path = "pax-pos-size-file.tar", - .files = &[_]Case.File{ - .{ - .name = "foo", - .size = 999, - .kind = .normal, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "0afb597b283fe61b5d4879669a350556", - }, - }, - .{ - // has pax records which we are not interested in - .path = "pax-records.tar", - .files = &[_]Case.File{ - .{ - .name = "file", - }, - }, - }, - .{ - // has global records which we are ignoring - .path = "pax-global-records.tar", - .files = &[_]Case.File{ - .{ - .name = "file1", - }, - .{ - .name = "file2", - }, - .{ - .name = "file3", - }, - .{ - .name = "file4", - }, - }, - }, - .{ - .path = "nil-uid.tar", - .files = &[_]Case.File{ - .{ - .name = "P1050238.JPG.log", - .size = 14, - .kind = .normal, - .mode = 0o664, - }, - }, - .chksums = &[_][]const u8{ - "08d504674115e77a67244beac19668f5", - }, - }, - .{ - // has xattrs and pax records which we are ignoring - .path = "xattrs.tar", - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .kind = .normal, - .mode = 0o644, - }, - .{ - .name = "small2.txt", - .size = 11, - .kind = .normal, - .mode = 0o644, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .path = "gnu-multi-hdrs.tar", - .files = &[_]Case.File{ - .{ - .name = "GNU2/GNU2/long-path-name", - .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - // has gnu type D (directory) and S (sparse) blocks - .path = "gnu-incremental.tar", - .err = error.TarUnsupportedHeader, - }, 
- .{ - // should use values only from last pax header - .path = "pax-multi-hdrs.tar", - .files = &[_]Case.File{ - .{ - .name = "bar", - .link_name = "PAX4/PAX4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - .path = "gnu-long-nul.tar", - .files = &[_]Case.File{ - .{ - .name = "0123456789", - .mode = 0o644, - }, - }, - }, - .{ - .path = "gnu-utf8.tar", - .files = &[_]Case.File{ - .{ - .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", - .mode = 0o644, - }, - }, - }, - .{ - .path = "gnu-not-utf8.tar", - .files = &[_]Case.File{ - .{ - .name = "hi\x80\x81\x82\x83bye", - .mode = 0o644, - }, - }, - }, - .{ - // null in pax key - .path = "pax-nul-xattrs.tar", - .err = error.PaxNullInKeyword, - }, - .{ - .path = "pax-nul-path.tar", - .err = error.PaxNullInValue, - }, - .{ - .path = "neg-size.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue10968.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue11169.tar", - .err = error.TarHeader, - }, - .{ - .path = "issue12435.tar", - .err = error.TarHeaderChksum, - }, - .{ - // has magic with space at end instead of null - .path = "invalid-go17.tar", - .files = &[_]Case.File{ - .{ - .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", - }, - }, - }, - .{ - .path = "ustar-file-devs.tar", - .files = &[_]Case.File{ - .{ - .name = "file", - .mode = 0o644, - }, - }, - }, - .{ - .path = "trailing-slash.tar", - .files = &[_]Case.File{ - .{ - .name = "123456789/" ** 30, - .kind = .directory, - }, - }, - }, - .{ - // Has size in gnu extended format. To represent size bigger than 8 GB. - .path = "writer-big.tar", - .files = &[_]Case.File{ - .{ - .name = "tmp/16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .truncated = true, - .mode = 0o640, - }, - }, - }, - .{ - // Size in gnu extended format, and name in pax attribute. 
- .path = "writer-big-long.tar", - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - }; - - for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - - defer fs_file.close(); - - var iter = tarReader(fs_file.reader(), null); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try std.testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); - - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.write(&md5writer); - const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (!expected.truncated) try actual.skip(); // skip file content - } - } - try std.testing.expectEqual(case.files.len, i); - } -} - -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; - test "tar PaxReader" { const Attr = struct { kind: PaxAttributeKind, @@ -1094,3 +724,7 @@ test "tar PaxReader" { try std.testing.expect(case.err == null); } } + +test { + _ = @import("tar/test.zig"); +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig new file mode 100644 index 000000000000..f6a9fa1cfdec 
--- /dev/null +++ b/lib/std/tar/test.zig @@ -0,0 +1,373 @@ +const std = @import("../std.zig"); +const tar = std.tar; +const assert = std.debug.assert; + +test "tar run Go test cases" { + const Case = struct { + const File = struct { + name: []const u8, + size: usize = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + kind: tar.Header.Kind = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; + + path: []const u8, // path to the tar archive file on dis + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + err: ?anyerror = null, // parsing should fail with this error + }; + + const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; + const test_dir = try std.fs.cwd().openDir(src_path ++ "/testdata", .{}); + + const cases = [_]Case{ + .{ + .path = "gnu.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "sparse-formats.tar", + .err = error.TarUnsupportedHeader, + }, + .{ + .path = "star.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "v7.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o444, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o444, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .path = "pax.tar", + .files = &[_]Case.File{ + .{ + .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .mode = 0o664, + }, + .{ + .name = "a/b", + .size = 0, + .kind = .symbolic_link, + .mode = 0o777, + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + .{ + // pax attribute don't end with \n + .path = "pax-bad-hdr-file.tar", + .err = error.PaxInvalidAttributeEnd, + }, + .{ + // size is in pax attribute + .path = "pax-pos-size-file.tar", + .files = &[_]Case.File{ + .{ + .name = "foo", + .size = 999, + .kind = .normal, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "0afb597b283fe61b5d4879669a350556", + }, + }, + .{ + // has pax records which we are not interested in + .path = "pax-records.tar", + .files = &[_]Case.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .path = "pax-global-records.tar", + .files = &[_]Case.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .path = "nil-uid.tar", + .files = &[_]Case.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .kind = .normal, + .mode = 0o664, + }, + }, + .chksums = &[_][]const u8{ + "08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .path = "xattrs.tar", + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .kind = .normal, + .mode = 0o644, + }, + .{ + .name = "small2.txt", + .size = 11, + .kind = .normal, + .mode = 0o644, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + 
.{ + .path = "gnu-multi-hdrs.tar", + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + // has gnu type D (directory) and S (sparse) blocks + .path = "gnu-incremental.tar", + .err = error.TarUnsupportedHeader, + }, + .{ + // should use values only from last pax header + .path = "pax-multi-hdrs.tar", + .files = &[_]Case.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + .path = "gnu-long-nul.tar", + .files = &[_]Case.File{ + .{ + .name = "0123456789", + .mode = 0o644, + }, + }, + }, + .{ + .path = "gnu-utf8.tar", + .files = &[_]Case.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, + }, + }, + }, + .{ + .path = "gnu-not-utf8.tar", + .files = &[_]Case.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, + }, + }, + }, + .{ + // null in pax key + .path = "pax-nul-xattrs.tar", + .err = error.PaxNullInKeyword, + }, + .{ + .path = "pax-nul-path.tar", + .err = error.PaxNullInValue, + }, + .{ + .path = "neg-size.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue10968.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue11169.tar", + .err = error.TarHeader, + }, + .{ + .path = "issue12435.tar", + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .path = "invalid-go17.tar", + .files = &[_]Case.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .path = "ustar-file-devs.tar", + .files = &[_]Case.File{ + .{ + .name = "file", + .mode = 0o644, + }, + }, + }, + .{ + .path = "trailing-slash.tar", + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, + }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. 
+ .path = "writer-big.tar", + .files = &[_]Case.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + .mode = 0o640, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. + .path = "writer-big-long.tar", + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + }; + + for (cases) |case| { + var fs_file = try test_dir.openFile(case.path, .{}); + + defer fs_file.close(); + + var iter = tar.tarReader(fs_file.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try std.testing.expectEqualStrings(expected.name, actual.name); + try std.testing.expectEqual(expected.size, actual.size); + try std.testing.expectEqual(expected.kind, actual.kind); + try std.testing.expectEqual(expected.mode, actual.mode); + try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + + if (case.chksums.len > i) { + var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + try std.testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (!expected.truncated) try actual.skip(); // skip file content + } + } + try std.testing.expectEqual(case.files.len, i); + } +} + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; diff --git a/test/cases/tar/gnu-incremental.tar 
b/lib/std/tar/testdata/gnu-incremental.tar similarity index 100% rename from test/cases/tar/gnu-incremental.tar rename to lib/std/tar/testdata/gnu-incremental.tar diff --git a/test/cases/tar/gnu-long-nul.tar b/lib/std/tar/testdata/gnu-long-nul.tar similarity index 100% rename from test/cases/tar/gnu-long-nul.tar rename to lib/std/tar/testdata/gnu-long-nul.tar diff --git a/test/cases/tar/gnu-multi-hdrs.tar b/lib/std/tar/testdata/gnu-multi-hdrs.tar similarity index 100% rename from test/cases/tar/gnu-multi-hdrs.tar rename to lib/std/tar/testdata/gnu-multi-hdrs.tar diff --git a/test/cases/tar/gnu-not-utf8.tar b/lib/std/tar/testdata/gnu-not-utf8.tar similarity index 100% rename from test/cases/tar/gnu-not-utf8.tar rename to lib/std/tar/testdata/gnu-not-utf8.tar diff --git a/test/cases/tar/gnu-utf8.tar b/lib/std/tar/testdata/gnu-utf8.tar similarity index 100% rename from test/cases/tar/gnu-utf8.tar rename to lib/std/tar/testdata/gnu-utf8.tar diff --git a/test/cases/tar/gnu.tar b/lib/std/tar/testdata/gnu.tar similarity index 100% rename from test/cases/tar/gnu.tar rename to lib/std/tar/testdata/gnu.tar diff --git a/test/cases/tar/invalid-go17.tar b/lib/std/tar/testdata/invalid-go17.tar similarity index 100% rename from test/cases/tar/invalid-go17.tar rename to lib/std/tar/testdata/invalid-go17.tar diff --git a/test/cases/tar/issue10968.tar b/lib/std/tar/testdata/issue10968.tar similarity index 100% rename from test/cases/tar/issue10968.tar rename to lib/std/tar/testdata/issue10968.tar diff --git a/test/cases/tar/issue11169.tar b/lib/std/tar/testdata/issue11169.tar similarity index 100% rename from test/cases/tar/issue11169.tar rename to lib/std/tar/testdata/issue11169.tar diff --git a/test/cases/tar/issue12435.tar b/lib/std/tar/testdata/issue12435.tar similarity index 100% rename from test/cases/tar/issue12435.tar rename to lib/std/tar/testdata/issue12435.tar diff --git a/test/cases/tar/neg-size.tar b/lib/std/tar/testdata/neg-size.tar similarity index 100% rename from 
test/cases/tar/neg-size.tar rename to lib/std/tar/testdata/neg-size.tar diff --git a/test/cases/tar/nil-uid.tar b/lib/std/tar/testdata/nil-uid.tar similarity index 100% rename from test/cases/tar/nil-uid.tar rename to lib/std/tar/testdata/nil-uid.tar diff --git a/test/cases/tar/pax-bad-hdr-file.tar b/lib/std/tar/testdata/pax-bad-hdr-file.tar similarity index 100% rename from test/cases/tar/pax-bad-hdr-file.tar rename to lib/std/tar/testdata/pax-bad-hdr-file.tar diff --git a/test/cases/tar/pax-global-records.tar b/lib/std/tar/testdata/pax-global-records.tar similarity index 100% rename from test/cases/tar/pax-global-records.tar rename to lib/std/tar/testdata/pax-global-records.tar diff --git a/test/cases/tar/pax-multi-hdrs.tar b/lib/std/tar/testdata/pax-multi-hdrs.tar similarity index 100% rename from test/cases/tar/pax-multi-hdrs.tar rename to lib/std/tar/testdata/pax-multi-hdrs.tar diff --git a/test/cases/tar/pax-nul-path.tar b/lib/std/tar/testdata/pax-nul-path.tar similarity index 100% rename from test/cases/tar/pax-nul-path.tar rename to lib/std/tar/testdata/pax-nul-path.tar diff --git a/test/cases/tar/pax-nul-xattrs.tar b/lib/std/tar/testdata/pax-nul-xattrs.tar similarity index 100% rename from test/cases/tar/pax-nul-xattrs.tar rename to lib/std/tar/testdata/pax-nul-xattrs.tar diff --git a/test/cases/tar/pax-pos-size-file.tar b/lib/std/tar/testdata/pax-pos-size-file.tar similarity index 100% rename from test/cases/tar/pax-pos-size-file.tar rename to lib/std/tar/testdata/pax-pos-size-file.tar diff --git a/test/cases/tar/pax-records.tar b/lib/std/tar/testdata/pax-records.tar similarity index 100% rename from test/cases/tar/pax-records.tar rename to lib/std/tar/testdata/pax-records.tar diff --git a/test/cases/tar/pax.tar b/lib/std/tar/testdata/pax.tar similarity index 100% rename from test/cases/tar/pax.tar rename to lib/std/tar/testdata/pax.tar diff --git a/test/cases/tar/sparse-formats.tar b/lib/std/tar/testdata/sparse-formats.tar similarity index 100% rename 
from test/cases/tar/sparse-formats.tar rename to lib/std/tar/testdata/sparse-formats.tar diff --git a/test/cases/tar/star.tar b/lib/std/tar/testdata/star.tar similarity index 100% rename from test/cases/tar/star.tar rename to lib/std/tar/testdata/star.tar diff --git a/test/cases/tar/trailing-slash.tar b/lib/std/tar/testdata/trailing-slash.tar similarity index 100% rename from test/cases/tar/trailing-slash.tar rename to lib/std/tar/testdata/trailing-slash.tar diff --git a/test/cases/tar/ustar-file-devs.tar b/lib/std/tar/testdata/ustar-file-devs.tar similarity index 100% rename from test/cases/tar/ustar-file-devs.tar rename to lib/std/tar/testdata/ustar-file-devs.tar diff --git a/test/cases/tar/v7.tar b/lib/std/tar/testdata/v7.tar similarity index 100% rename from test/cases/tar/v7.tar rename to lib/std/tar/testdata/v7.tar diff --git a/test/cases/tar/writer-big-long.tar b/lib/std/tar/testdata/writer-big-long.tar similarity index 100% rename from test/cases/tar/writer-big-long.tar rename to lib/std/tar/testdata/writer-big-long.tar diff --git a/test/cases/tar/writer-big.tar b/lib/std/tar/testdata/writer-big.tar similarity index 100% rename from test/cases/tar/writer-big.tar rename to lib/std/tar/testdata/writer-big.tar diff --git a/test/cases/tar/xattrs.tar b/lib/std/tar/testdata/xattrs.tar similarity index 100% rename from test/cases/tar/xattrs.tar rename to lib/std/tar/testdata/xattrs.tar From 76fe1f53d5b9cfae100854afe495e8b378d2dc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 14:18:20 +0100 Subject: [PATCH 25/29] tar: fix tests on 32-bit platforms --- lib/std/tar.zig | 20 ++++++++++---------- lib/std/tar/test.zig | 5 ++++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 4f6824de1fa4..376a6ad23027 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -250,7 +250,7 @@ fn TarReader(comptime ReaderType: type) type { pub const File = struct { name: []const u8, // name of file, 
symlink or directory link_name: []const u8, // target name of symlink - size: usize, // size of the file in bytes + size: u64, // size of the file in bytes mode: u32, kind: Header.Kind, @@ -260,7 +260,7 @@ fn TarReader(comptime ReaderType: type) type { pub fn write(self: File, writer: anytype) !void { var buffer: [4096]u8 = undefined; - var n: usize = 0; + var n: u64 = 0; while (n < self.size) { const buf = buffer[0..@min(buffer.len, self.size - n)]; try self.reader.readNoEof(buf); @@ -308,9 +308,9 @@ fn TarReader(comptime ReaderType: type) type { } // Number of padding bytes in the last file block. - inline fn blockPadding(size: usize) usize { - const block_rounded = std.mem.alignForward(usize, size, Header.SIZE); // size rounded to te block boundary - return block_rounded - size; + inline fn blockPadding(size: u64) usize { + const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary + return @intCast(block_rounded - size); } /// Iterates through the tar archive as if it is a series of files. @@ -324,7 +324,7 @@ fn TarReader(comptime ReaderType: type) type { while (try self.readHeader()) |header| { const kind = header.kind(); - const size: usize = @intCast(try header.size()); + const size: u64 = try header.size(); self.padding = blockPadding(size); switch (kind) { @@ -349,16 +349,16 @@ fn TarReader(comptime ReaderType: type) type { }, // Prefix header types .gnu_long_name => { - self.file.name = try self.readString(size, &self.file_name_buffer); + self.file.name = try self.readString(@intCast(size), &self.file_name_buffer); }, .gnu_long_link => { - self.file.link_name = try self.readString(size, &self.link_name_buffer); + self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer); }, .extended_header => { // Use just attributes from last extended header. 
self.initFile(); - var rdr = paxReader(self.reader, size); + var rdr = paxReader(self.reader, @intCast(size)); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { @@ -369,7 +369,7 @@ fn TarReader(comptime ReaderType: type) type { }, .size => { var buf: [64]u8 = undefined; - self.file.size = try std.fmt.parseInt(usize, try attr.value(&buf), 10); + self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); }, } } diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index f6a9fa1cfdec..1265050dd2cd 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -1,12 +1,15 @@ const std = @import("../std.zig"); +const builtin = @import("builtin"); const tar = std.tar; const assert = std.debug.assert; test "tar run Go test cases" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + const Case = struct { const File = struct { name: []const u8, - size: usize = 0, + size: u64 = 0, mode: u32 = 0, link_name: []const u8 = &[0]u8{}, kind: tar.Header.Kind = .normal, From e21a12e56b21aee956132dd5f68bc2349ce37c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 18:35:42 +0100 Subject: [PATCH 26/29] tar: use @embedFile in tests Like in other tests which uses testdata files (compress). That enables wasi testing also, was failing because file system operations in tests. 
--- lib/std/tar/test.zig | 72 ++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 1265050dd2cd..16f3b565b015 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -4,8 +4,6 @@ const tar = std.tar; const assert = std.debug.assert; test "tar run Go test cases" { - if (builtin.os.tag == .wasi) return error.SkipZigTest; - const Case = struct { const File = struct { name: []const u8, @@ -16,18 +14,15 @@ test "tar run Go test cases" { truncated: bool = false, // when there is no file body, just header, usefull for huge files }; - path: []const u8, // path to the tar archive file on dis + data: []const u8, // testdata file content files: []const File = &[_]@This().File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of files content + chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content err: ?anyerror = null, // parsing should fail with this error }; - const src_path = comptime std.fs.path.dirname(@src().file) orelse "."; - const test_dir = try std.fs.cwd().openDir(src_path ++ "/testdata", .{}); - const cases = [_]Case{ .{ - .path = "gnu.tar", + .data = @embedFile("testdata/gnu.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -46,11 +41,11 @@ test "tar run Go test cases" { }, }, .{ - .path = "sparse-formats.tar", + .data = @embedFile("testdata/sparse-formats.tar"), .err = error.TarUnsupportedHeader, }, .{ - .path = "star.tar", + .data = @embedFile("testdata/star.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -69,7 +64,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "v7.tar", + .data = @embedFile("testdata/v7.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -88,7 +83,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "pax.tar", + .data = @embedFile("testdata/pax.tar"), .files = &[_]Case.File{ .{ .name = 
"a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", @@ -109,12 +104,12 @@ test "tar run Go test cases" { }, .{ // pax attribute don't end with \n - .path = "pax-bad-hdr-file.tar", + .data = @embedFile("testdata/pax-bad-hdr-file.tar"), .err = error.PaxInvalidAttributeEnd, }, .{ // size is in pax attribute - .path = "pax-pos-size-file.tar", + .data = @embedFile("testdata/pax-pos-size-file.tar"), .files = &[_]Case.File{ .{ .name = "foo", @@ -129,7 +124,7 @@ test "tar run Go test cases" { }, .{ // has pax records which we are not interested in - .path = "pax-records.tar", + .data = @embedFile("testdata/pax-records.tar"), .files = &[_]Case.File{ .{ .name = "file", @@ -138,7 +133,7 @@ test "tar run Go test cases" { }, .{ // has global records which we are ignoring - .path = "pax-global-records.tar", + .data = @embedFile("testdata/pax-global-records.tar"), .files = &[_]Case.File{ .{ .name = "file1", @@ -155,7 +150,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "nil-uid.tar", + .data = @embedFile("testdata/nil-uid.tar"), .files = &[_]Case.File{ .{ .name = "P1050238.JPG.log", @@ -170,7 +165,7 @@ test "tar run Go test cases" { }, .{ // has xattrs and pax records which we are ignoring - .path = "xattrs.tar", + .data = @embedFile("testdata/xattrs.tar"), .files = &[_]Case.File{ .{ .name = "small.txt", @@ -191,7 +186,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-multi-hdrs.tar", + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), .files = &[_]Case.File{ .{ .name = "GNU2/GNU2/long-path-name", @@ -202,12 +197,12 @@ test "tar run Go test cases" { }, .{ // has gnu type D (directory) and S (sparse) blocks - .path = "gnu-incremental.tar", + .data = @embedFile("testdata/gnu-incremental.tar"), .err = error.TarUnsupportedHeader, }, .{ // should use values only from last pax header - .path = "pax-multi-hdrs.tar", + .data = 
@embedFile("testdata/pax-multi-hdrs.tar"), .files = &[_]Case.File{ .{ .name = "bar", @@ -217,7 +212,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-long-nul.tar", + .data = @embedFile("testdata/gnu-long-nul.tar"), .files = &[_]Case.File{ .{ .name = "0123456789", @@ -226,7 +221,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-utf8.tar", + .data = @embedFile("testdata/gnu-utf8.tar"), .files = &[_]Case.File{ .{ .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", @@ -235,7 +230,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "gnu-not-utf8.tar", + .data = @embedFile("testdata/gnu-not-utf8.tar"), .files = &[_]Case.File{ .{ .name = "hi\x80\x81\x82\x83bye", @@ -245,32 +240,32 @@ test "tar run Go test cases" { }, .{ // null in pax key - .path = "pax-nul-xattrs.tar", + .data = @embedFile("testdata/pax-nul-xattrs.tar"), .err = error.PaxNullInKeyword, }, .{ - .path = "pax-nul-path.tar", + .data = @embedFile("testdata/pax-nul-path.tar"), .err = error.PaxNullInValue, }, .{ - .path = "neg-size.tar", + .data = @embedFile("testdata/neg-size.tar"), .err = error.TarHeader, }, .{ - .path = "issue10968.tar", + .data = @embedFile("testdata/issue10968.tar"), .err = error.TarHeader, }, .{ - .path = "issue11169.tar", + .data = @embedFile("testdata/issue11169.tar"), .err = error.TarHeader, }, .{ - .path = "issue12435.tar", + .data = @embedFile("testdata/issue12435.tar"), .err = error.TarHeaderChksum, }, .{ // has magic with space at end instead of null - .path = "invalid-go17.tar", + .data = @embedFile("testdata/invalid-go17.tar"), .files = &[_]Case.File{ .{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", @@ -278,7 +273,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "ustar-file-devs.tar", + .data = @embedFile("testdata/ustar-file-devs.tar"), .files = &[_]Case.File{ .{ .name = "file", @@ -287,7 +282,7 @@ test "tar run Go test cases" { }, }, .{ - .path = "trailing-slash.tar", + 
.data = @embedFile("testdata/trailing-slash.tar"), .files = &[_]Case.File{ .{ .name = "123456789/" ** 30, @@ -297,7 +292,7 @@ test "tar run Go test cases" { }, .{ // Has size in gnu extended format. To represent size bigger than 8 GB. - .path = "writer-big.tar", + .data = @embedFile("testdata/writer-big.tar"), .files = &[_]Case.File{ .{ .name = "tmp/16gig.txt", @@ -309,7 +304,7 @@ test "tar run Go test cases" { }, .{ // Size in gnu extended format, and name in pax attribute. - .path = "writer-big-long.tar", + .data = @embedFile("testdata/writer-big-long.tar"), .files = &[_]Case.File{ .{ .name = "longname/" ** 15 ++ "16gig.txt", @@ -322,11 +317,8 @@ }; for (cases) |case| { - var fs_file = try test_dir.openFile(case.path, .{}); - - defer fs_file.close(); - - var iter = tar.tarReader(fs_file.reader(), null); + var fsb = std.io.fixedBufferStream(case.data); + var iter = tar.tarReader(fsb.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { From 7923a53996f8d24ad27823db3a45a6dd4a2bf317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 12 Dec 2023 18:50:25 +0100 Subject: [PATCH 27/29] tar: rename reader to iterator Iterator has `next` function, iterates over tar files. When using from outside of module with `tar.` prefix makes more sense. var iter = tar.iterator(reader, null); while (try iter.next()) |file| { ... } --- lib/std/tar.zig | 27 ++++++++++++++------------- lib/std/tar/test.zig | 21 ++++++++++----------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 376a6ad23027..dcd56dec077e 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -225,14 +225,16 @@ fn nullStr(str: []const u8) []const u8 { return str; } -pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) { +/// Iterates over files in tar archive. +/// `next` returns each file in `reader` tar archive.
+pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) { return .{ .reader = reader, .diagnostics = diagnostics, }; } -fn TarReader(comptime ReaderType: type) type { +fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, diagnostics: ?*Options.Diagnostics, @@ -358,7 +360,7 @@ fn TarReader(comptime ReaderType: type) type { // Use just attributes from last extended header. self.initFile(); - var rdr = paxReader(self.reader, @intCast(size)); + var rdr = paxIterator(self.reader, @intCast(size)); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { @@ -393,10 +395,10 @@ fn TarReader(comptime ReaderType: type) type { }; } -// Pax attributes reader. -// Size is length of pax extended header in reader. -fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) { - return PaxReader(@TypeOf(reader)){ +/// Pax attributes iterator. +/// Size is length of pax extended header in reader. +fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) { + return PaxIterator(@TypeOf(reader)){ .reader = reader, .size = size, }; @@ -408,7 +410,7 @@ const PaxAttributeKind = enum { size, }; -fn PaxReader(comptime ReaderType: type) type { +fn PaxIterator(comptime ReaderType: type) type { return struct { size: usize, // cumulative size of all pax attributes reader: ReaderType, @@ -508,8 +510,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, } - var iter = tarReader(reader, options.diagnostics); - + var iter = iterator(reader, options.diagnostics); while (try iter.next()) |file| { switch (file.kind) { .directory => { @@ -601,7 +602,7 @@ test "tar stripComponents" { try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -test "tar PaxReader" { +test "tar PaxIterator" { const Attr = struct { kind: PaxAttributeKind, value: []const u8 = undefined, @@ -699,10 +700,10 @@ test "tar PaxReader" { outer: for (cases) |case| { var stream = 
std.io.fixedBufferStream(case.data); - var rdr = paxReader(stream.reader(), case.data.len); + var iter = paxIterator(stream.reader(), case.data.len); var i: usize = 0; - while (rdr.next() catch |err| { + while (iter.next() catch |err| { if (case.err) |e| { try std.testing.expectEqual(e, err); continue; diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 16f3b565b015..82c73e25466d 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -1,7 +1,6 @@ const std = @import("../std.zig"); -const builtin = @import("builtin"); const tar = std.tar; -const assert = std.debug.assert; +const testing = std.testing; test "tar run Go test cases" { const Case = struct { @@ -318,33 +317,33 @@ test "tar run Go test cases" { for (cases) |case| { var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.tarReader(fsb.reader(), null); + var iter = tar.iterator(fsb.reader(), null); var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { - try std.testing.expectEqual(e, err); + try testing.expectEqual(e, err); continue; } else { return err; } }) |actual| : (i += 1) { const expected = case.files[i]; - try std.testing.expectEqualStrings(expected.name, actual.name); - try std.testing.expectEqual(expected.size, actual.size); - try std.testing.expectEqual(expected.kind, actual.kind); - try std.testing.expectEqual(expected.mode, actual.mode); - try std.testing.expectEqualStrings(expected.link_name, actual.link_name); + try testing.expectEqualStrings(expected.name, actual.name); + try testing.expectEqual(expected.size, actual.size); + try testing.expectEqual(expected.kind, actual.kind); + try testing.expectEqual(expected.mode, actual.mode); + try testing.expectEqualStrings(expected.link_name, actual.link_name); if (case.chksums.len > i) { var md5writer = Md5Writer{}; try actual.write(&md5writer); const chksum = md5writer.chksum(); - try std.testing.expectEqualStrings(case.chksums[i], &chksum); + try testing.expectEqualStrings(case.chksums[i], &chksum); } 
else { if (!expected.truncated) try actual.skip(); // skip file content } } - try std.testing.expectEqual(case.files.len, i); + try testing.expectEqual(case.files.len, i); } } From 7d3a31872eda55438259b54818baaa90b6ecd74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 18 Dec 2023 21:39:07 +0100 Subject: [PATCH 28/29] tar: improve diagnostic reporting Using Python testtar file (mentioned in #14310) to test diagnostic reporting. Added computing checksum by using both unsigned and signed header bytes values. Added skipping gnu extended sparse headers while reporting unsupported header in diagnostic. Note on testing: wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O /tmp/testtar.tar ``` test "Python testtar.tar file" { const file_name = "testtar.tar"; var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{}); defer file.close(); var diag = Options.Diagnostics{ .allocator = std.testing.allocator }; defer diag.deinit(); var iter = iterator(file.reader(), &diag); while (try iter.next()) |f| { std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size }); try f.skip(); } for (diag.errors.items) |e| { switch (e) { .unsupported_file_type => |u| { std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name }); }, else => unreachable, } } } ``` --- lib/std/tar.zig | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index dcd56dec077e..3bdfd93f055b 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -105,6 +105,8 @@ pub const Header = struct { // used to store the path or link name for the next file.
gnu_long_name = 'L', gnu_long_link = 'K', + gnu_sparse = 'S', + solaris_extended_header = 'X', _, }; @@ -194,16 +196,21 @@ pub const Header = struct { return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader; } + const Chksums = struct { + unsigned: u64, + signed: i64, + }; + // Sum of all bytes in the header block. The chksum field is treated as if // it were filled with spaces (ASCII 32). - fn computeChksum(header: Header) u64 { - var sum: u64 = 0; - for (header.bytes, 0..) |b, i| { - if (148 <= i and i < 156) continue; // skip chksum field bytes - sum += b; + fn computeChksum(header: Header) Chksums { + var cs: Chksums = .{ .signed = 0, .unsigned = 0 }; + for (header.bytes, 0..) |v, i| { + const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces. + cs.unsigned += b; + cs.signed += @as(i8, @bitCast(b)); } - // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces. - return if (sum > 0) sum + 256 else 0; + return cs; } // Checks calculated chksum with value of chksum field. @@ -211,8 +218,9 @@ pub const Header = struct { // Zero value indicates empty block. 
pub fn checkChksum(header: Header) !u64 { const field = try header.chksum(); - const computed = header.computeChksum(); - if (field != computed) return error.TarHeaderChksum; + const cs = header.computeChksum(); + if (field == 0 and cs.unsigned == 256) return 0; + if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum; return field; } }; @@ -387,11 +395,25 @@ fn Iterator(comptime ReaderType: type) type { .file_name = try d.allocator.dupe(u8, header.name()), .file_type = kind, } }); + if (kind == .gnu_sparse) { + try self.skipGnuSparseExtendedHeaders(header); + } + self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; }, } } return null; } + + fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void { + var is_extended = header.bytes[482] > 0; + while (is_extended) { + var buf: [Header.SIZE]u8 = undefined; + const n = try self.reader.readAll(&buf); + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + is_extended = buf[504] > 0; + } + } }; } From 3f809cbe7ded23a236f98eb1809fc7cda65021e1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 12 Jan 2024 17:51:44 -0700 Subject: [PATCH 29/29] build.zig: don't install testdata --- build.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.zig b/build.zig index 504eb9a386e9..1d44c249cc51 100644 --- a/build.zig +++ b/build.zig @@ -165,6 +165,8 @@ pub fn build(b: *std.Build) !void { ".xz", // exclude files from lib/std/tz/ ".tzif", + // exclude files from lib/std/tar/testdata + ".tar", // others "README.md", },