diff --git a/build.zig b/build.zig index 504eb9a386e9..1d44c249cc51 100644 --- a/build.zig +++ b/build.zig @@ -165,6 +165,8 @@ pub fn build(b: *std.Build) !void { ".xz", // exclude files from lib/std/tz/ ".tzif", + // exclude files from lib/std/tar/testdata + ".tar", // others "README.md", }, diff --git a/lib/std/tar.zig b/lib/std/tar.zig index c39cc6e4323e..3bdfd93f055b 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,3 +1,23 @@ +/// Tar archive is single ordinary file which can contain many files (or +/// directories, symlinks, ...). It's build by series of blocks each size of 512 +/// bytes. First block of each entry is header which defines type, name, size +/// permissions and other attributes. Header is followed by series of blocks of +/// file content, if any that entry has content. Content is padded to the block +/// size, so next header always starts at block boundary. +/// +/// This simple format is extended by GNU and POSIX pax extensions to support +/// file names longer than 256 bytes and additional attributes. +/// +/// This is not comprehensive tar parser. Here we are only file types needed to +/// support Zig package manager; normal file, directory, symbolic link. And +/// subset of attributes: name, size, permissions. +/// +/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html +/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 +/// +const std = @import("std.zig"); +const assert = std.debug.assert; + pub const Options = struct { /// Number of directory levels to skip when extracting files. 
strip_components: u32 = 0, @@ -37,7 +57,7 @@ pub const Options = struct { }, unsupported_file_type: struct { file_name: []const u8, - file_type: Header.FileType, + file_type: Header.Kind, }, }; @@ -63,9 +83,13 @@ pub const Options = struct { }; pub const Header = struct { - bytes: *const [512]u8, + const SIZE = 512; + const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) + const LINK_NAME_SIZE = 100; - pub const FileType = enum(u8) { + bytes: *const [SIZE]u8, + + pub const Kind = enum(u8) { normal_alias = 0, normal = '0', hard_link = '1', @@ -77,103 +101,424 @@ pub const Header = struct { contiguous = '7', global_extended_header = 'g', extended_header = 'x', + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + gnu_long_name = 'L', + gnu_long_link = 'K', + gnu_sparse = 'S', + solaris_extended_header = 'X', _, }; - pub fn fileSize(header: Header) !u64 { - const raw = header.bytes[124..][0..12]; - const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); - const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); - if (rtrimmed.len == 0) return 0; - return std.fmt.parseInt(u64, rtrimmed, 8); - } - - pub fn is_ustar(header: Header) bool { - return std.mem.eql(u8, header.bytes[257..][0..6], "ustar\x00"); - } - /// Includes prefix concatenated, if any. - /// Return value may point into Header buffer, or might point into the - /// argument buffer. 
/// TODO: check against "../" and other nefarious things - pub fn fullFileName(header: Header, buffer: *[std.fs.MAX_PATH_BYTES]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 { const n = name(header); - if (!is_ustar(header)) - return n; const p = prefix(header); - if (p.len == 0) - return n; + if (!is_ustar(header) or p.len == 0) { + @memcpy(buffer[0..n.len], n); + return buffer[0..n.len]; + } @memcpy(buffer[0..p.len], p); buffer[p.len] = '/'; @memcpy(buffer[p.len + 1 ..][0..n.len], n); return buffer[0 .. p.len + 1 + n.len]; } + pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 { + const link_name = header.str(157, 100); + if (link_name.len == 0) { + return buffer[0..0]; + } + const buf = buffer[0..link_name.len]; + @memcpy(buf, link_name); + return buf; + } + pub fn name(header: Header) []const u8 { - return str(header, 0, 0 + 100); + return header.str(0, 100); + } + + pub fn mode(header: Header) !u32 { + return @intCast(try header.numeric(100, 8)); } - pub fn linkName(header: Header) []const u8 { - return str(header, 157, 157 + 100); + pub fn size(header: Header) !u64 { + return header.numeric(124, 12); + } + + pub fn chksum(header: Header) !u64 { + return header.octal(148, 8); + } + + pub fn is_ustar(header: Header) bool { + const magic = header.bytes[257..][0..6]; + return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' '); } pub fn prefix(header: Header) []const u8 { - return str(header, 345, 345 + 155); + return header.str(345, 155); } - pub fn fileType(header: Header) FileType { - const result: FileType = @enumFromInt(header.bytes[156]); + pub fn kind(header: Header) Kind { + const result: Kind = @enumFromInt(header.bytes[156]); if (result == .normal_alias) return .normal; return result; } - fn str(header: Header, start: usize, end: usize) []const u8 { - var i: usize = start; - while (i < end) : (i += 1) { - if (header.bytes[i] == 0) break; + fn str(header: Header, 
start: usize, len: usize) []const u8 { + return nullStr(header.bytes[start .. start + len]); + } + + fn numeric(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // If the leading byte is 0xff (255), all the bytes of the field + // (including the leading byte) are concatenated in big-endian order, + // with the result being a negative number expressed in two’s + // complement form. + if (raw[0] == 0xff) return error.TarNumericValueNegative; + // If the leading byte is 0x80 (128), the non-leading bytes of the + // field are concatenated in big-endian order. + if (raw[0] == 0x80) { + if (raw[1] + raw[2] + raw[3] != 0) return error.TarNumericValueTooBig; + return std.mem.readInt(u64, raw[4..12], .big); } - return header.bytes[start..i]; + return try header.octal(start, len); } -}; -const Buffer = struct { - buffer: [512 * 8]u8 = undefined, - start: usize = 0, - end: usize = 0, + fn octal(header: Header, start: usize, len: usize) !u64 { + const raw = header.bytes[start..][0..len]; + // Zero-filled octal number in ASCII. Each numeric field of width w + // contains w minus 1 digits, and a null + const ltrimmed = std.mem.trimLeft(u8, raw, "0 "); + const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00"); + if (rtrimmed.len == 0) return 0; + return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader; + } - pub fn readChunk(b: *Buffer, reader: anytype, count: usize) ![]const u8 { - b.ensureCapacity(1024); + const Chksums = struct { + unsigned: u64, + signed: i64, + }; - const ask = @min(b.buffer.len - b.end, count -| (b.end - b.start)); - b.end += try reader.readAtLeast(b.buffer[b.end..], ask); + // Sum of all bytes in the header block. The chksum field is treated as if + // it were filled with spaces (ASCII 32). + fn computeChksum(header: Header) Chksums { + var cs: Chksums = .{ .signed = 0, .unsigned = 0 }; + for (header.bytes, 0..) 
|v, i| { + const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces. + cs.unsigned += b; + cs.signed += @as(i8, @bitCast(b)); + } + return cs; + } - return b.buffer[b.start..b.end]; + // Checks calculated chksum with value of chksum field. + // Returns error or valid chksum value. + // Zero value indicates empty block. + pub fn checkChksum(header: Header) !u64 { + const field = try header.chksum(); + const cs = header.computeChksum(); + if (field == 0 and cs.unsigned == 256) return 0; + if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum; + return field; } +}; - pub fn advance(b: *Buffer, count: usize) void { - b.start += count; - assert(b.start <= b.end); +// Breaks string on first null character. +fn nullStr(str: []const u8) []const u8 { + for (str, 0..) |c, i| { + if (c == 0) return str[0..i]; } + return str; +} - pub fn skip(b: *Buffer, reader: anytype, count: usize) !void { - if (b.start + count > b.end) { - try reader.skipBytes(b.start + count - b.end, .{}); - b.start = b.end; - } else { - b.advance(count); +/// Iterates over files in tar archive. +/// `next` returns each file in `reader` tar archive. 
pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) {
    return .{
        .reader = reader,
        .diagnostics = diagnostics,
    };
}

fn Iterator(comptime ReaderType: type) type {
    return struct {
        reader: ReaderType,
        diagnostics: ?*Options.Diagnostics,

        // buffers for header and file attributes
        header_buffer: [Header.SIZE]u8 = undefined,
        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,

        // bytes of padding to the end of the block
        padding: usize = 0,
        // current tar file
        file: File = undefined,

        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: u64, // size of the file in bytes
            mode: u32,
            kind: Header.Kind,

            reader: ReaderType,

            // Writes file content to writer.
            pub fn write(self: File, writer: anytype) !void {
                var buffer: [4096]u8 = undefined;

                var n: u64 = 0;
                while (n < self.size) {
                    const buf = buffer[0..@min(buffer.len, self.size - n)];
                    try self.reader.readNoEof(buf);
                    try writer.writeAll(buf);
                    n += buf.len;
                }
            }

            // Skips file content. Advances reader.
            pub fn skip(self: File) !void {
                try self.reader.skipBytes(self.size, .{});
            }
        };

        const Self = @This();

        // Reads the next header block, first skipping any padding left over
        // from the previous entry. Returns null on a clean end of archive
        // (EOF or an all-null block).
        fn readHeader(self: *Self) !?Header {
            if (self.padding > 0) {
                try self.reader.skipBytes(self.padding, .{});
            }
            const n = try self.reader.readAll(&self.header_buffer);
            if (n == 0) return null;
            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
            if (try header.checkChksum() == 0) return null;
            return header;
        }

        // Reads `size` bytes into `buffer` and trims at the first null.
        inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
            // `size` is attacker-controlled data from the archive; fail with
            // an error rather than asserting (the previous
            // `assert(buffer.len >= size)` would crash the process on a
            // crafted archive with an oversized long-name entry).
            if (size > buffer.len) return error.NameTooLong;
            const buf = buffer[0..size];
            try self.reader.readNoEof(buf);
            return nullStr(buf);
        }

        inline fn initFile(self: *Self) void {
            self.file = File{
                .name = self.file_name_buffer[0..0],
                .link_name = self.link_name_buffer[0..0],
                .size = 0,
                .kind = .normal,
                .mode = 0,
                .reader = self.reader,
            };
        }

        // Number of padding bytes in the last file block.
        inline fn blockPadding(size: u64) usize {
            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
            return @intCast(block_rounded - size);
        }

        /// Iterates through the tar archive as if it is a series of files.
        /// Internally, the tar format often uses entries (header with optional
        /// content) to add meta data that describes the next file. These
        /// entries should not normally be visible to the outside. As such, this
        /// loop iterates through one or more entries until it collects all
        /// file attributes.
        pub fn next(self: *Self) !?File {
            self.initFile();

            while (try self.readHeader()) |header| {
                const kind = header.kind();
                const size: u64 = try header.size();
                self.padding = blockPadding(size);

                switch (kind) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        self.file.kind = kind;
                        self.file.mode = try header.mode();

                        // set file attributes if not already set by prefix/extended headers
                        if (self.file.size == 0) {
                            self.file.size = size;
                        }
                        if (self.file.link_name.len == 0) {
                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
                        }
                        if (self.file.name.len == 0) {
                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
                        }

                        self.padding = blockPadding(self.file.size);
                        return self.file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        self.file.name = try self.readString(@intCast(size), &self.file_name_buffer);
                    },
                    .gnu_long_link => {
                        self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer);
                    },
                    .extended_header => {
                        // Use attributes only from the last extended header.
                        self.initFile();

                        var rdr = paxIterator(self.reader, @intCast(size));
                        while (try rdr.next()) |attr| {
                            switch (attr.kind) {
                                .path => {
                                    self.file.name = try attr.value(&self.file_name_buffer);
                                },
                                .linkpath => {
                                    self.file.link_name = try attr.value(&self.link_name_buffer);
                                },
                                .size => {
                                    var buf: [64]u8 = undefined;
                                    self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                    // All others are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                            .file_name = try d.allocator.dupe(u8, header.name()),
                            .file_type = kind,
                        } });
                        if (kind == .gnu_sparse) {
                            try self.skipGnuSparseExtendedHeaders(header);
                        }
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }

        // Consumes the chain of extra 512-byte sparse-map blocks that follow
        // a GNU sparse header (byte 482 / 504 flags another block).
        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
            var is_extended = header.bytes[482] > 0;
            while (is_extended) {
                var buf: [Header.SIZE]u8 = undefined;
                const n = try self.reader.readAll(&buf);
                if (n < Header.SIZE) return error.UnexpectedEndOfStream;
                is_extended = buf[504] > 0;
            }
        }
    };
}

/// Pax attributes iterator.
/// Size is length of pax extended header in reader.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    return PaxIterator(@TypeOf(reader)){
        .reader = reader,
        .size = size,
    };
}

// Pax attribute keywords this parser understands; all other keywords are
// skipped by the iterator.
const PaxAttributeKind = enum {
    path,
    linkpath,
    size,
};

fn PaxIterator(comptime ReaderType: type) type {
    return struct {
        size: usize, // remaining bytes of the pax extended header
        reader: ReaderType,
        // scratch buffer used for reading attribute length and keyword
        scratch: [128]u8 = undefined,

        const Self = @This();

        const Attribute = struct {
            kind: PaxAttributeKind,
            len: usize, // length of the attribute value
            reader: ReaderType, // reader positioned at value start

            // Copies the pax attribute value into the destination buffer.
            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
                // `len` comes from the archive and is attacker-controlled;
                // return an error instead of asserting so a crafted oversized
                // value cannot crash the process.
                if (self.len > dst.len) return error.PaxValueTooLong;
                const buf = dst[0..self.len];
                const n = try self.reader.readAll(buf);
                if (n < self.len) return error.UnexpectedEndOfStream;
                try validateAttributeEnding(self.reader);
                if (hasNull(buf)) return error.PaxNullInValue;
                return buf;
            }
        };

        // Iterates over pax attributes. Returns only known attributes.
        // Caller has to call value on the Attribute to advance the reader
        // across the value.
        pub fn next(self: *Self) !?Attribute {
            // A pax extended header consists of one or more attributes, each
            // constructed as: "%d %s=%s\n", <length>, <keyword>, <value>
            while (self.size > 0) {
                const length_buf = try self.readUntil(' ');
                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes

                const keyword = try self.readUntil('=');
                if (hasNull(keyword)) return error.PaxNullInKeyword;

                // calculate value_len
                const value_start = length_buf.len + keyword.len + 2; // 2 separators
                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
                const value_len = length - value_start - 1; // \n separator at end
                self.size -= length;

                const kind: PaxAttributeKind = if (eql(keyword, "path"))
                    .path
                else if (eql(keyword, "linkpath"))
                    .linkpath
                else if (eql(keyword, "size"))
                    .size
                else {
                    // Unknown keyword: consume the value and its trailing
                    // newline, then look at the next record.
                    try self.reader.skipBytes(value_len, .{});
                    try validateAttributeEnding(self.reader);
                    continue;
                };
                return Attribute{
                    .kind = kind,
                    .len = value_len,
                    .reader = self.reader,
                };
            }

            return null;
        }

        inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
            var fbs = std.io.fixedBufferStream(&self.scratch);
            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
            return fbs.getWritten();
        }

        inline fn eql(a: []const u8, b: []const u8) bool {
            return std.mem.eql(u8, a, b);
        }

        inline fn hasNull(str: []const u8) bool {
            return (std.mem.indexOfScalar(u8, str, 0)) != null;
        }

        // Checks that each record ends with a new line.
+ inline fn validateAttributeEnding(reader: ReaderType) !void { + if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; + } + }; +} + pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { switch (options.mode_mode) { .ignore => {}, @@ -186,39 +531,21 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi @panic("TODO: unimplemented: tar ModeMode.executable_bit_only"); }, } - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - var file_name_override_len: usize = 0; - var buffer: Buffer = .{}; - header: while (true) { - const chunk = try buffer.readChunk(reader, 1024); - switch (chunk.len) { - 0 => return, - 1...511 => return error.UnexpectedEndOfStream, - else => {}, - } - buffer.advance(512); - - const header: Header = .{ .bytes = chunk[0..512] }; - const file_size = try header.fileSize(); - const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - const pad_len: usize = @intCast(rounded_file_size - file_size); - const unstripped_file_name = if (file_name_override_len > 0) - file_name_buffer[0..file_name_override_len] - else - try header.fullFileName(&file_name_buffer); - file_name_override_len = 0; - switch (header.fileType()) { + + var iter = iterator(reader, options.diagnostics); + while (try iter.next()) |file| { + switch (file.kind) { .directory => { - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .normal => { - if (file_size == 0 and unstripped_file_name.len == 0) return; - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + if (file.size == 0 and file.name.len == 0) return; + const file_name = try stripComponents(file.name, options.strip_components); - const file = dir.createFile(file_name, .{}) catch |err| 
switch (err) { + const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) { error.FileNotFound => again: { const code = code: { if (std.fs.path.dirname(file_name)) |dir_name| { @@ -238,70 +565,19 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi }, else => |e| return e, }; - defer if (file) |f| f.close(); - - var file_off: usize = 0; - while (true) { - const temp = try buffer.readChunk(reader, @intCast(rounded_file_size + 512 - file_off)); - if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@intCast(@min(file_size - file_off, temp.len))]; - if (file) |f| try f.writeAll(slice); - - file_off += slice.len; - buffer.advance(slice.len); - if (file_off >= file_size) { - buffer.advance(pad_len); - continue :header; - } - } - }, - .extended_header => { - if (file_size == 0) { - buffer.advance(@intCast(rounded_file_size)); - continue; - } + defer if (fs_file) |f| f.close(); - const chunk_size: usize = @intCast(rounded_file_size + 512); - var data_off: usize = 0; - file_name_override_len = while (data_off < file_size) { - const slice = try buffer.readChunk(reader, chunk_size - data_off); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const remaining_size: usize = @intCast(file_size - data_off); - const attr_info = try parsePaxAttribute(slice[0..@min(remaining_size, slice.len)], remaining_size); - - if (std.mem.eql(u8, attr_info.key, "path")) { - if (attr_info.value_len > file_name_buffer.len) return error.NameTooLong; - buffer.advance(attr_info.value_off); - data_off += attr_info.value_off; - break attr_info.value_len; - } - - try buffer.skip(reader, attr_info.size); - data_off += attr_info.size; - } else 0; - - var i: usize = 0; - while (i < file_name_override_len) { - const slice = try buffer.readChunk(reader, chunk_size - data_off - i); - if (slice.len == 0) return error.UnexpectedEndOfStream; - const copy_size: usize = @intCast(@min(file_name_override_len - i, slice.len)); - 
@memcpy(file_name_buffer[i .. i + copy_size], slice[0..copy_size]); - buffer.advance(copy_size); - i += copy_size; + if (fs_file) |f| { + try file.write(f); + } else { + try file.skip(); } - - try buffer.skip(reader, @intCast(rounded_file_size - data_off - file_name_override_len)); - continue :header; }, - .global_extended_header => { - buffer.skip(reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; - }, - .hard_link => return error.TarUnsupportedFileType, .symbolic_link => { // The file system path of the symbolic link. - const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const file_name = try stripComponents(file.name, options.strip_components); // The data inside the symbolic link. - const link_name = header.linkName(); + const link_name = file.link_name; dir.symLink(link_name, file_name, .{}) catch |err| again: { const code = code: { @@ -323,13 +599,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi } }); }; }, - else => |file_type| { - const d = options.diagnostics orelse return error.TarUnsupportedFileType; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, unstripped_file_name), - .file_type = file_type, - } }); - }, + else => unreachable, } } } @@ -347,51 +617,137 @@ fn stripComponents(path: []const u8, count: u32) ![]const u8 { return path[i..]; } -test stripComponents { +test "tar stripComponents" { const expectEqualStrings = std.testing.expectEqualStrings; try expectEqualStrings("a/b/c", try stripComponents("a/b/c", 0)); try expectEqualStrings("b/c", try stripComponents("a/b/c", 1)); try expectEqualStrings("c", try stripComponents("a/b/c", 2)); } -const PaxAttributeInfo = struct { - size: usize, - key: []const u8, - value_off: usize, - value_len: usize, -}; +test "tar PaxIterator" { + const Attr = struct { + kind: PaxAttributeKind, + value: []const u8 = undefined, + err: ?anyerror = null, + }; + const cases 
= [_]struct { + data: []const u8, + attrs: []const Attr, + err: ?anyerror = null, + }{ + .{ // valid but unknown keys + .data = + \\30 mtime=1350244992.023960108 + \\6 k=1 + \\13 key1=val1 + \\10 a=name + \\9 a=name + \\ + , + .attrs = &[_]Attr{}, + }, + .{ // mix of known and unknown keys + .data = + \\6 k=1 + \\13 path=name + \\17 linkpath=link + \\13 key1=val1 + \\12 size=123 + \\13 key2=val2 + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .value = "link" }, + .{ .kind = .size, .value = "123" }, + }, + }, + .{ // too short size of the second key-value pair + .data = + \\13 path=name + \\10 linkpath=value + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + }, + .err = error.UnexpectedEndOfStream, + }, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\6 k=1 + \\19 linkpath=value + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + }, + .err = error.UnexpectedEndOfStream, + }, -fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo { - const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute; - const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute; - const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10); - if (kv_size > max_size) { - return error.InvalidPaxAttribute; - } - return .{ - .size = kv_size, - .key = data[pos_space + 1 .. 
pos_equals], - .value_off = pos_equals + 1, - .value_len = kv_size - pos_equals - 2, + .{ // too long size of the second key-value pair + .data = + \\13 path=name + \\19 linkpath=value + \\6 k=1 + \\ + , + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd }, + }, + }, + .{ // null in keyword is not valid + .data = "13 path=name\n" ++ "7 k\x00b=1\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "name" }, + }, + .err = error.PaxNullInKeyword, + }, + .{ // null in value is not valid + .data = "23 path=name\x00with null\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .err = error.PaxNullInValue }, + }, + }, + .{ // 1000 characters path + .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n", + .attrs = &[_]Attr{ + .{ .kind = .path, .value = "0123456789" ** 100 }, + }, + }, }; -} + var buffer: [1024]u8 = undefined; -test parsePaxAttribute { - const expectEqual = std.testing.expectEqual; - const expectEqualStrings = std.testing.expectEqualStrings; - const expectError = std.testing.expectError; - const prefix = "1011 path="; - const file_name = "0123456789" ** 100; - const header = prefix ++ file_name ++ "\n"; - const attr_info = try parsePaxAttribute(header, 1011); - try expectEqual(@as(usize, 1011), attr_info.size); - try expectEqualStrings("path", attr_info.key); - try expectEqual(prefix.len, attr_info.value_off); - try expectEqual(file_name.len, attr_info.value_len); - try expectEqual(attr_info, try parsePaxAttribute(header, 1012)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute(header, 1010)); - try expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0)); + outer: for (cases) |case| { + var stream = std.io.fixedBufferStream(case.data); + var iter = paxIterator(stream.reader(), case.data.len); + + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try std.testing.expectEqual(e, err); + continue; + } + return err; + }) |attr| : (i += 1) { + const exp = 
case.attrs[i]; + try std.testing.expectEqual(exp.kind, attr.kind); + const value = attr.value(&buffer) catch |err| { + if (exp.err) |e| { + try std.testing.expectEqual(e, err); + break :outer; + } + return err; + }; + try std.testing.expectEqualStrings(exp.value, value); + } + try std.testing.expectEqual(case.attrs.len, i); + try std.testing.expect(case.err == null); + } } -const std = @import("std.zig"); -const assert = std.debug.assert; +test { + _ = @import("tar/test.zig"); +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig new file mode 100644 index 000000000000..82c73e25466d --- /dev/null +++ b/lib/std/tar/test.zig @@ -0,0 +1,367 @@ +const std = @import("../std.zig"); +const tar = std.tar; +const testing = std.testing; + +test "tar run Go test cases" { + const Case = struct { + const File = struct { + name: []const u8, + size: u64 = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + kind: tar.Header.Kind = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; + + data: []const u8, // testdata file content + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content + err: ?anyerror = null, // parsing should fail with this error + }; + + const cases = [_]Case{ + .{ + .data = @embedFile("testdata/gnu.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/sparse-formats.tar"), + .err = error.TarUnsupportedHeader, + }, + .{ + .data = @embedFile("testdata/star.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = 
&[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/v7.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o444, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o444, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/pax.tar"), + .files = &[_]Case.File{ + .{ + .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .mode = 0o664, + }, + .{ + .name = "a/b", + .size = 0, + .kind = .symbolic_link, + .mode = 0o777, + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + .{ + // pax attribute don't end with \n + .data = @embedFile("testdata/pax-bad-hdr-file.tar"), + .err = error.PaxInvalidAttributeEnd, + }, + .{ + // size is in pax attribute + .data = @embedFile("testdata/pax-pos-size-file.tar"), + .files = &[_]Case.File{ + .{ + .name = "foo", + .size = 999, + .kind = .normal, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "0afb597b283fe61b5d4879669a350556", + }, + }, + .{ + // has pax records which we are not interested in + .data = @embedFile("testdata/pax-records.tar"), + .files = &[_]Case.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .data = @embedFile("testdata/pax-global-records.tar"), + .files = &[_]Case.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .data = 
@embedFile("testdata/nil-uid.tar"), + .files = &[_]Case.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .kind = .normal, + .mode = 0o664, + }, + }, + .chksums = &[_][]const u8{ + "08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .data = @embedFile("testdata/xattrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .kind = .normal, + .mode = 0o644, + }, + .{ + .name = "small2.txt", + .size = 11, + .kind = .normal, + .mode = 0o644, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + // has gnu type D (directory) and S (sparse) blocks + .data = @embedFile("testdata/gnu-incremental.tar"), + .err = error.TarUnsupportedHeader, + }, + .{ + // should use values only from last pax header + .data = @embedFile("testdata/pax-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-long-nul.tar"), + .files = &[_]Case.File{ + .{ + .name = "0123456789", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-utf8.tar"), + .files = &[_]Case.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-not-utf8.tar"), + .files = &[_]Case.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, + }, + }, + }, + .{ + // null in pax key + .data = @embedFile("testdata/pax-nul-xattrs.tar"), + .err = error.PaxNullInKeyword, + }, + .{ + .data = @embedFile("testdata/pax-nul-path.tar"), + .err = error.PaxNullInValue, + }, + .{ + .data = 
@embedFile("testdata/neg-size.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue10968.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue11169.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue12435.tar"), + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .data = @embedFile("testdata/invalid-go17.tar"), + .files = &[_]Case.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .data = @embedFile("testdata/ustar-file-devs.tar"), + .files = &[_]Case.File{ + .{ + .name = "file", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/trailing-slash.tar"), + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, + }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. + .data = @embedFile("testdata/writer-big.tar"), + .files = &[_]Case.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + .mode = 0o640, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. 
+ .data = @embedFile("testdata/writer-big-long.tar"), + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + }; + + for (cases) |case| { + var fsb = std.io.fixedBufferStream(case.data); + var iter = tar.iterator(fsb.reader(), null); + var i: usize = 0; + while (iter.next() catch |err| { + if (case.err) |e| { + try testing.expectEqual(e, err); + continue; + } else { + return err; + } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try testing.expectEqualStrings(expected.name, actual.name); + try testing.expectEqual(expected.size, actual.size); + try testing.expectEqual(expected.kind, actual.kind); + try testing.expectEqual(expected.mode, actual.mode); + try testing.expectEqualStrings(expected.link_name, actual.link_name); + + if (case.chksums.len > i) { + var md5writer = Md5Writer{}; + try actual.write(&md5writer); + const chksum = md5writer.chksum(); + try testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (!expected.truncated) try actual.skip(); // skip file content + } + } + try testing.expectEqual(case.files.len, i); + } +} + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; diff --git a/lib/std/tar/testdata/gnu-incremental.tar b/lib/std/tar/testdata/gnu-incremental.tar new file mode 100644 index 000000000000..4c442e5b82d1 Binary files /dev/null and b/lib/std/tar/testdata/gnu-incremental.tar differ diff --git a/lib/std/tar/testdata/gnu-long-nul.tar b/lib/std/tar/testdata/gnu-long-nul.tar new file mode 100644 index 
000000000000..28bc812aa60e Binary files /dev/null and b/lib/std/tar/testdata/gnu-long-nul.tar differ diff --git a/lib/std/tar/testdata/gnu-multi-hdrs.tar b/lib/std/tar/testdata/gnu-multi-hdrs.tar new file mode 100644 index 000000000000..8bcad55d06e8 Binary files /dev/null and b/lib/std/tar/testdata/gnu-multi-hdrs.tar differ diff --git a/lib/std/tar/testdata/gnu-not-utf8.tar b/lib/std/tar/testdata/gnu-not-utf8.tar new file mode 100644 index 000000000000..81cec67d3309 Binary files /dev/null and b/lib/std/tar/testdata/gnu-not-utf8.tar differ diff --git a/lib/std/tar/testdata/gnu-utf8.tar b/lib/std/tar/testdata/gnu-utf8.tar new file mode 100644 index 000000000000..2c9c8079cf65 Binary files /dev/null and b/lib/std/tar/testdata/gnu-utf8.tar differ diff --git a/lib/std/tar/testdata/gnu.tar b/lib/std/tar/testdata/gnu.tar new file mode 100644 index 000000000000..fc899dc8dc2a Binary files /dev/null and b/lib/std/tar/testdata/gnu.tar differ diff --git a/lib/std/tar/testdata/invalid-go17.tar b/lib/std/tar/testdata/invalid-go17.tar new file mode 100644 index 000000000000..58f2488e78fb Binary files /dev/null and b/lib/std/tar/testdata/invalid-go17.tar differ diff --git a/lib/std/tar/testdata/issue10968.tar b/lib/std/tar/testdata/issue10968.tar new file mode 100644 index 000000000000..1cc837bcff14 Binary files /dev/null and b/lib/std/tar/testdata/issue10968.tar differ diff --git a/lib/std/tar/testdata/issue11169.tar b/lib/std/tar/testdata/issue11169.tar new file mode 100644 index 000000000000..4d71fa152606 Binary files /dev/null and b/lib/std/tar/testdata/issue11169.tar differ diff --git a/lib/std/tar/testdata/issue12435.tar b/lib/std/tar/testdata/issue12435.tar new file mode 100644 index 000000000000..3542dd8efd5d Binary files /dev/null and b/lib/std/tar/testdata/issue12435.tar differ diff --git a/lib/std/tar/testdata/neg-size.tar b/lib/std/tar/testdata/neg-size.tar new file mode 100644 index 000000000000..21edf38cc3c3 Binary files /dev/null and 
b/lib/std/tar/testdata/neg-size.tar differ diff --git a/lib/std/tar/testdata/nil-uid.tar b/lib/std/tar/testdata/nil-uid.tar new file mode 100644 index 000000000000..cc9cfaa33cc5 Binary files /dev/null and b/lib/std/tar/testdata/nil-uid.tar differ diff --git a/lib/std/tar/testdata/pax-bad-hdr-file.tar b/lib/std/tar/testdata/pax-bad-hdr-file.tar new file mode 100644 index 000000000000..b97cc981f29b Binary files /dev/null and b/lib/std/tar/testdata/pax-bad-hdr-file.tar differ diff --git a/lib/std/tar/testdata/pax-global-records.tar b/lib/std/tar/testdata/pax-global-records.tar new file mode 100644 index 000000000000..3d3d241e65c3 Binary files /dev/null and b/lib/std/tar/testdata/pax-global-records.tar differ diff --git a/lib/std/tar/testdata/pax-multi-hdrs.tar b/lib/std/tar/testdata/pax-multi-hdrs.tar new file mode 100644 index 000000000000..14bc75978080 Binary files /dev/null and b/lib/std/tar/testdata/pax-multi-hdrs.tar differ diff --git a/lib/std/tar/testdata/pax-nul-path.tar b/lib/std/tar/testdata/pax-nul-path.tar new file mode 100644 index 000000000000..c78f82b16e85 Binary files /dev/null and b/lib/std/tar/testdata/pax-nul-path.tar differ diff --git a/lib/std/tar/testdata/pax-nul-xattrs.tar b/lib/std/tar/testdata/pax-nul-xattrs.tar new file mode 100644 index 000000000000..881f51768f98 Binary files /dev/null and b/lib/std/tar/testdata/pax-nul-xattrs.tar differ diff --git a/lib/std/tar/testdata/pax-pos-size-file.tar b/lib/std/tar/testdata/pax-pos-size-file.tar new file mode 100644 index 000000000000..ea5ccf916426 Binary files /dev/null and b/lib/std/tar/testdata/pax-pos-size-file.tar differ diff --git a/lib/std/tar/testdata/pax-records.tar b/lib/std/tar/testdata/pax-records.tar new file mode 100644 index 000000000000..276c211baa38 Binary files /dev/null and b/lib/std/tar/testdata/pax-records.tar differ diff --git a/lib/std/tar/testdata/pax.tar b/lib/std/tar/testdata/pax.tar new file mode 100644 index 000000000000..9bc24b6587d7 Binary files /dev/null and 
b/lib/std/tar/testdata/pax.tar differ diff --git a/lib/std/tar/testdata/sparse-formats.tar b/lib/std/tar/testdata/sparse-formats.tar new file mode 100644 index 000000000000..8bd4e74d50f9 Binary files /dev/null and b/lib/std/tar/testdata/sparse-formats.tar differ diff --git a/lib/std/tar/testdata/star.tar b/lib/std/tar/testdata/star.tar new file mode 100644 index 000000000000..59e2d4e60461 Binary files /dev/null and b/lib/std/tar/testdata/star.tar differ diff --git a/lib/std/tar/testdata/trailing-slash.tar b/lib/std/tar/testdata/trailing-slash.tar new file mode 100644 index 000000000000..93718b303487 Binary files /dev/null and b/lib/std/tar/testdata/trailing-slash.tar differ diff --git a/lib/std/tar/testdata/ustar-file-devs.tar b/lib/std/tar/testdata/ustar-file-devs.tar new file mode 100644 index 000000000000..146e25b79d89 Binary files /dev/null and b/lib/std/tar/testdata/ustar-file-devs.tar differ diff --git a/lib/std/tar/testdata/v7.tar b/lib/std/tar/testdata/v7.tar new file mode 100644 index 000000000000..eb65fc941072 Binary files /dev/null and b/lib/std/tar/testdata/v7.tar differ diff --git a/lib/std/tar/testdata/writer-big-long.tar b/lib/std/tar/testdata/writer-big-long.tar new file mode 100644 index 000000000000..09fc5dd3dd7f Binary files /dev/null and b/lib/std/tar/testdata/writer-big-long.tar differ diff --git a/lib/std/tar/testdata/writer-big.tar b/lib/std/tar/testdata/writer-big.tar new file mode 100644 index 000000000000..435dcbce6abc Binary files /dev/null and b/lib/std/tar/testdata/writer-big.tar differ diff --git a/lib/std/tar/testdata/xattrs.tar b/lib/std/tar/testdata/xattrs.tar new file mode 100644 index 000000000000..9701950edd1f Binary files /dev/null and b/lib/std/tar/testdata/xattrs.tar differ