From b3a2ab3fedfc2e3e15f9024c7334a1f53d9aa7c5 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 10:29:14 +0200
Subject: [PATCH 1/7] macho: extract parallel hasher into a generic helper struct

---
 CMakeLists.txt                   |  1 +
 src/link/MachO/CodeSignature.zig | 65 +++-----------------------------
 src/link/MachO/hasher.zig        | 60 +++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 59 deletions(-)
 create mode 100644 src/link/MachO/hasher.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16c7dd3d0ef3..7a5726bdc582 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -594,6 +594,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"

diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
index 02511dbe29b7..84c5b49362c8 100644
--- a/src/link/MachO/CodeSignature.zig
+++ b/src/link/MachO/CodeSignature.zig
@@ -7,11 +7,10 @@ const log = std.log.scoped(.link);
 const macho = std.macho;
 const mem = std.mem;
 const testing = std.testing;
-const ThreadPool = std.Thread.Pool;
-const WaitGroup = std.Thread.WaitGroup;

 const Allocator = mem.Allocator;
 const Compilation = @import("../../Compilation.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
 const Sha256 = std.crypto.hash.sha2.Sha256;

 const hash_size = Sha256.digest_length;
@@ -289,7 +288,11 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;

     // Calculate hash for each page (in file) and write it to the buffer
-    try self.parallelHash(gpa, comp.thread_pool, opts.file, opts.file_size);
+    var hasher = Hasher(Sha256){};
+    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+        .chunk_size = self.page_size,
+        .max_file_size = opts.file_size,
+    });

     try blobs.append(.{ .code_directory = &self.code_directory });
     header.length += @sizeOf(macho.BlobIndex);
@@ -348,62 +351,6 @@ pub fn writeAdhocSignature(
     }
 }

-fn parallelHash(
-    self: *CodeSignature,
-    gpa: Allocator,
-    pool: *ThreadPool,
-    file: fs.File,
-    file_size: u32,
-) !void {
-    var wg: WaitGroup = .{};
-
-    const total_num_chunks = mem.alignForward(usize, file_size, self.page_size) / self.page_size;
-    assert(self.code_directory.code_slots.items.len >= total_num_chunks);
-
-    const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks);
-    defer gpa.free(buffer);
-
-    const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
-    defer gpa.free(results);
-
-    {
-        wg.reset();
-        defer wg.wait();
-
-        var i: usize = 0;
-        while (i < total_num_chunks) : (i += 1) {
-            const fstart = i * self.page_size;
-            const fsize = if (fstart + self.page_size > file_size)
-                file_size - fstart
-            else
-                self.page_size;
-            wg.start();
-            try pool.spawn(worker, .{
-                file,
-                fstart,
-                buffer[fstart..][0..fsize],
-                &self.code_directory.code_slots.items[i],
-                &results[i],
-                &wg,
-            });
-        }
-    }
-    for (results) |result| _ = try result;
-}
-
-fn worker(
-    file: fs.File,
-    fstart: usize,
-    buffer: []u8,
-    out: *[hash_size]u8,
-    err: *fs.File.PReadError!usize,
-    wg: *WaitGroup,
-) void {
-    defer wg.finish();
-    err.* = file.preadAll(buffer, fstart);
-    Sha256.hash(buffer, out, .{});
-}
-
 pub fn size(self: CodeSignature) u32 {
     var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
     if (self.requirements) |req| {

diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
new file mode 100644
index 000000000000..d7bf6888b559
--- /dev/null
+++ b/src/link/MachO/hasher.zig
@@ -0,0 +1,60 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const ThreadPool = std.Thread.Pool;
+const WaitGroup = std.Thread.WaitGroup;
+
+pub fn ParallelHasher(comptime Hasher: type) type {
+    const hash_size = Hasher.digest_length;
+
+    return struct {
+        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u16 = 0x4000,
+            max_file_size: ?u64 = null,
+        }) !void {
+            _ = self;
+
+            var wg: WaitGroup = .{};
+
+            const file_size = opts.max_file_size orelse try file.getEndPos();
+            const total_num_chunks = mem.alignForward(u64, file_size, opts.chunk_size) / opts.chunk_size;
+            assert(out.len >= total_num_chunks);
+
+            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
+            defer gpa.free(buffer);
+
+            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
+            defer gpa.free(results);
+
+            {
+                wg.reset();
+                defer wg.wait();
+
+                var i: usize = 0;
+                while (i < total_num_chunks) : (i += 1) {
+                    const fstart = i * opts.chunk_size;
+                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
+                    wg.start();
+                    try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
+                }
+            }
+            for (results) |result| _ = try result;
+        }
+
+        fn worker(
+            file: fs.File,
+            fstart: usize,
+            buffer: []u8,
+            out: *[hash_size]u8,
+            err: *fs.File.PReadError!usize,
+            wg: *WaitGroup,
+        ) void {
+            defer wg.finish();
+            err.* = file.preadAll(buffer, fstart);
+            Hasher.hash(buffer, out, .{});
+        }
+    };
+}
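
Note on the patch above: ParallelHasher is generic over any type that exposes a `digest_length` constant and a `hash` function, which is what lets the code-signature path drive it with Sha256 while the next patch reuses it with Md5. A minimal usage sketch against the interface as introduced here (the wrapper function and its name are illustrative, not part of the series):

    const std = @import("std");
    const Sha256 = std.crypto.hash.sha2.Sha256;
    const ParallelHasher = @import("hasher.zig").ParallelHasher;

    fn hashFileChunks(gpa: std.mem.Allocator, pool: *std.Thread.Pool, file: std.fs.File, out: [][Sha256.digest_length]u8) !void {
        // `out` must hold one digest per chunk, i.e. at least ceil(file_size / chunk_size) entries.
        var hasher = ParallelHasher(Sha256){};
        try hasher.hash(gpa, pool, file, out, .{}); // chunk_size defaults to 0x4000; size is read from the file
    }
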
From 10aaf2983d5db65082c4b348269150eddc12e67e Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 11:26:56 +0200
Subject: [PATCH 2/7] macho: hash the entire file contents for UUID but calc in parallel

---
 src/link/MachO.zig      |  19 +++--
 src/link/MachO/uuid.zig |  46 +++++++++++
 src/link/MachO/zld.zig  | 168 +++-------------------------------------
 3 files changed, 67 insertions(+), 166 deletions(-)
 create mode 100644 src/link/MachO/uuid.zig

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 024fe1f8d9dc..18fb37babfd1 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -13,6 +13,7 @@ const mem = std.mem;
 const meta = std.meta;

 const aarch64 = @import("../arch/aarch64/bits.zig");
+const calcUuid = @import("MachO/uuid.zig").calcUuid;
 const codegen = @import("../codegen.zig");
 const dead_strip = @import("MachO/dead_strip.zig");
 const fat = @import("MachO/fat.zig");
@@ -756,11 +757,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
     });
     try load_commands.writeBuildVersionLC(&self.base.options, lc_writer);

-    if (self.cold_start) {
-        std.crypto.random.bytes(&self.uuid_cmd.uuid);
-        Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
-        conformUuid(&self.uuid_cmd.uuid);
-    }
+    const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
     try lc_writer.writeStruct(self.uuid_cmd);

     try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer);
@@ -769,10 +766,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
         try lc_writer.writeStruct(self.codesig_cmd);
     }

-    try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
-
     const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
+    try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
     try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
+    try self.writeUuid(comp, uuid_cmd_offset);

     if (codesig) |*csig| {
         try self.writeCodeSignature(comp, csig); // code signing always comes last
@@ -3510,6 +3507,14 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     self.dysymtab_cmd.nindirectsyms = nindirectsyms;
 }

+fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32) !void {
+    const seg = self.getLinkeditSegmentPtr();
+    const file_size = seg.fileoff + seg.filesize;
+    try calcUuid(comp, self.base.file.?, file_size, &self.uuid_cmd.uuid);
+    const offset = uuid_cmd_offset + @sizeOf(macho.load_command);
+    try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset);
+}
+
 fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void {
     const seg = self.getLinkeditSegmentPtr();
     // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file

diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
new file mode 100644
index 000000000000..486bf43b997e
--- /dev/null
+++ b/src/link/MachO/uuid.zig
@@ -0,0 +1,46 @@
+const std = @import("std");
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Md5 = std.crypto.hash.Md5;
+const Hasher = @import("hasher.zig").ParallelHasher;
+
+/// Somewhat random chunk size for MD5 hash calculation.
+pub const chunk_size = 0x4000;
+
+/// Calculates the Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
+/// the final digest.
+/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
+/// and we will use it too as it seems accepted by Apple OSes.
+/// TODO LLD also hashes the output filename to disambiguate between otherwise identical builds
+/// with different output files. Should we also do that?
+pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_hashes = mem.alignForward(u64, file_size, chunk_size) / chunk_size;
+
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    defer comp.gpa.free(hashes);
+
+    var hasher = Hasher(Md5){};
+    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+        .chunk_size = chunk_size,
+        .max_file_size = file_size,
+    });
+
+    const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
+    defer comp.gpa.free(final_buffer);
+
+    for (hashes, 0..) |hash, i| {
+        mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
+    }
+
+    Md5.hash(final_buffer, out, .{});
+    conform(out);
+}
+
+inline fn conform(out: *[Md5.digest_length]u8) void {
+    // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
+    out[6] = (out[6] & 0x0F) | (3 << 4);
+    out[8] = (out[8] & 0x3F) | 0x80;
+}
diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
index 7902d67d87e5..a938a1cf9092 100644
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@@ -9,14 +9,15 @@ const math = std.math;
 const mem = std.mem;

 const aarch64 = @import("../../arch/aarch64/bits.zig");
+const calcUuid = @import("uuid.zig").calcUuid;
 const dead_strip = @import("dead_strip.zig");
 const eh_frame = @import("eh_frame.zig");
 const fat = @import("fat.zig");
 const link = @import("../../link.zig");
 const load_commands = @import("load_commands.zig");
+const stub_helpers = @import("stubs.zig");
 const thunks = @import("thunks.zig");
 const trace = @import("../../tracy.zig").trace;
-const stub_helpers = @import("stubs.zig");

 const Allocator = mem.Allocator;
 const Archive = @import("Archive.zig");
@@ -2575,150 +2576,12 @@ pub const Zld = struct {
         self.dysymtab_cmd.nindirectsyms = nindirectsyms;
     }

-    fn writeUuid(self: *Zld, comp: *const Compilation, args: struct {
-        linkedit_cmd_offset: u32,
-        symtab_cmd_offset: u32,
-        uuid_cmd_offset: u32,
-        codesig_cmd_offset: ?u32,
-    }) !void {
-        _ = comp;
-        switch (self.options.optimize_mode) {
-            .Debug => {
-                // In Debug we don't really care about reproducibility, so put in a random value
-                // and be done with it.
-                std.crypto.random.bytes(&self.uuid_cmd.uuid);
-                Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
-                conformUuid(&self.uuid_cmd.uuid);
-            },
-            else => {
-                // We set the max file size to the actual strtab buffer length to exclude any strtab padding.
-                const max_file_end = @intCast(u32, self.symtab_cmd.stroff + self.strtab.buffer.items.len);
-
-                const FileSubsection = struct {
-                    start: u32,
-                    end: u32,
-                };
-
-                var subsections: [5]FileSubsection = undefined;
-                var count: usize = 0;
-
-                // Exclude LINKEDIT segment command as it contains file size that includes stabs contribution
-                // and code signature.
-                subsections[count] = .{
-                    .start = 0,
-                    .end = args.linkedit_cmd_offset,
-                };
-                count += 1;
-
-                // Exclude SYMTAB and DYSYMTAB commands for the same reason.
-                subsections[count] = .{
-                    .start = subsections[count - 1].end + @sizeOf(macho.segment_command_64),
-                    .end = args.symtab_cmd_offset,
-                };
-                count += 1;
-
-                // Exclude CODE_SIGNATURE command (if present).
-                if (args.codesig_cmd_offset) |offset| {
-                    subsections[count] = .{
-                        .start = subsections[count - 1].end + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command),
-                        .end = offset,
-                    };
-                    count += 1;
-                }
-
-                if (!self.options.strip) {
-                    // Exclude region comprising all symbol stabs.
-                    const nlocals = self.dysymtab_cmd.nlocalsym;
-
-                    const locals = try self.gpa.alloc(macho.nlist_64, nlocals);
-                    defer self.gpa.free(locals);
-
-                    const locals_buf = @ptrCast([*]u8, locals.ptr)[0 .. @sizeOf(macho.nlist_64) * nlocals];
-                    const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff);
-                    if (amt != locals_buf.len) return error.InputOutput;
-
-                    const istab: usize = for (locals, 0..) |local, i| {
-                        if (local.stab()) break i;
-                    } else locals.len;
-                    const nstabs = locals.len - istab;
-
-                    if (nstabs == 0) {
-                        subsections[count] = .{
-                            .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
-                                @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
-                            else
-                                @sizeOf(macho.linkedit_data_command),
-                            .end = max_file_end,
-                        };
-                        count += 1;
-                    } else {
-                        // Exclude a subsection of the strtab with names of the stabs.
-                        // We do not care about anything succeeding strtab as it is the code signature data which is
-                        // not part of the UUID calculation anyway.
-                        const stab_stroff = locals[istab].n_strx;
-
-                        subsections[count] = .{
-                            .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
-                                @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
-                            else
-                                @sizeOf(macho.linkedit_data_command),
-                            .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)),
-                        };
-                        count += 1;
-
-                        subsections[count] = .{
-                            .start = subsections[count - 1].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)),
-                            .end = self.symtab_cmd.stroff + stab_stroff,
-                        };
-                        count += 1;
-                    }
-                } else {
-                    subsections[count] = .{
-                        .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
-                            @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
-                        else
-                            @sizeOf(macho.linkedit_data_command),
-                        .end = max_file_end,
-                    };
-                    count += 1;
-                }
-
-                const chunk_size = 0x4000;
-
-                var hasher = Md5.init(.{});
-                var buffer: [chunk_size]u8 = undefined;
-
-                for (subsections[0..count]) |cut| {
-                    const size = cut.end - cut.start;
-                    const num_chunks = mem.alignForward(usize, size, chunk_size) / chunk_size;
-
-                    var i: usize = 0;
-                    while (i < num_chunks) : (i += 1) {
-                        const fstart = cut.start + i * chunk_size;
-                        const fsize = if (fstart + chunk_size > cut.end)
-                            cut.end - fstart
-                        else
-                            chunk_size;
-                        const amt = try self.file.preadAll(buffer[0..fsize], fstart);
-                        if (amt != fsize) return error.InputOutput;
-
-                        hasher.update(buffer[0..fsize]);
-                    }
-                }
-
-                hasher.final(&self.uuid_cmd.uuid);
-                conformUuid(&self.uuid_cmd.uuid);
-            },
-        }
-
-        const in_file = args.uuid_cmd_offset + @sizeOf(macho.load_command);
-        try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file);
-    }
-
-    inline fn conformUuid(out: *[Md5.digest_length]u8) void {
-        // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
-        out[6] = (out[6] & 0x0F) | (3 << 4);
-        out[8] = (out[8] & 0x3F) | 0x80;
+    fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32) !void {
+        const seg = self.getLinkeditSegmentPtr();
+        const file_size = seg.fileoff + seg.filesize;
+        try calcUuid(comp, self.file, file_size, &self.uuid_cmd.uuid);
+        const offset = uuid_cmd_offset + @sizeOf(macho.load_command);
+        try self.file.pwriteAll(&self.uuid_cmd.uuid, offset);
     }

     fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
@@ -4041,16 +3904,11 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
     const lc_writer = lc_buffer.writer();

     try zld.writeSegmentHeaders(lc_writer);
-    const linkedit_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len - @sizeOf(macho.segment_command_64));
-
     try lc_writer.writeStruct(zld.dyld_info_cmd);
     try lc_writer.writeStruct(zld.function_starts_cmd);
     try lc_writer.writeStruct(zld.data_in_code_cmd);
-
-    const symtab_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
     try lc_writer.writeStruct(zld.symtab_cmd);
     try lc_writer.writeStruct(zld.dysymtab_cmd);
-
     try load_commands.writeDylinkerLC(lc_writer);

     if (zld.options.output_mode == .Exe) {
@@ -4088,22 +3946,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr

     try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer);

-    var codesig_cmd_offset: ?u32 = null;
     if (requires_codesig) {
-        codesig_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
         try lc_writer.writeStruct(zld.codesig_cmd);
     }

     const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
     try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
     try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
-
-    try zld.writeUuid(comp, .{
-        .linkedit_cmd_offset = linkedit_cmd_offset,
-        .symtab_cmd_offset = symtab_cmd_offset,
-        .uuid_cmd_offset = uuid_cmd_offset,
-        .codesig_cmd_offset = codesig_cmd_offset,
-    });
+    try zld.writeUuid(comp, uuid_cmd_offset);

     if (codesig) |*csig| {
         try zld.writeCodeSignature(comp, csig); // code signing always comes last
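
Note on the patch above: the scheme it puts in place is worth spelling out. The UUID is not MD5(file) but MD5 over the concatenation of per-chunk MD5 digests, which is what makes the work parallelizable across the thread pool. A serial sketch of the same computation (illustrative only; the real code reads chunks from the output file via ParallelHasher):

    const std = @import("std");
    const Md5 = std.crypto.hash.Md5;

    fn uuidDigestSerial(contents: []const u8, out: *[Md5.digest_length]u8) void {
        const chunk_size: usize = 0x4000;
        var final = Md5.init(.{});
        var offset: usize = 0;
        while (offset < contents.len) : (offset += chunk_size) {
            const end = @min(offset + chunk_size, contents.len);
            var digest: [Md5.digest_length]u8 = undefined;
            Md5.hash(contents[offset..end], &digest, .{}); // hash each chunk independently
            final.update(&digest); // then hash the digests, not the raw bytes
        }
        final.final(out);
    }
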
From c2554cf0f17668659d0b898fcb43b3efb8694d3a Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 11:33:06 +0200
Subject: [PATCH 3/7] link-test: remove now obsolete UUID test for MachO

---
 test/link.zig                  |   9 ---
 test/link/macho/uuid/build.zig | 144 ---------------------------------
 test/link/macho/uuid/test.c    |   2 -
 3 files changed, 155 deletions(-)
 delete mode 100644 test/link/macho/uuid/build.zig
 delete mode 100644 test/link/macho/uuid/test.c

diff --git a/test/link.zig b/test/link.zig
index b8857a1e5e29..6f8134e4f410 100644
--- a/test/link.zig
+++ b/test/link.zig
@@ -164,15 +164,6 @@ pub const cases = [_]Case{
         .build_root = "test/link/macho/unwind_info",
         .import = @import("link/macho/unwind_info/build.zig"),
     },
-    // TODO: re-enable this test. It currently has some incompatibilities with
-    // the new build system API. In particular, it depends on installing the build
-    // artifacts, which should be unnecessary, and it has a custom build step that
-    // prints directly to stderr instead of failing the step with an error message.
-    //.{
-    //    .build_root = "test/link/macho/uuid",
-    //    .import = @import("link/macho/uuid/build.zig"),
-    //},
-
     .{
         .build_root = "test/link/macho/weak_library",
         .import = @import("link/macho/weak_library/build.zig"),

diff --git a/test/link/macho/uuid/build.zig b/test/link/macho/uuid/build.zig
deleted file mode 100644
index f2ef6b33ec5f..000000000000
--- a/test/link/macho/uuid/build.zig
+++ /dev/null
@@ -1,144 +0,0 @@
-const std = @import("std");
-const FileSource = std.Build.FileSource;
-const Step = std.Build.Step;
-
-pub const requires_symlinks = true;
-
-pub fn build(b: *std.Build) void {
-    const test_step = b.step("test", "Test");
-    b.default_step = test_step;
-
-    // We force cross-compilation to ensure we always pick a generic CPU with
-    // constant set of CPU features.
-    const aarch64_macos = std.zig.CrossTarget{
-        .cpu_arch = .aarch64,
-        .os_tag = .macos,
-    };
-
-    testUuid(b, test_step, .ReleaseSafe, aarch64_macos);
-    testUuid(b, test_step, .ReleaseFast, aarch64_macos);
-    testUuid(b, test_step, .ReleaseSmall, aarch64_macos);
-
-    const x86_64_macos = std.zig.CrossTarget{
-        .cpu_arch = .x86_64,
-        .os_tag = .macos,
-    };
-
-    testUuid(b, test_step, .ReleaseSafe, x86_64_macos);
-    testUuid(b, test_step, .ReleaseFast, x86_64_macos);
-    testUuid(b, test_step, .ReleaseSmall, x86_64_macos);
-}
-
-fn testUuid(
-    b: *std.Build,
-    test_step: *std.Build.Step,
-    optimize: std.builtin.OptimizeMode,
-    target: std.zig.CrossTarget,
-) void {
-    // The calculated UUID value is independent of debug info and so it should
-    // stay the same across builds.
-    {
-        const dylib = simpleDylib(b, optimize, target);
-        const install_step = b.addInstallArtifact(dylib);
-        install_step.dest_sub_path = "test1.dylib";
-        install_step.step.dependOn(&dylib.step);
-    }
-    {
-        const dylib = simpleDylib(b, optimize, target);
-        dylib.strip = true;
-        const install_step = b.addInstallArtifact(dylib);
-        install_step.dest_sub_path = "test2.dylib";
-        install_step.step.dependOn(&dylib.step);
-    }
-
-    const cmp_step = CompareUuid.create(b, "test1.dylib", "test2.dylib");
-    test_step.dependOn(&cmp_step.step);
-}
-
-fn simpleDylib(
-    b: *std.Build,
-    optimize: std.builtin.OptimizeMode,
-    target: std.zig.CrossTarget,
-) *std.Build.Step.Compile {
-    const dylib = b.addSharedLibrary(.{
-        .name = "test",
-        .version = .{ .major = 1, .minor = 0, .patch = 0 },
-        .optimize = optimize,
-        .target = target,
-    });
-    dylib.addCSourceFile("test.c", &.{});
-    dylib.linkLibC();
-    return dylib;
-}
-
-const CompareUuid = struct {
-    pub const base_id = .custom;
-
-    step: Step,
-    lhs: []const u8,
-    rhs: []const u8,
-
-    pub fn create(owner: *std.Build, lhs: []const u8, rhs: []const u8) *CompareUuid {
-        const self = owner.allocator.create(CompareUuid) catch @panic("OOM");
-        self.* = CompareUuid{
-            .step = Step.init(.{
-                .id = base_id,
-                .name = owner.fmt("compare uuid: {s} and {s}", .{
-                    lhs,
-                    rhs,
-                }),
-                .owner = owner,
-                .makeFn = make,
-            }),
-            .lhs = lhs,
-            .rhs = rhs,
-        };
-        return self;
-    }
-
-    fn make(step: *Step, prog_node: *std.Progress.Node) anyerror!void {
-        _ = prog_node;
-        const b = step.owner;
-        const self = @fieldParentPtr(CompareUuid, "step", step);
-        const gpa = b.allocator;
-
-        var lhs_uuid: [16]u8 = undefined;
-        const lhs_path = b.getInstallPath(.lib, self.lhs);
-        try parseUuid(gpa, lhs_path, &lhs_uuid);
-
-        var rhs_uuid: [16]u8 = undefined;
-        const rhs_path = b.getInstallPath(.lib, self.rhs);
-        try parseUuid(gpa, rhs_path, &rhs_uuid);
-
-        try std.testing.expectEqualStrings(&lhs_uuid, &rhs_uuid);
-    }
-
-    fn parseUuid(gpa: std.mem.Allocator, path: []const u8, uuid: *[16]u8) anyerror!void {
-        const max_bytes: usize = 20 * 1024 * 1024;
-        const data = try std.fs.cwd().readFileAllocOptions(
-            gpa,
-            path,
-            max_bytes,
-            null,
-            @alignOf(u64),
-            null,
-        );
-        var stream = std.io.fixedBufferStream(data);
-        const reader = stream.reader();
-
-        const hdr = try reader.readStruct(std.macho.mach_header_64);
-        if (hdr.magic != std.macho.MH_MAGIC_64) {
-            return error.InvalidMagicNumber;
-        }
-
-        var it = std.macho.LoadCommandIterator{
-            .ncmds = hdr.ncmds,
-            .buffer = data[@sizeOf(std.macho.mach_header_64)..][0..hdr.sizeofcmds],
-        };
-        const cmd = while (it.next()) |cmd| switch (cmd.cmd()) {
-            .UUID => break cmd.cast(std.macho.uuid_command).?,
-            else => {},
-        } else return error.UuidLoadCommandNotFound;
-        std.mem.copy(u8, uuid, &cmd.uuid);
-    }
-};

diff --git a/test/link/macho/uuid/test.c b/test/link/macho/uuid/test.c
deleted file mode 100644
index 6f23a1a92627..000000000000
--- a/test/link/macho/uuid/test.c
+++ /dev/null
@@ -1,2 +0,0 @@
-void test() {}
-
From 8087c134dbeaa2925948597883d6a401f251a716 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 12:53:26 +0200
Subject: [PATCH 4/7] macho: calculate UUID chunk size based on available thread count

---
 CMakeLists.txt          | 1 +
 src/link/MachO/uuid.zig | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a5726bdc582..470be240c4e3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -597,6 +597,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"
    "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
    "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"

diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
index 486bf43b997e..4c1b6a17eed1 100644
--- a/src/link/MachO/uuid.zig
+++ b/src/link/MachO/uuid.zig
@@ -7,9 +7,6 @@ const Compilation = @import("../../Compilation.zig");
 const Md5 = std.crypto.hash.Md5;
 const Hasher = @import("hasher.zig").ParallelHasher;

-/// Somewhat random chunk size for MD5 hash calculation.
-pub const chunk_size = 0x4000;
-
 /// Calculates the Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
 /// the final digest.
 /// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
 /// and we will use it too as it seems accepted by Apple OSes.
 /// TODO LLD also hashes the output filename to disambiguate between otherwise identical builds
 /// with different output files. Should we also do that?
 pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const num_chunks = @intCast(u64, comp.thread_pool.threads.len) * 10;
+    const chunk_size = @divTrunc(file_size + num_chunks - 1, num_chunks);
     const total_hashes = mem.alignForward(u64, file_size, chunk_size) / chunk_size;

     const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
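
Note on the patch above: the two added lines amount to a ceiling division. With num_chunks = threads * 10 as the target, chunk_size = ceil(file_size / num_chunks), and total_hashes = ceil(file_size / chunk_size) then lands at or below the target. A worked example with illustrative numbers: an 8 MiB file with 8 threads gives num_chunks = 80 and chunk_size = ceil(8388608 / 80) = 104858 bytes, so 80 hashes. A small sketch of the arithmetic (helper name is hypothetical):

    const std = @import("std");

    fn ceilDiv(a: u64, b: u64) u64 {
        return (a + b - 1) / b; // same form as @divTrunc(file_size + num_chunks - 1, num_chunks) above
    }

    test "uuid chunk sizing (illustrative)" {
        const file_size: u64 = 8 * 1024 * 1024;
        const num_chunks: u64 = 8 * 10; // threads.len * 10
        const chunk_size = ceilDiv(file_size, num_chunks);
        try std.testing.expectEqual(@as(u64, 104858), chunk_size);
        try std.testing.expect(ceilDiv(file_size, chunk_size) <= num_chunks);
    }
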
From ef9d6331fc9067f7ba47eccee204fa2f0c5d0a18 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 20:33:27 +0200
Subject: [PATCH 5/7] macho: clean up hasher interface

---
 src/link/MachO/CodeSignature.zig |  4 ++--
 src/link/MachO/hasher.zig        | 33 +++++++++++++++++++-------------
 src/link/MachO/uuid.zig          | 11 +++++------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
index 84c5b49362c8..fcb4c1606353 100644
--- a/src/link/MachO/CodeSignature.zig
+++ b/src/link/MachO/CodeSignature.zig
@@ -288,8 +288,8 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;

     // Calculate hash for each page (in file) and write it to the buffer
-    var hasher = Hasher(Sha256){};
-    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+    var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
         .chunk_size = self.page_size,
         .max_file_size = opts.file_size,
     });

diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
index d7bf6888b559..5cdba0b5278f 100644
--- a/src/link/MachO/hasher.zig
+++ b/src/link/MachO/hasher.zig
@@ -11,34 +11,39 @@ pub fn ParallelHasher(comptime Hasher: type) type {
     const hash_size = Hasher.digest_length;

     return struct {
-        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
-            chunk_size: u16 = 0x4000,
+        allocator: Allocator,
+        thread_pool: *ThreadPool,
+
+        pub fn hash(self: Self, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u64 = 0x4000,
             max_file_size: ?u64 = null,
         }) !void {
-            _ = self;
-
             var wg: WaitGroup = .{};

             const file_size = opts.max_file_size orelse try file.getEndPos();
-            const total_num_chunks = mem.alignForward(u64, file_size, opts.chunk_size) / opts.chunk_size;
-            assert(out.len >= total_num_chunks);

-            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
-            defer gpa.free(buffer);
+            const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len);
+            defer self.allocator.free(buffer);

-            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
-            defer gpa.free(results);
+            const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len);
+            defer self.allocator.free(results);

             {
                 wg.reset();
                 defer wg.wait();

-                var i: usize = 0;
-                while (i < total_num_chunks) : (i += 1) {
+                for (out, results, 0..) |*out_buf, *result, i| {
                     const fstart = i * opts.chunk_size;
                     const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
                     wg.start();
-                    try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
+                    try self.thread_pool.spawn(worker, .{
+                        file,
+                        fstart,
+                        buffer[fstart..][0..fsize],
+                        &(out_buf.*),
+                        &(result.*),
+                        &wg,
+                    });
                 }
             }
             for (results) |result| _ = try result;
@@ -56,5 +61,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
             err.* = file.preadAll(buffer, fstart);
             Hasher.hash(buffer, out, .{});
         }
+
+        const Self = @This();
     };
 }

diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
index 4c1b6a17eed1..0770702dd75f 100644
--- a/src/link/MachO/uuid.zig
+++ b/src/link/MachO/uuid.zig
@@ -14,20 +14,19 @@ const Hasher = @import("hasher.zig").ParallelHasher;
 /// with different output files. Should we also do that?
 pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
-    const num_chunks = @intCast(u64, comp.thread_pool.threads.len) * 10;
+    const num_chunks = comp.thread_pool.threads.len * 0x10;
     const chunk_size = @divTrunc(file_size + num_chunks - 1, num_chunks);
-    const total_hashes = mem.alignForward(u64, file_size, chunk_size) / chunk_size;

-    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, num_chunks);
     defer comp.gpa.free(hashes);

-    var hasher = Hasher(Md5){};
-    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+    var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(file, hashes, .{
         .chunk_size = chunk_size,
         .max_file_size = file_size,
     });

-    const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
+    const final_buffer = try comp.gpa.alloc(u8, num_chunks * Md5.digest_length);
     defer comp.gpa.free(final_buffer);

     for (hashes, 0..) |hash, i| {
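
Note on the patch above: with the allocator and thread pool stored as struct fields, call sites configure the hasher once and the hash call itself only names the file and the output slice. A sketch of the revised call pattern (the wrapper function is illustrative, not part of the series):

    const std = @import("std");
    const Md5 = std.crypto.hash.Md5;
    const ParallelHasher = @import("hasher.zig").ParallelHasher;

    fn hashChunks(gpa: std.mem.Allocator, pool: *std.Thread.Pool, file: std.fs.File, out: [][Md5.digest_length]u8) !void {
        // Dependencies are injected via fields rather than threaded through every call.
        var hasher = ParallelHasher(Md5){ .allocator = gpa, .thread_pool = pool };
        try hasher.hash(file, out, .{}); // chunk_size defaults to 0x4000
    }
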
From 22540e5402799d1c4ee12b5163744cf7431a4c2c Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 19 Jun 2023 22:28:03 +0200
Subject: [PATCH 6/7] macho: exclude code signature padding from uuid calculation

---
 src/link/MachO.zig      | 10 ++++++----
 src/link/MachO/uuid.zig |  7 ++++---
 src/link/MachO/zld.zig  | 10 ++++++----
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 18fb37babfd1..c91d18b0f731 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -769,7 +769,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
     const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
     try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
     try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
-    try self.writeUuid(comp, uuid_cmd_offset);
+    try self.writeUuid(comp, uuid_cmd_offset, requires_codesig);

     if (codesig) |*csig| {
         try self.writeCodeSignature(comp, csig); // code signing always comes last
@@ -3507,9 +3507,11 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     self.dysymtab_cmd.nindirectsyms = nindirectsyms;
 }

-fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32) !void {
-    const seg = self.getLinkeditSegmentPtr();
-    const file_size = seg.fileoff + seg.filesize;
+fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void {
+    const file_size = if (!has_codesig) blk: {
+        const seg = self.getLinkeditSegmentPtr();
+        break :blk seg.fileoff + seg.filesize;
+    } else self.codesig_cmd.dataoff;
     try calcUuid(comp, self.base.file.?, file_size, &self.uuid_cmd.uuid);
     const offset = uuid_cmd_offset + @sizeOf(macho.load_command);
     try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset);

diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
index 0770702dd75f..8cef0693bf39 100644
--- a/src/link/MachO/uuid.zig
+++ b/src/link/MachO/uuid.zig
@@ -15,9 +15,10 @@ const Hasher = @import("hasher.zig").ParallelHasher;
 pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
     const num_chunks = comp.thread_pool.threads.len * 0x10;
-    const chunk_size = @divTrunc(file_size + num_chunks - 1, num_chunks);
+    const chunk_size = @divTrunc(file_size, num_chunks);
+    const actual_num_chunks = if (@rem(file_size, num_chunks) > 0) num_chunks + 1 else num_chunks;

-    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, num_chunks);
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
     defer comp.gpa.free(hashes);

     var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
     try hasher.hash(file, hashes, .{
         .chunk_size = chunk_size,
         .max_file_size = file_size,
     });

-    const final_buffer = try comp.gpa.alloc(u8, num_chunks * Md5.digest_length);
+    const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
     defer comp.gpa.free(final_buffer);

     for (hashes, 0..) |hash, i| {
diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
index a938a1cf9092..6087035562ed 100644
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@@ -2576,9 +2576,11 @@ pub const Zld = struct {
         self.dysymtab_cmd.nindirectsyms = nindirectsyms;
     }

-    fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32) !void {
-        const seg = self.getLinkeditSegmentPtr();
-        const file_size = seg.fileoff + seg.filesize;
+    fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void {
+        const file_size = if (!has_codesig) blk: {
+            const seg = self.getLinkeditSegmentPtr();
+            break :blk seg.fileoff + seg.filesize;
+        } else self.codesig_cmd.dataoff;
         try calcUuid(comp, self.file, file_size, &self.uuid_cmd.uuid);
         const offset = uuid_cmd_offset + @sizeOf(macho.load_command);
         try self.file.pwriteAll(&self.uuid_cmd.uuid, offset);
@@ -3953,7 +3955,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
     const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
     try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
     try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
-    try zld.writeUuid(comp, uuid_cmd_offset);
+    try zld.writeUuid(comp, uuid_cmd_offset, requires_codesig);

     if (codesig) |*csig| {
         try zld.writeCodeSignature(comp, csig); // code signing always comes last

From eb1050b83aecc3b681901613eeb316030f08ad12 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 20 Jun 2023 10:08:54 +0200
Subject: [PATCH 7/7] macho: fix 32bit compilation issues

---
 src/link/MachO/hasher.zig | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
index 5cdba0b5278f..40c034c90cec 100644
--- a/src/link/MachO/hasher.zig
+++ b/src/link/MachO/hasher.zig
@@ -20,9 +20,13 @@ pub fn ParallelHasher(comptime Hasher: type) type {
         }) !void {
             var wg: WaitGroup = .{};

-            const file_size = opts.max_file_size orelse try file.getEndPos();
+            const file_size = blk: {
+                const file_size = opts.max_file_size orelse try file.getEndPos();
+                break :blk std.math.cast(usize, file_size) orelse return error.Overflow;
+            };
+            const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow;

-            const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len);
+            const buffer = try self.allocator.alloc(u8, chunk_size * out.len);
             defer self.allocator.free(buffer);

             const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len);
             defer self.allocator.free(results);

             {
                 wg.reset();
                 defer wg.wait();

                 for (out, results, 0..) |*out_buf, *result, i| {
-                    const fstart = i * opts.chunk_size;
-                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
+                    const fstart = i * chunk_size;
+                    const fsize = if (fstart + chunk_size > file_size) file_size - fstart else chunk_size;
                     wg.start();
                     try self.thread_pool.spawn(worker, .{
                         file,