2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -594,8 +594,10 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
21 changes: 14 additions & 7 deletions src/link/MachO.zig
@@ -13,6 +13,7 @@ const mem = std.mem;
 const meta = std.meta;
 
 const aarch64 = @import("../arch/aarch64/bits.zig");
+const calcUuid = @import("MachO/uuid.zig").calcUuid;
 const codegen = @import("../codegen.zig");
 const dead_strip = @import("MachO/dead_strip.zig");
 const fat = @import("MachO/fat.zig");
@@ -756,11 +757,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node)
     });
     try load_commands.writeBuildVersionLC(&self.base.options, lc_writer);
 
-    if (self.cold_start) {
-        std.crypto.random.bytes(&self.uuid_cmd.uuid);
-        Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
-        conformUuid(&self.uuid_cmd.uuid);
-    }
+    const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
     try lc_writer.writeStruct(self.uuid_cmd);
 
     try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer);
@@ -769,10 +766,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node)
         try lc_writer.writeStruct(self.codesig_cmd);
     }
 
-    try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
-
     const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
+    try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
     try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
+    try self.writeUuid(comp, uuid_cmd_offset, requires_codesig);
 
     if (codesig) |*csig| {
         try self.writeCodeSignature(comp, csig); // code signing always comes last
@@ -3510,6 +3507,16 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     self.dysymtab_cmd.nindirectsyms = nindirectsyms;
 }
 
+fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32, has_codesig: bool) !void {
+    const file_size = if (!has_codesig) blk: {
+        const seg = self.getLinkeditSegmentPtr();
+        break :blk seg.fileoff + seg.filesize;
+    } else self.codesig_cmd.dataoff;
+    try calcUuid(comp, self.base.file.?, file_size, &self.uuid_cmd.uuid);
+    const offset = uuid_cmd_offset + @sizeOf(macho.load_command);
+    try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset);
+}
+
 fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void {
     const seg = self.getLinkeditSegmentPtr();
     // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file
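A note on the offset arithmetic in writeUuid above: macho.uuid_command begins with the generic cmd/cmdsize pair, so the 16 uuid bytes sit exactly @sizeOf(macho.load_command), i.e. 8 bytes, past the start of the command. A minimal comptime sketch (not part of the PR) that checks this assumption:

const std = @import("std");
const macho = std.macho;

// The uuid payload of LC_UUID starts right after the generic load_command
// header, which is why writeUuid adds @sizeOf(macho.load_command) to the
// command's file offset before patching in the digest.
comptime {
    std.debug.assert(@offsetOf(macho.uuid_command, "uuid") == @sizeOf(macho.load_command));
}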
65 changes: 6 additions & 59 deletions src/link/MachO/CodeSignature.zig
@@ -7,11 +7,10 @@ const log = std.log.scoped(.link);
 const macho = std.macho;
 const mem = std.mem;
 const testing = std.testing;
-const ThreadPool = std.Thread.Pool;
-const WaitGroup = std.Thread.WaitGroup;
 
 const Allocator = mem.Allocator;
 const Compilation = @import("../../Compilation.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
 const Sha256 = std.crypto.hash.sha2.Sha256;
 
 const hash_size = Sha256.digest_length;
@@ -289,7 +288,11 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;
 
     // Calculate hash for each page (in file) and write it to the buffer
-    try self.parallelHash(gpa, comp.thread_pool, opts.file, opts.file_size);
+    var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
+        .chunk_size = self.page_size,
+        .max_file_size = opts.file_size,
+    });
 
     try blobs.append(.{ .code_directory = &self.code_directory });
     header.length += @sizeOf(macho.BlobIndex);
@@ -348,62 +351,6 @@ pub fn writeAdhocSignature(
     }
 }
 
-fn parallelHash(
-    self: *CodeSignature,
-    gpa: Allocator,
-    pool: *ThreadPool,
-    file: fs.File,
-    file_size: u32,
-) !void {
-    var wg: WaitGroup = .{};
-
-    const total_num_chunks = mem.alignForward(usize, file_size, self.page_size) / self.page_size;
-    assert(self.code_directory.code_slots.items.len >= total_num_chunks);
-
-    const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks);
-    defer gpa.free(buffer);
-
-    const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
-    defer gpa.free(results);
-
-    {
-        wg.reset();
-        defer wg.wait();
-
-        var i: usize = 0;
-        while (i < total_num_chunks) : (i += 1) {
-            const fstart = i * self.page_size;
-            const fsize = if (fstart + self.page_size > file_size)
-                file_size - fstart
-            else
-                self.page_size;
-            wg.start();
-            try pool.spawn(worker, .{
-                file,
-                fstart,
-                buffer[fstart..][0..fsize],
-                &self.code_directory.code_slots.items[i],
-                &results[i],
-                &wg,
-            });
-        }
-    }
-    for (results) |result| _ = try result;
-}
-
-fn worker(
-    file: fs.File,
-    fstart: usize,
-    buffer: []u8,
-    out: *[hash_size]u8,
-    err: *fs.File.PReadError!usize,
-    wg: *WaitGroup,
-) void {
-    defer wg.finish();
-    err.* = file.preadAll(buffer, fstart);
-    Sha256.hash(buffer, out, .{});
-}
-
 pub fn size(self: CodeSignature) u32 {
     var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
     if (self.requirements) |req| {
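Because hasher.hash is called with chunk_size set to the code-signing page size, each entry of code_slots receives the digest of exactly one file page, which is what the deleted parallelHash computed by hand. A small sketch of the page arithmetic (the file size and page size are assumed example values):

const std = @import("std");

// With a 0x1000-byte code-signing page, a 10_000-byte input spans
// ceil(10_000 / 4096) = 3 pages, so hasher.hash fills three code slots;
// the final page is short and is hashed at its actual length.
test "code slot count for a short final page" {
    const page_size: usize = 0x1000;
    const file_size: usize = 10_000;
    const total_pages = std.mem.alignForward(usize, file_size, page_size) / page_size;
    try std.testing.expectEqual(@as(usize, 3), total_pages);
}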
71 changes: 71 additions & 0 deletions src/link/MachO/hasher.zig
@@ -0,0 +1,71 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const ThreadPool = std.Thread.Pool;
+const WaitGroup = std.Thread.WaitGroup;
+
+pub fn ParallelHasher(comptime Hasher: type) type {
+    const hash_size = Hasher.digest_length;
+
+    return struct {
+        allocator: Allocator,
+        thread_pool: *ThreadPool,
+
+        pub fn hash(self: Self, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u64 = 0x4000,
+            max_file_size: ?u64 = null,
+        }) !void {
+            var wg: WaitGroup = .{};
+
+            const file_size = blk: {
+                const file_size = opts.max_file_size orelse try file.getEndPos();
+                break :blk std.math.cast(usize, file_size) orelse return error.Overflow;
+            };
+            const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow;
+
+            const buffer = try self.allocator.alloc(u8, chunk_size * out.len);
+            defer self.allocator.free(buffer);
+
+            const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len);
+            defer self.allocator.free(results);
+
+            {
+                wg.reset();
+                defer wg.wait();
+
+                for (out, results, 0..) |*out_buf, *result, i| {
+                    const fstart = i * chunk_size;
+                    const fsize = if (fstart + chunk_size > file_size) file_size - fstart else chunk_size;
+                    wg.start();
+                    try self.thread_pool.spawn(worker, .{
+                        file,
+                        fstart,
+                        buffer[fstart..][0..fsize],
+                        &(out_buf.*),
+                        &(result.*),
+                        &wg,
+                    });
+                }
+            }
+            for (results) |result| _ = try result;
+        }
+
+        fn worker(
+            file: fs.File,
+            fstart: usize,
+            buffer: []u8,
+            out: *[hash_size]u8,
+            err: *fs.File.PReadError!usize,
+            wg: *WaitGroup,
+        ) void {
+            defer wg.finish();
+            err.* = file.preadAll(buffer, fstart);
+            Hasher.hash(buffer, out, .{});
+        }
+
+        const Self = @This();
+    };
+}
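A hypothetical call site for ParallelHasher (the gpa, pool, and hashFileChunks names are assumed for illustration, not from this PR): one digest is produced per chunk_size-sized slice of the file, and any worker's pread error resurfaces from hash:

const std = @import("std");
const ParallelHasher = @import("hasher.zig").ParallelHasher;
const Sha256 = std.crypto.hash.sha2.Sha256;

// Sketch: hash a file in 16 KiB chunks on a thread pool, collecting one
// SHA-256 digest per chunk. The caller owns the returned slice.
fn hashFileChunks(gpa: std.mem.Allocator, pool: *std.Thread.Pool, file: std.fs.File) ![][Sha256.digest_length]u8 {
    const file_size = try file.getEndPos();
    const chunk_size: u64 = 0x4000;
    const num_chunks = std.math.cast(usize, (file_size + chunk_size - 1) / chunk_size) orelse return error.Overflow;
    const digests = try gpa.alloc([Sha256.digest_length]u8, num_chunks);
    errdefer gpa.free(digests);
    var hasher = ParallelHasher(Sha256){ .allocator = gpa, .thread_pool = pool };
    try hasher.hash(file, digests, .{ .chunk_size = chunk_size, .max_file_size = file_size });
    return digests;
}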
45 changes: 45 additions & 0 deletions src/link/MachO/uuid.zig
@@ -0,0 +1,45 @@
+const std = @import("std");
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Md5 = std.crypto.hash.Md5;
+const Hasher = @import("hasher.zig").ParallelHasher;
+
+/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
+/// the final digest.
+/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
+/// and we will use it too as it seems accepted by Apple OSes.
+/// TODO LLD also hashes the output filename to disambiguate between same builds with different
+/// output files. Should we also do that?
+pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const num_chunks = comp.thread_pool.threads.len * 0x10;
+    const chunk_size = @divTrunc(file_size, num_chunks);
+    const actual_num_chunks = if (@rem(file_size, num_chunks) > 0) num_chunks + 1 else num_chunks;
+
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
+    defer comp.gpa.free(hashes);
+
+    var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(file, hashes, .{
+        .chunk_size = chunk_size,
+        .max_file_size = file_size,
+    });
+
+    const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
+    defer comp.gpa.free(final_buffer);
+
+    for (hashes, 0..) |hash, i| {
+        mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
+    }
+
+    Md5.hash(final_buffer, out, .{});
+    conform(out);
+}
+
+inline fn conform(out: *[Md5.digest_length]u8) void {
+    // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
+    out[6] = (out[6] & 0x0F) | (3 << 4);
+    out[8] = (out[8] & 0x3F) | 0x80;
+}
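One detail worth noting about conform: despite its comment, (3 << 4) writes version 3 (the RFC 4122 MD5 name-based UUID) into the high nibble of byte 6, and the second line forces the 0b10 variant bits in byte 8. A quick sanity-check sketch (not part of the PR) that could sit next to conform in uuid.zig:

test "conform stamps RFC 4122 version and variant bits" {
    var digest = [_]u8{0xFF} ** Md5.digest_length;
    conform(&digest);
    // High nibble of byte 6 becomes 0x3: version 3, MD5 name-based.
    try std.testing.expectEqual(@as(u8, 0x3F), digest[6]);
    // Top two bits of byte 8 become 0b10: the RFC 4122 variant.
    try std.testing.expectEqual(@as(u8, 0xBF), digest[8]);
}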