
Commit 52a0f49

Support fetching dependencies over git+http(s)
Closes #14298

This commit adds support for fetching dependencies over git+http(s) using a minimal implementation of the Git protocols and formats relevant to fetching repository data. Git URLs can be specified in `build.zig.zon` as follows:

```zig
.xml = .{
    .url = "git+https://github.com/ianprime0509/zig-xml#7380d59d50f1cd8460fd748b5f6f179306679e2f",
    .hash = "122085c1e4045fa9cb69632ff771c56acdb6760f34ca5177e80f70b0b92cd80da3e9",
},
```

The fragment part of the URL may specify a commit ID (SHA-1 hash), branch name, or tag. It is an error to omit the fragment: if this happens, the compiler prompts the user to add it, suggesting the commit ID of the repository's HEAD commit (that is, the latest commit of the default branch):

```
Fetch Packages... xml...
/var/home/ian/src/zig-gobject/build.zig.zon:6:20: error: url field is missing an explicit ref
    .url = "git+https://github.com/ianprime0509/zig-xml",
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
note: try .url = "git+https://github.com/ianprime0509/zig-xml#dfdc044f3271641c7d428dc8ec8cd46423d8b8b6",
```

This implementation currently supports only version 2 of Git's wire protocol (documented in [protocol-v2](https://git-scm.com/docs/protocol-v2)), which was first introduced in Git 2.19 (2018) and made the default in Git 2.26 (2020). The wire protocol behaves similarly over other transports, such as SSH and the "Git protocol" (git:// URLs), so it should be reasonably straightforward to fetch dependencies from such URLs once the necessary transports are implemented (e.g. #14295).
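For illustration, here is a minimal sketch of the other fragment forms mentioned above. The branch and tag names (`main`, `v1.0.0`) and the hash values are placeholders, not real refs or hashes of zig-xml; as with tarball URLs, a wrong `.hash` produces a mismatch error that shows the actual hash of the fetched contents:

```zig
// Placeholder examples: "main" and "v1.0.0" are hypothetical refs, and the
// hashes below are dummy multihash values; use the ones the compiler reports.
.xml_branch = .{
    .url = "git+https://github.com/ianprime0509/zig-xml#main",
    .hash = "1220aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
},
.xml_tag = .{
    .url = "git+https://github.com/ianprime0509/zig-xml#v1.0.0",
    .hash = "1220bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
},
```

Either way, the fragment is resolved to a single commit by the ref listing in `fetchGit` below, so the `.hash` field pins the dependency contents exactly as it does for a commit-ID fragment.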
1 parent acac685 commit 52a0f49

4 files changed: +1619 -58 lines changed

src/Package.zig

Lines changed: 198 additions & 58 deletions
```diff
@@ -17,6 +17,7 @@ const Module = @import("Module.zig");
 const Cache = std.Build.Cache;
 const build_options = @import("build_options");
 const Manifest = @import("Manifest.zig");
+const git = @import("git.zig");
 
 pub const Table = std.StringHashMapUnmanaged(*Package);
 
@@ -647,65 +648,12 @@ fn fetchAndUnpack(
     };
     defer tmp_directory.closeAndFree(gpa);
 
-    var h = std.http.Headers{ .allocator = gpa };
-    defer h.deinit();
-
-    var req = try http_client.request(.GET, uri, h, .{});
-    defer req.deinit();
-
-    try req.start(.{});
-    try req.wait();
-
-    if (req.response.status != .ok) {
-        return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{
-            @intFromEnum(req.response.status),
-            req.response.status.phrase() orelse "",
-        });
-    }
-
-    const content_type = req.response.headers.getFirstValue("Content-Type") orelse
-        return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{});
-
-    var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{
-        .child_reader = req.reader(),
-        .prog_node = &pkg_prog_node,
-        .unit = if (req.response.content_length) |content_length| unit: {
-            const kib = content_length / 1024;
-            const mib = kib / 1024;
-            if (mib > 0) {
-                pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
-                pkg_prog_node.setUnit("MiB");
-                break :unit .mib;
-            } else {
-                pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
-                pkg_prog_node.setUnit("KiB");
-                break :unit .kib;
-            }
-        } else .any,
-    };
-    pkg_prog_node.context.refresh();
-
-    if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
-        ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
-        ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
-    {
-        // I observed the gzip stream to read 1 byte at a time, so I am using a
-        // buffered reader on the front of it.
-        try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
-    } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) {
-        // I have not checked what buffer sizes the xz decompression implementation uses
-        // by default, so the same logic applies for buffering the reader as for gzip.
-        try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz);
-    } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
-        // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
-        // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
-        const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
-            return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
-        if (isTarAttachment(content_disposition)) {
-            try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
-        } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
+    if (mem.eql(u8, uri.scheme, "http") or mem.eql(u8, uri.scheme, "https")) {
+        try fetchHttp(http_client, tmp_directory, dep, uri, report, &pkg_prog_node);
+    } else if (mem.eql(u8, uri.scheme, "git+http") or mem.eql(u8, uri.scheme, "git+https")) {
+        try fetchGit(http_client, tmp_directory, dep, uri, report, &pkg_prog_node);
     } else {
-        return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type});
+        return report.fail(dep.url_tok, "unsupported URL scheme: {s}", .{uri.scheme});
     }
 
     // Download completed - stop showing downloaded amount as progress
@@ -776,6 +724,198 @@ fn fetchAndUnpack(
     };
 }
 
+fn fetchHttp(
+    http_client: *std.http.Client,
+    tmp_directory: Compilation.Directory,
+    dep: Manifest.Dependency,
+    uri: std.Uri,
+    report: Report,
+    pkg_prog_node: *std.Progress.Node,
+) !void {
+    const gpa = http_client.allocator;
+
+    var h = std.http.Headers{ .allocator = gpa };
+    defer h.deinit();
+
+    var req = try http_client.request(.GET, uri, h, .{});
+    defer req.deinit();
+
+    try req.start(.{});
+    try req.wait();
+
+    if (req.response.status != .ok) {
+        return report.fail(dep.url_tok, "Expected response status '200 OK' got '{} {s}'", .{
+            @intFromEnum(req.response.status),
+            req.response.status.phrase() orelse "",
+        });
+    }
+
+    const content_type = req.response.headers.getFirstValue("Content-Type") orelse
+        return report.fail(dep.url_tok, "Missing 'Content-Type' header", .{});
+
+    var prog_reader: ProgressReader(std.http.Client.Request.Reader) = .{
+        .child_reader = req.reader(),
+        .prog_node = pkg_prog_node,
+        .unit = if (req.response.content_length) |content_length| unit: {
+            const kib = content_length / 1024;
+            const mib = kib / 1024;
+            if (mib > 0) {
+                pkg_prog_node.setEstimatedTotalItems(@intCast(mib));
+                pkg_prog_node.setUnit("MiB");
+                break :unit .mib;
+            } else {
+                pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib)));
+                pkg_prog_node.setUnit("KiB");
+                break :unit .kib;
+            }
+        } else .any,
+    };
+    pkg_prog_node.context.refresh();
+
+    if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
+        ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
+        ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
+    {
+        // I observed the gzip stream to read 1 byte at a time, so I am using a
+        // buffered reader on the front of it.
+        try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
+    } else if (ascii.eqlIgnoreCase(content_type, "application/x-xz")) {
+        // I have not checked what buffer sizes the xz decompression implementation uses
+        // by default, so the same logic applies for buffering the reader as for gzip.
+        try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.xz);
+    } else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
+        // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
+        // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
+        const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
+            return report.fail(dep.url_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
+        if (isTarAttachment(content_disposition)) {
+            try unpackTarball(gpa, prog_reader.reader(), tmp_directory.handle, std.compress.gzip);
+        } else return report.fail(dep.url_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
+    } else {
+        return report.fail(dep.url_tok, "Unsupported 'Content-Type' header value: '{s}'", .{content_type});
+    }
+}
+
+fn fetchGit(
+    http_client: *std.http.Client,
+    tmp_directory: Compilation.Directory,
+    dep: Manifest.Dependency,
+    uri: std.Uri,
+    report: Report,
+    pkg_prog_node: *std.Progress.Node,
+) !void {
+    const gpa = http_client.allocator;
+    var transport_uri = uri;
+    transport_uri.scheme = uri.scheme["git+".len..];
+    var redirect_uri: []u8 = undefined;
+    var session: git.Session = .{ .transport = http_client, .uri = transport_uri };
+    session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) {
+        error.Redirected => {
+            defer gpa.free(redirect_uri);
+            return report.fail(dep.url_tok, "repository moved to {s}", .{redirect_uri});
+        },
+        else => |other| return other,
+    };
+
+    const want_oid = want_oid: {
+        const want_ref = uri.fragment orelse "HEAD";
+        if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
+
+        const want_ref_head = try std.fmt.allocPrint(gpa, "refs/heads/{s}", .{want_ref});
+        defer gpa.free(want_ref_head);
+        const want_ref_tag = try std.fmt.allocPrint(gpa, "refs/tags/{s}", .{want_ref});
+        defer gpa.free(want_ref_tag);
+
+        var ref_iterator = try session.listRefs(gpa, .{
+            .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
+            .include_peeled = true,
+        });
+        defer ref_iterator.deinit();
+        while (try ref_iterator.next()) |ref| {
+            if (mem.eql(u8, ref.name, want_ref) or
+                mem.eql(u8, ref.name, want_ref_head) or
+                mem.eql(u8, ref.name, want_ref_tag))
+            {
+                break :want_oid ref.peeled orelse ref.oid;
+            }
+        }
+        return report.fail(dep.url_tok, "ref not found: {s}", .{want_ref});
+    };
+    if (uri.fragment == null) {
+        const file_path = try report.directory.join(gpa, &.{Manifest.basename});
+        defer gpa.free(file_path);
+
+        const eb = report.error_bundle;
+        const notes_len = 1;
+        try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{
+            .tok = dep.url_tok,
+            .off = 0,
+            .msg = "url field is missing an explicit ref",
+        });
+        const notes_start = try eb.reserveNotes(notes_len);
+        eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
+            .msg = try eb.printString("try .url = \"{+/}#{}\",", .{ uri, std.fmt.fmtSliceHexLower(&want_oid) }),
+        }));
+        return error.PackageFetchFailed;
+    }
+
+    // The .git directory is used to store the packfile and associated index, but
+    // we do not attempt to replicate the exact structure of a real .git
+    // directory, since that isn't relevant for fetching a package.
+    {
+        var pack_dir = try tmp_directory.handle.makeOpenPath(".git", .{});
+        defer pack_dir.close();
+        var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
+        defer pack_file.close();
+        {
+            var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
+            _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{std.fmt.fmtSliceHexLower(&want_oid)}) catch unreachable;
+            var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf});
+            defer fetch_stream.deinit();
+
+            var fetch_prog_node = pkg_prog_node.start("Fetch", 0);
+            defer fetch_prog_node.end();
+            fetch_prog_node.activate();
+            fetch_prog_node.context.refresh();
+            var prog_reader: ProgressReader(git.Session.FetchStream.Reader) = .{
+                .child_reader = fetch_stream.reader(),
+                .prog_node = pkg_prog_node,
+                .unit = .any,
+            };
+            pkg_prog_node.context.refresh();
+
+            var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+            try fifo.pump(prog_reader.reader(), pack_file.writer());
+            try pack_file.sync();
+        }
+
+        var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
+        defer index_file.close();
+        {
+            var index_prog_node = pkg_prog_node.start("Index pack", 0);
+            defer index_prog_node.end();
+            index_prog_node.activate();
+            index_prog_node.context.refresh();
+            var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
+            try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
+            try index_buffered_writer.flush();
+            try index_file.sync();
+        }
+
+        {
+            var checkout_prog_node = pkg_prog_node.start("Checkout", 0);
+            defer checkout_prog_node.end();
+            checkout_prog_node.activate();
+            checkout_prog_node.context.refresh();
+            var repository = try git.Repository.init(gpa, pack_file, index_file);
+            defer repository.deinit();
+            try repository.checkout(tmp_directory.handle, want_oid);
+        }
+    }
+
+    try tmp_directory.handle.deleteTree(".git");
+}
+
 fn unpackTarball(
     gpa: Allocator,
     req_reader: anytype,
```
