Skip to content

Commit 92269b6

Browse files
authored
Merge branch 'ggml-org:master' into inspect-march-and-mcpu-to-found-the-cpu
2 parents 267f8d5 + b8595b1 commit 92269b6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+3281
-1433
lines changed

.devops/vulkan.Dockerfile

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG UBUNTU_VERSION=24.04
1+
ARG UBUNTU_VERSION=25.10
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

@@ -7,32 +7,16 @@ FROM ubuntu:$UBUNTU_VERSION AS build
77
# Install build tools
88
RUN apt update && apt install -y git build-essential cmake wget xz-utils
99

10-
# Install Vulkan SDK
11-
ARG VULKAN_VERSION=1.4.321.1
12-
RUN ARCH=$(uname -m) && \
13-
wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
14-
mkdir -p /opt/vulkan && \
15-
tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
16-
mv /tmp/${ARCH}/* /opt/vulkan/ && \
17-
rm -rf /tmp/*
18-
1910
# Install cURL and Vulkan SDK dependencies
2011
RUN apt install -y libcurl4-openssl-dev curl \
21-
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev
22-
23-
# Set environment variables
24-
ENV VULKAN_SDK=/opt/vulkan
25-
ENV PATH=$VULKAN_SDK/bin:$PATH
26-
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
27-
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
28-
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH
12+
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
2913

3014
# Build it
3115
WORKDIR /app
3216

3317
COPY . .
3418

35-
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
19+
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
3620
cmake --build build --config Release -j$(nproc)
3721

3822
RUN mkdir -p /app/lib && \
@@ -50,7 +34,7 @@ RUN mkdir -p /app/full \
5034
FROM ubuntu:$UBUNTU_VERSION AS base
5135

5236
RUN apt-get update \
53-
&& apt-get install -y libgomp1 curl libvulkan-dev \
37+
&& apt-get install -y libgomp1 curl libvulkan1 mesa-vulkan-drivers \
5438
&& apt autoremove -y \
5539
&& apt clean -y \
5640
&& rm -rf /tmp/* /var/tmp/* \

.github/workflows/build.yml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,16 @@ jobs:
161161
- name: Dawn Dependency
162162
id: dawn-depends
163163
run: |
164-
DAWN_VERSION="v1.0.0"
164+
DAWN_VERSION="v2.0.0"
165165
DAWN_OWNER="reeselevine"
166166
DAWN_REPO="dawn"
167-
DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-macos-latest-Release.tar.gz"
167+
DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release.zip"
168168
echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
169-
curl -L -o artifact.tar.gz \
169+
curl -L -o artifact.zip \
170170
"https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
171171
mkdir dawn
172-
tar -xvf artifact.tar.gz -C dawn --strip-components=1
172+
unzip artifact.zip
173+
tar -xvf Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release.tar.gz -C dawn --strip-components=1
173174
174175
- name: Build
175176
id: cmake_build
@@ -521,15 +522,16 @@ jobs:
521522
id: dawn-depends
522523
run: |
523524
sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
524-
DAWN_VERSION="v1.0.0"
525+
DAWN_VERSION="v2.0.0"
525526
DAWN_OWNER="reeselevine"
526527
DAWN_REPO="dawn"
527-
DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-ubuntu-latest-Release.tar.gz"
528+
DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-ubuntu-latest-Release.zip"
528529
echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
529-
curl -L -o artifact.tar.gz \
530+
curl -L -o artifact.zip \
530531
"https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}"
531532
mkdir dawn
532-
tar -xvf artifact.tar.gz -C dawn --strip-components=1
533+
unzip artifact.zip
534+
tar -xvf Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-ubuntu-latest-Release.tar.gz -C dawn --strip-components=1
533535
534536
- name: Build
535537
id: cmake_build

benches/dgx-spark.md

Lines changed: 264 additions & 0 deletions
Large diffs are not rendered by default.

common/arg.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
740740
exit(0);
741741
}
742742
));
743+
add_opt(common_arg(
744+
{"-cl", "--cache-list"},
745+
"show list of models in cache",
746+
[](common_params &) {
747+
printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
748+
auto models = common_list_cached_models();
749+
printf("number of models in cache: %zu\n", models.size());
750+
for (size_t i = 0; i < models.size(); i++) {
751+
auto & model = models[i];
752+
printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
753+
}
754+
exit(0);
755+
}
756+
));
743757
add_opt(common_arg(
744758
{"--completion-bash"},
745759
"print source-able bash completion script for llama.cpp",

common/common.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
908908
return cache_directory + filename;
909909
}
910910

911+
std::vector<common_file_info> fs_list_files(const std::string & path) {
912+
std::vector<common_file_info> files;
913+
if (path.empty()) return files;
914+
915+
std::filesystem::path dir(path);
916+
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
917+
return files;
918+
}
919+
920+
for (const auto & entry : std::filesystem::directory_iterator(dir)) {
921+
try {
922+
// Only include regular files (skip directories)
923+
const auto & p = entry.path();
924+
if (std::filesystem::is_regular_file(p)) {
925+
common_file_info info;
926+
info.path = p.string();
927+
info.name = p.filename().string();
928+
try {
929+
info.size = static_cast<size_t>(std::filesystem::file_size(p));
930+
} catch (const std::filesystem::filesystem_error &) {
931+
info.size = 0;
932+
}
933+
files.push_back(std::move(info));
934+
}
935+
} catch (const std::filesystem::filesystem_error &) {
936+
// skip entries we cannot inspect
937+
continue;
938+
}
939+
}
940+
941+
return files;
942+
}
943+
911944

912945
//
913946
// Model utils

common/common.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,13 @@ bool fs_create_directory_with_parents(const std::string & path);
611611
std::string fs_get_cache_directory();
612612
std::string fs_get_cache_file(const std::string & filename);
613613

614+
// Description of a single file, as produced by fs_list_files()
struct common_file_info {
    // path of the file as reported while iterating the listed directory
    std::string path;
    // last component of `path` (file name including extension)
    std::string name;
    // file size in bytes
    size_t      size = 0;
};
619+
std::vector<common_file_info> fs_list_files(const std::string & path);
620+
614621
//
615622
// Model utils
616623
//

common/download.cpp

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,22 @@ using json = nlohmann::ordered_json;
5050
// downloader
5151
//
5252

53+
// Returns true when `repo` has the form "<owner>/<repo>": exactly one '/',
// with both sides non-empty and made only of ASCII letters, digits, '_', '.' and '-'.
static bool validate_repo_name(const std::string & repo) {
    // compiled once and reused across calls
    static const std::regex owner_slash_repo(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)");
    const bool is_valid = std::regex_match(repo.begin(), repo.end(), owner_slash_repo);
    return is_valid;
}
58+
59+
static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
60+
// we use "=" to avoid clashing with other component, while still being allowed on windows
61+
std::string fname = "manifest=" + repo + "=" + tag + ".json";
62+
if (!validate_repo_name(repo)) {
63+
throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
64+
}
65+
string_replace_all(fname, "/", "=");
66+
return fs_get_cache_file(fname);
67+
}
68+
5369
static std::string read_file(const std::string & fname) {
5470
std::ifstream file(fname);
5571
if (!file) {
@@ -829,17 +845,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
829845
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
830846
// User-Agent header is already set in common_remote_get_content, no need to set it here
831847

832-
// we use "=" to avoid clashing with other component, while still being allowed on windows
833-
std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
834-
string_replace_all(cached_response_fname, "/", "_");
835-
std::string cached_response_path = fs_get_cache_file(cached_response_fname);
836-
837848
// make the request
838849
common_remote_params params;
839850
params.headers = headers;
840851
long res_code = 0;
841852
std::string res_str;
842853
bool use_cache = false;
854+
std::string cached_response_path = get_manifest_path(hf_repo, tag);
843855
if (!offline) {
844856
try {
845857
auto res = common_remote_get_content(url, params);
@@ -895,6 +907,33 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
895907
return { hf_repo, ggufFile, mmprojFile };
896908
}
897909

910+
std::vector<common_cached_model_info> common_list_cached_models() {
911+
std::vector<common_cached_model_info> models;
912+
const std::string cache_dir = fs_get_cache_directory();
913+
const std::vector<common_file_info> files = fs_list_files(cache_dir);
914+
for (const auto & file : files) {
915+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
916+
common_cached_model_info model_info;
917+
model_info.manifest_path = file.path;
918+
std::string fname = file.name;
919+
string_replace_all(fname, ".json", ""); // remove extension
920+
auto parts = string_split<std::string>(fname, '=');
921+
if (parts.size() == 4) {
922+
// expect format: manifest=<user>=<model>=<tag>=<other>
923+
model_info.user = parts[1];
924+
model_info.model = parts[2];
925+
model_info.tag = parts[3];
926+
} else {
927+
// invalid format
928+
continue;
929+
}
930+
model_info.size = 0; // TODO: get GGUF size, not manifest size
931+
models.push_back(model_info);
932+
}
933+
}
934+
return models;
935+
}
936+
898937
//
899938
// Docker registry functions
900939
//
@@ -959,6 +998,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
959998
std::string token = common_docker_get_token(repo); // Get authentication token
960999

9611000
// Get manifest
1001+
// TODO: cache the manifest response so that it appears in the model list
9621002
const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
9631003
std::string manifest_url = url_prefix + "/manifests/" + tag;
9641004
common_remote_params manifest_params;

common/download.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,23 @@ struct common_params_model;
88
// download functionalities
99
//
1010

11+
// One cached model, identified by "<user>/<model>:<tag>"
struct common_cached_model_info {
    std::string manifest_path; // path of the cached manifest file
    std::string user;
    std::string model;
    std::string tag;
    size_t      size = 0;      // GGUF size in bytes

    // format the identifier as "<user>/<model>:<tag>"
    std::string to_string() const {
        std::string id = user;
        id += "/";
        id += model;
        id += ":";
        id += tag;
        return id;
    }
};
21+
1122
struct common_hf_file_res {
1223
std::string repo; // repo name with ":tag" removed
1324
std::string ggufFile;
1425
std::string mmprojFile;
1526
};
1627

17-
// resolve and download model from Docker registry
18-
// return local path to downloaded model file
19-
std::string common_docker_resolve_model(const std::string & docker);
20-
2128
/**
2229
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
2330
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -39,3 +46,10 @@ bool common_download_model(
3946
const common_params_model & model,
4047
const std::string & bearer_token,
4148
bool offline);
49+
50+
// returns list of cached models
51+
std::vector<common_cached_model_info> common_list_cached_models();
52+
53+
// resolve and download model from Docker registry
54+
// return local path to downloaded model file
55+
std::string common_docker_resolve_model(const std::string & docker);

0 commit comments

Comments
 (0)