 {
   inputs = {
     nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
-    flake-utils.url = "github:numtide/flake-utils";
   };
-  outputs = { self, nixpkgs, flake-utils }:
-    flake-utils.lib.eachDefaultSystem (system:
-      let
-        name = "llama.cpp";
-        src = ./.;
-        meta.mainProgram = "llama";
-        inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
-        buildInputs = with pkgs; [ openmpi ];
-        osSpecific = with pkgs; buildInputs ++ (
-          if isAarch64 && isDarwin then
-            with pkgs.darwin.apple_sdk_11_0.frameworks; [
-              Accelerate
-              MetalKit
-            ]
-          else if isAarch32 && isDarwin then
-            with pkgs.darwin.apple_sdk.frameworks; [
-              Accelerate
-              CoreGraphics
-              CoreVideo
-            ]
-          else if isDarwin then
-            with pkgs.darwin.apple_sdk.frameworks; [
-              Accelerate
-              CoreGraphics
-              CoreVideo
-            ]
-          else
-            with pkgs; [ openblas ]
-        );
-        pkgs = import nixpkgs { inherit system; };
-        nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
-        cudatoolkit_joined = with pkgs; symlinkJoin {
-          # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
-          # see https://github.com/NixOS/nixpkgs/issues/224291
-          # copied from jaxlib
-          name = "${cudaPackages.cudatoolkit.name}-merged";
-          paths = [
-            cudaPackages.cudatoolkit.lib
-            cudaPackages.cudatoolkit.out
-          ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
-            # for some reason some of the required libs are in the targets/x86_64-linux
-            # directory; not sure why but this works around it
-            "${cudaPackages.cudatoolkit}/targets/${system}"
-          ];
-        };
-        llama-python =
-          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
-        # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-        llama-python-extra =
-          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
-        postPatch = ''
-          substituteInPlace ./ggml-metal.m \
-            --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-          substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
-        '';
-        postInstall = ''
-          mv $out/bin/main $out/bin/llama
-          mv $out/bin/server $out/bin/llama-server
-          mkdir -p $out/include
-          cp ${src}/llama.h $out/include/
-        '';
-        cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
-      in
-      {
-        packages.default = pkgs.stdenv.mkDerivation {
-          inherit name src meta postPatch nativeBuildInputs postInstall;
-          buildInputs = osSpecific;
-          cmakeFlags = cmakeFlags
-            ++ (if isAarch64 && isDarwin then [
-              "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
-              "-DLLAMA_METAL=ON"
-            ] else [
-              "-DLLAMA_BLAS=ON"
-              "-DLLAMA_BLAS_VENDOR=OpenBLAS"
-            ]);
-        };
-        packages.opencl = pkgs.stdenv.mkDerivation {
-          inherit name src meta postPatch nativeBuildInputs postInstall;
-          buildInputs = with pkgs; buildInputs ++ [ clblast ];
-          cmakeFlags = cmakeFlags ++ [
-            "-DLLAMA_CLBLAST=ON"
-          ];
-        };
-        packages.cuda = pkgs.stdenv.mkDerivation {
-          inherit name src meta postPatch nativeBuildInputs postInstall;
-          buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
-          cmakeFlags = cmakeFlags ++ [
-            "-DLLAMA_CUBLAS=ON"
-          ];
-        };
-        packages.rocm = pkgs.stdenv.mkDerivation {
-          inherit name src meta postPatch nativeBuildInputs postInstall;
-          buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
-          cmakeFlags = cmakeFlags ++ [
-            "-DLLAMA_HIPBLAS=1"
-            "-DCMAKE_C_COMPILER=hipcc"
-            "-DCMAKE_CXX_COMPILER=hipcc"
-            # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
-            # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
-            # and select the line that matches the current nixpkgs version of rocBLAS.
-            "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
-          ];
-        };
-        apps.llama-server = {
-          type = "app";
-          program = "${self.packages.${system}.default}/bin/llama-server";
-        };
-        apps.llama-embedding = {
-          type = "app";
-          program = "${self.packages.${system}.default}/bin/embedding";
-        };
-        apps.llama = {
-          type = "app";
-          program = "${self.packages.${system}.default}/bin/llama";
-        };
-        apps.quantize = {
-          type = "app";
-          program = "${self.packages.${system}.default}/bin/quantize";
-        };
-        apps.train-text-from-scratch = {
-          type = "app";
-          program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
-        };
-        apps.default = self.apps.${system}.llama;
-        devShells.default = pkgs.mkShell {
-          buildInputs = [ llama-python ];
-          packages = nativeBuildInputs ++ osSpecific;
-        };
-        devShells.extra = pkgs.mkShell {
-          buildInputs = [ llama-python-extra ];
-          packages = nativeBuildInputs ++ osSpecific;
-        };
-      });
+
+  outputs = { self, nixpkgs }:
+
+    let
+      inherit (nixpkgs.lib) genAttrs;
+      overlays = import ./overlays.nix;
+      importNixpkgs = system: import nixpkgs {
+        inherit system;
+        overlays = [ overlays ];
+      };
+      systems = [ "aarch64-darwin" "aarch64-linux" "x86_64-darwin" "x86_64-linux" ];
+      withSystemPackages = f: genAttrs systems (system: f (importNixpkgs system));
+    in
+
+    {
+      # These define the various ways to build the llama.cpp project.
+      # Integrate them into your flake.nix configuration by adding this
+      # overlay to nixpkgs.overlays.
+      overlays = {
+        default = overlays;
+      };
+
+      # These use the definitions from ./overlays.nix and expose them as installables.
+      packages = withSystemPackages (pkgs: {
+        default = pkgs.llama-cpp;
+        opencl = pkgs.llama-cpp-opencl;
+        cuda = pkgs.llama-cpp-cuda;
+        rocm = pkgs.llama-cpp-rocm;
+      });
+
+      # These use the definition of llama-cpp from ./overlays.nix and expose various
+      # binaries as apps so that they're able to be run with `nix run`.
+      apps = withSystemPackages (import ./apps.nix [
+        "llama"
+        "llama-embedding"
+        "llama-server"
+        "quantize"
+        "train-text-from-scratch"
+      ]);
+
+      # These expose a build environment for either a "default" or an "extra" set of
+      # dependencies.
+      devShells = withSystemPackages (import ./devshells.nix);
+    };
 }
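
The new outputs block drops flake-utils in favor of a hand-rolled withSystemPackages helper built on nixpkgs.lib.genAttrs: genAttrs produces one attribute per listed system name, each populated by calling the given function with a package set imported for that system. A minimal sketch of how genAttrs behaves, evaluable with nix-instantiate --eval --strict; the <nixpkgs> channel reference here is only for demonstration:

let
  # Pull lib from any available nixpkgs; which one does not matter for this demo.
  lib = (import <nixpkgs> { }).lib;
in
  # genAttrs maps a list of names to an attrset keyed by those names.
  lib.genAttrs [ "x86_64-linux" "aarch64-darwin" ] (system: "package set for ${system}")
  # => { aarch64-darwin = "package set for aarch64-darwin"; x86_64-linux = "package set for x86_64-linux"; }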
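
The comment above the overlays output tells downstream users to add the overlay to nixpkgs.overlays. A minimal consumer flake might look like the sketch below; the input name, URL, and choice of system are illustrative assumptions rather than part of this change.

{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    # Assumed input name and URL for the llama.cpp flake.
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp }: {
    packages.x86_64-linux.default =
      let
        pkgs = import nixpkgs {
          system = "x86_64-linux";
          # The overlay adds llama-cpp (and its opencl/cuda/rocm variants) to pkgs.
          overlays = [ llama-cpp.overlays.default ];
        };
      in
      pkgs.llama-cpp;
  };
}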
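
./apps.nix itself is not part of this diff. Judging only from the call site, where import ./apps.nix is applied to a list of binary names and then, via withSystemPackages, to a per-system package set, one plausible shape for it is sketched here; the mkApp helper and the default entry are assumptions, not the actual file contents.

# Hypothetical ./apps.nix; the real file may differ.
names: pkgs:
  let
    # Turn a binary name into a flake app entry pointing into the llama-cpp package.
    mkApp = name: {
      inherit name;
      value = {
        type = "app";
        program = "${pkgs.llama-cpp}/bin/${name}";
      };
    };
  in
  builtins.listToAttrs (map mkApp names) // {
    # Assumed default so that a bare `nix run` launches the main binary.
    default = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/llama";
    };
  }

With an interface like this, `nix run .#llama-server` and the other listed names resolve to the corresponding binaries of the default llama-cpp package.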
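
Likewise, ./devshells.nix is not shown. The removed flake defined two shells inline (default with a small Python environment, extra adding torchWithoutCuda and transformers), so a sketch that keeps that behaviour behind the new pkgs-to-devShells interface might look like the following; the package choices simply mirror the deleted code and may not match the real file.

# Hypothetical ./devshells.nix; mirrors the shells the old flake.nix defined inline.
pkgs:
{
  default = pkgs.mkShell {
    # Python runtime for the conversion scripts, plus the native build toolchain.
    buildInputs = [
      (pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]))
    ];
    packages = with pkgs; [ cmake ninja pkg-config ];
  };
  extra = pkgs.mkShell {
    # Heavier ML runtime for model conversion and experimentation.
    buildInputs = [
      (pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]))
    ];
    packages = with pkgs; [ cmake ninja pkg-config ];
  };
}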