Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
609 changes: 609 additions & 0 deletions ARTIFACTORY_BUILD.md

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions Dockerfile-cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# vim: filetype=dockerfile

# FLAVOR selects the per-arch scratch stage ("amd64" or "arm64") consumed by
# the archive stage below; TARGETARCH is supplied automatically by buildx for
# each requested platform.
ARG FLAVOR=${TARGETARCH}

ARG ROCMVERSION=6.3.3
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.4.0
ARG CMAKEVERSION=3.31.2

# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
# yum-utils provides yum-config-manager; it is installed once up front, so the
# dnf step only needs to add ccache and the pinned gcc toolset.
RUN yum install -y yum-utils \
    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
    && dnf install -y ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH

FROM --platform=linux/arm64 almalinux:8 AS base-arm64
# install epel-release for ccache
RUN yum install -y yum-utils epel-release \
    && dnf install -y clang ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
ENV CC=clang CXX=clang++

# Shared build tooling: CMake plus only the CMake config and ggml sources, so
# this layer stays cached until the build configuration or backend changes.
FROM base-${TARGETARCH} AS base
ARG CMAKEVERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
# Strip symbols from the native libraries to keep the runtime image small.
ENV LDFLAGS=-s

# CPU-only build of the ggml backend libraries.
FROM base AS cpu
RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CPU' \
    && cmake --build --parallel --preset 'CPU' \
    && cmake --install build --component CPU --strip --parallel 2

# Build the ollama binary itself (Go with cgo).
FROM base AS build
ARG GOVERSION=1.24.4
RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
RUN --mount=type=cache,target=/root/.cache/go-build \
    go build -trimpath -buildmode=pie -o /bin/ollama .

# Empty per-arch stages so ${FLAVOR} can select either platform for archiving.
FROM --platform=linux/amd64 scratch AS amd64

FROM --platform=linux/arm64 scratch AS arm64

FROM ${FLAVOR} AS archive
COPY --from=cpu dist/lib/ollama /lib/ollama
COPY --from=build /bin/ollama /bin/ollama

# Runtime image: CA certificates, the binary, and the native libraries only.
FROM ubuntu:24.04
RUN apt-get update \
    && apt-get install -y ca-certificates curl openssl \
    && apt-get install --only-upgrade -y libpam0g libpam-modules libpam-modules-bin libpam-runtime \
        libsystemd0 libudev1 \
    && update-ca-certificates \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
COPY --from=archive /bin /usr/bin
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
COPY --from=archive /lib/ollama /usr/lib/ollama
ENV OLLAMA_HOST=0.0.0.0:11434
EXPOSE 11434
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
56 changes: 56 additions & 0 deletions build_cpu_artifactory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Build the CPU-only Ollama image (Dockerfile-cpu) for multiple architectures
# and push it to an Artifactory Docker registry via docker buildx.
set -eu

# Artifactory configuration. REGISTRY is required: with the empty default the
# tag would begin with "/" and the push destination would be invalid.
REGISTRY=${REGISTRY:-""}
IMAGE_NAME=${IMAGE_NAME:-"ollama-cpu"}
VERSION=${VERSION:-"latest"}

if [ -z "$REGISTRY" ]; then
    echo "ERROR: REGISTRY must be set (e.g. REGISTRY=artifactory.example.com:5000)" >&2
    exit 1
fi

# Optional Docker repository path within the registry
# If using this path, ensure to add /${DOCKER_REPO} to the image name
# example: FULL_IMAGE_NAME="${REGISTRY}/${DOCKER_REPO}/${IMAGE_NAME}:${VERSION}"
#DOCKER_REPO=${DOCKER_REPO:-""}

# Artifactory credentials (can be set via environment variables)
# Use API Key authentication for Artifactory
ARTIFACTORY_USERNAME=${ARTIFACTORY_USERNAME:-""}
ARTIFACTORY_API_KEY=${ARTIFACTORY_API_KEY:-""}

# Target platforms
PLATFORMS=${PLATFORMS:-"linux/amd64,linux/arm64"}

# Silent login if credentials are provided
if [ -n "$ARTIFACTORY_USERNAME" ] && [ -n "$ARTIFACTORY_API_KEY" ]; then
    echo "Logging in to Artifactory at $REGISTRY as $ARTIFACTORY_USERNAME..."
    echo "$ARTIFACTORY_API_KEY" | docker login -u "$ARTIFACTORY_USERNAME" --password-stdin "$REGISTRY" >/dev/null 2>&1
    echo "Login successful"
else
    echo "Artifactory credentials not provided, assuming you're already logged in"
fi

# Set up buildx if needed (docker-container driver is required for multi-arch).
BUILDER_NAME="multiarch-builder"
if ! docker buildx inspect "${BUILDER_NAME}" &>/dev/null; then
    echo "Creating new buildx builder: ${BUILDER_NAME}"
    docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use
else
    echo "Using existing buildx builder: ${BUILDER_NAME}"
    docker buildx use "${BUILDER_NAME}"
fi
docker buildx inspect --bootstrap

# Build and push the multi-arch image. push=true in --output already performs
# the push, so the previously redundant --push flag is dropped;
# registry.insecure=true permits plain-HTTP / self-signed Artifactory hosts.
# --no-cache forces a clean rebuild on every run (intentional for releases).
FULL_IMAGE_NAME="${REGISTRY}/${IMAGE_NAME}:${VERSION}"
echo "Building and pushing ${FULL_IMAGE_NAME} for platforms: ${PLATFORMS}"

docker buildx build \
    --platform "${PLATFORMS}" \
    --output=type=image,push=true,registry.insecure=true \
    --tag "${FULL_IMAGE_NAME}" \
    -f Dockerfile-cpu \
    --no-cache \
    .

echo "Build and push completed successfully!"
echo "Image pushed to: ${FULL_IMAGE_NAME}"
76 changes: 76 additions & 0 deletions build_cpu_dockerhub.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
# Build the CPU-only Ollama image (Dockerfile-cpu) for Docker Hub. Loads the
# image into the local daemon by default; set PUSH=1 to push multi-arch.
set -eu

# Set your organization and image name
ORG=${ORG:-""}
IMAGE_NAME=${IMAGE_NAME:-"ollama-cpu"}
VERSION=${VERSION:-"latest"}

# Docker Hub credentials (can be set via environment variables)
DOCKER_USERNAME=${DOCKER_USERNAME:-""}
DOCKER_PASSWORD=${DOCKER_PASSWORD:-""}

# Target platforms - same as Ollama's defaults
PLATFORMS=${PLATFORMS:-"linux/arm64,linux/amd64"}

# Silent login if credentials are provided
if [ -n "$DOCKER_USERNAME" ] && [ -n "$DOCKER_PASSWORD" ]; then
    echo "Logging in to Docker Hub as $DOCKER_USERNAME..."
    echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin >/dev/null 2>&1
    echo "Login successful"

    # Fall back to the login name when no organization was supplied.
    # (The old check compared ORG against the literal "yourorg", which could
    # never match the empty default, so this fallback never fired.)
    if [ -z "$ORG" ]; then
        ORG=$DOCKER_USERNAME
        echo "Using Docker username '$ORG' as organization"
    fi
else
    echo "Docker credentials not provided, assuming you're already logged in"
fi

# Docker Hub references require an org/user prefix; fail early otherwise.
if [ -z "$ORG" ]; then
    echo "ERROR: ORG must be set (or provide DOCKER_USERNAME/DOCKER_PASSWORD)" >&2
    exit 1
fi

# Ensure QEMU is installed for cross-platform builds
echo "Setting up QEMU for cross-platform builds..."
docker run --privileged --rm tonistiigi/binfmt --install all

# Set up buildx if needed
BUILDER_NAME="multiarch-builder"
if ! docker buildx inspect "${BUILDER_NAME}" &>/dev/null; then
    echo "Creating new buildx builder: ${BUILDER_NAME}"
    docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use
else
    docker buildx use "${BUILDER_NAME}"
fi
docker buildx inspect --bootstrap

# Set PUSH to a non-empty string to trigger push instead of load.
# Attestations (--provenance/--sbom) produce a manifest list that the docker
# exporter cannot --load, so they are only requested when pushing.
PUSH=${PUSH:-""}
ATTEST_FLAGS=""
if [ -z "${PUSH}" ]; then
    echo "Building ${ORG}/${IMAGE_NAME}:${VERSION} locally. Set PUSH=1 to push"
    # --load only supports a single platform; collapse to the host arch.
    if [[ "${PLATFORMS}" == *","* ]]; then
        HOST_ARCH=$(uname -m | sed -e 's/x86_64/amd64/' -e 's/aarch64/arm64/')
        echo "WARNING: --load only works for single platform. Setting platform to linux/${HOST_ARCH}"
        PLATFORMS="linux/${HOST_ARCH}"
    fi
    LOAD_OR_PUSH="--load"
else
    echo "Will be pushing ${ORG}/${IMAGE_NAME}:${VERSION}"
    LOAD_OR_PUSH="--push"
    ATTEST_FLAGS="--provenance=true --sbom=true"
fi

# Build and push/load the multi-arch image
echo "Building for platforms: ${PLATFORMS}"
docker buildx build \
    ${ATTEST_FLAGS} \
    --network=host \
    ${LOAD_OR_PUSH} \
    --platform="${PLATFORMS}" \
    -f Dockerfile-cpu \
    -t "${ORG}/${IMAGE_NAME}:${VERSION}" \
    .

echo "Build completed successfully!"
if [ -n "${PUSH}" ]; then
    echo "Image pushed to: ${ORG}/${IMAGE_NAME}:${VERSION}"
    echo "To pull: docker pull ${ORG}/${IMAGE_NAME}:${VERSION}"
fi
1 change: 1 addition & 0 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1571,6 +1571,7 @@ func NewCLI() *cobra.Command {
envVars["OLLAMA_LLM_LIBRARY"],
envVars["OLLAMA_GPU_OVERHEAD"],
envVars["OLLAMA_LOAD_TIMEOUT"],
envVars["OLLAMA_SKIP_MEMORY_CHECK"],
})
default:
appendEnvDocs(cmd, envs)
Expand Down
20 changes: 20 additions & 0 deletions docs/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,23 @@ The currently available K/V cache quantization types are:
How much the cache quantization impacts the model's response quality will depend on the model and the task. Models that have a high GQA count (e.g. Qwen2) may see a larger impact on precision from quantization than models with a low GQA count.

You may need to experiment with different quantization types to find the best balance between memory usage and quality.



## How do I bypass the available memory check before loading a model?

By default, Ollama checks whether your system has sufficient available memory before loading a model, to prevent out-of-memory errors that could crash your system or cause instability.
You can bypass this safety check by setting the `OLLAMA_SKIP_MEMORY_CHECK` environment variable to `1`.

### When to use this option

- You have swap space configured and accept slower performance
- You're running on a system with non-standard memory reporting
- You're debugging memory-related issues
- You understand the risks and have adequate system monitoring

### Important Warnings

- System instability: Loading models without sufficient memory can cause system freezes or crashes
- Performance degradation: Your system may become unresponsive due to excessive swapping
- Data loss risk: System crashes could result in unsaved work being lost
9 changes: 9 additions & 0 deletions envconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ var (
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
)

var (
	// AvailableMemoryCheckOverride bypasses the memory check during model load.
	// This is an expert-only setting, to be used in situations where the system
	// is guaranteed to have enough memory, or is able to procure it at runtime
	// by evicting blocks from caches (e.g. the ZFS ARC cache).
	AvailableMemoryCheckOverride = Uint("OLLAMA_SKIP_MEMORY_CHECK", 0)
)

func Uint64(key string, defaultValue uint64) func() uint64 {
return func() uint64 {
if s := Var(key); s != "" {
Expand Down Expand Up @@ -275,6 +281,9 @@ func AsMap() map[string]EnvVar {
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
"NO_PROXY": {"NO_PROXY", String("NO_PROXY")(), "No proxy"},

//Overrides
"OLLAMA_SKIP_MEMORY_CHECK": {"OLLAMA_SKIP_MEMORY_CHECK", AvailableMemoryCheckOverride(), "Bypass checking for available memory before loading models. (e.g. OLLAMA_SKIP_MEMORY_CHECK=1)"},
}

if runtime.GOOS != "windows" {
Expand Down
53 changes: 53 additions & 0 deletions llm/memory.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package llm

import (
"bufio"
"fmt"
"log/slog"
"os"
Expand Down Expand Up @@ -438,3 +439,55 @@ func projectorMemoryRequirements(filename string) (weights uint64) {

return weights
}

// GetZFSReclaimable returns max(0, size – c_min) from ZFS ARC stats.
// Added to fix the arc memory cache issue on zfs
// This will be a no-op is no zfs is involved.
// GetZFSReclaimableMemory returns the number of bytes the ZFS ARC could give
// back under memory pressure: max(0, size - c_min) read from the ARC kstats.
// It lets the memory check treat ARC-held cache as available, avoiding
// spurious "not enough memory" failures on ZFS hosts. On systems without ZFS
// the stats files do not exist and an error is returned, which callers treat
// as a no-op.
func GetZFSReclaimableMemory() (uint64, error) {
	// The kstat location differs between ZFS-on-Linux releases.
	paths := []string{"/proc/spl/kstat/zfs/arcstats", "/proc/zfs/arcstats"}
	var f *os.File
	for _, path := range paths {
		if file, err := os.Open(path); err == nil {
			f = file
			break
		}
	}
	if f == nil {
		return 0, fmt.Errorf("no ZFS ARC stats found")
	}
	defer f.Close()

	// arcstats rows have the shape "name type data"; only the "size" and
	// "c_min" rows are needed. Unparseable rows (e.g. the header) are skipped.
	var size, cmin uint64
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		cols := strings.Fields(scanner.Text())
		if len(cols) < 3 {
			continue
		}
		val, err := strconv.ParseUint(cols[2], 10, 64)
		if err != nil {
			continue
		}
		switch cols[0] {
		case "size":
			size = val
		case "c_min":
			cmin = val
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	// Both counters must have been present and non-zero; uint64 can never be
	// negative, so compare against zero rather than "<= 0".
	if size == 0 || cmin == 0 {
		return 0, fmt.Errorf("failed to read ZFS ARC stats")
	}
	if size > cmin {
		return size - cmin, nil
	}
	return 0, nil
}
32 changes: 24 additions & 8 deletions llm/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,30 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
}
}

// On linux and windows, over-allocating CPU memory will almost always result in an error
// Darwin has fully dynamic swap so has no direct concept of free swap space
if runtime.GOOS != "darwin" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
// Env variable to bypass ollama's memory check guardrail.
if envconfig.AvailableMemoryCheckOverride() == 1 {
slog.Warn("OLLAMA_SKIP_MEMORY_CHECK set; bypassing memory checks")
} else {
// On linux and windows, over-allocating CPU memory will almost always result in an error
// Darwin has fully dynamic swap so has no direct concept of free swap space
slog.Debug("OLLAMA_SKIP_MEMORY_CHECK not set; running memory checks")
if runtime.GOOS != "darwin" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := systemFreeMemory + systemSwapFreeMemory

// On Linux, reclaim ZFS ARC (size – c_min)
if runtime.GOOS == "linux" {
if reclaim, err := GetZFSReclaimableMemory(); err == nil {
slog.Info("reclaiming ZFS Arc cache size:", "size", format.HumanBytes2(reclaim))
available += reclaim
} else {
slog.Warn("failure while computing ZFS Arc cache size:", "error", err)
}
}
if systemMemoryRequired > available {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
}
}
}

Expand Down
Loading