diff --git a/ARTIFACTORY_BUILD.md b/ARTIFACTORY_BUILD.md new file mode 100644 index 00000000000..6414d6399d2 --- /dev/null +++ b/ARTIFACTORY_BUILD.md @@ -0,0 +1,609 @@ +
+ delphix + +
+ +# Ollama + +Get up and running with large language models. + +### macOS + +[Download](https://ollama.com/download/Ollama.dmg) + +### Windows + +[Download](https://ollama.com/download/OllamaSetup.exe) + +### Linux + +```shell +curl -fsSL https://ollama.com/install.sh | sh +``` + +[Manual install instructions](https://github.com/ollama/ollama/blob/main/docs/linux.md) + +### Docker + +The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub. + +### Libraries + +- [ollama-python](https://github.com/ollama/ollama-python) +- [ollama-js](https://github.com/ollama/ollama-js) + +### Community + +- [Discord](https://discord.gg/ollama) +- [Reddit](https://reddit.com/r/ollama) + +## Quickstart + +To run and chat with [Gemma 3](https://ollama.com/library/gemma3): + +```shell +ollama run gemma3 +``` + +## Model library + +Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library') + +Here are some example models that can be downloaded: + +| Model | Parameters | Size | Download | +| ------------------ | ---------- | ----- | -------------------------------- | +| Gemma 3 | 1B | 815MB | `ollama run gemma3:1b` | +| Gemma 3 | 4B | 3.3GB | `ollama run gemma3` | +| Gemma 3 | 12B | 8.1GB | `ollama run gemma3:12b` | +| Gemma 3 | 27B | 17GB | `ollama run gemma3:27b` | +| QwQ | 32B | 20GB | `ollama run qwq` | +| DeepSeek-R1 | 7B | 4.7GB | `ollama run deepseek-r1` | +| DeepSeek-R1 | 671B | 404GB | `ollama run deepseek-r1:671b` | +| Llama 4 | 109B | 67GB | `ollama run llama4:scout` | +| Llama 4 | 400B | 245GB | `ollama run llama4:maverick` | +| Llama 3.3 | 70B | 43GB | `ollama run llama3.3` | +| Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` | +| Llama 3.2 | 1B | 1.3GB | `ollama run llama3.2:1b` | +| Llama 3.2 Vision | 11B | 7.9GB | `ollama run llama3.2-vision` | +| Llama 3.2 Vision | 90B | 55GB | `ollama run llama3.2-vision:90b` | +| Llama 3.1 | 8B | 4.7GB | `ollama run 
llama3.1` | +| Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` | +| Phi 4 | 14B | 9.1GB | `ollama run phi4` | +| Phi 4 Mini | 3.8B | 2.5GB | `ollama run phi4-mini` | +| Mistral | 7B | 4.1GB | `ollama run mistral` | +| Moondream 2 | 1.4B | 829MB | `ollama run moondream` | +| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` | +| Starling | 7B | 4.1GB | `ollama run starling-lm` | +| Code Llama | 7B | 3.8GB | `ollama run codellama` | +| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` | +| LLaVA | 7B | 4.5GB | `ollama run llava` | +| Granite-3.3 | 8B | 4.9GB | `ollama run granite3.3` | + +> [!NOTE] +> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models. + +## Customize a model + +### Import from GGUF + +Ollama supports importing GGUF models in the Modelfile: + +1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import. + + ``` + FROM ./vicuna-33b.Q4_0.gguf + ``` + +2. Create the model in Ollama + + ```shell + ollama create example -f Modelfile + ``` + +3. Run the model + + ```shell + ollama run example + ``` + +### Import from Safetensors + +See the [guide](docs/import.md) on importing models for more information. + +### Customize a prompt + +Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model: + +```shell +ollama pull llama3.2 +``` + +Create a `Modelfile`: + +``` +FROM llama3.2 + +# set the temperature to 1 [higher is more creative, lower is more coherent] +PARAMETER temperature 1 + +# set the system message +SYSTEM """ +You are Mario from Super Mario Bros. Answer as Mario, the assistant, only. +""" +``` + +Next, create and run the model: + +``` +ollama create mario -f ./Modelfile +ollama run mario +>>> hi +Hello! It's your friend Mario. +``` + +For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation. 
+ +## CLI Reference + +### Create a model + +`ollama create` is used to create a model from a Modelfile. + +```shell +ollama create mymodel -f ./Modelfile +``` + +### Pull a model + +```shell +ollama pull llama3.2 +``` + +> This command can also be used to update a local model. Only the diff will be pulled. + +### Remove a model + +```shell +ollama rm llama3.2 +``` + +### Copy a model + +```shell +ollama cp llama3.2 my-model +``` + +### Multiline input + +For multiline input, you can wrap text with `"""`: + +``` +>>> """Hello, +... world! +... """ +I'm a basic program that prints the famous "Hello, world!" message to the console. +``` + +### Multimodal models + +``` +ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png" +``` + +> **Output**: The image features a yellow smiley face, which is likely the central focus of the picture. + +### Pass the prompt as an argument + +```shell +ollama run llama3.2 "Summarize this file: $(cat README.md)" +``` + +> **Output**: Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. + +### Show model information + +```shell +ollama show llama3.2 +``` + +### List models on your computer + +```shell +ollama list +``` + +### List which models are currently loaded + +```shell +ollama ps +``` + +### Stop a model which is currently running + +```shell +ollama stop llama3.2 +``` + +### Start Ollama + +`ollama serve` is used when you want to start ollama without running the desktop application. 
+ +## Building + +See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md) + +### Running local builds + +Next, start the server: + +```shell +./ollama serve +``` + +Finally, in a separate shell, run a model: + +```shell +./ollama run llama3.2 +``` + +## REST API + +Ollama has a REST API for running and managing models. + +### Generate a response + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "llama3.2", + "prompt":"Why is the sky blue?" +}' +``` + +### Chat with a model + +```shell +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.2", + "messages": [ + { "role": "user", "content": "why is the sky blue?" } + ] +}' +``` + +See the [API documentation](./docs/api.md) for all endpoints. + +## Community Integrations + +### Web & Desktop + +- [Open WebUI](https://github.com/open-webui/open-webui) +- [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat) +- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted) +- [Hollama](https://github.com/fmaclen/hollama) +- [Lollms-Webui](https://github.com/ParisNeo/lollms-webui) +- [LibreChat](https://github.com/danny-avila/LibreChat) +- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt) +- [HTML UI](https://github.com/rtcfirefly/ollama-ui) +- [Saddle](https://github.com/jikkuatwork/saddle) +- [TagSpaces](https://www.tagspaces.org) (A platform for file-based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions) +- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama) +- [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui) +- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file) +- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui) +- [Ollamac](https://github.com/kevinhermawan/Ollamac) +- [big-AGI](https://github.com/enricoros/big-AGI) +- [Cheshire Cat assistant 
framework](https://github.com/cheshire-cat-ai/core) +- [Amica](https://github.com/semperai/amica) +- [chatd](https://github.com/BruceMacD/chatd) +- [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI) +- [Dify.AI](https://github.com/langgenius/dify) +- [MindMac](https://mindmac.app) +- [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui) +- [Msty](https://msty.app) +- [Chatbox](https://github.com/Bin-Huang/Chatbox) +- [WinForm Ollama Copilot](https://github.com/tgraupmann/WinForm_Ollama_Copilot) +- [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama) +- [Alpaca WebUI](https://github.com/mmo80/alpaca-webui) +- [OllamaGUI](https://github.com/enoch1118/ollamaGUI) +- [OpenAOE](https://github.com/InternLM/OpenAOE) +- [Odin Runes](https://github.com/leonid20000/OdinRunes) +- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App) +- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm) +- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat) +- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats) +- [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS) +- [Jirapt](https://github.com/AliAhmedNada/jirapt) (Jira Integration to generate issues, tasks, epics) +- [ojira](https://github.com/AliAhmedNada/ojira) (Jira chrome plugin to easily generate descriptions for tasks) +- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories) +- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases) +- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG) +- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source 
Retrieval-Augmented Generation engine based on deep document understanding) +- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold) +- [chat](https://github.com/swuecho/chat) (chat web app for teams) +- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama) +- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG) +- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation) +- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends) +- [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama) +- [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models) +- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama) +- [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support, and multiple large language models.) 
+- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS) +- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama) +- [Shinkai Desktop](https://github.com/dcSpark/shinkai-apps) (Two click install Local AI using Ollama + Files + RAG) +- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord) +- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama) +- [R2R](https://github.com/SciPhi-AI/R2R) (Open-source RAG engine) +- [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy-to-use GUI with sample custom LLM for Drivers Education) +- [OpenGPA](https://opengpa.org) (Open-source offline-first Enterprise Agentic Application) +- [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations) +- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS) +- [AI Studio](https://github.com/MindWorkAI/AI-Studio) +- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client) +- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows) +- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac) +- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend) +- [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows, and Mac) +- [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for Linux and macOS made with GTK4 and Adwaita) +- [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) (AutoGPT Ollama integration) +- [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang) +- [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery) 
+- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot, and Ollama4j +- [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models. +- [Cline](https://github.com/cline/cline) - Formerly known as Claude Dev is a VSCode extension for multi-file/whole-repo coding +- [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support) +- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption) +- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library) +- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama) +- [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama) +- [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface) +- [Local Multimodal AI Chat](https://github.com/Leon-Sander/Local-Multimodal-AI-Chat) (Ollama-based LLM Chat with support for multiple features, including PDF RAG, voice chat, image-based interactions, and integration with OpenAI.) +- [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux) +- [OrionChat](https://github.com/EliasPereirah/OrionChat) - OrionChat is a web interface for chatting with different AI providers +- [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.) +- [Web management](https://github.com/lemonit-eric-mao/ollama-web-management) (Web management page) +- [Promptery](https://github.com/promptery/promptery) (desktop client for Ollama.) 
+- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama) +- [chat-ollama](https://github.com/annilq/chat-ollama) (a React Native client for Ollama) +- [SpaceLlama](https://github.com/tcsenpai/spacellama) (Firefox and Chrome extension to quickly summarize web pages with ollama in a sidebar) +- [YouLama](https://github.com/tcsenpai/youlama) (Webapp to quickly summarize any YouTube video, supporting Invidious as well) +- [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface) +- [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol) +- [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app) +- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard, and said in the meetings) +- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder) +- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation) +- [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI) +- [VT](https://github.com/vinhnx/vt.ai) (A minimal multimodal AI chat app, with dynamic conversation routing. 
Supports local models via Ollama) +- [Nosia](https://github.com/nosia-ai/nosia) (Easy to install and use RAG platform based on Ollama) +- [Witsy](https://github.com/nbonamy/witsy) (An AI Desktop application available for Mac/Windows/Linux) +- [Abbey](https://github.com/US-Artificial-Intelligence/abbey) (A configurable AI interface server with notebooks, document storage, and YouTube support) +- [Minima](https://github.com/dmayboroda/minima) (RAG with on-premises or fully local workflow) +- [aidful-ollama-model-delete](https://github.com/AidfulAI/aidful-ollama-model-delete) (User interface for simplified model cleanup) +- [Perplexica](https://github.com/ItzCrazyKns/Perplexica) (An AI-powered search engine & an open-source alternative to Perplexity AI) +- [Ollama Chat WebUI for Docker](https://github.com/oslook/ollama-webui) (Support for local docker deployment, lightweight ollama webui) +- [AI Toolkit for Visual Studio Code](https://aka.ms/ai-tooklit/ollama-docs) (Microsoft-official VSCode extension to chat, test, evaluate models with Ollama support, and use them in your AI applications.) 
+- [MinimalNextOllamaChat](https://github.com/anilkay/MinimalNextOllamaChat) (Minimal Web UI for Chat and Model Control) +- [Chipper](https://github.com/TilmanGriesel/chipper) AI interface for tinkerers (Ollama, Haystack RAG, Python) +- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints) +- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI) +- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models) +- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally) +- [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot) +- [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot) +- [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models) +- [LangBot](https://github.com/RockChinQ/LangBot) (LLM-based instant messaging bots platform, with Agents, RAG features, supports multiple platforms) +- [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool) +- [AstrBot](https://github.com/Soulter/AstrBot/) (User-friendly LLM-based multi-platform chatbot with a WebUI, supporting RAG, LLM agents, and plugins integration) +- [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.) +- [Flufy](https://github.com/Aharon-Bensadoun/Flufy) (A beautiful chat interface for interacting with Ollama's API. Built with React, TypeScript, and Material-UI.) 
+- [Ellama](https://github.com/zeozeozeo/ellama) (Friendly native app to chat with an Ollama instance) +- [screenpipe](https://github.com/mediar-ai/screenpipe) Build agents powered by your screen history +- [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).) +- [Writeopia](https://github.com/Writeopia/Writeopia) (Text editor with integration with Ollama) +- [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable) +- [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers) +- [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with ollama available on PyPI) +- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.) +- [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.) +- [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.) +- [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.) 
+ +### Cloud + +- [Google Cloud](https://cloud.google.com/run/docs/tutorials/gpu-gemma2-with-ollama) +- [Fly.io](https://fly.io/docs/python/do-more/add-ollama/) +- [Koyeb](https://www.koyeb.com/deploy/ollama) + +### Terminal + +- [oterm](https://github.com/ggozad/oterm) +- [Ellama Emacs client](https://github.com/s-kostyaev/ellama) +- [Emacs client](https://github.com/zweifisch/ollama) +- [neollama](https://github.com/paradoxical-dev/neollama) UI client for interacting with models from within Neovim +- [gen.nvim](https://github.com/David-Kunz/gen.nvim) +- [ollama.nvim](https://github.com/nomnivore/ollama.nvim) +- [ollero.nvim](https://github.com/marco-souza/ollero.nvim) +- [ollama-chat.nvim](https://github.com/gerazov/ollama-chat.nvim) +- [ogpt.nvim](https://github.com/huynle/ogpt.nvim) +- [gptel Emacs client](https://github.com/karthink/gptel) +- [Oatmeal](https://github.com/dustinblackman/oatmeal) +- [cmdh](https://github.com/pgibler/cmdh) +- [ooo](https://github.com/npahlfer/ooo) +- [shell-pilot](https://github.com/reid41/shell-pilot) (Interact with models via pure shell scripts on Linux or macOS) +- [tenere](https://github.com/pythops/tenere) +- [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/). 
+- [typechat-cli](https://github.com/anaisbetts/typechat-cli) +- [ShellOracle](https://github.com/djcopley/ShellOracle) +- [tlm](https://github.com/yusufcanb/tlm) +- [podman-ollama](https://github.com/ericcurtin/podman-ollama) +- [gollama](https://github.com/sammcj/gollama) +- [ParLlama](https://github.com/paulrobello/parllama) +- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/) +- [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe) +- [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor +- [x-cmd ollama](https://x-cmd.com/mod/ollama) +- [bb7](https://github.com/drunkwcodes/bb7) +- [SwollamaCLI](https://github.com/marcusziade/Swollama) bundled with the Swollama Swift package. [Demo](https://github.com/marcusziade/Swollama?tab=readme-ov-file#cli-usage) +- [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more. +- [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama +- [DeepShell](https://github.com/Abyss-c0re/deepshell) Your self-hosted AI assistant. Interactive Shell, Files and Folders analysis. +- [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama. +- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal. 
+- [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform) +- [AWS-Strands-With-Ollama](https://github.com/rapidarchitect/ollama_strands) - AWS Strands Agents with Ollama Examples +- [ollama-multirun](https://github.com/attogram/ollama-multirun) - A bash shell script to run a single prompt against any or all of your locally installed ollama models, saving the output and performance statistics as easily navigable web pages. ([Demo](https://attogram.github.io/ai_test_zone/)) +- [ollama-bash-toolshed](https://github.com/attogram/ollama-bash-toolshed) - Bash scripts to chat with tool using models. Add new tools to your shed with ease. Runs on Ollama. + +### Apple Vision Pro + +- [SwiftChat](https://github.com/aws-samples/swift-chat) (Cross-platform AI chat app supporting Apple Vision Pro via "Designed for iPad") +- [Enchanted](https://github.com/AugustDev/enchanted) + +### Database + +- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector) + - [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md) +- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps) +- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama) +- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases) + +### Package managers + +- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/) +- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama) +- [Homebrew](https://formulae.brew.sh/formula/ollama) +- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama) +- [Guix 
channel](https://codeberg.org/tusharhero/ollama-guix) +- [Nix package](https://search.nixos.org/packages?show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama) +- [Flox](https://flox.dev/blog/ollama-part-one) + +### Libraries + +- [LangChain](https://python.langchain.com/docs/integrations/chat/ollama/) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/) +- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama) +- [crewAI](https://github.com/crewAIInc/crewAI) +- [Yacana](https://remembersoftwares.github.io/yacana/) (User-friendly multi-agent framework for brainstorming and executing predetermined flows with built-in tool integration) +- [Spring AI](https://github.com/spring-projects/spring-ai) with [reference](https://docs.spring.io/spring-ai/reference/api/chat/ollama-chat.html) and [example](https://github.com/tzolov/ollama-tools) +- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example) +- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java) +- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs) +- [LangChain for .NET](https://github.com/tryAGI/LangChain) with [example](https://github.com/tryAGI/LangChain/blob/main/examples/LangChain.Samples.OpenAI/Program.cs) +- [LLPhant](https://github.com/theodo-group/LLPhant?tab=readme-ov-file#ollama) +- [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama) +- [LiteLLM](https://github.com/BerriAI/litellm) +- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm) +- [OllamaSharp for 
.NET](https://github.com/awaescher/OllamaSharp) +- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai) +- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs) +- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp) +- [Ollama4j for Java](https://github.com/ollama4j/ollama4j) +- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama) +- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit) +- [Ollama for Dart](https://github.com/breitburg/dart-ollama) +- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel) +- [LangChainDart](https://github.com/davidmigloz/langchain_dart) +- [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama) +- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md) +- [Elixir LangChain](https://github.com/brainlid/langchain) +- [Ollama for R - rollama](https://github.com/JBGruber/rollama) +- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r) +- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex) +- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama) +- [Testcontainers](https://testcontainers.com/modules/ollama/) +- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama) +- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama) +- [LlamaScript](https://github.com/Project-Llama/llamascript) +- [llm-axe](https://github.com/emirsahin1/llm-axe) (Python Toolkit for Building LLM Powered Apps) +- [Gollm](https://docs.gollm.co/examples/ollama-example) +- [Gollama for Golang](https://github.com/jonathanhecl/gollama) +- [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient) +- [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun) +- [Ollama 
PHP](https://github.com/ArdaGnsrn/ollama-php) +- [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama) +- [Parakeet](https://github.com/parakeet-nest/parakeet) is a GoLang library, made to simplify the development of small generative AI applications with Ollama. +- [Haverscript](https://github.com/andygill/haverscript) with [examples](https://github.com/andygill/haverscript/tree/main/examples) +- [Ollama for Swift](https://github.com/mattt/ollama-swift) +- [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/) +- [GoLamify](https://github.com/prasad89/golamify) +- [Ollama for Haskell](https://github.com/tusharad/ollama-haskell) +- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API) +- [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs) +- [Ollama for Zig](https://github.com/dravenk/ollama-zig) +- [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider) +- [Nichey](https://github.com/goodreasonai/nichey) is a Python package for generating custom wikis for your research topic +- [Ollama for D](https://github.com/kassane/ollama-d) +- [OllamaPlusPlus](https://github.com/HardCodeDev777/OllamaPlusPlus) (Very simple C++ library for Ollama) + +### Mobile + +- [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS, and iPad) +- [Enchanted](https://github.com/AugustDev/enchanted) +- [Maid](https://github.com/Mobile-Artificial-Intelligence/maid) +- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for 
Ollama) +- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption) +- [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) (No need for Termux, start the Ollama service with one click on an Android device) +- [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.) + +### Extensions & Plugins + +- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama) +- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel) +- [Continue](https://github.com/continuedev/continue) +- [Vibe](https://github.com/thewh1teagle/vibe) (Transcribe and analyze meetings with Ollama) +- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama) +- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq) +- [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin) +- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot) +- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) +- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram) +- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation) +- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama) +- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot) +- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support) +- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot) +- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt) +- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama) +- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama) +- [Ollama 
Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot) +- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama) +- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face) +- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension) +- [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama model) +- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend) +- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support) +- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation) +- [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467) +- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities. +- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server) +- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front-end Open WebUI service.) +- [node-red-contrib-ollama](https://github.com/jakubburkiewicz/node-red-contrib-ollama) +- [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. 
Includes secure storage for user prompts.) +- [vnc-lm](https://github.com/jake83741/vnc-lm) (Discord bot for messaging with LLMs through Ollama and LiteLLM. Seamlessly move between local and flagship models.) +- [LSP-AI](https://github.com/SilasMarvin/lsp-ai) (Open-source language server for AI-powered functionality) +- [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator) +- [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator) +- [AI Summary Helper plugin](https://github.com/philffm/ai-summary-helper) +- [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama) +- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow) +- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language +- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai) +- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.) +- [mcp-llm](https://github.com/sammcj/mcp-llm) (MCP Server to allow LLMs to call other LLMs) +- [SimpleOllamaUnity](https://github.com/HardCodeDev777/SimpleOllamaUnity) (Unity Engine extension for communicating with Ollama in a few lines of code. Also works at runtime) +- [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Editor tool to analyze scripts via Ollama) +- [NativeMind](https://github.com/NativeMindBrowser/NativeMindExtension) (Private, on-device AI Assistant, no cloud dependencies) + +### Supported backends + +- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 
+ +### Observability +- [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration with Ollama. +- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing. +- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics. +- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production. +- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications. +- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications. diff --git a/Dockerfile-cpu b/Dockerfile-cpu new file mode 100644 index 00000000000..d28aabb99be --- /dev/null +++ b/Dockerfile-cpu @@ -0,0 +1,78 @@ +# vim: filetype=dockerfile + +ARG FLAVOR=${TARGETARCH} + +ARG ROCMVERSION=6.3.3 +ARG JETPACK5VERSION=r35.4.1 +ARG JETPACK6VERSION=r36.4.0 +ARG CMAKEVERSION=3.31.2 + +# CUDA v11 requires gcc v10. 
v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version +FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 +RUN yum install -y yum-utils \ + && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \ + && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \ + && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \ + && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo +ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH + + + + +FROM --platform=linux/arm64 almalinux:8 AS base-arm64 +# install epel-release for ccache +RUN yum install -y yum-utils epel-release \ + && dnf install -y clang ccache \ + && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo +ENV CC=clang CXX=clang++ + +FROM base-${TARGETARCH} AS base +ARG CMAKEVERSION +RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . +COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +ENV LDFLAGS=-s + +FROM base AS cpu +RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ +ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'CPU' \ + && cmake --build --parallel --preset 'CPU' \ + && cmake --install build --component CPU --strip --parallel 2 + + +FROM base AS build +ARG GOVERSION=1.24.4 +RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local +ENV PATH=/usr/local/go/bin:$PATH +WORKDIR /go/src/github.com/ollama/ollama +COPY . . 
+ARG GOFLAGS="'-ldflags=-w -s'" +ENV CGO_ENABLED=1 +RUN --mount=type=cache,target=/root/.cache/go-build \ + go build -trimpath -buildmode=pie -o /bin/ollama . + +FROM --platform=linux/amd64 scratch AS amd64 + +FROM --platform=linux/arm64 scratch AS arm64 + +FROM ${FLAVOR} AS archive +COPY --from=cpu dist/lib/ollama /lib/ollama +COPY --from=build /bin/ollama /bin/ollama + +FROM ubuntu:24.04 +RUN apt-get update \ + && apt-get install -y ca-certificates curl openssl \ + && apt-get install --only-upgrade -y libpam0g libpam-modules libpam-modules-bin libpam-runtime \ + libsystemd0 libudev1 \ + && update-ca-certificates \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +COPY --from=archive /bin /usr/bin +ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +COPY --from=archive /lib/ollama /usr/lib/ollama +ENV OLLAMA_HOST=0.0.0.0:11434 +EXPOSE 11434 +ENTRYPOINT ["/bin/ollama"] +CMD ["serve"] \ No newline at end of file diff --git a/build_cpu_artifactory.sh b/build_cpu_artifactory.sh new file mode 100755 index 00000000000..a548471acbe --- /dev/null +++ b/build_cpu_artifactory.sh @@ -0,0 +1,56 @@ +#!/bin/bash +set -eu + +# Artifactory configuration +REGISTRY=${REGISTRY:-""} +IMAGE_NAME=${IMAGE_NAME:-"ollama-cpu"} +VERSION=${VERSION:-"latest"} + +# Optional Docker repository path within the registry +# If using this path, ensure to add /${DOCKER_REPO} to the image name +# example: FULL_IMAGE_NAME="${REGISTRY}/${DOCKER_REPO}/${IMAGE_NAME}:${VERSION}" +#DOCKER_REPO=${DOCKER_REPO:-""} + +# Artifactory credentials (can be set via environment variables) +# Use API Key authentication for Artifactory +ARTIFACTORY_USERNAME=${ARTIFACTORY_USERNAME:-""} +ARTIFACTORY_API_KEY=${ARTIFACTORY_API_KEY:-""} + +# Target platforms +PLATFORMS=${PLATFORMS:-"linux/amd64,linux/arm64"} + +# Silent login if credentials are provided +if [ -n "$ARTIFACTORY_USERNAME" ] && [ -n "$ARTIFACTORY_API_KEY" ]; then + echo "Logging in to Artifactory at $REGISTRY as 
$ARTIFACTORY_USERNAME..." + echo "$ARTIFACTORY_API_KEY" | docker login -u "$ARTIFACTORY_USERNAME" --password-stdin "$REGISTRY" >/dev/null 2>&1 + echo "Login successful" +else + echo "Artifactory credentials not provided, assuming you're already logged in" +fi + +# Set up buildx if needed +BUILDER_NAME="multiarch-builder" +if ! docker buildx inspect ${BUILDER_NAME} &>/dev/null; then + echo "Creating new buildx builder: ${BUILDER_NAME}" + docker buildx create --name ${BUILDER_NAME} --driver docker-container --use +else + echo "Using existing buildx builder: ${BUILDER_NAME}" + docker buildx use ${BUILDER_NAME} +fi +docker buildx inspect --bootstrap + +# Build and push the multi-arch image +FULL_IMAGE_NAME="${REGISTRY}/${IMAGE_NAME}:${VERSION}" +echo "Building and pushing ${FULL_IMAGE_NAME} for platforms: ${PLATFORMS}" + +docker buildx build \ + --push \ + --platform ${PLATFORMS} \ + --output=type=image,push=true,registry.insecure=true \ + --tag ${FULL_IMAGE_NAME} \ + -f Dockerfile-cpu \ + . \ + --no-cache + +echo "Build and push completed successfully!" +echo "Image pushed to: ${FULL_IMAGE_NAME}" diff --git a/build_cpu_dockerhub.sh b/build_cpu_dockerhub.sh new file mode 100755 index 00000000000..9f6f8d171df --- /dev/null +++ b/build_cpu_dockerhub.sh @@ -0,0 +1,76 @@ +#!/bin/bash +set -eu + +# Set your organization and image name +ORG=${ORG:-""} +IMAGE_NAME=${IMAGE_NAME:-"ollama-cpu"} +VERSION=${VERSION:-"latest"} + +# Docker Hub credentials (can be set via environment variables) +DOCKER_USERNAME=${DOCKER_USERNAME:-""} +DOCKER_PASSWORD=${DOCKER_PASSWORD:-""} + +# Target platforms - same as Ollama's defaults +PLATFORMS=${PLATFORMS:-"linux/arm64,linux/amd64"} + +# Silent login if credentials are provided +if [ -n "$DOCKER_USERNAME" ] && [ -n "$DOCKER_PASSWORD" ]; then + echo "Logging in to Docker Hub as $DOCKER_USERNAME..." 
+ echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin >/dev/null 2>&1 + echo "Login successful" + + # If login successful, use the provided username as the org + if [ "$ORG" = "yourorg" ]; then + ORG=$DOCKER_USERNAME + echo "Using Docker username '$ORG' as organization" + fi +else + echo "Docker credentials not provided, assuming you're already logged in" +fi + +# Ensure QEMU is installed for cross-platform builds +echo "Setting up QEMU for cross-platform builds..." +docker run --privileged --rm tonistiigi/binfmt --install all + +# Set up buildx if needed +BUILDER_NAME="multiarch-builder" +if ! docker buildx inspect ${BUILDER_NAME} &>/dev/null; then + echo "Creating new buildx builder: ${BUILDER_NAME}" + docker buildx create --name ${BUILDER_NAME} --driver docker-container --use +else + docker buildx use ${BUILDER_NAME} +fi +docker buildx inspect --bootstrap + +# Set PUSH to a non-empty string to trigger push instead of load +PUSH=${PUSH:-""} +if [ -z "${PUSH}" ] ; then + echo "Building ${ORG}/${IMAGE_NAME}:${VERSION} locally. Set PUSH=1 to push" + # Note: --load only works for single platform, so if building locally, adjust PLATFORMS + if [[ "${PLATFORMS}" == *","* ]]; then + echo "WARNING: --load only works for single platform. Setting platform to linux/$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')" + PLATFORMS="linux/$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')" + fi + LOAD_OR_PUSH="--load" +else + echo "Will be pushing ${ORG}/${IMAGE_NAME}:${VERSION}" + LOAD_OR_PUSH="--push" +fi + +# Build and push/load the multi-arch image +echo "Building for platforms: ${PLATFORMS}" +docker buildx build \ + --provenance=true \ + --sbom=true \ + --network=host \ + ${LOAD_OR_PUSH} \ + --platform=${PLATFORMS} \ + -f Dockerfile-cpu \ + -t ${ORG}/${IMAGE_NAME}:${VERSION} \ + . + +echo "Build completed successfully!" 
+if [ -n "${PUSH}" ]; then + echo "Image pushed to: ${ORG}/${IMAGE_NAME}:${VERSION}" + echo "To pull: docker pull ${ORG}/${IMAGE_NAME}:${VERSION}" +fi \ No newline at end of file diff --git a/cmd/cmd.go b/cmd/cmd.go index 2d165379069..67d2759e523 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -1571,6 +1571,7 @@ func NewCLI() *cobra.Command { envVars["OLLAMA_LLM_LIBRARY"], envVars["OLLAMA_GPU_OVERHEAD"], envVars["OLLAMA_LOAD_TIMEOUT"], + envVars["OLLAMA_SKIP_MEMORY_CHECK"], }) default: appendEnvDocs(cmd, envs) diff --git a/docs/faq.md b/docs/faq.md index 6fe6334146f..13b6f424c54 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -333,3 +333,23 @@ The currently available K/V cache quantization types are: How much the cache quantization impacts the model's response quality will depend on the model and the task. Models that have a high GQA count (e.g. Qwen2) may see a larger impact on precision from quantization than models with a low GQA count. You may need to experiment with different quantization types to find the best balance between memory usage and quality. + + + +## How do I bypass the available memory check before loading a model? + +By default, Ollama checks whether your system has sufficient available memory before loading a model to prevent out-of-memory errors that could crash your system or cause instability. +You can bypass this safety check by setting the `OLLAMA_SKIP_MEMORY_CHECK` environment variable to `1`. 
+ +### When to use this option + +- You have swap space configured and accept slower performance +- You're running on a system with non-standard memory reporting +- You're debugging memory-related issues +- You understand the risks and have adequate system monitoring + +### Important Warnings + +- System instability: Loading models without sufficient memory can cause system freezes or crashes +- Performance degradation: Your system may become unresponsive due to excessive swapping +- Data loss risk: System crashes could result in unsaved work being lost \ No newline at end of file diff --git a/envconfig/config.go b/envconfig/config.go index 763f0464668..3b2ebd8089b 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -226,6 +226,12 @@ var ( MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512) ) +var ( + // Bypass the memory check during model load. This is an expert-only setting, to be used in situations where the system is guaranteed + // to have enough memory or is able to procure it at runtime by evicting blocks from caches, e.g. the ZFS ARC cache. + AvailableMemoryCheckOverride = Uint("OLLAMA_SKIP_MEMORY_CHECK", 0) +) + func Uint64(key string, defaultValue uint64) func() uint64 { return func() uint64 { if s := Var(key); s != "" { @@ -275,6 +281,9 @@ func AsMap() map[string]EnvVar { "HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"}, "HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"}, "NO_PROXY": {"NO_PROXY", String("NO_PROXY")(), "No proxy"}, + + // Overrides + "OLLAMA_SKIP_MEMORY_CHECK": {"OLLAMA_SKIP_MEMORY_CHECK", AvailableMemoryCheckOverride(), "Bypass checking for available memory before loading models. (e.g. 
OLLAMA_SKIP_MEMORY_CHECK=1)"}, } if runtime.GOOS != "windows" { diff --git a/llm/memory.go b/llm/memory.go index b5300004642..a4e07fa8326 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -1,6 +1,7 @@ package llm import ( + "bufio" "fmt" "log/slog" "os" @@ -438,3 +439,55 @@ func projectorMemoryRequirements(filename string) (weights uint64) { return weights } + +// GetZFSReclaimableMemory returns max(0, size - c_min) from the ZFS ARC stats. +// Added to fix the ARC memory cache issue on ZFS. +// It returns an error when no ARC stats file can be opened (no ZFS) or when size or c_min is missing or zero. +func GetZFSReclaimableMemory() (uint64, error) { + paths := []string{"/proc/spl/kstat/zfs/arcstats", "/proc/zfs/arcstats"} + var f *os.File + for _, path := range paths { + if file, err := os.Open(path); err == nil { + f = file + break + } + } + if f == nil { + return 0, fmt.Errorf("no ZFS ARC stats found") + } + defer f.Close() + + var size, cmin uint64 + scanner := bufio.NewScanner(f) + for scanner.Scan() { + cols := strings.Fields(scanner.Text()) + if len(cols) < 3 { + continue + } + var err error + var val uint64 + + val, err = strconv.ParseUint(cols[2], 10, 64) + if err != nil { + continue + } + switch cols[0] { + case "size": + size = val + case "c_min": + cmin = val + default: + continue + } + } + if err := scanner.Err(); err != nil { + return 0, err + } + if size <= 0 || cmin <= 0 { + return 0, fmt.Errorf("failed to read ZFS ARC stats") + } + if size > cmin { + return size - cmin, nil + } + return 0, nil +} diff --git a/llm/server.go b/llm/server.go index 7d921f14437..a47cc17c088 100644 --- a/llm/server.go +++ b/llm/server.go @@ -161,14 +161,30 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a } } - // On linux and windows, over-allocating CPU memory will almost always result in an error - // Darwin has fully dynamic swap so has no direct concept of free swap space - if runtime.GOOS != "darwin" { - systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize - available := systemFreeMemory + 
systemSwapFreeMemory - if systemMemoryRequired > available { - slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory)) - return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available)) + // Env variable to bypass ollama's memory check guardrail. + if envconfig.AvailableMemoryCheckOverride() == 1 { + slog.Warn("OLLAMA_SKIP_MEMORY_CHECK set; bypassing memory checks") + } else { + // On linux and windows, over-allocating CPU memory will almost always result in an error + // Darwin has fully dynamic swap so has no direct concept of free swap space + slog.Debug("OLLAMA_SKIP_MEMORY_CHECK not set; running memory checks") + if runtime.GOOS != "darwin" { + systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize + available := systemFreeMemory + systemSwapFreeMemory + + // On Linux, reclaim ZFS ARC (size – c_min) + if runtime.GOOS == "linux" { + if reclaim, err := GetZFSReclaimableMemory(); err == nil { + slog.Info("reclaiming ZFS Arc cache size:", "size", format.HumanBytes2(reclaim)) + available += reclaim + } else { + slog.Warn("failure while computing ZFS Arc cache size:", "error", err) + } + } + if systemMemoryRequired > available { + slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory)) + return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available)) + } } } diff --git a/macapp/package-lock.json b/macapp/package-lock.json index 
bacc2a37e52..c5cdf18d9da 100644 --- a/macapp/package-lock.json +++ b/macapp/package-lock.json @@ -46,7 +46,7 @@ "chmodr": "^1.2.0", "copy-webpack-plugin": "^11.0.0", "css-loader": "^6.8.1", - "electron": "25.9.2", + "electron": "38.0.0", "eslint": "^8.43.0", "eslint-plugin-import": "^2.27.5", "fork-ts-checker-webpack-plugin": "^7.3.0", @@ -7672,13 +7672,14 @@ "dev": true }, "node_modules/electron": { - "version": "25.9.2", - "resolved": "https://registry.npmjs.org/electron/-/electron-25.9.2.tgz", - "integrity": "sha512-hVBN5rsrL99BKNHvzMeYy2PkAmewuIobu4U3o3EzVz4MDoLmMfW4yTH5GZ4RbJrpokoEky5IzGtRR/ggPzL6Fw==", + "version": "38.0.0", + "resolved": "https://registry.npmjs.org/electron/-/electron-38.0.0.tgz", + "integrity": "sha512-egljptiPJqbL/oamFCEY+g3RNeONWTVxZSGeyLqzK8xq106JhzuxnhJZ3sxt4DzJFaofbGyGJA37Oe9d+gVzYw==", "hasInstallScript": true, + "license": "MIT", "dependencies": { "@electron/get": "^2.0.0", - "@types/node": "^18.11.18", + "@types/node": "^22.7.7", "extract-zip": "^2.0.1" }, "bin": { @@ -8121,9 +8122,13 @@ } }, "node_modules/electron/node_modules/@types/node": { - "version": "18.16.18", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.18.tgz", - "integrity": "sha512-/aNaQZD0+iSBAGnvvN2Cx92HqE5sZCPZtx2TsK+4nvV23fFe09jVDvpArXr2j9DnYlzuU9WuoykDDc6wqvpNcw==" + "version": "22.18.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.18.0.tgz", + "integrity": "sha512-m5ObIqwsUp6BZzyiy4RdZpzWGub9bqLJMvZDD0QMXhxjqMHMENlj+SqF5QxoUwaQNFe+8kz8XM8ZQhqkQPTgMQ==", + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } }, "node_modules/emoji-regex": { "version": "8.0.0", @@ -15462,6 +15467,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "license": "MIT" + }, 
"node_modules/unicode-canonical-property-names-ecmascript": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.0.tgz", diff --git a/macapp/package.json b/macapp/package.json index 088ec0b1a86..d1f714d272d 100644 --- a/macapp/package.json +++ b/macapp/package.json @@ -44,7 +44,7 @@ "chmodr": "^1.2.0", "copy-webpack-plugin": "^11.0.0", "css-loader": "^6.8.1", - "electron": "25.9.2", + "electron": "38.0.0", "eslint": "^8.43.0", "eslint-plugin-import": "^2.27.5", "fork-ts-checker-webpack-plugin": "^7.3.0", diff --git a/server/routes.go b/server/routes.go index cb46cef11d4..7e48bdfeef0 100644 --- a/server/routes.go +++ b/server/routes.go @@ -48,9 +48,16 @@ func experimentEnabled(name string) bool { return slices.Contains(strings.Split(os.Getenv("OLLAMA_EXPERIMENT"), ","), name) } +func getEnvOrDefault(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} + var useClient2 = experimentEnabled("client2") -var mode string = gin.DebugMode +var mode string = getEnvOrDefault("GIN_MODE", gin.DebugMode) type Server struct { addr net.Addr