Sanitized mirror from private repository - 2026-04-18 11:19:59 UTC
This commit is contained in:
36
hosts/vms/seattle/ollama.yaml
Normal file
36
hosts/vms/seattle/ollama.yaml
Normal file
@@ -0,0 +1,36 @@
# Ollama - Local LLM inference server
# OpenAI-compatible API for running local language models
# Port: 11434 (Ollama API), 8000 (OpenAI-compatible proxy)
#
# Ollama is much better suited for CPU inference than vLLM.
# It provides efficient CPU-based inference with automatic optimization.

services:
  ollama:
    # NOTE(review): ':latest' is an unpinned tag — consider pinning a
    # specific version for reproducible deploys.
    image: ollama/ollama:latest
    container_name: ollama-seattle
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs
      - "11434:11434"
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      # Keep loaded models resident between requests instead of unloading
      - OLLAMA_KEEP_ALIVE=24h
      # CPU-specific optimizations
      - OLLAMA_NUM_PARALLEL=2
      - OLLAMA_MAX_LOADED_MODELS=2
    volumes:
      # Persist model downloads
      - ollama-data:/root/.ollama
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '12'
          memory: 32G
        reservations:
          cpus: '4'
          memory: 8G

volumes:
  ollama-data:
    name: ollama-seattle-data
|
||||
Reference in New Issue
Block a user