Sanitized mirror from private repository - 2026-04-18 11:19:59 UTC
This commit is contained in:
36
hosts/vms/seattle/ollama.yaml
Normal file
36
hosts/vms/seattle/ollama.yaml
Normal file
@@ -0,0 +1,36 @@
# Ollama - Local LLM inference server
# OpenAI-compatible API for running local language models
# Port: 11434 (Ollama API), 8000 (OpenAI-compatible proxy)
#
# Ollama is much better suited for CPU inference than vLLM.
# It provides efficient CPU-based inference with automatic optimization.

services:
  ollama:
    # NOTE(review): ':latest' is an unpinned tag — consider pinning a
    # specific version for reproducible deploys.
    image: ollama/ollama:latest
    container_name: ollama-seattle
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs
      - "11434:11434"
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      # Keep loaded models resident between requests instead of unloading
      - OLLAMA_KEEP_ALIVE=24h
      # CPU-specific optimizations
      - OLLAMA_NUM_PARALLEL=2
      - OLLAMA_MAX_LOADED_MODELS=2
    volumes:
      # Persist model downloads
      - ollama-data:/root/.ollama
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '12'
          memory: 32G
        reservations:
          cpus: '4'
          memory: 8G

volumes:
  ollama-data:
    name: ollama-seattle-data
|
||||
Reference in New Issue
Block a user