Files
homelab-optimized/scripts/lib/ollama.py
Gitea Mirror Bot d3fa5d354a
Some checks failed
Documentation / Build Docusaurus (push) Failing after 17m1s
Documentation / Deploy to GitHub Pages (push) Has been skipped
Sanitized mirror from private repository - 2026-04-06 21:14:57 UTC
2026-04-06 21:14:57 +00:00

78 lines
2.3 KiB
Python

"""Ollama LLM client with retry and response cleaning."""
import json
import logging
import re
import time
import urllib.request
import urllib.error
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Base URL of the Ollama server; used as the default `url` argument of
# ollama_available() and ollama_generate(). Trailing slashes are stripped by
# those functions before an API path is appended.
DEFAULT_URL = "http://192.168.0.145:31434"
# Model tag sent in the request body when ollama_generate() is called without
# an explicit `model` argument.
DEFAULT_MODEL = "qwen3-coder:latest"
class OllamaUnavailableError(Exception):
    """Raised by ollama_generate() when every request attempt has failed.

    The exception message contains the number of attempts made and the text
    of the last underlying error.
    """
def ollama_available(url: str = DEFAULT_URL, timeout: float = 5) -> bool:
    """Quick health check — GET /api/tags.

    Args:
        url: Base URL of the Ollama server; trailing slashes are ignored.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        True if the request completed without raising, False otherwise.
        This function never raises: any failure (malformed URL, connection
        refused, timeout, HTTP error status, ...) is reported as False.
    """
    try:
        # Building the Request stays inside the try block on purpose: a URL
        # without a scheme raises ValueError at construction time, and that
        # must also count as "not available".
        req = urllib.request.Request(f"{url.rstrip('/')}/api/tags")
        # urlopen raises HTTPError for error statuses, so reaching the body of
        # the `with` means the server answered successfully.
        with urllib.request.urlopen(req, timeout=timeout):
            return True
    except Exception as e:
        # Deliberately broad: this is a best-effort probe. The reason is
        # logged at debug level so failures can still be diagnosed.
        log.debug("Ollama health check failed for %s: %s", url, e)
        return False
# Throttle state shared by all ollama_generate() calls in this process: the
# clock reading taken right before the most recent request was sent.
# Starts at 0.0, meaning no call has been made yet.
_last_call_time = 0.0
MIN_CALL_INTERVAL = 2.0 # seconds between calls to avoid overwhelming Ollama
def ollama_generate(
    prompt: str,
    model: str = DEFAULT_MODEL,
    url: str = DEFAULT_URL,
    max_retries: int = 3,
    timeout: int = 120,
    temperature: float = 0.3,
    num_predict: int = 2000,
) -> str:
    """Generate text from Ollama with retry + backoff. Returns cleaned response.

    Calls are throttled process-wide: if the previous call started less than
    MIN_CALL_INTERVAL seconds ago, this call sleeps for the remainder first.

    Args:
        prompt: Prompt text sent to the model.
        model: Model tag placed in the request body.
        url: Base URL of the Ollama server; trailing slashes are ignored.
        max_retries: Total number of request attempts. Values below 1 are
            treated as 1 so the server is always contacted at least once.
        timeout: Per-attempt timeout in seconds, passed to urlopen.
        temperature: Forwarded as the ``temperature`` entry of ``options``.
        num_predict: Forwarded as the ``num_predict`` entry of ``options``.

    Returns:
        The ``response`` field of the server's JSON reply, stripped of
        surrounding whitespace and with any ``<think>...</think>`` sections
        removed. A missing or null ``response`` yields an empty string.

    Raises:
        OllamaUnavailableError: If every attempt failed with a network/HTTP
            error or an undecodable reply. The last underlying error is
            attached as ``__cause__``.
    """
    global _last_call_time

    # Throttle with a monotonic clock: a wall-clock adjustment can then never
    # yield a negative elapsed time and, with it, an oversized sleep.
    elapsed = time.monotonic() - _last_call_time
    if elapsed < MIN_CALL_INTERVAL:
        time.sleep(MIN_CALL_INTERVAL - elapsed)
    _last_call_time = time.monotonic()

    payload = json.dumps({
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature, "num_predict": num_predict},
    }).encode()
    # Supplying `data` makes urllib send this as a POST request.
    req = urllib.request.Request(
        f"{url.rstrip('/')}/api/generate",
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    attempts = max(1, max_retries)
    last_error = None
    for attempt in range(attempts):
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                result = json.loads(resp.read())
        except (
            urllib.error.URLError,  # includes HTTPError (HTTP error statuses)
            TimeoutError,
            OSError,
            json.JSONDecodeError,  # reply was not valid JSON
            UnicodeDecodeError,  # reply bytes were not decodable text
        ) as e:
            last_error = e
            # Exponential backoff (1s, 2s, 4s, ...); no wait after the final
            # attempt since nothing follows it.
            if attempt < attempts - 1:
                wait = 2 ** attempt
                log.warning("Ollama attempt %d/%d failed: %s — retrying in %ds",
                            attempt + 1, attempts, e, wait)
                time.sleep(wait)
        else:
            # `or ""` guards against an explicit null in the reply.
            raw = (result.get("response") or "").strip()
            # Drop <think>...</think> sections; DOTALL lets them span lines.
            return re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()

    raise OllamaUnavailableError(
        f"Ollama unavailable after {attempts} attempts: {last_error}"
    ) from last_error