nimue/nimue/ollama_client.py
arch_agent 27dcaf6552 Initial commit: Nimue AI Companion v1.0
- Long-term and short-term memory with SQLite
- Ollama integration for local LLMs
- Flask web interface with streaming responses
- Persona system with a configurable character
- Automatic summarization at the token limit
- Rate limiting and security features
- Uncensored model support
2026-04-14 07:44:36 +02:00

import requests
import json
import time
from typing import Generator, List, Dict, Optional
import logging

logger = logging.getLogger('nimue')


class OllamaClient:
    def __init__(self, config: dict):
        self.host = config['host']
        self.model = config['model']
        self.timeout = config['timeout']
        self.session = requests.Session()
    def _prepare_messages(self, system_prompt: str, context: List[Dict],
                          user_message: str) -> List[Dict]:
        """Prepare the message list for the Ollama API"""
        messages = []

        # System prompt first
        if system_prompt:
            messages.append({
                "role": "system",
                "content": system_prompt
            })

        # Add context (memory)
        for msg in context:
            messages.append({
                "role": msg['role'],
                "content": msg['content']
            })

        # User message last
        messages.append({
            "role": "user",
            "content": user_message
        })

        return messages
    def generate(self,
                 system_prompt: str,
                 context: List[Dict],
                 user_message: str,
                 options: Optional[Dict] = None) -> Generator[str, None, None]:
        """
        Stream a response from the Ollama API.

        Yields tokens/chunks as they arrive.
        """
        messages = self._prepare_messages(system_prompt, context, user_message)

        payload = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "options": options or {
                "temperature": 0.9,
                "top_p": 0.9,
                "top_k": 40
            }
        }

        try:
            response = self.session.post(
                f"{self.host}/api/chat",
                json=payload,
                stream=True,
                timeout=self.timeout
            )
            response.raise_for_status()

            full_response = ""
            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line)
                        if 'message' in data and 'content' in data['message']:
                            chunk = data['message']['content']
                            full_response += chunk
                            yield chunk

                        # Check for completion
                        if data.get('done', False):
                            break
                    except json.JSONDecodeError:
                        continue

            logger.info(f"Generated {len(full_response)} characters")

        except requests.exceptions.ConnectionError:
            logger.error(f"Cannot connect to Ollama at {self.host}")
            yield "*softly* I'm having trouble connecting to my thoughts... Please check if Ollama is running."
        except requests.exceptions.Timeout:
            logger.error("Ollama request timed out")
            yield "*breathes deeply* I need a moment... the thoughts are coming slowly."
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            yield "*whispers* Something went wrong... please try again."
    def check_model(self) -> bool:
        """Check whether the configured model is available"""
        try:
            response = self.session.get(f"{self.host}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                models = [m['name'] for m in data.get('models', [])]
                if self.model in models:
                    return True
                logger.warning(f"Model {self.model} not found. Available: {models}")
            # Non-200 responses also count as "not available"
            return False
        except Exception as e:
            logger.error(f"Cannot reach Ollama: {e}")
            return False
    def list_models(self) -> List[str]:
        """List available models"""
        try:
            response = self.session.get(f"{self.host}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                return [m['name'] for m in data.get('models', [])]
        except Exception:
            pass
        return []
    def pull_model(self, model_name: str) -> Generator[str, None, None]:
        """Pull a model from the Ollama library"""
        try:
            response = self.session.post(
                f"{self.host}/api/pull",
                json={"name": model_name},
                stream=True
            )
            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line)
                        status = data.get('status', '')
                        if 'completed' in data:
                            yield f"Downloading... {data.get('completed', 0)}/{data.get('total', 0)}"
                        else:
                            yield status
                    except json.JSONDecodeError:
                        continue
        except Exception as e:
            yield f"Error pulling model: {e}"