Features:
- Langzeit- und Kurzzeitgedächtnis mit SQLite
- Ollama-Integration für lokale LLMs
- Flask-Webinterface mit Stream-Response
- Persona-System mit konfigurierbarem Charakter
- Auto-Zusammenfassung bei Token-Limit
- Rate Limiting und Sicherheitsfeatures
- Uncensored Modell-Support
151 lines
5.2 KiB
Python
151 lines
5.2 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
from typing import Generator, List, Dict, Optional
|
|
import logging
|
|
|
|
# Module-wide logger under the 'nimue' namespace; handlers/level are
# presumably configured by the application entry point — TODO confirm.
logger = logging.getLogger('nimue')
|
|
|
|
class OllamaClient:
    """Streaming client for a local Ollama server: chat generation,
    model availability checks, model listing and model pulls."""

    def __init__(self, config: dict):
        """
        Args:
            config: Mapping with 'host' (Ollama base URL), 'model'
                (model name) and 'timeout' (request timeout in seconds).
        """
        # Strip a trailing slash so f"{self.host}/api/..." never builds "//api".
        self.host = config['host'].rstrip('/')
        self.model = config['model']
        self.timeout = config['timeout']
        # One session so HTTP connections to the Ollama host are pooled/reused.
        self.session = requests.Session()

    def _prepare_messages(self, system_prompt: str, context: List[Dict], user_message: str) -> List[Dict]:
        """Build the chat message list: system prompt first (if any),
        then the memory context, then the new user turn last."""
        messages: List[Dict] = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        # Copy only role/content from context entries; drop any extra keys.
        for msg in context:
            messages.append({"role": msg['role'], "content": msg['content']})

        messages.append({"role": "user", "content": user_message})
        return messages

    def generate(self,
                 system_prompt: str,
                 context: List[Dict],
                 user_message: str,
                 options: Optional[Dict] = None) -> Generator[str, None, None]:
        """
        Stream a chat response from the Ollama /api/chat endpoint.

        Args:
            system_prompt: Persona/system instructions (omitted if empty).
            context: Prior conversation turns as {'role', 'content'} dicts.
            user_message: New user input, appended last.
            options: Sampling options; defaults to a fairly creative preset.

        Yields:
            Response text chunks as they arrive. Errors are reported
            in-band as persona-voiced strings instead of being raised,
            so the web UI always has something to display.
        """
        messages = self._prepare_messages(system_prompt, context, user_message)

        payload = {
            "model": self.model,
            "messages": messages,
            "stream": True,
            "options": options or {
                "temperature": 0.9,
                "top_p": 0.9,
                "top_k": 40
            }
        }

        try:
            response = self.session.post(
                f"{self.host}/api/chat",
                json=payload,
                stream=True,
                timeout=self.timeout
            )
            response.raise_for_status()

            full_response = ""

            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Skip partial or garbled stream lines.
                    continue

                # Ollama reports server-side failures as an in-stream object;
                # previously these were silently ignored.
                if 'error' in data:
                    logger.error("Ollama stream error: %s", data['error'])
                    break

                if 'message' in data and 'content' in data['message']:
                    chunk = data['message']['content']
                    full_response += chunk
                    yield chunk

                # Final stream object carries done=True.
                if data.get('done', False):
                    break

            logger.info("Generated %d characters", len(full_response))

        except requests.exceptions.ConnectionError:
            logger.error("Cannot connect to Ollama at %s", self.host)
            yield "*softly* I'm having trouble connecting to my thoughts... Please check if Ollama is running."
        except requests.exceptions.Timeout:
            logger.error("Ollama request timed out")
            yield "*breathes deeply* I need a moment... the thoughts are coming slowly."
        except Exception as e:
            logger.error("Error generating response: %s", e)
            yield "*whispers* Something went wrong... please try again."

    def check_model(self) -> bool:
        """Return True if the configured model is installed on the server,
        False on a missing model, a non-200 response, or any error."""
        try:
            response = self.session.get(f"{self.host}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                models = [m['name'] for m in data.get('models', [])]
                if self.model in models:
                    return True
                logger.warning("Model %s not found. Available: %s", self.model, models)
            # Fix: the original fell off the end and implicitly returned
            # None (not False) on non-200 responses.
            return False
        except Exception as e:
            logger.error("Cannot reach Ollama: %s", e)
            return False

    def list_models(self) -> List[str]:
        """Return the names of all installed models; empty list on any failure
        (best-effort — callers treat an empty list as 'unknown')."""
        try:
            response = self.session.get(f"{self.host}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                return [m['name'] for m in data.get('models', [])]
        except Exception:
            pass
        return []

    def pull_model(self, model_name: str) -> Generator[str, None, None]:
        """
        Pull a model from the Ollama library, streaming progress text.

        Args:
            model_name: Name of the model to download.

        Yields:
            Human-readable status/progress lines; an error line on failure.
        """
        try:
            response = self.session.post(
                f"{self.host}/api/pull",
                json={"name": model_name},
                stream=True
            )
            # Fix: surface HTTP errors instead of iterating an error body
            # as if it were pull progress.
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Fix: was a bare `except: pass` swallowing everything,
                    # including KeyboardInterrupt/SystemExit.
                    continue

                # Surface in-stream errors reported by the server.
                if 'error' in data:
                    yield f"Error pulling model: {data['error']}"
                    break

                if 'completed' in data:
                    yield f"Downloading... {data.get('completed', 0)}/{data.get('total', 0)}"
                else:
                    yield data.get('status', '')
        except Exception as e:
            yield f"Error pulling model: {e}"