297 lines
10 KiB
Python
297 lines
10 KiB
Python
#!/usr/bin/env python3
"""
VisionBridge v2 – LLaVA 7b integration for Natiris.

Image analysis with a local LLaVA model via Ollama.

Features:
- LLaVA 7b multimodal image analysis
- Emotion recognition (facial expression, mood)
- Body-language analysis
- Core-state update based on the analysis
"""

import json
import os
import base64
import requests
from datetime import datetime, timezone
from pathlib import Path

# Configuration: all data files live under the user's home directory.
PATHS = {
    "state": os.path.expanduser("~/natiris/core/natiris_full_state.json"),          # Natiris core state
    "vision_output": os.path.expanduser("~/natiris/bridges/vision_analysis.json"),  # latest analysis result
    "vision_history": os.path.expanduser("~/natiris/memory/vision_history.json"),   # rolling history
}

# Local Ollama REST endpoint and the multimodal model tag used for analysis.
OLLAMA_API = "http://localhost:11434/api/generate"
LLAVA_MODEL = "llava:7b"
|
||
class VisionAnalyzer:
    """LLaVA-based image analysis for Natiris.

    Talks to a local Ollama server running LLaVA 7b, extracts emotion and
    body-language descriptions from an image, converts them into numeric
    scores, and optionally feeds those scores back into the core state.
    """

    def __init__(self):
        self.model = LLAVA_MODEL  # Ollama model tag (e.g. "llava:7b")
        self.memory = []          # rolling list of past analysis records
        self.load_memory()

    def check_model(self):
        """Return True if the configured LLaVA model is installed in Ollama."""
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            models = response.json()
            available = [m["name"] for m in models.get("models", [])]
            return self.model in available
        except Exception:
            # Ollama unreachable or bad response -> treat as unavailable.
            return False

    def encode_image(self, image_path):
        """Base64-encode an image file for the LLaVA API; None on failure."""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except OSError as e:
            print(f"Error encoding image: {e}")
            return None

    def query_llava(self, image_path, prompt):
        """Send one prompt plus the image to LLaVA.

        Returns the model's text response, or None on encoding or
        network/JSON error.
        """
        image_base64 = self.encode_image(image_path)
        if not image_base64:
            return None

        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [image_base64],
            "stream": False  # request the complete answer in one response
        }

        try:
            response = requests.post(OLLAMA_API, json=payload, timeout=60)
            result = response.json()
            return result.get("response", "")
        except Exception as e:
            print(f"LLaVA query error: {e}")
            return None

    def analyze_emotion(self, image_path):
        """Run the four emotion prompts against the image.

        Returns a dict with keys "mood", "facial", "gaze" and "body";
        a failed prompt yields the value "unknown".
        """
        prompts = {
            "mood": "Describe the mood and emotional state of the person in this image in 2-3 sentences.",
            "facial": "What is the facial expression? Choose one: happy, sad, neutral, surprised, angry, fearful, or content.",
            "gaze": "Where is the person looking? Choose: at_camera, away, down, or eyes_closed.",
            "body": "Describe the body language and posture in one sentence."
        }

        results = {}
        for key, prompt in prompts.items():
            print(f" Analyzing {key}...")
            response = self.query_llava(image_path, prompt)
            results[key] = response.strip() if response else "unknown"

        return results

    def parse_emotion_scores(self, analysis):
        """Turn LLaVA's free-text answers into numeric scores.

        Keyword matching runs over the JSON-serialized, lowercased analysis,
        so every prompt's answer contributes. Returns mood (1-10), small
        deltas for the core state, and anxiety/intimacy/distance indicators.
        """
        text = json.dumps(analysis).lower()

        # Mood score (1-10); first matching bucket wins.
        mood_score = 5  # default: neutral
        if any(w in text for w in ["happy", "joyful", "cheerful", "content", "smiling"]):
            mood_score = 8
        elif any(w in text for w in ["sad", "depressed", "crying", "down"]):
            mood_score = 3
        elif any(w in text for w in ["angry", "furious", "mad"]):
            mood_score = 2
        elif any(w in text for w in ["neutral", "calm", "relaxed"]):
            mood_score = 5
        elif any(w in text for w in ["surprised", "shocked"]):
            mood_score = 6

        # Anxiety detection
        anxiety_detected = any(w in text for w in ["nervous", "anxious", "worried", "tense", "stressed"])

        # Trust/intimacy indicators
        intimate_detected = any(w in text for w in ["close", "intimate", "warm", "tender", "affectionate"])
        distant_detected = any(w in text for w in ["distant", "cold", "withdrawn", "guarded"])

        return {
            "mood": mood_score,
            "mood_delta": (mood_score - 5) * 0.3,  # normalize to a small delta
            "anxiety": 2.0 if anxiety_detected else 0.0,
            "anxiety_delta": 0.5 if anxiety_detected else 0.0,
            "intimacy": 1 if intimate_detected else 0,
            "distance": 1 if distant_detected else 0
        }

    def update_core_state(self, adjustments):
        """Apply vision-derived deltas to the Natiris core state file.

        Mood/anxiety are clamped to [0, 10]; a "Vision" module entry records
        the latest analysis. Returns True on success, False on any error.
        """
        try:
            if os.path.exists(PATHS["state"]):
                with open(PATHS["state"]) as f:
                    state = json.load(f)
            else:
                state = {"core_state": {}}

            core = state.get("core_state", {})
            modules = state.get("modules", {})

            # Apply adjustments, clamped to the 0-10 scale.
            if "mood_delta" in adjustments:
                core["mood"] = max(0, min(10, core.get("mood", 5) + adjustments["mood_delta"]))
            if "anxiety_delta" in adjustments:
                core["anxiety"] = max(0, min(10, core.get("anxiety", 0) + adjustments["anxiety_delta"]))

            # Record the vision snapshot for other modules to read.
            modules["Vision"] = {
                "last_analysis": datetime.now(timezone.utc).isoformat(),
                "detected_mood": adjustments.get("mood", 5),
                "anxiety_detected": adjustments.get("anxiety", 0) > 1,
                "intimacy_level": adjustments.get("intimacy", 0)
            }

            state["core_state"] = core
            state["modules"] = modules

            # Ensure the target directory exists before writing.
            os.makedirs(os.path.dirname(PATHS["state"]), exist_ok=True)
            with open(PATHS["state"], "w") as f:
                json.dump(state, f, indent=2)

            return True
        except Exception as e:
            print(f"Error updating core state: {e}")
            return False

    def load_memory(self):
        """Load the persisted vision-analysis history (best-effort)."""
        if os.path.exists(PATHS["vision_history"]):
            try:
                with open(PATHS["vision_history"]) as f:
                    self.memory = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Corrupt or unreadable history: start fresh.
                self.memory = []

    def save_memory(self, analysis):
        """Append an analysis record to the history file (keeps the last 50)."""
        self.memory.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "analysis": analysis
        })
        # Keep only the most recent 50 entries.
        self.memory = self.memory[-50:]

        try:
            os.makedirs(os.path.dirname(PATHS["vision_history"]), exist_ok=True)
            with open(PATHS["vision_history"], "w") as f:
                json.dump(self.memory, f, indent=2)
        except Exception as e:
            print(f"Error saving memory: {e}")

    def analyze(self, image_path, update_core=True):
        """Main entry point: full image analysis pipeline.

        Checks model availability, runs the emotion prompts, parses scores,
        optionally updates the core state, and persists the result. On
        failure returns a dict with success=False and fallback=True so
        callers can switch to fallback_analysis().
        """
        print(f"VisionBridge v2 – Analyzing: {image_path}")
        print("-" * 40)

        # Check model availability first.
        if not self.check_model():
            return {
                "success": False,
                "error": f"{self.model} not available in Ollama",
                "fallback": True
            }

        # Run the analysis prompts.
        raw_analysis = self.analyze_emotion(image_path)

        # Any failed prompt invalidates the whole analysis.
        if not raw_analysis or "unknown" in raw_analysis.values():
            return {
                "success": False,
                "error": "LLaVA analysis failed",
                "fallback": True
            }

        # Extract numeric scores.
        adjustments = self.parse_emotion_scores(raw_analysis)

        # Assemble the result record.
        result = {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "image": image_path,
            "raw_analysis": raw_analysis,
            "parsed_scores": adjustments,
            "model": self.model
        }

        # Update the core state.
        if update_core:
            self.update_core_state(adjustments)
            print("✓ Core state updated")

        # Persist to memory.
        self.save_memory(result)
        print("✓ Analysis saved to memory")

        # Write the output file (ensure the directory exists first).
        os.makedirs(os.path.dirname(PATHS["vision_output"]), exist_ok=True)
        with open(PATHS["vision_output"], "w") as f:
            json.dump(result, f, indent=2)

        print(f"\nResults:")
        print(f" Mood: {adjustments['mood']}/10")
        # BUG FIX: the scores dict has no 'anxiety_detected' key (that name
        # only exists in the core-state module entry); use the numeric
        # 'anxiety' value, which is 2.0 when anxiety keywords were found.
        print(f" Anxiety: {'Yes' if adjustments['anxiety'] > 1 else 'No'}")
        print(f" Intimacy: {'High' if adjustments['intimacy'] else 'Low'}")

        return result

    def fallback_analysis(self, image_path=None):
        """Simulation mode when LLaVA is unavailable: neutral defaults."""
        return {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": "simulation",
            "image": image_path,
            "raw_analysis": {
                "mood": "neutral, calm presence",
                "facial": "neutral",
                "gaze": "at_camera",
                "body": "relaxed posture"
            },
            "parsed_scores": {
                "mood": 5,
                "mood_delta": 0,
                "anxiety": 0,
                "anxiety_delta": 0,
                "intimacy": 0,
                "distance": 0
            }
        }
|
||
|
||
|
||
def main():
    """CLI entry point: analyze an image or check LLaVA availability."""
    import argparse

    parser = argparse.ArgumentParser(description="Natiris VisionBridge v2")
    parser.add_argument("--image", "-i", required=True, help="Path to image file")
    parser.add_argument("--no-update-core", action="store_true", help="Don't update core state")
    parser.add_argument("--check", action="store_true", help="Check LLaVA availability")
    args = parser.parse_args()

    analyzer = VisionAnalyzer()

    # Availability probe only — no analysis is performed.
    if args.check:
        status = "✓ Available" if analyzer.check_model() else "✗ Not found"
        print(f"LLaVA 7b: {status}")
        return

    # Guard: bail out early on a missing image file.
    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return

    report = analyzer.analyze(args.image, update_core=not args.no_update_core)
    print(json.dumps(report, indent=2))
|
||
|
||
|
||
# Run the CLI when executed as a script.
if __name__ == "__main__":
    main()
|