# Source: natiris/bridges/VisionBridge_v2.py
#!/usr/bin/env python3
"""
VisionBridge v2 - LLaVA 7b integration for Natiris.

Image analysis with a local LLaVA model via Ollama.

Features:
- LLaVA 7b multimodal image analysis
- Emotion recognition (facial expression, mood)
- Body-language analysis
- Core-state update based on the analysis results
"""
import json
import os
import base64
import requests
from datetime import datetime, timezone
from pathlib import Path
# Configuration
# File locations for the shared core state, the latest analysis output,
# and the rolling analysis history (all under the user's home directory).
PATHS = {
    "state": os.path.expanduser("~/natiris/core/natiris_full_state.json"),
    "vision_output": os.path.expanduser("~/natiris/bridges/vision_analysis.json"),
    "vision_history": os.path.expanduser("~/natiris/memory/vision_history.json"),
}
# Local Ollama HTTP endpoint used for generation requests.
OLLAMA_API = "http://localhost:11434/api/generate"
# Multimodal model tag that must be pulled in Ollama.
LLAVA_MODEL = "llava:7b"
class VisionAnalyzer:
    """LLaVA-based image analysis for Natiris.

    Queries a local Ollama server running the llava:7b model to analyse
    mood, facial expression, gaze and body language in an image, maps the
    free-text answers onto numeric scores, and feeds those scores back
    into the Natiris core state and a rolling history file.
    """

    def __init__(self):
        self.model = LLAVA_MODEL
        self.memory = []  # rolling history of past analyses (last 50 kept)
        self.load_memory()

    def check_model(self):
        """Return True if the configured LLaVA model is listed by Ollama."""
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            models = response.json()
            available = [m["name"] for m in models.get("models", [])]
            return self.model in available
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Server unreachable or malformed response -> treat as unavailable.
            return False

    def encode_image(self, image_path):
        """Encode the image file as base64 for the Ollama API.

        Returns the base64 string, or None if the file cannot be read.
        """
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except OSError as e:
            print(f"Error encoding image: {e}")
            return None

    def query_llava(self, image_path, prompt):
        """Send one prompt + image to LLaVA; return the text response or None."""
        image_base64 = self.encode_image(image_path)
        if not image_base64:
            return None
        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [image_base64],
            "stream": False,  # request the full answer in one response
        }
        try:
            response = requests.post(OLLAMA_API, json=payload, timeout=60)
            result = response.json()
            return result.get("response", "")
        except (requests.RequestException, ValueError) as e:
            print(f"LLaVA query error: {e}")
            return None

    def analyze_emotion(self, image_path):
        """Run the four emotion prompts against the image.

        Returns a dict with keys mood/facial/gaze/body; a failed query
        yields the string "unknown" for that key.
        """
        prompts = {
            "mood": "Describe the mood and emotional state of the person in this image in 2-3 sentences.",
            "facial": "What is the facial expression? Choose one: happy, sad, neutral, surprised, angry, fearful, or content.",
            "gaze": "Where is the person looking? Choose: at_camera, away, down, or eyes_closed.",
            "body": "Describe the body language and posture in one sentence.",
        }
        results = {}
        for key, prompt in prompts.items():
            print(f" Analyzing {key}...")
            response = self.query_llava(image_path, prompt)
            results[key] = response.strip() if response else "unknown"
        return results

    def parse_emotion_scores(self, analysis):
        """Extract numeric scores from the free-text LLaVA answers.

        Uses keyword matching over the JSON-serialized, lower-cased analysis
        dict. Returns a dict with mood (1-10), mood_delta, anxiety,
        anxiety_delta, intimacy and distance.
        """
        text = json.dumps(analysis).lower()
        # Mood score (1-10); first matching keyword group wins.
        mood_score = 5  # default: neutral
        if any(w in text for w in ["happy", "joyful", "cheerful", "content", "smiling"]):
            mood_score = 8
        elif any(w in text for w in ["sad", "depressed", "crying", "down"]):
            mood_score = 3
        elif any(w in text for w in ["angry", "furious", "mad"]):
            mood_score = 2
        elif any(w in text for w in ["neutral", "calm", "relaxed"]):
            mood_score = 5
        elif any(w in text for w in ["surprised", "shocked"]):
            mood_score = 6
        # Anxiety detection
        anxiety_detected = any(w in text for w in ["nervous", "anxious", "worried", "tense", "stressed"])
        # Trust/intimacy indicators
        intimate_detected = any(w in text for w in ["close", "intimate", "warm", "tender", "affectionate"])
        distant_detected = any(w in text for w in ["distant", "cold", "withdrawn", "guarded"])
        return {
            "mood": mood_score,
            "mood_delta": (mood_score - 5) * 0.3,  # normalize to a small delta
            "anxiety": 2.0 if anxiety_detected else 0.0,
            "anxiety_delta": 0.5 if anxiety_detected else 0.0,
            "intimacy": 1 if intimate_detected else 0,
            "distance": 1 if distant_detected else 0,
        }

    def update_core_state(self, adjustments):
        """Apply the vision scores to the shared core-state JSON file.

        Mood/anxiety deltas are added to the current values (clamped to
        0..10) and a Vision module entry is written. Returns True on
        success, False on any I/O or JSON error.
        """
        try:
            if os.path.exists(PATHS["state"]):
                with open(PATHS["state"]) as f:
                    state = json.load(f)
            else:
                state = {"core_state": {}}
            core = state.get("core_state", {})
            modules = state.get("modules", {})
            # Apply adjustments, clamped to the 0..10 range.
            if "mood_delta" in adjustments:
                core["mood"] = max(0, min(10, core.get("mood", 5) + adjustments["mood_delta"]))
            if "anxiety_delta" in adjustments:
                core["anxiety"] = max(0, min(10, core.get("anxiety", 0) + adjustments["anxiety_delta"]))
            # Record the vision snapshot for other modules to read.
            modules["Vision"] = {
                "last_analysis": datetime.now(timezone.utc).isoformat(),
                "detected_mood": adjustments.get("mood", 5),
                "anxiety_detected": adjustments.get("anxiety", 0) > 1,
                "intimacy_level": adjustments.get("intimacy", 0),
            }
            state["core_state"] = core
            state["modules"] = modules
            with open(PATHS["state"], "w") as f:
                json.dump(state, f, indent=2)
            return True
        except Exception as e:
            print(f"Error updating core state: {e}")
            return False

    def load_memory(self):
        """Load the vision analysis history; fall back to empty on any error."""
        if os.path.exists(PATHS["vision_history"]):
            try:
                with open(PATHS["vision_history"]) as f:
                    loaded = json.load(f)
                # Guard against a corrupted file holding a non-list value.
                self.memory = loaded if isinstance(loaded, list) else []
            except (OSError, ValueError):
                self.memory = []

    def save_memory(self, analysis):
        """Append an analysis to the history and persist the last 50 entries."""
        self.memory.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "analysis": analysis,
        })
        # Keep only the most recent 50 entries.
        self.memory = self.memory[-50:]
        try:
            with open(PATHS["vision_history"], "w") as f:
                json.dump(self.memory, f, indent=2)
        except OSError as e:
            print(f"Error saving memory: {e}")

    def analyze(self, image_path, update_core=True):
        """Full image analysis pipeline (main entry point).

        Checks model availability, runs the emotion prompts, parses the
        scores, optionally updates the core state, persists the result to
        history and the output file, and returns the result dict.
        """
        print(f"VisionBridge v2 Analyzing: {image_path}")
        print("-" * 40)
        # Model availability check
        if not self.check_model():
            return {
                "success": False,
                "error": f"{self.model} not available in Ollama",
                "fallback": True,
            }
        # Run the analysis; any failed prompt marks the whole run as failed.
        raw_analysis = self.analyze_emotion(image_path)
        if not raw_analysis or "unknown" in raw_analysis.values():
            return {
                "success": False,
                "error": "LLaVA analysis failed",
                "fallback": True,
            }
        # Extract numeric scores
        adjustments = self.parse_emotion_scores(raw_analysis)
        # Assemble the result
        result = {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "image": image_path,
            "raw_analysis": raw_analysis,
            "parsed_scores": adjustments,
            "model": self.model,
        }
        # Update the core state
        if update_core:
            self.update_core_state(adjustments)
            print("✓ Core state updated")
        # Persist to memory
        self.save_memory(result)
        print("✓ Analysis saved to memory")
        # Write the output file
        with open(PATHS["vision_output"], "w") as f:
            json.dump(result, f, indent=2)
        print(f"\nResults:")
        print(f" Mood: {adjustments['mood']}/10")
        # BUGFIX: was adjustments['anxiety_detected'], a key that
        # parse_emotion_scores never produces (guaranteed KeyError).
        # anxiety is 2.0 when detected, 0.0 otherwise.
        print(f" Anxiety: {'Yes' if adjustments['anxiety'] > 0 else 'No'}")
        print(f" Intimacy: {'High' if adjustments['intimacy'] else 'Low'}")
        return result

    def fallback_analysis(self, image_path=None):
        """Simulation mode used when LLaVA is not available."""
        return {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": "simulation",
            "image": image_path,
            "raw_analysis": {
                "mood": "neutral, calm presence",
                "facial": "neutral",
                "gaze": "at_camera",
                "body": "relaxed posture",
            },
            "parsed_scores": {
                "mood": 5,
                "mood_delta": 0,
                "anxiety": 0,
                "anxiety_delta": 0,
                "intimacy": 0,
                "distance": 0,
            },
        }
def main():
    """Command-line entry point for VisionBridge v2."""
    import argparse

    parser = argparse.ArgumentParser(description="Natiris VisionBridge v2")
    parser.add_argument("--image", "-i", required=True, help="Path to image file")
    parser.add_argument("--no-update-core", action="store_true", help="Don't update core state")
    parser.add_argument("--check", action="store_true", help="Check LLaVA availability")
    args = parser.parse_args()

    analyzer = VisionAnalyzer()

    # Availability probe only: report and exit.
    if args.check:
        status = "✓ Available" if analyzer.check_model() else "✗ Not found"
        print(f"LLaVA 7b: {status}")
        return

    # Guard clause: bail out early when the image does not exist.
    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return

    outcome = analyzer.analyze(args.image, update_core=not args.no_update_core)
    print(json.dumps(outcome, indent=2))


if __name__ == "__main__":
    main()