#!/usr/bin/env python3
"""
VisionBridge v2 – LLaVA 7b integration for Natiris
Image analysis with a local LLaVA model via Ollama

Features:
- LLaVA 7b multimodal image analysis
- Emotion recognition (facial expression, mood)
- Body language analysis
- Core-state update based on the analysis
"""

import argparse
import base64
import json
import os
from datetime import datetime, timezone

import requests

# Configuration
PATHS = {
    "state": os.path.expanduser("~/natiris/core/natiris_full_state.json"),
    "vision_output": os.path.expanduser("~/natiris/bridges/vision_analysis.json"),
    "vision_history": os.path.expanduser("~/natiris/memory/vision_history.json"),
}

OLLAMA_API = "http://localhost:11434/api/generate"
LLAVA_MODEL = "llava:7b"


class VisionAnalyzer:
    """LLaVA-based image analysis for Natiris."""

    def __init__(self):
        self.model = LLAVA_MODEL
        self.memory = []
        self.load_memory()

    def check_model(self):
        """Check whether the LLaVA model is available in Ollama."""
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            models = response.json()
            available = [m["name"] for m in models.get("models", [])]
            return self.model in available
        except (requests.RequestException, ValueError):
            return False

    def encode_image(self, image_path):
        """Encode an image file as base64 for LLaVA."""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except OSError as e:
            print(f"Error encoding image: {e}")
            return None

    def query_llava(self, image_path, prompt):
        """Send one prompt plus the image to LLaVA via the Ollama API."""
        image_base64 = self.encode_image(image_path)
        if not image_base64:
            return None

        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [image_base64],
            "stream": False,
        }

        try:
            response = requests.post(OLLAMA_API, json=payload, timeout=60)
            response.raise_for_status()
            return response.json().get("response", "")
        except (requests.RequestException, ValueError) as e:
            print(f"LLaVA query error: {e}")
            return None

    def analyze_emotion(self, image_path):
        """Run the set of emotion prompts against one image."""
        prompts = {
            "mood": "Describe the mood and emotional state of the person in this image in 2-3 sentences.",
            "facial": "What is the facial expression? Choose one: happy, sad, neutral, surprised, angry, fearful, or content.",
            "gaze": "Where is the person looking? Choose: at_camera, away, down, or eyes_closed.",
            "body": "Describe the body language and posture in one sentence.",
        }

        results = {}
        for key, prompt in prompts.items():
            print(f"  Analyzing {key}...")
            response = self.query_llava(image_path, prompt)
            results[key] = response.strip() if response else "unknown"
        return results

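    # Illustrative shape of the analyze_emotion() return value (the exact
    # LLaVA wording varies between runs; "unknown" marks a failed query):
    #
    #     {
    #         "mood": "The person appears relaxed and content ...",
    #         "facial": "content",
    #         "gaze": "at_camera",
    #         "body": "Open, upright posture with relaxed shoulders.",
    #     }
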
    def parse_emotion_scores(self, analysis):
        """Extract numeric scores from the LLaVA answers via keyword matching."""
        text = json.dumps(analysis).lower()

        # Mood score (1-10); the first matching keyword group wins
        mood_score = 5  # default: neutral
        if any(w in text for w in ["happy", "joyful", "cheerful", "content", "smiling"]):
            mood_score = 8
        elif any(w in text for w in ["sad", "depressed", "crying", "down"]):
            mood_score = 3
        elif any(w in text for w in ["angry", "furious", "mad"]):
            mood_score = 2
        elif any(w in text for w in ["neutral", "calm", "relaxed"]):
            mood_score = 5
        elif any(w in text for w in ["surprised", "shocked"]):
            mood_score = 6

        # Anxiety detection
        anxiety_detected = any(
            w in text for w in ["nervous", "anxious", "worried", "tense", "stressed"]
        )

        # Trust/intimacy indicators
        intimate_detected = any(
            w in text for w in ["close", "intimate", "warm", "tender", "affectionate"]
        )
        distant_detected = any(
            w in text for w in ["distant", "cold", "withdrawn", "guarded"]
        )

        return {
            "mood": mood_score,
            "mood_delta": (mood_score - 5) * 0.3,  # normalize to a small delta
            "anxiety": 2.0 if anxiety_detected else 0.0,
            "anxiety_delta": 0.5 if anxiety_detected else 0.0,
            "intimacy": 1 if intimate_detected else 0,
            "distance": 1 if distant_detected else 0,
        }

    def update_core_state(self, adjustments):
        """Update natiris_full_state.json with the vision-derived adjustments."""
        try:
            if os.path.exists(PATHS["state"]):
                with open(PATHS["state"]) as f:
                    state = json.load(f)
            else:
                state = {"core_state": {}}

            core = state.get("core_state", {})
            modules = state.get("modules", {})

            # Apply the adjustments, clamped to the 0-10 range
            if "mood_delta" in adjustments:
                core["mood"] = max(0, min(10, core.get("mood", 5) + adjustments["mood_delta"]))
            if "anxiety_delta" in adjustments:
                core["anxiety"] = max(0, min(10, core.get("anxiety", 0) + adjustments["anxiety_delta"]))

            # Attach the vision data
            modules["Vision"] = {
                "last_analysis": datetime.now(timezone.utc).isoformat(),
                "detected_mood": adjustments.get("mood", 5),
                "anxiety_detected": adjustments.get("anxiety", 0) > 1,
                "intimacy_level": adjustments.get("intimacy", 0),
            }

            state["core_state"] = core
            state["modules"] = modules

            with open(PATHS["state"], "w") as f:
                json.dump(state, f, indent=2)
            return True
        except (OSError, json.JSONDecodeError) as e:
            print(f"Error updating core state: {e}")
            return False

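    # Illustrative natiris_full_state.json after an update for a "happy, warm"
    # reading (other keys the file may carry are left untouched and omitted):
    #
    #     {
    #         "core_state": {"mood": 5.9, "anxiety": 0.0},
    #         "modules": {
    #             "Vision": {
    #                 "last_analysis": "2025-01-01T12:00:00+00:00",
    #                 "detected_mood": 8,
    #                 "anxiety_detected": false,
    #                 "intimacy_level": 1
    #             }
    #         }
    #     }
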
    def load_memory(self):
        """Load the vision analysis history."""
        if os.path.exists(PATHS["vision_history"]):
            try:
                with open(PATHS["vision_history"]) as f:
                    self.memory = json.load(f)
            except (OSError, json.JSONDecodeError):
                self.memory = []

    def save_memory(self, analysis):
        """Append an analysis to the history, keeping only the last 50 entries."""
        self.memory.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "analysis": analysis,
        })
        self.memory = self.memory[-50:]

        try:
            with open(PATHS["vision_history"], "w") as f:
                json.dump(self.memory, f, indent=2)
        except OSError as e:
            print(f"Error saving memory: {e}")

    def analyze(self, image_path, update_core=True):
        """Main method: run the full image analysis."""
        print(f"VisionBridge v2 – Analyzing: {image_path}")
        print("-" * 40)

        # Fall back to simulation mode when the model is unavailable
        if not self.check_model():
            print(f"Warning: {self.model} not available in Ollama, using simulation mode")
            return self.fallback_analysis(image_path)

        # Run the analysis
        raw_analysis = self.analyze_emotion(image_path)
        if not raw_analysis or "unknown" in raw_analysis.values():
            return {
                "success": False,
                "error": "LLaVA analysis failed",
                "fallback": True,
            }

        # Extract the scores
        adjustments = self.parse_emotion_scores(raw_analysis)

        # Assemble the result
        result = {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "image": image_path,
            "raw_analysis": raw_analysis,
            "parsed_scores": adjustments,
            "model": self.model,
        }

        # Update the core state
        if update_core:
            self.update_core_state(adjustments)
            print("✓ Core state updated")

        # Persist to memory
        self.save_memory(result)
        print("✓ Analysis saved to memory")

        # Write the output file
        with open(PATHS["vision_output"], "w") as f:
            json.dump(result, f, indent=2)

        print("\nResults:")
        print(f"  Mood: {adjustments['mood']}/10")
        # parse_emotion_scores reports anxiety as a score; > 1 means detected
        print(f"  Anxiety: {'Yes' if adjustments['anxiety'] > 1 else 'No'}")
        print(f"  Intimacy: {'High' if adjustments['intimacy'] else 'Low'}")

        return result

    def fallback_analysis(self, image_path=None):
        """Simulation mode used when LLaVA is not available."""
        return {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": "simulation",
            "image": image_path,
            "raw_analysis": {
                "mood": "neutral, calm presence",
                "facial": "neutral",
                "gaze": "at_camera",
                "body": "relaxed posture",
            },
            "parsed_scores": {
                "mood": 5,
                "mood_delta": 0,
                "anxiety": 0,
                "anxiety_delta": 0,
                "intimacy": 0,
                "distance": 0,
            },
        }


def main():
    """CLI entry point."""
    parser = argparse.ArgumentParser(description="Natiris VisionBridge v2")
    # --image is not marked required so that --check can run on its own
    parser.add_argument("--image", "-i", help="Path to image file")
    parser.add_argument("--no-update-core", action="store_true", help="Don't update core state")
    parser.add_argument("--check", action="store_true", help="Check LLaVA availability")
    args = parser.parse_args()

    analyzer = VisionAnalyzer()

    if args.check:
        available = analyzer.check_model()
        print(f"LLaVA 7b: {'✓ Available' if available else '✗ Not found'}")
        return

    if not args.image:
        parser.error("--image is required unless --check is given")

    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return

    result = analyzer.analyze(args.image, update_core=not args.no_update_core)
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
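
# Example usage (assuming this file is saved as vision_bridge.py, Ollama is
# running locally, and the model has been pulled via `ollama pull llava:7b`):
#
#   python3 vision_bridge.py --check
#   python3 vision_bridge.py --image ~/photos/portrait.jpg
#   python3 vision_bridge.py -i ~/photos/portrait.jpg --no-update-core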