# Source: natiris/bridges/VisionBridge_v2.py
#!/usr/bin/env python3
"""
VisionBridge v2 - LLaVA 7b integration for Natiris.

Image analysis with a local LLaVA model via Ollama.

Features:
- LLaVA 7b multimodal image analysis
- Emotion recognition (facial expression, mood)
- Body-language analysis
- Core-state update based on the analysis results
"""
import json
import os
import base64
import requests
from datetime import datetime, timezone
from pathlib import Path
# Configuration
# File locations for the shared core state, the latest analysis output,
# and the rolling analysis history (all under the user's home directory).
PATHS = {
    "state": os.path.expanduser("~/natiris/core/natiris_full_state.json"),
    "vision_output": os.path.expanduser("~/natiris/bridges/vision_analysis.json"),
    "vision_history": os.path.expanduser("~/natiris/memory/vision_history.json"),
}
# Local Ollama HTTP endpoint used for generation requests.
OLLAMA_API = "http://localhost:11434/api/generate"
# Multimodal model tag that must be pulled in Ollama.
LLAVA_MODEL = "llava:7b"
class VisionAnalyzer:
    """LLaVA-based image analysis for Natiris.

    Queries a local Ollama server running the llava:7b model to analyse
    mood, facial expression, gaze and body language in an image, maps the
    free-text answers onto numeric scores, and feeds those scores back
    into the Natiris core state and a rolling history file.
    """

    def __init__(self):
        self.model = LLAVA_MODEL
        self.memory = []  # rolling history of past analyses (last 50 kept)
        self.load_memory()

    def check_model(self):
        """Return True if the configured LLaVA model is listed by Ollama."""
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            models = response.json()
            available = [m["name"] for m in models.get("models", [])]
            return self.model in available
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Server unreachable or malformed response -> treat as unavailable.
            return False

    def encode_image(self, image_path):
        """Encode the image file as base64 for the Ollama API.

        Returns the base64 string, or None if the file cannot be read.
        """
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except OSError as e:
            print(f"Error encoding image: {e}")
            return None

    def query_llava(self, image_path, prompt):
        """Send one prompt + image to LLaVA; return the text response or None."""
        image_base64 = self.encode_image(image_path)
        if not image_base64:
            return None
        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [image_base64],
            "stream": False,  # request the full answer in one response
        }
        try:
            response = requests.post(OLLAMA_API, json=payload, timeout=60)
            result = response.json()
            return result.get("response", "")
        except (requests.RequestException, ValueError) as e:
            print(f"LLaVA query error: {e}")
            return None

    def analyze_emotion(self, image_path):
        """Run the four emotion prompts against the image.

        Returns a dict with keys mood/facial/gaze/body; a failed query
        yields the string "unknown" for that key.
        """
        prompts = {
            "mood": "Describe the mood and emotional state of the person in this image in 2-3 sentences.",
            "facial": "What is the facial expression? Choose one: happy, sad, neutral, surprised, angry, fearful, or content.",
            "gaze": "Where is the person looking? Choose: at_camera, away, down, or eyes_closed.",
            "body": "Describe the body language and posture in one sentence.",
        }
        results = {}
        for key, prompt in prompts.items():
            print(f" Analyzing {key}...")
            response = self.query_llava(image_path, prompt)
            results[key] = response.strip() if response else "unknown"
        return results

    def parse_emotion_scores(self, analysis):
        """Extract numeric scores from the free-text LLaVA answers.

        Uses keyword matching over the JSON-serialized, lower-cased analysis
        dict. Returns a dict with mood (1-10), mood_delta, anxiety,
        anxiety_delta, intimacy and distance.
        """
        text = json.dumps(analysis).lower()
        # Mood score (1-10); first matching keyword group wins.
        mood_score = 5  # default: neutral
        if any(w in text for w in ["happy", "joyful", "cheerful", "content", "smiling"]):
            mood_score = 8
        elif any(w in text for w in ["sad", "depressed", "crying", "down"]):
            mood_score = 3
        elif any(w in text for w in ["angry", "furious", "mad"]):
            mood_score = 2
        elif any(w in text for w in ["neutral", "calm", "relaxed"]):
            mood_score = 5
        elif any(w in text for w in ["surprised", "shocked"]):
            mood_score = 6
        # Anxiety detection
        anxiety_detected = any(w in text for w in ["nervous", "anxious", "worried", "tense", "stressed"])
        # Trust/intimacy indicators
        intimate_detected = any(w in text for w in ["close", "intimate", "warm", "tender", "affectionate"])
        distant_detected = any(w in text for w in ["distant", "cold", "withdrawn", "guarded"])
        return {
            "mood": mood_score,
            "mood_delta": (mood_score - 5) * 0.3,  # normalize to a small delta
            "anxiety": 2.0 if anxiety_detected else 0.0,
            "anxiety_delta": 0.5 if anxiety_detected else 0.0,
            "intimacy": 1 if intimate_detected else 0,
            "distance": 1 if distant_detected else 0,
        }

    def update_core_state(self, adjustments):
        """Apply the vision scores to the shared core-state JSON file.

        Mood/anxiety deltas are added to the current values (clamped to
        0..10) and a Vision module entry is written. Returns True on
        success, False on any I/O or JSON error.
        """
        try:
            if os.path.exists(PATHS["state"]):
                with open(PATHS["state"]) as f:
                    state = json.load(f)
            else:
                state = {"core_state": {}}
            core = state.get("core_state", {})
            modules = state.get("modules", {})
            # Apply adjustments, clamped to the 0..10 range.
            if "mood_delta" in adjustments:
                core["mood"] = max(0, min(10, core.get("mood", 5) + adjustments["mood_delta"]))
            if "anxiety_delta" in adjustments:
                core["anxiety"] = max(0, min(10, core.get("anxiety", 0) + adjustments["anxiety_delta"]))
            # Record the vision snapshot for other modules to read.
            modules["Vision"] = {
                "last_analysis": datetime.now(timezone.utc).isoformat(),
                "detected_mood": adjustments.get("mood", 5),
                "anxiety_detected": adjustments.get("anxiety", 0) > 1,
                "intimacy_level": adjustments.get("intimacy", 0),
            }
            state["core_state"] = core
            state["modules"] = modules
            with open(PATHS["state"], "w") as f:
                json.dump(state, f, indent=2)
            return True
        except Exception as e:
            print(f"Error updating core state: {e}")
            return False

    def load_memory(self):
        """Load the vision analysis history; fall back to empty on any error."""
        if os.path.exists(PATHS["vision_history"]):
            try:
                with open(PATHS["vision_history"]) as f:
                    loaded = json.load(f)
                # Guard against a corrupted file holding a non-list value.
                self.memory = loaded if isinstance(loaded, list) else []
            except (OSError, ValueError):
                self.memory = []

    def save_memory(self, analysis):
        """Append an analysis to the history and persist the last 50 entries."""
        self.memory.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "analysis": analysis,
        })
        # Keep only the most recent 50 entries.
        self.memory = self.memory[-50:]
        try:
            with open(PATHS["vision_history"], "w") as f:
                json.dump(self.memory, f, indent=2)
        except OSError as e:
            print(f"Error saving memory: {e}")

    def analyze(self, image_path, update_core=True):
        """Full image analysis pipeline (main entry point).

        Checks model availability, runs the emotion prompts, parses the
        scores, optionally updates the core state, persists the result to
        history and the output file, and returns the result dict.
        """
        print(f"VisionBridge v2 Analyzing: {image_path}")
        print("-" * 40)
        # Model availability check
        if not self.check_model():
            return {
                "success": False,
                "error": f"{self.model} not available in Ollama",
                "fallback": True,
            }
        # Run the analysis; any failed prompt marks the whole run as failed.
        raw_analysis = self.analyze_emotion(image_path)
        if not raw_analysis or "unknown" in raw_analysis.values():
            return {
                "success": False,
                "error": "LLaVA analysis failed",
                "fallback": True,
            }
        # Extract numeric scores
        adjustments = self.parse_emotion_scores(raw_analysis)
        # Assemble the result
        result = {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "image": image_path,
            "raw_analysis": raw_analysis,
            "parsed_scores": adjustments,
            "model": self.model,
        }
        # Update the core state
        if update_core:
            self.update_core_state(adjustments)
            print("✓ Core state updated")
        # Persist to memory
        self.save_memory(result)
        print("✓ Analysis saved to memory")
        # Write the output file
        with open(PATHS["vision_output"], "w") as f:
            json.dump(result, f, indent=2)
        print(f"\nResults:")
        print(f" Mood: {adjustments['mood']}/10")
        # BUGFIX: was adjustments['anxiety_detected'], a key that
        # parse_emotion_scores never produces (guaranteed KeyError).
        # anxiety is 2.0 when detected, 0.0 otherwise.
        print(f" Anxiety: {'Yes' if adjustments['anxiety'] > 0 else 'No'}")
        print(f" Intimacy: {'High' if adjustments['intimacy'] else 'Low'}")
        return result

    def fallback_analysis(self, image_path=None):
        """Simulation mode used when LLaVA is not available."""
        return {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": "simulation",
            "image": image_path,
            "raw_analysis": {
                "mood": "neutral, calm presence",
                "facial": "neutral",
                "gaze": "at_camera",
                "body": "relaxed posture",
            },
            "parsed_scores": {
                "mood": 5,
                "mood_delta": 0,
                "anxiety": 0,
                "anxiety_delta": 0,
                "intimacy": 0,
                "distance": 0,
            },
        }
def main():
    """Command-line entry point for VisionBridge v2."""
    import argparse

    parser = argparse.ArgumentParser(description="Natiris VisionBridge v2")
    parser.add_argument("--image", "-i", required=True, help="Path to image file")
    parser.add_argument("--no-update-core", action="store_true", help="Don't update core state")
    parser.add_argument("--check", action="store_true", help="Check LLaVA availability")
    args = parser.parse_args()

    analyzer = VisionAnalyzer()

    # Availability probe only: report and exit.
    if args.check:
        status = "✓ Available" if analyzer.check_model() else "✗ Not found"
        print(f"LLaVA 7b: {status}")
        return

    # Guard clause: bail out early when the image does not exist.
    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return

    outcome = analyzer.analyze(args.image, update_core=not args.no_update_core)
    print(json.dumps(outcome, indent=2))


if __name__ == "__main__":
    main()