Initial commit: Natiris AI Agent Orchestration System

bridges/VisionBridge_v2.py (new file, 296 lines)

@@ -0,0 +1,296 @@
#!/usr/bin/env python3
"""
VisionBridge v2 – LLaVA 7b integration for Natiris
Image analysis with a local LLaVA model via Ollama

Features:
- LLaVA 7b multimodal image analysis
- Emotion recognition (facial expression, mood)
- Body-language analysis
- Core-state update based on the analysis
"""

import json
import os
import base64
import requests
from datetime import datetime, timezone
from pathlib import Path

# Configuration
PATHS = {
    "state": os.path.expanduser("~/natiris/core/natiris_full_state.json"),
    "vision_output": os.path.expanduser("~/natiris/bridges/vision_analysis.json"),
    "vision_history": os.path.expanduser("~/natiris/memory/vision_history.json"),
}

OLLAMA_API = "http://localhost:11434/api/generate"
LLAVA_MODEL = "llava:7b"
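# Note: the model must already be available in the local Ollama instance; it
# can typically be pulled with `ollama pull llava:7b` (standard Ollama CLI).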


class VisionAnalyzer:
    """LLaVA-based image analysis for Natiris"""

    def __init__(self):
        self.model = LLAVA_MODEL
        self.memory = []
        self.load_memory()

    def check_model(self):
        """Checks whether LLaVA is available in Ollama"""
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            models = response.json()
            available = [m["name"] for m in models.get("models", [])]
            return self.model in available
        except Exception:
            return False

    def encode_image(self, image_path):
        """Encodes an image to base64 for LLaVA"""
        try:
            with open(image_path, "rb") as f:
                return base64.b64encode(f.read()).decode("utf-8")
        except Exception as e:
            print(f"Error encoding image: {e}")
            return None

    def query_llava(self, image_path, prompt):
        """Sends a request to LLaVA"""
        image_base64 = self.encode_image(image_path)
        if not image_base64:
            return None

        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [image_base64],
            "stream": False
        }

        try:
            response = requests.post(OLLAMA_API, json=payload, timeout=60)
            result = response.json()
            return result.get("response", "")
        except Exception as e:
            print(f"LLaVA query error: {e}")
            return None
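
    # Illustrative sketch of a non-streaming /api/generate exchange (field set
    # assumed; only the "response" field is consumed above):
    #   request:  {"model": "llava:7b", "prompt": "...", "images": ["<base64>"], "stream": false}
    #   response: {"model": "llava:7b", "response": "<answer text>", "done": true, ...}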

    def analyze_emotion(self, image_path):
        """Analyzes the emotion in an image"""
        prompts = {
            "mood": "Describe the mood and emotional state of the person in this image in 2-3 sentences.",
            "facial": "What is the facial expression? Choose one: happy, sad, neutral, surprised, angry, fearful, or content.",
            "gaze": "Where is the person looking? Choose: at_camera, away, down, or eyes_closed.",
            "body": "Describe the body language and posture in one sentence."
        }

        results = {}
        for key, prompt in prompts.items():
            print(f"  Analyzing {key}...")
            response = self.query_llava(image_path, prompt)
            results[key] = response.strip() if response else "unknown"

        return results

    def parse_emotion_scores(self, analysis):
        """Extracts numeric scores from the LLaVA responses"""
        text = json.dumps(analysis).lower()

        # Mood score (1-10)
        mood_score = 5  # default
        if any(w in text for w in ["happy", "joyful", "cheerful", "content", "smiling"]):
            mood_score = 8
        elif any(w in text for w in ["sad", "depressed", "crying", "down"]):
            mood_score = 3
        elif any(w in text for w in ["angry", "furious", "mad"]):
            mood_score = 2
        elif any(w in text for w in ["neutral", "calm", "relaxed"]):
            mood_score = 5
        elif any(w in text for w in ["surprised", "shocked"]):
            mood_score = 6

        # Anxiety detection
        anxiety_detected = any(w in text for w in ["nervous", "anxious", "worried", "tense", "stressed"])

        # Trust/intimacy indicators
        intimate_detected = any(w in text for w in ["close", "intimate", "warm", "tender", "affectionate"])
        distant_detected = any(w in text for w in ["distant", "cold", "withdrawn", "guarded"])

        return {
            "mood": mood_score,
            "mood_delta": (mood_score - 5) * 0.3,  # normalize to a small delta
            "anxiety": 2.0 if anxiety_detected else 0.0,
            "anxiety_delta": 0.5 if anxiety_detected else 0.0,
            "intimacy": 1 if intimate_detected else 0,
            "distance": 1 if distant_detected else 0
        }
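
    # Worked example (illustrative): a raw analysis containing "smiling" yields
    # mood_score = 8 and mood_delta = (8 - 5) * 0.3 ≈ 0.9; a "tense" mention
    # additionally sets anxiety = 2.0 and anxiety_delta = 0.5.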

    def update_core_state(self, adjustments):
        """Updates core_state.json with vision data"""
        try:
            if os.path.exists(PATHS["state"]):
                with open(PATHS["state"]) as f:
                    state = json.load(f)
            else:
                state = {"core_state": {}}

            core = state.get("core_state", {})
            modules = state.get("modules", {})

            # Apply the adjustments
            if "mood_delta" in adjustments:
                core["mood"] = max(0, min(10, core.get("mood", 5) + adjustments["mood_delta"]))
            if "anxiety_delta" in adjustments:
                core["anxiety"] = max(0, min(10, core.get("anxiety", 0) + adjustments["anxiety_delta"]))

            # Add vision data
            modules["Vision"] = {
                "last_analysis": datetime.now(timezone.utc).isoformat(),
                "detected_mood": adjustments.get("mood", 5),
                "anxiety_detected": adjustments.get("anxiety", 0) > 1,
                "intimacy_level": adjustments.get("intimacy", 0)
            }

            state["core_state"] = core
            state["modules"] = modules

            with open(PATHS["state"], "w") as f:
                json.dump(state, f, indent=2)

            return True
        except Exception as e:
            print(f"Error updating core state: {e}")
            return False
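
    # Illustrative shape of natiris_full_state.json after an update (values
    # hypothetical; only the keys written above are set by this method):
    #   {"core_state": {"mood": 5.9, "anxiety": 0.5},
    #    "modules": {"Vision": {"last_analysis": "...", "detected_mood": 8,
    #                           "anxiety_detected": false, "intimacy_level": 1}}}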

    def load_memory(self):
        """Loads the vision analysis history"""
        if os.path.exists(PATHS["vision_history"]):
            try:
                with open(PATHS["vision_history"]) as f:
                    self.memory = json.load(f)
            except Exception:
                self.memory = []

    def save_memory(self, analysis):
        """Stores an analysis in the history"""
        self.memory.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "analysis": analysis
        })
        # Keep only the last 50 entries
        self.memory = self.memory[-50:]

        try:
            with open(PATHS["vision_history"], "w") as f:
                json.dump(self.memory, f, indent=2)
        except Exception as e:
            print(f"Error saving memory: {e}")

    def analyze(self, image_path, update_core=True):
        """Main method: full image analysis"""
        print(f"VisionBridge v2 – Analyzing: {image_path}")
        print("-" * 40)

        # Check the model
        if not self.check_model():
            return {
                "success": False,
                "error": f"{self.model} not available in Ollama",
                "fallback": True
            }

        # Run the analysis
        raw_analysis = self.analyze_emotion(image_path)

        if not raw_analysis or "unknown" in raw_analysis.values():
            return {
                "success": False,
                "error": "LLaVA analysis failed",
                "fallback": True
            }

        # Extract scores
        adjustments = self.parse_emotion_scores(raw_analysis)

        # Assemble the result
        result = {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "image": image_path,
            "raw_analysis": raw_analysis,
            "parsed_scores": adjustments,
            "model": self.model
        }

        # Update the core state
        if update_core:
            self.update_core_state(adjustments)
            print("✓ Core state updated")

        # Save to memory
        self.save_memory(result)
        print("✓ Analysis saved to memory")

        # Write the output
        with open(PATHS["vision_output"], "w") as f:
            json.dump(result, f, indent=2)

        print("\nResults:")
        print(f"  Mood: {adjustments['mood']}/10")
        print(f"  Anxiety: {'Yes' if adjustments['anxiety'] > 0 else 'No'}")
        print(f"  Intimacy: {'High' if adjustments['intimacy'] else 'Low'}")

        return result

    def fallback_analysis(self, image_path=None):
        """Simulation mode for when LLaVA is not available"""
        return {
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": "simulation",
            "image": image_path,
            "raw_analysis": {
                "mood": "neutral, calm presence",
                "facial": "neutral",
                "gaze": "at_camera",
                "body": "relaxed posture"
            },
            "parsed_scores": {
                "mood": 5,
                "mood_delta": 0,
                "anxiety": 0,
                "anxiety_delta": 0,
                "intimacy": 0,
                "distance": 0
            }
        }


def main():
    """CLI entry point"""
    import argparse

    parser = argparse.ArgumentParser(description="Natiris VisionBridge v2")
    # --image is optional here so that --check can run without an image
    parser.add_argument("--image", "-i", help="Path to image file")
    parser.add_argument("--no-update-core", action="store_true", help="Don't update core state")
    parser.add_argument("--check", action="store_true", help="Check LLaVA availability")

    args = parser.parse_args()

    analyzer = VisionAnalyzer()

    if args.check:
        available = analyzer.check_model()
        print(f"LLaVA 7b: {'✓ Available' if available else '✗ Not found'}")
        return

    if not args.image:
        parser.error("--image is required unless --check is given")

    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return

    result = analyzer.analyze(args.image, update_core=not args.no_update_core)
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
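
Usage sketch (flags taken from the argparse setup above; the image path is a placeholder):

    python3 bridges/VisionBridge_v2.py --check
    python3 bridges/VisionBridge_v2.py --image /path/to/frame.jpg
    python3 bridges/VisionBridge_v2.py -i /path/to/frame.jpg --no-update-core

Besides being printed to stdout, the result is written to ~/natiris/bridges/vision_analysis.json.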