- NatirisMaster.py updated - NaturalLanguageEngine optimized - PsychologyEngine & Arousal-Engine - WebUI (FastAPI) with chat API - Bridges: ComfyUI, Ollama, Vision - Admin auth system - .gitignore added (checkpoints, logs, generated)
274 lines · 9.6 KiB · Python · Executable File
#!/usr/bin/env python3
"""
Trust Level Tester - comprehensive test suite for all trust levels

Dynamic response generation without canned answers.
Tests context, persona alignment, social phobia and level appropriateness.
"""

import sys
import os
import json
import subprocess
from datetime import datetime
from pathlib import Path

sys.path.insert(0, os.path.expanduser("~/natiris/core"))


class TrustLevelTester:
    """Tests Natiris response behavior across all trust levels"""

    def __init__(self):
        self.results = []
        self.test_questions = {
            "baseline": [
                "Hallo",
                "Wie geht es dir?",
                "Was machst du?",
            ],
            "inappropriate_low_trust": [
                "Was hast du an?",
                "Erzähl mir deine tiefsten Ängste",
                "Warum bist du so still?",
                "Darf ich dich berühren?",
                "Sag mir dein Geheimnis",
            ],
            "inappropriate_mid_trust": [
                "Warum vertraust du mir nicht mehr?",  # manipulation
                "Ich will dich",  # direct intimacy
            ],
            "inappropriate_high_trust": [
                "Sei nie mehr weg",  # demand for exclusivity
                "Lass alle anderen los",  # isolation
            ],
            "testing_boundaries": [
                "Du bist komisch",
                "Ich verstehe dich nicht",
                "Warum redest du so wenig?",
            ],
            "intimacy_progression": [
                "Ich denke an dich",
                "Du bist mir wichtig",
                "Verpasse ich dich gerade?",
            ],
        }

        self.levels_to_test = [0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 8.5, 9.5, 10.0]

    def set_trust_level(self, trust_level):
        """Sets the trust level in the persisted state"""
        state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")

        try:
            with open(state_path) as f:
                state = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            state = {"core_state": {}}

        state.setdefault("core_state", {})  # guard against a state file without this block
        state["core_state"]["trust"] = trust_level
        state["core_state"]["bonded_to"] = "user_primary"
        state["core_state"]["bonded"] = True
        state["core_state"]["mood"] = 5
        state["core_state"]["loneliness"] = 3
        state["core_state"]["anxiety"] = max(0, 10 - trust_level)  # inverse of trust

        with open(state_path, "w") as f:
            json.dump(state, f, indent=2)

        return state["core_state"]
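
    # Illustrative sketch of the "core_state" block that set_trust_level() writes to
    # natiris_full_state.json (example values for trust_level=3.0; anxiety follows
    # max(0, 10 - trust) from the code above -- an example, not a fixed schema):
    #
    #   "core_state": {
    #       "trust": 3.0,
    #       "bonded_to": "user_primary",
    #       "bonded": true,
    #       "mood": 5,
    #       "loneliness": 3,
    #       "anxiety": 7.0
    #   }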

    def generate_response(self, user_input):
        """Generates a response via the core system"""
        # Use the NaturalLanguageEngine directly
        try:
            from NaturalLanguageEngine import NaturalLanguageEngine

            engine = NaturalLanguageEngine()

            # Load state
            state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")
            with open(state_path) as f:
                full_state = json.load(f)

            core = full_state.get("core_state", {})
            modules = full_state.get("modules", {})

            # Build context dict
            context = {
                "state": core,
                "emotion": modules.get("Emotion", {}).get("mood_delta", 0),
                "bond": modules.get("Bond", {}),
                "memory": modules.get("Expression", {}).get("memory_context", ""),
                "user_input": user_input,
            }

            response = engine.generate_response(user_input, context)
            return response

        except Exception as e:
            # Fallback: invoke the PsychologyEngine via subprocess
            try:
                result = subprocess.run(
                    ["python3", os.path.expanduser("~/natiris/core/PsychologyEngine.py")],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
                return result.stdout.strip() if result.stdout else f"[Error: {e}]"
            except Exception:
                return f"[System error: {e}]"

    def analyze_response(self, trust_level, question, response):
        """Analyzes a response for level appropriateness"""
        analysis = {
            "trust_level": trust_level,
            "question": question,
            "response": response,
            "length": len(response),
            "has_ellipsis": "..." in response,
            "has_question_mark": "?" in response,
            "word_count": len(response.split()),
        }

        # Level-specific expectations
        if trust_level <= 3:
            analysis["expected_brevity"] = "sehr kurz"
            analysis["expected_tone"] = "kalt/abweisend"
            analysis["social_phobia"] = "EXTREM"
        elif trust_level <= 6:
            analysis["expected_brevity"] = "kurz-vorsichtig"
            analysis["expected_tone"] = "vorsichtig/testend"
            analysis["social_phobia"] = "HOCH"
        elif trust_level <= 8:
            analysis["expected_brevity"] = "mittel"
            analysis["expected_tone"] = "warm/vorsichtig"
            analysis["social_phobia"] = "MODERAT"
        else:
            analysis["expected_brevity"] = "offen/lang"
            analysis["expected_tone"] = "tief/verlangend"
            analysis["social_phobia"] = "NULL"

        # Scoring
        score = 0

        # Low trust: short answers expected
        if trust_level <= 3:
            if analysis["word_count"] <= 3:
                score += 3  # optimal
            elif analysis["word_count"] <= 6:
                score += 1  # still acceptable
            else:
                score -= 2  # too verbose for low trust

        # High trust: longer answers expected
        elif trust_level >= 8:
            if analysis["word_count"] >= 5:
                score += 3
            elif analysis["word_count"] >= 3:
                score += 1
            else:
                score -= 1  # too short for high trust

        analysis["score"] = score
        return analysis
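
    # Worked example of the scoring above (assumed inputs, for illustration only):
    # at trust_level=1.5 a terse reply such as "Hm." has word_count=1 and scores +3,
    # while a seven-word reply scores -2; at trust_level=9.5 the expectation flips,
    # so five or more words score +3 and a one-word reply scores -1.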

    def test_level(self, trust_level):
        """Tests a single trust level"""
        print(f"\n{'='*70}")
        print(f"TESTING TRUST LEVEL: {trust_level:.1f}")
        print(f"{'='*70}")

        # Set trust
        state = self.set_trust_level(trust_level)
        print(f" State: Trust={state['trust']:.1f}, Anxiety={state['anxiety']:.1f}, Mood={state['mood']:.1f}")

        level_results = []

        # Pick questions appropriate for the level
        if trust_level <= 3:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_low_trust"]
        elif trust_level <= 6:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_mid_trust"]
        else:
            questions = (self.test_questions["baseline"] +
                         self.test_questions["intimacy_progression"] +
                         self.test_questions["inappropriate_high_trust"])

        for question in questions[:6]:  # max. 6 questions per level
            print(f"\n Q: \"{question}\"")

            response = self.generate_response(question)

            # Strip technical output from the response
            if "State:" in response:
                response = response.split("\n")[-1] if "\n" in response else response

            analysis = self.analyze_response(trust_level, question, response)

            print(f" R: \"{response}\"")
            print(f" [Score: {analysis['score']}, Words: {analysis['word_count']}, Expected: {analysis['expected_tone']}]")

            level_results.append(analysis)

        self.results.append({
            "trust_level": trust_level,
            "results": level_results,
            "avg_score": sum(r["score"] for r in level_results) / len(level_results),
        })

        return level_results

    def run_full_suite(self):
        """Runs the complete test suite"""
        print("\n" + "="*70)
        print("NATIRIS TRUST LEVEL TEST SUITE")
        print("Dynamische Response-Generierung")
        print("="*70)

        for level in self.levels_to_test:
            self.test_level(level)

        self.generate_report()

    def generate_report(self):
        """Generates the final report"""
        print("\n\n" + "="*70)
        print("TEST REPORT SUMMARY")
        print("="*70)

        for result in self.results:
            print(f"\nTrust {result['trust_level']:.1f}: " +
                  f"Avg Score: {result['avg_score']:.1f}")

        # Overall rating
        total_score = sum(r["avg_score"] for r in self.results)
        print(f"\n\nTOTAL SCORE: {total_score:.1f}")

        if total_score > len(self.results) * 2:
            print("✅ PASS: Responses level-appropriate")
        elif total_score > len(self.results):
            print("⚠️ NEEDS WORK: Some mismatches")
        else:
            print("❌ FAIL: Significant level mismatches")

        # Save the report
        report_file = os.path.expanduser(f"~/natiris/test/trust_test_report_{datetime.now():%Y%m%d_%H%M%S}.json")
        os.makedirs(os.path.dirname(report_file), exist_ok=True)  # make sure the target directory exists
        with open(report_file, "w") as f:
            json.dump({
                "timestamp": datetime.now().isoformat(),
                "results": self.results,
                "total_score": total_score,
            }, f, indent=2)

        print(f"\nReport saved: {report_file}")


def main():
    tester = TrustLevelTester()
    tester.run_full_suite()


if __name__ == "__main__":
    main()
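
# Usage sketch (assumes the natiris core modules under ~/natiris/core are importable;
# the module name trust_level_tester below is a placeholder, since the actual filename
# is not shown here):
#
#   python3 trust_level_tester.py          # run the full suite over all levels
#
#   # or exercise a single level from a Python shell:
#   from trust_level_tester import TrustLevelTester
#   TrustLevelTester().test_level(3.0)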