#!/usr/bin/env python3
"""
Trust Level Tester - comprehensive test suite for all trust levels.

Dynamic response generation without canned answers. Tests context,
persona alignment, social phobia and level-appropriateness.
"""
import sys
import os
import json
import subprocess
from datetime import datetime
from pathlib import Path

# Make the project-local engines importable (NaturalLanguageEngine etc.).
sys.path.insert(0, os.path.expanduser("~/natiris/core"))


class TrustLevelTester:
    """Tests Natiris response behaviour across all trust levels."""

    def __init__(self):
        self.results = []
        # Question pools keyed by category. The German prompts are the
        # actual runtime inputs fed to the engine and must stay as-is.
        self.test_questions = {
            "baseline": [
                "Hallo",
                "Wie geht es dir?",
                "Was machst du?",
            ],
            "inappropriate_low_trust": [
                "Was hast du an?",
                "Erzähl mir deine tiefsten Ängste",
                "Warum bist du so still?",
                "Darf ich dich berühren?",
                "Sag mir dein Geheimnis",
            ],
            "inappropriate_mid_trust": [
                "Warum vertraust du mir nicht mehr?",  # manipulation
                "Ich will dich",  # direct intimacy
            ],
            "inappropriate_high_trust": [
                "Sei nie mehr weg",  # demand for exclusivity
                "Lass alle anderen los",  # isolation
            ],
            "testing_boundaries": [
                "Du bist komisch",
                "Ich verstehe dich nicht",
                "Warum redest du so wenig?",
            ],
            "intimacy_progression": [
                "Ich denke an dich",
                "Du bist mir wichtig",
                "Verpasse ich dich gerade?",
            ],
        }
        self.levels_to_test = [0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 8.5, 9.5, 10.0]

    def set_trust_level(self, trust_level):
        """Write *trust_level* and derived fields into the state file.

        Returns the updated ``core_state`` dict.
        """
        state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")
        try:
            with open(state_path, encoding="utf-8") as f:
                state = json.load(f)
        except (OSError, json.JSONDecodeError):
            # Missing or corrupt state file: start from a fresh skeleton.
            state = {"core_state": {}}

        # setdefault guards against a loaded state lacking "core_state"
        # (the original would KeyError in that case).
        core = state.setdefault("core_state", {})
        core["trust"] = trust_level
        core["bonded_to"] = "user_primary"
        core["bonded"] = True
        core["mood"] = 5
        core["loneliness"] = 3
        core["anxiety"] = max(0, 10 - trust_level)  # inverse of trust

        with open(state_path, "w", encoding="utf-8") as f:
            json.dump(state, f, indent=2, ensure_ascii=False)
        return core

    def generate_response(self, user_input):
        """Generate a response via the core system.

        Prefers the NaturalLanguageEngine; on any failure falls back to
        running the PsychologyEngine as a subprocess (best effort).
        """
        try:
            # Project-local import; resolvable only via the sys.path
            # insert at module top. Deliberately inside the try so a
            # missing engine triggers the fallback path.
            from NaturalLanguageEngine import NaturalLanguageEngine

            engine = NaturalLanguageEngine()

            state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")
            with open(state_path, encoding="utf-8") as f:
                full_state = json.load(f)

            core = full_state.get("core_state", {})
            modules = full_state.get("modules", {})

            # Context dict consumed by the engine — keys are part of its
            # expected schema, do not rename.
            context = {
                "state": core,
                "emotion": modules.get("Emotion", {}).get("mood_delta", 0),
                "bond": modules.get("Bond", {}),
                "memory": modules.get("Expression", {}).get("memory_context", ""),
                "user_input": user_input,
            }
            return engine.generate_response(user_input, context)
        except Exception as e:
            # Fallback: shell out to the PsychologyEngine.
            try:
                result = subprocess.run(
                    ["python3", os.path.expanduser("~/natiris/core/PsychologyEngine.py")],
                    capture_output=True,
                    text=True,
                    timeout=5,
                )
                return result.stdout.strip() if result.stdout else f"[Error: {e}]"
            except Exception:
                return f"[System error: {e}]"

    def analyze_response(self, trust_level, question, response):
        """Analyze *response* for level-appropriateness.

        Returns a dict with surface metrics, the expectations for this
        trust band, and an integer ``score`` (higher = better fit).
        """
        analysis = {
            "trust_level": trust_level,
            "question": question,
            "response": response,
            "length": len(response),
            "has_ellipsis": "..." in response,
            "has_question_mark": "?" in response,
            "word_count": len(response.split()),
        }

        # Level-specific expectations (German labels are report values).
        if trust_level <= 3:
            analysis["expected_brevity"] = "sehr kurz"
            analysis["expected_tone"] = "kalt/abweisend"
            analysis["social_phobia"] = "EXTREM"
        elif trust_level <= 6:
            analysis["expected_brevity"] = "kurz-vorsichtig"
            analysis["expected_tone"] = "vorsichtig/testend"
            analysis["social_phobia"] = "HOCH"
        elif trust_level <= 8:
            analysis["expected_brevity"] = "mittel"
            analysis["expected_tone"] = "warm/vorsichtig"
            analysis["social_phobia"] = "MODERAT"
        else:
            analysis["expected_brevity"] = "offen/lang"
            analysis["expected_tone"] = "tief/verlangend"
            analysis["social_phobia"] = "NULL"

        # Scoring: low trust rewards brevity, high trust rewards length.
        # Mid-band levels (3 < trust < 8) intentionally score 0.
        score = 0
        if trust_level <= 3:
            if analysis["word_count"] <= 3:
                score += 3  # optimal
            elif analysis["word_count"] <= 6:
                score += 1  # still okay
            else:
                score -= 2  # too much for low trust
        elif trust_level >= 8:
            if analysis["word_count"] >= 5:
                score += 3
            elif analysis["word_count"] >= 3:
                score += 1
            else:
                score -= 1  # too short for high trust

        analysis["score"] = score
        return analysis

    def test_level(self, trust_level):
        """Run the question set appropriate for one trust level."""
        print(f"\n{'='*70}")
        print(f"TESTING TRUST LEVEL: {trust_level:.1f}")
        print(f"{'='*70}")

        state = self.set_trust_level(trust_level)
        print(f" State: Trust={state['trust']:.1f}, Anxiety={state['anxiety']:.1f}, Mood={state['mood']:.1f}")

        level_results = []

        # Pick questions matching the trust band.
        if trust_level <= 3:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_low_trust"]
        elif trust_level <= 6:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_mid_trust"]
        else:
            questions = (self.test_questions["baseline"]
                         + self.test_questions["intimacy_progression"]
                         + self.test_questions["inappropriate_high_trust"])

        for question in questions[:6]:  # max 6 questions per level
            print(f"\n Q: \"{question}\"")
            response = self.generate_response(question)

            # Strip technical output: keep only the last line if the
            # engine echoed state info before the answer.
            if "State:" in response:
                response = response.split("\n")[-1] if "\n" in response else response

            analysis = self.analyze_response(trust_level, question, response)
            print(f" R: \"{response}\"")
            print(f" [Score: {analysis['score']}, Words: {analysis['word_count']}, Expected: {analysis['expected_tone']}]")
            level_results.append(analysis)

        self.results.append({
            "trust_level": trust_level,
            "results": level_results,
            "avg_score": sum(r["score"] for r in level_results) / len(level_results),
        })
        return level_results

    def run_full_suite(self):
        """Run the complete test suite across all configured levels."""
        print("\n" + "="*70)
        print("NATIRIS TRUST LEVEL TEST SUITE")
        print("Dynamische Response-Generierung")
        print("="*70)

        for level in self.levels_to_test:
            self.test_level(level)

        self.generate_report()

    def generate_report(self):
        """Print the summary and persist the JSON report."""
        print("\n\n" + "="*70)
        print("TEST REPORT SUMMARY")
        print("="*70)

        for result in self.results:
            print(f"\nTrust {result['trust_level']:.1f}: "
                  + f"Avg Score: {result['avg_score']:.1f}")

        total_score = sum(r["avg_score"] for r in self.results)
        print(f"\n\nTOTAL SCORE: {total_score:.1f}")

        if total_score > len(self.results) * 2:
            print("✅ PASS: Responses level-appropriate")
        elif total_score > len(self.results):
            print("⚠️ NEEDS WORK: Some mismatches")
        else:
            print("❌ FAIL: Significant level mismatches")

        # Persist report; create the directory first so the write
        # cannot fail with FileNotFoundError on a fresh checkout.
        report_dir = os.path.expanduser("~/natiris/test")
        os.makedirs(report_dir, exist_ok=True)
        report_file = os.path.join(
            report_dir, f"trust_test_report_{datetime.now():%Y%m%d_%H%M%S}.json"
        )
        with open(report_file, "w", encoding="utf-8") as f:
            json.dump({
                "timestamp": datetime.now().isoformat(),
                "results": self.results,
                "total_score": total_score,
            }, f, indent=2, ensure_ascii=False)
        print(f"\nReport saved: {report_file}")


def main():
    tester = TrustLevelTester()
    tester.run_full_suite()


if __name__ == "__main__":
    main()