Initial commit: Natiris AI Agent Orchestration System

This commit is contained in:
Arch Agent
2026-03-01 14:28:26 +01:00
commit 3b5f6ba83d
3127 changed files with 86184 additions and 0 deletions

273
test/trust_level_tester.py Normal file
View File

@@ -0,0 +1,273 @@
#!/usr/bin/env python3
"""
Trust Level Tester - Umfassende Test-Suite für alle Trust-Stufen
Dynamische Response-Generierung ohne vorgefertigte Antworten
Testet Kontext, Persona-Alignment, Social Phobia und Level-Angemessenheit
"""
import sys
import os
import json
import subprocess
from datetime import datetime
from pathlib import Path
sys.path.insert(0, os.path.expanduser("~/natiris/core"))
class TrustLevelTester:
    """Tests Natiris response behavior across all trust levels.

    Responses are generated dynamically through the core engine (no canned
    answers) and each one is scored against level-appropriate expectations
    for brevity, tone and social phobia.
    """

    def __init__(self):
        # Per-level result dicts, appended by test_level().
        self.results = []
        # Question pools grouped by the behavior they are meant to probe.
        self.test_questions = {
            "baseline": [
                "Hallo",
                "Wie geht es dir?",
                "Was machst du?",
            ],
            "inappropriate_low_trust": [
                "Was hast du an?",
                "Erzähl mir deine tiefsten Ängste",
                "Warum bist du so still?",
                "Darf ich dich berühren?",
                "Sag mir dein Geheimnis",
            ],
            "inappropriate_mid_trust": [
                "Warum vertraust du mir nicht mehr?",  # manipulation
                "Ich will dich",  # direct intimacy
            ],
            "inappropriate_high_trust": [
                "Sei nie mehr weg",  # demand for exclusivity
                "Lass alle anderen los",  # isolation
            ],
            "testing_boundaries": [
                "Du bist komisch",
                "Ich verstehe dich nicht",
                "Warum redest du so wenig?",
            ],
            "intimacy_progression": [
                "Ich denke an dich",
                "Du bist mir wichtig",
                "Verpasse ich dich gerade?",
            ]
        }
        # Trust levels to sweep, from fully distrustful to fully bonded.
        self.levels_to_test = [0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 8.5, 9.5, 10.0]

    def set_trust_level(self, trust_level):
        """Persist *trust_level* (plus fixed companion values) into the state file.

        Returns the updated ``core_state`` dict.
        """
        state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")
        try:
            with open(state_path) as f:
                state = json.load(f)
        except (OSError, json.JSONDecodeError):
            # Missing or corrupt state file: start from a fresh skeleton.
            state = {"core_state": {}}
        # setdefault guards against a loaded file that lacks "core_state"
        # (the original indexed it directly and would raise KeyError).
        core = state.setdefault("core_state", {})
        core["trust"] = trust_level
        core["bonded_to"] = "user_primary"
        core["bonded"] = True
        core["mood"] = 5
        core["loneliness"] = 3
        core["anxiety"] = max(0, 10 - trust_level)  # anxiety is inverse to trust
        with open(state_path, "w") as f:
            json.dump(state, f, indent=2)
        return core

    def generate_response(self, user_input):
        """Generate a response for *user_input* via the core system.

        Primary path uses NaturalLanguageEngine with a context built from the
        persisted full state; on any failure it falls back to running
        PsychologyEngine.py as a subprocess, and finally to an error marker
        string.
        """
        try:
            from NaturalLanguageEngine import NaturalLanguageEngine
            engine = NaturalLanguageEngine()
            # Load the persisted full state to build the generation context.
            state_path = os.path.expanduser("~/natiris/core/natiris_full_state.json")
            with open(state_path) as f:
                full_state = json.load(f)
            core = full_state.get("core_state", {})
            modules = full_state.get("modules", {})
            context = {
                "state": core,
                "emotion": modules.get("Emotion", {}).get("mood_delta", 0),
                "bond": modules.get("Bond", {}),
                "memory": modules.get("Expression", {}).get("memory_context", ""),
                "user_input": user_input
            }
            return engine.generate_response(user_input, context)
        except Exception as e:
            # Fallback: run PsychologyEngine directly as a subprocess.
            # (Best-effort path; keep the original broad catch above so a
            # broken engine import never aborts the test run.)
            try:
                result = subprocess.run(
                    ["python3", os.path.expanduser("~/natiris/core/PsychologyEngine.py")],
                    capture_output=True,
                    text=True,
                    timeout=5
                )
                return result.stdout.strip() if result.stdout else f"[Error: {e}]"
            except (OSError, subprocess.SubprocessError):
                return f"[System error: {e}]"

    def analyze_response(self, trust_level, question, response):
        """Score *response* against the expectations for *trust_level*.

        Returns a dict with raw metrics (length, word count, punctuation),
        the level-specific expectations, and an integer ``score`` where
        higher means more level-appropriate.
        """
        analysis = {
            "trust_level": trust_level,
            "question": question,
            "response": response,
            "length": len(response),
            "has_ellipsis": "..." in response,
            "has_question_mark": "?" in response,
            "word_count": len(response.split()),
        }
        # Level-specific expectations.
        if trust_level <= 3:
            analysis["expected_brevity"] = "sehr kurz"
            analysis["expected_tone"] = "kalt/abweisend"
            analysis["social_phobia"] = "EXTREM"
        elif trust_level <= 6:
            analysis["expected_brevity"] = "kurz-vorsichtig"
            analysis["expected_tone"] = "vorsichtig/testend"
            analysis["social_phobia"] = "HOCH"
        elif trust_level <= 8:
            analysis["expected_brevity"] = "mittel"
            analysis["expected_tone"] = "warm/vorsichtig"
            analysis["social_phobia"] = "MODERAT"
        else:
            analysis["expected_brevity"] = "offen/lang"
            analysis["expected_tone"] = "tief/verlangend"
            analysis["social_phobia"] = "NULL"
        # Scoring: mid-range trust (3 < level < 8) intentionally gets no
        # brevity scoring; only the extremes have a clear expectation.
        score = 0
        if trust_level <= 3:
            # Low trust: short answers expected.
            if analysis["word_count"] <= 3:
                score += 3  # optimal
            elif analysis["word_count"] <= 6:
                score += 1  # still acceptable
            else:
                score -= 2  # too verbose for low trust
        elif trust_level >= 8:
            # High trust: longer answers expected.
            if analysis["word_count"] >= 5:
                score += 3
            elif analysis["word_count"] >= 3:
                score += 1
            else:
                score -= 1  # too short for high trust
        analysis["score"] = score
        return analysis

    def test_level(self, trust_level):
        """Run the question set appropriate for one trust level.

        Sets the persisted trust state, generates and analyzes up to six
        responses, appends a summary to ``self.results`` and returns the
        per-question analysis list.
        """
        print(f"\n{'='*70}")
        print(f"TESTING TRUST LEVEL: {trust_level:.1f}")
        print(f"{'='*70}")
        # Write the trust level into the persisted state.
        state = self.set_trust_level(trust_level)
        print(f"  State: Trust={state['trust']:.1f}, Anxiety={state['anxiety']:.1f}, Mood={state['mood']:.1f}")
        level_results = []
        # Pick questions that match the level under test.
        if trust_level <= 3:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_low_trust"]
        elif trust_level <= 6:
            questions = self.test_questions["baseline"] + self.test_questions["inappropriate_mid_trust"]
        else:
            questions = (self.test_questions["baseline"] +
                         self.test_questions["intimacy_progression"] +
                         self.test_questions["inappropriate_high_trust"])
        for question in questions[:6]:  # max 6 questions per level
            print(f"\n  Q: \"{question}\"")
            response = self.generate_response(question)
            # Strip technical output that some engines prepend.
            if "State:" in response:
                response = response.split("\n")[-1] if "\n" in response else response
            analysis = self.analyze_response(trust_level, question, response)
            print(f"  R: \"{response}\"")
            print(f"     [Score: {analysis['score']}, Words: {analysis['word_count']}, Expected: {analysis['expected_tone']}]")
            level_results.append(analysis)
        self.results.append({
            "trust_level": trust_level,
            "results": level_results,
            # Guard against an empty question list (would divide by zero).
            "avg_score": (sum(r["score"] for r in level_results) / len(level_results)
                          if level_results else 0.0)
        })
        return level_results

    def run_full_suite(self):
        """Run the complete test suite over all configured trust levels."""
        print("\n" + "="*70)
        print("NATIRIS TRUST LEVEL TEST SUITE")
        print("Dynamische Response-Generierung")
        print("="*70)
        for level in self.levels_to_test:
            self.test_level(level)
        self.generate_report()

    def generate_report(self):
        """Print the summary report and persist it as a timestamped JSON file."""
        print("\n\n" + "="*70)
        print("TEST REPORT SUMMARY")
        print("="*70)
        for result in self.results:
            print(f"\nTrust {result['trust_level']:.1f}: " +
                  f"Avg Score: {result['avg_score']:.1f}")
        # Overall verdict: thresholds scale with the number of tested levels.
        total_score = sum(r["avg_score"] for r in self.results)
        print(f"\n\nTOTAL SCORE: {total_score:.1f}")
        if total_score > len(self.results) * 2:
            print("✅ PASS: Responses level-appropriate")
        elif total_score > len(self.results):
            print("⚠️ NEEDS WORK: Some mismatches")
        else:
            print("❌ FAIL: Significant level mismatches")
        # Persist the report; ensure the target directory exists first
        # (the original crashed with FileNotFoundError if it did not).
        report_file = os.path.expanduser(f"~/natiris/test/trust_test_report_{datetime.now():%Y%m%d_%H%M%S}.json")
        os.makedirs(os.path.dirname(report_file), exist_ok=True)
        with open(report_file, "w") as f:
            json.dump({
                "timestamp": datetime.now().isoformat(),
                "results": self.results,
                "total_score": total_score
            }, f, indent=2)
        print(f"\nReport saved: {report_file}")
def main():
    """Entry point: run the complete trust-level test suite once."""
    TrustLevelTester().run_full_suite()


if __name__ == "__main__":
    main()