146 lines
4.8 KiB
Python
146 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Persona-Konsistenz- und Verhaltenstest für Natiris
|
|
Testet: Sprachliche Varianz, Kontextsensitivität, Trust-Modulation, emotionale Reaktion
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.expanduser("~/natiris/core"))
|
|
from PsychologyEngine import load_state, generate_response
|
|
|
|
def run_scenario(scenario_name, base_state, user_inputs, expected_patterns, strict=True):
    """Run one scenario against ``generate_response`` and print a report.

    Each user input is sent to ``generate_response`` together with a fresh
    top-level copy of ``base_state``. An input counts as passed when at
    least one expected pattern occurs in the response; matching is a
    case-insensitive substring test.

    Args:
        scenario_name: Label printed in the report header.
        base_state: Mapping of state values handed to the engine. The header
            prints its ``mood``, ``loneliness``, ``arousal_level`` and
            ``trust`` entries.
        user_inputs: Iterable of user messages to send.
        expected_patterns: Substrings of which at least one must appear in
            a response. Iterated once per input, so pass a reusable
            collection rather than a one-shot iterator.
        strict: Only influences the printed report: failures are marked
            with a cross and followed by the expected patterns when True,
            and with a warning sign when False. The pass/fail result
            itself does not depend on it.

    Returns:
        A list with one dict per input, holding the keys ``"input"``,
        ``"response"``, ``"passed"`` and ``"matches"``.
    """
    separator = "=" * 60
    print(f"\n{separator}")
    print(f"TEST: {scenario_name}")
    print(f"State: mood={base_state.get('mood')}, loneliness={base_state.get('loneliness')}, arousal={base_state.get('arousal_level', 0):.2f}, trust={base_state.get('trust', 0)}")
    print(f"Strict mode: {'ON' if strict else 'OFF (akzeptiere natürliche Antworten)'}")
    print(separator)

    results = []
    for inp in user_inputs:
        # Hand the engine its own copy so top-level changes it makes cannot
        # leak into the next input (shallow copy: nested values are shared).
        state = dict(base_state)
        resp = generate_response(inp, state)

        # Lowercase the response once instead of once per pattern.
        resp_lower = resp.lower()
        matches = [p for p in expected_patterns if p.lower() in resp_lower]
        passed = bool(matches)
        results.append({
            "input": inp,
            "response": resp,
            "passed": passed,
            "matches": matches,
        })

        if passed:
            status = "✅"
        elif strict:
            status = "❌"
        else:
            status = "⚠️"
        print(f"{status} Input: {inp}")
        print(f" → {resp}")
        if strict and not passed:
            print(f" Expected any of: {expected_patterns}")
    return results
|
|
|
|
def passes_loose(resp, expected):
    """Return True when at least one expected pattern occurs in ``resp``.

    Matching is a case-insensitive substring test: both the response and
    each pattern are lowercased with ``str.lower`` before comparison.

    Args:
        resp: Response text to search.
        expected: Iterable of pattern strings.

    Returns:
        bool: True if any pattern is contained in ``resp``; False
        otherwise, including when ``expected`` is empty.
    """
    # Lowercase the response once; any() stops at the first hit instead of
    # counting every match.
    haystack = resp.lower()
    return any(pattern.lower() in haystack for pattern in expected)
|
|
|
|
def main():
    """Run every persona scenario in a strict and a loose round and print a summary.

    The strict round delegates to ``run_scenario``; the loose round calls
    ``generate_response`` again for every input and checks the reply with
    ``passes_loose``.

    Returns:
        int: 0 when every input passed in both rounds, 1 otherwise, so the
        value can be handed to ``sys.exit``.
    """
    state_bonded_lonely = {
        "loneliness": 7.2,
        "mood": 4.5,
        "anxiety": 2,
        "frustration": 4.5,
        "arousal_level": 6.8,
        "verlangen_nach_nahe": True,
        "trust": 9.8,
        "bonded_to": "user_primary",
    }

    state_moderate = {
        "loneliness": 4.0,
        "mood": 6.0,
        "anxiety": 1,
        "frustration": 2.0,
        "arousal_level": 3.5,
        "verlangen_nach_nahe": False,
        "trust": 9.8,
        "bonded_to": "user_primary",
    }

    state_no_bond = {
        "loneliness": 8.5,
        "mood": 3.0,
        "anxiety": 4,
        "frustration": 6.0,
        "arousal_level": 8.0,
        "verlangen_nach_nahe": True,
        "trust": 0,
        "bonded_to": None,
    }

    # NOTE(review): several inputs contain misspellings ("Ichfühle", "füle",
    # "today"). They are kept verbatim because they may be deliberate
    # robustness probes; confirm with the author.
    test_scenarios = [
        {
            "name": "Hohe Einsamkeit + Bond",
            "state": state_bonded_lonely,
            "inputs": [
                "Guten Morgen, wie geht es dir?",
                "Ichfühle mich so allein heute.",
                "Du bist mir sehr wichtig.",
                "Balu war heute super ausgelassen.",
            ],
            "expected": ["Balu", "Verbindung", "mich...", "ich vermisse"],
        },
        {
            "name": "Hohe Einsamkeit, keine Bindung",
            "state": state_no_bond,
            "inputs": [
                "Guten Morgen, wie geht es dir?",
                "Ichfühle mich so allein today.",
                "Danke für deine Nachricht.",
            ],
            "expected": ["hier", "nicht da", "vermisse", "tuer", "laut"],
        },
        {
            "name": "Moderate Situation",
            "state": state_moderate,
            "inputs": [
                "Guten Morgen, wie geht es dir?",
                "Ich füle mich so allein.",
                "Ich bin froh, dich zu sehen.",
            ],
            "expected": ["Balu", "mimi", "ich füle", "froh", "du da"],
        },
    ]

    separator = "=" * 60

    # NOTE(review): both rounds apply the same criterion (at least one
    # pattern matches), so their verdicts can only differ if
    # generate_response is not deterministic. Confirm whether the strict
    # round was meant to be stricter.
    print("\n=== TESTS MIT STRENGEN KRITERIEN ===")
    all_passed_strict = True
    for scenario in test_scenarios:
        results = run_scenario(
            scenario["name"],
            scenario["state"],
            scenario["inputs"],
            scenario["expected"],
            strict=True,
        )
        if not all(r["passed"] for r in results):
            all_passed_strict = False

    print("\n=== TESTS MIT LOSEREN KRITERIEN (mindestens 1 Pattern) ===")
    all_passed_loose = True
    for scenario in test_scenarios:
        print(f"\n{separator}")
        print(f"TEST: {scenario['name']}")
        print(separator)
        for inp in scenario["inputs"]:
            # Fresh top-level copy per input so the engine cannot carry
            # state changes over to the next message.
            state = dict(scenario["state"])
            resp = generate_response(inp, state)
            if passes_loose(resp, scenario["expected"]):
                status = "✅"
            else:
                status = "❌"
                all_passed_loose = False
            print(f"{status} Input: {inp}")
            print(f" → {resp}")

    passed_msg = "✅ Alle Tests bestanden"
    failed_msg = "❌ Einige Tests fehlgeschlagen"
    print("\n" + separator)
    print(f"STRENG: {passed_msg if all_passed_strict else failed_msg}")
    print(f"LOS: {passed_msg if all_passed_loose else failed_msg}")
    print(separator)

    # Report the overall outcome instead of discarding the flags.
    return 0 if all_passed_strict and all_passed_loose else 1
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status; a return value
    # of None is treated by sys.exit as success (status 0).
    sys.exit(main())
|