Files
natiris/agents/natiris_recovery_agent.py

371 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Natiris Recovery Agent - status tracker and restart helper.

Monitors the project status and enables seamless continuation.

Features:
- Status tracking after every step
- Checkpoint system for interruptions
- Automatic resume after a token limit or restart
- Progress report generation
"""
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
# Root directory of the Natiris project; the state and checkpoint paths below
# are built from it.
PROJECT_ROOT = os.path.expanduser("~/natiris")

# JSON file holding the agent's current state.
STATE_FILE = os.path.join(PROJECT_ROOT, "agent_state.json")

# Path of a "latest" checkpoint file inside the checkpoints directory.
# NOTE(review): nothing in the visible code reads or writes this path.
CHECKPOINT_FILE = os.path.join(PROJECT_ROOT, "checkpoints", "latest.json")

# `requests` is an optional dependency: bind the name to None when the
# package is not installed so the module can still be imported.
try:
    import requests
except ImportError:
    requests = None
def log(msg, level="INFO"):
    """Print a timestamped message and append it to the recovery log file.

    Args:
        msg: Text to log.
        level: Severity tag placed in the line prefix (default ``"INFO"``).

    The line is printed to stdout and appended to
    ``PROJECT_ROOT/agent_recovery.log``.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{timestamp}] [{level}] {msg}"
    print(line)

    # The project directory may not exist yet on a first run; create it so
    # the very first log call does not fail with FileNotFoundError.
    os.makedirs(PROJECT_ROOT, exist_ok=True)
    log_file = os.path.join(PROJECT_ROOT, "agent_recovery.log")
    # Explicit UTF-8: messages may contain non-ASCII text and the platform
    # default encoding is not guaranteed to handle it.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(line + "\n")
class NatirisRecoveryAgent:
    """Track the state of the Natiris project and support resuming work.

    The state is a JSON-serialisable dict (see ``get_default_state``) that is
    persisted to ``STATE_FILE``; every save additionally writes a numbered
    checkpoint file into ``<project_root>/checkpoints``.
    """

    # Task-specific "quick start" text appended to the resume instructions.
    # Tasks without an entry get no task-specific section.
    _QUICK_START = {
        "update_comfybridge_with_ipadapter": """
1. Update ComfyBridge with correct IPAdapter paths:
cd ~/natiris && nano bridges/ComfyBridge.py
- Update MODEL_PATHS with correct paths
- Test with: python3 bridges/ComfyBridge.py --test
2. Verify IPAdapter models exist:
ls /opt/pinokio/drive/drives/peers/d1753059260169/ipadapter/
""",
        "test_ipadapter_integration": """
1. Generate test image with IPAdapter:
cd ~/natiris && python3 bridges/ComfyBridge.py --test
2. Check if image was generated with face consistency:
ls -la ~/natiris/generated/
""",
        "integrate_vision_loop": """
1. Connect VisionBridge to Core:
- Update Orchestrator to trigger Vision after ComfyBridge
- Test full loop: Text → Image → Vision → Response
2. Verify state updates:
cat ~/natiris/core/natiris_full_state.json
""",
    }

    def __init__(self):
        """Create the checkpoint directory and load the persisted state."""
        self.project_root = Path(PROJECT_ROOT)
        self.checkpoints_dir = self.project_root / "checkpoints"
        # parents=True: on a first run the project root itself may be missing.
        self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
        # Current state (loaded from disk, or defaults).
        self.state = self.load_state()
        # Task queue (what to do next).
        self.task_queue = []

    def load_state(self):
        """Load the agent state from ``STATE_FILE``.

        Returns:
            The stored state dict with any missing keys filled in from the
            defaults, or the default state when the file is absent,
            unreadable, or does not contain a JSON object.
        """
        try:
            with open(STATE_FILE, encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return self.get_default_state()
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            log(f"Could not read state file {STATE_FILE}: {exc}", level="WARN")
            return self.get_default_state()

        if not isinstance(loaded, dict):
            log(f"State file {STATE_FILE} is not a JSON object", level="WARN")
            return self.get_default_state()

        # Fill in keys a partial or older state file may lack, so later
        # lookups such as state["pending_tasks"] cannot raise KeyError.
        state = self.get_default_state()
        state.update(loaded)
        return state

    def get_default_state(self):
        """Return the default state used for a fresh start."""
        return {
            "phase": "initialization",
            "last_action": None,
            "last_action_time": None,
            "completed_tasks": [],
            "pending_tasks": [
                "update_comfybridge_with_ipadapter",
                "test_ipadapter_integration",
                "integrate_vision_loop",
                "optimize_natural_language",
                "final_testing",
                "documentation",
            ],
            "checkpoint_count": 0,
            "last_error": None,
            "session_count": 0,
        }

    def save_state(self):
        """Persist the current state and write a numbered checkpoint copy.

        The checkpoint counter is incremented *before* anything is written,
        so the state file and the checkpoint both record the new count. A
        later process that reloads the state file therefore continues the
        numbering instead of overwriting the previous checkpoint.
        """
        self.state["last_action_time"] = datetime.now().isoformat()
        checkpoint_id = self.state.get("checkpoint_count", 0) + 1

        # Serialise first: if the state is not JSON-serialisable we fail
        # before truncating the existing state file.
        snapshot = {**self.state, "checkpoint_count": checkpoint_id}
        payload = json.dumps(snapshot, indent=2)

        with open(STATE_FILE, "w", encoding="utf-8") as f:
            f.write(payload)

        checkpoint_file = self.checkpoints_dir / f"checkpoint_{checkpoint_id:04d}.json"
        checkpoint_file.write_text(payload, encoding="utf-8")

        self.state["checkpoint_count"] = checkpoint_id
        log(f"State saved (checkpoint {checkpoint_id})")

    def mark_task_complete(self, task_name, details=None):
        """Mark a task as completed and save the state.

        Args:
            task_name: Name of the task; removed from the pending list if
                present. A completion entry is recorded either way.
            details: Optional JSON-serialisable data stored with the entry.
        """
        pending = self.state["pending_tasks"]
        if task_name in pending:
            pending.remove(task_name)

        self.state["completed_tasks"].append(
            {
                "task": task_name,
                "completed_at": datetime.now().isoformat(),
                "details": details or {},
            }
        )
        self.state["last_action"] = f"completed_{task_name}"
        log(f"Task completed: {task_name}")
        self.save_state()

    def set_phase(self, phase_name):
        """Set the current phase and save the state."""
        self.state["phase"] = phase_name
        self.state["last_action"] = f"phase_change_{phase_name}"
        log(f"Phase changed to: {phase_name}")
        self.save_state()

    def record_error(self, error_msg):
        """Store an error (message, timestamp, phase) for later analysis."""
        self.state["last_error"] = {
            "message": str(error_msg),
            "timestamp": datetime.now().isoformat(),
            "phase": self.state["phase"],
        }
        self.save_state()
        log(f"ERROR recorded: {error_msg}", level="ERROR")

    def get_next_task(self):
        """Return the first pending task, or ``None`` when none remain."""
        pending = self.state["pending_tasks"]
        return pending[0] if pending else None

    def get_progress_report(self):
        """Build a human-readable progress report.

        Returns:
            A multi-line string with phase, last action, checkpoint count,
            a progress figure with a bar (one block per 5 %), up to five
            pending tasks and the three most recently completed tasks.
        """
        pending = self.state["pending_tasks"]
        done = self.state["completed_tasks"]
        completed = len(done)
        total = completed + len(pending)
        percent = (completed / total) * 100 if total > 0 else 0
        # One block per 5 % -> at most 20 blocks. (The original multiplied an
        # empty string, so the bar never rendered.)
        bar = "█" * int(percent / 5)

        parts = [
            f"""
╔══════════════════════════════════════════════════════════════╗
║ NATIRIS RECOVERY AGENT - STATUS REPORT ║
╠══════════════════════════════════════════════════════════════╣
║ Current Phase: {str(self.state['phase']):<38}
║ Last Action: {str(self.state['last_action'])[:38]:<38}
║ Checkpoints: {self.state['checkpoint_count']:<38}
║ Progress: {completed}/{total} ({percent:.1f}%) {bar:<20}
╠══════════════════════════════════════════════════════════════╣
║ PENDING TASKS: ║
"""
        ]

        for i, task in enumerate(pending[:5], 1):
            parts.append(f"{i}. {task[:50]:<51}\n")
        if len(pending) > 5:
            parts.append(f"║ ... and {len(pending) - 5} more{'':<36}\n")

        parts.append(
            """╠══════════════════════════════════════════════════════════════╣
║ RECENTLY COMPLETED: ║
"""
        )

        recent = done[-3:]
        for entry in recent:
            task = entry["task"]
            time_str = entry["completed_at"][11:19]  # HH:MM:SS of the ISO timestamp
            parts.append(f"║ ✓ {task[:20]:<20} at {time_str:<24}\n")
        if not recent:
            parts.append(f"║ (none yet){'':<47}\n")

        parts.append(
            """╚══════════════════════════════════════════════════════════════╝
"""
        )
        return "".join(parts)

    def generate_resume_instructions(self):
        """Build the text that tells the user how to continue the project.

        Returns:
            A multi-line string with the current status, the next pending
            task, task-specific quick-start commands (when defined for that
            task) and the generic resume/status commands.
        """
        next_task = self.get_next_task()
        header = f"""
╔══════════════════════════════════════════════════════════════╗
║ NATIRIS RESUME INSTRUCTIONS ║
╠══════════════════════════════════════════════════════════════╣
Current Status:
Phase: {self.state['phase']}
Last Action: {self.state['last_action']}
Pending Tasks: {len(self.state['pending_tasks'])}
NEXT IMMEDIATE TASK:
{next_task or 'ALL TASKS COMPLETED'}
QUICK START COMMANDS:
"""
        footer = """
TO CONTINUE WHERE YOU LEFT OFF:
python3 ~/natiris/agents/natiris_recovery_agent.py --resume
TO CHECK STATUS:
python3 ~/natiris/agents/natiris_recovery_agent.py --status
═══════════════════════════════════════════════════════════════
"""
        return header + self._QUICK_START.get(next_task, "") + footer

    def create_recovery_script(self):
        """Write an executable ``resume_natiris.sh`` into the project root.

        Returns:
            The path of the generated script as a string.
        """
        script_path = self.project_root / "resume_natiris.sh"
        script_content = """#!/bin/bash
# Natiris Auto-Recovery Script
# Generated by Recovery Agent
echo "╭────────────────────────────────────────────────────────────╮"
echo "│ NATIRIS PROJECT RESUMER │"
echo "╰────────────────────────────────────────────────────────────╯"
echo ""
# Check if state exists
if [ -f ~/natiris/agent_state.json ]; then
echo "📋 Found existing state - loading progress..."
python3 ~/natiris/agents/natiris_recovery_agent.py --resume
else
echo "⚠️ No state found - starting fresh..."
python3 ~/natiris/agents/natiris_recovery_agent.py --init
fi
"""
        # Explicit UTF-8: the script contains emoji and box-drawing
        # characters that a non-UTF-8 default encoding cannot write.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(script_content)
        # Make executable.
        os.chmod(script_path, 0o755)
        log(f"Recovery script created: {script_path}")
        return str(script_path)

    def monitor_and_track(self, interval=30):
        """Run the monitoring loop until interrupted.

        Args:
            interval: Seconds to sleep between iterations (default 30).

        Each iteration saves the state and runs the health check. Ctrl+C
        ends the loop cleanly.

        NOTE(review): every iteration writes a new checkpoint file, so a
        long-running monitor grows the checkpoints directory without bound.
        """
        log("Recovery Agent started - monitoring project...")
        try:
            while True:
                # Save the state regularly.
                self.save_state()
                # Check for problems with the local services.
                self.check_system_health()
                time.sleep(interval)
        except KeyboardInterrupt:
            log("Recovery Agent stopped by user")

    def check_system_health(self):
        """Check whether the local ComfyUI and Ollama HTTP endpoints respond.

        Returns:
            ``True`` when both endpoints answer with HTTP 200; ``False`` when
            at least one does not, or when ``requests`` is not installed.
            Problems are logged at WARN level.
        """
        try:
            import requests
        except ImportError:
            # Report the real cause instead of claiming the services are down.
            log("System health check skipped: 'requests' is not installed", level="WARN")
            return False

        # (url, message for a non-200 answer, message for a failed request)
        services = (
            (
                "http://localhost:8188/system_stats",
                "ComfyUI not responding properly",
                "ComfyUI unreachable",
            ),
            (
                "http://localhost:11434/api/tags",
                "Ollama not responding",
                "Ollama unreachable",
            ),
        )

        issues = []
        for url, bad_status_msg, unreachable_msg in services:
            try:
                resp = requests.get(url, timeout=2)
            except requests.RequestException:
                issues.append(unreachable_msg)
            else:
                if resp.status_code != 200:
                    issues.append(bad_status_msg)

        if issues:
            log(f"System health issues: {', '.join(issues)}", level="WARN")
        return not issues
def main():
    """CLI entry point.

    Parses the command-line flags and runs the first matching action, in
    this priority order: ``--status``, ``--resume``, ``--init``,
    ``--complete``, ``--phase``, ``--create-recovery``, ``--monitor``.
    With no flag the status report is printed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Natiris Recovery Agent")
    parser.add_argument("--status", action="store_true", help="Show status report")
    parser.add_argument("--resume", action="store_true", help="Resume from last checkpoint")
    parser.add_argument("--init", action="store_true", help="Initialize new state")
    parser.add_argument("--complete", help="Mark task as complete")
    parser.add_argument("--phase", help="Set current phase")
    parser.add_argument("--create-recovery", action="store_true", help="Create recovery script")
    parser.add_argument("--monitor", action="store_true", help="Start monitoring mode")
    args = parser.parse_args()

    agent = NatirisRecoveryAgent()

    if args.status:
        print(agent.get_progress_report())
    elif args.resume:
        print(agent.get_progress_report())
        print("\n" + "=" * 60)
        print(agent.generate_resume_instructions())
    elif args.init:
        # Discard whatever was loaded and start over from the defaults.
        agent.state = agent.get_default_state()
        agent.state["session_count"] = 1
        agent.save_state()
        log("Initialized fresh state")
        print("✓ Fresh state initialized")
    elif args.complete:
        agent.mark_task_complete(args.complete)
        print(f"✓ Marked as complete: {args.complete}")
    elif args.phase:
        agent.set_phase(args.phase)
        print(f"✓ Phase set to: {args.phase}")
    elif args.create_recovery:
        script = agent.create_recovery_script()
        print(f"✓ Recovery script created: {script}")
        # `script` is already an absolute path; prefixing it with "./"
        # produced a non-existent ".//home/..." command.
        print(f"\nTo use: {script}")
    elif args.monitor:
        agent.monitor_and_track()
    else:
        # Default: show status.
        print(agent.get_progress_report())
        print("\nUse --resume to see continuation instructions")


if __name__ == "__main__":
    main()