commit 695d715d5b8539b61e0f495c190832fd7abb4e98 Author: Arch Agent Date: Thu May 7 10:14:57 2026 +0200 Initial commit: Log Analyzer Backend diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c8bcf2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.env +.venv/ +venv/ +data/ +*.db +*.sqlite3 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5069e8d --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# Log Analyzer Backend + +CPU-LLM-gestütztes Backend zur Auswertung von Firewall- und Proxy-Logs. Parst Dateien, aggregiert Statistiken (Top-Hits, Quellen, Ziele, Ports, URLs) und nutzt ein lokales LLM (via Ollama) für die Analyse. + +## Features +- **Log-Parsing:** Unterstützt iptables, pfSense, Cisco ASA, Squid, nginx +- **Statistiken:** Top Quellen, Ziele, Ports, URLs, Actions, Timeline +- **LLM-Analyse:** Zusammenfassung und Anomalie-Erkennung via lokalem Ollama-Modell +- **REST-API:** FastAPI mit automatischer OpenAPI-Doku unter `/docs` +- **Docker-Compose:** Schneller Start mit Backend + nginx Reverse Proxy + +## Architektur +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ +│ Client │──────▶│ nginx:80 │──────▶│ FastAPI Backend:8000 │ +└──────────────┘ └──────────────┘ │ - SQLite (Volumen) │ + │ - Log-Parser │ + │ - Ollama-Client │ + └───────────────────────┘ + │ + ▼ + ┌──────────────────────┐ + │ Ollama (CPU/Host) │ + │ http://host.docker │ + │ .internal:11434│ + └──────────────────────┘ +``` + +## Voraussetzungen +- Docker + Docker Compose +- Laufende Ollama-Instanz auf dem Host (Port 11434) mit Modell `llava:7b` +- Ports: 8080 (nginx), 11434 (Ollama auf Host) + +## Installation & Start + +```bash +# Repository klonen +git clone https://gitea.die-heimatlosen.eu/arch_agent/log-analyzer-backend.git +cd log-analyzer-backend + +# Starten +docker-compose up -d --build +``` + +Die API ist dann unter `http://localhost:8080/api` erreichbar. 
+ +## Nutzung + +### Log-Datei hochladen +```bash +curl -X POST http://localhost:8080/api/upload \ + -H "Content-Type: multipart/form-data" \ + -F "file=@/var/log/iptables.log" +``` + +### Statistiken abrufen +```bash +curl "http://localhost:8080/api/stats?limit=20" +``` + +### LLM-Analyse starten +```bash +curl -X POST "http://localhost:8080/api/analyze?log_type=firewall&limit=100" +``` + +### Gesundheitscheck +```bash +curl http://localhost:8080/health +``` + +## API-Endpunkte + +| Methode | Endpunkt | Beschreibung | +|---------|----------|--------------| +| POST | `/api/upload` | Log-Datei hochladen & parsen | +| GET | `/api/stats` | Übersicht aller Statistiken | +| GET | `/api/stats/sources` | Top Quellen | +| GET | `/api/stats/destinations` | Top Ziele | +| GET | `/api/stats/ports` | Top Ports | +| POST | `/api/analyze` | LLM-Analyse der Logs | +| GET | `/health` | Healthcheck | + +## Umgebungsvariablen + +| Variable | Standard | Beschreibung | +|----------|----------|--------------| +| `DATABASE_URL` | `sqlite+aiosqlite:///data/logs.db` | SQLite Datenbankpfad | +| `OLLAMA_URL` | `http://host.docker.internal:11434` | Ollama API URL | +| `OLLAMA_MODEL` | `llava:7b` | LLM Modellname | + +## Unterstützte Log-Formate + +### Firewall +- **iptables:** Kernel-Logzeilen mit `SRC=... DST=... PROTO=... DPT=...` +- **pfSense:** `filterlog` CSV-ähnliche Zeilen +- **Cisco ASA:** `%ASA-... Built/Teardown/Denied ...` + +### Proxy +- **Squid:** Native Squid-Logformat (Timestamp, elapsed, client, code, status, size, method, URL) +- **nginx:** Standard access_log (`$remote_addr - [$time_local] "$request" $status $body_bytes_sent`) + +## Hinweise +- Das Backend erwartet Ollama auf dem **Host** (nicht im Container). Für Linux ggf. `extra_hosts: ["host.docker.internal:host-gateway"]` nutzen. +- Große Logdateien werden in Batches von 500 Zeilen verarbeitet. +- Die SQLite-Datenbank wird im Docker-Volumen `logdata` persistiert. 
router = APIRouter(prefix="/api", tags=["logs"])


class StatsResponse(BaseModel):
    """Payload of ``GET /api/stats``: every aggregate in one response."""

    overview: dict
    top_sources: List[dict]
    top_destinations: List[dict]
    top_ports: List[dict]
    top_urls: List[dict]
    actions: List[dict]
    timeline: List[dict]
    unique_counts: dict


class AnalysisResponse(BaseModel):
    """Payload of ``POST /api/analyze``: the text returned by the LLM service."""

    analysis: str


class UploadResponse(BaseModel):
    """Payload of ``POST /api/upload``: status message and stored row count."""

    message: str
    parsed_lines: int


@router.post("/upload", response_model=UploadResponse)
async def upload_logs(
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Parse an uploaded log file and store every recognised line.

    The upload is decoded as UTF-8 (undecodable bytes are dropped), split into
    lines and passed to ``parse_lines``; unrecognised lines are skipped.

    Raises:
        HTTPException 400: no filename was sent or the file cannot be read.
        HTTPException 413: the file is larger than ``settings.upload_max_size_mb``.
    """
    # Function-scope import, like ``select`` in ``analyze_logs``: this module
    # has no file-level import of the settings object.
    from config import settings

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    max_bytes = settings.upload_max_size_mb * 1024 * 1024
    try:
        # Ask for one byte more than allowed: an oversized upload is detected
        # without loading all of it into memory.
        raw = await file.read(max_bytes + 1)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Cannot read file: {e}") from e
    if len(raw) > max_bytes:
        raise HTTPException(
            status_code=413,
            detail=f"File exceeds the upload limit of {settings.upload_max_size_mb} MB",
        )

    content = raw.decode("utf-8", errors="ignore")
    parsed = parse_lines(content.splitlines())

    try:
        for batch in _batches(parsed, 500):
            db.add_all([LogEntry(**row) for row in batch])
            # Flush per batch to bound the pending-object list, but commit only
            # once so a failure cannot leave a half-imported file behind.
            await db.flush()
        await db.commit()
    except Exception:
        await db.rollback()
        raise

    return UploadResponse(
        message=f"Uploaded and parsed {len(parsed)} log lines.",
        parsed_lines=len(parsed),
    )


@router.get("/stats", response_model=StatsResponse)
async def get_stats(
    limit: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
):
    """Return all aggregates at once; ``limit`` caps each top-N list."""
    # The queries share one session, so they are awaited one after another.
    return StatsResponse(
        overview=await StatsAnalyzer.overview(db),
        top_sources=await StatsAnalyzer.top_sources(db, limit),
        top_destinations=await StatsAnalyzer.top_destinations(db, limit),
        top_ports=await StatsAnalyzer.top_ports(db, limit),
        top_urls=await StatsAnalyzer.top_urls(db, limit),
        actions=await StatsAnalyzer.action_distribution(db),
        timeline=await StatsAnalyzer.timeline(db, "hour"),
        unique_counts=await StatsAnalyzer.unique_counts(db),
    )


@router.get("/stats/sources", response_model=List[dict])
async def get_sources(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent source IPs."""
    return await StatsAnalyzer.top_sources(db, limit)


@router.get("/stats/destinations", response_model=List[dict])
async def get_destinations(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent destination IPs."""
    return await StatsAnalyzer.top_destinations(db, limit)


@router.get("/stats/ports", response_model=List[dict])
async def get_ports(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent destination ports."""
    return await StatsAnalyzer.top_ports(db, limit)


@router.post("/analyze", response_model=AnalysisResponse)
async def analyze_logs(
    log_type: str = Query(
        "firewall",
        enum=["firewall", "proxy", "all"],
        # ``enum`` only annotates the OpenAPI schema; ``pattern`` is what
        # actually rejects other values (422 instead of a misleading 404).
        pattern="^(firewall|proxy|all)$",
    ),
    limit: int = Query(100, ge=1, le=500),
    db: AsyncSession = Depends(get_db),
):
    """Send sample log lines plus global statistics to the LLM service.

    Args:
        log_type: ``firewall``, ``proxy`` or ``all``; selects the sample rows.
        limit: maximum number of sample rows read from the database.

    Raises:
        HTTPException 404: no stored row matches ``log_type``.
    """
    from sqlalchemy import select

    # Order by primary key: without ORDER BY the rows picked by LIMIT are
    # unspecified and may differ between calls.
    stmt = select(LogEntry).order_by(LogEntry.id).limit(limit)
    if log_type != "all":
        stmt = stmt.where(LogEntry.log_type == log_type)
    rows = (await db.execute(stmt)).scalars().all()
    if not rows:
        raise HTTPException(status_code=404, detail="No logs found for analysis")

    overview = await StatsAnalyzer.overview(db)
    uniq = await StatsAnalyzer.unique_counts(db)  # queried once, used twice
    stats = {
        "total_entries": overview["total_entries"],
        "unique_sources": uniq["unique_sources"],
        "unique_destinations": uniq["unique_destinations"],
        "top_sources": await StatsAnalyzer.top_sources(db, 10),
        "top_destinations": await StatsAnalyzer.top_destinations(db, 10),
        "top_ports": await StatsAnalyzer.top_ports(db, 10),
    }
    snippets = [row.raw_line for row in rows]
    analysis = await LLMService().analyze_logs(snippets, stats)
    return AnalysisResponse(analysis=analysis)


def _batches(data: List[dict], size: int):
    """Yield consecutive slices of ``data`` holding at most ``size`` items.

    Raises:
        ValueError: ``size`` is smaller than 1 (a negative step would otherwise
            silently yield nothing).
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    for start in range(0, len(data), size):
        yield data[start : start + size]
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the data directory and tables on startup; dispose the engine on shutdown."""
    os.makedirs("data", exist_ok=True)
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    try:
        yield
    finally:
        # Runs even when the application context exits with an exception, so
        # pooled database connections are always released.
        await engine.dispose()


app = FastAPI(
    title="Log Analyzer Backend",
    description="CPU LLM powered firewall & proxy log analyzer.",
    version="1.0.0",
    lifespan=lifespan,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Credentials must not be combined with wildcard origins; no route in this
    # app reads cookies or auth headers, so credentialed CORS is switched off.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(api_router)


@app.get("/health")
async def health():
    """Liveness probe: always returns ``{"status": "ok"}``."""
    return {"status": "ok"}
class StatsAnalyzer:
    """Aggregate queries over the ``log_entries`` table.

    Every method is a static coroutine that takes the session to query with.
    """

    @staticmethod
    async def _top_values(
        session: AsyncSession,
        column: Any,
        key: str,
        limit: int | None = None,
    ) -> List[Dict[str, Any]]:
        """Count rows per non-NULL value of ``column``, most frequent first.

        Args:
            session: session used to run the query.
            column: mapped ``LogEntry`` attribute to group by.
            key: dictionary key under which the grouped value is returned.
            limit: maximum number of groups; ``None`` returns all of them.

        Returns:
            A list of ``{key: value, "count": n}`` dictionaries.
        """
        stmt = (
            select(column, func.count().label("cnt"))
            .where(column.isnot(None))
            .group_by(column)
            # Secondary sort key keeps the order of equal counts stable.
            .order_by(func.count().desc(), column)
        )
        if limit is not None:
            stmt = stmt.limit(limit)
        rows = await session.execute(stmt)
        return [{key: value, "count": count} for value, count in rows]

    @staticmethod
    async def overview(session: AsyncSession) -> Dict[str, Any]:
        """Return the total row count and the firewall / proxy row counts."""
        # One grouped query instead of three separate COUNT round trips.
        stmt = select(LogEntry.log_type, func.count()).group_by(LogEntry.log_type)
        per_type = {log_type: count for log_type, count in await session.execute(stmt)}
        return {
            "total_entries": sum(per_type.values()),
            "firewall_entries": per_type.get("firewall", 0),
            "proxy_entries": per_type.get("proxy", 0),
        }

    @staticmethod
    async def top_sources(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the ``limit`` most frequent source IPs with their counts."""
        return await StatsAnalyzer._top_values(session, LogEntry.source_ip, "source_ip", limit)

    @staticmethod
    async def top_destinations(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the ``limit`` most frequent destination IPs with their counts."""
        return await StatsAnalyzer._top_values(
            session, LogEntry.destination_ip, "destination_ip", limit
        )

    @staticmethod
    async def top_ports(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the ``limit`` most frequent destination ports with their counts."""
        return await StatsAnalyzer._top_values(
            session, LogEntry.destination_port, "destination_port", limit
        )

    @staticmethod
    async def top_urls(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the ``limit`` most frequent URLs with their counts."""
        return await StatsAnalyzer._top_values(session, LogEntry.url, "url", limit)

    @staticmethod
    async def action_distribution(session: AsyncSession) -> List[Dict[str, Any]]:
        """Return the count of every non-NULL action, most frequent first."""
        return await StatsAnalyzer._top_values(session, LogEntry.action, "action")

    @staticmethod
    async def timeline(session: AsyncSession, granularity: str = "hour") -> List[Dict[str, Any]]:
        """Count rows per time bucket, in chronological order.

        Args:
            session: session used to run the query.
            granularity: ``"hour"`` for hourly buckets; any other value
                produces daily buckets.

        Returns:
            A list of ``{"time_bucket": str, "count": n}`` dictionaries; rows
            without a timestamp are ignored.
        """
        fmt = "%Y-%m-%d %H:00" if granularity == "hour" else "%Y-%m-%d"
        # ``strftime`` is the SQLite date function, so this query is
        # SQLite-specific.
        stmt = (
            select(func.strftime(fmt, LogEntry.timestamp).label("bucket"), func.count().label("cnt"))
            .where(LogEntry.timestamp.isnot(None))
            .group_by("bucket")
            .order_by("bucket")
        )
        rows = await session.execute(stmt)
        return [{"time_bucket": bucket, "count": count} for bucket, count in rows]

    @staticmethod
    async def unique_counts(session: AsyncSession) -> Dict[str, int]:
        """Return the number of distinct source and destination IPs."""
        # Both aggregates are computed in a single SELECT.
        stmt = select(
            func.count(func.distinct(LogEntry.source_ip)),
            func.count(func.distinct(LogEntry.destination_ip)),
        )
        src, dst = (await session.execute(stmt)).one()
        return {"unique_sources": src or 0, "unique_destinations": dst or 0}
httpx.ConnectError: + return "LLM service unreachable. Ensure Ollama is running and accessible." + except Exception as e: + return f"LLM analysis error: {type(e).__name__}: {str(e)}" + + def _build_analysis_prompt(self, snippets: List[str], stats: Dict[str, Any]) -> str: + top_sources = ", ".join([f"{s['source_ip']} ({s['count']})" for s in stats.get("top_sources", [])[:5]]) + top_dests = ", ".join([f"{d['destination_ip']} ({d['count']})" for d in stats.get("top_destinations", [])[:5]]) + top_ports = ", ".join([f"{p['destination_port']} ({p['count']})" for p in stats.get("top_ports", [])[:5]]) + lines = "\n".join(snippets[:20]) + return ( + "You are a network security analyst. Analyze the following firewall/proxy log snippets and statistics. " + "Summarize the most important observations in 3-5 bullet points. Identify potential anomalies, scan patterns, " + "or top talkers. Be concise and factual. Use German or English depending on the log content.\n\n" + f"=== Statistics ===\n" + f"Top Sources: {top_sources}\n" + f"Top Destinations: {top_dests}\n" + f"Top Ports: {top_ports}\n" + f"Total Entries: {stats.get('total_entries', 0)}\n" + f"Unique Sources: {stats.get('unique_sources', 0)}\n" + f"Unique Destinations: {stats.get('unique_destinations', 0)}\n\n" + f"=== Sample Logs ===\n{lines}\n\n" + "=== Analysis ===" + ) diff --git a/backend/services/log_parser.py b/backend/services/log_parser.py new file mode 100644 index 0000000..49d2bdc --- /dev/null +++ b/backend/services/log_parser.py @@ -0,0 +1,189 @@ +import re +import ipaddress +from datetime import datetime +from typing import Dict, List, Any, Optional +from models import LogEntry + +# iptables: ... SRC=1.2.3.4 DST=5.6.7.8 PROTO=TCP SPT=123 DPT=80 ... +IPTABLES_RE = re.compile( + r"SRC=(?P[0-9a-fA-F.:]+)\s+" + r"DST=(?P[0-9a-fA-F.:]+)\s+" + r"(?:PROTO=(?P\w+)\s+)?" + r"(?:SPT=(?P\d+)\s+)?" + r"(?:DPT=(?P\d+)\s+)?" 
+ r".*?(?PACCEPT|DROP|REJECT|DENY|ALLOW|PASS|BLOCK)", + re.IGNORECASE, +) + +# pfSense filterlog: <134>1 2024-01-01T12:00:00+00:00 ... filterlog: ... 4,,,1000000103,em0,match,pass,in,4,0x0,,64,0,0,DF,6,tcp,60,192.168.1.1,10.0.0.1,0,0,0,0,12345,80,0,S,1234567890,,mss +PFSENSE_RE = re.compile( + r"filterlog:.*?,(?Ppass|block|match|reject),.*?,(?Ptcp|udp|icmp),.*?," + r"(?P[0-9a-fA-F.:]+),(?P[0-9a-fA-F.:]+),.*?,(?P\d+)?,(?P\d+)?", + re.IGNORECASE, +) + +# Cisco ASA: %ASA-6-302013: Built outbound TCP connection 123 for outside:10.0.0.1/80 to inside:192.168.1.1/12345 +CISCO_ASA_RE = re.compile( + r"%ASA-\d+-\d+:\s+.*?(?PBuilt|Teardown|Denied|Deny|Allowed|Permit).*?" + r"(?PTCP|UDP|ICMP).*?" + r"(?:for\s+(?P\w+):)?(?P[0-9.]+)/(?P\d+)\s+" + r"to\s+(?P\w+):(?P[0-9.]+)/(?P\d+)", + re.IGNORECASE, +) + +# Squid: 1704108600.123 200 192.168.1.1 TCP_MISS/200 1234 GET http://example.com/ - DIRECT/93.184.216.34 text/html +SQUID_RE = re.compile( + r"^(?P[\d.]+)\s+" + r"(?P-?\d+)\s+" + r"(?P[0-9a-fA-F.:]+)\s+" + r"(?P\S+)\s+" + r"(?P\d+)\s+" + r"(?P\d+)\s+" + r"(?P\w+)\s+" + r"(?P\S+)\s+", +) + +# Nginx proxy: 192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/7.68.0" +NGINX_RE = re.compile( + r"^(?P[0-9a-fA-F.:]+)\s+.*?\s+" + r"\[(?P[^\]]+)\]\s+" + r'"(?P\w+)\s+(?P\S+)\s+HTTP/[\d.]+"\s+' + r"(?P\d+)\s+(?P\d+)", +) + +TIMESTAMP_FORMATS = [ + "%d/%b/%Y:%H:%M:%S %z", + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%d %H:%M:%S", +] + + +def _to_int(val: str | None) -> int | None: + if val is None: + return None + try: + return int(val) + except ValueError: + return None + + +def _parse_timestamp(ts_str: str) -> datetime | None: + for fmt in TIMESTAMP_FORMATS: + try: + return datetime.strptime(ts_str, fmt) + except ValueError: + continue + # Try unix float + try: + return datetime.utcfromtimestamp(float(ts_str)) + except (ValueError, OSError, OverflowError): + pass + return None + + +def parse_line(line: str) -> Optional[Dict[str, Any]]: + line = line.strip() + if not 
line: + return None + + # iptables / kernel / ufw + m = IPTABLES_RE.search(line) + if m: + return { + "log_type": "firewall", + "source_ip": m.group("src"), + "destination_ip": m.group("dst"), + "protocol": m.group("proto"), + "source_port": _to_int(m.group("spt")), + "destination_port": _to_int(m.group("dpt")), + "action": m.group("action").upper(), + "url": None, + "method": None, + "status_code": None, + "bytes_size": None, + "timestamp": None, + "raw_line": line, + } + + m = PFSENSE_RE.search(line) + if m: + return { + "log_type": "firewall", + "source_ip": m.group("src"), + "destination_ip": m.group("dst"), + "protocol": m.group("proto").upper() if m.group("proto") else None, + "source_port": _to_int(m.group("spt")), + "destination_port": _to_int(m.group("dpt")), + "action": m.group("action").upper() if m.group("action") else "UNKNOWN", + "url": None, + "method": None, + "status_code": None, + "bytes_size": None, + "timestamp": None, + "raw_line": line, + } + + m = CISCO_ASA_RE.search(line) + if m: + return { + "log_type": "firewall", + "source_ip": m.group("src"), + "destination_ip": m.group("dst"), + "protocol": m.group("proto").upper() if m.group("proto") else None, + "source_port": _to_int(m.group("spt")), + "destination_port": _to_int(m.group("dpt")), + "action": "ALLOW" if m.group("action") and m.group("action").lower() in ("built", "allowed", "permit") else "DENY", + "url": None, + "method": None, + "status_code": None, + "bytes_size": None, + "timestamp": None, + "raw_line": line, + } + + m = SQUID_RE.match(line) + if m: + return { + "log_type": "proxy", + "source_ip": m.group("src"), + "destination_ip": None, + "protocol": None, + "source_port": None, + "destination_port": None, + "action": None, + "url": m.group("url"), + "method": m.group("method"), + "status_code": _to_int(m.group("status")), + "bytes_size": _to_int(m.group("size")), + "timestamp": _parse_timestamp(m.group("ts")), + "raw_line": line, + } + + m = NGINX_RE.match(line) + if m: + 
return { + "log_type": "proxy", + "source_ip": m.group("src"), + "destination_ip": None, + "protocol": None, + "source_port": None, + "destination_port": None, + "action": None, + "url": m.group("url"), + "method": m.group("method"), + "status_code": _to_int(m.group("status")), + "bytes_size": _to_int(m.group("size")), + "timestamp": _parse_timestamp(m.group("ts")), + "raw_line": line, + } + + return None + + +def parse_lines(lines: List[str]) -> List[Dict[str, Any]]: + results = [] + for line in lines: + parsed = parse_line(line) + if parsed: + results.append(parsed) + return results diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0192721 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,29 @@ +version: "3.8" + +services: + backend: + build: ./backend + container_name: log-analyzer-backend + volumes: + - logdata:/app/data + environment: + - DATABASE_URL=sqlite+aiosqlite:///data/logs.db + - OLLAMA_URL=http://host.docker.internal:11434 + - OLLAMA_MODEL=llava:7b + extra_hosts: + - "host.docker.internal:host-gateway" + restart: unless-stopped + + nginx: + image: nginx:alpine + container_name: log-analyzer-nginx + ports: + - "8080:80" + volumes: + - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro + depends_on: + - backend + restart: unless-stopped + +volumes: + logdata: diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..d6fe1b4 --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,22 @@ +events { + worker_connections 1024; +} + +http { + upstream backend { + server backend:8000; + } + + server { + listen 80; + client_max_body_size 100M; + + location / { + proxy_pass http://backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +}