Initial commit: Log Analyzer Backend

2026-05-07 10:14:57 +02:00
commit 695d715d5b
16 changed files with 748 additions and 0 deletions
@@ -0,0 +1,10 @@
 __pycache__/
 *.pyc
 *.pyo
 *.pyd
 .env
 .venv/
 venv/
 data/
 *.db
 *.sqlite3
@@ -0,0 +1,108 @@
 # Log Analyzer Backend
 CPU-LLM-gestütztes Backend zur Auswertung von Firewall- und Proxy-Logs. Parst Dateien, aggregiert Statistiken (Top-Hits, Quellen, Ziele, Ports, URLs) und nutzt ein lokales LLM (via Ollama) für die Analyse.
 ## Features
 - **Log-Parsing:** Unterstützt iptables, pfSense, Cisco ASA, Squid, nginx
 - **Statistiken:** Top Quellen, Ziele, Ports, URLs, Actions, Timeline
 - **LLM-Analyse:** Zusammenfassung und Anomalie-Erkennung via lokalem Ollama-Modell
 - **REST-API:** FastAPI mit automatischer OpenAPI-Doku unter `/docs`
 - **Docker-Compose:** Schneller Start mit Backend + nginx Reverse Proxy
 ## Architektur
 ```
 ┌──────────────┐      ┌──────────────┐      ┌──────────────────────┐
 │   Client     │──────▶│   nginx:80   │──────▶│  FastAPI Backend:8000 │
 └──────────────┘      └──────────────┘      │  - SQLite (Volumen)   │
                                            │  - Log-Parser         │
                                            │  - Ollama-Client      │
                                            └───────────────────────┘
                                                      │
                                                      ▼
                                            ┌──────────────────────┐
                                            │  Ollama (CPU/Host)   │
                                            │  http://host.docker  │
                                            │       .internal:11434│
                                            └──────────────────────┘
 ```
 ## Voraussetzungen
 - Docker + Docker Compose
 - Laufende Ollama-Instanz auf dem Host (Port 11434) mit Modell `llava:7b`
 - Ports: 8080 (nginx), 11434 (Ollama auf Host)
 ## Installation & Start
 ```bash
 # Repository klonen
 git clone https://gitea.die-heimatlosen.eu/arch_agent/log-analyzer-backend.git
 cd log-analyzer-backend
 # Starten
 docker-compose up -d --build
 ```
 Die API ist dann unter `http://localhost:8080/api` erreichbar.
 ## Nutzung
 ### Log-Datei hochladen
 ```bash
 curl -X POST http://localhost:8080/api/upload \
  -H "Content-Type: multipart/form-data" \
  -F "file=@/var/log/iptables.log"
 ```
 ### Statistiken abrufen
 ```bash
 curl "http://localhost:8080/api/stats?limit=20"
 ```
 ### LLM-Analyse starten
 ```bash
 curl -X POST "http://localhost:8080/api/analyze?log_type=firewall&limit=100"
 ```
 ### Gesundheitscheck
 ```bash
 curl http://localhost:8080/health
 ```
 ## API-Endpunkte
 | Methode | Endpunkt | Beschreibung |
 |---------|----------|--------------|
 | POST | `/api/upload` | Log-Datei hochladen & parsen |
 | GET | `/api/stats` | Übersicht aller Statistiken |
 | GET | `/api/stats/sources` | Top Quellen |
 | GET | `/api/stats/destinations` | Top Ziele |
 | GET | `/api/stats/ports` | Top Ports |
 | POST | `/api/analyze` | LLM-Analyse der Logs |
 | GET | `/health` | Healthcheck |
 ## Umgebungsvariablen
 | Variable | Standard | Beschreibung |
 |----------|----------|--------------|
 | `DATABASE_URL` | `sqlite+aiosqlite:///data/logs.db` | SQLite Datenbankpfad |
 | `OLLAMA_URL` | `http://host.docker.internal:11434` | Ollama API URL |
 | `OLLAMA_MODEL` | `llava:7b` | LLM Modellname |
 ## Unterstützte Log-Formate
 ### Firewall
 - **iptables:** Kernel-Logzeilen mit `SRC=... DST=... PROTO=... DPT=...`
 - **pfSense:** `filterlog` CSV-ähnliche Zeilen
 - **Cisco ASA:** `%ASA-... Built/Teardown/Denied ...`
 ### Proxy
 - **Squid:** Native Squid-Logformat (Timestamp, elapsed, client, code, status, size, method, URL)
 - **nginx:** Standard access_log (`$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent`)
 ## Hinweise
 - Das Backend erwartet Ollama auf dem **Host** (nicht im Container). Für Linux ggf. `extra_hosts: ["host.docker.internal:host-gateway"]` nutzen.
 - Große Logdateien werden in Batches von 500 Zeilen verarbeitet.
 - Die SQLite-Datenbank wird im Docker-Volumen `logdata` persistiert.
 ## Lizenz
 MIT
@@ -0,0 +1,13 @@
 # Backend image: Python 3.11 (slim) that serves the FastAPI app with uvicorn on port 8000.
 FROM python:3.11-slim
 WORKDIR /app
 # NOTE(review): gcc is presumably only needed to build wheels from source - confirm it is still required.
 RUN apt-get update && apt-get install -y --no-install-recommends gcc \
    && rm -rf /var/lib/apt/lists/*
 # Requirements are copied and installed before the sources, in their own layers.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 # Listen on all interfaces so the nginx container can reach the app.
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,124 @@
 import io
 from typing import List
 from fastapi import APIRouter, UploadFile, File, Depends, HTTPException, Query
 from pydantic import BaseModel
 from sqlalchemy.ext.asyncio import AsyncSession
 from models import LogEntry
 from services.log_parser import parse_lines
 from services.analyzer import StatsAnalyzer
 from services.llm_service import LLMService
 from database import get_db
 router = APIRouter(prefix="/api", tags=["logs"])
 class StatsResponse(BaseModel):
    """Response body of ``GET /api/stats``: every aggregate in one payload."""
    # Entry totals as returned by StatsAnalyzer.overview.
    overview: dict
    # Ranked lists of {<column>: value, "count": n} dicts from the StatsAnalyzer top_* queries.
    top_sources: List[dict]
    top_destinations: List[dict]
    top_ports: List[dict]
    top_urls: List[dict]
    # Count per action value (StatsAnalyzer.action_distribution).
    actions: List[dict]
    # Count per time bucket (StatsAnalyzer.timeline).
    timeline: List[dict]
    # Distinct source / destination counts (StatsAnalyzer.unique_counts).
    unique_counts: dict
 class AnalysisResponse(BaseModel):
    """Response body of ``POST /api/analyze``."""
    # Text returned by LLMService.analyze_logs.
    analysis: str
 class UploadResponse(BaseModel):
    """Response body of ``POST /api/upload``."""
    # Human-readable summary of the upload.
    message: str
    # Number of lines the parser recognised (unrecognised lines are not counted).
    parsed_lines: int
@router.post("/upload", response_model=UploadResponse)
 async def upload_logs(
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
 ):
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")
    try:
        content = (await file.read()).decode("utf-8", errors="ignore")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Cannot read file: {e}")
    lines = content.splitlines()
    parsed = parse_lines(lines)
    for batch in _batches(parsed, 500):
        entries = [LogEntry(**row) for row in batch]
        db.add_all(entries)
        await db.commit()
    return UploadResponse(
        message=f"Uploaded and parsed {len(parsed)} log lines.",
        parsed_lines=len(parsed),
    )
@router.get("/stats", response_model=StatsResponse)
 async def get_stats(
    limit: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
 ):
    overview = await StatsAnalyzer.overview(db)
    sources = await StatsAnalyzer.top_sources(db, limit)
    dests = await StatsAnalyzer.top_destinations(db, limit)
    ports = await StatsAnalyzer.top_ports(db, limit)
    urls = await StatsAnalyzer.top_urls(db, limit)
    actions = await StatsAnalyzer.action_distribution(db)
    timeline = await StatsAnalyzer.timeline(db, "hour")
    uniq = await StatsAnalyzer.unique_counts(db)
    return StatsResponse(
        overview=overview,
        top_sources=sources,
        top_destinations=dests,
        top_ports=ports,
        top_urls=urls,
        actions=actions,
        timeline=timeline,
        unique_counts=uniq,
    )
@router.get("/stats/sources", response_model=List[dict])
 async def get_sources(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent source IPs with their hit counts."""
    return await StatsAnalyzer.top_sources(db, limit)
@router.get("/stats/destinations", response_model=List[dict])
 async def get_destinations(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent destination IPs with their hit counts."""
    return await StatsAnalyzer.top_destinations(db, limit)
@router.get("/stats/ports", response_model=List[dict])
 async def get_ports(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
    """Return the ``limit`` most frequent destination ports with their hit counts."""
    return await StatsAnalyzer.top_ports(db, limit)
@router.post("/analyze", response_model=AnalysisResponse)
 async def analyze_logs(
    log_type: str = Query("firewall", enum=["firewall", "proxy", "all"]),
    limit: int = Query(100, ge=1, le=500),
    db: AsyncSession = Depends(get_db),
 ):
    from sqlalchemy import select
    if log_type != "all":
        stmt = select(LogEntry).where(LogEntry.log_type == log_type).limit(limit)
    else:
        stmt = select(LogEntry).limit(limit)
    result = await db.execute(stmt)
    rows = result.scalars().all()
    if not rows:
        raise HTTPException(status_code=404, detail="No logs found for analysis")
    stats = {
        "total_entries": (await StatsAnalyzer.overview(db))["total_entries"],
        "unique_sources": (await StatsAnalyzer.unique_counts(db))["unique_sources"],
        "unique_destinations": (await StatsAnalyzer.unique_counts(db))["unique_destinations"],
        "top_sources": await StatsAnalyzer.top_sources(db, 10),
        "top_destinations": await StatsAnalyzer.top_destinations(db, 10),
        "top_ports": await StatsAnalyzer.top_ports(db, 10),
    }
    snippets = [r.raw_line for r in rows]
    llm = LLMService()
    analysis = await llm.analyze_logs(snippets, stats)
    return AnalysisResponse(analysis=analysis)
 def _batches(data: List[dict], size: int):
    for i in range(0, len(data), size):
        yield data[i : i + size]
@@ -0,0 +1,10 @@
 from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
    """Application configuration; every field carries its default value."""
    # SQLAlchemy URL of the SQLite database (async aiosqlite driver).
    database_url: str = "sqlite+aiosqlite:///data/logs.db"
    # Base URL of the Ollama HTTP API.
    ollama_url: str = "http://host.docker.internal:11434"
    # Name of the Ollama model used for the analysis.
    ollama_model: str = "llava:7b"
    # NOTE(review): presumably the upload size cap in megabytes - confirm where it is enforced.
    upload_max_size_mb: int = 50
    # NOTE(review): presumably the insert batch size; the upload route in this commit
    # batches with a hard-coded 500 - confirm which value is intended.
    log_batch_size: int = 1000
 # Module-level instance imported by the other modules.
 settings = Settings()
@@ -0,0 +1,13 @@
 from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
 from config import settings
 # Connection URL taken from the application settings.
 DATABASE_URL = settings.database_url
 # One async engine per process; echo=False keeps SQL statements out of the log.
 engine = create_async_engine(DATABASE_URL, echo=False)
 # Session factory; expire_on_commit=False keeps loaded attributes readable after a commit.
 SessionLocal = async_sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False)
 async def get_db():
    """Yield one ``AsyncSession`` per request (used as a FastAPI dependency).

    The ``async with`` block closes the session when the request is finished,
    including when the handler raised, so no explicit ``close()`` is needed.
    """
    async with SessionLocal() as session:
        yield session
@@ -0,0 +1,38 @@
 import os
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from database import engine
 from models import Base
 from api.routes import router as api_router
@asynccontextmanager
 async def lifespan(app: FastAPI):
    os.makedirs("data", exist_ok=True)
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    yield
    await engine.dispose()
 # Application object; `lifespan` handles database setup and teardown.
 app = FastAPI(
    title="Log Analyzer Backend",
    description="CPU LLM powered firewall & proxy log analyzer.",
    version="1.0.0",
    lifespan=lifespan,
 )
 # CORS: wildcard origins may not be combined with credentials, so credentials
 # stay disabled. To allow cookies or Authorization headers from a browser,
 # replace the wildcards with explicit origins, methods and headers first.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 app.include_router(api_router)
@app.get("/health")
 async def health():
    """Liveness probe: returns a constant payload and does not touch the database."""
    return {"status": "ok"}
@@ -0,0 +1,25 @@
 from datetime import datetime
 from sqlalchemy import Integer, String, DateTime, Text, Boolean, BigInteger
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
 class Base(DeclarativeBase):
    """Declarative base class; its metadata collects the tables of all models."""
    pass
 class LogEntry(Base):
    """One parsed log line, firewall or proxy.

    Only ``raw_line`` and ``log_type`` are mandatory; every parsed field is
    nullable because each log format fills a different subset of columns.
    """
    __tablename__ = "log_entries"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
    # The stripped original line, kept verbatim.
    raw_line: Mapped[str] = mapped_column(Text, nullable=False)
    log_type: Mapped[str] = mapped_column(String(20), nullable=False, index=True)  # firewall, proxy
    # 45 characters fit the longest textual IPv6 address form.
    source_ip: Mapped[str | None] = mapped_column(String(45), nullable=True, index=True)
    destination_ip: Mapped[str | None] = mapped_column(String(45), nullable=True, index=True)
    source_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
    destination_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
    protocol: Mapped[str | None] = mapped_column(String(10), nullable=True)
    action: Mapped[str | None] = mapped_column(String(20), nullable=True, index=True)  # ACCEPT, DROP, DENY, ALLOW
    # HTTP fields, filled by the proxy parsers only.
    url: Mapped[str | None] = mapped_column(Text, nullable=True)
    method: Mapped[str | None] = mapped_column(String(10), nullable=True)
    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
    bytes_size: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    # Time taken from the log line itself; the firewall parsers currently leave it empty.
    timestamp: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, index=True)
    # Insertion time as a naive UTC datetime.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
@@ -0,0 +1,10 @@
 fastapi>=0.110.0
 uvicorn[standard]>=0.27.0
 sqlalchemy[asyncio]>=2.0.0
 aiosqlite>=0.19.0
 python-multipart>=0.0.9
 httpx>=0.27.0
 pydantic>=2.6.0
 pydantic-settings>=2.1.0
 aiofiles>=23.2.0
 jinja2>=3.1.0
@@ -0,0 +1,105 @@
 from typing import List, Dict, Any
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from models import LogEntry
 class StatsAnalyzer:
    """Aggregate queries over the ``log_entries`` table; every method is static."""

    @staticmethod
    async def _count_entries(session: AsyncSession, log_type: str | None = None) -> int:
        """Count all entries, or only those of *log_type* when it is given."""
        query = select(func.count()).select_from(LogEntry)
        if log_type is not None:
            query = query.where(LogEntry.log_type == log_type)
        return (await session.execute(query)).scalar() or 0

    @staticmethod
    async def _grouped_counts(session: AsyncSession, column, key: str, limit=None) -> List[Dict[str, Any]]:
        """Count rows per non-NULL value of *column*, most frequent first.

        Each result dict maps *key* to the column value and ``"count"`` to its
        frequency; *limit* caps the number of rows when it is not ``None``.
        """
        query = (
            select(column, func.count().label("cnt"))
            .where(column.isnot(None))
            .group_by(column)
            .order_by(func.count().desc())
        )
        if limit is not None:
            query = query.limit(limit)
        result = await session.execute(query)
        return [{key: value, "count": hits} for value, hits in result]

    @staticmethod
    async def overview(session: AsyncSession) -> Dict[str, Any]:
        """Return the total number of entries and the firewall / proxy split."""
        total = await StatsAnalyzer._count_entries(session)
        firewall = await StatsAnalyzer._count_entries(session, "firewall")
        proxy = await StatsAnalyzer._count_entries(session, "proxy")
        return {
            "total_entries": total,
            "firewall_entries": firewall,
            "proxy_entries": proxy,
        }

    @staticmethod
    async def top_sources(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the *limit* most frequent source IPs."""
        return await StatsAnalyzer._grouped_counts(session, LogEntry.source_ip, "source_ip", limit)

    @staticmethod
    async def top_destinations(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the *limit* most frequent destination IPs."""
        return await StatsAnalyzer._grouped_counts(
            session, LogEntry.destination_ip, "destination_ip", limit
        )

    @staticmethod
    async def top_ports(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the *limit* most frequent destination ports."""
        return await StatsAnalyzer._grouped_counts(
            session, LogEntry.destination_port, "destination_port", limit
        )

    @staticmethod
    async def top_urls(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
        """Return the *limit* most frequent URLs."""
        return await StatsAnalyzer._grouped_counts(session, LogEntry.url, "url", limit)

    @staticmethod
    async def action_distribution(session: AsyncSession) -> List[Dict[str, Any]]:
        """Return the number of entries per action value, without a row limit."""
        return await StatsAnalyzer._grouped_counts(session, LogEntry.action, "action")

    @staticmethod
    async def timeline(session: AsyncSession, granularity: str = "hour") -> List[Dict[str, Any]]:
        """Count entries per time bucket, ordered by bucket.

        ``"hour"`` buckets by hour; any other value buckets by day. Entries
        without a timestamp are left out. Relies on SQLite's ``strftime``.
        """
        bucket_format = "%Y-%m-%d %H:00" if granularity == "hour" else "%Y-%m-%d"
        bucket = func.strftime(bucket_format, LogEntry.timestamp).label("bucket")
        query = (
            select(bucket, func.count().label("cnt"))
            .where(LogEntry.timestamp.isnot(None))
            .group_by("bucket")
            .order_by("bucket")
        )
        result = await session.execute(query)
        return [{"time_bucket": label, "count": hits} for label, hits in result]

    @staticmethod
    async def unique_counts(session: AsyncSession) -> Dict[str, int]:
        """Return the number of distinct source and destination IPs."""

        async def distinct(column) -> int:
            query = select(func.count(func.distinct(column)))
            return (await session.execute(query)).scalar() or 0

        sources = await distinct(LogEntry.source_ip)
        destinations = await distinct(LogEntry.destination_ip)
        return {"unique_sources": sources, "unique_destinations": destinations}
@@ -0,0 +1,52 @@
 import httpx
 from typing import List, Dict, Any
 from config import settings
 class LLMService:
    """Async client for the Ollama ``/api/generate`` endpoint.

    Failures are never raised to the caller: ``analyze_logs`` always returns a
    string, either the model's answer or a short description of what went wrong.
    """

    # Upper bound for one generate request, in seconds.
    _TIMEOUT_SECONDS = 120.0

    def __init__(self, base_url: str = settings.ollama_url, model: str = settings.ollama_model):
        """Store the API base URL (without a trailing slash) and the model name."""
        self.base_url = base_url.rstrip("/")
        self.model = model

    async def analyze_logs(self, log_snippets: List[str], stats: Dict[str, Any]) -> str:
        """Ask the model for an analysis of *log_snippets* and *stats*.

        Args:
            log_snippets: Raw log lines; only the first 20 end up in the prompt.
            stats: Aggregates as assembled by the analyze route
                (``top_sources``, ``top_destinations``, ``top_ports``,
                ``total_entries``, ``unique_sources``, ``unique_destinations``).

        Returns:
            The stripped model answer, or an error description.
        """
        prompt = self._build_analysis_prompt(log_snippets, stats)
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0.3,
                "num_predict": 1024,
            },
        }
        try:
            async with httpx.AsyncClient(timeout=self._TIMEOUT_SECONDS) as client:
                resp = await client.post(f"{self.base_url}/api/generate", json=payload)
                resp.raise_for_status()
                data = resp.json()
        except httpx.HTTPStatusError as e:
            return f"LLM HTTP error: {e.response.status_code}"
        except httpx.ConnectError:
            return "LLM service unreachable. Ensure Ollama is running and accessible."
        except httpx.TimeoutException:
            # Connect and read timeouts are not ConnectError subclasses and their
            # str() is usually empty, so they get their own readable message.
            return f"LLM request timed out after {self._TIMEOUT_SECONDS:.0f} seconds."
        except Exception as e:
            return f"LLM analysis error: {type(e).__name__}: {str(e)}"
        answer = data.get("response") if isinstance(data, dict) else None
        if not isinstance(answer, str):
            # Missing key, null value or an unexpected JSON shape.
            return "No response from LLM."
        return answer.strip()

    def _build_analysis_prompt(self, snippets: List[str], stats: Dict[str, Any]) -> str:
        """Render the prompt from the top-5 statistics and the first 20 snippets.

        NOTE(review): the log lines are embedded verbatim; whoever can write to
        the analysed logs can therefore place instructions in the prompt.
        """
        top_sources = ", ".join(
            f"{s['source_ip']} ({s['count']})" for s in stats.get("top_sources", [])[:5]
        )
        top_dests = ", ".join(
            f"{d['destination_ip']} ({d['count']})" for d in stats.get("top_destinations", [])[:5]
        )
        top_ports = ", ".join(
            f"{p['destination_port']} ({p['count']})" for p in stats.get("top_ports", [])[:5]
        )
        lines = "\n".join(snippets[:20])
        return (
            "You are a network security analyst. Analyze the following firewall/proxy log snippets and statistics. "
            "Summarize the most important observations in 3-5 bullet points. Identify potential anomalies, scan patterns, "
            "or top talkers. Be concise and factual. Use German or English depending on the log content.\n\n"
            "=== Statistics ===\n"
            f"Top Sources: {top_sources}\n"
            f"Top Destinations: {top_dests}\n"
            f"Top Ports: {top_ports}\n"
            f"Total Entries: {stats.get('total_entries', 0)}\n"
            f"Unique Sources: {stats.get('unique_sources', 0)}\n"
            f"Unique Destinations: {stats.get('unique_destinations', 0)}\n\n"
            f"=== Sample Logs ===\n{lines}\n\n"
            "=== Analysis ==="
        )
@@ -0,0 +1,189 @@
 import re
 import ipaddress
 from datetime import datetime
 from typing import Dict, List, Any, Optional
 from models import LogEntry
 # iptables / ufw kernel log, e.g.
 #   kernel: [UFW BLOCK] IN=eth0 OUT= SRC=1.2.3.4 DST=5.6.7.8 LEN=60 TTL=52 PROTO=TCP SPT=123 DPT=80 ...
 # The verdict keyword normally sits in the rule's log prefix, i.e. before SRC=, and
 # LEN/TOS/TTL/... separate DST= from PROTO=. Every field except the SRC/DST pair is
 # therefore located by its own look-ahead from the start of the line, in any order.
 # The action is the first verdict keyword anywhere in the line (plain substring
 # match); a line without such a keyword does not match.
 IPTABLES_RE = re.compile(
    r"^(?=.*?(?P<action>ACCEPT|DROP|REJECT|DENY|ALLOW|PASS|BLOCK))"
    r"(?=(?:.*?\bPROTO=(?P<proto>\w+))?)"
    r"(?=(?:.*?\bSPT=(?P<spt>\d+))?)"
    r"(?=(?:.*?\bDPT=(?P<dpt>\d+))?)"
    r".*?SRC=(?P<src>[0-9a-fA-F.:]+)\s+"
    r"DST=(?P<dst>[0-9a-fA-F.:]+)",
    re.IGNORECASE,
 )
 # pfSense filterlog (CSV), e.g.
 #   filterlog[4242]: 5,,,1000000103,em0,match,block,in,4,0x0,,64,0,0,DF,6,tcp,60,192.168.1.1,10.0.0.1,12345,80,0,S,...
 # - The action is the field directly followed by the direction (in/out); the field
 #   before it is the match reason ("match") and must not be taken for the action.
 # - Addresses must contain "." or ":" so that the numeric length/protocol-id fields
 #   in front of them are skipped (IPv6 lines have two such fields).
 # - The two ports follow the destination address directly; they are absent for ICMP.
 PFSENSE_RE = re.compile(
    r"filterlog\b.*?,(?P<action>pass|block|match|reject),(?:in|out),"
    r".*?,(?P<proto>tcp|udp|icmp),.*?,"
    r"(?P<src>[0-9a-fA-F.:]*[.:][0-9a-fA-F.:]*),"
    r"(?P<dst>[0-9a-fA-F.:]*[.:][0-9a-fA-F.:]*)"
    r"(?:,(?P<spt>\d+),(?P<dpt>\d+))?",
    re.IGNORECASE,
 )
 # Cisco ASA, e.g.
 #   %ASA-6-302013: Built outbound TCP connection 123 for outside:10.0.0.1/80 (10.0.0.1/80) to inside:192.168.1.1/12345 (1.2.3.4/12345)
 # The optional "(mapped-address/port)" after the first endpoint is skipped.
 # NOTE(review): the first endpoint is always captured as destination, which fits
 # "outbound" lines; confirm the intended mapping for "inbound" connections.
 CISCO_ASA_RE = re.compile(
    r"%ASA-\d+-\d+:\s+.*?(?P<action>Built|Teardown|Denied|Deny|Allowed|Permit).*?"
    r"(?P<proto>TCP|UDP|ICMP).*?"
    r"(?:for\s+(?P<dir>\w+):)?(?P<dst>[0-9.]+)/(?P<dpt>\d+)(?:\s+\([^)]*\))?\s+"
    r"to\s+(?P<src_dir>\w+):(?P<src>[0-9.]+)/(?P<spt>\d+)",
    re.IGNORECASE,
 )
 # Squid native access.log, e.g.
 #   1704108600.123 200 192.168.1.1 TCP_MISS/200 1234 GET http://example.com/ - DIRECT/93.184.216.34 text/html
 # Result code and HTTP status form one "CODE/STATUS" token; a whitespace separator
 # is accepted as well. The URL may be the last field of the line.
 SQUID_RE = re.compile(
    r"^(?P<ts>[\d.]+)\s+"
    r"(?P<elapsed>-?\d+)\s+"
    r"(?P<src>[0-9a-fA-F.:]+)\s+"
    r"(?P<code>[^\s/]+)(?:/|\s+)"
    r"(?P<status>\d+)\s+"
    r"(?P<size>\d+)\s+"
    r"(?P<method>\w+)\s+"
    r"(?P<url>\S+)(?:\s|$)",
 )
 # Nginx access log: 192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/7.68.0"
 NGINX_RE = re.compile(
    r"^(?P<src>[0-9a-fA-F.:]+)\s+.*?\s+"
    r"\[(?P<ts>[^\]]+)\]\s+"
    r'"(?P<method>\w+)\s+(?P<url>\S+)\s+HTTP/[\d.]+"\s+'
    r"(?P<status>\d+)\s+(?P<size>\d+)",
 )
 TIMESTAMP_FORMATS = [
    "%d/%b/%Y:%H:%M:%S %z",
    "%Y-%m-%dT%H:%M:%S%z",
    "%Y-%m-%d %H:%M:%S",
 ]
 def _to_int(val: str | None) -> int | None:
    if val is None:
        return None
    try:
        return int(val)
    except ValueError:
        return None
 def _parse_timestamp(ts_str: str) -> datetime | None:
    for fmt in TIMESTAMP_FORMATS:
        try:
            return datetime.strptime(ts_str, fmt)
        except ValueError:
            continue
    # Try unix float
    try:
        return datetime.utcfromtimestamp(float(ts_str))
    except (ValueError, OSError, OverflowError):
        pass
    return None
 # Normalized action for each (lower-cased) Cisco ASA verb.
 _ASA_ACTIONS = {
    "built": "ALLOW",
    "allowed": "ALLOW",
    "permit": "ALLOW",
    "denied": "DENY",
    "deny": "DENY",
    # A teardown closes an existing connection; it is not a block decision.
    "teardown": "TEARDOWN",
 }
 def _new_record(log_type: str, raw_line: str) -> Dict[str, Any]:
    """Return a record for *raw_line* with every parsed field set to ``None``."""
    return {
        "log_type": log_type,
        "source_ip": None,
        "destination_ip": None,
        "protocol": None,
        "source_port": None,
        "destination_port": None,
        "action": None,
        "url": None,
        "method": None,
        "status_code": None,
        "bytes_size": None,
        "timestamp": None,
        "raw_line": raw_line,
    }
 def _firewall_record(m: re.Match, raw_line: str, action: str) -> Dict[str, Any]:
    """Build a firewall record from the src/dst/proto/spt/dpt groups of *m*."""
    proto = m.group("proto")
    record = _new_record("firewall", raw_line)
    record.update(
        source_ip=m.group("src"),
        destination_ip=m.group("dst"),
        # Upper-cased for all firewall formats so protocol values group together.
        protocol=proto.upper() if proto else None,
        source_port=_to_int(m.group("spt")),
        destination_port=_to_int(m.group("dpt")),
        action=action,
    )
    return record
 def _proxy_record(m: re.Match, raw_line: str) -> Dict[str, Any]:
    """Build a proxy record from the src/url/method/status/size/ts groups of *m*."""
    record = _new_record("proxy", raw_line)
    record.update(
        source_ip=m.group("src"),
        url=m.group("url"),
        method=m.group("method"),
        status_code=_to_int(m.group("status")),
        bytes_size=_to_int(m.group("size")),
        timestamp=_parse_timestamp(m.group("ts")),
    )
    return record
 def parse_line(line: str) -> Optional[Dict[str, Any]]:
    """Parse one log line into a dict of ``LogEntry`` column values.

    The formats are tried in a fixed order (iptables, pfSense, Cisco ASA,
    Squid, nginx) and the first match wins. Fields a format does not provide
    are ``None``; the firewall formats do not set a timestamp.

    Returns:
        The record, or ``None`` for a blank line or an unrecognised format.
    """
    line = line.strip()
    if not line:
        return None
    # iptables / kernel / ufw
    m = IPTABLES_RE.search(line)
    if m:
        return _firewall_record(m, line, m.group("action").upper())
    m = PFSENSE_RE.search(line)
    if m:
        action = m.group("action")
        return _firewall_record(m, line, action.upper() if action else "UNKNOWN")
    m = CISCO_ASA_RE.search(line)
    if m:
        verb = (m.group("action") or "").lower()
        # Verbs missing from the table keep the previous default, DENY.
        return _firewall_record(m, line, _ASA_ACTIONS.get(verb, "DENY"))
    m = SQUID_RE.match(line)
    if m:
        return _proxy_record(m, line)
    m = NGINX_RE.match(line)
    if m:
        return _proxy_record(m, line)
    return None
 def parse_lines(lines: List[str]) -> List[Dict[str, Any]]:
    """Parse every line and return the records of the recognised ones, in input order.

    Lines for which ``parse_line`` returns nothing are skipped.
    """
    return [record for line in lines if (record := parse_line(line))]
@@ -0,0 +1,29 @@
 # Two services: the FastAPI backend and an nginx reverse proxy in front of it.
 version: "3.8"
 services:
  backend:
    build: ./backend
    container_name: log-analyzer-backend
    volumes:
      # Named volume holding the SQLite database file.
      - logdata:/app/data
    environment:
      - DATABASE_URL=sqlite+aiosqlite:///data/logs.db
      - OLLAMA_URL=http://host.docker.internal:11434
      - OLLAMA_MODEL=llava:7b
    extra_hosts:
      # Maps host.docker.internal to the Docker host so the container can reach Ollama there.
      - "host.docker.internal:host-gateway"
    restart: unless-stopped
  nginx:
    image: nginx:alpine
    container_name: log-analyzer-nginx
    ports:
      # Only nginx is published; the backend has no port mapping of its own.
      - "8080:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - backend
    restart: unless-stopped
 volumes:
  logdata:
@@ -0,0 +1,22 @@
 # Reverse proxy: forwards every request on port 80 to the backend container.
 events {
    worker_connections 1024;
 }
 http {
    upstream backend {
        # "backend" is the compose service name.
        server backend:8000;
    }
    server {
        listen 80;
        # NOTE(review): the application settings declare upload_max_size_mb = 50; confirm which limit is intended.
        client_max_body_size 100M;
        location / {
            proxy_pass http://backend;
            # Pass the original host, client address and scheme on to the backend.
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }
 }