commit 695d715d5b8539b61e0f495c190832fd7abb4e98
Author: Arch Agent <arch_agent@localhost>
Date:   Thu May 7 10:14:57 2026 +0200

    Initial commit: Log Analyzer Backend

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6c8bcf2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.env
+.venv/
+venv/
+data/
+*.db
+*.sqlite3
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5069e8d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,108 @@
+# Log Analyzer Backend
+
+CPU-LLM-gestütztes Backend zur Auswertung von Firewall- und Proxy-Logs. Parst Dateien, aggregiert Statistiken (Top-Hits, Quellen, Ziele, Ports, URLs) und nutzt ein lokales LLM (via Ollama) für die Analyse.
+
+## Features
+- **Log-Parsing:** Unterstützt iptables, pfSense, Cisco ASA, Squid, nginx
+- **Statistiken:** Top Quellen, Ziele, Ports, URLs, Actions, Timeline
+- **LLM-Analyse:** Zusammenfassung und Anomalie-Erkennung via lokalem Ollama-Modell
+- **REST-API:** FastAPI mit automatischer OpenAPI-Doku unter `/docs`
+- **Docker-Compose:** Schneller Start mit Backend + nginx Reverse Proxy
+
+## Architektur
+```
+┌──────────────┐      ┌──────────────┐      ┌──────────────────────┐
+│   Client     │──────▶│   nginx:80   │──────▶│  FastAPI Backend:8000 │
+└──────────────┘      └──────────────┘      │  - SQLite (Volumen)   │
+                                            │  - Log-Parser         │
+                                            │  - Ollama-Client      │
+                                            └───────────────────────┘
+                                                      │
+                                                      ▼
+                                            ┌──────────────────────┐
+                                            │  Ollama (CPU/Host)   │
+                                            │  http://host.docker  │
+                                            │       .internal:11434│
+                                            └──────────────────────┘
+```
+
+## Voraussetzungen
+- Docker + Docker Compose
+- Laufende Ollama-Instanz auf dem Host (Port 11434) mit Modell `llava:7b`
+- Ports: 8080 (nginx), 11434 (Ollama auf Host)
+
+## Installation & Start
+
+```bash
+# Repository klonen
+git clone https://gitea.die-heimatlosen.eu/arch_agent/log-analyzer-backend.git
+cd log-analyzer-backend
+
+# Starten
+docker-compose up -d --build
+```
+
+Die API ist dann unter `http://localhost:8080/api` erreichbar.
+
+## Nutzung
+
+### Log-Datei hochladen
+```bash
+curl -X POST http://localhost:8080/api/upload \
+  -H "Content-Type: multipart/form-data" \
+  -F "file=@/var/log/iptables.log"
+```
+
+### Statistiken abrufen
+```bash
+curl "http://localhost:8080/api/stats?limit=20"
+```
+
+### LLM-Analyse starten
+```bash
+curl -X POST "http://localhost:8080/api/analyze?log_type=firewall&limit=100"
+```
+
+### Gesundheitscheck
+```bash
+curl http://localhost:8080/health
+```
+
+## API-Endpunkte
+
+| Methode | Endpunkt | Beschreibung |
+|---------|----------|--------------|
+| POST | `/api/upload` | Log-Datei hochladen & parsen |
+| GET | `/api/stats` | Übersicht aller Statistiken |
+| GET | `/api/stats/sources` | Top Quellen |
+| GET | `/api/stats/destinations` | Top Ziele |
+| GET | `/api/stats/ports` | Top Ports |
+| POST | `/api/analyze` | LLM-Analyse der Logs |
+| GET | `/health` | Healthcheck |
+
+## Umgebungsvariablen
+
+| Variable | Standard | Beschreibung |
+|----------|----------|--------------|
+| `DATABASE_URL` | `sqlite+aiosqlite:///data/logs.db` | SQLite Datenbankpfad |
+| `OLLAMA_URL` | `http://host.docker.internal:11434` | Ollama API URL |
+| `OLLAMA_MODEL` | `llava:7b` | LLM Modellname |
+
+## Unterstützte Log-Formate
+
+### Firewall
+- **iptables:** Kernel-Logzeilen mit `SRC=... DST=... PROTO=... DPT=...`
+- **pfSense:** `filterlog` CSV-ähnliche Zeilen
+- **Cisco ASA:** `%ASA-... Built/Teardown/Denied ...`
+
+### Proxy
+- **Squid:** Native Squid-Logformat (Timestamp, elapsed, client, code, status, size, method, URL)
+- **nginx:** Standard access_log (`$remote_addr - [$time_local] "$request" $status $body_bytes_sent`)
+
+## Hinweise
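+- Das Backend erwartet Ollama auf dem **Host** (nicht im Container). Unter Linux wird `host.docker.internal` über `extra_hosts: ["host.docker.internal:host-gateway"]` aufgelöst; die mitgelieferte `docker-compose.yml` setzt diesen Eintrag bereits.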
+- Große Logdateien werden in Batches von 500 Zeilen verarbeitet.
+- Die SQLite-Datenbank wird im Docker-Volumen `logdata` persistiert.
+
+## Lizenz
+MIT
diff --git a/backend/Dockerfile b/backend/Dockerfile
new file mode 100644
index 0000000..4c3eb24
--- /dev/null
+++ b/backend/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/backend/api/__init__.py b/backend/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/api/routes.py b/backend/api/routes.py
new file mode 100644
index 0000000..fa50f19
--- /dev/null
+++ b/backend/api/routes.py
@@ -0,0 +1,124 @@
+import io
+from typing import List
+from fastapi import APIRouter, UploadFile, File, Depends, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy.ext.asyncio import AsyncSession
+from models import LogEntry
+from services.log_parser import parse_lines
+from services.analyzer import StatsAnalyzer
+from services.llm_service import LLMService
+from database import get_db
+
+router = APIRouter(prefix="/api", tags=["logs"])
+
+class StatsResponse(BaseModel):
+    overview: dict
+    top_sources: List[dict]
+    top_destinations: List[dict]
+    top_ports: List[dict]
+    top_urls: List[dict]
+    actions: List[dict]
+    timeline: List[dict]
+    unique_counts: dict
+
+class AnalysisResponse(BaseModel):
+    analysis: str
+
+class UploadResponse(BaseModel):
+    message: str
+    parsed_lines: int
+
+@router.post("/upload", response_model=UploadResponse)
+async def upload_logs(
+    file: UploadFile = File(...),
+    db: AsyncSession = Depends(get_db),
+):
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No file provided")
+    try:
+        content = (await file.read()).decode("utf-8", errors="ignore")
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Cannot read file: {e}")
+
+    lines = content.splitlines()
+    parsed = parse_lines(lines)
+
+    for batch in _batches(parsed, 500):
+        entries = [LogEntry(**row) for row in batch]
+        db.add_all(entries)
+        await db.commit()
+
+    return UploadResponse(
+        message=f"Uploaded and parsed {len(parsed)} log lines.",
+        parsed_lines=len(parsed),
+    )
+
+@router.get("/stats", response_model=StatsResponse)
+async def get_stats(
+    limit: int = Query(20, ge=1, le=100),
+    db: AsyncSession = Depends(get_db),
+):
+    overview = await StatsAnalyzer.overview(db)
+    sources = await StatsAnalyzer.top_sources(db, limit)
+    dests = await StatsAnalyzer.top_destinations(db, limit)
+    ports = await StatsAnalyzer.top_ports(db, limit)
+    urls = await StatsAnalyzer.top_urls(db, limit)
+    actions = await StatsAnalyzer.action_distribution(db)
+    timeline = await StatsAnalyzer.timeline(db, "hour")
+    uniq = await StatsAnalyzer.unique_counts(db)
+    return StatsResponse(
+        overview=overview,
+        top_sources=sources,
+        top_destinations=dests,
+        top_ports=ports,
+        top_urls=urls,
+        actions=actions,
+        timeline=timeline,
+        unique_counts=uniq,
+    )
+
+@router.get("/stats/sources", response_model=List[dict])
+async def get_sources(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
+    return await StatsAnalyzer.top_sources(db, limit)
+
+@router.get("/stats/destinations", response_model=List[dict])
+async def get_destinations(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
+    return await StatsAnalyzer.top_destinations(db, limit)
+
+@router.get("/stats/ports", response_model=List[dict])
+async def get_ports(limit: int = Query(20, ge=1, le=500), db: AsyncSession = Depends(get_db)):
+    return await StatsAnalyzer.top_ports(db, limit)
+
+@router.post("/analyze", response_model=AnalysisResponse)
+async def analyze_logs(
+    log_type: str = Query("firewall", enum=["firewall", "proxy", "all"]),
+    limit: int = Query(100, ge=1, le=500),
+    db: AsyncSession = Depends(get_db),
+):
+    # Sends up to `limit` stored raw lines plus aggregate statistics to the LLM; 404 when nothing matches.
+    from sqlalchemy import select
+    stmt = select(LogEntry)
+    if log_type != "all":
+        stmt = stmt.where(LogEntry.log_type == log_type)
+    rows = (await db.execute(stmt.limit(limit))).scalars().all()
+    if not rows:
+        raise HTTPException(status_code=404, detail="No logs found for analysis")
+
+    # Aggregates cover all stored entries (not only `log_type`); each query runs once and is reused.
+    overview = await StatsAnalyzer.overview(db)
+    uniq = await StatsAnalyzer.unique_counts(db)
+    stats = {
+        "total_entries": overview["total_entries"],
+        "unique_sources": uniq["unique_sources"],
+        "unique_destinations": uniq["unique_destinations"],
+        "top_sources": await StatsAnalyzer.top_sources(db, 10),
+        "top_destinations": await StatsAnalyzer.top_destinations(db, 10),
+        "top_ports": await StatsAnalyzer.top_ports(db, 10),
+    }
+    analysis = await LLMService().analyze_logs([r.raw_line for r in rows], stats)
+    return AnalysisResponse(analysis=analysis)
+
+
+def _batches(data: List[dict], size: int):
+    # Lazily yields consecutive slices of `data`, each at most `size` items long.
+    yield from (data[start : start + size] for start in range(0, len(data), size))
diff --git a/backend/config.py b/backend/config.py
new file mode 100644
index 0000000..e644be3
--- /dev/null
+++ b/backend/config.py
@@ -0,0 +1,10 @@
+from pydantic_settings import BaseSettings
+
+class Settings(BaseSettings):
+    database_url: str = "sqlite+aiosqlite:///data/logs.db"  # three slashes: path relative to the working directory
+    ollama_url: str = "http://host.docker.internal:11434"  # base URL; LLMService appends /api/generate
+    ollama_model: str = "llava:7b"
+    upload_max_size_mb: int = 50  # NOTE(review): not read anywhere in this commit; uploads are not size-checked
+    log_batch_size: int = 1000  # NOTE(review): not read in this commit; api/routes.py batches with a literal 500
+
+settings = Settings()
diff --git a/backend/database.py b/backend/database.py
new file mode 100644
index 0000000..6ae7bcc
--- /dev/null
+++ b/backend/database.py
@@ -0,0 +1,13 @@
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
+from config import settings
+
+DATABASE_URL = settings.database_url
+engine = create_async_engine(DATABASE_URL, echo=False)
+SessionLocal = async_sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False)
+
+async def get_db():
+    async with SessionLocal() as session:  # used as a FastAPI dependency via Depends(get_db) in api/routes.py
+        try:
+            yield session  # the caller works with the session while this generator is suspended
+        finally:
+            await session.close()  # explicit close, on top of leaving the async-with block
diff --git a/backend/main.py b/backend/main.py
new file mode 100644
index 0000000..f87ff5f
--- /dev/null
+++ b/backend/main.py
@@ -0,0 +1,38 @@
+import os
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from database import engine
+from models import Base
+from api.routes import router as api_router
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    os.makedirs("data", exist_ok=True)
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+    yield
+    await engine.dispose()
+
+app = FastAPI(
+    title="Log Analyzer Backend",
+    description="CPU LLM powered firewall & proxy log analyzer.",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,  # wildcard origins must not be combined with credentialed requests
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+app.include_router(api_router)
+
+@app.get("/health")
+async def health():
+    return {"status": "ok"}
diff --git a/backend/models.py b/backend/models.py
new file mode 100644
index 0000000..0701590
--- /dev/null
+++ b/backend/models.py
@@ -0,0 +1,25 @@
+from datetime import datetime
+from sqlalchemy import Integer, String, DateTime, Text, Boolean, BigInteger
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+class Base(DeclarativeBase):
+    pass
+
+class LogEntry(Base):
+    __tablename__ = "log_entries"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
+    raw_line: Mapped[str] = mapped_column(Text, nullable=False)
+    log_type: Mapped[str] = mapped_column(String(20), nullable=False, index=True)  # firewall, proxy
+    source_ip: Mapped[str | None] = mapped_column(String(45), nullable=True, index=True)
+    destination_ip: Mapped[str | None] = mapped_column(String(45), nullable=True, index=True)
+    source_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    destination_port: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    protocol: Mapped[str | None] = mapped_column(String(10), nullable=True)
+    action: Mapped[str | None] = mapped_column(String(20), nullable=True, index=True)  # ACCEPT, DROP, DENY, ALLOW
+    url: Mapped[str | None] = mapped_column(Text, nullable=True)
+    method: Mapped[str | None] = mapped_column(String(10), nullable=True)
+    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    bytes_size: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
+    timestamp: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, index=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..ed23221
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,10 @@
+fastapi>=0.110.0
+uvicorn[standard]>=0.27.0
+sqlalchemy[asyncio]>=2.0.0
+aiosqlite>=0.19.0
+python-multipart>=0.0.9
+httpx>=0.27.0
+pydantic>=2.6.0
+pydantic-settings>=2.1.0
+aiofiles>=23.2.0
+jinja2>=3.1.0
diff --git a/backend/services/__init__.py b/backend/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/services/analyzer.py b/backend/services/analyzer.py
new file mode 100644
index 0000000..52959fe
--- /dev/null
+++ b/backend/services/analyzer.py
@@ -0,0 +1,105 @@
+from typing import List, Dict, Any
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+from models import LogEntry
+
+class StatsAnalyzer:
+    @staticmethod
+    async def overview(session: AsyncSession) -> Dict[str, Any]:
+        total = (await session.execute(select(func.count()).select_from(LogEntry))).scalar() or 0
+        fw = (await session.execute(
+            select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "firewall")
+        )).scalar() or 0
+        px = (await session.execute(
+            select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "proxy")
+        )).scalar() or 0
+        return {
+            "total_entries": total,
+            "firewall_entries": fw,
+            "proxy_entries": px,
+        }
+
+    @staticmethod
+    async def top_sources(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
+        stmt = (
+            select(LogEntry.source_ip, func.count().label("cnt"))
+            .where(LogEntry.source_ip.isnot(None))
+            .group_by(LogEntry.source_ip)
+            .order_by(func.count().desc())
+            .limit(limit)
+        )
+        rows = await session.execute(stmt)
+        return [{"source_ip": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def top_destinations(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
+        stmt = (
+            select(LogEntry.destination_ip, func.count().label("cnt"))
+            .where(LogEntry.destination_ip.isnot(None))
+            .group_by(LogEntry.destination_ip)
+            .order_by(func.count().desc())
+            .limit(limit)
+        )
+        rows = await session.execute(stmt)
+        return [{"destination_ip": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def top_ports(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
+        stmt = (
+            select(LogEntry.destination_port, func.count().label("cnt"))
+            .where(LogEntry.destination_port.isnot(None))
+            .group_by(LogEntry.destination_port)
+            .order_by(func.count().desc())
+            .limit(limit)
+        )
+        rows = await session.execute(stmt)
+        return [{"destination_port": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def top_urls(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
+        stmt = (
+            select(LogEntry.url, func.count().label("cnt"))
+            .where(LogEntry.url.isnot(None))
+            .group_by(LogEntry.url)
+            .order_by(func.count().desc())
+            .limit(limit)
+        )
+        rows = await session.execute(stmt)
+        return [{"url": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def action_distribution(session: AsyncSession) -> List[Dict[str, Any]]:
+        stmt = (
+            select(LogEntry.action, func.count().label("cnt"))
+            .where(LogEntry.action.isnot(None))
+            .group_by(LogEntry.action)
+            .order_by(func.count().desc())
+        )
+        rows = await session.execute(stmt)
+        return [{"action": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def timeline(session: AsyncSession, granularity: str = "hour") -> List[Dict[str, Any]]:
+        if granularity == "hour":
+            fmt = "%Y-%m-%d %H:00"
+        else:
+            fmt = "%Y-%m-%d"
+        # SQLite strftime
+        stmt = (
+            select(func.strftime(fmt, LogEntry.timestamp).label("bucket"), func.count().label("cnt"))
+            .where(LogEntry.timestamp.isnot(None))
+            .group_by("bucket")
+            .order_by("bucket")
+        )
+        rows = await session.execute(stmt)
+        return [{"time_bucket": r[0], "count": r[1]} for r in rows]
+
+    @staticmethod
+    async def unique_counts(session: AsyncSession) -> Dict[str, int]:
+        src = (await session.execute(
+            select(func.count(func.distinct(LogEntry.source_ip)))
+        )).scalar() or 0
+        dst = (await session.execute(
+            select(func.count(func.distinct(LogEntry.destination_ip)))
+        )).scalar() or 0
+        return {"unique_sources": src, "unique_destinations": dst}
diff --git a/backend/services/llm_service.py b/backend/services/llm_service.py
new file mode 100644
index 0000000..e0b371b
--- /dev/null
+++ b/backend/services/llm_service.py
@@ -0,0 +1,52 @@
+import httpx
+from typing import List, Dict, Any
+from config import settings
+
+class LLMService:
+    def __init__(self, base_url: str = settings.ollama_url, model: str = settings.ollama_model):
+        self.base_url = base_url.rstrip("/")
+        self.model = model
+
+    async def analyze_logs(self, log_snippets: List[str], stats: Dict[str, Any]) -> str:
+        prompt = self._build_analysis_prompt(log_snippets, stats)
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {
+                "temperature": 0.3,
+                "num_predict": 1024,
+            },
+        }
+        try:
+            async with httpx.AsyncClient(timeout=120.0) as client:
+                resp = await client.post(f"{self.base_url}/api/generate", json=payload)
+                resp.raise_for_status()
+                data = resp.json()
+                return data.get("response", "No response from LLM.").strip()
+        except httpx.HTTPStatusError as e:
+            return f"LLM HTTP error: {e.response.status_code}"
+        except httpx.ConnectError:
+            return "LLM service unreachable. Ensure Ollama is running and accessible."
+        except Exception as e:
+            return f"LLM analysis error: {type(e).__name__}: {str(e)}"
+
+    def _build_analysis_prompt(self, snippets: List[str], stats: Dict[str, Any]) -> str:
+        top_sources = ", ".join([f"{s['source_ip']} ({s['count']})" for s in stats.get("top_sources", [])[:5]])
+        top_dests = ", ".join([f"{d['destination_ip']} ({d['count']})" for d in stats.get("top_destinations", [])[:5]])
+        top_ports = ", ".join([f"{p['destination_port']} ({p['count']})" for p in stats.get("top_ports", [])[:5]])
+        lines = "\n".join(snippets[:20])
+        return (
+            "You are a network security analyst. Analyze the following firewall/proxy log snippets and statistics. "
+            "Summarize the most important observations in 3-5 bullet points. Identify potential anomalies, scan patterns, "
+            "or top talkers. Be concise and factual. Use German or English depending on the log content.\n\n"
+            f"=== Statistics ===\n"
+            f"Top Sources: {top_sources}\n"
+            f"Top Destinations: {top_dests}\n"
+            f"Top Ports: {top_ports}\n"
+            f"Total Entries: {stats.get('total_entries', 0)}\n"
+            f"Unique Sources: {stats.get('unique_sources', 0)}\n"
+            f"Unique Destinations: {stats.get('unique_destinations', 0)}\n\n"
+            f"=== Sample Logs ===\n{lines}\n\n"
+            "=== Analysis ==="
+        )
diff --git a/backend/services/log_parser.py b/backend/services/log_parser.py
new file mode 100644
index 0000000..49d2bdc
--- /dev/null
+++ b/backend/services/log_parser.py
@@ -0,0 +1,189 @@
+import re
+import ipaddress
+from datetime import datetime
+from typing import Dict, List, Any, Optional
+from models import LogEntry
+
+# iptables/ufw kernel line: "[UFW BLOCK] IN=eth0 ... SRC=1.2.3.4 DST=5.6.7.8 LEN=40 ... PROTO=TCP SPT=123 DPT=80 ..."
+IPTABLES_RE = re.compile(
+    r"^(?=.*?(?P<action>ACCEPT|DROP|REJECT|DENY|ALLOW|PASS|BLOCK))"  # verdict may sit in the log prefix, before SRC=
+    r".*?\bSRC=(?P<src>[0-9a-fA-F.:]+)\s+"
+    r"DST=(?P<dst>[0-9a-fA-F.:]+)"
+    r"(?:.*?\bPROTO=(?P<proto>\w+))?"  # LEN=/TOS=/TTL=... usually sit between DST= and PROTO=
+    r"(?:.*?\bSPT=(?P<spt>\d+))?"
+    r"(?:.*?\bDPT=(?P<dpt>\d+))?",
+    re.IGNORECASE,
+)
+
+# pfSense filterlog: <134>1 2024-01-01T12:00:00+00:00 ... filterlog: ... 4,,,1000000103,em0,match,pass,in,4,0x0,,64,0,0,DF,6,tcp,60,192.168.1.1,10.0.0.1,0,0,0,0,12345,80,0,S,1234567890,,mss
+PFSENSE_RE = re.compile(
+    r"filterlog:.*?,(?P<action>pass|block|match|reject),.*?,(?P<proto>tcp|udp|icmp),.*?,"
+    r"(?P<src>[0-9a-fA-F.:]+),(?P<dst>[0-9a-fA-F.:]+),.*?,(?P<spt>\d+)?,(?P<dpt>\d+)?",
+    re.IGNORECASE,
+)
+
+# Cisco ASA: %ASA-6-302013: Built outbound TCP connection 123 for outside:10.0.0.1/80 to inside:192.168.1.1/12345
+CISCO_ASA_RE = re.compile(
+    r"%ASA-\d+-\d+:\s+.*?(?P<action>Built|Teardown|Denied|Deny|Allowed|Permit).*?"
+    r"(?P<proto>TCP|UDP|ICMP).*?"
+    r"(?:for\s+(?P<dir>\w+):)?(?P<dst>[0-9.]+)/(?P<dpt>\d+)\s+"
+    r"to\s+(?P<src_dir>\w+):(?P<src>[0-9.]+)/(?P<spt>\d+)",
+    re.IGNORECASE,
+)
+
+# Squid native: <ts> <elapsed> <client> <code>/<status> <size> <method> <url> ..., e.g. "1704108600.123 200 192.168.1.1 TCP_MISS/200 1234 GET http://example.com/ - DIRECT/93.184.216.34 text/html"
+SQUID_RE = re.compile(
+    r"^(?P<ts>[\d.]+)\s+"
+    r"(?P<elapsed>-?\d+)\s+"
+    r"(?P<src>[0-9a-fA-F.:]+)\s+"
+    r"(?P<code>[^\s/]+)/"  # result code and HTTP status form one "CODE/STATUS" token
+    r"(?P<status>\d+)\s+"
+    r"(?P<size>\d+)\s+"
+    r"(?P<method>\w+)\s+"
+    r"(?P<url>\S+)\s+",
+)
+
+# Nginx proxy: 192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/7.68.0"
+NGINX_RE = re.compile(
+    r"^(?P<src>[0-9a-fA-F.:]+)\s+.*?\s+"
+    r"\[(?P<ts>[^\]]+)\]\s+"
+    r'"(?P<method>\w+)\s+(?P<url>\S+)\s+HTTP/[\d.]+"\s+'
+    r"(?P<status>\d+)\s+(?P<size>\d+)",
+)
+
+TIMESTAMP_FORMATS = [
+    "%d/%b/%Y:%H:%M:%S %z",
+    "%Y-%m-%dT%H:%M:%S%z",
+    "%Y-%m-%d %H:%M:%S",
+]
+
+
+def _to_int(val: str | None) -> int | None:
+    """Return `val` as an int, or None when it is None or not a valid integer literal."""
+    # Optional regex groups come back as None, so that case is expected, not an error.
+    try:
+        return None if val is None else int(val)
+    except ValueError:
+        return None
+
+
+def _parse_timestamp(ts_str: str) -> datetime | None:
+    for fmt in TIMESTAMP_FORMATS:
+        try:
+            return datetime.strptime(ts_str, fmt)
+        except ValueError:
+            continue
+    # Try unix float
+    try:
+        return datetime.utcfromtimestamp(float(ts_str))
+    except (ValueError, OSError, OverflowError):
+        pass
+    return None
+
+
+def parse_line(line: str) -> Optional[Dict[str, Any]]:
+    line = line.strip()
+    if not line:
+        return None
+
+    # iptables / kernel / ufw
+    m = IPTABLES_RE.search(line)
+    if m:
+        return {
+            "log_type": "firewall",
+            "source_ip": m.group("src"),
+            "destination_ip": m.group("dst"),
+            "protocol": m.group("proto"),
+            "source_port": _to_int(m.group("spt")),
+            "destination_port": _to_int(m.group("dpt")),
+            "action": m.group("action").upper(),
+            "url": None,
+            "method": None,
+            "status_code": None,
+            "bytes_size": None,
+            "timestamp": None,
+            "raw_line": line,
+        }
+
+    m = PFSENSE_RE.search(line)
+    if m:
+        return {
+            "log_type": "firewall",
+            "source_ip": m.group("src"),
+            "destination_ip": m.group("dst"),
+            "protocol": m.group("proto").upper() if m.group("proto") else None,
+            "source_port": _to_int(m.group("spt")),
+            "destination_port": _to_int(m.group("dpt")),
+            "action": m.group("action").upper() if m.group("action") else "UNKNOWN",
+            "url": None,
+            "method": None,
+            "status_code": None,
+            "bytes_size": None,
+            "timestamp": None,
+            "raw_line": line,
+        }
+
+    m = CISCO_ASA_RE.search(line)
+    if m:
+        return {
+            "log_type": "firewall",
+            "source_ip": m.group("src"),
+            "destination_ip": m.group("dst"),
+            "protocol": m.group("proto").upper() if m.group("proto") else None,
+            "source_port": _to_int(m.group("spt")),
+            "destination_port": _to_int(m.group("dpt")),
+            "action": "ALLOW" if m.group("action") and m.group("action").lower() in ("built", "allowed", "permit") else "DENY",
+            "url": None,
+            "method": None,
+            "status_code": None,
+            "bytes_size": None,
+            "timestamp": None,
+            "raw_line": line,
+        }
+
+    m = SQUID_RE.match(line)
+    if m:
+        return {
+            "log_type": "proxy",
+            "source_ip": m.group("src"),
+            "destination_ip": None,
+            "protocol": None,
+            "source_port": None,
+            "destination_port": None,
+            "action": None,
+            "url": m.group("url"),
+            "method": m.group("method"),
+            "status_code": _to_int(m.group("status")),
+            "bytes_size": _to_int(m.group("size")),
+            "timestamp": _parse_timestamp(m.group("ts")),
+            "raw_line": line,
+        }
+
+    m = NGINX_RE.match(line)
+    if m:
+        return {
+            "log_type": "proxy",
+            "source_ip": m.group("src"),
+            "destination_ip": None,
+            "protocol": None,
+            "source_port": None,
+            "destination_port": None,
+            "action": None,
+            "url": m.group("url"),
+            "method": m.group("method"),
+            "status_code": _to_int(m.group("status")),
+            "bytes_size": _to_int(m.group("size")),
+            "timestamp": _parse_timestamp(m.group("ts")),
+            "raw_line": line,
+        }
+
+    return None
+
+
+def parse_lines(lines: List[str]) -> List[Dict[str, Any]]:
+    """Parse each line with `parse_line` and return the parsed records in input order.
+
+    Lines for which `parse_line` returns None (blank or unrecognised) are
+    left out of the result.
+    """
+    return [record for record in map(parse_line, lines) if record]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..0192721
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,29 @@
+version: "3.8"
+
+services:
+  backend:  # no published ports; the nginx service proxies to it at backend:8000
+    build: ./backend
+    container_name: log-analyzer-backend
+    volumes:
+      - logdata:/app/data  # named volume holding the SQLite file (WORKDIR is /app, DB path is data/logs.db)
+    environment:
+      - DATABASE_URL=sqlite+aiosqlite:///data/logs.db
+      - OLLAMA_URL=http://host.docker.internal:11434
+      - OLLAMA_MODEL=llava:7b
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # lets the container resolve the Docker host by name
+    restart: unless-stopped
+
+  nginx:
+    image: nginx:alpine
+    container_name: log-analyzer-nginx
+    ports:
+      - "8080:80"
+    volumes:
+      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
+    depends_on:
+      - backend
+    restart: unless-stopped
+
+volumes:
+  logdata:
diff --git a/nginx/nginx.conf b/nginx/nginx.conf
new file mode 100644
index 0000000..d6fe1b4
--- /dev/null
+++ b/nginx/nginx.conf
@@ -0,0 +1,25 @@
+events {
+    worker_connections 1024;
+}
+
+http {
+    upstream backend {
+        server backend:8000;
+    }
+
+    server {
+        listen 80;
+        client_max_body_size 100M;
+
+        location / {
+            proxy_pass http://backend;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            # /api/analyze waits up to 120 s for Ollama (httpx timeout in llm_service.py);
+            # nginx's default proxy_read_timeout of 60 s would answer 504 before that.
+            proxy_read_timeout 180s;
+        }
+    }
+}