Initial commit: Log Analyzer Backend

This commit is contained in:
Arch Agent
2026-05-07 10:14:57 +02:00
commit 695d715d5b
16 changed files with 748 additions and 0 deletions
View File
+105
View File
@@ -0,0 +1,105 @@
from typing import List, Dict, Any
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from models import LogEntry
class StatsAnalyzer:
@staticmethod
async def overview(session: AsyncSession) -> Dict[str, Any]:
total = (await session.execute(select(func.count()).select_from(LogEntry))).scalar() or 0
fw = (await session.execute(
select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "firewall")
)).scalar() or 0
px = (await session.execute(
select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "proxy")
)).scalar() or 0
return {
"total_entries": total,
"firewall_entries": fw,
"proxy_entries": px,
}
@staticmethod
async def top_sources(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
stmt = (
select(LogEntry.source_ip, func.count().label("cnt"))
.where(LogEntry.source_ip.isnot(None))
.group_by(LogEntry.source_ip)
.order_by(func.count().desc())
.limit(limit)
)
rows = await session.execute(stmt)
return [{"source_ip": r[0], "count": r[1]} for r in rows]
@staticmethod
async def top_destinations(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
stmt = (
select(LogEntry.destination_ip, func.count().label("cnt"))
.where(LogEntry.destination_ip.isnot(None))
.group_by(LogEntry.destination_ip)
.order_by(func.count().desc())
.limit(limit)
)
rows = await session.execute(stmt)
return [{"destination_ip": r[0], "count": r[1]} for r in rows]
@staticmethod
async def top_ports(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
stmt = (
select(LogEntry.destination_port, func.count().label("cnt"))
.where(LogEntry.destination_port.isnot(None))
.group_by(LogEntry.destination_port)
.order_by(func.count().desc())
.limit(limit)
)
rows = await session.execute(stmt)
return [{"destination_port": r[0], "count": r[1]} for r in rows]
@staticmethod
async def top_urls(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
stmt = (
select(LogEntry.url, func.count().label("cnt"))
.where(LogEntry.url.isnot(None))
.group_by(LogEntry.url)
.order_by(func.count().desc())
.limit(limit)
)
rows = await session.execute(stmt)
return [{"url": r[0], "count": r[1]} for r in rows]
@staticmethod
async def action_distribution(session: AsyncSession) -> List[Dict[str, Any]]:
stmt = (
select(LogEntry.action, func.count().label("cnt"))
.where(LogEntry.action.isnot(None))
.group_by(LogEntry.action)
.order_by(func.count().desc())
)
rows = await session.execute(stmt)
return [{"action": r[0], "count": r[1]} for r in rows]
@staticmethod
async def timeline(session: AsyncSession, granularity: str = "hour") -> List[Dict[str, Any]]:
if granularity == "hour":
fmt = "%Y-%m-%d %H:00"
else:
fmt = "%Y-%m-%d"
# SQLite strftime
stmt = (
select(func.strftime(fmt, LogEntry.timestamp).label("bucket"), func.count().label("cnt"))
.where(LogEntry.timestamp.isnot(None))
.group_by("bucket")
.order_by("bucket")
)
rows = await session.execute(stmt)
return [{"time_bucket": r[0], "count": r[1]} for r in rows]
@staticmethod
async def unique_counts(session: AsyncSession) -> Dict[str, int]:
src = (await session.execute(
select(func.count(func.distinct(LogEntry.source_ip)))
)).scalar() or 0
dst = (await session.execute(
select(func.count(func.distinct(LogEntry.destination_ip)))
)).scalar() or 0
return {"unique_sources": src, "unique_destinations": dst}
+52
View File
@@ -0,0 +1,52 @@
import httpx
from typing import List, Dict, Any
from config import settings
class LLMService:
def __init__(self, base_url: str = settings.ollama_url, model: str = settings.ollama_model):
self.base_url = base_url.rstrip("/")
self.model = model
async def analyze_logs(self, log_snippets: List[str], stats: Dict[str, Any]) -> str:
prompt = self._build_analysis_prompt(log_snippets, stats)
payload = {
"model": self.model,
"prompt": prompt,
"stream": False,
"options": {
"temperature": 0.3,
"num_predict": 1024,
},
}
try:
async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post(f"{self.base_url}/api/generate", json=payload)
resp.raise_for_status()
data = resp.json()
return data.get("response", "No response from LLM.").strip()
except httpx.HTTPStatusError as e:
return f"LLM HTTP error: {e.response.status_code}"
except httpx.ConnectError:
return "LLM service unreachable. Ensure Ollama is running and accessible."
except Exception as e:
return f"LLM analysis error: {type(e).__name__}: {str(e)}"
def _build_analysis_prompt(self, snippets: List[str], stats: Dict[str, Any]) -> str:
top_sources = ", ".join([f"{s['source_ip']} ({s['count']})" for s in stats.get("top_sources", [])[:5]])
top_dests = ", ".join([f"{d['destination_ip']} ({d['count']})" for d in stats.get("top_destinations", [])[:5]])
top_ports = ", ".join([f"{p['destination_port']} ({p['count']})" for p in stats.get("top_ports", [])[:5]])
lines = "\n".join(snippets[:20])
return (
"You are a network security analyst. Analyze the following firewall/proxy log snippets and statistics. "
"Summarize the most important observations in 3-5 bullet points. Identify potential anomalies, scan patterns, "
"or top talkers. Be concise and factual. Use German or English depending on the log content.\n\n"
f"=== Statistics ===\n"
f"Top Sources: {top_sources}\n"
f"Top Destinations: {top_dests}\n"
f"Top Ports: {top_ports}\n"
f"Total Entries: {stats.get('total_entries', 0)}\n"
f"Unique Sources: {stats.get('unique_sources', 0)}\n"
f"Unique Destinations: {stats.get('unique_destinations', 0)}\n\n"
f"=== Sample Logs ===\n{lines}\n\n"
"=== Analysis ==="
)
+189
View File
@@ -0,0 +1,189 @@
import re
import ipaddress
from datetime import datetime
from typing import Dict, List, Any, Optional
from models import LogEntry
# iptables: ... SRC=1.2.3.4 DST=5.6.7.8 PROTO=TCP SPT=123 DPT=80 ...
IPTABLES_RE = re.compile(
r"SRC=(?P<src>[0-9a-fA-F.:]+)\s+"
r"DST=(?P<dst>[0-9a-fA-F.:]+)\s+"
r"(?:PROTO=(?P<proto>\w+)\s+)?"
r"(?:SPT=(?P<spt>\d+)\s+)?"
r"(?:DPT=(?P<dpt>\d+)\s+)?"
r".*?(?P<action>ACCEPT|DROP|REJECT|DENY|ALLOW|PASS|BLOCK)",
re.IGNORECASE,
)
# pfSense filterlog: <134>1 2024-01-01T12:00:00+00:00 ... filterlog: ... 4,,,1000000103,em0,match,pass,in,4,0x0,,64,0,0,DF,6,tcp,60,192.168.1.1,10.0.0.1,0,0,0,0,12345,80,0,S,1234567890,,mss
PFSENSE_RE = re.compile(
r"filterlog:.*?,(?P<action>pass|block|match|reject),.*?,(?P<proto>tcp|udp|icmp),.*?,"
r"(?P<src>[0-9a-fA-F.:]+),(?P<dst>[0-9a-fA-F.:]+),.*?,(?P<spt>\d+)?,(?P<dpt>\d+)?",
re.IGNORECASE,
)
# Cisco ASA: %ASA-6-302013: Built outbound TCP connection 123 for outside:10.0.0.1/80 to inside:192.168.1.1/12345
CISCO_ASA_RE = re.compile(
r"%ASA-\d+-\d+:\s+.*?(?P<action>Built|Teardown|Denied|Deny|Allowed|Permit).*?"
r"(?P<proto>TCP|UDP|ICMP).*?"
r"(?:for\s+(?P<dir>\w+):)?(?P<dst>[0-9.]+)/(?P<dpt>\d+)\s+"
r"to\s+(?P<src_dir>\w+):(?P<src>[0-9.]+)/(?P<spt>\d+)",
re.IGNORECASE,
)
# Squid: 1704108600.123 200 192.168.1.1 TCP_MISS/200 1234 GET http://example.com/ - DIRECT/93.184.216.34 text/html
SQUID_RE = re.compile(
r"^(?P<ts>[\d.]+)\s+"
r"(?P<elapsed>-?\d+)\s+"
r"(?P<src>[0-9a-fA-F.:]+)\s+"
r"(?P<code>\S+)\s+"
r"(?P<status>\d+)\s+"
r"(?P<size>\d+)\s+"
r"(?P<method>\w+)\s+"
r"(?P<url>\S+)\s+",
)
# Nginx proxy: 192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/7.68.0"
NGINX_RE = re.compile(
r"^(?P<src>[0-9a-fA-F.:]+)\s+.*?\s+"
r"\[(?P<ts>[^\]]+)\]\s+"
r'"(?P<method>\w+)\s+(?P<url>\S+)\s+HTTP/[\d.]+"\s+'
r"(?P<status>\d+)\s+(?P<size>\d+)",
)
TIMESTAMP_FORMATS = [
"%d/%b/%Y:%H:%M:%S %z",
"%Y-%m-%dT%H:%M:%S%z",
"%Y-%m-%d %H:%M:%S",
]
def _to_int(val: str | None) -> int | None:
if val is None:
return None
try:
return int(val)
except ValueError:
return None
def _parse_timestamp(ts_str: str) -> datetime | None:
for fmt in TIMESTAMP_FORMATS:
try:
return datetime.strptime(ts_str, fmt)
except ValueError:
continue
# Try unix float
try:
return datetime.utcfromtimestamp(float(ts_str))
except (ValueError, OSError, OverflowError):
pass
return None
def parse_line(line: str) -> Optional[Dict[str, Any]]:
line = line.strip()
if not line:
return None
# iptables / kernel / ufw
m = IPTABLES_RE.search(line)
if m:
return {
"log_type": "firewall",
"source_ip": m.group("src"),
"destination_ip": m.group("dst"),
"protocol": m.group("proto"),
"source_port": _to_int(m.group("spt")),
"destination_port": _to_int(m.group("dpt")),
"action": m.group("action").upper(),
"url": None,
"method": None,
"status_code": None,
"bytes_size": None,
"timestamp": None,
"raw_line": line,
}
m = PFSENSE_RE.search(line)
if m:
return {
"log_type": "firewall",
"source_ip": m.group("src"),
"destination_ip": m.group("dst"),
"protocol": m.group("proto").upper() if m.group("proto") else None,
"source_port": _to_int(m.group("spt")),
"destination_port": _to_int(m.group("dpt")),
"action": m.group("action").upper() if m.group("action") else "UNKNOWN",
"url": None,
"method": None,
"status_code": None,
"bytes_size": None,
"timestamp": None,
"raw_line": line,
}
m = CISCO_ASA_RE.search(line)
if m:
return {
"log_type": "firewall",
"source_ip": m.group("src"),
"destination_ip": m.group("dst"),
"protocol": m.group("proto").upper() if m.group("proto") else None,
"source_port": _to_int(m.group("spt")),
"destination_port": _to_int(m.group("dpt")),
"action": "ALLOW" if m.group("action") and m.group("action").lower() in ("built", "allowed", "permit") else "DENY",
"url": None,
"method": None,
"status_code": None,
"bytes_size": None,
"timestamp": None,
"raw_line": line,
}
m = SQUID_RE.match(line)
if m:
return {
"log_type": "proxy",
"source_ip": m.group("src"),
"destination_ip": None,
"protocol": None,
"source_port": None,
"destination_port": None,
"action": None,
"url": m.group("url"),
"method": m.group("method"),
"status_code": _to_int(m.group("status")),
"bytes_size": _to_int(m.group("size")),
"timestamp": _parse_timestamp(m.group("ts")),
"raw_line": line,
}
m = NGINX_RE.match(line)
if m:
return {
"log_type": "proxy",
"source_ip": m.group("src"),
"destination_ip": None,
"protocol": None,
"source_port": None,
"destination_port": None,
"action": None,
"url": m.group("url"),
"method": m.group("method"),
"status_code": _to_int(m.group("status")),
"bytes_size": _to_int(m.group("size")),
"timestamp": _parse_timestamp(m.group("ts")),
"raw_line": line,
}
return None
def parse_lines(lines: List[str]) -> List[Dict[str, Any]]:
results = []
for line in lines:
parsed = parse_line(line)
if parsed:
results.append(parsed)
return results