Initial commit: Log Analyzer Backend
This commit is contained in:
@@ -0,0 +1,105 @@
|
||||
from typing import List, Dict, Any
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from models import LogEntry
|
||||
|
||||
class StatsAnalyzer:
|
||||
@staticmethod
|
||||
async def overview(session: AsyncSession) -> Dict[str, Any]:
|
||||
total = (await session.execute(select(func.count()).select_from(LogEntry))).scalar() or 0
|
||||
fw = (await session.execute(
|
||||
select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "firewall")
|
||||
)).scalar() or 0
|
||||
px = (await session.execute(
|
||||
select(func.count()).select_from(LogEntry).where(LogEntry.log_type == "proxy")
|
||||
)).scalar() or 0
|
||||
return {
|
||||
"total_entries": total,
|
||||
"firewall_entries": fw,
|
||||
"proxy_entries": px,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
async def top_sources(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
|
||||
stmt = (
|
||||
select(LogEntry.source_ip, func.count().label("cnt"))
|
||||
.where(LogEntry.source_ip.isnot(None))
|
||||
.group_by(LogEntry.source_ip)
|
||||
.order_by(func.count().desc())
|
||||
.limit(limit)
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"source_ip": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def top_destinations(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
|
||||
stmt = (
|
||||
select(LogEntry.destination_ip, func.count().label("cnt"))
|
||||
.where(LogEntry.destination_ip.isnot(None))
|
||||
.group_by(LogEntry.destination_ip)
|
||||
.order_by(func.count().desc())
|
||||
.limit(limit)
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"destination_ip": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def top_ports(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
|
||||
stmt = (
|
||||
select(LogEntry.destination_port, func.count().label("cnt"))
|
||||
.where(LogEntry.destination_port.isnot(None))
|
||||
.group_by(LogEntry.destination_port)
|
||||
.order_by(func.count().desc())
|
||||
.limit(limit)
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"destination_port": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def top_urls(session: AsyncSession, limit: int = 20) -> List[Dict[str, Any]]:
|
||||
stmt = (
|
||||
select(LogEntry.url, func.count().label("cnt"))
|
||||
.where(LogEntry.url.isnot(None))
|
||||
.group_by(LogEntry.url)
|
||||
.order_by(func.count().desc())
|
||||
.limit(limit)
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"url": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def action_distribution(session: AsyncSession) -> List[Dict[str, Any]]:
|
||||
stmt = (
|
||||
select(LogEntry.action, func.count().label("cnt"))
|
||||
.where(LogEntry.action.isnot(None))
|
||||
.group_by(LogEntry.action)
|
||||
.order_by(func.count().desc())
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"action": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def timeline(session: AsyncSession, granularity: str = "hour") -> List[Dict[str, Any]]:
|
||||
if granularity == "hour":
|
||||
fmt = "%Y-%m-%d %H:00"
|
||||
else:
|
||||
fmt = "%Y-%m-%d"
|
||||
# SQLite strftime
|
||||
stmt = (
|
||||
select(func.strftime(fmt, LogEntry.timestamp).label("bucket"), func.count().label("cnt"))
|
||||
.where(LogEntry.timestamp.isnot(None))
|
||||
.group_by("bucket")
|
||||
.order_by("bucket")
|
||||
)
|
||||
rows = await session.execute(stmt)
|
||||
return [{"time_bucket": r[0], "count": r[1]} for r in rows]
|
||||
|
||||
@staticmethod
|
||||
async def unique_counts(session: AsyncSession) -> Dict[str, int]:
|
||||
src = (await session.execute(
|
||||
select(func.count(func.distinct(LogEntry.source_ip)))
|
||||
)).scalar() or 0
|
||||
dst = (await session.execute(
|
||||
select(func.count(func.distinct(LogEntry.destination_ip)))
|
||||
)).scalar() or 0
|
||||
return {"unique_sources": src, "unique_destinations": dst}
|
||||
@@ -0,0 +1,52 @@
|
||||
import httpx
|
||||
from typing import List, Dict, Any
|
||||
from config import settings
|
||||
|
||||
class LLMService:
|
||||
def __init__(self, base_url: str = settings.ollama_url, model: str = settings.ollama_model):
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model = model
|
||||
|
||||
async def analyze_logs(self, log_snippets: List[str], stats: Dict[str, Any]) -> str:
|
||||
prompt = self._build_analysis_prompt(log_snippets, stats)
|
||||
payload = {
|
||||
"model": self.model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": 0.3,
|
||||
"num_predict": 1024,
|
||||
},
|
||||
}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
resp = await client.post(f"{self.base_url}/api/generate", json=payload)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
return data.get("response", "No response from LLM.").strip()
|
||||
except httpx.HTTPStatusError as e:
|
||||
return f"LLM HTTP error: {e.response.status_code}"
|
||||
except httpx.ConnectError:
|
||||
return "LLM service unreachable. Ensure Ollama is running and accessible."
|
||||
except Exception as e:
|
||||
return f"LLM analysis error: {type(e).__name__}: {str(e)}"
|
||||
|
||||
def _build_analysis_prompt(self, snippets: List[str], stats: Dict[str, Any]) -> str:
|
||||
top_sources = ", ".join([f"{s['source_ip']} ({s['count']})" for s in stats.get("top_sources", [])[:5]])
|
||||
top_dests = ", ".join([f"{d['destination_ip']} ({d['count']})" for d in stats.get("top_destinations", [])[:5]])
|
||||
top_ports = ", ".join([f"{p['destination_port']} ({p['count']})" for p in stats.get("top_ports", [])[:5]])
|
||||
lines = "\n".join(snippets[:20])
|
||||
return (
|
||||
"You are a network security analyst. Analyze the following firewall/proxy log snippets and statistics. "
|
||||
"Summarize the most important observations in 3-5 bullet points. Identify potential anomalies, scan patterns, "
|
||||
"or top talkers. Be concise and factual. Use German or English depending on the log content.\n\n"
|
||||
f"=== Statistics ===\n"
|
||||
f"Top Sources: {top_sources}\n"
|
||||
f"Top Destinations: {top_dests}\n"
|
||||
f"Top Ports: {top_ports}\n"
|
||||
f"Total Entries: {stats.get('total_entries', 0)}\n"
|
||||
f"Unique Sources: {stats.get('unique_sources', 0)}\n"
|
||||
f"Unique Destinations: {stats.get('unique_destinations', 0)}\n\n"
|
||||
f"=== Sample Logs ===\n{lines}\n\n"
|
||||
"=== Analysis ==="
|
||||
)
|
||||
@@ -0,0 +1,189 @@
|
||||
import re
|
||||
import ipaddress
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional
|
||||
from models import LogEntry
|
||||
|
||||
# iptables: ... SRC=1.2.3.4 DST=5.6.7.8 PROTO=TCP SPT=123 DPT=80 ...
|
||||
IPTABLES_RE = re.compile(
|
||||
r"SRC=(?P<src>[0-9a-fA-F.:]+)\s+"
|
||||
r"DST=(?P<dst>[0-9a-fA-F.:]+)\s+"
|
||||
r"(?:PROTO=(?P<proto>\w+)\s+)?"
|
||||
r"(?:SPT=(?P<spt>\d+)\s+)?"
|
||||
r"(?:DPT=(?P<dpt>\d+)\s+)?"
|
||||
r".*?(?P<action>ACCEPT|DROP|REJECT|DENY|ALLOW|PASS|BLOCK)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# pfSense filterlog: <134>1 2024-01-01T12:00:00+00:00 ... filterlog: ... 4,,,1000000103,em0,match,pass,in,4,0x0,,64,0,0,DF,6,tcp,60,192.168.1.1,10.0.0.1,0,0,0,0,12345,80,0,S,1234567890,,mss
|
||||
PFSENSE_RE = re.compile(
|
||||
r"filterlog:.*?,(?P<action>pass|block|match|reject),.*?,(?P<proto>tcp|udp|icmp),.*?,"
|
||||
r"(?P<src>[0-9a-fA-F.:]+),(?P<dst>[0-9a-fA-F.:]+),.*?,(?P<spt>\d+)?,(?P<dpt>\d+)?",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Cisco ASA: %ASA-6-302013: Built outbound TCP connection 123 for outside:10.0.0.1/80 to inside:192.168.1.1/12345
|
||||
CISCO_ASA_RE = re.compile(
|
||||
r"%ASA-\d+-\d+:\s+.*?(?P<action>Built|Teardown|Denied|Deny|Allowed|Permit).*?"
|
||||
r"(?P<proto>TCP|UDP|ICMP).*?"
|
||||
r"(?:for\s+(?P<dir>\w+):)?(?P<dst>[0-9.]+)/(?P<dpt>\d+)\s+"
|
||||
r"to\s+(?P<src_dir>\w+):(?P<src>[0-9.]+)/(?P<spt>\d+)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Squid: 1704108600.123 200 192.168.1.1 TCP_MISS/200 1234 GET http://example.com/ - DIRECT/93.184.216.34 text/html
|
||||
SQUID_RE = re.compile(
|
||||
r"^(?P<ts>[\d.]+)\s+"
|
||||
r"(?P<elapsed>-?\d+)\s+"
|
||||
r"(?P<src>[0-9a-fA-F.:]+)\s+"
|
||||
r"(?P<code>\S+)\s+"
|
||||
r"(?P<status>\d+)\s+"
|
||||
r"(?P<size>\d+)\s+"
|
||||
r"(?P<method>\w+)\s+"
|
||||
r"(?P<url>\S+)\s+",
|
||||
)
|
||||
|
||||
# Nginx proxy: 192.168.1.1 - - [01/Jan/2024:12:00:00 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/7.68.0"
|
||||
NGINX_RE = re.compile(
|
||||
r"^(?P<src>[0-9a-fA-F.:]+)\s+.*?\s+"
|
||||
r"\[(?P<ts>[^\]]+)\]\s+"
|
||||
r'"(?P<method>\w+)\s+(?P<url>\S+)\s+HTTP/[\d.]+"\s+'
|
||||
r"(?P<status>\d+)\s+(?P<size>\d+)",
|
||||
)
|
||||
|
||||
TIMESTAMP_FORMATS = [
|
||||
"%d/%b/%Y:%H:%M:%S %z",
|
||||
"%Y-%m-%dT%H:%M:%S%z",
|
||||
"%Y-%m-%d %H:%M:%S",
|
||||
]
|
||||
|
||||
|
||||
def _to_int(val: str | None) -> int | None:
|
||||
if val is None:
|
||||
return None
|
||||
try:
|
||||
return int(val)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_timestamp(ts_str: str) -> datetime | None:
|
||||
for fmt in TIMESTAMP_FORMATS:
|
||||
try:
|
||||
return datetime.strptime(ts_str, fmt)
|
||||
except ValueError:
|
||||
continue
|
||||
# Try unix float
|
||||
try:
|
||||
return datetime.utcfromtimestamp(float(ts_str))
|
||||
except (ValueError, OSError, OverflowError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def parse_line(line: str) -> Optional[Dict[str, Any]]:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
return None
|
||||
|
||||
# iptables / kernel / ufw
|
||||
m = IPTABLES_RE.search(line)
|
||||
if m:
|
||||
return {
|
||||
"log_type": "firewall",
|
||||
"source_ip": m.group("src"),
|
||||
"destination_ip": m.group("dst"),
|
||||
"protocol": m.group("proto"),
|
||||
"source_port": _to_int(m.group("spt")),
|
||||
"destination_port": _to_int(m.group("dpt")),
|
||||
"action": m.group("action").upper(),
|
||||
"url": None,
|
||||
"method": None,
|
||||
"status_code": None,
|
||||
"bytes_size": None,
|
||||
"timestamp": None,
|
||||
"raw_line": line,
|
||||
}
|
||||
|
||||
m = PFSENSE_RE.search(line)
|
||||
if m:
|
||||
return {
|
||||
"log_type": "firewall",
|
||||
"source_ip": m.group("src"),
|
||||
"destination_ip": m.group("dst"),
|
||||
"protocol": m.group("proto").upper() if m.group("proto") else None,
|
||||
"source_port": _to_int(m.group("spt")),
|
||||
"destination_port": _to_int(m.group("dpt")),
|
||||
"action": m.group("action").upper() if m.group("action") else "UNKNOWN",
|
||||
"url": None,
|
||||
"method": None,
|
||||
"status_code": None,
|
||||
"bytes_size": None,
|
||||
"timestamp": None,
|
||||
"raw_line": line,
|
||||
}
|
||||
|
||||
m = CISCO_ASA_RE.search(line)
|
||||
if m:
|
||||
return {
|
||||
"log_type": "firewall",
|
||||
"source_ip": m.group("src"),
|
||||
"destination_ip": m.group("dst"),
|
||||
"protocol": m.group("proto").upper() if m.group("proto") else None,
|
||||
"source_port": _to_int(m.group("spt")),
|
||||
"destination_port": _to_int(m.group("dpt")),
|
||||
"action": "ALLOW" if m.group("action") and m.group("action").lower() in ("built", "allowed", "permit") else "DENY",
|
||||
"url": None,
|
||||
"method": None,
|
||||
"status_code": None,
|
||||
"bytes_size": None,
|
||||
"timestamp": None,
|
||||
"raw_line": line,
|
||||
}
|
||||
|
||||
m = SQUID_RE.match(line)
|
||||
if m:
|
||||
return {
|
||||
"log_type": "proxy",
|
||||
"source_ip": m.group("src"),
|
||||
"destination_ip": None,
|
||||
"protocol": None,
|
||||
"source_port": None,
|
||||
"destination_port": None,
|
||||
"action": None,
|
||||
"url": m.group("url"),
|
||||
"method": m.group("method"),
|
||||
"status_code": _to_int(m.group("status")),
|
||||
"bytes_size": _to_int(m.group("size")),
|
||||
"timestamp": _parse_timestamp(m.group("ts")),
|
||||
"raw_line": line,
|
||||
}
|
||||
|
||||
m = NGINX_RE.match(line)
|
||||
if m:
|
||||
return {
|
||||
"log_type": "proxy",
|
||||
"source_ip": m.group("src"),
|
||||
"destination_ip": None,
|
||||
"protocol": None,
|
||||
"source_port": None,
|
||||
"destination_port": None,
|
||||
"action": None,
|
||||
"url": m.group("url"),
|
||||
"method": m.group("method"),
|
||||
"status_code": _to_int(m.group("status")),
|
||||
"bytes_size": _to_int(m.group("size")),
|
||||
"timestamp": _parse_timestamp(m.group("ts")),
|
||||
"raw_line": line,
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def parse_lines(lines: List[str]) -> List[Dict[str, Any]]:
|
||||
results = []
|
||||
for line in lines:
|
||||
parsed = parse_line(line)
|
||||
if parsed:
|
||||
results.append(parsed)
|
||||
return results
|
||||
Reference in New Issue
Block a user