#!/usr/bin/env python3
# Source: 2025/tools/stats/top_requests.py (132 lines, 4.0 KiB, Python)
import sys
import json
import os
import urllib.request
# Resolve the script directory, the tools/ directory, and the repo root so
# that sibling helper modules (lib.*) can be imported by the code below.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))
ROOT_DIR = os.path.abspath(os.path.join(TOOLS_DIR, os.pardir))
# Register both lookup directories on sys.path exactly once.
for _extra_dir in (CURRENT_DIR, TOOLS_DIR):
    if _extra_dir not in sys.path:
        sys.path.append(_extra_dir)
def load_env(env_path=None):
    """Load KEY=VALUE pairs from a .env file into os.environ.

    Blank lines, ``#`` comments, and lines without ``=`` are ignored, and
    variables already present in the environment are never overwritten
    (first definition wins, real environment wins over the file).

    Args:
        env_path: Optional explicit path; defaults to ``<repo root>/.env``.
    """
    path = env_path or os.path.join(ROOT_DIR, ".env")
    if not os.path.exists(path):
        return
    try:
        with open(path, "r", encoding="utf-8") as handle:
            for line in handle:
                stripped = line.strip()
                if not stripped or stripped.startswith("#") or "=" not in stripped:
                    continue
                key, value = stripped.split("=", 1)
                key = key.strip()
                # Fix: the key was stripped but the value was not, so
                # "KEY = v" stored " v".  Strip the value and drop a matched
                # pair of surrounding quotes, as standard .env parsers do.
                value = value.strip()
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                if not key or key in os.environ:
                    continue
                os.environ[key] = value
    except Exception as exc:  # noqa: BLE001 — best effort: warn, don't crash.
        print(f"Failed to load .env: {exc}", file=sys.stderr)
def load_config():
    """Return the parsed tools/config/config.json, or {} when unavailable."""
    config_file = os.path.join(ROOT_DIR, "tools", "config", "config.json")
    try:
        with open(config_file, "r", encoding="utf-8") as fp:
            parsed = json.load(fp)
    except Exception:  # missing / unreadable / invalid JSON -> empty defaults
        return {}
    return parsed
def fetch_goaccess(url, timeout=10):
    """Download the GoAccess JSON report at *url* and return it parsed."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read().decode("utf-8")
    return json.loads(body)
def crawler_ratios(data):
    """Estimate the crawler share of traffic in a GoAccess report.

    Looks for the "Crawlers" entry in the browsers panel and returns
    ``{"hits": float, "visitors": float}`` with each ratio clamped to
    [0, 1].  Both ratios are 0.0 when no crawler entry (or no data) exists.
    """
    entries = (data.get("browsers") or {}).get("data") or []
    crawler_entry = None
    for candidate in entries:
        if candidate.get("data") == "Crawlers":
            crawler_entry = candidate
            break
    if crawler_entry is None:
        return {"hits": 0.0, "visitors": 0.0}

    def _ratio(field):
        # Total over all browser entries; 0 total means no meaningful ratio.
        denominator = sum((item.get(field, {}) or {}).get("count", 0) for item in entries)
        if not denominator:
            return 0.0
        numerator = (crawler_entry.get(field, {}) or {}).get("count", 0)
        return min(1.0, numerator / denominator)

    return {"hits": _ratio("hits"), "visitors": _ratio("visitors")}
def adjust(value, ratio):
    """Scale *value* down by *ratio* (0..1), rounded, floored at zero."""
    discounted = round(value * (1 - ratio))
    return discounted if discounted > 0 else 0
def main():
    """Render a "top requests" chart from a GoAccess JSON report.

    Reads a job payload from stdin — expected shape:
    ``{"outputPath": ..., "publicPath": ..., "stat": {"url": ...}}`` —
    fetches the report, discounts the estimated crawler share from each
    request's hit count, renders the top 10, and writes
    ``{"image": <publicPath>}`` to stdout.  Any failure prints a message to
    stderr and exits with status 1.
    """
    try:
        payload = json.load(sys.stdin)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to read JSON: {exc}", file=sys.stderr)
        sys.exit(1)
    output_path = payload.get("outputPath")
    public_path = payload.get("publicPath")
    url = payload.get("stat", {}).get("url")
    load_env()
    cfg = load_config()
    # URL precedence: explicit payload value, then env var, then config file.
    goaccess_url = url or os.environ.get("GOACCESS_URL") or (cfg.get("goaccess") or {}).get("url")
    if not goaccess_url:
        print("Missing GoAccess URL (set GOACCESS_URL or goaccess.url in tools/config/config.json)", file=sys.stderr)
        sys.exit(1)
    try:
        data = fetch_goaccess(goaccess_url)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to fetch GoAccess JSON: {exc}", file=sys.stderr)
        sys.exit(1)
    ratios = crawler_ratios(data)
    reqs = (data.get("requests") or {}).get("data") or []
    # Each entry: .data = request path, plus hits.count / visitors.count.
    cleaned = []
    for entry in reqs:
        path = entry.get("data") or ""
        hits = (entry.get("hits") or {}).get("count", 0)
        if not path or hits <= 0:
            continue
        # Discount the estimated crawler share from the raw hit count.
        cleaned.append((path, adjust(hits, ratios["hits"])))
    cleaned.sort(key=lambda item: item[1], reverse=True)
    top = cleaned[:10]
    labels = [item[0] for item in top]
    values = [item[1] for item in top]
    # Imported here, after the sys.path additions at module top took effect.
    try:
        from lib.render_stats_charts import render_top_requests, setup_rcparams
    except ImportError as exc:  # noqa: BLE001
        print(f"Failed to import renderer: {exc}", file=sys.stderr)
        sys.exit(1)
    setup_rcparams()
    render_top_requests({"labels": labels, "values": values, "title": "Top 10 requêtes (hors crawlers)"}, output_path)
    # Emit the machine-readable result for the calling pipeline.
    json.dump({"image": public_path}, sys.stdout)
    sys.stdout.flush()


if __name__ == "__main__":
    main()