#!/usr/bin/env python3
"""Render a "top requests" chart from a GoAccess JSON report.

Reads a JSON payload on stdin (outputPath, publicPath, optional stat.url),
fetches the GoAccess report, discounts the crawler share of traffic,
renders the top 10 requested paths via render_stats_charts, and prints the
public image path as JSON on stdout.
"""
import json
import os
import sys
import urllib.request

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))
ROOT_DIR = os.path.abspath(os.path.join(TOOLS_DIR, os.pardir))

# Make the local renderer module importable.
if CURRENT_DIR not in sys.path:
    sys.path.append(CURRENT_DIR)
if TOOLS_DIR not in sys.path:
    sys.path.append(TOOLS_DIR)


def load_config():
    """Return tools/config.json as a dict, or an empty dict if unreadable."""
    cfg_path = os.path.join(ROOT_DIR, "tools", "config.json")
    try:
        with open(cfg_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        return {}


def fetch_goaccess(url, timeout=10):
    """Download and parse the GoAccess JSON report."""
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        data = resp.read().decode("utf-8")
    return json.loads(data)


def crawler_ratios(data):
    """Return the crawler share of hits and visitors from the browsers panel."""
    browsers = (data.get("browsers") or {}).get("data") or []
    crawler = next((entry for entry in browsers if entry.get("data") == "Crawlers"), None)
    if not crawler:
        return {"hits": 0.0, "visitors": 0.0}

    def total(field):
        return sum((entry.get(field, {}) or {}).get("count", 0) for entry in browsers)

    total_hits = total("hits")
    total_visitors = total("visitors")
    return {
        "hits": min(1.0, (crawler.get("hits", {}) or {}).get("count", 0) / total_hits) if total_hits else 0.0,
        "visitors": min(1.0, (crawler.get("visitors", {}) or {}).get("count", 0) / total_visitors) if total_visitors else 0.0,
    }


def adjust(value, ratio):
    """Scale a counter down by the crawler ratio, never below zero."""
    return max(0, round(value * (1 - ratio)))


def main():
    try:
        payload = json.load(sys.stdin)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to read JSON: {exc}", file=sys.stderr)
        sys.exit(1)

    output_path = payload.get("outputPath")
    public_path = payload.get("publicPath")
    url = (payload.get("stat") or {}).get("url")

    cfg = load_config()
    goaccess_url = url or (cfg.get("goaccess") or {}).get("url") or ""
    if not goaccess_url:
        print("No GoAccess URL provided (stat.url or config goaccess.url)", file=sys.stderr)
        sys.exit(1)

    try:
        data = fetch_goaccess(goaccess_url)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to fetch GoAccess JSON: {exc}", file=sys.stderr)
        sys.exit(1)

    ratios = crawler_ratios(data)
    reqs = (data.get("requests") or {}).get("data") or []
    # Each entry: data = request path, hits.count / visitors.count = counters.
    cleaned = []
    for entry in reqs:
        path = entry.get("data") or ""
        hits = (entry.get("hits") or {}).get("count", 0)
        if not path or hits <= 0:
            continue
        cleaned.append((path, adjust(hits, ratios["hits"])))

    cleaned.sort(key=lambda item: item[1], reverse=True)
    top = cleaned[:10]
    labels = [item[0] for item in top]
    values = [item[1] for item in top]

    try:
        from render_stats_charts import render_top_requests, setup_rcparams
    except ImportError as exc:
        print(f"Failed to import renderer: {exc}", file=sys.stderr)
        sys.exit(1)

    setup_rcparams()
    render_top_requests(
        {"labels": labels, "values": values, "title": "Top 10 requêtes (hors crawlers)"},
        output_path,
    )
    json.dump({"image": public_path}, sys.stdout)
    sys.stdout.flush()


if __name__ == "__main__":
    main()
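
# Example invocation: a minimal sketch, not taken from the repository. The
# filename "top_requests_chart.py", the paths, and the report URL below are
# assumptions; the only real requirement visible in this script is that
# render_stats_charts.py be importable from one of the sys.path entries above.
#
#   echo '{"outputPath": "/tmp/top.png", "publicPath": "/stats/top.png", "stat": {"url": "https://example.org/report.json"}}' | python3 top_requests_chart.py
#
# On success it writes {"image": "/stats/top.png"} to stdout.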