#!/usr/bin/env python3
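"""Render a "top requests" chart from a GoAccess JSON report, excluding crawlers.

Reads a JSON payload on stdin ({"outputPath": ..., "publicPath": ...,
"stat": {"url": ...}}), fetches the GoAccess report, discounts crawler
traffic by the crawler share reported in the "browsers" panel, and renders
the top 10 request paths with render_stats_charts.
"""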

import json
import os
import sys
import urllib.request

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir))
ROOT_DIR = os.path.abspath(os.path.join(TOOLS_DIR, os.pardir))

# Make sibling tool modules (render_stats_charts is imported in main()) importable.
if CURRENT_DIR not in sys.path:
    sys.path.append(CURRENT_DIR)
if TOOLS_DIR not in sys.path:
    sys.path.append(TOOLS_DIR)

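
# load_env() accepts a minimal KEY=VALUE format (illustrative example):
#   # comments and blank lines are ignored
#   GOACCESS_URL=https://example.com/goaccess.json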
def load_env(env_path=None):
    """Load KEY=VALUE pairs from the project .env; pre-set os.environ keys win."""
    path = env_path or os.path.join(ROOT_DIR, ".env")
    if not os.path.exists(path):
        return
    try:
        with open(path, "r", encoding="utf-8") as handle:
            for line in handle:
                stripped = line.strip()
                if not stripped or stripped.startswith("#") or "=" not in stripped:
                    continue
                key, value = stripped.split("=", 1)
                key = key.strip()
                if not key or key in os.environ:
                    continue
                # Tolerate whitespace and optional surrounding quotes around the value.
                os.environ[key] = value.strip().strip("'\"")
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to load .env: {exc}", file=sys.stderr)

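
# Expected tools/config.json shape; only goaccess.url is consumed (illustrative):
#   {"goaccess": {"url": "https://example.com/goaccess.json"}}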
def load_config():
    """Return tools/config.json as a dict, or {} when missing or unparsable."""
    cfg_path = os.path.join(ROOT_DIR, "tools", "config.json")
    try:
        with open(cfg_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        return {}

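
# GoAccess can emit this JSON report itself, e.g. `goaccess access.log -o report.json`
# (command illustrative); any URL serving that document works here.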
def fetch_goaccess(url, timeout=10):
    """Download and parse the GoAccess JSON report served at ``url``."""
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        data = resp.read().decode("utf-8")
    return json.loads(data)

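
# Illustrative slice of the "browsers" panel walked below (counts invented):
#   {"browsers": {"data": [
#       {"data": "Chrome",   "hits": {"count": 900}, "visitors": {"count": 90}},
#       {"data": "Crawlers", "hits": {"count": 100}, "visitors": {"count": 10}},
#   ]}}  ->  {"hits": 0.1, "visitors": 0.1}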
def crawler_ratios(data):
    """Return the crawler share of total hits and visitors, each clamped to [0, 1]."""
    browsers = (data.get("browsers") or {}).get("data") or []
    crawler = next((entry for entry in browsers if entry.get("data") == "Crawlers"), None)
    if not crawler:
        return {"hits": 0.0, "visitors": 0.0}

    def total(field):
        return sum((entry.get(field, {}) or {}).get("count", 0) for entry in browsers)

    total_hits = total("hits")
    total_visitors = total("visitors")
    return {
        "hits": min(1.0, (crawler.get("hits", {}) or {}).get("count", 0) / total_hits) if total_hits else 0.0,
        "visitors": min(1.0, (crawler.get("visitors", {}) or {}).get("count", 0) / total_visitors)
        if total_visitors
        else 0.0,
    }

def adjust(value, ratio):
    """Scale ``value`` down by ``ratio``, e.g. adjust(1000, 0.1) -> 900."""
    return max(0, round(value * (1 - ratio)))

def main():
    try:
        payload = json.load(sys.stdin)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to read JSON: {exc}", file=sys.stderr)
        sys.exit(1)

    output_path = payload.get("outputPath")
    public_path = payload.get("publicPath")
    url = payload.get("stat", {}).get("url")

    if not output_path:
        print("Missing outputPath in stdin payload", file=sys.stderr)
        sys.exit(1)

    load_env()
    cfg = load_config()
    # URL precedence: explicit stat.url, then GOACCESS_URL, then tools/config.json.
    goaccess_url = url or os.environ.get("GOACCESS_URL") or (cfg.get("goaccess") or {}).get("url")

    if not goaccess_url:
        print("Missing GoAccess URL (set GOACCESS_URL or goaccess.url in tools/config.json)", file=sys.stderr)
        sys.exit(1)

    try:
        data = fetch_goaccess(goaccess_url)
    except Exception as exc:  # noqa: BLE001
        print(f"Failed to fetch GoAccess JSON: {exc}", file=sys.stderr)
        sys.exit(1)

    ratios = crawler_ratios(data)

    # Each "requests" entry carries the request path in .data plus nested
    # hits/visitors counters; drop empty rows and discount hits by the crawler ratio.
    reqs = (data.get("requests") or {}).get("data") or []
    cleaned = []
    for entry in reqs:
        path = entry.get("data") or ""
        hits = (entry.get("hits") or {}).get("count", 0)
        if not path or hits <= 0:
            continue
        cleaned.append((path, adjust(hits, ratios["hits"])))

    cleaned.sort(key=lambda item: item[1], reverse=True)
    top = cleaned[:10]

    labels = [item[0] for item in top]
    values = [item[1] for item in top]

    # Imported lazily so the sys.path setup at the top has already run.
    try:
        from render_stats_charts import render_top_requests, setup_rcparams
    except ImportError as exc:
        print(f"Failed to import renderer: {exc}", file=sys.stderr)
        sys.exit(1)

    setup_rcparams()
    render_top_requests({"labels": labels, "values": values, "title": "Top 10 requests (excluding crawlers)"}, output_path)

    json.dump({"image": public_path}, sys.stdout)
    sys.stdout.flush()

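
# Typical invocation (paths and script name illustrative):
#   echo '{"outputPath": "out/top-requests.png", "publicPath": "/stats/top-requests.png"}' \
#       | GOACCESS_URL=https://example.com/goaccess.json python3 tools/<this-script>.py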
if __name__ == "__main__":
    main()