Page de statistiques
This commit is contained in:
166
tools/stats/common.py
Normal file
166
tools/stats/common.py
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import yaml
|
||||
from datetime import datetime, date, timezone
|
||||
|
||||
# Abbreviated French month names, ASCII-only (accents dropped), January first.
# NOTE(review): presumably indexed with ``month - 1`` by callers — confirm.
MONTH_LABELS = ["Jan", "Fev", "Mar", "Avr", "Mai", "Jun", "Jul", "Aou", "Sep", "Oct", "Nov", "Dec"]
|
||||
|
||||
|
||||
def find_markdown_files(root):
    """Return the paths of all Markdown article files found under *root*.

    The tree is walked recursively. A file is kept when its name ends with
    ``.md`` (compared case-insensitively) and is not exactly ``_index.md``.
    Paths are built by joining the walked directory with the file name, so
    they are relative or absolute depending on how *root* was given.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root)
        for name in names
        if name.lower().endswith(".md") and name != "_index.md"
    ]
|
||||
|
||||
|
||||
def collect_section_dirs(root):
    """Return the set of section directories found under *root*.

    A directory counts as a section when it directly contains a file named
    ``_index.md``. Every entry is stored as an absolute path.
    """
    return {
        os.path.abspath(folder)
        for folder, _subdirs, names in os.walk(root)
        if "_index.md" in names
    }
|
||||
|
||||
|
||||
def leaf_sections(section_dirs):
    """Return the sections that have no other section nested below them.

    A section is a leaf when no other entry of *section_dirs* starts with
    that section's path followed by the OS path separator. The separator is
    what prevents ``blog`` from being treated as the parent of ``blogger``.
    """

    def has_nested_section(candidate):
        prefix = candidate + os.sep
        return any(entry.startswith(prefix) for entry in section_dirs)

    return {
        candidate
        for candidate in section_dirs
        if not has_nested_section(candidate)
    }
|
||||
|
||||
|
||||
# Closing front matter delimiter: three dashes at the start of a line.
_FRONTMATTER_DELIMITER_RE = re.compile(r"^---", re.MULTILINE)


def parse_frontmatter(path):
    """Read a Markdown file and split it into YAML front matter and body.

    The file must begin with ``---``; the front matter ends at the next
    ``---`` found at the start of a line. Anchoring the closing delimiter to
    a line start keeps a ``---`` inside a value (for example a title
    containing a dash run) from truncating the metadata.

    Args:
        path: Path of the file to read (decoded as UTF-8).

    Returns:
        A ``(data, body)`` tuple. ``data`` is always a dict: it is empty when
        there is no front matter, when the YAML cannot be parsed, or when the
        YAML document is not a mapping. ``body`` is the text following the
        closing delimiter, or the whole file content when no complete front
        matter block is found.
    """
    with open(path, "r", encoding="utf-8") as handle:
        content = handle.read()

    if not content.startswith("---"):
        return {}, content

    # Start searching after the opening delimiter; with a ``pos`` argument
    # ``^`` still only matches at real line starts.
    closing = _FRONTMATTER_DELIMITER_RE.search(content, 3)
    if closing is None:
        return {}, content

    fm_text = content[3:closing.start()]
    body = content[closing.end():]

    try:
        data = yaml.safe_load(fm_text)
    except Exception:
        # Deliberately best-effort: besides YAML syntax errors the loader can
        # raise e.g. ValueError on an impossible timestamp; a broken header
        # must not abort the whole run.
        data = None

    # A scalar or list document is not usable metadata; callers rely on
    # ``.get`` being available.
    if not isinstance(data, dict):
        data = {}
    return data, body
|
||||
|
||||
|
||||
# Explicit string formats tried in order before falling back to
# ``datetime.fromisoformat``.
_DATE_FORMATS = (
    "%Y-%m-%dT%H:%M:%S%z",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%d",
    "%Y/%m/%d",
    "%d/%m/%Y",
)


def _parse_date_string(text):
    """Parse *text* with the known formats; return a datetime or None."""
    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        return None


def parse_date(value):
    """Convert a front matter date value into a naive ``datetime``.

    Accepted inputs are ``datetime`` and ``date`` objects, numeric epoch
    timestamps and strings in one of the supported formats (surrounding
    whitespace is ignored).

    Timezone-aware values are converted to UTC and returned without tzinfo.
    Epoch timestamps are interpreted in UTC as well, so the result does not
    depend on the timezone of the machine running the script. Naive values
    are returned unchanged.

    Returns:
        A naive ``datetime``, or ``None`` when *value* is empty, a boolean,
        of an unsupported type, or cannot be parsed.
    """
    # ``bool`` is a subclass of ``int``; a YAML ``true`` is not a timestamp.
    if not value or isinstance(value, bool):
        return None

    parsed = None
    if isinstance(value, datetime):
        # Must be tested before ``date``: datetime is a subclass of date.
        parsed = value
    elif isinstance(value, date):
        parsed = datetime.combine(value, datetime.min.time())
    elif isinstance(value, (int, float)):
        try:
            parsed = datetime.fromtimestamp(value, tz=timezone.utc)
        except (OverflowError, OSError, ValueError):
            return None
    elif isinstance(value, str):
        parsed = _parse_date_string(value.strip())

    if parsed is None:
        return None

    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed
|
||||
|
||||
|
||||
# A word is a run of word characters, apostrophes and hyphens.
WORD_RE = re.compile(r"[\w'-]+", re.UNICODE)


def count_words(text):
    """Return the number of ``WORD_RE`` matches in *text*.

    Empty or missing text (``""`` or ``None``) counts as zero words.
    """
    return len(WORD_RE.findall(text)) if text else 0
|
||||
|
||||
|
||||
def resolve_section(file_path, content_root, leaf_dirs):
    """Find the leaf section an article belongs to.

    Starting from the directory that contains *file_path*, walk up towards
    *content_root* and stop at the first directory listed in *leaf_dirs*.

    Args:
        file_path: Path of the article file.
        content_root: Root directory of the content tree.
        leaf_dirs: Collection of absolute leaf section directory paths.

    Returns:
        The section path relative to *content_root*, using ``/`` as
        separator (``"."`` when the root itself is the section), or ``None``
        when no enclosing leaf section exists inside *content_root*.
    """
    content_root = os.path.abspath(content_root)
    # Compare against "root + separator" so that a sibling such as
    # ``content2`` is not mistaken for something inside ``content``.
    if content_root.endswith(os.sep):
        root_prefix = content_root
    else:
        root_prefix = content_root + os.sep

    current = os.path.abspath(os.path.dirname(file_path))
    best = None
    while current == content_root or current.startswith(root_prefix):
        if current in leaf_dirs:
            best = current
            break
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root.
            break
        current = parent

    if best is None:
        return None

    rel = os.path.relpath(best, content_root)
    return "." if rel == "." else rel.replace(os.sep, "/")
|
||||
|
||||
|
||||
def load_articles(content_root):
    """Load metadata for every Markdown article under *content_root*.

    Each file returned by ``find_markdown_files`` is parsed with
    ``parse_frontmatter`` and described by a dict with these keys:

    - ``path``: path of the file as returned by ``find_markdown_files``
    - ``relativePath``: that path relative to *content_root*
    - ``title``: the ``title`` metadata, or the file name without extension
    - ``date``: result of ``parse_date`` on the ``date`` metadata
    - ``wordCount``: result of ``count_words`` on the body
    - ``section``: result of ``resolve_section`` (may be ``None``)
    - ``weather``: the ``weather`` metadata, or ``None``

    Returns:
        A list of such dicts, one per article file.
    """
    files = find_markdown_files(content_root)
    section_dirs = collect_section_dirs(content_root)
    leaf_dirs = leaf_sections(section_dirs)
    articles = []

    for file_path in files:
        fm, body = parse_frontmatter(file_path)
        # Front matter that is not a mapping (e.g. a bare scalar) carries no
        # usable metadata; normalise once so every lookup below is safe.
        if not isinstance(fm, dict):
            fm = {}

        # Named ``article_date`` so the imported ``date`` class is not shadowed.
        article_date = parse_date(fm.get("date"))
        title = fm.get("title") or os.path.splitext(os.path.basename(file_path))[0]
        word_count = count_words(body)
        rel_path = os.path.relpath(file_path, content_root)
        section = resolve_section(file_path, content_root, leaf_dirs)
        weather = fm.get("weather")

        articles.append(
            {
                "path": file_path,
                "relativePath": rel_path,
                "title": title,
                "date": article_date,
                "wordCount": word_count,
                "section": section,
                "weather": weather,
            }
        )

    return articles
|
||||
|
||||
|
||||
def write_result(data):
    """Serialise *data* as JSON on standard output and flush the stream."""
    from sys import stdout as out

    json.dump(data, fp=out)
    out.flush()
|
||||
Reference in New Issue
Block a user