167 lines
4.3 KiB
Python
167 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import re
|
|
import json
|
|
import yaml
|
|
from datetime import datetime, date, timezone
|
|
|
|
# Short month labels in calendar order (index 0 = January ... 11 = December).
# NOTE(review): the spellings look like ASCII-only French abbreviations
# ("Fev", "Avr", "Aou"); confirm against whatever renders them.
MONTH_LABELS = [
    "Jan",
    "Fev",
    "Mar",
    "Avr",
    "Mai",
    "Jun",
    "Jul",
    "Aou",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]
|
|
|
|
|
|
def find_markdown_files(root):
    """Return the paths of all Markdown files found beneath *root*.

    A file qualifies when its name ends with ``.md`` (compared
    case-insensitively) and is not exactly ``_index.md``. Paths are built by
    joining the directory reported by ``os.walk`` with the file name, and are
    returned in the order ``os.walk`` yields them.
    """
    collected = []
    for folder, _subdirs, names in os.walk(root):
        collected.extend(
            os.path.join(folder, name)
            for name in names
            if name != "_index.md" and name.lower().endswith(".md")
        )
    return collected
|
|
|
|
|
|
def collect_section_dirs(root):
    """Return the set of directories under *root* that hold an ``_index.md``.

    Every directory visited by ``os.walk`` (including *root* itself) whose
    file list contains ``_index.md`` is included, as an absolute path.
    """
    return {
        os.path.abspath(folder)
        for folder, _subdirs, names in os.walk(root)
        if "_index.md" in names
    }
|
|
|
|
|
|
def leaf_sections(section_dirs):
    """Return the section directories that have no section nested below them.

    A directory is kept when no other entry of *section_dirs* starts with that
    directory's path followed by the OS path separator. Comparison is purely
    textual, so entries are expected to be in a consistent form.
    """
    return {
        candidate
        for candidate in section_dirs
        # A path never starts with itself plus a separator, so the candidate
        # needs no explicit exclusion from the inner scan.
        if not any(other.startswith(candidate + os.sep) for other in section_dirs)
    }
|
|
|
|
|
|
def parse_frontmatter(path):
    """Read a UTF-8 text file and split it into YAML front matter and body.

    The file is considered to have front matter when its content starts with
    ``---`` and a second ``---`` occurs later; the text between the two is
    parsed with ``yaml.safe_load`` and the remainder is the body.

    Returns:
        A ``(data, body)`` tuple. ``data`` is always a dict: it is empty when
        there is no front matter, when the YAML cannot be parsed, or when the
        YAML parses to something other than a mapping (for example a bare
        string or a list). ``body`` is the text after the closing delimiter,
        or the whole file content when no front matter block was found.
    """
    with open(path, "r", encoding="utf-8") as handle:
        content = handle.read()

    if not content.startswith("---"):
        return {}, content

    parts = content.split("---", 2)
    if len(parts) < 3:
        # Opening delimiter without a closing one: treat everything as body.
        return {}, content

    _, fm_text, body = parts
    try:
        data = yaml.safe_load(fm_text)
    except Exception:
        # Deliberate best effort: a file with broken metadata is still
        # returned with its body, just without any front matter.
        data = None

    # Callers use ``data.get(...)``, so anything that is not a mapping
    # (None, scalar, list) is normalised to an empty dict.
    if not isinstance(data, dict):
        data = {}
    return data, body
|
|
|
|
|
|
def parse_date(value):
    """Convert a front matter date value into a naive ``datetime``.

    Accepted inputs:
        * ``datetime`` - used as is;
        * ``date`` - combined with midnight;
        * ``int`` / ``float`` - interpreted as seconds since the Unix epoch,
          converted in UTC so the result does not depend on the host's local
          time zone;
        * ``str`` - surrounding whitespace is ignored, then a fixed list of
          ``strptime`` formats is tried, followed by
          ``datetime.fromisoformat``.

    Timezone-aware results are converted to UTC and returned without tzinfo.
    Naive results are returned unchanged.

    Returns:
        A naive ``datetime``, or ``None`` when *value* is falsy, of an
        unsupported type, or cannot be parsed.
    """
    if not value:
        return None

    parsed = None
    if isinstance(value, datetime):
        # Checked before ``date`` because datetime is a subclass of date.
        parsed = value
    elif isinstance(value, date):
        parsed = datetime.combine(value, datetime.min.time())
    elif isinstance(value, (int, float)):
        try:
            # Aware UTC here; the tzinfo is stripped by the normalisation
            # step below, matching how aware datetimes are handled.
            parsed = datetime.fromtimestamp(value, tz=timezone.utc)
        except (OverflowError, OSError, ValueError):
            parsed = None
    elif isinstance(value, str):
        text = value.strip()
        # Try ISO-like formats first, then day-first slashes.
        for fmt in (
            "%Y-%m-%dT%H:%M:%S%z",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
            "%Y/%m/%d",
            "%d/%m/%Y",
        ):
            try:
                parsed = datetime.strptime(text, fmt)
                break
            except ValueError:
                continue
        if parsed is None:
            try:
                parsed = datetime.fromisoformat(text)
            except ValueError:
                parsed = None

    if parsed is None:
        return None

    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed
|
|
|
|
|
|
# A word starts with a word character (letter, digit or underscore) and may
# continue with word characters, apostrophes or hyphens. Requiring the leading
# word character keeps punctuation-only tokens - Markdown bullets ("-"),
# horizontal rules ("---"), a spaced " -- " - from being counted as words.
WORD_RE = re.compile(r"\w[\w'-]*", re.UNICODE)


def count_words(text):
    """Return the number of words in *text*.

    Words are the non-overlapping matches of ``WORD_RE``; contractions
    ("don't") and hyphenated compounds ("well-known") count as one word each.
    Returns 0 for ``None`` or an empty string.
    """
    if not text:
        return 0
    return len(WORD_RE.findall(text))
|
|
|
|
|
|
def resolve_section(file_path, content_root, leaf_dirs):
    """Find the leaf section that contains *file_path*.

    Starting from the file's directory, walk up towards *content_root* and
    stop at the first directory that is a member of *leaf_dirs* (which is
    expected to hold absolute paths).

    Returns:
        The matching directory relative to *content_root*, using ``/`` as the
        separator (``"."`` when the root itself is the match), or ``None``
        when no ancestor inside *content_root* is a leaf section.
    """
    content_root = os.path.abspath(content_root)
    current = os.path.abspath(os.path.dirname(file_path))

    # Compare on a path-component boundary: a plain ``startswith(root)`` would
    # also accept siblings such as "<root>2" that merely share the prefix.
    root_prefix = content_root.rstrip(os.sep) + os.sep

    while current == content_root or current.startswith(root_prefix):
        if current in leaf_dirs:
            return os.path.relpath(current, content_root).replace(os.sep, "/")
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root; nothing further to climb.
            break
        current = parent
    return None
|
|
|
|
|
|
def load_articles(content_root):
    """Collect metadata for every Markdown article beneath *content_root*.

    Files are discovered with ``find_markdown_files``; leaf sections are
    derived from ``collect_section_dirs`` and ``leaf_sections``.

    Returns:
        A list with one dict per file, holding the keys ``path`` (as
        discovered), ``relativePath`` (relative to *content_root*), ``title``
        (front matter ``title`` or the file name without extension), ``date``
        (result of ``parse_date`` on front matter ``date``), ``wordCount``
        (words in the body), ``section`` (result of ``resolve_section``) and
        ``weather`` (front matter ``weather`` or ``None``).
    """
    files = find_markdown_files(content_root)
    section_dirs = collect_section_dirs(content_root)
    leaf_dirs = leaf_sections(section_dirs)
    articles = []

    for file_path in files:
        fm, body = parse_frontmatter(file_path)
        if not isinstance(fm, dict):
            # Front matter that is not a mapping carries no usable keys;
            # normalise once so every lookup below is safe.
            fm = {}

        fallback_title = os.path.splitext(os.path.basename(file_path))[0]

        articles.append(
            {
                "path": file_path,
                "relativePath": os.path.relpath(file_path, content_root),
                "title": fm.get("title") or fallback_title,
                "date": parse_date(fm.get("date")),
                "wordCount": count_words(body),
                "section": resolve_section(file_path, content_root, leaf_dirs),
                "weather": fm.get("weather"),
            }
        )

    return articles
|
|
|
|
|
|
def write_result(data):
    """Serialize *data* as JSON onto standard output and flush the stream."""
    import sys

    stream = sys.stdout
    json.dump(data, stream)
    stream.flush()
|