#!/usr/bin/env python3
"""Helpers for scanning a Markdown content tree and reading article front matter."""
import json
import os
import re
from datetime import date, datetime, timezone

import yaml

# Abbreviated, unaccented French month labels. Presumably indexed by
# (month number - 1) -- confirm against the code that consumes them.
MONTH_LABELS = ["Jan", "Fev", "Mar", "Avr", "Mai", "Jun", "Jul", "Aou", "Sep", "Oct", "Nov", "Dec"]

# A front matter block: an opening `---` fence on the very first line and a
# closing `---` fence on a line of its own. Anchoring both fences to whole
# lines keeps a `---` that appears inside a YAML value from ending the block.
_FRONTMATTER_RE = re.compile(r"\A---[ \t]*\n(.*?)^---[ \t]*$", re.DOTALL | re.MULTILINE)

# Explicit string formats tried, in order, before falling back to
# datetime.fromisoformat().
_DATE_FORMATS = (
    "%Y-%m-%dT%H:%M:%S%z",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%d",
    "%Y/%m/%d",
    "%d/%m/%Y",
)


def find_markdown_files(root):
    """Return the paths of all Markdown files below *root*.

    A file qualifies when its name ends in ``.md`` (case-insensitive) and is
    not exactly ``_index.md``. Paths are built by joining the directory
    reported by ``os.walk`` with the file name.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        found.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.lower().endswith(".md") and name != "_index.md"
        )
    return found


def collect_section_dirs(root):
    """Return the set of absolute directory paths below *root* that contain ``_index.md``."""
    return {
        os.path.abspath(dirpath)
        for dirpath, _dirnames, filenames in os.walk(root)
        if "_index.md" in filenames
    }


def leaf_sections(section_dirs):
    """Return the members of *section_dirs* that have no other member nested inside them.

    Nesting is decided purely on path strings: ``other`` is nested in
    ``section`` when it starts with ``section + os.sep``.
    """
    return {
        section
        for section in section_dirs
        if not any(
            other != section and other.startswith(section + os.sep)
            for other in section_dirs
        )
    }


def parse_frontmatter(path):
    """Read *path* and split it into YAML front matter and body.

    Returns:
        A ``(data, body)`` tuple. ``data`` is always a dict: it is empty when
        the file has no front matter block, when the YAML cannot be loaded,
        or when the YAML document is not a mapping. ``body`` is the text
        following the closing fence, or the whole file content when no
        front matter block is found.
    """
    # "utf-8-sig" drops a leading BOM, which would otherwise sit in front of
    # the opening fence and make the front matter invisible.
    with open(path, "r", encoding="utf-8-sig") as handle:
        content = handle.read()

    match = _FRONTMATTER_RE.match(content)
    if match is None:
        return {}, content

    body = content[match.end():]
    try:
        data = yaml.safe_load(match.group(1))
    except Exception:
        # Deliberately best-effort: one malformed header must not abort the
        # whole scan, so any loader failure is treated as "no metadata".
        data = None

    # safe_load may legitimately return a list, scalar or None; callers rely
    # on a mapping, so anything else is normalised to an empty dict.
    if not isinstance(data, dict):
        data = {}
    return data, body


def parse_date(value):
    """Convert a front matter date value into a naive ``datetime``.

    Accepted inputs:
        * ``datetime`` -- used as is.
        * ``date`` -- combined with midnight.
        * ``int`` / ``float`` (not ``bool``) -- treated as a POSIX timestamp
          and interpreted in UTC.
        * ``str`` -- surrounding whitespace is stripped, then each entry of
          ``_DATE_FORMATS`` is tried in order, then
          ``datetime.fromisoformat``.

    Timezone-aware results are converted to UTC and their ``tzinfo`` is
    dropped; naive results are returned unchanged.

    Returns:
        A naive ``datetime``, or ``None`` when *value* is falsy, of an
        unsupported type, or cannot be parsed.
    """
    # bool is a subclass of int; a YAML true/false is never a timestamp.
    if not value or isinstance(value, bool):
        return None

    dt = None
    if isinstance(value, datetime):
        # Must be tested before `date`, since datetime subclasses date.
        dt = value
    elif isinstance(value, date):
        dt = datetime.combine(value, datetime.min.time())
    elif isinstance(value, (int, float)):
        try:
            # Interpret the timestamp in UTC so the result does not depend on
            # the local timezone of the machine running the script.
            dt = datetime.fromtimestamp(value, tz=timezone.utc)
        except (OverflowError, OSError, ValueError):
            dt = None
    elif isinstance(value, str):
        text = value.strip()
        for fmt in _DATE_FORMATS:
            try:
                dt = datetime.strptime(text, fmt)
            except ValueError:
                continue
            break
        else:
            # None of the explicit formats matched.
            try:
                dt = datetime.fromisoformat(text)
            except ValueError:
                dt = None

    if dt is None:
        return None
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt
# A word starts with a word character and may continue with word characters,
# apostrophes or hyphens. Requiring the leading word character keeps
# punctuation-only tokens (list bullets "-", "--", "---" rules) from being
# counted as words.
WORD_RE = re.compile(r"\w[\w'-]*", re.UNICODE)


def count_words(text):
    """Return the number of ``WORD_RE`` matches in *text* (0 for empty or ``None``)."""
    if not text:
        return 0
    return len(WORD_RE.findall(text))


def resolve_section(file_path, content_root, leaf_dirs):
    """Return the nearest leaf section that contains *file_path*.

    Starting from the file's directory, walk upwards while still inside
    *content_root* and stop at the first directory that is a member of
    *leaf_dirs* (expected to hold absolute paths).

    Returns:
        The section path relative to *content_root* with ``/`` separators,
        ``"."`` when the section is *content_root* itself, or ``None`` when
        no enclosing leaf section is found inside *content_root*.
    """
    content_root = os.path.abspath(content_root)
    current = os.path.abspath(os.path.dirname(file_path))

    # Compare against "<root><sep>" rather than the bare root string so that
    # a sibling such as "/site/content2" is not mistaken for being inside
    # "/site/content".
    if content_root.endswith(os.sep):
        root_prefix = content_root
    else:
        root_prefix = content_root + os.sep

    while current == content_root or current.startswith(root_prefix):
        if current in leaf_dirs:
            rel = os.path.relpath(current, content_root)
            return "." if rel == "." else rel.replace(os.sep, "/")
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root.
            return None
        current = parent
    return None


def load_articles(content_root):
    """Collect metadata for every Markdown article below *content_root*.

    Returns:
        A list with one dict per file, holding the keys ``path``,
        ``relativePath``, ``title`` (front matter title, else the file name
        without extension), ``date`` (result of ``parse_date``),
        ``wordCount``, ``section`` (result of ``resolve_section``) and
        ``weather`` (front matter value or ``None``).
    """
    files = find_markdown_files(content_root)
    leaf_dirs = leaf_sections(collect_section_dirs(content_root))

    articles = []
    for file_path in files:
        fm, body = parse_frontmatter(file_path)
        # Front matter that is not a mapping carries no usable metadata;
        # guard once here so every lookup below is safe.
        if not isinstance(fm, dict):
            fm = {}

        fallback_title = os.path.splitext(os.path.basename(file_path))[0]
        # Named `published` so the imported `datetime.date` is not shadowed.
        published = parse_date(fm.get("date"))

        articles.append(
            {
                "path": file_path,
                "relativePath": os.path.relpath(file_path, content_root),
                "title": fm.get("title") or fallback_title,
                "date": published,
                "wordCount": count_words(body),
                "section": resolve_section(file_path, content_root, leaf_dirs),
                "weather": fm.get("weather"),
            }
        )
    return articles


def write_result(data):
    """Serialise *data* as JSON to standard output and flush the stream.

    NOTE(review): ``json.dump`` raises ``TypeError`` for ``datetime``
    values, such as the ``date`` field produced by ``load_articles``;
    callers presumably convert those before calling this -- confirm.
    """
    import sys

    stream = sys.stdout
    json.dump(data, stream)
    stream.flush()