#!/usr/bin/env python3

import os
import re
import json
import yaml
from datetime import datetime, date, timezone

# Twelve short month labels in calendar order (index 0 is January, index 11 is
# December). The spellings look like French abbreviations written without
# accents ("Fev", "Avr", "Aou").
MONTH_LABELS = ["Jan", "Fev", "Mar", "Avr", "Mai", "Jun", "Jul", "Aou", "Sep", "Oct", "Nov", "Dec"]


def find_markdown_files(root):
    """Return the paths of all Markdown files found beneath *root*.

    A file qualifies when its name ends with ".md" (compared
    case-insensitively) and is not exactly "_index.md". Paths are built by
    joining the walked directory with the file name, in ``os.walk`` order.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root)
        for name in names
        if name.lower().endswith(".md") and name != "_index.md"
    ]


def collect_section_dirs(root):
    """Return the absolute paths of every directory under *root* (inclusive)
    that directly contains a file named "_index.md"."""
    return {
        os.path.abspath(folder)
        for folder, _subdirs, names in os.walk(root)
        if "_index.md" in names
    }


def leaf_sections(section_dirs):
    """Return the subset of *section_dirs* that has no other entry nested
    beneath it.

    An entry counts as nested when its path starts with the candidate path
    followed by the OS path separator.
    """

    def has_nested_section(candidate):
        prefix = candidate + os.sep
        return any(
            entry != candidate and entry.startswith(prefix)
            for entry in section_dirs
        )

    return {entry for entry in section_dirs if not has_nested_section(entry)}


# A front matter fence is a line made of exactly three dashes (trailing blanks
# tolerated). Matching whole lines keeps a "---" inside a YAML value from
# being mistaken for the closing fence.
_FRONTMATTER_FENCE = re.compile(r"^---[ \t]*$", re.MULTILINE)


def parse_frontmatter(path):
    """Split a Markdown file into its YAML front matter and its body.

    Args:
        path: Path of the file to read (decoded as UTF-8).

    Returns:
        A ``(data, body)`` tuple. ``data`` is always a dict: the parsed front
        matter mapping, or ``{}`` when the file has no front matter, the YAML
        cannot be parsed, or the YAML document is not a mapping. ``body`` is
        the text following the closing fence, or the whole file content when
        no complete front matter block is present.
    """
    # utf-8-sig drops a leading byte-order mark, which would otherwise hide
    # the opening fence from the match below.
    with open(path, "r", encoding="utf-8-sig") as handle:
        content = handle.read()

    opening = _FRONTMATTER_FENCE.match(content)
    if opening is None:
        return {}, content

    closing = _FRONTMATTER_FENCE.search(content, opening.end())
    if closing is None:
        return {}, content

    fm_text = content[opening.end():closing.start()]
    body = content[closing.end():]

    try:
        data = yaml.safe_load(fm_text)
    except Exception:
        # Deliberately best-effort: besides yaml.YAMLError, the loader can
        # raise e.g. ValueError for impossible dates. A broken header must
        # not abort the whole scan.
        data = None

    # Empty front matter loads as None and a list/scalar document is not
    # usable as metadata; callers rely on getting a mapping.
    if not isinstance(data, dict):
        data = {}
    return data, body


def parse_date(value):
    """Convert a front matter date value into a naive ``datetime``.

    Accepted inputs:
      * ``datetime`` -- used as is;
      * ``date`` -- combined with midnight;
      * ``int`` / ``float`` -- seconds since the epoch, interpreted in UTC;
      * ``str`` -- tried against a few explicit formats, then against
        ``datetime.fromisoformat`` (a trailing "Z" is accepted as UTC).

    Timezone-aware results are converted to UTC and returned without tzinfo,
    so every returned value is naive.

    Returns:
        A naive ``datetime``, or ``None`` when *value* is falsy (including
        ``0`` and ``""``), a boolean, an unsupported type, or unparseable.
    """
    # bool is a subclass of int; True must not be read as "1 second after
    # the epoch".
    if not value or isinstance(value, bool):
        return None

    dt = None
    # datetime is checked first because it is itself a subclass of date.
    if isinstance(value, datetime):
        dt = value
    elif isinstance(value, date):
        dt = datetime.combine(value, datetime.min.time())
    elif isinstance(value, (int, float)):
        try:
            # Interpret the timestamp in UTC so it lines up with the UTC
            # normalisation applied to aware datetimes below, instead of
            # depending on the machine's local timezone.
            dt = datetime.fromtimestamp(value, tz=timezone.utc)
        except (OverflowError, OSError, ValueError):
            dt = None
    elif isinstance(value, str):
        text = value.strip()
        for fmt in (
            "%Y-%m-%dT%H:%M:%S%z",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
            "%Y/%m/%d",
            "%d/%m/%Y",
        ):
            try:
                dt = datetime.strptime(text, fmt)
                break
            except ValueError:
                continue
        if dt is None:
            # fromisoformat only understands a "Z" suffix on newer
            # interpreters; spell it as an explicit offset.
            iso_text = text[:-1] + "+00:00" if text.endswith(("Z", "z")) else text
            try:
                dt = datetime.fromisoformat(iso_text)
            except ValueError:
                dt = None

    if dt is None:
        return None

    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)

    return dt


WORD_RE = re.compile(r"[\w'-]+", re.UNICODE)


def count_words(text):
    """Return the number of words in *text*.

    Candidate tokens are runs of word characters, apostrophes and hyphens
    (``WORD_RE``). A token only counts as a word when it contains at least
    one alphanumeric character, so Markdown list bullets ("-"), horizontal
    rules ("---", "___") and stray apostrophes do not inflate the total.
    Falsy input (``None`` or an empty string) yields 0.
    """
    if not text:
        return 0
    return sum(
        1
        for token in WORD_RE.findall(text)
        if any(char.isalnum() for char in token)
    )


def resolve_section(file_path, content_root, leaf_dirs):
    """Find the leaf section a file belongs to.

    Starting at the file's directory, walk up towards *content_root* and stop
    at the first directory that appears in *leaf_dirs*.

    Args:
        file_path: Path of the article file.
        content_root: Root directory of the content tree.
        leaf_dirs: Collection of absolute directory paths to match against.

    Returns:
        The matching directory relative to *content_root*, using "/" as the
        separator ("." when the root itself matches), or ``None`` when no
        directory between the file and the root is in *leaf_dirs*.
    """
    content_root = os.path.abspath(content_root)
    # Compare on a separator boundary: a bare prefix test would treat a
    # sibling such as "<root>-old" as if it were inside the root.
    if content_root.endswith(os.sep):
        root_prefix = content_root
    else:
        root_prefix = content_root + os.sep

    current = os.path.abspath(os.path.dirname(file_path))
    while current == content_root or current.startswith(root_prefix):
        if current in leaf_dirs:
            rel = os.path.relpath(current, content_root)
            return "." if rel == "." else rel.replace(os.sep, "/")
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root; nothing further to climb.
            break
        current = parent
    return None


def load_articles(content_root):
    """Build one metadata record per Markdown article under *content_root*.

    Each record is a dict with the keys:
      * "path" -- the file path as discovered;
      * "relativePath" -- the path relative to *content_root*;
      * "title" -- the front matter title, or the file name without its
        extension when the title is missing or empty;
      * "date" -- the front matter date parsed by ``parse_date`` (may be
        ``None``);
      * "wordCount" -- word count of the body;
      * "section" -- the leaf section resolved by ``resolve_section`` (may be
        ``None``);
      * "weather" -- the raw front matter "weather" value, if any.

    Returns:
        A list of such records, in file discovery order.
    """
    files = find_markdown_files(content_root)
    section_dirs = collect_section_dirs(content_root)
    leaf_dirs = leaf_sections(section_dirs)
    articles = []

    for file_path in files:
        fm, body = parse_frontmatter(file_path)
        # Front matter that is not a mapping (e.g. a YAML list) carries no
        # usable metadata; normalise it before any .get() lookup.
        if not isinstance(fm, dict):
            fm = {}

        # Named "published" rather than "date" so the imported datetime.date
        # class is not shadowed inside this function.
        published = parse_date(fm.get("date"))
        title = fm.get("title") or os.path.splitext(os.path.basename(file_path))[0]
        word_count = count_words(body)
        rel_path = os.path.relpath(file_path, content_root)
        section = resolve_section(file_path, content_root, leaf_dirs)

        articles.append(
            {
                "path": file_path,
                "relativePath": rel_path,
                "title": title,
                "date": published,
                "wordCount": word_count,
                "section": section,
                "weather": fm.get("weather"),
            }
        )

    return articles


def write_result(data):
    """Serialise *data* as JSON onto standard output and flush the stream."""
    import sys

    stream = sys.stdout
    json.dump(data, stream)
    stream.flush()