# donnees_meteo/docs/12 - Conclusion/scripts/plot_weekday_profiles.py

from __future__ import annotations

import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fourth ancestor directory of this file. It is put at the front of sys.path
# (once) before the `meteo` imports below -- presumably so that package resolves
# when the script is run directly from its own folder.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from meteo.dataset import load_raw_csv
from meteo.plots import export_plot_dataset, plot_weekday_profiles
from meteo.variables import VARIABLES_BY_KEY, Variable


# Input dataset, located under the project root.
CSV_PATH = PROJECT_ROOT / "data" / "weather_minutely.csv"
# Directory that contains this script's folder; every figure goes in its "figures" subfolder.
DOC_DIR = Path(__file__).resolve().parent.parent
OUTPUT_PROFILES_PATH = DOC_DIR / "figures" / "weekday_profiles.png"
OUTPUT_SCORES_PATH = DOC_DIR / "figures" / "weekday_scores.png"
OUTPUT_RADAR_DIR = DOC_DIR / "figures" / "weekday_radars"

# We focus on how "pleasant" the weather feels:
# - temperature (higher = better score),
# - humidity (lower = better score),
# - atmospheric pressure (highest score for a "comfortable" range),
# - illuminance (higher = better score),
# - wind (lower = better score).
VARIABLE_KEYS = ["temperature", "humidity", "pressure", "illuminance", "wind_speed"]

# Scoring type per variable (key = Variable.key)
COMFORT_SCORING: dict[str, str] = {
    "temperature": "higher_better",
    "humidity": "lower_better",
    "pressure": "midrange_better",
    "illuminance": "higher_better",
    "wind_speed": "lower_better",
}


def compute_weekday_means(df: pd.DataFrame, variables: list[Variable]) -> pd.DataFrame:
    """
    Average each requested variable per day of the week.

    Args:
        df: Observations whose index exposes ``dayofweek`` (e.g. a
            ``DatetimeIndex``).
        variables: Variables to average; only their ``column`` attribute is read.

    Returns:
        A frame indexed by weekday (0 = Monday, ..., 6 = Sunday) with the index
        named ``"weekday"`` and one column per requested variable. Weekdays
        that never occur in ``df`` are kept as all-NaN rows. An empty ``df``
        yields the same seven-row named index with no columns.
    """
    if df.empty:
        means = pd.DataFrame(index=range(7))
    else:
        wanted_columns = [variable.column for variable in variables]
        grouped = df.groupby(df.index.dayofweek)[wanted_columns].mean()
        # Guarantee rows 0-6 even when some weekdays are absent from the data.
        means = grouped.reindex(range(7))

    # Name the index on every path so callers always get the same layout.
    means.index.name = "weekday"
    return means


def compute_weekday_scores(weekday_means: pd.DataFrame, variables: list[Variable]) -> pd.DataFrame:
    """
    Turn per-weekday means into normalised comfort scores in [0, 1].

    Each variable is min-max normalised across the weekdays according to its
    entry in ``COMFORT_SCORING`` (looked up by ``Variable.key``):

    - ``"higher_better"``: the largest mean scores 1, the smallest 0;
    - ``"lower_better"``: the smallest mean scores 1, the largest 0;
    - ``"midrange_better"``: the midpoint between the smallest and the largest
      mean scores 1 and both extremes score 0.

    A variable with no entry, or with an unrecognised scoring type, is treated
    as ``"higher_better"``. When all observed means of a variable are equal,
    every observed weekday scores 1. Weekdays with no data stay NaN in every
    case, so they never receive a score they did not earn.

    Args:
        weekday_means: Frame indexed by weekday, one column per variable.
        variables: Variables to score; their ``column`` and ``key`` attributes
            are read. Variables whose column is missing or entirely NaN are
            skipped.

    Returns:
        A frame with the same index (named ``"weekday"``), one score column
        per scored variable and an ``"overall_score"`` column holding the
        row-wise mean of the available scores. If ``weekday_means`` is empty
        or no variable could be scored, a frame without columns is returned.
    """
    if weekday_means.empty:
        no_scores = pd.DataFrame(index=range(7))
        no_scores.index.name = "weekday"
        return no_scores

    def _tied_score(series: pd.Series) -> pd.Series:
        # All observed days tie: give them the top score, keep missing days NaN.
        return pd.Series(1.0, index=series.index).where(series.notna())

    def _normalize_monotonic(series: pd.Series, *, higher_is_better: bool) -> pd.Series:
        lowest = float(series.min(skipna=True))
        highest = float(series.max(skipna=True))
        if np.isclose(highest, lowest):
            return _tied_score(series)
        scaled = (series - lowest) / (highest - lowest)
        return scaled if higher_is_better else 1.0 - scaled

    def _normalize_midrange(series: pd.Series) -> pd.Series:
        lowest = float(series.min(skipna=True))
        highest = float(series.max(skipna=True))
        if np.isclose(highest, lowest):
            return _tied_score(series)
        midpoint = 0.5 * (lowest + highest)
        half_range = 0.5 * (highest - lowest)
        # 1 at the midpoint, falling linearly to 0 at the min and max.
        return (1.0 - (series - midpoint).abs() / half_range).clip(lower=0.0, upper=1.0)

    scores = pd.DataFrame(index=weekday_means.index)
    scores.index.name = "weekday"

    for var in variables:
        column = var.column
        if column not in weekday_means.columns:
            continue
        series = weekday_means[column]
        if series.isna().all():
            continue

        scoring = COMFORT_SCORING.get(var.key, "higher_better")
        if scoring == "midrange_better":
            scores[column] = _normalize_midrange(series)
        else:
            # Anything other than "lower_better" (including unknown scoring
            # types) falls back to "higher is better".
            scores[column] = _normalize_monotonic(
                series, higher_is_better=scoring != "lower_better"
            )

    if scores.empty:
        return scores

    # The mean is computed before the column is added, so it only averages
    # the per-variable scores; NaN scores are ignored.
    scores["overall_score"] = scores.mean(axis=1, skipna=True)
    return scores


def plot_overall_weekday_score(
    scores: pd.DataFrame,
    weekday_labels: list[str],
    output_path: Path,
) -> Path | None:
    """
    Draw a bar chart of the overall comfort score for each weekday.

    The ``"overall_score"`` column is aligned on weekday positions
    ``0 .. len(weekday_labels) - 1`` so that bar *i* always sits under
    ``weekday_labels[i]``, even if rows are missing or out of order. The best
    weekday is drawn in a darker colour.

    Args:
        scores: Frame indexed by weekday number, with an ``"overall_score"`` column.
        weekday_labels: Tick labels, one per weekday position.
        output_path: Destination of the PNG; parent folders are created.

    Returns:
        The resolved path of the written figure, or ``None`` when there is no
        ``"overall_score"`` column or no score to display for the labelled
        weekdays.
    """
    if "overall_score" not in scores.columns:
        return None

    positions = np.arange(len(weekday_labels))
    values = scores["overall_score"].reindex(positions).to_numpy(dtype=float)
    if np.isnan(values).all():
        # Nothing to draw (also covers an empty label list).
        return None

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Project helper; presumably stores the plotted data alongside the figure.
    export_plot_dataset(scores, output_path)

    bar_colors = ["#9ecae1"] * len(values)
    bar_colors[int(np.nanargmax(values))] = "#08519c"

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.bar(positions, values, color=bar_colors)
        ax.set_xticks(positions)
        ax.set_xticklabels(weekday_labels)
        ax.set_ylabel("Score global (0–1)")
        ax.set_ylim(0, 1.05)
        ax.set_title("Score global d'agrément par jour de semaine")
        ax.grid(True, axis="y", linestyle=":", alpha=0.5)

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    return output_path.resolve()


def plot_weekday_radars(
    scores: pd.DataFrame,
    variables: list[Variable],
    weekday_labels_long: list[str],
    output_dir: Path,
) -> list[Path]:
    """
    Write one radar chart per weekday, with one axis per scored variable.

    Only variables whose column is present in ``scores`` get an axis. A
    weekday whose scores are all NaN is skipped; isolated NaN scores are drawn
    as 0.

    Args:
        scores: Frame indexed by weekday number with one score column per variable.
        variables: Candidate variables; their ``column`` and ``label`` are read.
        weekday_labels_long: Weekday names used in titles; a weekday outside
            this list is titled with its raw index value.
        output_dir: Folder receiving ``weekday_radar_<n>.png``; created if needed.

    Returns:
        Resolved paths of the figures written, in index order (possibly empty).
    """
    paths: list[Path] = []
    if scores.empty:
        return paths

    output_dir.mkdir(parents=True, exist_ok=True)

    plotted = [var for var in variables if var.column in scores.columns]
    if not plotted:
        return paths
    var_cols = [var.column for var in plotted]
    labels = [var.label for var in plotted]

    # One angle per variable, with the first repeated to close the polygon.
    angles = np.linspace(0, 2 * np.pi, len(plotted), endpoint=False)
    angles = np.concatenate([angles, angles[:1]])

    for weekday in scores.index:
        day_scores = scores.loc[weekday, var_cols]
        if day_scores.isna().all():
            continue

        values = np.nan_to_num(day_scores.to_numpy(dtype=float), nan=0.0)
        values = np.concatenate([values, values[:1]])

        day_number = int(weekday)
        if 0 <= day_number < len(weekday_labels_long):
            day_label = weekday_labels_long[day_number]
        else:
            day_label = str(weekday)
        output_path = output_dir / f"weekday_radar_{day_number}.png"

        fig, ax = plt.subplots(subplot_kw={"projection": "polar"}, figsize=(6, 6))
        try:
            ax.plot(angles, values, marker="o")
            ax.fill(angles, values, alpha=0.25)
            ax.set_xticks(angles[:-1])
            ax.set_xticklabels(labels)
            ax.set_yticks([0.25, 0.5, 0.75, 1.0])
            ax.set_ylim(0, 1.05)
            ax.set_title(f"Profil radar des scores – {day_label}")

            fig.tight_layout()
            # Export the raw (un-filled) scores behind this radar.
            export_plot_dataset(day_scores.to_frame().T, output_path)
            fig.savefig(output_path, dpi=150)
        finally:
            # Always release the figure, even if exporting or saving fails.
            plt.close(fig)
        paths.append(output_path.resolve())

    return paths


def plot_weekday_radar_all(
    scores: pd.DataFrame,
    variables: list[Variable],
    weekday_labels_long: list[str],
    output_path: Path,
) -> Path | None:
    """
    Write a single radar chart overlaying every weekday.

    Only variables whose column is present in ``scores`` get an axis. A
    weekday whose scores are all NaN is left out; isolated NaN scores are
    drawn as 0. Colours cycle through the ``tab10`` colormap by row position.

    Args:
        scores: Frame indexed by weekday number with one score column per variable.
        variables: Candidate variables; their ``column`` and ``label`` are read.
        weekday_labels_long: Weekday names used in the legend; a weekday
            outside this list is labelled with its raw index value.
        output_path: Destination of the PNG; parent folders are created.

    Returns:
        The resolved path of the written figure, or ``None`` when ``scores`` is
        empty or holds none of the requested variable columns.
    """
    if scores.empty:
        return None

    output_path.parent.mkdir(parents=True, exist_ok=True)

    plotted = [var for var in variables if var.column in scores.columns]
    if not plotted:
        return None
    var_cols = [var.column for var in plotted]
    labels = [var.label for var in plotted]

    # One angle per variable, with the first repeated to close the polygon.
    angles = np.linspace(0, 2 * np.pi, len(plotted), endpoint=False)
    angles = np.concatenate([angles, angles[:1]])

    cmap = plt.get_cmap("tab10")
    fig, ax = plt.subplots(subplot_kw={"projection": "polar"}, figsize=(7, 7))
    try:
        for position, weekday in enumerate(scores.index):
            day_scores = scores.loc[weekday, var_cols]
            if day_scores.isna().all():
                continue

            values = np.nan_to_num(day_scores.to_numpy(dtype=float), nan=0.0)
            values = np.concatenate([values, values[:1]])

            day_number = int(weekday)
            if 0 <= day_number < len(weekday_labels_long):
                day_label = weekday_labels_long[day_number]
            else:
                day_label = str(weekday)

            color = cmap(position % 10)
            ax.plot(angles, values, marker="o", color=color, label=day_label)
            ax.fill(angles, values, color=color, alpha=0.15)

        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(labels)
        ax.set_yticks([0.25, 0.5, 0.75, 1.0])
        ax.set_ylim(0, 1.05)
        ax.set_title("Profils radar des scores – tous les jours")
        ax.grid(True, linestyle=":", alpha=0.4)
        # Legend sits outside the polar axes so it does not cover the curves.
        ax.legend(loc="upper right", bbox_to_anchor=(1.35, 1.05), borderaxespad=0.0)

        # Export the raw scores used for this combined radar.
        export_plot_dataset(scores[var_cols], output_path)

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Always release the figure, even if exporting or saving fails.
        plt.close(fig)
    return output_path.resolve()


def main() -> None:
    """
    Build the weekday report from the minutely dataset.

    Steps: load ``CSV_PATH``, average the selected variables per weekday, plot
    those profiles, print the weekday with the highest mean for each variable,
    then compute the normalised comfort scores and, when scores are available,
    print them and write the score bar chart and the radar charts.

    Returns early with a warning if the CSV file does not exist.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f"  Lignes   : {len(df)}")
    print(f"  Colonnes : {list(df.columns)}")
    print()

    variables = [VARIABLES_BY_KEY[key] for key in VARIABLE_KEYS]
    weekday_means = compute_weekday_means(df, variables)

    output_path = plot_weekday_profiles(
        weekday_df=weekday_means,
        variables=variables,
        output_path=OUTPUT_PROFILES_PATH,
        title="Moyennes par jour de semaine",
    )

    print(f"✔ Profils hebdomadaires exportés : {output_path}")

    weekday_labels_long = ["lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"]
    for var in variables:
        # An empty dataset yields a means frame without the variable columns;
        # skip instead of failing with a KeyError.
        if var.column not in weekday_means.columns:
            continue
        series = weekday_means[var.column]
        if series.isna().all():
            continue
        best_idx = int(series.idxmax())
        best_label = weekday_labels_long[best_idx]
        best_value = series.max()
        unit = f" {var.unit}" if var.unit else ""
        print(f"  → {var.label} maximale en moyenne le {best_label} (≈{best_value:.2f}{unit})")

    # Normalised per-variable scores and the overall score.
    scores = compute_weekday_scores(weekday_means, variables)
    if scores.empty or "overall_score" not in scores.columns:
        return

    print()
    print("Scores globaux (0–1) par jour de semaine :")
    overall = scores["overall_score"]
    for idx, label in enumerate(weekday_labels_long):
        value = overall.get(idx)
        if pd.isna(value):
            continue
        print(f"  - {label:<9} : {value:.3f}")

    best_idx = int(overall.idxmax())
    best_label = weekday_labels_long[best_idx]
    best_score = overall.max()
    print()
    print(
        f"⇒ Jour le plus « agréable » au sens de ce score normalisé : "
        f"{best_label} (score global ≈{best_score:.3f})."
    )

    # Summary bar chart of the overall scores.
    weekday_labels_short = ["Lun", "Mar", "Mer", "Jeu", "Ven", "Sam", "Dim"]
    scores_path = plot_overall_weekday_score(
        scores=scores,
        weekday_labels=weekday_labels_short,
        output_path=OUTPUT_SCORES_PATH,
    )
    if scores_path is not None:
        print(f"✔ Graphique des scores globaux exporté : {scores_path}")

    # One radar chart per weekday.
    radar_paths = plot_weekday_radars(
        scores=scores,
        variables=variables,
        weekday_labels_long=weekday_labels_long,
        output_dir=OUTPUT_RADAR_DIR,
    )
    if radar_paths:
        print(f"✔ Graphiques radar exportés ({len(radar_paths)}) dans : {OUTPUT_RADAR_DIR}")

    # Combined radar chart overlaying every weekday.
    radar_all_path = plot_weekday_radar_all(
        scores=scores,
        variables=variables,
        weekday_labels_long=weekday_labels_long,
        output_path=DOC_DIR / "figures" / "weekday_radar_all.png",
    )
    if radar_all_path is not None:
        print(f"✔ Graphique radar global exporté : {radar_all_path}")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()