# donnees_meteo/docs/03 - Premiers graphiques/scripts/plot_basic_variables.py

# scripts/plot_basic_variables.py
"""Génère des séries temporelles simples pour chaque variable météo."""

from __future__ import annotations

import argparse
from pathlib import Path
import sys

import pandas as pd


# Fourth ancestor directory of this file. The `meteo` imports below are expected
# to resolve from it (NOTE(review): presumably the repository root — confirm).
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Prepend it to sys.path, only if it is not already listed, so the script can be
# launched directly without installing the package.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from meteo.dataset import load_raw_csv
from meteo.plots import PlotChoice, PlotStyle, plot_basic_series, recommended_style, resample_series_for_plot
from meteo.variables import Variable, VARIABLES


# Input dataset. The path is relative, so it is resolved against the current
# working directory at run time.
CSV_PATH = Path("data/weather_minutely.csv")
# Parent of the directory that holds this script.
DOC_DIR = Path(__file__).resolve().parent.parent
# Default destination for generated figures (overridable with --output-dir).
DEFAULT_OUTPUT_DIR = DOC_DIR / "figures"


def _select_window(df: pd.DataFrame, *, last_days: int | None) -> pd.DataFrame:
    """Return the rows of ``df`` that fall within its last ``last_days`` days.

    The window is anchored on the most recent timestamp found in the index
    (not on the current date) and includes both ends.

    Args:
        df: Frame indexed by timestamps.
        last_days: Window length in days; ``None`` keeps the whole frame.

    Returns:
        ``df`` itself when ``last_days`` is ``None`` or the frame has no rows;
        otherwise the rows whose timestamp is at or after
        ``latest - last_days``, in their original order.
    """
    # Nothing to trim, or nothing to anchor the window on: on an empty index
    # ``max()`` yields NaT, which must not be used as a bound.
    if last_days is None or len(df.index) == 0:
        return df

    latest = df.index.max()
    cutoff = latest - pd.Timedelta(days=last_days)
    # A boolean mask, unlike the label slice ``df.loc[cutoff:latest]``, does not
    # require the index to be sorted. The upper bound is implicit because
    # ``latest`` is the maximum of the index.
    return df.loc[df.index >= cutoff]


def _format_ylabel(var: Variable) -> str:
    """Build the y-axis caption: the label, followed by the unit in parentheses when one is set."""
    if not var.unit:
        # No unit (empty or None): the label stands alone.
        return f"{var.label}"
    return f"{var.label} ({var.unit})"


def _aggregation_label(choice: PlotChoice, freq: str) -> str:
    """Return a short caption naming the aggregation applied, followed by its frequency.

    The aggregation is read from ``choice.agg``: a callable named
    ``_circular_mean_deg`` is reported as a circular mean, the strings
    ``"sum"`` and ``"median"`` map to their own captions, and anything else
    is reported as a plain mean.
    """
    agg = choice.agg

    uses_circular_mean = callable(agg) and getattr(agg, "__name__", "") == "_circular_mean_deg"
    if uses_circular_mean:
        return f"moyenne circulaire {freq}"
    if agg == "sum":
        return f"somme {freq}"
    if agg == "median":
        return f"médiane {freq}"
    # Fallback for every other aggregation.
    return f"moyenne {freq}"


def _build_parser() -> argparse.ArgumentParser:
    """Declare the command-line options understood by the script."""
    cli = argparse.ArgumentParser(description="Trace les séries simples pour chaque variable météo.")
    cli.add_argument("--only", nargs="*", help="Clés de variables à tracer (par défaut : toutes).")
    cli.add_argument(
        "--days",
        type=int,
        default=None,
        help="Nombre de jours à afficher (par défaut : toute la période disponible).",
    )
    style_values = [style.value for style in PlotStyle]
    cli.add_argument(
        "--style",
        choices=style_values,
        help="Style de représentation à utiliser pour toutes les variables (par défaut : recommandations par variable).",
    )
    cli.add_argument(
        "--resample",
        help="Fréquence pandas à utiliser pour l'agrégation temporelle (par défaut : calcul automatique).",
    )
    cli.add_argument(
        "--max-points",
        type=int,
        default=420,
        help="Nombre de points cible après agrégation automatique (par défaut : 420).",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT_DIR,
        help="Dossier où stocker les figures.",
    )
    return cli


def main(argv: list[str] | None = None) -> None:
    """Plot one overview figure per selected weather variable.

    Parses the command line, loads the CSV at ``CSV_PATH``, optionally keeps
    only the last ``--days`` days, then for each selected variable drops
    missing values, resamples the series and writes
    ``<output-dir>/<key>_overview.png``. Variables whose column is absent, or
    that have no data before or after aggregation, are skipped with a warning
    printed to standard output.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Raises:
        FileNotFoundError: If ``CSV_PATH`` does not exist.
        KeyError: If ``--only`` names a key absent from ``VARIABLES``.
    """
    args = _build_parser().parse_args(argv)

    if not CSV_PATH.exists():
        raise FileNotFoundError(f"Dataset introuvable : {CSV_PATH}")

    window = _select_window(load_raw_csv(CSV_PATH), last_days=args.days)

    # An absent or empty --only means "every known variable".
    requested = set(args.only) if args.only else None
    chosen: list[Variable]
    if requested is None:
        chosen = list(VARIABLES)
    else:
        # Keep the order of VARIABLES rather than the order typed by the user.
        chosen = [candidate for candidate in VARIABLES if candidate.key in requested]
        unknown = requested.difference(candidate.key for candidate in chosen)
        if unknown:
            raise KeyError(f"Variables inconnues : {sorted(unknown)}")

    target_dir: Path = args.output_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    for variable in chosen:
        column = variable.column
        if column not in window.columns:
            print(f"⚠ Colonne absente pour {variable.key} ({column}).")
            continue

        values = window[column].dropna()
        if values.empty:
            print(f"⚠ Aucun point valide pour {variable.key} sur la période choisie.")
            continue

        choice = recommended_style(variable, args.style)

        resampled, freq_used = resample_series_for_plot(
            values,
            variable=variable,
            freq=args.resample,
            target_points=args.max_points,
        )
        if resampled.empty:
            print(f"⚠ Pas de points après agrégation pour {variable.key}.")
            continue

        figure_path = target_dir / f"{variable.key}_overview.png"
        plot_basic_series(
            resampled,
            variable=variable,
            output_path=figure_path,
            style=choice.style,
            title=f"{variable.label} — évolution temporelle",
            ylabel=_format_ylabel(variable),
            annotate_freq=_aggregation_label(choice, freq_used),
        )
        print(f"✔ Graphique généré : {figure_path}")


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()