# donnees_meteo/scripts/plot_rolling_correlation_heatmap.py

# scripts/plot_rolling_correlation_heatmap.py
from __future__ import annotations

from pathlib import Path

from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_rolling_correlations_for_pairs
from meteo.plots import plot_rolling_correlation_heatmap


# Input CSV (minute-level weather data, per its file name) that main() checks
# for and loads with load_raw_csv.
CSV_PATH = Path("data/weather_minutely.csv")
# Destination handed to plot_rolling_correlation_heatmap as output_path.
OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")

# Pairs of variable keys to correlate. main() resolves each key through
# VARIABLES_BY_KEY, so every key listed here must exist in that mapping.
ROLLING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]

WINDOW_MINUTES = 180  # 3 hours, to observe synoptic-scale trends
STEP_MINUTES = 30     # only one point is sampled every 30 minutes


def main() -> None:
    """Build and save the rolling-correlation heatmap for ``ROLLING_PAIRS``.

    Steps:
      1. Load the CSV at ``CSV_PATH`` with ``load_raw_csv`` and print a short
         summary (row count and column names).
      2. Resolve every key pair of ``ROLLING_PAIRS`` through
         ``VARIABLES_BY_KEY`` (an unknown key raises ``KeyError``).
      3. Compute Pearson rolling correlations using ``WINDOW_MINUTES`` and
         ``STEP_MINUTES``.
      4. Pass the result to ``plot_rolling_correlation_heatmap`` and print
         the path it returns.

    Prints a warning and returns early when the input file does not exist or
    when the computed correlation frame is empty.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    frame = load_raw_csv(CSV_PATH)

    # One call, newline-separated: same three summary lines as before.
    print(
        f"Dataset minuté chargé : {CSV_PATH}",
        f"  Lignes   : {len(frame)}",
        f"  Colonnes : {list(frame.columns)}",
        sep="\n",
    )
    print()

    # Turn the (key, key) configuration into pairs of variable objects.
    variable_pairs = [
        (VARIABLES_BY_KEY[left_key], VARIABLES_BY_KEY[right_key])
        for left_key, right_key in ROLLING_PAIRS
    ]

    correlations = compute_rolling_correlations_for_pairs(
        df=frame,
        pairs=variable_pairs,
        window_minutes=WINDOW_MINUTES,
        # NOTE(review): presumably the minimum share of valid samples a
        # window needs to be kept — confirm against meteo.analysis.
        min_valid_fraction=0.7,
        step_minutes=STEP_MINUTES,
        method="pearson",
    )

    if correlations.empty:
        print("⚠ Impossible de calculer les corrélations glissantes (données insuffisantes).")
        return

    # Colour scale pinned to [-1, 1], the full range of a correlation value.
    saved_path = plot_rolling_correlation_heatmap(
        rolling_corr=correlations,
        output_path=OUTPUT_PATH,
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
    )

    print(f"✔ Heatmap de corrélations glissantes enregistrée : {saved_path}")


# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()