# donnees_meteo/scripts/plot_hexbin_explorations.py

# scripts/plot_hexbin_explorations.py
from __future__ import annotations

from pathlib import Path
from typing import Callable

import numpy as np

from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.plots import plot_hexbin_with_third_variable


# Input dataset loaded by main(); a relative path, so it is resolved against
# the current working directory.
CSV_PATH: Path = Path("data/weather_minutely.csv")
# Directory under which main() places each scenario's output file.
OUTPUT_DIR: Path = Path("figures/hexbin_explorations")


# Single table of supported reducers: (scenario "reduce" name, NumPy function,
# French label). Both public mappings below are derived from it, so their keys
# cannot drift apart.
_REDUCERS: tuple[tuple[str, Callable[[np.ndarray], float], str], ...] = (
    ("mean", np.mean, "moyenne"),
    ("median", np.median, "médiane"),
    ("max", np.max, "maximum"),
)

# Reducer name -> callable handed to the plotting helper as ``reduce_func``.
REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
    name: func for name, func, _label in _REDUCERS
}

# Reducer name -> French label handed to the plotting helper as
# ``reduce_func_label``.
REDUCE_LABEL_FR: dict[str, str] = {
    name: label for name, _func, label in _REDUCERS
}

def _scenario(
    *,
    x: str,
    y: str,
    color: str,
    filename: str,
    description: str,
    reduce: str,
    gridsize: int,
    mincnt: int,
) -> dict[str, object]:
    """Build one scenario mapping; keyword-only so every entry names its fields."""
    return {
        "x": x,
        "y": y,
        "color": color,
        "filename": filename,
        "description": description,
        "reduce": reduce,
        "gridsize": gridsize,
        "mincnt": mincnt,
    }


# Each scenario illustrates either a well-known correlation or the absence of
# structure between variables.
# "x", "y" and "color" are variable keys, "filename" is the output file name,
# "description" is the text printed before plotting, and "reduce", "gridsize"
# and "mincnt" are plotting options.
HEXBIN_SCENARIOS: list[dict[str, object]] = [
    _scenario(
        x="temperature",
        y="humidity",
        color="rain_rate",
        filename="hexbin_temp_humidity_color_rain.png",
        description=(
            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
            "et comment les épisodes de pluie se situent dans une bande restreinte."
        ),
        reduce="max",
        gridsize=50,
        mincnt=8,
    ),
    _scenario(
        x="pressure",
        y="rain_rate",
        color="wind_speed",
        filename="hexbin_pressure_rain_color_wind.png",
        description=(
            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
        ),
        reduce="median",
        gridsize=45,
        mincnt=5,
    ),
    _scenario(
        x="illuminance",
        y="humidity",
        color="temperature",
        filename="hexbin_lux_humidity_color_temp.png",
        description=(
            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
            "mais cela n'implique pas toujours une baisse rapide de température."
        ),
        reduce="mean",
        gridsize=55,
        mincnt=6,
    ),
]


def main() -> None:
    """Generate one hexbin figure per entry of ``HEXBIN_SCENARIOS``.

    Loads the CSV at ``CSV_PATH`` with ``load_raw_csv``, prints a short summary
    of the dataset, then for every scenario looks up its three variables in
    ``VARIABLES_BY_KEY`` and calls ``plot_hexbin_with_third_variable`` with
    ``OUTPUT_DIR / filename`` as the output path.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without plotting anything.

    Raises:
        KeyError: if a scenario omits one of the required keys ("x", "y",
            "color", "filename", "description"). An unknown variable key also
            fails at the ``VARIABLES_BY_KEY`` lookup.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f"  Lignes   : {len(df)}")
    print(f"  Colonnes : {list(df.columns)}")
    print()

    # Create the output directory up front (idempotent) in case the plotting
    # helper does not create it itself.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for scenario in HEXBIN_SCENARIOS:
        var_x = VARIABLES_BY_KEY[scenario["x"]]
        var_y = VARIABLES_BY_KEY[scenario["y"]]
        var_color = VARIABLES_BY_KEY[scenario["color"]]

        # Scenario values are typed as ``object``; str() narrows the type and
        # is a no-op for the string values used in HEXBIN_SCENARIOS.
        output_path = OUTPUT_DIR / str(scenario["filename"])

        # Resolve the reducer and its label from the SAME name. Previously an
        # unknown name fell back to np.mean while the label kept the unknown
        # name, so the figure could be labelled with a reduction that was not
        # the one actually applied.
        reduce_name = str(scenario.get("reduce", "mean"))
        if reduce_name not in REDUCE_FUNCTIONS:
            print(f"   ⚠ Réduction inconnue « {reduce_name} » : utilisation de la moyenne.")
            reduce_name = "mean"
        reduce_func = REDUCE_FUNCTIONS.get(reduce_name, np.mean)
        reduce_label = REDUCE_LABEL_FR.get(reduce_name, reduce_name)

        gridsize = int(scenario.get("gridsize", 60))
        mincnt = int(scenario.get("mincnt", 5))

        description = scenario["description"]
        print(f"→ Hexbin {var_y.key} vs {var_x.key} (couleur = {var_color.key})")
        print(f"   {description}")

        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=gridsize,
            mincnt=mincnt,
            reduce_func=reduce_func,
            reduce_func_label=reduce_label,
            cmap="magma",
        )
        print(f"   ✔ Graphique enregistré : {output_path}")
        print()

    print("✔ Tous les graphiques hexbin ont été générés.")


# Run the figure generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()