# donnees_meteo/scripts/plot_hexbin_explorations.py
# Repository listing metadata: 129 lines, 3.7 KiB, Python.

# scripts/plot_hexbin_explorations.py
from __future__ import annotations
from pathlib import Path
from typing import Callable
import numpy as np
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.plots import plot_hexbin_with_third_variable
# Input CSV loaded by main(); relative, so resolved against the current
# working directory.
CSV_PATH = Path("data/weather_minutely.csv")

# Directory under which main() places every generated figure.
OUTPUT_DIR = Path("figures/hexbin_explorations")

# Aggregation callables a scenario can select through its "reduce" key.
REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
    "mean": np.mean,
    "median": np.median,
    "max": np.max,
}

# French display label for each reducer name; main() forwards it to the
# plotting helper as ``reduce_func_label``.
REDUCE_LABEL_FR: dict[str, str] = {
    "mean": "moyenne",
    "median": "médiane",
    "max": "maximum",
}
# Each scenario illustrates either a well-known correlation or the absence
# of structure between variables.
#
# Keys read by main():
#   "x", "y", "color"  -> keys looked up in VARIABLES_BY_KEY
#   "filename"         -> file name appended to OUTPUT_DIR
#   "description"      -> text printed before the figure is generated
#   "reduce"           -> name looked up in REDUCE_FUNCTIONS / REDUCE_LABEL_FR
#   "gridsize", "mincnt" -> converted to int and forwarded to the plot helper
HEXBIN_SCENARIOS: list[dict[str, object]] = [
    # Temperature vs humidity, coloured by rain rate.
    {
        "x": "temperature",
        "y": "humidity",
        "color": "rain_rate",
        "filename": "hexbin_temp_humidity_color_rain.png",
        "description": (
            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
            "et comment les épisodes de pluie se situent dans une bande restreinte."
        ),
        "reduce": "max",
        "gridsize": 50,
        "mincnt": 8,
    },
    # Pressure vs rain rate, coloured by wind speed.
    {
        "x": "pressure",
        "y": "rain_rate",
        "color": "wind_speed",
        "filename": "hexbin_pressure_rain_color_wind.png",
        "description": (
            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
        ),
        "reduce": "median",
        "gridsize": 45,
        "mincnt": 5,
    },
    # Illuminance vs humidity, coloured by temperature.
    {
        "x": "illuminance",
        "y": "humidity",
        "color": "temperature",
        "filename": "hexbin_lux_humidity_color_temp.png",
        "description": (
            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
            "mais cela n'implique pas toujours une baisse rapide de température."
        ),
        "reduce": "mean",
        "gridsize": 55,
        "mincnt": 6,
    },
]
def main() -> None:
    """Render one hexbin figure per entry of ``HEXBIN_SCENARIOS``.

    The CSV at ``CSV_PATH`` is loaded with ``load_raw_csv``. For each
    scenario the x / y / colour variables are looked up in
    ``VARIABLES_BY_KEY`` and ``plot_hexbin_with_third_variable`` is called
    with an output path under ``OUTPUT_DIR``. Progress is printed to stdout.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without plotting.

    A scenario whose ``"reduce"`` name is not in ``REDUCE_FUNCTIONS`` falls
    back to the mean; the fallback is reported and the figure is labelled
    as a mean so the label always matches the reduction actually applied.

    Raises:
        KeyError: if a scenario lacks one of the mandatory keys
            ``"x"``, ``"y"``, ``"color"``, ``"filename"`` or
            ``"description"``.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    for scenario in HEXBIN_SCENARIOS:
        # NOTE(review): VARIABLES_BY_KEY is presumably a mapping; an unknown
        # variable key fails here at lookup time.
        var_x = VARIABLES_BY_KEY[scenario["x"]]
        var_y = VARIABLES_BY_KEY[scenario["y"]]
        var_color = VARIABLES_BY_KEY[scenario["color"]]

        output_path = OUTPUT_DIR / scenario["filename"]
        description = scenario["description"]

        reduce_name = scenario.get("reduce", "mean")
        if reduce_name in REDUCE_FUNCTIONS:
            reduce_func = REDUCE_FUNCTIONS[reduce_name]
        else:
            # Keep the historical fallback to the mean, but report it and
            # relabel so the figure does not advertise a reduction that was
            # never applied.
            print(f" ⚠ Réduction inconnue {reduce_name!r} : utilisation de la moyenne.")
            reduce_name = "mean"
            reduce_func = np.mean
        reduce_label = REDUCE_LABEL_FR.get(reduce_name, reduce_name)

        gridsize = int(scenario.get("gridsize", 60))
        mincnt = int(scenario.get("mincnt", 5))

        print(f"→ Hexbin {var_y.key} vs {var_x.key} (couleur = {var_color.key})")
        print(f" {description}")

        # Ensure the destination directory exists before plotting; this is
        # harmless if the plotting helper creates it as well.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=gridsize,
            mincnt=mincnt,
            reduce_func=reduce_func,
            reduce_func_label=reduce_label,
            cmap="magma",
        )
        print(f" ✔ Graphique enregistré : {output_path}")
        print()

    print("✔ Tous les graphiques hexbin ont été générés.")
# Run the generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()