# Source listing: donnees_meteo/scripts/plot_rolling_correlation_heatmap.py
# (66 lines, 1.8 KiB, Python)

# scripts/plot_rolling_correlation_heatmap.py
from __future__ import annotations
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_rolling_correlations_for_pairs
from meteo.plots import plot_rolling_correlation_heatmap
# Input dataset read by main() and destination file for the generated heatmap.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")

# Variable keys (looked up in VARIABLES_BY_KEY) whose rolling correlation is plotted,
# one (first, second) tuple per pair.
ROLLING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]

# 3-hour window, chosen to observe synoptic trends.
WINDOW_MINUTES = 180
# Only one point is sampled per 30 minutes.
STEP_MINUTES = 30
def main() -> None:
    """Compute rolling correlations for the configured pairs and save a heatmap.

    Reads the CSV at ``CSV_PATH``, resolves each key pair of ``ROLLING_PAIRS``
    through ``VARIABLES_BY_KEY``, computes Pearson rolling correlations with
    ``WINDOW_MINUTES`` / ``STEP_MINUTES``, and passes the result to
    ``plot_rolling_correlation_heatmap`` targeting ``OUTPUT_PATH``.

    Prints a warning and returns early (without raising) when the CSV file
    does not exist or when the rolling-correlation result is empty.
    """
    # Guard: nothing to do without the input file.
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    frame = load_raw_csv(CSV_PATH)

    # Short summary of what was loaded, followed by a blank line.
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(frame)}")
    print(f" Colonnes : {list(frame.columns)}")
    print()

    # Translate the configured key pairs into the objects stored in VARIABLES_BY_KEY;
    # an unknown key raises KeyError here.
    variable_pairs = [
        (VARIABLES_BY_KEY[first_key], VARIABLES_BY_KEY[second_key])
        for first_key, second_key in ROLLING_PAIRS
    ]

    correlations = compute_rolling_correlations_for_pairs(
        df=frame,
        pairs=variable_pairs,
        window_minutes=WINDOW_MINUTES,
        min_valid_fraction=0.7,
        step_minutes=STEP_MINUTES,
        method="pearson",
    )

    # Guard: an empty result means there is nothing to plot.
    if correlations.empty:
        print("⚠ Impossible de calculer les corrélations glissantes (données insuffisantes).")
        return

    # Colour scale is pinned to [-1, 1].
    saved_to = plot_rolling_correlation_heatmap(
        rolling_corr=correlations,
        output_path=OUTPUT_PATH,
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
    )
    print(f"✔ Heatmap de corrélations glissantes enregistrée : {saved_to}")
# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()