1

Nouvelles visualisations exploratoires

This commit is contained in:
2025-11-17 21:57:13 +01:00
parent b72349a369
commit fd42a692d9
10 changed files with 679 additions and 2 deletions

View File

@@ -0,0 +1,128 @@
# scripts/plot_hexbin_explorations.py
from __future__ import annotations
from pathlib import Path
from typing import Callable
import numpy as np
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.plots import plot_hexbin_with_third_variable
# Minute-resolution weather CSV read by this script.
CSV_PATH = Path("data/weather_minutely.csv")
# Directory under which each scenario's output file path is built.
OUTPUT_DIR = Path("figures/hexbin_explorations")

# Reducers that a scenario can select by name through its "reduce" entry.
REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
    "mean": np.mean,
    "median": np.median,
    "max": np.max,
}

# French display label for each reducer name (same keys as REDUCE_FUNCTIONS).
REDUCE_LABEL_FR: dict[str, str] = {
    "mean": "moyenne",
    "median": "médiane",
    "max": "maximum",
}
# Each scenario illustrates either a well-known correlation or the absence
# of structure between variables.
#
# Keys of a scenario:
#   "x", "y", "color"  -> keys looked up in VARIABLES_BY_KEY
#   "filename"         -> file name appended to OUTPUT_DIR
#   "description"      -> text printed before the figure is produced
#   "reduce"           -> name of an entry of REDUCE_FUNCTIONS
#   "gridsize", "mincnt" -> forwarded to the plotting helper
HEXBIN_SCENARIOS: list[dict[str, object]] = [
    {
        "x": "temperature",
        "y": "humidity",
        "color": "rain_rate",
        "filename": "hexbin_temp_humidity_color_rain.png",
        "description": (
            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
            "et comment les épisodes de pluie se situent dans une bande restreinte."
        ),
        "reduce": "max",
        "gridsize": 50,
        "mincnt": 8,
    },
    {
        "x": "pressure",
        "y": "rain_rate",
        "color": "wind_speed",
        "filename": "hexbin_pressure_rain_color_wind.png",
        "description": (
            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
        ),
        "reduce": "median",
        "gridsize": 45,
        "mincnt": 5,
    },
    {
        "x": "illuminance",
        "y": "humidity",
        "color": "temperature",
        "filename": "hexbin_lux_humidity_color_temp.png",
        "description": (
            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
            "mais cela n'implique pas toujours une baisse rapide de température."
        ),
        "reduce": "mean",
        "gridsize": 55,
        "mincnt": 6,
    },
]
def main() -> None:
    """Produce one hexbin figure per entry of ``HEXBIN_SCENARIOS``.

    The CSV at ``CSV_PATH`` is loaded with ``load_raw_csv``; if the file does
    not exist a warning is printed and the function returns without plotting.
    For each scenario the three variables are resolved through
    ``VARIABLES_BY_KEY`` and handed to ``plot_hexbin_with_third_variable``
    together with the scenario's reducer, grid size and minimum count.

    Raises:
        KeyError: if a scenario lacks a mandatory entry ("x", "y", "color",
            "filename", "description") or names a key absent from
            ``VARIABLES_BY_KEY``.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    for scenario in HEXBIN_SCENARIOS:
        var_x = VARIABLES_BY_KEY[scenario["x"]]
        var_y = VARIABLES_BY_KEY[scenario["y"]]
        var_color = VARIABLES_BY_KEY[scenario["color"]]
        output_path = OUTPUT_DIR / scenario["filename"]

        # Resolve the reducer name once so that the function and the label
        # passed to the plot always describe the same reduction. Previously an
        # unknown name silently used the mean while still being shown as the
        # label.
        reduce_name = scenario.get("reduce", "mean")
        if reduce_name not in REDUCE_FUNCTIONS:
            print(f" ⚠ Réduction inconnue : {reduce_name!r} ; repli sur la moyenne.")
            reduce_name = "mean"
        reduce_func = REDUCE_FUNCTIONS[reduce_name]
        reduce_label = REDUCE_LABEL_FR.get(reduce_name, reduce_name)

        gridsize = int(scenario.get("gridsize", 60))
        mincnt = int(scenario.get("mincnt", 5))
        description = scenario["description"]

        print(f"→ Hexbin {var_y.key} vs {var_x.key} (couleur = {var_color.key})")
        print(f" {description}")
        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=gridsize,
            mincnt=mincnt,
            reduce_func=reduce_func,
            reduce_func_label=reduce_label,
            cmap="magma",
        )
        print(f" ✔ Graphique enregistré : {output_path}")
        print()

    print("✔ Tous les graphiques hexbin ont été générés.")


# Run the generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,85 @@
# scripts/plot_rain_event_composites.py
from __future__ import annotations
from pathlib import Path
from typing import Sequence
import pandas as pd
from meteo.dataset import load_raw_csv
from meteo.variables import Variable, VARIABLES_BY_KEY
from meteo.analysis import detect_threshold_events, build_event_aligned_segments
from meteo.plots import plot_event_composite
# Input dataset and destination path handed to the composite plot.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/event_composites/rain_event_composites.png")

# Rain-rate threshold in mm/h: below it we consider it is not really raining.
RAIN_THRESHOLD = 0.2
# Minimum event duration, in minutes.
MIN_EVENT_DURATION = 5
# Minutes of gap required before a new event is considered.
MIN_EVENT_GAP = 20
# Minutes displayed before the rain starts.
WINDOW_BEFORE = 120
# Minutes displayed after the onset.
WINDOW_AFTER = 240

# Keys (looked up in VARIABLES_BY_KEY) of the variables shown in the composite.
COMPOSITE_VARIABLE_KEYS: Sequence[str] = [
    "pressure",
    "temperature",
    "humidity",
    "wind_speed",
]
def main() -> None:
    """Build the rain-event composite figure from the minutely dataset.

    Steps: load ``CSV_PATH``, detect events on the ``"rain_rate"`` column
    with ``detect_threshold_events``, align the columns of the variables
    listed in ``COMPOSITE_VARIABLE_KEYS`` around those events, then call
    ``plot_event_composite``. Each stage prints a warning and returns early
    when it has nothing to work with (missing file, no event, empty
    segments).
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    # Event detection on the rain-rate series, with duration/gap constraints
    # expressed as Timedelta objects.
    rain_events = detect_threshold_events(
        df["rain_rate"],
        threshold=RAIN_THRESHOLD,
        min_duration=pd.Timedelta(minutes=MIN_EVENT_DURATION),
        min_gap=pd.Timedelta(minutes=MIN_EVENT_GAP),
    )
    if not rain_events:
        print("⚠ Aucun événement de pluie détecté avec les paramètres actuels.")
        return
    print(f"Nombre d'événements détectés : {len(rain_events)}")

    composite_vars: list[Variable] = [
        VARIABLES_BY_KEY[var_key] for var_key in COMPOSITE_VARIABLE_KEYS
    ]

    # Cut the selected columns into windows positioned relative to each event.
    segments = build_event_aligned_segments(
        df=df,
        events=rain_events,
        columns=[var.column for var in composite_vars],
        window_before_minutes=WINDOW_BEFORE,
        window_after_minutes=WINDOW_AFTER,
        resample_minutes=1,
    )
    if segments.empty:
        print("⚠ Les segments alignés sont vides (période manquante ?).")
        return

    saved_path = plot_event_composite(
        aligned_segments=segments,
        variables=composite_vars,
        output_path=OUTPUT_PATH,
        quantiles=(0.2, 0.8),
        baseline_label="Début de la pluie",
    )
    print(f"✔ Graphique composite pluie sauvegardé : {saved_path}")


# Script entry point; nothing runs on import.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,65 @@
# scripts/plot_rolling_correlation_heatmap.py
from __future__ import annotations
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_rolling_correlations_for_pairs
from meteo.plots import plot_rolling_correlation_heatmap
# Input dataset and destination path handed to the heatmap plot.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")

# Pairs of variable keys (looked up in VARIABLES_BY_KEY) to correlate.
ROLLING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]

# 3 hours, to observe synoptic trends.
WINDOW_MINUTES = 180
# Only one point is sampled every 30 minutes.
STEP_MINUTES = 30
def main() -> None:
    """Compute rolling correlations for ``ROLLING_PAIRS`` and plot a heatmap.

    Loads ``CSV_PATH``, resolves each key pair through ``VARIABLES_BY_KEY``,
    delegates the computation to ``compute_rolling_correlations_for_pairs``
    and the rendering to ``plot_rolling_correlation_heatmap``. Prints a
    warning and returns early when the CSV is missing or when the
    correlation frame comes back empty.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    # Turn the (key, key) tuples into pairs of variable objects.
    variable_pairs = [
        (VARIABLES_BY_KEY[left_key], VARIABLES_BY_KEY[right_key])
        for left_key, right_key in ROLLING_PAIRS
    ]

    correlations = compute_rolling_correlations_for_pairs(
        df=df,
        pairs=variable_pairs,
        window_minutes=WINDOW_MINUTES,
        min_valid_fraction=0.7,
        step_minutes=STEP_MINUTES,
        method="pearson",
    )
    if correlations.empty:
        print("⚠ Impossible de calculer les corrélations glissantes (données insuffisantes).")
        return

    # Color scale pinned to [-1, 1], the bounds passed as vmin/vmax.
    saved_path = plot_rolling_correlation_heatmap(
        rolling_corr=correlations,
        output_path=OUTPUT_PATH,
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
    )
    print(f"✔ Heatmap de corrélations glissantes enregistrée : {saved_path}")


# Script entry point; nothing runs on import.
if __name__ == "__main__":
    main()