Nouvelles visualisations exploratoires

2025-11-17 21:57:13 +01:00
parent b72349a369
commit fd42a692d9
10 changed files with 679 additions and 2 deletions
--- a/scripts/plot_hexbin_explorations.py
+++ b/scripts/plot_hexbin_explorations.py
@@ -0,0 +1,128 @@
+# scripts/plot_hexbin_explorations.py
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Callable
+
+import numpy as np
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import VARIABLES_BY_KEY
+from meteo.plots import plot_hexbin_with_third_variable
+
+
+CSV_PATH = Path("data/weather_minutely.csv")  # dataset loaded by main()
+OUTPUT_DIR = Path("figures/hexbin_explorations")  # parent directory of every scenario's output path
+
+# Reducers selectable through a scenario's "reduce" entry, keyed by name.
+REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
+    "mean": np.mean,
+    "median": np.median,
+    "max": np.max,
+}
+
+REDUCE_LABEL_FR: dict[str, str] = {  # French label handed to the plot for each reducer name
+    "mean": "moyenne",
+    "median": "médiane",
+    "max": "maximum",
+}
+
+# Each scenario illustrates either a well-known correlation
+# or the absence of structure between variables.
+HEXBIN_SCENARIOS: list[dict[str, object]] = [
+    {
+        "x": "temperature",
+        "y": "humidity",
+        "color": "rain_rate",
+        "filename": "hexbin_temp_humidity_color_rain.png",
+        "description": (
+            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
+            "et comment les épisodes de pluie se situent dans une bande restreinte."
+        ),
+        "reduce": "max",
+        "gridsize": 50,
+        "mincnt": 8,
+    },
+    {
+        "x": "pressure",
+        "y": "rain_rate",
+        "color": "wind_speed",
+        "filename": "hexbin_pressure_rain_color_wind.png",
+        "description": (
+            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
+            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
+        ),
+        "reduce": "median",
+        "gridsize": 45,
+        "mincnt": 5,
+    },
+    {
+        "x": "illuminance",
+        "y": "humidity",
+        "color": "temperature",
+        "filename": "hexbin_lux_humidity_color_temp.png",
+        "description": (
+            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
+            "mais cela n'implique pas toujours une baisse rapide de température."
+        ),
+        "reduce": "mean",
+        "gridsize": 55,
+        "mincnt": 6,
+    },
+]
+
+
+def main() -> None:
+    """Render one hexbin figure per entry of ``HEXBIN_SCENARIOS``.
+
+    Prints a warning and returns early when ``CSV_PATH`` does not exist.
+    A scenario naming a reducer missing from ``REDUCE_FUNCTIONS`` is drawn
+    with the mean, and is labelled as such.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    df = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(df)}")
+    print(f"  Colonnes : {list(df.columns)}")
+    print()
+
+    for scenario in HEXBIN_SCENARIOS:
+        var_x = VARIABLES_BY_KEY[scenario["x"]]
+        var_y = VARIABLES_BY_KEY[scenario["y"]]
+        var_color = VARIABLES_BY_KEY[scenario["color"]]
+        output_path = OUTPUT_DIR / scenario["filename"]
+        description = scenario["description"]
+
+        print(f"→ Hexbin {var_y.key} vs {var_x.key} (couleur = {var_color.key})")
+        print(f"   {description}")
+
+        # Fall back on the name rather than on the function alone, so the label
+        # handed to the plot always describes the reducer actually applied.
+        reduce_name = scenario.get("reduce", "mean")
+        if reduce_name not in REDUCE_FUNCTIONS:
+            print(f"   ⚠ Réduction inconnue {reduce_name!r} : repli sur la moyenne.")
+            reduce_name = "mean"
+
+        plot_hexbin_with_third_variable(
+            df=df,
+            var_x=var_x,
+            var_y=var_y,
+            var_color=var_color,
+            output_path=output_path,
+            gridsize=int(scenario.get("gridsize", 60)),
+            mincnt=int(scenario.get("mincnt", 5)),
+            reduce_func=REDUCE_FUNCTIONS[reduce_name],
+            reduce_func_label=REDUCE_LABEL_FR.get(reduce_name, reduce_name),
+            cmap="magma",
+        )
+        print(f"   ✔ Graphique enregistré : {output_path}")
+        print()
+
+    print("✔ Tous les graphiques hexbin ont été générés.")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/plot_rain_event_composites.py
+++ b/scripts/plot_rain_event_composites.py
@@ -0,0 +1,85 @@
+# scripts/plot_rain_event_composites.py
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Sequence
+
+import pandas as pd
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import Variable, VARIABLES_BY_KEY
+from meteo.analysis import detect_threshold_events, build_event_aligned_segments
+from meteo.plots import plot_event_composite
+
+
+CSV_PATH = Path("data/weather_minutely.csv")
+OUTPUT_PATH = Path("figures/event_composites/rain_event_composites.png")
+
+RAIN_THRESHOLD = 0.2      # mm/h: below this, it is not considered to be really raining
+MIN_EVENT_DURATION = 5    # minutes
+MIN_EVENT_GAP = 20        # minutes required to consider it a new event
+WINDOW_BEFORE = 120       # minutes shown before the rain starts
+WINDOW_AFTER = 240        # minutes after the onset
+
+COMPOSITE_VARIABLE_KEYS: Sequence[str] = [
+    "pressure",
+    "temperature",
+    "humidity",
+    "wind_speed",
+]
+
+
+def main() -> None:
+    """Detect rain events and plot the event-aligned composite figure.
+
+    Prints a warning and returns early when the CSV is missing, when no
+    event is detected, or when the aligned segments come back empty.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    df = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(df)}")
+    print(f"  Colonnes : {list(df.columns)}")
+    print()
+
+    # Resolve the rain column through the registry, like the composite columns.
+    events = detect_threshold_events(
+        df[VARIABLES_BY_KEY["rain_rate"].column],
+        threshold=RAIN_THRESHOLD,
+        min_duration=pd.Timedelta(minutes=MIN_EVENT_DURATION),
+        min_gap=pd.Timedelta(minutes=MIN_EVENT_GAP),
+    )
+    if not events:
+        print("⚠ Aucun événement de pluie détecté avec les paramètres actuels.")
+        return
+
+    print(f"Nombre d'événements détectés : {len(events)}")
+
+    variables: list[Variable] = [VARIABLES_BY_KEY[key] for key in COMPOSITE_VARIABLE_KEYS]
+    aligned_segments = build_event_aligned_segments(
+        df=df,
+        events=events,
+        columns=[v.column for v in variables],
+        window_before_minutes=WINDOW_BEFORE,
+        window_after_minutes=WINDOW_AFTER,
+        resample_minutes=1,
+    )
+    if aligned_segments.empty:
+        print("⚠ Les segments alignés sont vides (période manquante ?).")
+        return
+
+    output_path = plot_event_composite(
+        aligned_segments=aligned_segments,
+        variables=variables,
+        output_path=OUTPUT_PATH,
+        quantiles=(0.2, 0.8),
+        baseline_label="Début de la pluie",
+    )
+    print(f"✔ Graphique composite pluie sauvegardé : {output_path}")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/plot_rolling_correlation_heatmap.py
+++ b/scripts/plot_rolling_correlation_heatmap.py
@@ -0,0 +1,65 @@
+# scripts/plot_rolling_correlation_heatmap.py
+from __future__ import annotations
+
+from pathlib import Path
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import VARIABLES_BY_KEY
+from meteo.analysis import compute_rolling_correlations_for_pairs
+from meteo.plots import plot_rolling_correlation_heatmap
+
+
+CSV_PATH = Path("data/weather_minutely.csv")
+OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")
+
+ROLLING_PAIRS: list[tuple[str, str]] = [
+    ("temperature", "humidity"),
+    ("pressure", "rain_rate"),
+    ("pressure", "wind_speed"),
+    ("illuminance", "temperature"),
+    ("humidity", "rain_rate"),
+]
+
+WINDOW_MINUTES = 180  # 3 hours, to observe synoptic trends
+STEP_MINUTES = 30     # sample only one point every 30 minutes
+
+
+def main() -> None:
+    """Compute rolling correlations for ``ROLLING_PAIRS`` and plot the heatmap.
+
+    Returns early with a warning if the CSV is missing or the result is empty.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    frame = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(frame)}")
+    print(f"  Colonnes : {list(frame.columns)}")
+    print()
+
+    correlations = compute_rolling_correlations_for_pairs(
+        df=frame,
+        pairs=[(VARIABLES_BY_KEY[a], VARIABLES_BY_KEY[b]) for a, b in ROLLING_PAIRS],
+        window_minutes=WINDOW_MINUTES,
+        min_valid_fraction=0.7,
+        step_minutes=STEP_MINUTES,
+        method="pearson",
+    )
+    if correlations.empty:
+        print("⚠ Impossible de calculer les corrélations glissantes (données insuffisantes).")
+        return
+
+    saved_to = plot_rolling_correlation_heatmap(
+        rolling_corr=correlations,
+        output_path=OUTPUT_PATH,
+        cmap="coolwarm",
+        vmin=-1.0,
+        vmax=1.0,
+    )
+    print(f"✔ Heatmap de corrélations glissantes enregistrée : {saved_to}")
+
+
+if __name__ == "__main__":
+    main()