Premières analyses de corrélation

2025-11-17 14:59:59 +01:00
parent 62a928ec85
commit 5a546688f1
36 changed files with 631 additions and 0 deletions
--- a/scripts/plot_all_pairwise_scatter.py
+++ b/scripts/plot_all_pairwise_scatter.py
@@ -0,0 +1,45 @@
+# scripts/plot_all_pairwise_scatter.py
+from __future__ import annotations
+
+from pathlib import Path
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import iter_variable_pairs
+from meteo.plots import plot_scatter_pair
+
+
+CSV_PATH = Path("data/weather_minutely.csv")  # dataset loaded by main()
+OUTPUT_DIR = Path("figures/pairwise_scatter")  # parent directory of every scatter PNG path
+
+
+def main() -> None:
+    """Render one scatter plot per variable pair from the minutely CSV.
+
+    Prints a notice and returns early when ``CSV_PATH`` does not exist.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    df = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(df)}")
+    print(f"  Colonnes : {list(df.columns)}")
+
+    # len() needs a sized sequence, so materialise whatever the helper yields.
+    pairs = list(iter_variable_pairs())
+    print(f"Nombre de paires de variables : {len(pairs)}")
+
+    for var_x, var_y in pairs:
+        output_path = OUTPUT_DIR / f"scatter_{var_x.key}_vs_{var_y.key}.png"
+        print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
+        # sample_step=10 keeps one point in ten (≈ 32k points instead of 320k).
+        plot_scatter_pair(
+            df=df, var_x=var_x, var_y=var_y, output_path=output_path, sample_step=10
+        )
+
+    print("✔ Tous les graphiques de nuages de points ont été générés.")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
--- a/scripts/plot_correlation_heatmap.py
+++ b/scripts/plot_correlation_heatmap.py
@@ -0,0 +1,45 @@
+# scripts/plot_correlation_heatmap.py
+from __future__ import annotations
+
+from pathlib import Path
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import VARIABLES
+from meteo.analysis import compute_correlation_matrix_for_variables
+from meteo.plots import plot_correlation_heatmap
+
+
+CSV_PATH = Path("data/weather_minutely.csv")  # dataset loaded by main()
+OUTPUT_PATH = Path("figures/correlation_heatmap.png")  # passed to the heatmap plotter
+
+
+def main() -> None:
+    """Compute the Pearson correlation matrix of VARIABLES and save its heatmap."""
+    # Nothing to analyse without the minutely dataset: explain and bail out.
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        print("   Assurez-vous d'avoir généré le dataset minuté.")
+        return
+
+    frame = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(frame)}")
+    print(f"  Colonnes : {list(frame.columns)}")
+    print()
+
+    matrix = compute_correlation_matrix_for_variables(
+        frame, VARIABLES, method="pearson"
+    )
+    print("Matrice de corrélation (aperçu) :")
+    print(matrix)
+    print()
+
+    # Report whatever the plotting helper returns (presumably the written path).
+    saved_to = plot_correlation_heatmap(
+        corr=matrix, variables=VARIABLES, output_path=OUTPUT_PATH, annotate=True
+    )
+    print(f"✔ Heatmap de corrélation sauvegardée dans : {saved_to}")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
--- a/scripts/plot_lagged_correlations.py
+++ b/scripts/plot_lagged_correlations.py
@@ -0,0 +1,69 @@
+# scripts/plot_lagged_correlations.py
+from __future__ import annotations
+
+from pathlib import Path
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import VARIABLES_BY_KEY
+from meteo.analysis import compute_lagged_correlation
+from meteo.plots import plot_lagged_correlation
+
+
+CSV_PATH = Path("data/weather_minutely.csv")  # dataset loaded by main()
+OUTPUT_DIR = Path("figures/lagged_correlations")  # parent directory of every lag-plot PNG path
+
+
+# Pairs to analyse (key of variable X, key of variable Y)
+# Convention: X potentially precedes Y
+INTERESTING_PAIRS: list[tuple[str, str]] = [
+    ("temperature", "humidity"),
+    ("temperature", "rain_rate"),
+    ("pressure", "rain_rate"),
+    ("pressure", "wind_speed"),
+    ("pressure", "illuminance"),
+    ("illuminance", "temperature"),
+    ("humidity", "rain_rate"),
+]
+
+
+def main() -> None:
+    """Plot the lagged correlation of every pair in ``INTERESTING_PAIRS``.
+
+    Lags span ±360 minutes in 10-minute steps (Pearson).  Prints a notice
+    and returns early when ``CSV_PATH`` does not exist.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    frame = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(frame)}")
+    print(f"  Colonnes : {list(frame.columns)}")
+    print()
+
+    for key_x, key_y in INTERESTING_PAIRS:
+        # Look up both variable descriptors before announcing the pair.
+        var_x, var_y = VARIABLES_BY_KEY[key_x], VARIABLES_BY_KEY[key_y]
+        print(f"→ Corrélation décalée : {var_x.key} → {var_y.key}")
+
+        correlations = compute_lagged_correlation(
+            df=frame,
+            var_x=var_x,
+            var_y=var_y,
+            max_lag_minutes=360,  # ± 6 hours
+            step_minutes=10,  # 10-minute step
+            method="pearson",
+        )
+        plot_lagged_correlation(
+            lag_df=correlations,
+            var_x=var_x,
+            var_y=var_y,
+            output_path=OUTPUT_DIR / f"lagcorr_{var_x.key}_to_{var_y.key}.png",
+        )
+
+    print("✔ Graphiques de corrélation décalée générés.")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()