Réorganisation

2025-11-19 17:01:45 +01:00
parent 566d4400ce
commit 617b12c02e
91 changed files with 874 additions and 1715 deletions
--- a/binaires/scripts/plot_all_pairwise_scatter.py
+++ b/binaires/scripts/plot_all_pairwise_scatter.py
@@ -0,0 +1,52 @@
+# scripts/plot_all_pairwise_scatter.py
+from __future__ import annotations
+
+from pathlib import Path
+import sys
+# Put the fourth ancestor directory of this file on sys.path, presumably so the `meteo` package can be imported.
+# NOTE(review): with the script under binaires/scripts/, parents[3] is above the folder holding binaires/ — confirm.
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:  # skip the insert when the entry is already present
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import iter_variable_pairs
+from meteo.plots import plot_scatter_pair
+
+# Dataset path (relative, hence resolved against the current working directory) and figure output folder.
+CSV_PATH = Path("data/weather_minutely.csv")
+DOC_DIR = Path(__file__).resolve().parent.parent  # parent of the directory holding this script
+OUTPUT_DIR = DOC_DIR / "figures" / "pairwise_scatter"
+
+
+def main() -> None:
+    """Generate one scatter plot per pair returned by iter_variable_pairs().
+
+    Loads CSV_PATH, prints a short summary, then asks plot_scatter_pair for a
+    figure under OUTPUT_DIR. Prints a warning and returns if CSV_PATH is missing.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        return
+
+    df = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(df)}")
+    print(f"  Colonnes : {list(df.columns)}")
+
+    pairs = iter_variable_pairs()
+    print(f"Nombre de paires de variables : {len(pairs)}")
+
+    # Create the figure directory up front instead of relying on the plotting helper.
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    for var_x, var_y in pairs:
+        output_path = OUTPUT_DIR / f"scatter_{var_x.key}_vs_{var_y.key}.png"
+        print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
+        # One point in 10: ≈ 32k points instead of 320k.
+        plot_scatter_pair(df=df, var_x=var_x, var_y=var_y, output_path=output_path, sample_step=10)
+
+    print("✔ Tous les graphiques de nuages de points ont été générés.")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
--- a/binaires/scripts/plot_correlation_heatmap.py
+++ b/binaires/scripts/plot_correlation_heatmap.py
@@ -0,0 +1,52 @@
+# scripts/plot_correlation_heatmap.py
+from __future__ import annotations
+
+from pathlib import Path
+import sys
+# Put the fourth ancestor directory of this file on sys.path, presumably so the `meteo` package can be imported.
+# NOTE(review): with the script under binaires/scripts/, parents[3] is above the folder holding binaires/ — confirm.
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:  # skip the insert when the entry is already present
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import VARIABLES
+from meteo.analysis import compute_correlation_matrix_for_variables
+from meteo.plots import plot_correlation_heatmap
+
+# Dataset path (relative, hence resolved against the current working directory) and heatmap output file.
+CSV_PATH = Path("data/weather_minutely.csv")
+DOC_DIR = Path(__file__).resolve().parent.parent  # parent of the directory holding this script
+OUTPUT_PATH = DOC_DIR / "figures" / "correlation_heatmap.png"
+
+
+def main() -> None:
+    """Print the Pearson correlation matrix of VARIABLES and plot its heatmap.
+
+    Loads CSV_PATH, prints a short summary and the matrix, then calls
+    plot_correlation_heatmap with OUTPUT_PATH and annotate=True. Prints a
+    two-line warning and returns early if CSV_PATH is missing.
+    """
+    if not CSV_PATH.exists():
+        print(f"⚠ Fichier introuvable : {CSV_PATH}")
+        print("   Assurez-vous d'avoir généré le dataset minuté.")
+        return
+
+    frame = load_raw_csv(CSV_PATH)
+    print(f"Dataset minuté chargé : {CSV_PATH}")
+    print(f"  Lignes   : {len(frame)}")
+    print(f"  Colonnes : {list(frame.columns)}", end="\n\n")
+
+    matrix = compute_correlation_matrix_for_variables(frame, VARIABLES, method="pearson")
+    print("Matrice de corrélation (aperçu) :")
+    print(matrix, end="\n\n")
+
+    # The value returned by the plotting helper is what gets reported below.
+    saved_to = plot_correlation_heatmap(
+        corr=matrix, variables=VARIABLES, output_path=OUTPUT_PATH, annotate=True
+    )
+    print(f"✔ Heatmap de corrélation sauvegardée dans : {saved_to}")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
--- a/binaires/scripts/plot_correlations.py
+++ b/binaires/scripts/plot_correlations.py
@@ -0,0 +1,96 @@
+# scripts/plot_correlations.py
+"""Produit les nuages de points ciblés entre variables sélectionnées."""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+import sys
+from typing import Sequence
+
+import pandas as pd
+# Put the fourth ancestor directory of this file on sys.path, presumably so the `meteo` package can be imported.
+# NOTE(review): with the script under binaires/scripts/, parents[3] is above the folder holding binaires/ — confirm.
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:  # skip the insert when the entry is already present
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from meteo.dataset import load_raw_csv
+from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
+from meteo.plots import plot_scatter_pair
+
+# Default for --dataset (relative, hence resolved against the current working directory) and scatter output folder.
+CSV_PATH = Path("data/weather_minutely.csv")
+DOC_DIR = Path(__file__).resolve().parent.parent  # parent of the directory holding this script
+SCATTER_DIR = DOC_DIR / "figures" / "pairwise_scatter"
+
+
+def _select_variables(keys: Sequence[str] | None) -> list[Variable]:
+    """Resolve *keys* via VARIABLES_BY_KEY (all of VARIABLES if none given); exit on an unknown key."""
+    if keys:
+        try:
+            return [VARIABLES_BY_KEY[name] for name in keys]
+        except KeyError as exc:
+            raise SystemExit(f"Variable inconnue : {exc.args[0]!r}.") from exc
+    return list(VARIABLES)
+
+
+def _generate_pairwise_scatter(df: pd.DataFrame, variables: Sequence[Variable], *, sample_step: int) -> None:
+    """Plot every pair from iter_variable_pairs() whose two members are in *variables*.
+
+    Each figure path is SCATTER_DIR/scatter_<x key>_vs_<y key>.png; SCATTER_DIR is
+    created if needed and *sample_step* is forwarded to plot_scatter_pair. Prints
+    a warning and returns when no pair matches.
+    """
+    chosen = [(a, b) for a, b in iter_variable_pairs() if a in variables and b in variables]
+    if not chosen:
+        print("⚠ Aucun couple sélectionné pour les nuages de points.")
+        return
+
+    SCATTER_DIR.mkdir(parents=True, exist_ok=True)
+    for x_var, y_var in chosen:
+        target = SCATTER_DIR / f"scatter_{x_var.key}_vs_{y_var.key}.png"
+        print(f"→ Scatter {y_var.key} vs {x_var.key}")
+        plot_scatter_pair(df, var_x=x_var, var_y=y_var, output_path=target, sample_step=sample_step)
+    print(f"✔ {len(chosen)} nuage(s) de points généré(s) dans {SCATTER_DIR}.")
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse the command line, load the dataset and generate the scatter plots.
+
+    Args:
+        argv: Arguments handed to argparse; None lets argparse read sys.argv.
+
+    Returns:
+        0 when the run completes.
+
+    Raises:
+        SystemExit: If the dataset path does not exist.
+    """
+    cli = argparse.ArgumentParser(description="Génère des nuages de points pairwise entre variables.")
+    cli.add_argument(
+        "--dataset", type=Path, default=CSV_PATH,
+        help="Dataset à utiliser (par défaut : data/weather_minutely.csv).",
+    )
+    cli.add_argument("--variables", nargs="*", help="Restreint l'analyse à certaines clés de variables.")
+    cli.add_argument(
+        "--scatter-step", type=int, default=20,
+        help="Pas d'échantillonnage pour les nuages de points individuels.",
+    )
+    options = cli.parse_args(argv)
+
+    # Stop with a readable message when the dataset file is missing.
+    if not options.dataset.exists():
+        raise SystemExit(f"Dataset introuvable : {options.dataset}")
+
+    frame = load_raw_csv(options.dataset)
+    print(f"Dataset chargé : {options.dataset} ({len(frame)} lignes)", end="\n\n")
+
+    chosen_variables = _select_variables(options.variables)
+    _generate_pairwise_scatter(frame, chosen_variables, sample_step=options.scatter_step)
+    print("✔ Terminé.")
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())  # main()'s return value becomes the process exit status