1

Réorganisation

This commit is contained in:
2025-11-19 17:01:45 +01:00
parent 566d4400ce
commit 617b12c02e
91 changed files with 874 additions and 1715 deletions

View File

@@ -0,0 +1,52 @@
# scripts/plot_all_pairwise_scatter.py
from __future__ import annotations
from pathlib import Path
import sys
# Resolve the directory three levels above this script's own folder and put it
# at the front of sys.path (only when it is not already listed). Presumably
# this lets the `meteo` imports that follow resolve when the script is run
# directly -- TODO confirm the depth matches the script's real location.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.variables import iter_variable_pairs
from meteo.plots import plot_scatter_pair
# Input dataset. The path is relative, so it is resolved against the current
# working directory at run time.
CSV_PATH = Path("data/weather_minutely.csv")
# Parent of the directory that contains this script.
DOC_DIR = Path(__file__).resolve().parent.parent
# Folder that receives the generated scatter-plot PNG files.
OUTPUT_DIR = DOC_DIR / "figures" / "pairwise_scatter"
def main() -> None:
    """Render one scatter plot per variable pair from the minutely dataset.

    Loads ``CSV_PATH`` with ``load_raw_csv``, prints a short summary of the
    frame, then calls ``plot_scatter_pair`` for every pair provided by
    ``iter_variable_pairs``. Each figure is written to ``OUTPUT_DIR`` as
    ``scatter_<x key>_vs_<y key>.png``.

    When ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without plotting anything.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")

    # Materialise the pairs so len() works even if the helper is lazy.
    pairs = list(iter_variable_pairs())
    print(f"Nombre de paires de variables : {len(pairs)}")

    # Create the target folder up front, as the sibling plot_correlations
    # script does, so a fresh checkout does not fail on the first save.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for var_x, var_y in pairs:
        filename = f"scatter_{var_x.key}_vs_{var_y.key}.png"
        output_path = OUTPUT_DIR / filename
        # Separator added: the destination used to be glued to the x key.
        print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
        plot_scatter_pair(
            df=df,
            var_x=var_x,
            var_y=var_y,
            output_path=output_path,
            sample_step=10,  # one point out of 10: ≈ 32k points instead of 320k
        )

    print("✔ Tous les graphiques de nuages de points ont été générés.")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,52 @@
# scripts/plot_correlation_heatmap.py
from __future__ import annotations
from pathlib import Path
import sys
# Resolve the directory three levels above this script's own folder and put it
# at the front of sys.path (only when it is not already listed). Presumably
# this lets the `meteo` imports that follow resolve when the script is run
# directly -- TODO confirm the depth matches the script's real location.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES
from meteo.analysis import compute_correlation_matrix_for_variables
from meteo.plots import plot_correlation_heatmap
# Input dataset. The path is relative, so it is resolved against the current
# working directory at run time.
CSV_PATH = Path("data/weather_minutely.csv")
# Parent of the directory that contains this script.
DOC_DIR = Path(__file__).resolve().parent.parent
# Destination passed to the heatmap plotting helper.
OUTPUT_PATH = DOC_DIR / "figures" / "correlation_heatmap.png"
def main() -> None:
    """Compute the Pearson correlation matrix and save it as a heatmap.

    Loads ``CSV_PATH`` with ``load_raw_csv``, prints a summary of the frame and
    the correlation matrix obtained from
    ``compute_correlation_matrix_for_variables``, then hands that matrix to
    ``plot_correlation_heatmap`` with ``OUTPUT_PATH`` as the destination.

    When ``CSV_PATH`` does not exist, two warning lines are printed and the
    function returns without computing anything.
    """
    dataset_available = CSV_PATH.exists()
    if not dataset_available:
        for warning_line in (
            f"⚠ Fichier introuvable : {CSV_PATH}",
            " Assurez-vous d'avoir généré le dataset minuté.",
        ):
            print(warning_line)
        return

    frame = load_raw_csv(CSV_PATH)
    column_names = list(frame.columns)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(frame)}")
    print(f" Colonnes : {column_names}")
    print()

    matrix = compute_correlation_matrix_for_variables(
        frame,
        VARIABLES,
        method="pearson",
    )
    print("Matrice de corrélation (aperçu) :")
    print(matrix)
    print()

    # The plotting helper returns the value reported to the user below.
    heatmap_options = {
        "corr": matrix,
        "variables": VARIABLES,
        "output_path": OUTPUT_PATH,
        "annotate": True,
    }
    saved_to = plot_correlation_heatmap(**heatmap_options)
    print(f"✔ Heatmap de corrélation sauvegardée dans : {saved_to}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,96 @@
# scripts/plot_correlations.py
"""Produit les nuages de points ciblés entre variables sélectionnées."""
from __future__ import annotations
import argparse
from pathlib import Path
import sys
from typing import Sequence
import pandas as pd
# Resolve the directory three levels above this script's own folder and put it
# at the front of sys.path (only when it is not already listed). Presumably
# this lets the `meteo` imports that follow resolve when the script is run
# directly -- TODO confirm the depth matches the script's real location.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
from meteo.plots import plot_scatter_pair
# Default dataset for the --dataset option. The path is relative, so it is
# resolved against the current working directory at run time.
CSV_PATH = Path("data/weather_minutely.csv")
# Parent of the directory that contains this script.
DOC_DIR = Path(__file__).resolve().parent.parent
# Folder that receives the generated scatter-plot PNG files.
SCATTER_DIR = DOC_DIR / "figures" / "pairwise_scatter"
def _select_variables(keys: Sequence[str] | None) -> list[Variable]:
    """Translate variable keys into ``Variable`` objects.

    Args:
        keys: Keys to look up in ``VARIABLES_BY_KEY``. ``None`` or an empty
            sequence selects every entry of ``VARIABLES``.

    Returns:
        The matching variables, in the order the keys were given.

    Raises:
        SystemExit: If a key is missing from ``VARIABLES_BY_KEY``.
    """
    if keys:
        chosen: list[Variable] = []
        for name in keys:
            try:
                chosen.append(VARIABLES_BY_KEY[name])
            except KeyError as exc:
                raise SystemExit(f"Variable inconnue : {exc.args[0]!r}.") from exc
        return chosen

    # No explicit selection: fall back to the full catalogue.
    return list(VARIABLES)
def _generate_pairwise_scatter(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    *,
    sample_step: int,
) -> None:
    """Draw a scatter plot for every known pair made of selected variables.

    Args:
        df: Frame forwarded unchanged to ``plot_scatter_pair``.
        variables: Variables to keep; a pair from ``iter_variable_pairs`` is
            plotted only when both of its members appear in this sequence.
        sample_step: Sampling step forwarded to ``plot_scatter_pair``.

    Prints a warning and returns without creating ``SCATTER_DIR`` when no pair
    qualifies.
    """
    kept_pairs = []
    for first, second in iter_variable_pairs():
        if first in variables and second in variables:
            kept_pairs.append((first, second))

    if len(kept_pairs) == 0:
        print("⚠ Aucun couple sélectionné pour les nuages de points.")
        return

    SCATTER_DIR.mkdir(parents=True, exist_ok=True)

    for first, second in kept_pairs:
        target = SCATTER_DIR / f"scatter_{first.key}_vs_{second.key}.png"
        print(f"→ Scatter {second.key} vs {first.key}")
        plot_scatter_pair(
            df,
            var_x=first,
            var_y=second,
            output_path=target,
            sample_step=sample_step,
        )

    print(f"{len(kept_pairs)} nuage(s) de points généré(s) dans {SCATTER_DIR}.")
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: draw scatter plots for selected variable pairs.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` lets argparse read
            the process command line.

    Returns:
        ``0`` once the run has completed.

    Raises:
        SystemExit: If ``--scatter-step`` is below 1 (argparse usage error),
            if the dataset file does not exist, or if a key passed to
            ``--variables`` is unknown.
    """
    parser = argparse.ArgumentParser(description="Génère des nuages de points pairwise entre variables.")
    parser.add_argument(
        "--dataset",
        type=Path,
        default=CSV_PATH,
        help="Dataset à utiliser (par défaut : data/weather_minutely.csv).",
    )
    parser.add_argument(
        "--variables",
        nargs="*",
        help="Restreint l'analyse à certaines clés de variables.",
    )
    parser.add_argument(
        "--scatter-step",
        type=int,
        default=20,
        help="Pas d'échantillonnage pour les nuages de points individuels.",
    )
    args = parser.parse_args(argv)

    # A sampling step of 0 or less is meaningless; reject it before any I/O
    # instead of letting it reach the plotting helper.
    if args.scatter_step < 1:
        parser.error("--scatter-step doit être un entier supérieur ou égal à 1.")

    dataset_path = args.dataset
    if not dataset_path.exists():
        raise SystemExit(f"Dataset introuvable : {dataset_path}")

    df = load_raw_csv(dataset_path)
    print(f"Dataset chargé : {dataset_path} ({len(df)} lignes)")
    print()

    variables = _select_variables(args.variables)
    _generate_pairwise_scatter(df, variables, sample_step=args.scatter_step)

    print("✔ Terminé.")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())