Réorganisation
BIN
docs/04 - Corrélations binaires/figures/correlation_heatmap.png
Normal file
|
After Width: | Height: | Size: 127 KiB |
|
After Width: | Height: | Size: 497 KiB |
|
After Width: | Height: | Size: 432 KiB |
|
After Width: | Height: | Size: 83 KiB |
|
After Width: | Height: | Size: 610 KiB |
|
After Width: | Height: | Size: 587 KiB |
|
After Width: | Height: | Size: 379 KiB |
|
After Width: | Height: | Size: 235 KiB |
|
After Width: | Height: | Size: 364 KiB |
|
After Width: | Height: | Size: 351 KiB |
|
After Width: | Height: | Size: 390 KiB |
|
After Width: | Height: | Size: 102 KiB |
|
After Width: | Height: | Size: 505 KiB |
|
After Width: | Height: | Size: 479 KiB |
|
After Width: | Height: | Size: 325 KiB |
|
After Width: | Height: | Size: 88 KiB |
|
After Width: | Height: | Size: 111 KiB |
|
After Width: | Height: | Size: 108 KiB |
|
After Width: | Height: | Size: 97 KiB |
|
After Width: | Height: | Size: 445 KiB |
|
After Width: | Height: | Size: 430 KiB |
|
After Width: | Height: | Size: 363 KiB |
|
After Width: | Height: | Size: 92 KiB |
|
After Width: | Height: | Size: 512 KiB |
|
After Width: | Height: | Size: 471 KiB |
|
After Width: | Height: | Size: 340 KiB |
|
After Width: | Height: | Size: 574 KiB |
|
After Width: | Height: | Size: 403 KiB |
|
After Width: | Height: | Size: 201 KiB |
72
docs/04 - Corrélations binaires/index.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Corrélations binaires
|
||||
|
||||
Cette étape regroupe l'ensemble des scripts dédiés aux corrélations et comparaisons directes entre variables.
|
||||
Chaque figure déposée dans `figures/` possède son CSV compagnon exporté dans le dossier `data/` au même emplacement.
|
||||
|
||||
```shell
|
||||
python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## Heatmap globale
|
||||
|
||||
```shell
|
||||
python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py"
|
||||
```
|
||||
|
||||

|
||||
@@ -0,0 +1,52 @@
|
||||
# scripts/plot_all_pairwise_scatter.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import iter_variable_pairs
|
||||
from meteo.plots import plot_scatter_pair
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
DOC_DIR = Path(__file__).resolve().parent.parent
|
||||
OUTPUT_DIR = DOC_DIR / "figures" / "pairwise_scatter"
|
||||
|
||||
|
||||
def main() -> None:
    """Generate one scatter plot per variable pair from the minutely dataset.

    Loads ``CSV_PATH`` with ``load_raw_csv``, prints a short summary of the
    dataset, then calls ``plot_scatter_pair`` for every pair returned by
    ``iter_variable_pairs``, writing each PNG under ``OUTPUT_DIR``.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without producing anything.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")

    pairs = iter_variable_pairs()
    print(f"Nombre de paires de variables : {len(pairs)}")

    # Make sure the destination exists before the first figure is written,
    # as the targeted-scatter script does for the same directory.
    # NOTE(review): plot_scatter_pair is not visible here; if it already
    # creates parent directories this call is a harmless no-op.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for var_x, var_y in pairs:
        filename = f"scatter_{var_x.key}_vs_{var_y.key}.png"
        output_path = OUTPUT_DIR / filename

        print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
        plot_scatter_pair(
            df=df,
            var_x=var_x,
            var_y=var_y,
            output_path=output_path,
            sample_step=10,  # keep one point in 10: ≈ 32k points instead of 320k
        )

    print("✔ Tous les graphiques de nuages de points ont été générés.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,52 @@
|
||||
# scripts/plot_correlation_heatmap.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES
|
||||
from meteo.analysis import compute_correlation_matrix_for_variables
|
||||
from meteo.plots import plot_correlation_heatmap
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
DOC_DIR = Path(__file__).resolve().parent.parent
|
||||
OUTPUT_PATH = DOC_DIR / "figures" / "correlation_heatmap.png"
|
||||
|
||||
|
||||
def main() -> None:
    """Compute the Pearson correlation matrix and save it as a heatmap.

    Reads the dataset at ``CSV_PATH``, prints a summary and the correlation
    matrix computed over ``VARIABLES``, then asks ``plot_correlation_heatmap``
    to render it to ``OUTPUT_PATH`` with cell annotations enabled.

    When ``CSV_PATH`` is missing, two hint lines are printed and nothing else
    happens.
    """
    if not CSV_PATH.exists():
        print(
            f"⚠ Fichier introuvable : {CSV_PATH}",
            " Assurez-vous d'avoir généré le dataset minuté.",
            sep="\n",
        )
        return

    frame = load_raw_csv(CSV_PATH)
    # One call emits the three summary lines followed by a blank line.
    print(
        f"Dataset minuté chargé : {CSV_PATH}",
        f" Lignes : {len(frame)}",
        f" Colonnes : {list(frame.columns)}",
        "",
        sep="\n",
    )

    matrix = compute_correlation_matrix_for_variables(
        frame,
        VARIABLES,
        method="pearson",
    )

    print("Matrice de corrélation (aperçu) :")
    print(matrix, end="\n\n")

    saved_to = plot_correlation_heatmap(
        corr=matrix,
        variables=VARIABLES,
        output_path=OUTPUT_PATH,
        annotate=True,
    )
    print(f"✔ Heatmap de corrélation sauvegardée dans : {saved_to}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
96
docs/04 - Corrélations binaires/scripts/plot_correlations.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# scripts/plot_correlations.py
|
||||
"""Produit les nuages de points ciblés entre variables sélectionnées."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from typing import Sequence
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
|
||||
from meteo.plots import plot_scatter_pair
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
DOC_DIR = Path(__file__).resolve().parent.parent
|
||||
SCATTER_DIR = DOC_DIR / "figures" / "pairwise_scatter"
|
||||
|
||||
|
||||
def _select_variables(keys: Sequence[str] | None) -> list[Variable]:
    """Resolve variable keys into ``Variable`` objects.

    Args:
        keys: Keys to look up in ``VARIABLES_BY_KEY``. ``None`` or an empty
            sequence selects every entry of ``VARIABLES``.

    Returns:
        The matching variables, in the order the keys were given (or a fresh
        list copy of ``VARIABLES`` when no key is supplied).

    Raises:
        SystemExit: If a key is absent from ``VARIABLES_BY_KEY``; the message
            names the offending key.
    """
    if keys:
        chosen: list[Variable] = []
        for name in keys:
            try:
                chosen.append(VARIABLES_BY_KEY[name])
            except KeyError as exc:
                raise SystemExit(f"Variable inconnue : {exc.args[0]!r}.") from exc
        return chosen

    return list(VARIABLES)
|
||||
|
||||
|
||||
def _generate_pairwise_scatter(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    *,
    sample_step: int,
) -> None:
    """Write a scatter plot for every variable pair fully contained in *variables*.

    Args:
        df: Dataset handed as-is to ``plot_scatter_pair``.
        variables: Variables to keep; a pair from ``iter_variable_pairs`` is
            plotted only when both of its members are in this sequence.
        sample_step: Forwarded to ``plot_scatter_pair`` as ``sample_step``.

    Prints a warning and returns without touching the filesystem when no pair
    qualifies; otherwise creates ``SCATTER_DIR`` if needed and writes one PNG
    per retained pair.
    """
    kept = []
    for first, second in iter_variable_pairs():
        if first not in variables or second not in variables:
            continue
        kept.append((first, second))

    if not kept:
        print("⚠ Aucun couple sélectionné pour les nuages de points.")
        return

    SCATTER_DIR.mkdir(parents=True, exist_ok=True)

    for var_x, var_y in kept:
        target = SCATTER_DIR / f"scatter_{var_x.key}_vs_{var_y.key}.png"
        print(f"→ Scatter {var_y.key} vs {var_x.key}")
        plot_scatter_pair(
            df,
            var_x=var_x,
            var_y=var_y,
            output_path=target,
            sample_step=sample_step,
        )

    print(f"✔ {len(kept)} nuage(s) de points généré(s) dans {SCATTER_DIR}.")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the targeted scatter-plot generation.

    Args:
        argv: Argument list passed to ``argparse``; ``None`` lets argparse
            read the process arguments.

    Returns:
        ``0`` once the plots have been generated.

    Raises:
        SystemExit: If the dataset path does not exist (and, via
            ``_select_variables``, if an unknown variable key is given).
    """
    parser = argparse.ArgumentParser(
        description="Génère des nuages de points pairwise entre variables.",
    )

    # Options are declared as data and registered in the original order.
    option_specs = (
        (
            "--dataset",
            {
                "type": Path,
                "default": CSV_PATH,
                "help": "Dataset à utiliser (par défaut : data/weather_minutely.csv).",
            },
        ),
        (
            "--variables",
            {
                "nargs": "*",
                "help": "Restreint l'analyse à certaines clés de variables.",
            },
        ),
        (
            "--scatter-step",
            {
                "type": int,
                "default": 20,
                "help": "Pas d'échantillonnage pour les nuages de points individuels.",
            },
        ),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)

    options = parser.parse_args(argv)

    source = options.dataset
    if not source.exists():
        raise SystemExit(f"Dataset introuvable : {source}")

    frame = load_raw_csv(source)
    # The summary line is followed by one blank line.
    print(f"Dataset chargé : {source} ({len(frame)} lignes)", end="\n\n")

    chosen = _select_variables(options.variables)
    _generate_pairwise_scatter(frame, chosen, sample_step=options.scatter_step)

    print("✔ Terminé.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main())
|
||||