Affiner les heatmaps de corrélation et l'annotation des lags
Parent: a36157b52f
Commit: 2ff719107b
Binary file not shown (image; 112 KiB after this commit).
Binary file not shown (image; 114 KiB after this commit).
@@ -122,12 +122,12 @@ python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"
 
 
 
-## Matrices de corrélation
+## Matrices de corrélation (instantané, signé)
 
 ```shell
-python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py"
+python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py" --transform absolute --upper-only
 ```
 
-
+
 
-
+
@@ -4,6 +4,9 @@ from __future__ import annotations
 from dataclasses import dataclass
 from pathlib import Path
 import sys
+import argparse
+import pandas as pd
+import numpy as np
 
 
 PROJECT_ROOT = Path(__file__).resolve().parents[3]

@@ -20,7 +23,6 @@ CSV_PATH = Path("data/weather_minutely.csv")
 DOC_DIR = Path(__file__).resolve().parent.parent
 
 CORRELATION_METHODS: tuple[str, ...] = ("pearson", "spearman")
-CORRELATION_TRANSFORM = "square"
 
 
 @dataclass(frozen=True)

@@ -36,13 +38,19 @@ class HeatmapConfig:
 HEATMAP_CONFIGS: dict[str, HeatmapConfig] = {
     "pearson": HeatmapConfig(
         filename="correlation_heatmap.png",
-        title="Corrélations R² (coef. de Pearson)",
-        colorbar_label="Coefficient de corrélation R²",
+        title="Corrélations (coef. de Pearson)",
+        colorbar_label="Coefficient de corrélation",
+        cmap="viridis",
+        vmin=0.0,
+        vmax=1.0,
     ),
     "spearman": HeatmapConfig(
         filename="correlation_heatmap_spearman.png",
-        title="Corrélations R² (coef. de Spearman)",
-        colorbar_label="Coefficient de corrélation R²",
+        title="Corrélations (coef. de Spearman)",
+        colorbar_label="Coefficient de corrélation",
+        cmap="viridis",
+        vmin=0.0,
+        vmax=1.0,
     ),
 }
 

@@ -63,36 +71,92 @@ def _get_heatmap_config(method: str) -> HeatmapConfig:
 
 
 def main() -> None:
+    parser = argparse.ArgumentParser(description="Trace des matrices de corrélation instantanées (signées, absolues ou r²).")
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=DOC_DIR / "figures",
+        help="Dossier de sortie pour les heatmaps.",
+    )
+    parser.add_argument(
+        "--transform",
+        choices=["identity", "absolute", "square"],
+        default="absolute",
+        help="Transformation de la matrice (signée, |r| ou r²). Par défaut : |r|.",
+    )
+    parser.add_argument(
+        "--upper-only",
+        action="store_true",
+        help="Masque la partie inférieure de la matrice pour alléger la lecture.",
+    )
+    args = parser.parse_args()
+
     if not CSV_PATH.exists():
         print(f"⚠ Fichier introuvable : {CSV_PATH}")
         print(" Assurez-vous d'avoir généré le dataset minuté.")
         return
 
     df = load_raw_csv(CSV_PATH)
+    df = df[[v.column for v in VARIABLES]]
     print(f"Dataset minuté chargé : {CSV_PATH}")
     print(f" Lignes : {len(df)}")
     print(f" Colonnes : {list(df.columns)}")
     print()
 
+    transform = args.transform
     matrices = compute_correlation_matrices_for_methods(
         df=df,
         variables=VARIABLES,
         methods=CORRELATION_METHODS,
-        transform=CORRELATION_TRANSFORM,
+        transform=transform,
     )
 
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+
     for method, corr in matrices.items():
-        print(f"Matrice de corrélation (méthode={method}, transform={CORRELATION_TRANSFORM}) :")
+        if args.upper_only:
+            mask = np.tril(np.ones_like(corr, dtype=bool), k=-1)
+            corr = corr.mask(mask)
+
+        print(f"Matrice de corrélation (méthode={method}, transform={transform}) :")
         print(corr)
         print()
 
         config = _get_heatmap_config(method)
+        filename = config.filename
+        title = config.title
+        if transform == "absolute":
+            title = f"{title} (|r|)"
+            stem, suffix = filename.rsplit(".", 1)
+            filename = f"{stem}_abs.{suffix}"
+        elif transform == "square":
+            title = f"{title} (r²)"
+            stem, suffix = filename.rsplit(".", 1)
+            filename = f"{stem}_r2.{suffix}"
+            config = HeatmapConfig(
+                filename=filename,
+                title=title,
+                colorbar_label="Coefficient de corrélation r²",
+                cmap="viridis",
+                vmin=0.0,
+                vmax=1.0,
+            )
+        elif transform == "identity":
+            config = HeatmapConfig(
+                filename=filename,
+                title=title,
+                colorbar_label="Coefficient de corrélation r",
+                cmap="coolwarm",
+                vmin=-1.0,
+                vmax=1.0,
+            )
+
         output_path = plot_correlation_heatmap(
             corr=corr,
             variables=VARIABLES,
-            output_path=DOC_DIR / "figures" / config.filename,
+            output_path=args.output_dir / filename,
             annotate=True,
-            title=config.title,
+            title=title,
             cmap=config.cmap,
             vmin=config.vmin,
             vmax=config.vmax,
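The `--upper-only` option added above hides the redundant lower half of the matrix: a strictly lower-triangular boolean mask is built with `np.tril(..., k=-1)` and applied with `DataFrame.mask`, so masked cells become NaN and stay blank on the heatmap. A minimal, self-contained sketch of that step on a toy matrix (illustrative values, not project data):

```python
# Toy illustration of the --upper-only masking step (not project code).
import numpy as np
import pandas as pd

cols = ["temperature", "humidity", "illuminance"]
corr = pd.DataFrame(
    [[1.0, -0.8, 0.6],
     [-0.8, 1.0, -0.5],
     [0.6, -0.5, 1.0]],
    index=cols,
    columns=cols,
)

# True strictly below the diagonal (k=-1 keeps the diagonal itself).
mask = np.tril(np.ones_like(corr, dtype=bool), k=-1)
upper_only = corr.mask(mask)  # lower-triangle cells become NaN
print(upper_only)
```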
Binary file not shown (image; 108 KiB after this commit).
@@ -6,19 +6,83 @@
 python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py"
 ```
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 
 
 
+
+
+
+
 
+
+
 
 
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+### Interprétation
+
+Ces graphiques sont désormais en corrélation signée (par défaut) et tracent des zones colorées symétriques pour les relations négatives/positives. Ils mettent en évidence la force, le sens et le décalage temporel entre deux variables.
+
+Ainsi, il est clair qu'il existe une corrélation forte et immédiate entre la température et l'humidité relative.
+La corrélation entre température et luminance est également forte, mais elle est décalée : le pic de luminance précède le pic de température de près de deux heures.
+
+Comme ces deux corrélations sont fortes, on en déduit que l'on devrait logiquement observer une corrélation forte et décalée entre la luminance et l'humidité relative (et c'est bien le cas, comme en témoigne le graphique correspondant).
+Un rapport similaire, quoique moins prononcé, existe entre la température, l'humidité relative et l'élévation solaire, puisque la luminance en dépend directement.
+
+### Matrices de corrélation avec lag optimal par paire (depuis les CSV)
+
+```shell
+python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py"
+```
+
+Cette heatmap est construite à partir des CSV `data/lagged_correlations`, en prenant pour chaque paire ordonnée (X → Y) le r maximal en |r| et le lag associé. Les lags (en minutes, signe conservé, y compris 0) sont annotés uniquement pour les corrélations d'intensité au moins « Modérée » (|r| ≥ 0,3) ; les PNG sont écrits dans `figures/` et les matrices correspondantes (`correlation_matrix_lagged.csv`, `lag_matrix_minutes.csv`) dans `data/`.
+
+
+
+Cette matrice met en évidence les plus fortes corrélations et leur décalage optimal.
+
 ## Corrélations glissantes
 
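The interpretation section above reads the curves with the convention printed on the figures' x-axis: lag > 0 means X precedes Y. As a minimal sketch of what such a lag scan computes, the snippet below shifts a synthetic series by about two hours and measures the correlation at each candidate lag; it only stands in for the project's `compute_lagged_correlation`, whose implementation is not shown in this commit:

```python
# Minimal lag-scan sketch on synthetic data (illustrative only).
import numpy as np
import pandas as pd

index = pd.date_range("2024-06-01", periods=24 * 60, freq="min")
x = pd.Series(np.sin(np.arange(len(index)) / 200.0), index=index)  # synthetic "driver"
noise = np.random.default_rng(0).normal(0.0, 0.1, len(index))
y = x.shift(120) + noise  # synthetic response lagging X by about 2 h

rows = []
for lag in range(-360, 361, 10):  # lag in minutes
    # lag > 0: X precedes Y, so Y is compared with X shifted forward by `lag`.
    rows.append({"lag_minutes": lag, "correlation": y.corr(x.shift(lag))})

lag_df = pd.DataFrame(rows)
best = lag_df.loc[lag_df["correlation"].abs().idxmax()]
print(best)  # peaks near lag ≈ +120 for this synthetic pair
```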
@@ -0,0 +1,191 @@
+# scripts/plot_lagged_correlation_heatmap_from_data.py
+from __future__ import annotations
+
+from pathlib import Path
+import sys
+import argparse
+
+import pandas as pd
+
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from meteo.variables import VARIABLES, VARIABLES_BY_KEY
+from meteo.plots import plot_correlation_heatmap
+
+
+DATA_DIR = Path("docs/05 - Corrélations binaires avancées/data/lagged_correlations")
+FIG_DIR = Path("docs/05 - Corrélations binaires avancées/figures")
+DATA_OUTPUT_DIR = Path("docs/05 - Corrélations binaires avancées/data")
+
+
+def _load_best_corr_and_lag(csv_path: Path) -> tuple[float, int, bool] | None:
+    """Retourne (corr, lag, sign_known) au max |r| à partir d'un CSV de lagcorr."""
+
+    df = pd.read_csv(csv_path)
+
+    if "Pearson" in df.columns:
+        series = df["Pearson"]
+        sign_known = True
+    elif "correlation" in df.columns:
+        series = df["correlation"]
+        sign_known = True
+    elif "Pearson |r|" in df.columns:
+        series = df["Pearson |r|"]
+        sign_known = False
+    elif "Pearson (r²)" in df.columns:
+        series = (df["Pearson (r²)"].abs()) ** 0.5
+        sign_known = False
+    else:
+        return None
+
+    abs_series = series.abs()
+    if abs_series.empty or abs_series.isna().all():
+        return None
+
+    idx = abs_series.idxmax()
+    best_corr = series.iloc[idx]
+    best_lag = int(df.loc[idx, "lag_minutes"])
+    # Si pas de signe, on retourne un corr positif (le signe pourra être posé via l'inverse)
+    if not sign_known:
+        best_corr = abs(best_corr)
+    return best_corr, best_lag, sign_known
+
+
+def _get_pair_best(
+    vx_key: str,
+    vy_key: str,
+    *,
+    data_dir: Path,
+) -> tuple[float, int, bool, str] | None:
+    """
+    Retourne (corr, lag, sign_known, source) pour la paire ordonnée vx->vy.
+    Si le CSV direct ne contient pas le signe (|r| ou r²), tente de l'inférer
+    à partir du CSV inverse (vy->vx) en inversant le lag.
+    """
+    primary = data_dir / f"lagcorr_{vx_key}_to_{vy_key}.csv"
+    reverse = data_dir / f"lagcorr_{vy_key}_to_{vx_key}.csv"
+
+    primary_res = _load_best_corr_and_lag(primary) if primary.exists() else None
+    if primary_res and primary_res[2]:
+        corr, lag, sign_known = primary_res
+        return corr, lag, sign_known, primary.name
+
+    # Tentative d'inférence via le CSV inverse
+    reverse_res = _load_best_corr_and_lag(reverse) if reverse.exists() else None
+    if primary_res and reverse_res and reverse_res[2]:
+        primary_corr, _, _ = primary_res
+        rev_corr, rev_lag, _ = reverse_res
+        corr = primary_corr if primary_res[2] else abs(primary_corr) * (1 if rev_corr >= 0 else -1)
+        lag = -rev_lag
+        return corr, lag, True, f"{primary.name} (signe/lag inférés depuis {reverse.name})"
+
+    if primary_res:
+        corr, lag, sign_known = primary_res
+        return corr, lag, sign_known, primary.name if primary.exists() else "n/a"
+
+    if reverse_res:
+        rev_corr, rev_lag, sign_known = reverse_res
+        corr = rev_corr
+        lag = -rev_lag
+        return corr, lag, sign_known, reverse.name
+
+    return None
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Construit des matrices depuis les CSV lagcorr_* existants (max |r| par paire ordonnée)."
+    )
+    parser.add_argument(
+        "--data-dir",
+        type=Path,
+        default=DATA_DIR,
+        help="Dossier contenant les CSV lagcorr_*_to_*.csv.",
+    )
+    parser.add_argument(
+        "--fig-dir",
+        type=Path,
+        default=FIG_DIR,
+        help="Dossier de sortie pour la heatmap.",
+    )
+    parser.add_argument(
+        "--data-output-dir",
+        type=Path,
+        default=DATA_OUTPUT_DIR,
+        help="Dossier de sortie pour les matrices CSV exportées.",
+    )
+    parser.add_argument(
+        "--annot-threshold",
+        type=float,
+        default=0.3,
+        help="N'affiche le lag annoté que si |r| >= ce seuil (0 pour tout afficher).",
+    )
+    args = parser.parse_args()
+
+    args.fig_dir.mkdir(parents=True, exist_ok=True)
+    args.data_output_dir.mkdir(parents=True, exist_ok=True)
+
+    columns = [v.column for v in VARIABLES]
+    corr_matrix = pd.DataFrame(index=columns, columns=columns, dtype=float)
+    lag_matrix = pd.DataFrame(index=columns, columns=columns, dtype=int)
+
+    missing_files: list[str] = []
+    sign_unknown: list[str] = []
+
+    for vx in VARIABLES:
+        for vy in VARIABLES:
+            if vx == vy:
+                corr_matrix.loc[vx.column, vy.column] = 1.0
+                lag_matrix.loc[vx.column, vy.column] = 0
+                continue
+            res = _get_pair_best(vx.key, vy.key, data_dir=args.data_dir)
+            if res is None:
+                missing_files.append(f"{vx.key}→{vy.key}")
+                corr_matrix.loc[vx.column, vy.column] = float("nan")
+                lag_matrix.loc[vx.column, vy.column] = 0
+                continue
+
+            best_corr, best_lag, sign_known, source = res
+            if not sign_known:
+                sign_unknown.append(f"{vx.key}→{vy.key} (source={source})")
+            corr_matrix.loc[vx.column, vy.column] = best_corr
+            lag_matrix.loc[vx.column, vy.column] = best_lag
+
+    if missing_files:
+        print("⚠ CSV manquants pour certaines paires :", ", ".join(missing_files))
+    if sign_unknown:
+        print("⚠ Signe inconnu (CSV en |r| ou r²) pour :", ", ".join(sign_unknown))
+
+    # Heatmap signée (valeurs positives lorsque le signe est absent des CSV)
+    output_path = args.fig_dir / "correlation_heatmap_lagged.png"
+    annot_df = lag_matrix.copy()
+    if args.annot_threshold > 0:
+        mask = corr_matrix.abs() < args.annot_threshold
+        annot_df = annot_df.mask(mask, "")
+    plot_correlation_heatmap(
+        corr=corr_matrix,
+        variables=VARIABLES,
+        output_path=output_path,
+        annotate=True,
+        annotate_values=annot_df,
+        title="Corrélations (lag optimal par paire, issues des CSV)",
+        cmap="coolwarm",
+        vmin=-1.0,
+        vmax=1.0,
+        colorbar_label="Coefficient de corrélation r",
+    )
+    print(f"✔ Heatmap laggée sauvegardée dans : {output_path}")
+
+    corr_csv = args.data_output_dir / "correlation_matrix_lagged.csv"
+    corr_matrix.to_csv(corr_csv)
+    print(f"✔ Matrice des corrélations exportée : {corr_csv}")
+
+    lag_csv = args.data_output_dir / "lag_matrix_minutes.csv"
+    lag_matrix.to_csv(lag_csv)
+    print(f"✔ Matrice des lags exportée : {lag_csv}")
+
+
+if __name__ == "__main__":
+    main()
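The loader above accepts several column layouts for the `lagcorr_*_to_*.csv` files (`Pearson`, `correlation`, `Pearson |r|`, `Pearson (r²)`), always keyed by a `lag_minutes` column. A small sketch with made-up numbers showing the max-|r| extraction it performs on the signed `correlation` variant:

```python
# Toy lagcorr CSV (made-up values) and the max-|r| extraction used above.
from io import StringIO

import pandas as pd

csv_text = """lag_minutes,correlation
-20,0.10
-10,0.35
0,0.52
10,0.61
20,-0.58
"""
df = pd.read_csv(StringIO(csv_text))

series = df["correlation"]
idx = series.abs().idxmax()                  # row with the strongest |r|
best_corr = series.iloc[idx]                 # signed value is preserved
best_lag = int(df.loc[idx, "lag_minutes"])
print(best_corr, best_lag)                   # -> 0.61 10
```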
@@ -4,16 +4,21 @@ from __future__ import annotations
 from pathlib import Path
 import sys
 
+import argparse
 
 PROJECT_ROOT = Path(__file__).resolve().parents[3]
 if str(PROJECT_ROOT) not in sys.path:
     sys.path.insert(0, str(PROJECT_ROOT))
 
 from meteo.dataset import load_raw_csv
-from meteo.variables import VARIABLES_BY_KEY
+from meteo.variables import VARIABLES, VARIABLES_BY_KEY
 from meteo.analysis import compute_lagged_correlation
-from meteo.plots import plot_lagged_correlation
-from meteo.correlation_presets import DEFAULT_LAGGED_PAIRS
+from meteo.plots import plot_lagged_correlation_multi
+from meteo.correlation_presets import (
+    DEFAULT_ABS_CORRELATION_BANDS,
+    DEFAULT_SIGNED_CORRELATION_BANDS,
+    CorrelationBand,
+)
 
 
 CSV_PATH = Path("data/weather_minutely.csv")

@@ -22,39 +27,143 @@ OUTPUT_DIR = DOC_DIR / "figures" / "lagged_correlations"
 
 
 def main() -> None:
+    parser = argparse.ArgumentParser(description="Trace les corrélations décalées pour toutes les paires de variables.")
+    parser.add_argument(
+        "--max-lag",
+        type=int,
+        default=720,
+        help="Décalage maximal en minutes (par défaut : 720 = ±12h).",
+    )
+    parser.add_argument(
+        "--step",
+        type=int,
+        default=10,
+        help="Pas en minutes pour l'évaluation des lags (par défaut : 10).",
+    )
+    parser.add_argument(
+        "--resample",
+        default="none",
+        help="Fréquence d'agrégation avant calcul (par défaut : 'none') Exemple : '10min'.",
+    )
+    parser.add_argument(
+        "--signed",
+        action="store_true",
+        default=True,
+        help="Affiche les corrélations signées. Utiliser --no-signed pour revenir aux valeurs absolues |r|.",
+    )
+    parser.add_argument(
+        "--no-signed",
+        action="store_false",
+        dest="signed",
+        help="Alias explicite pour repasser en valeurs absolues |r|.",
+    )
+    parser.add_argument(
+        "--thresholds",
+        default=None,
+        help=(
+            "Seuils personnalisés (valeurs séparées par des virgules). "
+            "Par défaut, utilise les bandes définies dans meteo.correlation_presets "
+            "(abs ou signées selon --signed)."
+        ),
+    )
+    parser.add_argument(
+        "--only",
+        nargs="*",
+        help="Clés de variables à inclure (par défaut toutes les variables numériques).",
+    )
+    args = parser.parse_args()
+
     if not CSV_PATH.exists():
         print(f"⚠ Fichier introuvable : {CSV_PATH}")
         return
 
     df = load_raw_csv(CSV_PATH)
+    df = df.select_dtypes(include="number")
+    if args.resample and args.resample.lower() != "none":
+        df = df.resample(args.resample).mean()
+        print(f"Dataset rééchantillonné à {args.resample} pour accélérer le calcul.")
     print(f"Dataset minuté chargé : {CSV_PATH}")
     print(f" Lignes : {len(df)}")
     print(f" Colonnes : {list(df.columns)}")
     print()
 
-    for key_x, key_y in DEFAULT_LAGGED_PAIRS:
-        var_x = VARIABLES_BY_KEY[key_x]
-        var_y = VARIABLES_BY_KEY[key_y]
+    if args.only:
+        missing = [k for k in args.only if k not in VARIABLES_BY_KEY]
+        if missing:
+            raise KeyError(f"Variables inconnues : {', '.join(missing)}")
+        variables = [VARIABLES_BY_KEY[k] for k in args.only]
+    else:
+        variables = list(VARIABLES)
+    pairs = [(vx, vy) for i, vx in enumerate(variables) for vy in variables[i + 1 :]]
+    print(f"Paires analysées : {len(pairs)} (combinaisons uniques, sans inverses).")
+
+    for var_x, var_y in pairs:
         print(f"→ Corrélation décalée : {var_x.key} → {var_y.key}")
 
-        lag_df = compute_lagged_correlation(
+        lag_df_pearson = compute_lagged_correlation(
             df=df,
             var_x=var_x,
             var_y=var_y,
-            max_lag_minutes=360, # ± 6 heures
-            step_minutes=10, # pas de 10 minutes
+            max_lag_minutes=args.max_lag,
+            step_minutes=args.step,
             method="pearson",
-        )
+        )["correlation"]
 
+        lag_df_spearman = compute_lagged_correlation(
+            df=df,
+            var_x=var_x,
+            var_y=var_y,
+            max_lag_minutes=args.max_lag,
+            step_minutes=args.step,
+            method="spearman",
+        )["correlation"]
+
+        use_abs = not args.signed
+        if use_abs:
+            lag_df_pearson = lag_df_pearson.abs()
+            lag_df_spearman = lag_df_spearman.abs()
+            ylabel = "Corrélation (|r|)"
+            labels = {"Pearson": "Pearson |r|", "Spearman": "Spearman |r|"}
+            y_limits = (0.0, 1.0)
+            threshold_values = (
+                [
+                    float(t)
+                    for t in (args.thresholds or "").split(",")
+                    if t.strip() != ""
+                ]
+                if args.thresholds
+                else []
+            )
+            bands = list(DEFAULT_ABS_CORRELATION_BANDS)
+        else:
+            ylabel = "Corrélation"
+            labels = {"Pearson": "Pearson", "Spearman": "Spearman"}
+            y_limits = (-1.0, 1.0)
+            threshold_values: list[float] = []
+            if args.thresholds:
+                threshold_values = [
+                    float(t)
+                    for t in args.thresholds.split(",")
+                    if t.strip() != ""
+                ]
+                threshold_values = sorted({v for thr in threshold_values for v in (thr, -thr)})
+            bands = list(DEFAULT_SIGNED_CORRELATION_BANDS)
 
         filename = f"lagcorr_{var_x.key}_to_{var_y.key}.png"
         output_path = OUTPUT_DIR / filename
 
-        plot_lagged_correlation(
-            lag_df=lag_df,
+        plot_lagged_correlation_multi(
+            lag_series={
+                labels["Pearson"]: lag_df_pearson,
+                labels["Spearman"]: lag_df_spearman,
+            },
             var_x=var_x,
             var_y=var_y,
             output_path=output_path,
+            ylabel=ylabel,
+            y_limits=y_limits,
+            thresholds=threshold_values,
+            bands=bands,
         )
 
     print("✔ Graphiques de corrélation décalée générés.")
@@ -23,6 +23,35 @@ HEXBIN_REDUCE_LABELS: dict[str, str] = {
     "max": "maximum",
 }
 
+@dataclass(frozen=True)
+class CorrelationBand:
+    """Intervalle de corrélation, avec couleur et étiquette pour l'affichage."""
+
+    min_value: float
+    max_value: float
+    label: str
+    color: str
+
+
+# Repères par défaut pour lire rapidement l'intensité (en |r|)
+DEFAULT_ABS_CORRELATION_BANDS: Sequence[CorrelationBand] = (
+    CorrelationBand(0.0, 0.1, "Quasi nulle", "#f6f6f6"),
+    CorrelationBand(0.1, 0.3, "Faible", "#dce8f7"),
+    CorrelationBand(0.3, 0.5, "Modérée", "#c8e6c9"),
+    CorrelationBand(0.5, 1.0, "Forte", "#ffe0b2"),
+)
+
+# Bandes pour corrélations signées (symétriques autour de 0)
+DEFAULT_SIGNED_CORRELATION_BANDS: Sequence[CorrelationBand] = (
+    CorrelationBand(-1.0, -0.5, "Forte négative", "#c6dbef"),
+    CorrelationBand(-0.5, -0.3, "Modérée négative", "#deebf7"),
+    CorrelationBand(-0.3, -0.1, "Faible négative", "#edf8fb"),
+    CorrelationBand(-0.1, 0.1, "Quasi nulle", "#f5f5f5"),
+    CorrelationBand(0.1, 0.3, "Faible positive", "#fff7ec"),
+    CorrelationBand(0.3, 0.5, "Modérée positive", "#fee8c8"),
+    CorrelationBand(0.5, 1.0, "Forte positive", "#fdbb84"),
+)
+
 
 @dataclass(frozen=True)
 class HexbinScenario:

@@ -116,4 +145,7 @@ __all__ = [
     "DEFAULT_LAGGED_PAIRS",
     "DEFAULT_ROLLING_PAIRS",
     "DEFAULT_HEXBIN_SCENARIOS",
+    "CorrelationBand",
+    "DEFAULT_ABS_CORRELATION_BANDS",
+    "DEFAULT_SIGNED_CORRELATION_BANDS",
 ]
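The bands above can also be read programmatically, for example to turn a coefficient into its readable label. A hedged sketch, assuming the repository root is on the import path; `band_label` is an illustrative helper, not part of the commit:

```python
# Illustrative helper built on the bands added above (hypothetical, not project code).
from meteo.correlation_presets import DEFAULT_SIGNED_CORRELATION_BANDS


def band_label(r: float) -> str:
    """Return the label of the first signed band containing r."""
    for band in DEFAULT_SIGNED_CORRELATION_BANDS:
        if band.min_value <= r <= band.max_value:
            return band.label
    return "Hors échelle"


print(band_label(-0.62))  # Forte négative
print(band_label(0.2))    # Faible positive
```

Adjacent bands share their edge values (0.1, 0.3, 0.5), so in this sketch the first matching band wins.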
@@ -13,7 +13,9 @@ from .calendar_overview import (
 from .correlations import (
     plot_correlation_heatmap,
     plot_lagged_correlation,
+    plot_lagged_correlation_multi,
     plot_rolling_correlation_heatmap,
+    CorrelationBand,
 )
 from .rain import plot_daily_rainfall_hyetograph, plot_rainfall_by_season
 from .relationships import (

@@ -55,7 +57,9 @@ __all__ = [
     "rainfall_daily_total_series",
     "plot_correlation_heatmap",
     "plot_lagged_correlation",
+    "plot_lagged_correlation_multi",
     "plot_rolling_correlation_heatmap",
+    "CorrelationBand",
     "plot_daily_rainfall_hyetograph",
     "plot_rainfall_by_season",
     "plot_event_composite",
@@ -3,16 +3,23 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Sequence
+from typing import Iterable, Sequence
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
+from meteo.correlation_presets import CorrelationBand
 from .base import export_plot_dataset
 from meteo.variables import Variable
 
-__all__ = ['plot_lagged_correlation', 'plot_correlation_heatmap', 'plot_rolling_correlation_heatmap']
+__all__ = [
+    'plot_lagged_correlation',
+    'plot_lagged_correlation_multi',
+    'plot_correlation_heatmap',
+    'plot_rolling_correlation_heatmap',
+    'CorrelationBand',
+]
 
 
 def plot_lagged_correlation(

@@ -42,13 +49,89 @@ def plot_lagged_correlation(
 
     return output_path.resolve()
+
+
+def plot_lagged_correlation_multi(
+    lag_series: dict[str, pd.Series],
+    var_x: Variable,
+    var_y: Variable,
+    output_path: str | Path,
+    *,
+    title_suffix: str | None = None,
+    ylabel: str = "Corrélation",
+    y_limits: tuple[float, float] | None = None,
+    thresholds: Sequence[float] | None = None,
+    bands: Iterable["CorrelationBand"] | None = None,
+) -> Path:
+    """
+    Trace plusieurs courbes de corrélation en fonction du lag (ex. Pearson/Spearman).
+    """
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    df = pd.concat(lag_series, axis=1)
+    export_plot_dataset(df, output_path)
+
+    plt.figure()
+    colors = ["#1f77b4", "#d1495b", "#2ca02c", "#9467bd"]
+    for idx, (label, series) in enumerate(df.items()):
+        plt.plot(series.index, series, label=label, color=colors[idx % len(colors)], linewidth=1.6)
+
+    ax = plt.gca()
+    if bands:
+        xmin, xmax = df.index.min(), df.index.max()
+        for band in bands:
+            ax.axhspan(band.min_value, band.max_value, color=band.color, alpha=0.25, zorder=0)
+            ax.text(
+                xmax,
+                (band.min_value + band.max_value) / 2.0,
+                band.label,
+                ha="right",
+                va="center",
+                fontsize=8,
+                color="#444444",
+                bbox=dict(facecolor="white", edgecolor="none", alpha=0.6, pad=1.5),
+            )
+
+    plt.axvline(0, linestyle="--", color="#666666", linewidth=1.0)
+    plt.xlabel("Décalage (minutes)\n(lag > 0 : X précède Y)")
+    plt.ylabel(ylabel)
+    title = f"Corrélation décalée : {var_x.label} → {var_y.label}"
+    if title_suffix:
+        title = f"{title} ({title_suffix})"
+    plt.title(title)
+    if thresholds:
+        xmin, xmax = plt.xlim()
+        for thr in thresholds:
+            plt.axhline(thr, color="#999999", linestyle="--", linewidth=1.0, alpha=0.85)
+            plt.text(
+                xmax,
+                thr,
+                f"{thr:.2f}",
+                ha="right",
+                va="center",
+                fontsize=8,
+                color="#555555",
+                bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, pad=1.5),
+            )
+    if y_limits is not None:
+        plt.ylim(*y_limits)
+    plt.grid(True, alpha=0.7)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=150)
+    plt.close()
+
+    return output_path.resolve()
 
 def plot_correlation_heatmap(
     corr: pd.DataFrame,
     variables: Sequence[Variable],
     output_path: str | Path,
     *,
     annotate: bool = True,
+    annotate_values: "pd.DataFrame | None" = None,
     title: str | None = None,
+    figsize: tuple[float, float] | None = None,
     cmap: str | None = None,
     vmin: float | None = None,
     vmax: float | None = None,

@@ -89,7 +172,13 @@ def plot_correlation_heatmap(
 
     data = corr.to_numpy()
 
-    fig, ax = plt.subplots()
+    if figsize is None:
+        n = len(variables)
+        # Augmente la taille pour laisser respirer les annotations
+        side = max(6.0, n * 0.9)
+        figsize = (side, side)
+
+    fig, ax = plt.subplots(figsize=figsize)
     if vmin is None:
         vmin = -1.0
     if vmax is None:

@@ -117,6 +206,11 @@
     # Annotation des cases
     if annotate:
         n = data.shape[0]
+        annot_data = (
+            annotate_values.loc[columns, columns].to_numpy()
+            if annotate_values is not None
+            else data
+        )
        norm = im.norm
         cmap_obj = im.cmap

@@ -128,18 +222,23 @@
 
         for i in range(n):
             for j in range(n):
-                val = data[i, j]
+                val_corr = data[i, j]
+                val_annot = annot_data[i, j]
                 if i == j:
                     text = "—"
-                elif np.isnan(val):
+                elif isinstance(val_annot, (float, int, np.floating)) and np.isnan(val_annot):
                     text = ""
                 else:
-                    text = f"{val:.2f}"
+                    # si annotate_values est fourni, on affiche la valeur annotée brute
+                    if annotate_values is not None:
+                        text = str(val_annot)
+                    else:
+                        text = f"{val_corr:.2f}"
 
                 if not text:
                     continue
 
-                color = _text_color(0.0 if np.isnan(val) else val)
+                color = _text_color(0.0 if np.isnan(val_corr) else val_corr)
                 ax.text(
                     j,
                     i,
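To see the band shading and reference lines outside the project (toy data, no `meteo` imports), here is a stand-alone sketch in the spirit of `plot_lagged_correlation_multi` above; the output file name `lagcorr_bands_demo.png` is made up:

```python
# Stand-alone rendering sketch: one lag-correlation curve over signed colour bands.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

lags = np.arange(-360, 361, 10)
pearson = pd.Series(np.cos((lags - 120) / 180.0 * np.pi) * 0.8, index=lags)  # toy curve

plt.figure()
plt.plot(pearson.index, pearson, label="Pearson", color="#1f77b4", linewidth=1.6)

# Horizontal bands, same spirit as DEFAULT_SIGNED_CORRELATION_BANDS (simplified here).
for lo, hi, color in [(-1.0, -0.5, "#c6dbef"), (-0.5, 0.5, "#f5f5f5"), (0.5, 1.0, "#fdbb84")]:
    plt.gca().axhspan(lo, hi, color=color, alpha=0.25, zorder=0)

plt.axvline(0, linestyle="--", color="#666666", linewidth=1.0)  # lag = 0 reference
plt.xlabel("Décalage (minutes)\n(lag > 0 : X précède Y)")
plt.ylabel("Corrélation")
plt.ylim(-1.0, 1.0)
plt.legend()
plt.tight_layout()
plt.savefig("lagcorr_bands_demo.png", dpi=150)
plt.close()
```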