1

Affiner les heatmaps de corrélation et l'annotation des lags

This commit is contained in:
Richard Dern 2025-11-21 01:46:06 +01:00
parent a36157b52f
commit 2ff719107b
11 changed files with 599 additions and 36 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

View File

@ -122,12 +122,12 @@ python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"
![](figures/pairwise_scatter/scatter_wind_speed_vs_wind_direction.png) ![](figures/pairwise_scatter/scatter_wind_speed_vs_wind_direction.png)
## Matrices de corrélation ## Matrices de corrélation (instantané, signé)
```shell ```shell
python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py" python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py" --transform absolute --upper-only
``` ```
![](figures/correlation_heatmap.png) ![](figures/correlation_heatmap_abs.png)
![](figures/correlation_heatmap_spearman.png) ![](figures/correlation_heatmap_spearman_abs.png)

View File

@ -4,6 +4,9 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
import sys import sys
import argparse
import pandas as pd
import numpy as np
PROJECT_ROOT = Path(__file__).resolve().parents[3] PROJECT_ROOT = Path(__file__).resolve().parents[3]
@ -20,7 +23,6 @@ CSV_PATH = Path("data/weather_minutely.csv")
DOC_DIR = Path(__file__).resolve().parent.parent DOC_DIR = Path(__file__).resolve().parent.parent
CORRELATION_METHODS: tuple[str, ...] = ("pearson", "spearman") CORRELATION_METHODS: tuple[str, ...] = ("pearson", "spearman")
CORRELATION_TRANSFORM = "square"
@dataclass(frozen=True) @dataclass(frozen=True)
@ -36,13 +38,19 @@ class HeatmapConfig:
HEATMAP_CONFIGS: dict[str, HeatmapConfig] = { HEATMAP_CONFIGS: dict[str, HeatmapConfig] = {
"pearson": HeatmapConfig( "pearson": HeatmapConfig(
filename="correlation_heatmap.png", filename="correlation_heatmap.png",
title="Corrélations R² (coef. de Pearson)", title="Corrélations (coef. de Pearson)",
colorbar_label="Coefficient de corrélation R²", colorbar_label="Coefficient de corrélation",
cmap="viridis",
vmin=0.0,
vmax=1.0,
), ),
"spearman": HeatmapConfig( "spearman": HeatmapConfig(
filename="correlation_heatmap_spearman.png", filename="correlation_heatmap_spearman.png",
title="Corrélations R² (coef. de Spearman)", title="Corrélations (coef. de Spearman)",
colorbar_label="Coefficient de corrélation R²", colorbar_label="Coefficient de corrélation",
cmap="viridis",
vmin=0.0,
vmax=1.0,
), ),
} }
@ -63,36 +71,92 @@ def _get_heatmap_config(method: str) -> HeatmapConfig:
def main() -> None: def main() -> None:
parser = argparse.ArgumentParser(description="Trace des matrices de corrélation instantanées (signées, absolues ou r²).")
parser.add_argument(
"--output-dir",
type=Path,
default=DOC_DIR / "figures",
help="Dossier de sortie pour les heatmaps.",
)
parser.add_argument(
"--transform",
choices=["identity", "absolute", "square"],
default="absolute",
help="Transformation de la matrice (signée, |r| ou r²). Par défaut : |r|.",
)
parser.add_argument(
"--upper-only",
action="store_true",
help="Masque la partie inférieure de la matrice pour alléger la lecture.",
)
args = parser.parse_args()
if not CSV_PATH.exists(): if not CSV_PATH.exists():
print(f"⚠ Fichier introuvable : {CSV_PATH}") print(f"⚠ Fichier introuvable : {CSV_PATH}")
print(" Assurez-vous d'avoir généré le dataset minuté.") print(" Assurez-vous d'avoir généré le dataset minuté.")
return return
df = load_raw_csv(CSV_PATH) df = load_raw_csv(CSV_PATH)
df = df[[v.column for v in VARIABLES]]
print(f"Dataset minuté chargé : {CSV_PATH}") print(f"Dataset minuté chargé : {CSV_PATH}")
print(f" Lignes : {len(df)}") print(f" Lignes : {len(df)}")
print(f" Colonnes : {list(df.columns)}") print(f" Colonnes : {list(df.columns)}")
print() print()
transform = args.transform
matrices = compute_correlation_matrices_for_methods( matrices = compute_correlation_matrices_for_methods(
df=df, df=df,
variables=VARIABLES, variables=VARIABLES,
methods=CORRELATION_METHODS, methods=CORRELATION_METHODS,
transform=CORRELATION_TRANSFORM, transform=transform,
) )
args.output_dir.mkdir(parents=True, exist_ok=True)
for method, corr in matrices.items(): for method, corr in matrices.items():
print(f"Matrice de corrélation (méthode={method}, transform={CORRELATION_TRANSFORM}) :") if args.upper_only:
mask = np.tril(np.ones_like(corr, dtype=bool), k=-1)
corr = corr.mask(mask)
print(f"Matrice de corrélation (méthode={method}, transform={transform}) :")
print(corr) print(corr)
print() print()
config = _get_heatmap_config(method) config = _get_heatmap_config(method)
filename = config.filename
title = config.title
if transform == "absolute":
title = f"{title} (|r|)"
stem, suffix = filename.rsplit(".", 1)
filename = f"{stem}_abs.{suffix}"
elif transform == "square":
title = f"{title} (r²)"
stem, suffix = filename.rsplit(".", 1)
filename = f"{stem}_r2.{suffix}"
config = HeatmapConfig(
filename=filename,
title=title,
colorbar_label="Coefficient de corrélation r²",
cmap="viridis",
vmin=0.0,
vmax=1.0,
)
elif transform == "identity":
config = HeatmapConfig(
filename=filename,
title=title,
colorbar_label="Coefficient de corrélation r",
cmap="coolwarm",
vmin=-1.0,
vmax=1.0,
)
output_path = plot_correlation_heatmap( output_path = plot_correlation_heatmap(
corr=corr, corr=corr,
variables=VARIABLES, variables=VARIABLES,
output_path=DOC_DIR / "figures" / config.filename, output_path=args.output_dir / filename,
annotate=True, annotate=True,
title=config.title, title=title,
cmap=config.cmap, cmap=config.cmap,
vmin=config.vmin, vmin=config.vmin,
vmax=config.vmax, vmax=config.vmax,

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

View File

@ -6,19 +6,83 @@
python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py" python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py"
``` ```
![](figures/lagged_correlations/lagcorr_temperature_to_humidity.png)
![](figures/lagged_correlations/lagcorr_temperature_to_pressure.png)
![](figures/lagged_correlations/lagcorr_temperature_to_rain_rate.png)
![](figures/lagged_correlations/lagcorr_temperature_to_illuminance.png)
![](figures/lagged_correlations/lagcorr_temperature_to_wind_speed.png)
![](figures/lagged_correlations/lagcorr_temperature_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_temperature_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_humidity_to_pressure.png)
![](figures/lagged_correlations/lagcorr_humidity_to_rain_rate.png) ![](figures/lagged_correlations/lagcorr_humidity_to_rain_rate.png)
![](figures/lagged_correlations/lagcorr_illuminance_to_temperature.png) ![](figures/lagged_correlations/lagcorr_humidity_to_illuminance.png)
![](figures/lagged_correlations/lagcorr_pressure_to_illuminance.png) ![](figures/lagged_correlations/lagcorr_humidity_to_wind_speed.png)
![](figures/lagged_correlations/lagcorr_humidity_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_humidity_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_pressure_to_rain_rate.png) ![](figures/lagged_correlations/lagcorr_pressure_to_rain_rate.png)
![](figures/lagged_correlations/lagcorr_pressure_to_illuminance.png)
![](figures/lagged_correlations/lagcorr_pressure_to_wind_speed.png) ![](figures/lagged_correlations/lagcorr_pressure_to_wind_speed.png)
![](figures/lagged_correlations/lagcorr_temperature_to_humidity.png) ![](figures/lagged_correlations/lagcorr_pressure_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_temperature_to_rain_rate.png) ![](figures/lagged_correlations/lagcorr_pressure_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_rain_rate_to_illuminance.png)
![](figures/lagged_correlations/lagcorr_rain_rate_to_wind_speed.png)
![](figures/lagged_correlations/lagcorr_rain_rate_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_rain_rate_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_illuminance_to_wind_speed.png)
![](figures/lagged_correlations/lagcorr_illuminance_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_illuminance_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_wind_speed_to_wind_direction.png)
![](figures/lagged_correlations/lagcorr_wind_speed_to_sun_elevation.png)
![](figures/lagged_correlations/lagcorr_wind_direction_to_sun_elevation.png)
### Interprétation
Ces graphiques sont désormais en corrélation signée (par défaut) et tracent des zones colorées symétriques pour les relations négatives/positives. Ils mettent en évidence la force, le sens et le décalage temporel entre deux variables.
Ainsi, il est clair qu'il existe une corrélation forte et immédiate entre la température et l'humidité relative.
La corrélation entre température et luminance est également forte, mais elle est décalée : le pic de luminance précède le pic de température de près de deux heures.
Comme ces deux corrélations sont fortes, on en déduit que l'on devrait logiquement observer une corrélation forte et décalée entre la luminance et l'humidité relative (et c'est bien le cas, comme en témoigne le graphique correspondant).
Un rapport similaire, quoique moins prononcé, existe entre la température, l'humidité relative et l'élévation solaire, puisque la luminance en dépend directement.
### Matrices de corrélation avec lag optimal par paire (depuis les CSV)
```shell
python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py"
```
Cette heatmap est construite à partir des CSV `data/lagged_correlations` : pour chaque paire ordonnée (X → Y), on retient le coefficient r de plus grande valeur absolue |r| ainsi que le lag associé. Les lags (en minutes, signe conservé, y compris 0) ne sont annotés que pour les corrélations d'intensité au moins « Modérée » (|r| ≥ 0,3). Le PNG est écrit dans `figures/` et les matrices correspondantes (`correlation_matrix_lagged.csv`, `lag_matrix_minutes.csv`) dans `data/`.
![](figures/correlation_heatmap_lagged.png)
Cette matrice met en évidence les plus fortes corrélations et leur décalage optimal.
## Corrélations glissantes ## Corrélations glissantes

View File

@ -0,0 +1,191 @@
# scripts/plot_lagged_correlation_heatmap_from_data.py
from __future__ import annotations
from pathlib import Path
import sys
import argparse
import pandas as pd
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from meteo.variables import VARIABLES, VARIABLES_BY_KEY
from meteo.plots import plot_correlation_heatmap
DATA_DIR = Path("docs/05 - Corrélations binaires avancées/data/lagged_correlations")
FIG_DIR = Path("docs/05 - Corrélations binaires avancées/figures")
DATA_OUTPUT_DIR = Path("docs/05 - Corrélations binaires avancées/data")
def _load_best_corr_and_lag(csv_path: Path) -> tuple[float, int, bool] | None:
    """Return the strongest lagged correlation stored in a ``lagcorr_*`` CSV.

    The correlation column is looked up under several names, by priority:
    ``"Pearson"`` then ``"correlation"`` (signed values), then
    ``"Pearson |r|"`` (absolute values) and finally ``"Pearson (r²)"``
    (squared values, converted back to ``|r|`` with a square root).

    Args:
        csv_path: CSV file holding a ``lag_minutes`` column and one of the
            correlation columns listed above.

    Returns:
        ``(corr, lag, sign_known)`` for the row with the largest ``|r|``.
        ``sign_known`` is False when the file only stores ``|r|`` or ``r²``;
        ``corr`` is then non-negative. ``None`` is returned when no known
        correlation column exists or when every value is missing.

    Raises:
        KeyError: If a correlation column exists but ``lag_minutes`` does not.
    """
    df = pd.read_csv(csv_path)

    if "Pearson" in df.columns:
        series, sign_known = df["Pearson"], True
    elif "correlation" in df.columns:
        series, sign_known = df["correlation"], True
    elif "Pearson |r|" in df.columns:
        series, sign_known = df["Pearson |r|"], False
    elif "Pearson (r²)" in df.columns:
        series, sign_known = df["Pearson (r²)"].abs() ** 0.5, False
    else:
        return None

    abs_series = series.abs()
    if abs_series.empty or abs_series.isna().all():
        return None

    # idxmax() yields an index *label*, so both lookups below are label-based
    # (.loc) instead of mixing a positional .iloc with a label-based .loc.
    idx = abs_series.idxmax()
    best_corr = float(series.loc[idx])
    best_lag = int(df.loc[idx, "lag_minutes"])

    # Without a sign, report a positive value; the caller may still recover
    # the sign from the reverse CSV.
    if not sign_known:
        best_corr = abs(best_corr)
    return best_corr, best_lag, sign_known
def _get_pair_best(
    vx_key: str,
    vy_key: str,
    *,
    data_dir: Path,
) -> tuple[float, int, bool, str] | None:
    """Return ``(corr, lag, sign_known, source)`` for the ordered pair vx -> vy.

    Resolution order:

    1. The direct CSV ``lagcorr_{vx}_to_{vy}.csv`` when it stores signed values.
    2. The direct CSV without sign (``|r|`` or ``r²``) combined with a signed
       reverse CSV ``lagcorr_{vy}_to_{vx}.csv``: the magnitude comes from the
       direct file, the sign from the reverse file, and the lag is the negated
       reverse lag.
    3. The direct CSV alone (sign unknown).
    4. The reverse CSV alone, with its lag negated.

    Args:
        vx_key: Key of the first variable of the pair.
        vy_key: Key of the second variable of the pair.
        data_dir: Directory containing the ``lagcorr_*_to_*.csv`` files.

    Returns:
        The resolved tuple, where ``source`` names the file(s) used, or
        ``None`` when neither file yields a usable correlation.
    """
    primary = data_dir / f"lagcorr_{vx_key}_to_{vy_key}.csv"
    reverse = data_dir / f"lagcorr_{vy_key}_to_{vx_key}.csv"

    primary_res = _load_best_corr_and_lag(primary) if primary.exists() else None
    if primary_res is not None and primary_res[2]:
        corr, lag, sign_known = primary_res
        return corr, lag, sign_known, primary.name

    # From here on the direct CSV is either unusable or unsigned: try to infer
    # the missing information from the reverse CSV.
    reverse_res = _load_best_corr_and_lag(reverse) if reverse.exists() else None

    if primary_res is not None and reverse_res is not None and reverse_res[2]:
        magnitude = abs(primary_res[0])
        rev_corr, rev_lag, _ = reverse_res
        corr = magnitude if rev_corr >= 0 else -magnitude
        return corr, -rev_lag, True, f"{primary.name} (signe/lag inférés depuis {reverse.name})"

    if primary_res is not None:
        corr, lag, sign_known = primary_res
        return corr, lag, sign_known, primary.name

    if reverse_res is not None:
        rev_corr, rev_lag, sign_known = reverse_res
        return rev_corr, -rev_lag, sign_known, reverse.name

    return None
def main() -> None:
    """Build the lag-optimal correlation heatmap from existing ``lagcorr_*`` CSVs.

    For every ordered pair of ``VARIABLES`` the best correlation and its lag
    are resolved with ``_get_pair_best``. The script then writes:

    * ``correlation_heatmap_lagged.png`` in ``--fig-dir``, coloured by the
      correlation and annotated with the lag;
    * ``correlation_matrix_lagged.csv`` and ``lag_matrix_minutes.csv`` in
      ``--data-output-dir``.

    Lag annotations are hidden for pairs without data and, when
    ``--annot-threshold`` is positive, for pairs whose ``|r|`` is below it.
    """
    parser = argparse.ArgumentParser(
        description="Construit des matrices depuis les CSV lagcorr_* existants (max |r| par paire ordonnée)."
    )
    parser.add_argument(
        "--data-dir",
        type=Path,
        default=DATA_DIR,
        help="Dossier contenant les CSV lagcorr_*_to_*.csv.",
    )
    parser.add_argument(
        "--fig-dir",
        type=Path,
        default=FIG_DIR,
        help="Dossier de sortie pour la heatmap.",
    )
    parser.add_argument(
        "--data-output-dir",
        type=Path,
        default=DATA_OUTPUT_DIR,
        help="Dossier de sortie pour les matrices CSV exportées.",
    )
    parser.add_argument(
        "--annot-threshold",
        type=float,
        default=0.3,
        help="N'affiche le lag annoté que si |r| >= ce seuil (0 pour tout afficher).",
    )
    args = parser.parse_args()

    args.fig_dir.mkdir(parents=True, exist_ok=True)
    args.data_output_dir.mkdir(parents=True, exist_ok=True)

    columns = [v.column for v in VARIABLES]
    corr_matrix = pd.DataFrame(index=columns, columns=columns, dtype=float)
    # Pre-fill with 0: a frame built without data is filled with NaN, which an
    # integer dtype cannot hold. Starting from 0 keeps the lags as true
    # integers, so they are exported and annotated as "120" and not "120.0".
    lag_matrix = pd.DataFrame(0, index=columns, columns=columns, dtype=int)

    missing_files: list[str] = []
    sign_unknown: list[str] = []

    for vx in VARIABLES:
        for vy in VARIABLES:
            if vx == vy:
                corr_matrix.loc[vx.column, vy.column] = 1.0
                lag_matrix.loc[vx.column, vy.column] = 0
                continue

            res = _get_pair_best(vx.key, vy.key, data_dir=args.data_dir)
            if res is None:
                missing_files.append(f"{vx.key}{vy.key}")
                corr_matrix.loc[vx.column, vy.column] = float("nan")
                lag_matrix.loc[vx.column, vy.column] = 0
                continue

            best_corr, best_lag, sign_known, source = res
            if not sign_known:
                sign_unknown.append(f"{vx.key}{vy.key} (source={source})")
            corr_matrix.loc[vx.column, vy.column] = best_corr
            lag_matrix.loc[vx.column, vy.column] = best_lag

    if missing_files:
        print("⚠ CSV manquants pour certaines paires :", ", ".join(missing_files))
    if sign_unknown:
        print("⚠ Signe inconnu (CSV en |r| ou r²) pour :", ", ".join(sign_unknown))

    # Signed heatmap (values are positive when the CSVs carry no sign).
    output_path = args.fig_dir / "correlation_heatmap_lagged.png"

    # A pair without data has no meaningful lag: never annotate it, whatever
    # the threshold. Weak correlations are hidden only when a threshold is set.
    hidden = corr_matrix.isna()
    if args.annot_threshold > 0:
        hidden = hidden | (corr_matrix.abs() < args.annot_threshold)
    # Cast to object first so that blank strings and integers can coexist.
    annot_df = lag_matrix.astype(object).mask(hidden, "")

    plot_correlation_heatmap(
        corr=corr_matrix,
        variables=VARIABLES,
        output_path=output_path,
        annotate=True,
        annotate_values=annot_df,
        title="Corrélations (lag optimal par paire, issues des CSV)",
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
        colorbar_label="Coefficient de corrélation r",
    )
    print(f"✔ Heatmap laggée sauvegardée dans : {output_path}")

    corr_csv = args.data_output_dir / "correlation_matrix_lagged.csv"
    corr_matrix.to_csv(corr_csv)
    print(f"✔ Matrice des corrélations exportée : {corr_csv}")

    lag_csv = args.data_output_dir / "lag_matrix_minutes.csv"
    lag_matrix.to_csv(lag_csv)
    print(f"✔ Matrice des lags exportée : {lag_csv}")
if __name__ == "__main__":
main()

View File

@ -4,16 +4,21 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
import sys import sys
import argparse
PROJECT_ROOT = Path(__file__).resolve().parents[3] PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path: if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT)) sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY from meteo.variables import VARIABLES, VARIABLES_BY_KEY
from meteo.analysis import compute_lagged_correlation from meteo.analysis import compute_lagged_correlation
from meteo.plots import plot_lagged_correlation from meteo.plots import plot_lagged_correlation_multi
from meteo.correlation_presets import DEFAULT_LAGGED_PAIRS from meteo.correlation_presets import (
DEFAULT_ABS_CORRELATION_BANDS,
DEFAULT_SIGNED_CORRELATION_BANDS,
CorrelationBand,
)
CSV_PATH = Path("data/weather_minutely.csv") CSV_PATH = Path("data/weather_minutely.csv")
@ -22,39 +27,143 @@ OUTPUT_DIR = DOC_DIR / "figures" / "lagged_correlations"
def main() -> None: def main() -> None:
parser = argparse.ArgumentParser(description="Trace les corrélations décalées pour toutes les paires de variables.")
parser.add_argument(
"--max-lag",
type=int,
default=720,
help="Décalage maximal en minutes (par défaut : 720 = ±12h).",
)
parser.add_argument(
"--step",
type=int,
default=10,
help="Pas en minutes pour l'évaluation des lags (par défaut : 10).",
)
parser.add_argument(
"--resample",
default="none",
help="Fréquence d'agrégation avant calcul (par défaut : 'none') Exemple : '10min'.",
)
parser.add_argument(
"--signed",
action="store_true",
default=True,
help="Affiche les corrélations signées. Utiliser --no-signed pour revenir aux valeurs absolues |r|.",
)
parser.add_argument(
"--no-signed",
action="store_false",
dest="signed",
help="Alias explicite pour repasser en valeurs absolues |r|.",
)
parser.add_argument(
"--thresholds",
default=None,
help=(
"Seuils personnalisés (valeurs séparées par des virgules). "
"Par défaut, utilise les bandes définies dans meteo.correlation_presets "
"(abs ou signées selon --signed)."
),
)
parser.add_argument(
"--only",
nargs="*",
help="Clés de variables à inclure (par défaut toutes les variables numériques).",
)
args = parser.parse_args()
if not CSV_PATH.exists(): if not CSV_PATH.exists():
print(f"⚠ Fichier introuvable : {CSV_PATH}") print(f"⚠ Fichier introuvable : {CSV_PATH}")
return return
df = load_raw_csv(CSV_PATH) df = load_raw_csv(CSV_PATH)
df = df.select_dtypes(include="number")
if args.resample and args.resample.lower() != "none":
df = df.resample(args.resample).mean()
print(f"Dataset rééchantillonné à {args.resample} pour accélérer le calcul.")
print(f"Dataset minuté chargé : {CSV_PATH}") print(f"Dataset minuté chargé : {CSV_PATH}")
print(f" Lignes : {len(df)}") print(f" Lignes : {len(df)}")
print(f" Colonnes : {list(df.columns)}") print(f" Colonnes : {list(df.columns)}")
print() print()
for key_x, key_y in DEFAULT_LAGGED_PAIRS: if args.only:
var_x = VARIABLES_BY_KEY[key_x] missing = [k for k in args.only if k not in VARIABLES_BY_KEY]
var_y = VARIABLES_BY_KEY[key_y] if missing:
raise KeyError(f"Variables inconnues : {', '.join(missing)}")
variables = [VARIABLES_BY_KEY[k] for k in args.only]
else:
variables = list(VARIABLES)
pairs = [(vx, vy) for i, vx in enumerate(variables) for vy in variables[i + 1 :]]
print(f"Paires analysées : {len(pairs)} (combinaisons uniques, sans inverses).")
for var_x, var_y in pairs:
print(f"→ Corrélation décalée : {var_x.key}{var_y.key}") print(f"→ Corrélation décalée : {var_x.key}{var_y.key}")
lag_df = compute_lagged_correlation( lag_df_pearson = compute_lagged_correlation(
df=df, df=df,
var_x=var_x, var_x=var_x,
var_y=var_y, var_y=var_y,
max_lag_minutes=360, # ± 6 heures max_lag_minutes=args.max_lag,
step_minutes=10, # pas de 10 minutes step_minutes=args.step,
method="pearson", method="pearson",
) )["correlation"]
lag_df_spearman = compute_lagged_correlation(
df=df,
var_x=var_x,
var_y=var_y,
max_lag_minutes=args.max_lag,
step_minutes=args.step,
method="spearman",
)["correlation"]
use_abs = not args.signed
if use_abs:
lag_df_pearson = lag_df_pearson.abs()
lag_df_spearman = lag_df_spearman.abs()
ylabel = "Corrélation (|r|)"
labels = {"Pearson": "Pearson |r|", "Spearman": "Spearman |r|"}
y_limits = (0.0, 1.0)
threshold_values = (
[
float(t)
for t in (args.thresholds or "").split(",")
if t.strip() != ""
]
if args.thresholds
else []
)
bands = list(DEFAULT_ABS_CORRELATION_BANDS)
else:
ylabel = "Corrélation"
labels = {"Pearson": "Pearson", "Spearman": "Spearman"}
y_limits = (-1.0, 1.0)
threshold_values: list[float] = []
if args.thresholds:
threshold_values = [
float(t)
for t in args.thresholds.split(",")
if t.strip() != ""
]
threshold_values = sorted({v for thr in threshold_values for v in (thr, -thr)})
bands = list(DEFAULT_SIGNED_CORRELATION_BANDS)
filename = f"lagcorr_{var_x.key}_to_{var_y.key}.png" filename = f"lagcorr_{var_x.key}_to_{var_y.key}.png"
output_path = OUTPUT_DIR / filename output_path = OUTPUT_DIR / filename
plot_lagged_correlation( plot_lagged_correlation_multi(
lag_df=lag_df, lag_series={
labels["Pearson"]: lag_df_pearson,
labels["Spearman"]: lag_df_spearman,
},
var_x=var_x, var_x=var_x,
var_y=var_y, var_y=var_y,
output_path=output_path, output_path=output_path,
ylabel=ylabel,
y_limits=y_limits,
thresholds=threshold_values,
bands=bands,
) )
print("✔ Graphiques de corrélation décalée générés.") print("✔ Graphiques de corrélation décalée générés.")

View File

@ -23,6 +23,35 @@ HEXBIN_REDUCE_LABELS: dict[str, str] = {
"max": "maximum", "max": "maximum",
} }
@dataclass(frozen=True)
class CorrelationBand:
    """Correlation interval paired with the label and colour used to display it."""

    # Lower bound of the interval.
    min_value: float
    # Upper bound of the interval.
    max_value: float
    # Text shown for the band.
    label: str
    # Colour used to draw the band.
    color: str
# Default bands for reading correlation strength at a glance (expressed as |r|).
DEFAULT_ABS_CORRELATION_BANDS: Sequence[CorrelationBand] = (
    CorrelationBand(min_value=0.0, max_value=0.1, label="Quasi nulle", color="#f6f6f6"),
    CorrelationBand(min_value=0.1, max_value=0.3, label="Faible", color="#dce8f7"),
    CorrelationBand(min_value=0.3, max_value=0.5, label="Modérée", color="#c8e6c9"),
    CorrelationBand(min_value=0.5, max_value=1.0, label="Forte", color="#ffe0b2"),
)

# Bands for signed correlations (symmetric around 0).
DEFAULT_SIGNED_CORRELATION_BANDS: Sequence[CorrelationBand] = (
    CorrelationBand(min_value=-1.0, max_value=-0.5, label="Forte négative", color="#c6dbef"),
    CorrelationBand(min_value=-0.5, max_value=-0.3, label="Modérée négative", color="#deebf7"),
    CorrelationBand(min_value=-0.3, max_value=-0.1, label="Faible négative", color="#edf8fb"),
    CorrelationBand(min_value=-0.1, max_value=0.1, label="Quasi nulle", color="#f5f5f5"),
    CorrelationBand(min_value=0.1, max_value=0.3, label="Faible positive", color="#fff7ec"),
    CorrelationBand(min_value=0.3, max_value=0.5, label="Modérée positive", color="#fee8c8"),
    CorrelationBand(min_value=0.5, max_value=1.0, label="Forte positive", color="#fdbb84"),
)
@dataclass(frozen=True) @dataclass(frozen=True)
class HexbinScenario: class HexbinScenario:
@ -116,4 +145,7 @@ __all__ = [
"DEFAULT_LAGGED_PAIRS", "DEFAULT_LAGGED_PAIRS",
"DEFAULT_ROLLING_PAIRS", "DEFAULT_ROLLING_PAIRS",
"DEFAULT_HEXBIN_SCENARIOS", "DEFAULT_HEXBIN_SCENARIOS",
"CorrelationBand",
"DEFAULT_ABS_CORRELATION_BANDS",
"DEFAULT_SIGNED_CORRELATION_BANDS",
] ]

View File

@ -13,7 +13,9 @@ from .calendar_overview import (
from .correlations import ( from .correlations import (
plot_correlation_heatmap, plot_correlation_heatmap,
plot_lagged_correlation, plot_lagged_correlation,
plot_lagged_correlation_multi,
plot_rolling_correlation_heatmap, plot_rolling_correlation_heatmap,
CorrelationBand,
) )
from .rain import plot_daily_rainfall_hyetograph, plot_rainfall_by_season from .rain import plot_daily_rainfall_hyetograph, plot_rainfall_by_season
from .relationships import ( from .relationships import (
@ -55,7 +57,9 @@ __all__ = [
"rainfall_daily_total_series", "rainfall_daily_total_series",
"plot_correlation_heatmap", "plot_correlation_heatmap",
"plot_lagged_correlation", "plot_lagged_correlation",
"plot_lagged_correlation_multi",
"plot_rolling_correlation_heatmap", "plot_rolling_correlation_heatmap",
"CorrelationBand",
"plot_daily_rainfall_hyetograph", "plot_daily_rainfall_hyetograph",
"plot_rainfall_by_season", "plot_rainfall_by_season",
"plot_event_composite", "plot_event_composite",

View File

@ -3,16 +3,23 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from typing import Sequence from typing import Iterable, Sequence
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from meteo.correlation_presets import CorrelationBand
from .base import export_plot_dataset from .base import export_plot_dataset
from meteo.variables import Variable from meteo.variables import Variable
__all__ = ['plot_lagged_correlation', 'plot_correlation_heatmap', 'plot_rolling_correlation_heatmap'] __all__ = [
'plot_lagged_correlation',
'plot_lagged_correlation_multi',
'plot_correlation_heatmap',
'plot_rolling_correlation_heatmap',
'CorrelationBand',
]
def plot_lagged_correlation( def plot_lagged_correlation(
@ -42,13 +49,89 @@ def plot_lagged_correlation(
return output_path.resolve() return output_path.resolve()
def plot_lagged_correlation_multi(
    lag_series: dict[str, pd.Series],
    var_x: Variable,
    var_y: Variable,
    output_path: str | Path,
    *,
    title_suffix: str | None = None,
    ylabel: str = "Corrélation",
    y_limits: tuple[float, float] | None = None,
    thresholds: Sequence[float] | None = None,
    bands: Iterable["CorrelationBand"] | None = None,
) -> Path:
    """Plot several lagged-correlation curves (e.g. Pearson/Spearman) on one figure.

    Args:
        lag_series: Mapping from curve label to a series of correlations
            indexed by lag; the series are concatenated column-wise.
        var_x: First variable of the pair; its ``label`` is used in the title.
        var_y: Second variable of the pair; its ``label`` is used in the title.
        output_path: Destination image; parent directories are created.
        title_suffix: Optional text appended to the title in parentheses.
        ylabel: Label of the y axis.
        y_limits: Optional ``(ymin, ymax)`` applied to the y axis.
        thresholds: Optional y values drawn as dashed horizontal guide lines,
            each labelled with its value.
        bands: Optional correlation bands drawn as labelled horizontal spans
            behind the curves.

    Returns:
        The resolved path of the written image.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    df = pd.concat(lag_series, axis=1)
    export_plot_dataset(df, output_path)

    # Work on an explicit figure/axes pair rather than the implicit pyplot
    # state, and always close the figure so that a failure while drawing or
    # saving does not leave it open.
    fig, ax = plt.subplots()
    try:
        colors = ["#1f77b4", "#d1495b", "#2ca02c", "#9467bd"]
        for idx, (label, series) in enumerate(df.items()):
            ax.plot(series.index, series, label=label, color=colors[idx % len(colors)], linewidth=1.6)

        if bands:
            # Band labels are anchored on the largest lag present in the data.
            label_x = df.index.max()
            for band in bands:
                ax.axhspan(band.min_value, band.max_value, color=band.color, alpha=0.25, zorder=0)
                ax.text(
                    label_x,
                    (band.min_value + band.max_value) / 2.0,
                    band.label,
                    ha="right",
                    va="center",
                    fontsize=8,
                    color="#444444",
                    bbox=dict(facecolor="white", edgecolor="none", alpha=0.6, pad=1.5),
                )

        ax.axvline(0, linestyle="--", color="#666666", linewidth=1.0)
        ax.set_xlabel("Décalage (minutes)\n(lag > 0 : X précède Y)")
        ax.set_ylabel(ylabel)

        title = f"Corrélation décalée : {var_x.label}{var_y.label}"
        if title_suffix:
            title = f"{title} ({title_suffix})"
        ax.set_title(title)

        if thresholds:
            # Threshold labels are anchored on the right edge of the x axis.
            _, x_right = ax.get_xlim()
            for thr in thresholds:
                ax.axhline(thr, color="#999999", linestyle="--", linewidth=1.0, alpha=0.85)
                ax.text(
                    x_right,
                    thr,
                    f"{thr:.2f}",
                    ha="right",
                    va="center",
                    fontsize=8,
                    color="#555555",
                    bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, pad=1.5),
                )

        if y_limits is not None:
            ax.set_ylim(*y_limits)

        ax.grid(True, alpha=0.7)
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)

    return output_path.resolve()
def plot_correlation_heatmap( def plot_correlation_heatmap(
corr: pd.DataFrame, corr: pd.DataFrame,
variables: Sequence[Variable], variables: Sequence[Variable],
output_path: str | Path, output_path: str | Path,
*, *,
annotate: bool = True, annotate: bool = True,
annotate_values: "pd.DataFrame | None" = None,
title: str | None = None, title: str | None = None,
figsize: tuple[float, float] | None = None,
cmap: str | None = None, cmap: str | None = None,
vmin: float | None = None, vmin: float | None = None,
vmax: float | None = None, vmax: float | None = None,
@ -89,7 +172,13 @@ def plot_correlation_heatmap(
data = corr.to_numpy() data = corr.to_numpy()
fig, ax = plt.subplots() if figsize is None:
n = len(variables)
# Augmente la taille pour laisser respirer les annotations
side = max(6.0, n * 0.9)
figsize = (side, side)
fig, ax = plt.subplots(figsize=figsize)
if vmin is None: if vmin is None:
vmin = -1.0 vmin = -1.0
if vmax is None: if vmax is None:
@ -117,6 +206,11 @@ def plot_correlation_heatmap(
# Annotation des cases # Annotation des cases
if annotate: if annotate:
n = data.shape[0] n = data.shape[0]
annot_data = (
annotate_values.loc[columns, columns].to_numpy()
if annotate_values is not None
else data
)
norm = im.norm norm = im.norm
cmap_obj = im.cmap cmap_obj = im.cmap
@ -128,18 +222,23 @@ def plot_correlation_heatmap(
for i in range(n): for i in range(n):
for j in range(n): for j in range(n):
val = data[i, j] val_corr = data[i, j]
val_annot = annot_data[i, j]
if i == j: if i == j:
text = "" text = ""
elif np.isnan(val): elif isinstance(val_annot, (float, int, np.floating)) and np.isnan(val_annot):
text = "" text = ""
else: else:
text = f"{val:.2f}" # si annotate_values est fourni, on affiche la valeur annotée brute
if annotate_values is not None:
text = str(val_annot)
else:
text = f"{val_corr:.2f}"
if not text: if not text:
continue continue
color = _text_color(0.0 if np.isnan(val) else val) color = _text_color(0.0 if np.isnan(val_corr) else val_corr)
ax.text( ax.text(
j, j,
i, i,