diff --git a/docs/04 - Corrélations binaires/figures/correlation_heatmap_abs.png b/docs/04 - Corrélations binaires/figures/correlation_heatmap_abs.png new file mode 100644 index 0000000..2ef8abb Binary files /dev/null and b/docs/04 - Corrélations binaires/figures/correlation_heatmap_abs.png differ diff --git a/docs/04 - Corrélations binaires/figures/correlation_heatmap_spearman_abs.png b/docs/04 - Corrélations binaires/figures/correlation_heatmap_spearman_abs.png new file mode 100644 index 0000000..9be52dc Binary files /dev/null and b/docs/04 - Corrélations binaires/figures/correlation_heatmap_spearman_abs.png differ diff --git a/docs/04 - Corrélations binaires/index.md b/docs/04 - Corrélations binaires/index.md index 2781f61..527f374 100644 --- a/docs/04 - Corrélations binaires/index.md +++ b/docs/04 - Corrélations binaires/index.md @@ -122,12 +122,12 @@ python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py" ![](figures/pairwise_scatter/scatter_wind_speed_vs_wind_direction.png) -## Matrices de corrélation +## Matrices de corrélation (instantané, signé) ```shell -python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py" +python "docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py" --transform absolute --upper-only ``` -![](figures/correlation_heatmap.png) +![](figures/correlation_heatmap_abs.png) -![](figures/correlation_heatmap_spearman.png) +![](figures/correlation_heatmap_spearman_abs.png) diff --git a/docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py b/docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py index 8ab1f97..e97d9c7 100644 --- a/docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py +++ b/docs/04 - Corrélations binaires/scripts/plot_correlation_heatmap.py @@ -4,6 +4,9 @@ from __future__ import annotations from dataclasses import dataclass from pathlib import Path import sys +import argparse +import pandas as pd +import numpy as np PROJECT_ROOT = Path(__file__).resolve().parents[3] @@ -20,7 +23,6 @@ CSV_PATH = Path("data/weather_minutely.csv") DOC_DIR = Path(__file__).resolve().parent.parent CORRELATION_METHODS: tuple[str, ...] = ("pearson", "spearman") -CORRELATION_TRANSFORM = "square" @dataclass(frozen=True) @@ -36,13 +38,19 @@ class HeatmapConfig: HEATMAP_CONFIGS: dict[str, HeatmapConfig] = { "pearson": HeatmapConfig( filename="correlation_heatmap.png", - title="Corrélations R² (coef. de Pearson)", - colorbar_label="Coefficient de corrélation R²", + title="Corrélations (coef. de Pearson)", + colorbar_label="Coefficient de corrélation", + cmap="viridis", + vmin=0.0, + vmax=1.0, ), "spearman": HeatmapConfig( filename="correlation_heatmap_spearman.png", - title="Corrélations R² (coef. de Spearman)", - colorbar_label="Coefficient de corrélation R²", + title="Corrélations (coef. de Spearman)", + colorbar_label="Coefficient de corrélation", + cmap="viridis", + vmin=0.0, + vmax=1.0, ), } @@ -63,36 +71,92 @@ def _get_heatmap_config(method: str) -> HeatmapConfig: def main() -> None: + parser = argparse.ArgumentParser(description="Trace des matrices de corrélation instantanées (signées, absolues ou r²).") + parser.add_argument( + "--output-dir", + type=Path, + default=DOC_DIR / "figures", + help="Dossier de sortie pour les heatmaps.", + ) + parser.add_argument( + "--transform", + choices=["identity", "absolute", "square"], + default="absolute", + help="Transformation de la matrice (signée, |r| ou r²). Par défaut : |r|.", + ) + parser.add_argument( + "--upper-only", + action="store_true", + help="Masque la partie inférieure de la matrice pour alléger la lecture.", + ) + args = parser.parse_args() + if not CSV_PATH.exists(): print(f"⚠ Fichier introuvable : {CSV_PATH}") print(" Assurez-vous d'avoir généré le dataset minuté.") return df = load_raw_csv(CSV_PATH) + df = df[[v.column for v in VARIABLES]] print(f"Dataset minuté chargé : {CSV_PATH}") print(f" Lignes : {len(df)}") print(f" Colonnes : {list(df.columns)}") print() + transform = args.transform matrices = compute_correlation_matrices_for_methods( df=df, variables=VARIABLES, methods=CORRELATION_METHODS, - transform=CORRELATION_TRANSFORM, + transform=transform, ) + args.output_dir.mkdir(parents=True, exist_ok=True) + for method, corr in matrices.items(): - print(f"Matrice de corrélation (méthode={method}, transform={CORRELATION_TRANSFORM}) :") + if args.upper_only: + mask = np.tril(np.ones_like(corr, dtype=bool), k=-1) + corr = corr.mask(mask) + + print(f"Matrice de corrélation (méthode={method}, transform={transform}) :") print(corr) print() config = _get_heatmap_config(method) + filename = config.filename + title = config.title + if transform == "absolute": + title = f"{title} (|r|)" + stem, suffix = filename.rsplit(".", 1) + filename = f"{stem}_abs.{suffix}" + elif transform == "square": + title = f"{title} (r²)" + stem, suffix = filename.rsplit(".", 1) + filename = f"{stem}_r2.{suffix}" + config = HeatmapConfig( + filename=filename, + title=title, + colorbar_label="Coefficient de corrélation r²", + cmap="viridis", + vmin=0.0, + vmax=1.0, + ) + elif transform == "identity": + config = HeatmapConfig( + filename=filename, + title=title, + colorbar_label="Coefficient de corrélation r", + cmap="coolwarm", + vmin=-1.0, + vmax=1.0, + ) + output_path = plot_correlation_heatmap( corr=corr, variables=VARIABLES, - output_path=DOC_DIR / "figures" / config.filename, + output_path=args.output_dir / filename, annotate=True, - title=config.title, + title=title, cmap=config.cmap, vmin=config.vmin, vmax=config.vmax, diff --git a/docs/05 - Corrélations binaires avancées/figures/correlation_heatmap_lagged.png b/docs/05 - Corrélations binaires avancées/figures/correlation_heatmap_lagged.png new file mode 100644 index 0000000..710f6c1 Binary files /dev/null and b/docs/05 - Corrélations binaires avancées/figures/correlation_heatmap_lagged.png differ diff --git a/docs/05 - Corrélations binaires avancées/index.md b/docs/05 - Corrélations binaires avancées/index.md index a05943f..0fa2475 100644 --- a/docs/05 - Corrélations binaires avancées/index.md +++ b/docs/05 - Corrélations binaires avancées/index.md @@ -6,19 +6,83 @@ python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py" ``` +![](figures/lagged_correlations/lagcorr_temperature_to_humidity.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_pressure.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_rain_rate.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_illuminance.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_wind_speed.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_wind_direction.png) + +![](figures/lagged_correlations/lagcorr_temperature_to_sun_elevation.png) + +![](figures/lagged_correlations/lagcorr_humidity_to_pressure.png) + ![](figures/lagged_correlations/lagcorr_humidity_to_rain_rate.png) -![](figures/lagged_correlations/lagcorr_illuminance_to_temperature.png) +![](figures/lagged_correlations/lagcorr_humidity_to_illuminance.png) -![](figures/lagged_correlations/lagcorr_pressure_to_illuminance.png) +![](figures/lagged_correlations/lagcorr_humidity_to_wind_speed.png) + +![](figures/lagged_correlations/lagcorr_humidity_to_wind_direction.png) + +![](figures/lagged_correlations/lagcorr_humidity_to_sun_elevation.png) ![](figures/lagged_correlations/lagcorr_pressure_to_rain_rate.png) +![](figures/lagged_correlations/lagcorr_pressure_to_illuminance.png) + ![](figures/lagged_correlations/lagcorr_pressure_to_wind_speed.png) -![](figures/lagged_correlations/lagcorr_temperature_to_humidity.png) +![](figures/lagged_correlations/lagcorr_pressure_to_wind_direction.png) -![](figures/lagged_correlations/lagcorr_temperature_to_rain_rate.png) +![](figures/lagged_correlations/lagcorr_pressure_to_sun_elevation.png) + +![](figures/lagged_correlations/lagcorr_rain_rate_to_illuminance.png) + +![](figures/lagged_correlations/lagcorr_rain_rate_to_wind_speed.png) + +![](figures/lagged_correlations/lagcorr_rain_rate_to_wind_direction.png) + +![](figures/lagged_correlations/lagcorr_rain_rate_to_sun_elevation.png) + +![](figures/lagged_correlations/lagcorr_illuminance_to_wind_speed.png) + +![](figures/lagged_correlations/lagcorr_illuminance_to_wind_direction.png) + +![](figures/lagged_correlations/lagcorr_illuminance_to_sun_elevation.png) + +![](figures/lagged_correlations/lagcorr_wind_speed_to_wind_direction.png) + +![](figures/lagged_correlations/lagcorr_wind_speed_to_sun_elevation.png) + +![](figures/lagged_correlations/lagcorr_wind_direction_to_sun_elevation.png) + +### Interprétation + +Ces graphiques sont désormais en corrélation signée (par défaut) et tracent des zones colorées symétriques pour les relations négatives/positives. Ils mettent en évidence la force, le sens et le décalage temporel entre deux variables. + +Ainsi, il est clair qu'il existe une corrélation forte et immédiate entre la température et l'humidité relative. +La corrélation entre température et luminance est également forte, mais elle est décalée : le pic de luminance précède le pic de température de près de deux heures. + +Comme ces deux corrélations sont fortes, on en déduit que l'on devrait logiquement observer une corrélation forte et décalée entre la luminance et l'humidité relative (et c'est bien le cas, comme en témoigne le graphique correspondant). +Un rapport similaire, quoique moins prononcé, existe entre la température, l'humidité relative et l'élévation solaire, puisque la luminance en dépend directement. + +### Matrices de corrélation avec lag optimal par paire (depuis les CSV) + +```shell +python "docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py" +``` + +Cette heatmap est construite à partir des CSV `data/lagged_correlations`, en prenant pour chaque paire ordonnée (X → Y) le r maximal en |r| et le lag associé. Les lags (en minutes, signe conservé, y compris 0) sont annotés uniquement pour les corrélations d'intensité au moins « Modérée » (|r| ≥ 0,3) ; les PNG sont écrits dans `figures/` et les matrices correspondantes (`correlation_matrix_lagged.csv`, `lag_matrix_minutes.csv`) dans `data/`. + +![](figures/correlation_heatmap_lagged.png) + +Cette matrice met en évidence les plus fortes corrélations et leur décalage optimal. ## Corrélations glissantes diff --git a/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py b/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py new file mode 100644 index 0000000..063f800 --- /dev/null +++ b/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlation_heatmap_from_data.py @@ -0,0 +1,191 @@ +# scripts/plot_lagged_correlation_heatmap_from_data.py +from __future__ import annotations + +from pathlib import Path +import sys +import argparse + +import pandas as pd + +PROJECT_ROOT = Path(__file__).resolve().parents[3] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +from meteo.variables import VARIABLES, VARIABLES_BY_KEY +from meteo.plots import plot_correlation_heatmap + + +DATA_DIR = Path("docs/05 - Corrélations binaires avancées/data/lagged_correlations") +FIG_DIR = Path("docs/05 - Corrélations binaires avancées/figures") +DATA_OUTPUT_DIR = Path("docs/05 - Corrélations binaires avancées/data") + + +def _load_best_corr_and_lag(csv_path: Path) -> tuple[float, int, bool] | None: + """Retourne (corr, lag, sign_known) au max |r| à partir d'un CSV de lagcorr.""" + + df = pd.read_csv(csv_path) + + if "Pearson" in df.columns: + series = df["Pearson"] + sign_known = True + elif "correlation" in df.columns: + series = df["correlation"] + sign_known = True + elif "Pearson |r|" in df.columns: + series = df["Pearson |r|"] + sign_known = False + elif "Pearson (r²)" in df.columns: + series = (df["Pearson (r²)"].abs()) ** 0.5 + sign_known = False + else: + return None + + abs_series = series.abs() + if abs_series.empty or abs_series.isna().all(): + return None + + idx = abs_series.idxmax() + best_corr = series.iloc[idx] + best_lag = int(df.loc[idx, "lag_minutes"]) + # Si pas de signe, on retourne un corr positif (le signe pourra être posé via l'inverse) + if not sign_known: + best_corr = abs(best_corr) + return best_corr, best_lag, sign_known + + +def _get_pair_best( + vx_key: str, + vy_key: str, + *, + data_dir: Path, +) -> tuple[float, int, bool, str] | None: + """ + Retourne (corr, lag, sign_known, source) pour la paire ordonnée vx->vy. + Si le CSV direct ne contient pas le signe (|r| ou r²), tente de l'inférer + à partir du CSV inverse (vy->vx) en inversant le lag. + """ + primary = data_dir / f"lagcorr_{vx_key}_to_{vy_key}.csv" + reverse = data_dir / f"lagcorr_{vy_key}_to_{vx_key}.csv" + + primary_res = _load_best_corr_and_lag(primary) if primary.exists() else None + if primary_res and primary_res[2]: + corr, lag, sign_known = primary_res + return corr, lag, sign_known, primary.name + + # Tentative d'inférence via le CSV inverse + reverse_res = _load_best_corr_and_lag(reverse) if reverse.exists() else None + if primary_res and reverse_res and reverse_res[2]: + primary_corr, _, _ = primary_res + rev_corr, rev_lag, _ = reverse_res + corr = primary_corr if primary_res[2] else abs(primary_corr) * (1 if rev_corr >= 0 else -1) + lag = -rev_lag + return corr, lag, True, f"{primary.name} (signe/lag inférés depuis {reverse.name})" + + if primary_res: + corr, lag, sign_known = primary_res + return corr, lag, sign_known, primary.name if primary.exists() else "n/a" + + if reverse_res: + rev_corr, rev_lag, sign_known = reverse_res + corr = rev_corr + lag = -rev_lag + return corr, lag, sign_known, reverse.name + + return None + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Construit des matrices depuis les CSV lagcorr_* existants (max |r| par paire ordonnée)." + ) + parser.add_argument( + "--data-dir", + type=Path, + default=DATA_DIR, + help="Dossier contenant les CSV lagcorr_*_to_*.csv.", + ) + parser.add_argument( + "--fig-dir", + type=Path, + default=FIG_DIR, + help="Dossier de sortie pour la heatmap.", + ) + parser.add_argument( + "--data-output-dir", + type=Path, + default=DATA_OUTPUT_DIR, + help="Dossier de sortie pour les matrices CSV exportées.", + ) + parser.add_argument( + "--annot-threshold", + type=float, + default=0.3, + help="N'affiche le lag annoté que si |r| >= ce seuil (0 pour tout afficher).", + ) + args = parser.parse_args() + + args.fig_dir.mkdir(parents=True, exist_ok=True) + args.data_output_dir.mkdir(parents=True, exist_ok=True) + + columns = [v.column for v in VARIABLES] + corr_matrix = pd.DataFrame(index=columns, columns=columns, dtype=float) + lag_matrix = pd.DataFrame(index=columns, columns=columns, dtype=int) + + missing_files: list[str] = [] + sign_unknown: list[str] = [] + + for vx in VARIABLES: + for vy in VARIABLES: + if vx == vy: + corr_matrix.loc[vx.column, vy.column] = 1.0 + lag_matrix.loc[vx.column, vy.column] = 0 + continue + res = _get_pair_best(vx.key, vy.key, data_dir=args.data_dir) + if res is None: + missing_files.append(f"{vx.key}→{vy.key}") + corr_matrix.loc[vx.column, vy.column] = float("nan") + lag_matrix.loc[vx.column, vy.column] = 0 + continue + + best_corr, best_lag, sign_known, source = res + if not sign_known: + sign_unknown.append(f"{vx.key}→{vy.key} (source={source})") + corr_matrix.loc[vx.column, vy.column] = best_corr + lag_matrix.loc[vx.column, vy.column] = best_lag + + if missing_files: + print("⚠ CSV manquants pour certaines paires :", ", ".join(missing_files)) + if sign_unknown: + print("⚠ Signe inconnu (CSV en |r| ou r²) pour :", ", ".join(sign_unknown)) + + # Heatmap signée (valeurs positives lorsque le signe est absent des CSV) + output_path = args.fig_dir / "correlation_heatmap_lagged.png" + annot_df = lag_matrix.copy() + if args.annot_threshold > 0: + mask = corr_matrix.abs() < args.annot_threshold + annot_df = annot_df.mask(mask, "") + plot_correlation_heatmap( + corr=corr_matrix, + variables=VARIABLES, + output_path=output_path, + annotate=True, + annotate_values=annot_df, + title="Corrélations (lag optimal par paire, issues des CSV)", + cmap="coolwarm", + vmin=-1.0, + vmax=1.0, + colorbar_label="Coefficient de corrélation r", + ) + print(f"✔ Heatmap laggée sauvegardée dans : {output_path}") + + corr_csv = args.data_output_dir / "correlation_matrix_lagged.csv" + corr_matrix.to_csv(corr_csv) + print(f"✔ Matrice des corrélations exportée : {corr_csv}") + + lag_csv = args.data_output_dir / "lag_matrix_minutes.csv" + lag_matrix.to_csv(lag_csv) + print(f"✔ Matrice des lags exportée : {lag_csv}") + + +if __name__ == "__main__": + main() diff --git a/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py b/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py index 4306ca7..610bdc5 100644 --- a/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py +++ b/docs/05 - Corrélations binaires avancées/scripts/plot_lagged_correlations.py @@ -4,16 +4,21 @@ from __future__ import annotations from pathlib import Path import sys +import argparse PROJECT_ROOT = Path(__file__).resolve().parents[3] if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from meteo.dataset import load_raw_csv -from meteo.variables import VARIABLES_BY_KEY +from meteo.variables import VARIABLES, VARIABLES_BY_KEY from meteo.analysis import compute_lagged_correlation -from meteo.plots import plot_lagged_correlation -from meteo.correlation_presets import DEFAULT_LAGGED_PAIRS +from meteo.plots import plot_lagged_correlation_multi +from meteo.correlation_presets import ( + DEFAULT_ABS_CORRELATION_BANDS, + DEFAULT_SIGNED_CORRELATION_BANDS, + CorrelationBand, +) CSV_PATH = Path("data/weather_minutely.csv") @@ -22,39 +27,143 @@ OUTPUT_DIR = DOC_DIR / "figures" / "lagged_correlations" def main() -> None: + parser = argparse.ArgumentParser(description="Trace les corrélations décalées pour toutes les paires de variables.") + parser.add_argument( + "--max-lag", + type=int, + default=720, + help="Décalage maximal en minutes (par défaut : 720 = ±12h).", + ) + parser.add_argument( + "--step", + type=int, + default=10, + help="Pas en minutes pour l'évaluation des lags (par défaut : 10).", + ) + parser.add_argument( + "--resample", + default="none", + help="Fréquence d'agrégation avant calcul (par défaut : 'none') Exemple : '10min'.", + ) + parser.add_argument( + "--signed", + action="store_true", + default=True, + help="Affiche les corrélations signées. Utiliser --no-signed pour revenir aux valeurs absolues |r|.", + ) + parser.add_argument( + "--no-signed", + action="store_false", + dest="signed", + help="Alias explicite pour repasser en valeurs absolues |r|.", + ) + parser.add_argument( + "--thresholds", + default=None, + help=( + "Seuils personnalisés (valeurs séparées par des virgules). " + "Par défaut, utilise les bandes définies dans meteo.correlation_presets " + "(abs ou signées selon --signed)." + ), + ) + parser.add_argument( + "--only", + nargs="*", + help="Clés de variables à inclure (par défaut toutes les variables numériques).", + ) + args = parser.parse_args() + if not CSV_PATH.exists(): print(f"⚠ Fichier introuvable : {CSV_PATH}") return df = load_raw_csv(CSV_PATH) + df = df.select_dtypes(include="number") + if args.resample and args.resample.lower() != "none": + df = df.resample(args.resample).mean() + print(f"Dataset rééchantillonné à {args.resample} pour accélérer le calcul.") print(f"Dataset minuté chargé : {CSV_PATH}") print(f" Lignes : {len(df)}") print(f" Colonnes : {list(df.columns)}") print() - for key_x, key_y in DEFAULT_LAGGED_PAIRS: - var_x = VARIABLES_BY_KEY[key_x] - var_y = VARIABLES_BY_KEY[key_y] + if args.only: + missing = [k for k in args.only if k not in VARIABLES_BY_KEY] + if missing: + raise KeyError(f"Variables inconnues : {', '.join(missing)}") + variables = [VARIABLES_BY_KEY[k] for k in args.only] + else: + variables = list(VARIABLES) + pairs = [(vx, vy) for i, vx in enumerate(variables) for vy in variables[i + 1 :]] + print(f"Paires analysées : {len(pairs)} (combinaisons uniques, sans inverses).") + for var_x, var_y in pairs: print(f"→ Corrélation décalée : {var_x.key} → {var_y.key}") - lag_df = compute_lagged_correlation( + lag_df_pearson = compute_lagged_correlation( df=df, var_x=var_x, var_y=var_y, - max_lag_minutes=360, # ± 6 heures - step_minutes=10, # pas de 10 minutes + max_lag_minutes=args.max_lag, + step_minutes=args.step, method="pearson", - ) + )["correlation"] + + lag_df_spearman = compute_lagged_correlation( + df=df, + var_x=var_x, + var_y=var_y, + max_lag_minutes=args.max_lag, + step_minutes=args.step, + method="spearman", + )["correlation"] + + use_abs = not args.signed + if use_abs: + lag_df_pearson = lag_df_pearson.abs() + lag_df_spearman = lag_df_spearman.abs() + ylabel = "Corrélation (|r|)" + labels = {"Pearson": "Pearson |r|", "Spearman": "Spearman |r|"} + y_limits = (0.0, 1.0) + threshold_values = ( + [ + float(t) + for t in (args.thresholds or "").split(",") + if t.strip() != "" + ] + if args.thresholds + else [] + ) + bands = list(DEFAULT_ABS_CORRELATION_BANDS) + else: + ylabel = "Corrélation" + labels = {"Pearson": "Pearson", "Spearman": "Spearman"} + y_limits = (-1.0, 1.0) + threshold_values: list[float] = [] + if args.thresholds: + threshold_values = [ + float(t) + for t in args.thresholds.split(",") + if t.strip() != "" + ] + threshold_values = sorted({v for thr in threshold_values for v in (thr, -thr)}) + bands = list(DEFAULT_SIGNED_CORRELATION_BANDS) filename = f"lagcorr_{var_x.key}_to_{var_y.key}.png" output_path = OUTPUT_DIR / filename - plot_lagged_correlation( - lag_df=lag_df, + plot_lagged_correlation_multi( + lag_series={ + labels["Pearson"]: lag_df_pearson, + labels["Spearman"]: lag_df_spearman, + }, var_x=var_x, var_y=var_y, output_path=output_path, + ylabel=ylabel, + y_limits=y_limits, + thresholds=threshold_values, + bands=bands, ) print("✔ Graphiques de corrélation décalée générés.") diff --git a/meteo/correlation_presets.py b/meteo/correlation_presets.py index b6ff016..9e4f001 100644 --- a/meteo/correlation_presets.py +++ b/meteo/correlation_presets.py @@ -23,6 +23,35 @@ HEXBIN_REDUCE_LABELS: dict[str, str] = { "max": "maximum", } +@dataclass(frozen=True) +class CorrelationBand: + """Intervalle de corrélation, avec couleur et étiquette pour l'affichage.""" + + min_value: float + max_value: float + label: str + color: str + + +# Repères par défaut pour lire rapidement l'intensité (en |r|) +DEFAULT_ABS_CORRELATION_BANDS: Sequence[CorrelationBand] = ( + CorrelationBand(0.0, 0.1, "Quasi nulle", "#f6f6f6"), + CorrelationBand(0.1, 0.3, "Faible", "#dce8f7"), + CorrelationBand(0.3, 0.5, "Modérée", "#c8e6c9"), + CorrelationBand(0.5, 1.0, "Forte", "#ffe0b2"), +) + +# Bandes pour corrélations signées (symétriques autour de 0) +DEFAULT_SIGNED_CORRELATION_BANDS: Sequence[CorrelationBand] = ( + CorrelationBand(-1.0, -0.5, "Forte négative", "#c6dbef"), + CorrelationBand(-0.5, -0.3, "Modérée négative", "#deebf7"), + CorrelationBand(-0.3, -0.1, "Faible négative", "#edf8fb"), + CorrelationBand(-0.1, 0.1, "Quasi nulle", "#f5f5f5"), + CorrelationBand(0.1, 0.3, "Faible positive", "#fff7ec"), + CorrelationBand(0.3, 0.5, "Modérée positive", "#fee8c8"), + CorrelationBand(0.5, 1.0, "Forte positive", "#fdbb84"), +) + @dataclass(frozen=True) class HexbinScenario: @@ -116,4 +145,7 @@ __all__ = [ "DEFAULT_LAGGED_PAIRS", "DEFAULT_ROLLING_PAIRS", "DEFAULT_HEXBIN_SCENARIOS", + "CorrelationBand", + "DEFAULT_ABS_CORRELATION_BANDS", + "DEFAULT_SIGNED_CORRELATION_BANDS", ] diff --git a/meteo/plots/__init__.py b/meteo/plots/__init__.py index c4152a1..fefaca9 100644 --- a/meteo/plots/__init__.py +++ b/meteo/plots/__init__.py @@ -13,7 +13,9 @@ from .calendar_overview import ( from .correlations import ( plot_correlation_heatmap, plot_lagged_correlation, + plot_lagged_correlation_multi, plot_rolling_correlation_heatmap, + CorrelationBand, ) from .rain import plot_daily_rainfall_hyetograph, plot_rainfall_by_season from .relationships import ( @@ -55,7 +57,9 @@ __all__ = [ "rainfall_daily_total_series", "plot_correlation_heatmap", "plot_lagged_correlation", + "plot_lagged_correlation_multi", "plot_rolling_correlation_heatmap", + "CorrelationBand", "plot_daily_rainfall_hyetograph", "plot_rainfall_by_season", "plot_event_composite", diff --git a/meteo/plots/correlations.py b/meteo/plots/correlations.py index ea198e3..89635b3 100644 --- a/meteo/plots/correlations.py +++ b/meteo/plots/correlations.py @@ -3,16 +3,23 @@ from __future__ import annotations from pathlib import Path -from typing import Sequence +from typing import Iterable, Sequence import matplotlib.pyplot as plt import numpy as np import pandas as pd +from meteo.correlation_presets import CorrelationBand from .base import export_plot_dataset from meteo.variables import Variable -__all__ = ['plot_lagged_correlation', 'plot_correlation_heatmap', 'plot_rolling_correlation_heatmap'] +__all__ = [ + 'plot_lagged_correlation', + 'plot_lagged_correlation_multi', + 'plot_correlation_heatmap', + 'plot_rolling_correlation_heatmap', + 'CorrelationBand', +] def plot_lagged_correlation( @@ -42,13 +49,89 @@ def plot_lagged_correlation( return output_path.resolve() + +def plot_lagged_correlation_multi( + lag_series: dict[str, pd.Series], + var_x: Variable, + var_y: Variable, + output_path: str | Path, + *, + title_suffix: str | None = None, + ylabel: str = "Corrélation", + y_limits: tuple[float, float] | None = None, + thresholds: Sequence[float] | None = None, + bands: Iterable["CorrelationBand"] | None = None, +) -> Path: + """ + Trace plusieurs courbes de corrélation en fonction du lag (ex. Pearson/Spearman). + """ + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + df = pd.concat(lag_series, axis=1) + export_plot_dataset(df, output_path) + + plt.figure() + colors = ["#1f77b4", "#d1495b", "#2ca02c", "#9467bd"] + for idx, (label, series) in enumerate(df.items()): + plt.plot(series.index, series, label=label, color=colors[idx % len(colors)], linewidth=1.6) + + ax = plt.gca() + if bands: + xmin, xmax = df.index.min(), df.index.max() + for band in bands: + ax.axhspan(band.min_value, band.max_value, color=band.color, alpha=0.25, zorder=0) + ax.text( + xmax, + (band.min_value + band.max_value) / 2.0, + band.label, + ha="right", + va="center", + fontsize=8, + color="#444444", + bbox=dict(facecolor="white", edgecolor="none", alpha=0.6, pad=1.5), + ) + + plt.axvline(0, linestyle="--", color="#666666", linewidth=1.0) + plt.xlabel("Décalage (minutes)\n(lag > 0 : X précède Y)") + plt.ylabel(ylabel) + title = f"Corrélation décalée : {var_x.label} → {var_y.label}" + if title_suffix: + title = f"{title} ({title_suffix})" + plt.title(title) + if thresholds: + xmin, xmax = plt.xlim() + for thr in thresholds: + plt.axhline(thr, color="#999999", linestyle="--", linewidth=1.0, alpha=0.85) + plt.text( + xmax, + thr, + f"{thr:.2f}", + ha="right", + va="center", + fontsize=8, + color="#555555", + bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, pad=1.5), + ) + if y_limits is not None: + plt.ylim(*y_limits) + plt.grid(True, alpha=0.7) + plt.legend() + plt.tight_layout() + plt.savefig(output_path, dpi=150) + plt.close() + + return output_path.resolve() + def plot_correlation_heatmap( corr: pd.DataFrame, variables: Sequence[Variable], output_path: str | Path, *, annotate: bool = True, + annotate_values: "pd.DataFrame | None" = None, title: str | None = None, + figsize: tuple[float, float] | None = None, cmap: str | None = None, vmin: float | None = None, vmax: float | None = None, @@ -89,7 +172,13 @@ def plot_correlation_heatmap( data = corr.to_numpy() - fig, ax = plt.subplots() + if figsize is None: + n = len(variables) + # Augmente la taille pour laisser respirer les annotations + side = max(6.0, n * 0.9) + figsize = (side, side) + + fig, ax = plt.subplots(figsize=figsize) if vmin is None: vmin = -1.0 if vmax is None: @@ -117,6 +206,11 @@ def plot_correlation_heatmap( # Annotation des cases if annotate: n = data.shape[0] + annot_data = ( + annotate_values.loc[columns, columns].to_numpy() + if annotate_values is not None + else data + ) norm = im.norm cmap_obj = im.cmap @@ -128,18 +222,23 @@ def plot_correlation_heatmap( for i in range(n): for j in range(n): - val = data[i, j] + val_corr = data[i, j] + val_annot = annot_data[i, j] if i == j: text = "—" - elif np.isnan(val): + elif isinstance(val_annot, (float, int, np.floating)) and np.isnan(val_annot): text = "" else: - text = f"{val:.2f}" + # si annotate_values est fourni, on affiche la valeur annotée brute + if annotate_values is not None: + text = str(val_annot) + else: + text = f"{val_corr:.2f}" if not text: continue - color = _text_color(0.0 if np.isnan(val) else val) + color = _text_color(0.0 if np.isnan(val_corr) else val_corr) ax.text( j, i,