1
donnees_meteo/meteo/plots.py

476 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# meteo/plots.py
from __future__ import annotations
from pathlib import Path
from typing import Callable, Sequence
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import numpy as np
import pandas as pd
from .variables import Variable
def plot_scatter_pair(
    df: pd.DataFrame,
    var_x: Variable,
    var_y: Variable,
    output_path: str | Path,
    *,
    sample_step: int = 10,
    color_by_time: bool = True,
    cmap: str = "viridis",
) -> Path:
    """
    Draw a scatter plot for a pair of variables and save it as an image.

    Rows where either column is missing are dropped, then one row out of
    every `sample_step` is kept (values <= 1 keep every row) so the figure
    stays readable.

    When `color_by_time` is true, the index is a `DatetimeIndex` and at least
    one row remains, points are coloured by timestamp and a colour bar with
    date labels is added.

    When `var_y.key` is "wind_direction", a polar plot is used instead
    (0 degrees = North, clockwise): the angle is `var_y` and the radius is
    `var_x` rescaled so that centre = minimum value and edge = maximum value.

    Parameters
    ----------
    df :
        Source data; must contain `var_x.column` and `var_y.column`.
    var_x, var_y :
        Variables plotted on X (or radius) and Y (or angle).
    output_path :
        Image file to write; parent directories are created if needed.
    sample_step :
        Subsampling stride applied after dropping incomplete rows.
    color_by_time :
        Colour points by timestamp when the index allows it.
    cmap :
        Colormap name used for the time colouring.

    Returns
    -------
    Path
        Resolved path of the written image.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Keep only the two relevant columns and the fully populated rows.
    df_pair = df[[var_x.column, var_y.column]].dropna()
    if sample_step > 1:
        df_pair = df_pair.iloc[::sample_step, :]

    use_polar = var_y.key == "wind_direction"
    if use_polar:
        fig, ax = plt.subplots(subplot_kw={"projection": "polar"})
    else:
        fig, ax = plt.subplots()

    scatter_kwargs: dict = {"s": 5, "alpha": 0.5}
    colorbar_meta: dict | None = None
    # Time colouring needs at least one point: the ptp/min/max reductions
    # below raise on a zero-size array.
    if (
        color_by_time
        and isinstance(df_pair.index, pd.DatetimeIndex)
        and not df_pair.empty
    ):
        idx = df_pair.index
        timestamps = idx.view("int64")
        time_span = np.ptp(timestamps)
        scatter_kwargs.update(c=timestamps, cmap=cmap)
        if time_span > 0:
            # With a single distinct timestamp no explicit norm is passed.
            scatter_kwargs["norm"] = Normalize(
                vmin=timestamps.min(), vmax=timestamps.max()
            )
        colorbar_meta = {
            "index": idx,
            "timestamps": timestamps,
            "time_span": time_span,
        }

    if use_polar:
        theta = np.deg2rad(df_pair[var_y.column].to_numpy(dtype=float) % 360.0)
        radius_raw = df_pair[var_x.column].to_numpy(dtype=float)
        has_data = radius_raw.size > 0
        if has_data:
            value_min = float(np.min(radius_raw))
            value_max = float(np.max(radius_raw))
            is_constant = bool(np.isclose(value_min, value_max))
            if is_constant:
                # Avoid dividing by a (near-)zero range: everything sits at the centre.
                radius = np.zeros_like(radius_raw)
            else:
                radius = (radius_raw - value_min) / (value_max - value_min)
        else:
            radius = radius_raw
        scatter = ax.scatter(theta, radius, **scatter_kwargs)

        cardinal_angles = np.deg2rad(np.arange(0, 360, 45))
        cardinal_labels = ["N", "NE", "E", "SE", "S", "SO", "O", "NO"]
        ax.set_theta_zero_location("N")
        ax.set_theta_direction(-1)
        ax.set_xticks(cardinal_angles)
        ax.set_xticklabels(cardinal_labels)

        if has_data:
            if is_constant:
                radial_positions = [0.0]
            else:
                radial_positions = np.linspace(0.0, 1.0, num=5).tolist()
            # Radial ticks sit on the normalised radius but show real values.
            value_range = value_max - value_min
            ax.set_yticks(radial_positions)
            ax.set_yticklabels(
                [f"{value_min + pos * value_range:.1f}" for pos in radial_positions]
            )
        ax.set_rlabel_position(225)
        ax.set_ylim(0.0, 1.0)

        if has_data:
            # The caption is only meaningful when a min/max actually exists.
            unit_suffix = f" {var_x.unit}" if var_x.unit else ""
            ax.text(
                0.5,
                -0.1,
                f"Centre = {value_min:.1f}{unit_suffix}, bord = {value_max:.1f}{unit_suffix}",
                transform=ax.transAxes,
                ha="center",
                va="top",
                fontsize=8,
            )
        radial_label = f"{var_x.label} ({var_x.unit})" if var_x.unit else var_x.label
        ax.set_ylabel(radial_label, labelpad=20)
    else:
        scatter = ax.scatter(
            df_pair[var_x.column],
            df_pair[var_y.column],
            **scatter_kwargs,
        )

    if colorbar_meta is not None:
        cbar = fig.colorbar(scatter, ax=ax)
        idx = colorbar_meta["index"]
        timestamps = colorbar_meta["timestamps"]
        time_span = colorbar_meta["time_span"]

        def _format_tick_label(ts: pd.Timestamp) -> str:
            # Date and time on two lines, plus the timezone name when there is one.
            base = f"{ts.strftime('%Y-%m-%d')}\n{ts.strftime('%H:%M')}"
            tz_name = ts.tzname()
            return f"{base} ({tz_name})" if tz_name else base

        if time_span > 0:
            tick_datetimes = pd.date_range(start=idx.min(), end=idx.max(), periods=5)
            tick_positions = tick_datetimes.view("int64")
            tick_labels = [_format_tick_label(ts) for ts in tick_datetimes]
            cbar.set_ticks(tick_positions)
            cbar.set_ticklabels(tick_labels)
        else:
            # Single distinct timestamp: one tick is enough.
            cbar.set_ticks([timestamps[0]])
            cbar.set_ticklabels([_format_tick_label(idx[0])])
        cbar.set_label("Temps (ancien → récent)")

    if not use_polar:
        # Same unit handling as the polar radial label: no empty "()" suffix.
        x_label = f"{var_x.label} ({var_x.unit})" if var_x.unit else var_x.label
        y_label = f"{var_y.label} ({var_y.unit})" if var_y.unit else var_y.label
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
    ax.set_title(f"{var_y.label} en fonction de {var_x.label}")

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_hexbin_with_third_variable(
    df: pd.DataFrame,
    var_x: Variable,
    var_y: Variable,
    var_color: Variable,
    output_path: str | Path,
    *,
    gridsize: int = 60,
    mincnt: int = 5,
    reduce_func: Callable[[np.ndarray], float] | None = None,
    reduce_func_label: str | None = None,
    cmap: str = "viridis",
) -> Path:
    """
    Draw a hexbin map whose colour encodes a third variable, and save it.

    Rows missing any of the three columns are dropped. If nothing remains,
    a placeholder image containing a short message is written instead.

    Parameters
    ----------
    df :
        Source data; must contain the three variables' columns.
    var_x, var_y :
        Variables used for the X and Y axes.
    var_color :
        Variable aggregated per hexagon and mapped to colour.
    output_path :
        Image file to write; parent directories are created if needed.
    gridsize, mincnt, cmap :
        Passed through to `Axes.hexbin`.
    reduce_func :
        Aggregation applied to the `var_color` values of each hexagon;
        defaults to `np.mean`.
    reduce_func_label :
        Name of the aggregation shown in the title and colour bar; defaults
        to the function's `__name__`, or "statistique" if it has none.

    Returns
    -------
    Path
        Resolved path of the written image.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    reduce_func = reduce_func or np.mean
    df_xyz = df[[var_x.column, var_y.column, var_color.column]].dropna()

    if df_xyz.empty:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "Pas de données valides pour cette combinaison.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    fig, ax = plt.subplots()
    hb = ax.hexbin(
        df_xyz[var_x.column],
        df_xyz[var_y.column],
        C=df_xyz[var_color.column],
        reduce_C_function=reduce_func,
        gridsize=gridsize,
        cmap=cmap,
        mincnt=mincnt,
    )

    func_label = reduce_func_label or getattr(reduce_func, "__name__", "statistique")
    cbar = fig.colorbar(hb, ax=ax)
    cbar.set_label(f"{func_label.capitalize()} de {var_color.label}")

    # Only append the unit when there is one, to avoid an empty "()" suffix.
    x_label = f"{var_x.label} ({var_x.unit})" if var_x.unit else var_x.label
    y_label = f"{var_y.label} ({var_y.unit})" if var_y.unit else var_y.label
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(
        f"{var_y.label} vs {var_x.label}\nCouleur : {func_label} de {var_color.label}"
    )
    ax.grid(False)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_lagged_correlation(
    lag_df: pd.DataFrame,
    var_x: Variable,
    var_y: Variable,
    output_path: str | Path,
) -> Path:
    """
    Plot the correlation between two variables as a function of the lag.

    Parameters
    ----------
    lag_df :
        Frame with a "correlation" column; its index is used as the X axis
        (labelled as a lag in minutes).
    var_x, var_y :
        Variables whose labels appear in the title.
    output_path :
        Image file to write; parent directories are created if needed.

    Returns
    -------
    Path
        Resolved path of the written image.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Look the column up before opening a figure, so a malformed `lag_df`
    # fails without leaving a figure registered in pyplot.
    correlation = lag_df["correlation"]

    # Explicit figure/axes rather than the implicit pyplot "current figure",
    # so that exactly this figure is saved and closed.
    fig, ax = plt.subplots()
    ax.plot(lag_df.index, correlation)
    ax.axvline(0, linestyle="--")  # lag = 0
    ax.set_xlabel("Décalage (minutes)\n(lag > 0 : X précède Y)")
    ax.set_ylabel("Corrélation")
    ax.set_title(f"Corrélation décalée : {var_x.label} → {var_y.label}")
    ax.grid(True)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_correlation_heatmap(
    corr: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    annotate: bool = True,
) -> Path:
    """
    Render a correlation matrix as a heatmap and save it as an image.

    Parameters
    ----------
    corr :
        Correlation matrix; its index and columns must contain the column
        names of the given variables.
    variables :
        Variables to show, in display order.
    output_path :
        Image file to write; parent directories are created if needed.
    annotate :
        If True, write the numeric value in each off-diagonal cell
        (NaN cells are left blank).

    Returns
    -------
    Path
        Resolved path of the written image.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    ordered_columns = [variable.column for variable in variables]
    tick_labels = [variable.label for variable in variables]

    # Re-order the matrix so rows and columns follow the requested order.
    matrix = corr.loc[ordered_columns, ordered_columns].to_numpy()
    tick_positions = np.arange(len(tick_labels))

    fig, ax = plt.subplots()
    image = ax.imshow(matrix, vmin=-1.0, vmax=1.0)

    # X labels are rotated so that long names stay legible.
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha="right")
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(tick_labels)
    ax.set_title("Matrice de corrélation (coef. de Pearson)")

    colorbar = plt.colorbar(image, ax=ax)
    colorbar.set_label("Corrélation")

    if annotate:
        size = matrix.shape[0]
        for row, col in np.ndindex(size, size):
            # The diagonal and NaN cells get an empty label.
            if row == col or np.isnan(matrix[row, col]):
                cell_text = ""
            else:
                cell_text = f"{matrix[row, col]:.2f}"
            ax.text(col, row, cell_text, ha="center", va="center")

    plt.tight_layout()
    plt.savefig(target, dpi=150)
    plt.close(fig)
    return target.resolve()
def plot_rolling_correlation_heatmap(
    rolling_corr: pd.DataFrame,
    output_path: str | Path,
    *,
    cmap: str = "coolwarm",
    vmin: float = -1.0,
    vmax: float = 1.0,
    time_tick_count: int = 6,
) -> Path:
    """
    Show how rolling correlations evolve for several pairs, as a heatmap.

    Each column of `rolling_corr` becomes one row of the image and each index
    entry one column. An empty frame produces a placeholder image containing
    a short message.

    Parameters
    ----------
    rolling_corr :
        One column per pair, one row per index entry.
    output_path :
        Image file to write; parent directories are created if needed.
    cmap, vmin, vmax :
        Colour mapping passed to `imshow`.
    time_tick_count :
        Upper bound on the number of ticks drawn along the X axis.

    Returns
    -------
    Path
        Resolved path of the written image.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    if rolling_corr.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Aucune donnée de corrélation glissante.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(target, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return target.resolve()

    pair_names = list(rolling_corr.columns)
    # Transposed so that pairs run down the Y axis and index entries along X.
    matrix = rolling_corr.to_numpy().T

    fig_height = max(3.0, 0.6 * len(pair_names))
    fig, ax = plt.subplots(figsize=(10, fig_height))
    image = ax.imshow(matrix, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)
    ax.set_yticks(np.arange(len(pair_names)))
    ax.set_yticklabels(pair_names)

    index = rolling_corr.index
    n_steps = len(index)
    is_temporal = isinstance(index, pd.DatetimeIndex)

    if is_temporal and n_steps <= 1:
        # A single timestamp always gets exactly one tick.
        tick_positions = np.array([0])
    else:
        tick_positions = np.linspace(
            0, n_steps - 1, num=min(time_tick_count, n_steps), dtype=int
        )

    if is_temporal:
        tick_text = [index[pos].strftime("%Y-%m-%d\n%H:%M") for pos in tick_positions]
    else:
        tick_text = [str(index[pos]) for pos in tick_positions]

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_text, rotation=30, ha="right")
    ax.set_xlabel("Temps (fin de fenêtre)")
    ax.set_ylabel("Paire de variables")
    ax.set_title("Corrélations glissantes")

    colorbar = fig.colorbar(image, ax=ax)
    colorbar.set_label("Coefficient de corrélation")

    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    return target.resolve()
def plot_event_composite(
    aligned_segments: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    quantiles: tuple[float, float] = (0.25, 0.75),
    baseline_label: str = "Début de l'événement",
) -> Path:
    """
    Plot mean and median curves around detected events, with a quantile band.

    One subplot is drawn per variable, sharing the X axis (offset relative to
    the event). An empty `aligned_segments` produces a placeholder image
    containing a short message.

    Parameters
    ----------
    aligned_segments :
        Frame whose index has the levels "offset_minutes" and "event_id" and
        whose columns include each variable's column.
    variables :
        Variables to plot, one subplot each, top to bottom.
    output_path :
        Image file to write; parent directories are created if needed.
    quantiles :
        Lower and upper quantile of the shaded band; the band is skipped if
        either value is None.
    baseline_label :
        Legend label of the vertical line drawn at offset 0.

    Returns
    -------
    Path
        Resolved path of the written image.

    Raises
    ------
    ValueError
        If the index has no "offset_minutes" level, or if `variables` is
        empty while `aligned_segments` is not.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if aligned_segments.empty:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "Aucun événement aligné à tracer.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    if "offset_minutes" not in aligned_segments.index.names:
        raise ValueError("aligned_segments doit avoir un niveau 'offset_minutes'.")

    n_vars = len(variables)
    if n_vars == 0:
        # Fail before a figure exists; a zero-row subplot grid is invalid.
        raise ValueError("variables doit contenir au moins une Variable.")

    # Resolved before any figure is created, so a missing 'event_id' level
    # cannot leave an open figure behind.
    total_events = len(aligned_segments.index.get_level_values("event_id").unique())

    group = aligned_segments.groupby(level="offset_minutes")
    mean_df = group.mean()
    median_df = group.median()
    offsets = mean_df.index.to_numpy(dtype=float)

    q_low, q_high = quantiles
    show_band = q_low is not None and q_high is not None
    if show_band:
        quantile_low = group.quantile(q_low)
        quantile_high = group.quantile(q_high)
        # ':g' avoids float truncation (0.29 * 100 -> 28.99...) and keeps
        # non-integer percentiles readable.
        band_label = f"IQR {q_low * 100:g}-{q_high * 100:g}%"

    # squeeze=False always returns a 2-D array, including for one subplot.
    fig, axes_grid = plt.subplots(
        n_vars, 1, figsize=(10, 3 * n_vars), sharex=True, squeeze=False
    )
    axes = axes_grid[:, 0]

    for ax, var in zip(axes, variables):
        col = var.column
        ax.axvline(0, color="black", linestyle="--", linewidth=1, label=baseline_label)
        ax.plot(offsets, mean_df[col], color="tab:blue", label="Moyenne")
        ax.plot(offsets, median_df[col], color="tab:orange", linestyle="--", label="Médiane")
        if show_band:
            ax.fill_between(
                offsets,
                quantile_low[col],
                quantile_high[col],
                color="tab:blue",
                alpha=0.2,
                label=band_label,
            )
        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Minutes autour de l'événement")
    axes[0].legend(loc="upper right")
    fig.suptitle(f"Composites autour d'événements ({total_events} occurrences)")

    # Leave room at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()