# Source: donnees_meteo/meteo/plots/seasonal_stats.py
# Snapshot: 2025-11-18 09:01:34 +01:00 (352 lines, 11 KiB, Python)
#
# NOTE: the hosting page flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.
# If this is intentional, the warning can safely be ignored; otherwise reveal
# the characters in an editor and check them.

"""Visualisations statistiques agrégées par saison, mois ou intervalles spécialisés."""
from __future__ import annotations
import calendar
from pathlib import Path
from typing import Sequence
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from .base import export_plot_dataset
from meteo.analysis import BinnedStatistics, MONTH_ORDER
from meteo.season import SEASON_LABELS
from meteo.variables import Variable
# Names exported by `from <this module> import *`: the four plotting functions
# defined below.
__all__ = ['plot_seasonal_boxplots', 'plot_monthly_boxplots', 'plot_binned_profiles', 'plot_monthly_anomalies']
def plot_seasonal_boxplots(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    season_column: str = "season",
    season_order: Sequence[str] | None = None,
    title: str | None = None,
) -> Path:
    """Draw per-season box plots, one subplot per variable, and save the figure.

    Args:
        df: Table holding the season column and one column per variable
            (looked up through ``var.column``).
        variables: Variables to plot; each contributes one stacked subplot and
            supplies ``column``, ``label`` and ``unit``.
        output_path: Image file to write. Missing parent directories are
            created.
        season_column: Name of the column holding the season of each row.
        season_order: Seasons to show, in display order. Defaults to the
            iteration order of ``SEASON_LABELS``. In both cases, seasons that
            never occur in ``df`` are dropped.
        title: Optional figure title.

    Returns:
        The resolved path of the saved image.

    Raises:
        KeyError: If ``season_column`` is not a column of ``df``.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    if season_column not in df.columns:
        raise KeyError(f"Colonne saison absente : {season_column}")

    season_values = df[season_column]
    present = season_values.dropna().unique()
    requested = SEASON_LABELS if season_order is None else season_order
    seasons = [name for name in requested if name in present]

    if not seasons:
        # Nothing to draw: save a placeholder image carrying only a message.
        placeholder, message_ax = plt.subplots()
        message_ax.text(
            0.5,
            0.5,
            "Aucune donnée saisonnière disponible.",
            ha="center",
            va="center",
        )
        message_ax.set_axis_off()
        placeholder.savefig(destination, dpi=150, bbox_inches="tight")
        plt.close(placeholder)
        return destination.resolve()

    # Hand the plotted columns to the project's dataset exporter.
    exported_columns = [season_column, *(var.column for var in variables)]
    export_plot_dataset(df[exported_columns], destination)

    row_count = len(variables)
    fig, grid = plt.subplots(
        row_count,
        1,
        figsize=(10, 3 * row_count),
        sharex=True,
        squeeze=False,
    )
    # squeeze=False always yields a 2-D array; keep its single column.
    axes = grid[:, 0]

    palette = plt.get_cmap("Set3")(np.linspace(0.2, 0.8, len(seasons)))
    tick_labels = [name.capitalize() for name in seasons]

    for ax, var in zip(axes, variables):
        samples = [
            df.loc[season_values == name, var.column].dropna().to_numpy()
            for name in seasons
        ]
        if all(len(sample) == 0 for sample in samples):
            # This variable has no usable value in any season.
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        artists = ax.boxplot(
            samples,
            labels=tick_labels,
            showfliers=False,
            patch_artist=True,
        )
        for patch, shade in zip(artists["boxes"], palette):
            patch.set_facecolor(shade)
            patch.set_alpha(0.7)

        if var.unit:
            ax.set_ylabel(f"{var.label} ({var.unit})")
        else:
            ax.set_ylabel(var.label)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Saison")

    if title:
        fig.suptitle(title)
        # Leave headroom at the top for the figure title.
        fig.tight_layout(rect=[0, 0, 1, 0.95])
    else:
        fig.tight_layout()

    fig.savefig(destination, dpi=150)
    plt.close(fig)
    return destination.resolve()
def plot_monthly_boxplots(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
) -> Path:
    """Draw per-month box plots, one subplot per variable, and save the figure.

    Months are shown in the order given by ``MONTH_ORDER`` and labelled with
    the capitalised ``calendar.month_abbr`` abbreviations.

    Args:
        df: Table indexed by a ``DatetimeIndex`` holding one column per
            variable (looked up through ``var.column``).
        variables: Variables to plot; each contributes one stacked subplot and
            supplies ``column``, ``label`` and ``unit``.
        output_path: Image file to write. Missing parent directories are
            created.

    Returns:
        The resolved path of the saved image.

    Raises:
        TypeError: If ``df.index`` is not a ``pandas.DatetimeIndex``.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError("plot_monthly_boxplots nécessite un DatetimeIndex.")

    month_of_row = df.index.month

    # Hand the plotted columns, prefixed by the month number, to the exporter.
    exported = df[[var.column for var in variables]].copy()
    exported.insert(0, "month", month_of_row)
    export_plot_dataset(exported, destination)

    tick_labels = [calendar.month_abbr[number].capitalize() for number in MONTH_ORDER]

    row_count = len(variables)
    fig, grid = plt.subplots(
        row_count,
        1,
        figsize=(12, 3 * row_count),
        sharex=True,
        squeeze=False,
    )
    # squeeze=False always yields a 2-D array; keep its single column.
    axes = grid[:, 0]

    # One colour per month box, identical for every subplot.
    palette = plt.get_cmap("Spectral")(np.linspace(0.2, 0.8, len(tick_labels)))

    for ax, var in zip(axes, variables):
        samples = [
            df.loc[month_of_row == number, var.column].dropna().to_numpy()
            for number in MONTH_ORDER
        ]
        if all(len(sample) == 0 for sample in samples):
            # This variable has no usable value in any month.
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        artists = ax.boxplot(
            samples,
            labels=tick_labels,
            showfliers=False,
            patch_artist=True,
        )
        for patch, shade in zip(artists["boxes"], palette):
            patch.set_facecolor(shade)
            patch.set_alpha(0.7)

        if var.unit:
            ax.set_ylabel(f"{var.label} ({var.unit})")
        else:
            ax.set_ylabel(var.label)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Mois")
    fig.suptitle("Distribution mensuelle")
    # Leave headroom at the top for the figure title.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(destination, dpi=150)
    plt.close(fig)
    return destination.resolve()
def plot_binned_profiles(
    stats: BinnedStatistics,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    xlabel: str,
    title: str,
    show_counts: bool = False,
) -> Path:
    """Plot binned mean/median profiles (with an optional quantile band).

    Each variable gets its own subplot showing the per-bin mean, the per-bin
    median and, when both quantile tables are present, a shaded band between
    them. With ``show_counts`` an extra bar chart of the per-bin counts is
    placed on top.

    Args:
        stats: Binned statistics. This function reads ``centers``,
            ``intervals``, ``counts``, ``mean``, ``median``, ``quantile_low``,
            ``quantile_high`` and, for the band label, ``quantile_low_level``
            and ``quantile_high_level``.
        variables: Variables to plot; each supplies ``column``, ``label`` and
            ``unit``. May be empty when ``show_counts`` is true, in which case
            only the counts chart is drawn.
        output_path: Image file to write. Missing parent directories are
            created.
        xlabel: Label of the shared x axis.
        title: Figure title.
        show_counts: Whether to add the per-bin count bar chart.

    Returns:
        The resolved path of the saved image.

    Raises:
        ValueError: Raised by ``plt.subplots`` when there is nothing to draw
            (no variables and ``show_counts`` false) — NOTE(review): this is
            matplotlib's own error, confirm it is what callers expect.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if stats.centers.size == 0:
        # No bin at all: save a placeholder image carrying only a message.
        fig, ax = plt.subplots()
        try:
            ax.text(
                0.5,
                0.5,
                "Aucune donnée suffisante pour ces intervalles.",
                ha="center",
                va="center",
            )
            ax.set_axis_off()
            fig.savefig(output_path, dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)
        return output_path.resolve()

    bin_summary = pd.DataFrame(
        {
            "bin_left": stats.intervals.left,
            "bin_right": stats.intervals.right,
            "center": stats.centers,
        }
    )
    # Hand every plotted table to the project's dataset exporter.
    export_plot_dataset(
        {
            "bins": bin_summary,
            "counts": stats.counts,
            "mean": stats.mean,
            "median": stats.median,
            "quantile_low": stats.quantile_low,
            "quantile_high": stats.quantile_high,
        },
        output_path,
    )

    total_axes = len(variables) + (1 if show_counts else 0)
    fig, grid = plt.subplots(
        total_axes,
        1,
        sharex=True,
        figsize=(10, 3 * total_axes),
        squeeze=False,
    )
    # Close the figure even if plotting or saving fails, so that it does not
    # stay registered in pyplot.
    try:
        # squeeze=False always yields a 2-D array; keep its single column.
        all_axes = list(grid[:, 0])
        variable_axes = all_axes[1:] if show_counts else all_axes
        x_values = stats.centers

        if show_counts:
            count_ax = all_axes[0]
            bin_widths = np.array([interval.length for interval in stats.intervals])
            count_ax.bar(
                x_values,
                stats.counts.to_numpy(dtype=float),
                width=bin_widths,
                color="lightgray",
                edgecolor="gray",
                align="center",
            )
            count_ax.set_ylabel("Nombre de points")
            count_ax.grid(True, linestyle=":", alpha=0.4)
            count_ax.set_title("Densité des observations par bin")

        has_band = stats.quantile_low is not None and stats.quantile_high is not None
        band_label = "Quantiles"
        if has_band:
            low_level = stats.quantile_low_level
            high_level = stats.quantile_high_level
            if low_level is not None and high_level is not None:
                # The two percentages need a separator ("25–75%", not "2575%").
                # "\u2013" is an en dash, written as an escape so it survives
                # copy/paste. ":g" avoids float truncation (0.29 * 100 is
                # 28.999…, which int() would turn into 28).
                band_label = f"Quantiles {low_level * 100:g}\u2013{high_level * 100:g}%"

        for ax, var in zip(variable_axes, variables):
            col = var.column
            ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
            ax.plot(
                x_values,
                stats.median[col],
                color="tab:orange",
                linestyle="--",
                label="Médiane",
            )
            if has_band:
                ax.fill_between(
                    x_values,
                    stats.quantile_low[col],
                    stats.quantile_high[col],
                    color="tab:blue",
                    alpha=0.15,
                    label=band_label,
                )
            ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
            ax.set_ylabel(ylabel)
            ax.grid(True, linestyle=":", alpha=0.5)

        # Use the bottom-most axis of the whole figure so that a counts-only
        # figure (no variables) still gets its x label and limits.
        bottom_ax = all_axes[-1]
        bottom_ax.set_xlabel(xlabel)
        if variable_axes:
            variable_axes[0].legend(loc="upper right")
        bottom_ax.set_xlim(stats.intervals.left.min(), stats.intervals.right.max())

        fig.suptitle(title)
        # Leave headroom at the top for the figure title.
        fig.tight_layout(rect=[0, 0, 1, 0.97])
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)
    return output_path.resolve()
def plot_monthly_anomalies(
    monthly_means: pd.DataFrame,
    climatology: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    title: str = "Moyennes mensuelles vs climatologie",
) -> Path:
    """Plot observed monthly means against the climatology, one subplot per variable.

    For each variable the observed series and the matching climatological
    values are drawn as lines. The area between them is shaded blue where the
    observation is at or above the climatology and red where it is below.

    Args:
        monthly_means: Observed monthly means, one column per variable. Its
            index must expose ``.month`` (e.g. a ``DatetimeIndex``).
        climatology: Reference values, one column per variable, looked up with
            ``.loc`` using the month numbers of ``monthly_means``' index.
        variables: Variables to plot; each supplies ``column``, ``label`` and
            ``unit``.
        output_path: Image file to write. Missing parent directories are
            created.
        title: Figure title.

    Returns:
        The resolved path of the saved image.

    Raises:
        KeyError: If a variable column is missing from either table or —
            presumably, per pandas ``.loc`` semantics — if an observed month is
            absent from the ``climatology`` index.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if monthly_means.empty or climatology.empty:
        # Nothing to compare: save a placeholder image carrying only a message.
        fig, ax = plt.subplots()
        try:
            ax.text(
                0.5,
                0.5,
                "Pas de données mensuelles disponibles.",
                ha="center",
                va="center",
            )
            ax.set_axis_off()
            fig.savefig(output_path, dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)
        return output_path.resolve()

    n_vars = len(variables)
    fig, grid = plt.subplots(
        n_vars,
        1,
        figsize=(12, 3 * n_vars),
        sharex=True,
        squeeze=False,
    )
    # Close the figure even if plotting or saving fails, so that it does not
    # stay registered in pyplot.
    try:
        # squeeze=False always yields a 2-D array; keep its single column.
        axes = grid[:, 0]
        export_frames: list[pd.DataFrame] = []
        legend_ax = None  # first axis that actually received labelled lines

        for ax, var in zip(axes, variables):
            actual = monthly_means[var.column].dropna()
            if actual.empty:
                ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
                ax.set_axis_off()
                continue

            # Climatological value for the calendar month of each observation.
            clim = climatology.loc[actual.index.month, var.column].to_numpy(dtype=float)
            anomaly = actual.to_numpy(dtype=float) - clim

            frame = pd.DataFrame(
                {
                    "actual": actual,
                    "climatology": pd.Series(clim, index=actual.index, name="climatology"),
                }
            )
            frame["anomaly"] = anomaly
            # Nest the three columns under the variable's column name.
            export_frames.append(pd.concat({var.column: frame}, axis=1))

            ax.plot(actual.index, actual, color="tab:blue", label="Moyenne mensuelle")
            ax.plot(actual.index, clim, color="tab:gray", linestyle="--", label="Climatologie")
            ax.fill_between(
                actual.index,
                actual,
                clim,
                where=anomaly >= 0,
                color="tab:blue",
                alpha=0.15,
            )
            ax.fill_between(
                actual.index,
                actual,
                clim,
                where=anomaly < 0,
                color="tab:red",
                alpha=0.15,
            )

            ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
            ax.set_ylabel(ylabel)
            ax.grid(True, linestyle=":", alpha=0.5)

            # A locator keeps a reference to the axis it is attached to, so
            # build a fresh locator/formatter pair per axis instead of sharing
            # one pair between all subplots.
            locator = mdates.AutoDateLocator()
            ax.xaxis.set_major_locator(locator)
            ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))

            if legend_ax is None:
                legend_ax = ax

        if export_frames:
            export_plot_dataset(pd.concat(export_frames, axis=1), output_path)

        axes[-1].set_xlabel("Date")
        # Put the legend on an axis that has lines: the first subplot may have
        # been blanked for lack of data, which would leave no legend at all.
        if legend_ax is not None:
            legend_ax.legend(loc="upper right")

        fig.suptitle(title)
        # Leave headroom at the top for the figure title.
        fig.tight_layout(rect=[0, 0, 1, 0.97])
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)
    return output_path.resolve()