352 lines
11 KiB
Python
352 lines
11 KiB
Python
"""Visualisations statistiques agrégées par saison, mois ou intervalles spécialisés."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import calendar
|
||
from pathlib import Path
|
||
from typing import Sequence
|
||
|
||
import matplotlib.dates as mdates
|
||
import matplotlib.pyplot as plt
|
||
import numpy as np
|
||
import pandas as pd
|
||
|
||
from .base import export_plot_dataset
|
||
from meteo.analysis import BinnedStatistics, MONTH_ORDER
|
||
from meteo.season import SEASON_LABELS
|
||
from meteo.variables import Variable
|
||
|
||
# Names exported by `from <this module> import *`: the four plotting entry points defined below.
__all__ = ['plot_seasonal_boxplots', 'plot_monthly_boxplots', 'plot_binned_profiles', 'plot_monthly_anomalies']
|
||
|
||
|
||
def plot_seasonal_boxplots(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    season_column: str = "season",
    season_order: Sequence[str] | None = None,
    title: str | None = None,
) -> Path:
    """Draw per-season box plots, one stacked subplot per variable.

    Args:
        df: Table holding the season column and one column per variable.
        variables: Variables to plot; each contributes one subplot.
        output_path: Destination image file; parent directories are created.
        season_column: Name of the column carrying the season of each row.
        season_order: Seasons to show, in display order. When omitted, the
            order of ``SEASON_LABELS`` is used. In both cases, seasons that
            do not occur in ``df`` are dropped.
        title: Optional figure title.

    Returns:
        The resolved path of the saved figure.

    Raises:
        KeyError: If ``season_column`` is not a column of ``df``.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if season_column not in df.columns:
        raise KeyError(f"Colonne saison absente : {season_column}")

    # Keep only the seasons that actually occur in the data.
    present = df[season_column].dropna().unique()
    candidates = SEASON_LABELS if season_order is None else season_order
    season_order = [name for name in candidates if name in present]

    if not season_order:
        # Nothing to plot: save a placeholder figure carrying only a message.
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Aucune donnée saisonnière disponible.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    # Hand the plotted columns to the export helper alongside the figure path.
    export_plot_dataset(df[[season_column, *(v.column for v in variables)]], output_path)

    n_rows = len(variables)
    fig, axes = plt.subplots(n_rows, 1, figsize=(10, 3 * n_rows), sharex=True)
    if n_rows == 1:
        # plt.subplots returns a bare Axes for a single row; normalise to a list.
        axes = [axes]

    colors = plt.get_cmap("Set3")(np.linspace(0.2, 0.8, len(season_order)))
    labels = [name.capitalize() for name in season_order]
    season_values = df[season_column]

    for ax, var in zip(axes, variables):
        samples = []
        for name in season_order:
            in_season = season_values == name
            samples.append(df.loc[in_season, var.column].dropna().to_numpy())

        if all(len(values) == 0 for values in samples):
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        # Outliers are not drawn (showfliers=False); boxes are filled patches.
        box = ax.boxplot(
            samples,
            labels=labels,
            showfliers=False,
            patch_artist=True,
        )
        for patch, color in zip(box["boxes"], colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.7)

        if var.unit:
            ylabel = f"{var.label} ({var.unit})"
        else:
            ylabel = var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Saison")
    if not title:
        fig.tight_layout()
    else:
        fig.suptitle(title)
        # Reserve the top 5 % of the figure for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.95])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
|
||
|
||
def plot_monthly_boxplots(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    title: str | None = "Distribution mensuelle",
) -> Path:
    """Draw per-month box plots, one stacked subplot per variable.

    Rows are grouped by the month of the ``DatetimeIndex`` and boxes are laid
    out following ``MONTH_ORDER``.

    Args:
        df: Time-indexed table with one column per variable.
        variables: Variables to plot; each contributes one subplot.
        output_path: Destination image file; parent directories are created.
        title: Figure title. Defaults to the title that was previously
            hard-coded; pass ``None`` or ``""`` to omit it.

    Returns:
        The resolved path of the saved figure.

    Raises:
        TypeError: If ``df.index`` is not a ``pandas.DatetimeIndex``.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError("plot_monthly_boxplots nécessite un DatetimeIndex.")

    # Month number of every row, computed once and reused for export and grouping.
    row_months = df.index.month

    value_columns = [var.column for var in variables]
    dataset = df[value_columns].copy()
    dataset.insert(0, "month", row_months)
    export_plot_dataset(dataset, output_path)

    month_labels = [calendar.month_abbr[m].capitalize() for m in MONTH_ORDER]
    # One colour per month; identical for every subplot, so built outside the loop.
    colors = plt.get_cmap("Spectral")(np.linspace(0.2, 0.8, len(month_labels)))

    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(12, 3 * n_vars), sharex=True)
    if n_vars == 1:
        # plt.subplots returns a bare Axes for a single row; normalise to a list.
        axes = [axes]

    for ax, var in zip(axes, variables):
        data = [
            df.loc[row_months == month, var.column].dropna().to_numpy()
            for month in MONTH_ORDER
        ]

        if not any(len(arr) > 0 for arr in data):
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        # Outliers are not drawn (showfliers=False); boxes are filled patches.
        box = ax.boxplot(
            data,
            labels=month_labels,
            showfliers=False,
            patch_artist=True,
        )
        for patch, color in zip(box["boxes"], colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.7)

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Mois")
    if title:
        fig.suptitle(title)
        # Reserve the top 3 % of the figure for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.97])
    else:
        fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
|
||
|
||
def plot_binned_profiles(
    stats: BinnedStatistics,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    xlabel: str,
    title: str,
    show_counts: bool = False,
) -> Path:
    """Plot binned statistics (mean, median, quantile band) for each variable.

    One subplot is drawn per variable, with the bin centers on the x axis.
    When ``show_counts`` is true, an extra subplot on top shows the number of
    observations per bin as bars spanning each bin's width.

    Args:
        stats: Aggregated statistics. This function reads ``centers``,
            ``intervals``, ``counts``, ``mean``, ``median``, ``quantile_low``,
            ``quantile_high``, ``quantile_low_level`` and
            ``quantile_high_level``.
        variables: Variables to plot; ``var.column`` selects the series in
            each statistics table.
        output_path: Destination image file; parent directories are created.
        xlabel: Label of the shared x axis.
        title: Figure title.
        show_counts: Whether to add the per-bin observation count subplot.

    Returns:
        The resolved path of the saved figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if stats.centers.size == 0:
        # No bins at all: save a placeholder figure carrying only a message.
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "Aucune donnée suffisante pour ces intervalles.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    bin_summary = pd.DataFrame(
        {
            "bin_left": stats.intervals.left,
            "bin_right": stats.intervals.right,
            "center": stats.centers,
        }
    )
    export_plot_dataset(
        {
            "bins": bin_summary,
            "counts": stats.counts,
            "mean": stats.mean,
            "median": stats.median,
            "quantile_low": stats.quantile_low,
            "quantile_high": stats.quantile_high,
        },
        output_path,
    )

    total_axes = len(variables) + (1 if show_counts else 0)
    fig, axes = plt.subplots(
        total_axes,
        1,
        sharex=True,
        figsize=(10, 3 * total_axes),
    )
    # plt.subplots returns a bare Axes for a single row; normalise to a list.
    all_axes = [axes] if total_axes == 1 else list(axes)
    # The first axis is reserved for the counts when requested; the rest map
    # one-to-one onto the variables.
    var_axes = all_axes[1:] if show_counts else all_axes

    x_values = stats.centers
    bin_widths = np.array([interval.length for interval in stats.intervals])

    if show_counts:
        count_ax = all_axes[0]
        count_ax.bar(
            x_values,
            stats.counts.to_numpy(dtype=float),
            width=bin_widths,
            color="lightgray",
            edgecolor="gray",
            align="center",
        )
        count_ax.set_ylabel("Nombre de points")
        count_ax.grid(True, linestyle=":", alpha=0.4)
        count_ax.set_title("Densité des observations par bin")

    # The quantile band and its legend label do not depend on the variable.
    has_band = stats.quantile_low is not None and stats.quantile_high is not None
    if stats.quantile_low_level is not None and stats.quantile_high_level is not None:
        # ':g' avoids the truncation int() caused on float noise
        # (0.29 * 100 == 28.999...) and keeps fractional percentiles such as 2.5.
        band_label = (
            f"Quantiles {stats.quantile_low_level * 100:g}"
            f"–{stats.quantile_high_level * 100:g}%"
        )
    else:
        band_label = "Quantiles"

    for ax, var in zip(var_axes, variables):
        col = var.column
        ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
        ax.plot(x_values, stats.median[col], color="tab:orange", linestyle="--", label="Médiane")

        if has_band:
            ax.fill_between(
                x_values,
                stats.quantile_low[col],
                stats.quantile_high[col],
                color="tab:blue",
                alpha=0.15,
                label=band_label,
            )

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    # Use the last axis of the figure so a counts-only plot (no variables)
    # still gets its x label and limits instead of raising IndexError.
    all_axes[-1].set_xlabel(xlabel)
    if var_axes:
        var_axes[0].legend(loc="upper right")
    all_axes[-1].set_xlim(stats.intervals.left.min(), stats.intervals.right.max())

    fig.suptitle(title)
    # Reserve the top 3 % of the figure for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
|
||
|
||
def plot_monthly_anomalies(
    monthly_means: pd.DataFrame,
    climatology: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    title: str = "Moyennes mensuelles vs climatologie",
) -> Path:
    """Compare observed monthly means with the climatology for each variable.

    One subplot is drawn per variable, showing the observed series, the
    climatological value of the matching calendar month, and a shaded area
    between them (blue where the observation is at or above the climatology,
    red where it is below).

    Args:
        monthly_means: Observed monthly means; its index must expose
            ``.month`` (it is read as ``actual.index.month``).
        climatology: Reference values, looked up by month number via
            ``climatology.loc[months, column]``.
        variables: Variables to plot; each contributes one subplot.
        output_path: Destination image file; parent directories are created.
        title: Figure title.

    Returns:
        The resolved path of the saved figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if monthly_means.empty or climatology.empty:
        # Nothing to compare: save a placeholder figure carrying only a message.
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données mensuelles disponibles.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    export_frames: list[pd.DataFrame] = []

    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(12, 3 * n_vars), sharex=True)
    if n_vars == 1:
        # plt.subplots returns a bare Axes for a single row; normalise to a list.
        axes = [axes]

    # First axis that actually receives lines; the legend is attached there.
    legend_ax = None

    for ax, var in zip(axes, variables):
        actual = monthly_means[var.column].dropna()
        if actual.empty:
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        # Climatological value of the calendar month of each observation.
        months = actual.index.month
        clim = climatology.loc[months, var.column].to_numpy(dtype=float)

        clim_series = pd.Series(clim, index=actual.index, name="climatology")
        frame = pd.DataFrame({"actual": actual, "climatology": clim_series})
        frame["anomaly"] = frame["actual"] - frame["climatology"]
        # Nest the three columns under the variable's column name for export.
        export_frames.append(pd.concat({var.column: frame}, axis=1))
        anomaly = frame["anomaly"].to_numpy(dtype=float)

        ax.plot(actual.index, actual, color="tab:blue", label="Moyenne mensuelle")
        ax.plot(actual.index, clim, color="tab:gray", linestyle="--", label="Climatologie")
        ax.fill_between(
            actual.index,
            actual,
            clim,
            where=anomaly >= 0,
            color="tab:blue",
            alpha=0.15,
        )
        ax.fill_between(
            actual.index,
            actual,
            clim,
            where=anomaly < 0,
            color="tab:red",
            alpha=0.15,
        )

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

        # Matplotlib advises against reusing one Locator/Formatter on several
        # axes, so each axis gets its own pair.
        locator = mdates.AutoDateLocator()
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))

        if legend_ax is None:
            legend_ax = ax

    if export_frames:
        export_plot_dataset(pd.concat(export_frames, axis=1), output_path)

    axes[-1].set_xlabel("Date")
    # axes[0] may be an empty, switched-off placeholder; put the legend on the
    # first axis that has labelled artists instead.
    if legend_ax is not None:
        legend_ax.legend(loc="upper right")
    fig.suptitle(title)
    # Reserve the top 3 % of the figure for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
|