1

Visualisations étendues (illuminance, calendriers, vent)

This commit is contained in:
2025-11-17 22:50:27 +01:00
parent 45b6beac98
commit 70c9d31eb9
25 changed files with 917 additions and 3 deletions

View File

@@ -10,6 +10,8 @@ import pandas as pd
from .variables import Variable
from .season import SEASON_LABELS
# Calendar month numbers 1..12, in January-to-December order.
MONTH_ORDER = list(range(1, 13))
def compute_correlation_matrix(
df: pd.DataFrame,
@@ -599,3 +601,145 @@ def compute_rainfall_by_season(
order = [season for season in SEASON_LABELS if season in agg.index]
agg = agg.loc[order]
return agg
def filter_by_condition(
    df: pd.DataFrame,
    *,
    condition: pd.Series,
) -> pd.DataFrame:
    """
    Return the rows of ``df`` for which an aligned boolean condition holds.

    ``condition`` is re-aligned on ``df.index``; labels that are missing from
    it (or whose value is NaN) are treated as ``False`` and are dropped.
    """
    aligned = condition.reindex(df.index).fillna(False)
    return df.loc[aligned]
def compute_monthly_climatology(
    df: pd.DataFrame,
    *,
    columns: Sequence[str],
) -> pd.DataFrame:
    """
    Mean per calendar month (1-12) for the requested columns.

    The index of ``df`` is first checked by ``_ensure_datetime_index``.
    The result always has one row per entry of ``MONTH_ORDER`` (months with
    no data are NaN) and its index is named ``"month"``.

    Raises:
        KeyError: if any requested column is absent from ``df``.
    """
    _ensure_datetime_index(df)

    absent = [name for name in columns if name not in df.columns]
    if absent:
        raise KeyError(f"Colonnes absentes : {absent}")

    selected = df[list(columns)]
    climatology = selected.groupby(df.index.month).mean().reindex(MONTH_ORDER)
    climatology.index.name = "month"
    return climatology
def compute_monthly_means(
    df: pd.DataFrame,
    *,
    columns: Sequence[str],
) -> pd.DataFrame:
    """
    Calendar monthly means of the requested columns (indexed on month end).

    The index of ``df`` is first checked by ``_ensure_datetime_index``.
    Months for which every requested column is NaN are removed.

    Raises:
        KeyError: if any requested column is absent from ``df``.
    """
    _ensure_datetime_index(df)

    absent = [name for name in columns if name not in df.columns]
    if absent:
        raise KeyError(f"Colonnes absentes : {absent}")

    # "ME" is the month-end resampling alias.
    per_month = df[list(columns)].resample("1ME").mean()
    return per_month.dropna(how="all")
def compute_seasonal_hourly_profile(
    df: pd.DataFrame,
    *,
    value_column: str,
    season_column: str = "season",
) -> pd.DataFrame:
    """
    Build an (hours x seasons) matrix holding the mean of one variable.

    Rows are the hours 0..23 (index named ``"hour"``); columns are the seasons
    found in the data, ordered like ``SEASON_LABELS`` when at least one of
    those labels is present. When no row has both a value and a season, an
    empty frame indexed by the 24 hours is returned.

    Raises:
        KeyError: if ``value_column`` or ``season_column`` is absent.
    """
    _ensure_datetime_index(df)

    for name in (value_column, season_column):
        if name not in df.columns:
            raise KeyError(f"Colonne absente : {name}")

    all_hours = range(24)
    valid = df[[value_column, season_column]].dropna()
    if valid.empty:
        return pd.DataFrame(index=all_hours)

    hourly_means = valid.groupby([season_column, valid.index.hour])[value_column].mean()
    profile = hourly_means.unstack(season_column).reindex(index=all_hours)

    known_seasons = [season for season in SEASON_LABELS if season in profile.columns]
    if known_seasons:
        profile = profile[known_seasons]

    profile.index.name = "hour"
    return profile
def compute_monthly_daylight_hours(
    df: pd.DataFrame,
    *,
    illuminance_column: str = "illuminance",
    threshold_lux: float = 1000.0,
) -> pd.Series:
    """
    Average daily duration of daylight per month, in hours per day.

    A sample counts as "daylight" when its illuminance is greater than or
    equal to ``threshold_lux``. Each daylight sample contributes one time
    step (as returned by ``_infer_time_step``) to its day; daily totals are
    then averaged per calendar month (indexed on month end).

    Days without any valid sample are ignored instead of being counted as
    0 h of daylight, so gaps in the record do not bias the monthly mean.

    Returns:
        A float Series of monthly averages; empty when no illuminance value
        is available.

    Raises:
        KeyError: if ``illuminance_column`` is absent from ``df``.
    """
    _ensure_datetime_index(df)
    if illuminance_column not in df.columns:
        raise KeyError(f"Colonne absente : {illuminance_column}")

    subset = df[[illuminance_column]].dropna()
    if subset.empty:
        return pd.Series(dtype=float)

    time_step = _infer_time_step(subset.index)
    hours_per_step = time_step.total_seconds() / 3600.0

    daylight_flag = (subset[illuminance_column] >= threshold_lux).astype(float)
    daylight_hours = daylight_flag * hours_per_step

    # min_count=1: a day with no sample at all becomes NaN (and is skipped by
    # the monthly mean) rather than a misleading 0 h of daylight.
    daily_hours = daylight_hours.resample("1D").sum(min_count=1)
    monthly_avg = daily_hours.resample("1ME").mean()
    return monthly_avg.dropna()
def compute_mean_wind_components(
    df: pd.DataFrame,
    *,
    freq: str = "1M",
) -> pd.DataFrame:
    """
    Mean zonal (u) and meridional (v) wind components at a given frequency.

    The result has the columns ``u``, ``v`` and ``speed`` (mean scalar speed),
    resampled at ``freq``; the legacy ``"1M"`` value is translated to the
    month-end alias ``"1ME"``. Periods where every column is NaN are dropped.

    Raises:
        KeyError: if ``wind_speed`` or ``wind_direction`` is absent.
    """
    required = ("wind_speed", "wind_direction")
    if not all(name in df.columns for name in required):
        raise KeyError("Les colonnes 'wind_speed' et 'wind_direction' sont requises.")

    _ensure_datetime_index(df)

    wind = df[["wind_speed", "wind_direction"]].dropna()
    if wind.empty:
        return pd.DataFrame(columns=["u", "v", "speed"])

    direction_rad = np.deg2rad(wind["wind_direction"].to_numpy(dtype=float))
    speed = wind["wind_speed"].to_numpy(dtype=float)

    # Sign flip: u is positive eastward and v positive northward.
    # NOTE(review): this presumes wind_direction is the direction the wind
    # blows FROM (meteorological convention) - confirm against the data source.
    components = pd.DataFrame(
        {
            "u": speed * np.sin(direction_rad) * -1,
            "v": speed * np.cos(direction_rad) * -1,
            "speed": speed,
        },
        index=wind.index,
    )

    resample_freq = freq
    if freq == "1M":
        resample_freq = "1ME"

    averaged = components.resample(resample_freq).mean()
    return averaged.dropna(how="all")

View File

@@ -1,6 +1,7 @@
# meteo/plots.py
from __future__ import annotations
import calendar
from pathlib import Path
from typing import Callable, Sequence
@@ -11,7 +12,7 @@ import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from .analysis import DiurnalCycleStats, BinnedStatistics
from .analysis import DiurnalCycleStats, BinnedStatistics, MONTH_ORDER
from .season import SEASON_LABELS
from .variables import Variable
@@ -672,6 +673,60 @@ def plot_seasonal_boxplots(
return output_path.resolve()
def plot_monthly_boxplots(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
) -> Path:
    """
    Draw one row of monthly boxplots (January to December) per variable.

    Args:
        df: data indexed by a ``DatetimeIndex``; each ``var.column`` is read.
        variables: variables to plot, one subplot each.
        output_path: image file to write; parent directories are created.

    Returns:
        The resolved path of the written figure.

    Raises:
        TypeError: if ``df.index`` is not a ``DatetimeIndex``.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError("plot_monthly_boxplots nécessite un DatetimeIndex.")

    month_labels = [calendar.month_abbr[m].capitalize() for m in MONTH_ORDER]
    positions = list(range(1, len(MONTH_ORDER) + 1))

    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(12, 3 * n_vars), sharex=True)
    if n_vars == 1:
        # plt.subplots returns a bare Axes (not an array) for a single row.
        axes = [axes]

    for ax, var in zip(axes, variables):
        data = [
            df.loc[df.index.month == month, var.column].dropna().to_numpy()
            for month in MONTH_ORDER
        ]

        if not any(len(arr) > 0 for arr in data):
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        box = ax.boxplot(
            data,
            positions=positions,
            showfliers=False,
            patch_artist=True,
        )
        # Tick labels are set explicitly rather than through boxplot's
        # `labels=` keyword, which is deprecated in recent Matplotlib releases.
        ax.set_xticks(positions)
        ax.set_xticklabels(month_labels)

        colors = plt.get_cmap("Spectral")(np.linspace(0.2, 0.8, len(data)))
        for patch, color in zip(box["boxes"], colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.7)

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Mois")
    fig.suptitle("Distribution mensuelle")
    # Leave a little room at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_binned_profiles(
stats: BinnedStatistics,
variables: Sequence[Variable],
@@ -889,3 +944,301 @@ def plot_rainfall_by_season(
fig.savefig(output_path, dpi=150)
plt.close(fig)
return output_path.resolve()
def plot_monthly_anomalies(
    monthly_means: pd.DataFrame,
    climatology: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    title: str = "Moyennes mensuelles vs climatologie",
) -> Path:
    """
    Compare observed monthly means with the climatology, one subplot per variable.

    For every variable the observed series and the climatological value of the
    matching calendar month are drawn; the area between them is shaded blue
    where the observation is at or above the climatology and red where it is
    below. A placeholder figure is written when either input is empty.

    Returns:
        The resolved path of the written figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if monthly_means.empty or climatology.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données mensuelles disponibles.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(12, 3 * n_vars), sharex=True)
    if n_vars == 1:
        axes = [axes]

    locator = mdates.AutoDateLocator()
    formatter = mdates.ConciseDateFormatter(locator)

    for ax, var in zip(axes, variables):
        observed = monthly_means[var.column].dropna()
        if observed.empty:
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        # Climatological value of the calendar month of each observation.
        reference = climatology.loc[observed.index.month, var.column].to_numpy(dtype=float)
        anomaly = observed.to_numpy(dtype=float) - reference

        ax.plot(observed.index, observed, color="tab:blue", label="Moyenne mensuelle")
        ax.plot(observed.index, reference, color="tab:gray", linestyle="--", label="Climatologie")

        # Positive anomalies in blue first, then negative ones in red.
        for mask, shade in ((anomaly >= 0, "tab:blue"), (anomaly < 0, "tab:red")):
            ax.fill_between(
                observed.index,
                observed,
                reference,
                where=mask,
                color=shade,
                alpha=0.15,
            )

        if var.unit:
            ylabel = f"{var.label} ({var.unit})"
        else:
            ylabel = var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(formatter)

    axes[-1].set_xlabel("Date")
    axes[0].legend(loc="upper right")
    fig.suptitle(title)
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_wind_vector_series(
    vector_df: pd.DataFrame,
    output_path: str | Path,
    *,
    title: str = "Vecteurs moyens du vent",
) -> Path:
    """
    Draw the mean wind components as arrows (u/v) along a time axis.

    Args:
        vector_df: frame with columns ``u``, ``v`` and ``speed``; its index
            is converted with ``mdates.date2num`` to place the arrows.
        output_path: image file to write; parent directories are created.
        title: figure title.

    Returns:
        The resolved path of the written figure. A placeholder figure is
        written when ``vector_df`` is empty.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if vector_df.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données de vent.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    times = vector_df.index
    x = mdates.date2num(times)
    u = vector_df["u"].to_numpy(dtype=float)
    v = vector_df["v"].to_numpy(dtype=float)
    speed = vector_df["speed"].to_numpy(dtype=float)

    fig, ax = plt.subplots(figsize=(12, 4))
    # Arrows are anchored on the y=0 line and coloured by mean speed.
    q = ax.quiver(
        x,
        np.zeros_like(x),
        u,
        v,
        speed,
        angles="xy",
        scale_units="xy",
        scale=1,
        cmap="viridis",
    )
    ax.axhline(0, color="black", linewidth=0.5)

    # Symmetric y-limits from the largest finite |v|. NaN/inf entries are
    # ignored so they cannot produce invalid limits; fall back to +/-1 when
    # there is no non-zero finite value.
    finite_v = v[np.isfinite(v)]
    v_extent = float(np.abs(finite_v).max()) if finite_v.size else 0.0
    y_limit = v_extent * 1.2 if v_extent > 0 else 1.0
    ax.set_ylim(-y_limit, y_limit)

    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))
    ax.set_ylabel("Composante nord (v)")
    ax.set_xlabel("Date")
    ax.set_title(title)
    cbar = fig.colorbar(q, ax=ax)
    cbar.set_label("Vitesse moyenne (km/h)")

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_calendar_heatmap(
    matrix: pd.DataFrame,
    output_path: str | Path,
    *,
    title: str,
    cmap: str = "YlGnBu",
    colorbar_label: str = "",
) -> Path:
    """
    Render a calendar heatmap (rows = months, columns = days).

    Row and column labels of ``matrix`` are used as tick labels. The colorbar
    is labelled only when ``colorbar_label`` is non-empty. A placeholder
    figure is written when ``matrix`` is empty.

    Returns:
        The resolved path of the written figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if matrix.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données pour la heatmap.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    fig, ax = plt.subplots(figsize=(14, 6))
    values = matrix.to_numpy(dtype=float)
    image = ax.imshow(values, aspect="auto", cmap=cmap, interpolation="nearest")

    n_rows, n_cols = matrix.shape
    ax.set_xticks(np.arange(n_cols))
    ax.set_xticklabels(matrix.columns, rotation=90)
    ax.set_yticks(np.arange(n_rows))
    ax.set_yticklabels(matrix.index)

    ax.set_xlabel("Jour du mois")
    ax.set_ylabel("Mois")
    ax.set_title(title)

    colorbar = fig.colorbar(image, ax=ax)
    if colorbar_label:
        colorbar.set_label(colorbar_label)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_weekday_profiles(
    weekday_df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    title: str,
) -> Path:
    """
    Plot the per-weekday mean of several variables, one subplot each.

    The values of each ``var.column`` are plotted against seven positions
    labelled Monday to Sunday. A variable whose column is missing gets a
    "no data" message instead of a curve; a placeholder figure is written
    when ``weekday_df`` is empty.
    NOTE(review): assumes ``weekday_df`` holds exactly seven rows ordered
    Monday..Sunday - confirm against the caller.

    Returns:
        The resolved path of the written figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if weekday_df.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données hebdomadaires.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    weekday_labels = ["Lun", "Mar", "Mer", "Jeu", "Ven", "Sam", "Dim"]
    positions = np.arange(len(weekday_labels))

    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(10, 3 * n_vars), sharex=True)
    if n_vars == 1:
        axes = [axes]

    for ax, var in zip(axes, variables):
        if var.column not in weekday_df.columns:
            ax.text(0.5, 0.5, f"Aucune donnée pour {var.label}.", ha="center", va="center")
            ax.set_axis_off()
            continue

        series = weekday_df[var.column].to_numpy(dtype=float)
        ax.plot(positions, series, marker="o", label=var.label)

        if var.unit:
            axis_label = f"{var.label} ({var.unit})"
        else:
            axis_label = var.label
        ax.set_ylabel(axis_label)
        ax.grid(True, linestyle=":", alpha=0.5)
        ax.set_xticks(positions)
        ax.set_xticklabels(weekday_labels)

    axes[-1].set_xlabel("Jour de semaine")
    axes[0].legend(loc="upper right")
    fig.suptitle(title)
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_seasonal_hourly_profiles(
    profile_df: pd.DataFrame,
    output_path: str | Path,
    *,
    title: str,
    ylabel: str,
) -> Path:
    """
    Plot one mean hourly curve per season.

    ``profile_df`` is read with its index as the x values and one column per
    season; each column name is capitalised for the legend. A placeholder
    figure is written when the frame is empty or entirely NaN.

    Returns:
        The resolved path of the written figure.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    nothing_to_plot = profile_df.empty or profile_df.isna().all().all()
    if nothing_to_plot:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de profil saisonnier disponible.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    hours = profile_df.index.to_numpy(dtype=float)
    fig, ax = plt.subplots(figsize=(10, 4))

    # One evenly spaced "turbo" colour per season column.
    palette = plt.get_cmap("turbo")(np.linspace(0.1, 0.9, profile_df.shape[1]))
    for position, season in enumerate(profile_df.columns):
        ax.plot(
            hours,
            profile_df[season],
            label=season.capitalize(),
            color=palette[position],
        )

    ax.set_xlabel("Heure locale")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True, linestyle=":", alpha=0.5)
    ax.legend()

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_daylight_hours(
    monthly_series: pd.Series,
    output_path: str | Path,
    *,
    title: str = "Durée moyenne de luminosité (> seuil)",
) -> Path:
    """
    Bar chart of the average daily daylight duration per month.

    Args:
        monthly_series: one value per month; the index gives the bar
            positions and is formatted as dates on the x axis.
        output_path: image file to write; parent directories are created.
        title: figure title.

    Returns:
        The resolved path of the written figure. A placeholder figure is
        written when ``monthly_series`` is empty.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if monthly_series.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "Pas de données sur la luminosité.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    months = monthly_series.index
    # On a date axis the bar width is expressed in days: the default of 0.8
    # would draw monthly bars as barely visible slivers, so use a month-scale
    # width whenever the positions are timestamps.
    bar_width = 20.0 if isinstance(months, pd.DatetimeIndex) else 0.8

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.bar(months, monthly_series.values, width=bar_width, color="goldenrod", alpha=0.8)
    ax.set_ylabel("Heures de luminosité par jour")
    ax.set_xlabel("Mois")
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))
    ax.set_title(title)
    ax.grid(True, axis="y", linestyle=":", alpha=0.5)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()