1

Réorganisation

This commit is contained in:
2025-11-19 17:01:45 +01:00
parent 566d4400ce
commit 617b12c02e
91 changed files with 874 additions and 1715 deletions

View File

@@ -0,0 +1,119 @@
"""Jeux de paires et scénarios standardisés pour explorer les corrélations."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable, Sequence
import numpy as np
# A reducer takes a NumPy array and returns a single float.
HexbinReducer = Callable[[np.ndarray], float]

# Reducer name -> NumPy aggregation function.
HEXBIN_REDUCE_FUNCTIONS: dict[str, HexbinReducer] = dict(
    mean=np.mean,
    median=np.median,
    max=np.max,
)

# Reducer name -> French display label (same keys as the table above).
HEXBIN_REDUCE_LABELS: dict[str, str] = dict(
    mean="moyenne",
    median="médiane",
    max="maximum",
)
@dataclass(frozen=True)
class HexbinScenario:
    """Describe one use case for a hexbin plot coloured by a third variable."""

    key_x: str
    key_y: str
    key_color: str
    filename: str
    description: str
    # Name looked up in HEXBIN_REDUCE_FUNCTIONS / HEXBIN_REDUCE_LABELS.
    reduce: str = "mean"
    gridsize: int = 60
    mincnt: int = 5

    def get_reduce_func(self) -> HexbinReducer:
        """Return the reducer registered under ``reduce`` (``np.mean`` if unknown)."""
        name = self.reduce
        if name in HEXBIN_REDUCE_FUNCTIONS:
            return HEXBIN_REDUCE_FUNCTIONS[name]
        return np.mean

    def get_reduce_label(self) -> str:
        """Return the French label of the reducer, or the raw name if unknown."""
        name = self.reduce
        if name in HEXBIN_REDUCE_LABELS:
            return HEXBIN_REDUCE_LABELS[name]
        return name
# Default variable pairs for the lagged-correlation exploration.
DEFAULT_LAGGED_PAIRS: Sequence[tuple[str, str]] = (
    ("temperature", "humidity"),
    ("temperature", "rain_rate"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("pressure", "illuminance"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
)

# Default variable pairs for the rolling-correlation exploration
# (every pair here also appears in DEFAULT_LAGGED_PAIRS).
DEFAULT_ROLLING_PAIRS: Sequence[tuple[str, str]] = (
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
)
# Default hexbin scenarios; each entry sets its own reducer, grid size and
# minimum count.
DEFAULT_HEXBIN_SCENARIOS: Sequence[HexbinScenario] = (
    # Temperature vs humidity, coloured by rain rate ("max" reducer).
    HexbinScenario(
        key_x="temperature",
        key_y="humidity",
        key_color="rain_rate",
        filename="hexbin_temp_humidity_color_rain.png",
        description=(
            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
            "et comment les épisodes de pluie se situent dans une bande restreinte."
        ),
        reduce="max",
        gridsize=50,
        mincnt=8,
    ),
    # Pressure vs rain rate, coloured by wind speed ("median" reducer).
    HexbinScenario(
        key_x="pressure",
        key_y="rain_rate",
        key_color="wind_speed",
        filename="hexbin_pressure_rain_color_wind.png",
        description=(
            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
        ),
        reduce="median",
        gridsize=45,
        mincnt=5,
    ),
    # Illuminance vs humidity, coloured by temperature ("mean" reducer).
    HexbinScenario(
        key_x="illuminance",
        key_y="humidity",
        key_color="temperature",
        filename="hexbin_lux_humidity_color_temp.png",
        description=(
            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
            "mais cela n'implique pas toujours une baisse rapide de température."
        ),
        reduce="mean",
        gridsize=55,
        mincnt=6,
    ),
)
# Names exported by `from <module> import *`. The reducer tables
# (HEXBIN_REDUCE_FUNCTIONS / HEXBIN_REDUCE_LABELS) are not listed here.
__all__ = [
    "HexbinScenario",
    "DEFAULT_LAGGED_PAIRS",
    "DEFAULT_ROLLING_PAIRS",
    "DEFAULT_HEXBIN_SCENARIOS",
]

View File

@@ -11,6 +11,7 @@ from .rain import plot_daily_rainfall_hyetograph, plot_rainfall_by_season
from .relationships import (
plot_event_composite,
plot_hexbin_with_third_variable,
plot_pairwise_relationship_grid,
plot_scatter_pair,
)
from .seasonal_profiles import (
@@ -37,6 +38,7 @@ __all__ = [
"plot_rainfall_by_season",
"plot_event_composite",
"plot_hexbin_with_third_variable",
"plot_pairwise_relationship_grid",
"plot_scatter_pair",
"plot_daylight_hours",
"plot_diurnal_cycle",

View File

@@ -21,6 +21,16 @@ def export_plot_dataset(data: Any, output_path: str | Path, *, suffix: str = ".c
output_path = Path(output_path)
dataset_path = output_path.with_suffix(suffix)
# If the image is exported under a "figures" directory, keep the dataset in
# an equivalent "data" directory to avoid mixing assets.
parts = list(dataset_path.parts)
for idx, part in enumerate(parts):
if part == "figures":
parts[idx] = "data"
dataset_path = Path(*parts)
break
dataset_path.parent.mkdir(parents=True, exist_ok=True)
def _normalize(value: Any, *, default_name: str = "value") -> pd.DataFrame:

View File

@@ -13,7 +13,7 @@ import pandas as pd
from .base import export_plot_dataset
from meteo.variables import Variable
__all__ = ['plot_scatter_pair', 'plot_hexbin_with_third_variable', 'plot_event_composite']
__all__ = ['plot_scatter_pair', 'plot_pairwise_relationship_grid', 'plot_hexbin_with_third_variable', 'plot_event_composite']
def plot_scatter_pair(
@@ -193,6 +193,87 @@ def plot_scatter_pair(
return output_path.resolve()
def plot_pairwise_relationship_grid(
    df: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    sample_step: int = 10,
    hist_bins: int = 40,
    scatter_kwargs: dict | None = None,
) -> Path:
    """Draw a lower-triangular grid of pairwise scatter plots and save it.

    The diagonal cells show a histogram of each variable, the cells below the
    diagonal show the scatter plot of the column variable (x) against the row
    variable (y), and the cells above the diagonal are hidden so that no pair
    is drawn twice.

    Args:
        df: Source data; must contain the ``column`` of every variable.
        variables: Variables to plot; each one provides ``column`` (DataFrame
            column name) and ``label`` (axis label).
        output_path: Destination of the image; parent directories are created.
        sample_step: Keep one row out of ``sample_step`` after dropping
            incomplete rows. Values of 1 or less disable the subsampling.
        hist_bins: Upper bound on the number of histogram bins on the diagonal.
        scatter_kwargs: Extra keyword arguments for ``Axes.scatter``; they
            override the defaults ``s=5`` and ``alpha=0.5``.

    Returns:
        The resolved path of the saved image.

    Raises:
        ValueError: If ``variables`` is empty.
        KeyError: If a variable's column is missing from ``df``.
        RuntimeError: If no row has a value for every requested column.
    """
    # Validate everything before touching the filesystem so that a rejected
    # call does not leave an empty output directory behind.
    if not variables:
        raise ValueError("La liste de variables ne peut pas être vide.")

    columns = [v.column for v in variables]
    for col in columns:
        if col not in df.columns:
            raise KeyError(f"Colonne absente dans le DataFrame : {col}")

    df_pairs = df[columns].dropna()
    if df_pairs.empty:
        raise RuntimeError("Aucune ligne complète pour générer les nuages de points.")

    if sample_step > 1:
        df_pairs = df_pairs.iloc[::sample_step, :]

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Hand the rows that are actually plotted to the dataset exporter.
    export_plot_dataset(df_pairs, output_path)

    n = len(variables)
    fig_size = max(3.0, 1.8 * n)
    # squeeze=False keeps `axes` two-dimensional even when n == 1.
    fig, axes = plt.subplots(n, n, figsize=(fig_size, fig_size), squeeze=False)

    try:
        default_scatter_kwargs = {"s": 5, "alpha": 0.5}
        scatter_kwargs = {**default_scatter_kwargs, **(scatter_kwargs or {})}

        for row_idx, var_y in enumerate(variables):
            for col_idx, var_x in enumerate(variables):
                ax = axes[row_idx][col_idx]

                if row_idx < col_idx:
                    # Upper triangle stays empty to avoid duplicated pairs.
                    ax.set_visible(False)
                    continue

                if row_idx == col_idx:
                    # Defensive: df_pairs had its incomplete rows dropped above.
                    series = df_pairs[var_x.column].dropna()
                    if series.empty:
                        ax.text(0.5, 0.5, "(vide)", ha="center", va="center")
                        ax.set_axis_off()
                    else:
                        # At least 5 bins, at most hist_bins.
                        bins = min(hist_bins, max(5, series.nunique()))
                        ax.hist(series, bins=bins, color="tab:blue", alpha=0.7)
                        ax.set_ylabel("")
                else:
                    ax.scatter(
                        df_pairs[var_x.column],
                        df_pairs[var_y.column],
                        **scatter_kwargs,
                    )

                # Only the bottom row and the first column keep their labels.
                if row_idx == n - 1:
                    ax.set_xlabel(var_x.label)
                else:
                    ax.set_xticklabels([])

                if col_idx == 0:
                    ax.set_ylabel(var_y.label)
                else:
                    ax.set_yticklabels([])

        fig.suptitle("Matrice de corrélations simples (nuages de points)")
        # Leave a little room at the top for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.97])
        fig.savefig(output_path, dpi=150)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    return output_path.resolve()
def plot_hexbin_with_third_variable(
df: pd.DataFrame,
var_x: Variable,