1

Ajout de l'élévation solaire et visualisations

This commit is contained in:
2025-11-17 22:20:54 +01:00
parent 268d5d65ec
commit 01cf686af3
13 changed files with 487 additions and 1 deletions

View File

@@ -134,6 +134,19 @@ class DiurnalCycleStats:
quantile_high_level: float | None = None
@dataclass
class BinnedStatistics:
    """
    Per-bin aggregates of one or more columns, grouped by intervals of a
    source column (the result type of ``compute_binned_statistics``).

    The per-bin containers describe the same retained bins; the empty result
    uses zero-length containers and ``None`` for both quantile frames.
    """

    # Midpoint of each retained interval.
    centers: np.ndarray
    # The retained intervals themselves.
    intervals: pd.IntervalIndex
    # Number of rows that fell into each retained interval.
    counts: pd.Series
    # Per-bin mean, one column per aggregated variable.
    mean: pd.DataFrame
    # Per-bin median, one column per aggregated variable.
    median: pd.DataFrame
    # Per-bin lower / upper quantile frames; None when not computed.
    quantile_low: pd.DataFrame | None
    quantile_high: pd.DataFrame | None
    # Quantile levels used for the two frames above; None when not computed.
    quantile_low_level: float | None = None
    quantile_high_level: float | None = None
def compute_rolling_correlation_series(
df: pd.DataFrame,
var_x: Variable,
@@ -456,3 +469,93 @@ def compute_daily_rainfall_totals(
}
)
return result
def _empty_binned_statistics(target_columns: Sequence[str]) -> BinnedStatistics:
    """Build the empty result returned when no bin can be reported."""
    columns = list(target_columns)
    return BinnedStatistics(
        centers=np.array([]),
        intervals=pd.IntervalIndex([]),
        counts=pd.Series(dtype=int),
        # Two distinct frames: mutating the mean must never leak into the median.
        mean=pd.DataFrame(columns=columns),
        median=pd.DataFrame(columns=columns),
        quantile_low=None,
        quantile_high=None,
    )


def compute_binned_statistics(
    df: pd.DataFrame,
    *,
    bin_source_column: str,
    target_columns: Sequence[str],
    bins: Sequence[float] | np.ndarray,
    min_count: int = 30,
    quantiles: tuple[float, float] | None = (0.25, 0.75),
) -> BinnedStatistics:
    """
    Compute mean / median / quantiles of several columns, grouped by intervals
    defined on a source column.

    Args:
        df: Input data.
        bin_source_column: Column whose values are cut into ``bins``.
        target_columns: Columns to aggregate inside each bin (any sequence,
            including a tuple).
        bins: Bin edges forwarded to ``pd.cut`` (lowest edge included).
        min_count: Minimum number of rows a bin must hold to be reported;
            values below 1 are treated as 1, so empty bins are always dropped.
        quantiles: ``(low, high)`` quantile levels, or ``None`` to skip the
            quantile computation. Either level may itself be ``None``.

    Returns:
        A ``BinnedStatistics`` restricted to the bins that hold at least
        ``min_count`` rows. It is empty when no row has a source value or when
        no bin reaches the threshold.

    Raises:
        KeyError: If the source column or any target column is missing.
    """
    if bin_source_column not in df.columns:
        raise KeyError(f"Colonne source absente : {bin_source_column}")

    # Normalise once: pandas rejects tuples when selecting several columns on
    # a GroupBy, while the signature accepts any sequence of names.
    columns = list(target_columns)
    missing_targets = [col for col in columns if col not in df.columns]
    if missing_targets:
        raise KeyError(f"Colonnes cibles absentes : {missing_targets!r}")

    # Rows without a source value cannot be assigned to a bin.
    data = df[[bin_source_column, *columns]].dropna(subset=[bin_source_column])
    if data.empty:
        return _empty_binned_statistics(columns)

    categories = pd.cut(data[bin_source_column], bins=bins, include_lowest=True)
    # observed=False keeps every bin in the result, including unpopulated ones;
    # they are filtered out below through the count threshold.
    grouped = data.groupby(categories, observed=False)
    counts = grouped.size()

    valid_mask = counts >= max(1, min_count)
    valid_intervals = counts.index[valid_mask]
    if len(valid_intervals) == 0:
        return _empty_binned_statistics(columns)

    interval_index = pd.IntervalIndex(valid_intervals)
    selected = grouped[columns]

    mean_df = selected.mean().loc[interval_index]
    median_df = selected.median().loc[interval_index]

    q_low = q_high = None
    quantile_low_df: pd.DataFrame | None = None
    quantile_high_df: pd.DataFrame | None = None
    if quantiles is not None:
        q_low, q_high = quantiles
        if q_low is not None:
            quantile_low_df = selected.quantile(q_low).loc[interval_index]
        if q_high is not None:
            quantile_high_df = selected.quantile(q_high).loc[interval_index]

    return BinnedStatistics(
        centers=np.asarray(interval_index.mid),
        intervals=interval_index,
        counts=counts.loc[interval_index],
        mean=mean_df,
        median=median_df,
        quantile_low=quantile_low_df,
        quantile_high=quantile_high_df,
        quantile_low_level=q_low,
        quantile_high_level=q_high,
    )

View File

@@ -65,3 +65,58 @@ class InfluxSettings:
org=org, # type: ignore[arg-type]
bucket=bucket, # type: ignore[arg-type]
)
@dataclass(frozen=True)
class StationLocation:
"""
Décrit la position géographique de la station météo.
Utilisée pour les calculs astronomiques (ex: élévation du soleil).
"""
latitude: float
longitude: float
elevation_m: float = 0.0
@classmethod
def from_env(cls, *, optional: bool = False) -> Self | None:
"""
Charge les coordonnées GPS depuis les variables d'environnement :
- STATION_LATITUDE (obligatoire)
- STATION_LONGITUDE (obligatoire)
- STATION_ELEVATION (optionnelle, en mètres)
"""
load_dotenv()
lat = os.getenv("STATION_LATITUDE")
lon = os.getenv("STATION_LONGITUDE")
elev = os.getenv("STATION_ELEVATION")
if not lat or not lon:
if optional:
return None
raise RuntimeError(
"Les variables STATION_LATITUDE et STATION_LONGITUDE doivent être définies "
"pour calculer l'élévation solaire."
)
try:
latitude = float(lat)
longitude = float(lon)
elevation = float(elev) if elev else 0.0
except ValueError as exc:
raise RuntimeError(
"STATION_LATITUDE / STATION_LONGITUDE / STATION_ELEVATION doivent être des nombres valides."
) from exc
return cls(latitude=latitude, longitude=longitude, elevation_m=elevation)
def to_astral_observer_kwargs(self) -> dict[str, float]:
"""
Prépare les arguments attendus par astral.Observer.
"""
return {
"latitude": self.latitude,
"longitude": self.longitude,
"elevation": self.elevation_m,
}

View File

@@ -11,7 +11,7 @@ import matplotlib.dates as mdates
import numpy as np
import pandas as pd
from .analysis import DiurnalCycleStats
from .analysis import DiurnalCycleStats, BinnedStatistics
from .variables import Variable
@@ -596,6 +596,100 @@ def plot_diurnal_cycle(
return output_path.resolve()
def plot_binned_profiles(
    stats: BinnedStatistics,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    xlabel: str,
    title: str,
    show_counts: bool = False,
) -> Path:
    """
    Plot the aggregated statistics of one or more variables against bins.

    One subplot is drawn per variable (mean, median and, when available, the
    inter-quantile band). With ``show_counts`` an extra bar chart of the
    number of observations per bin is placed on top.

    Args:
        stats: Binned statistics to draw.
        variables: Variables to plot; each ``column`` must exist in the
            statistics frames.
        output_path: Destination image file; parent folders are created.
        xlabel: Label of the shared x axis.
        title: Figure title.
        show_counts: Whether to add the per-bin observation count panel.

    Returns:
        The resolved path of the written image.

    Raises:
        ValueError: If there is nothing to draw (no variable and no count
            panel) while ``stats`` is not empty.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if stats.centers.size == 0:
        # No usable bin: write a placeholder image instead of failing.
        fig, ax = plt.subplots()
        try:
            ax.text(
                0.5,
                0.5,
                "Aucune donnée suffisante pour ces intervalles.",
                ha="center",
                va="center",
            )
            ax.set_axis_off()
            fig.savefig(output_path, dpi=150, bbox_inches="tight")
        finally:
            plt.close(fig)
        return output_path.resolve()

    total_axes = len(variables) + (1 if show_counts else 0)
    if total_axes == 0:
        raise ValueError("Aucune variable à tracer.")

    # squeeze=False always yields a 2-D array of axes, so a single panel needs
    # no special case.
    fig, axes_grid = plt.subplots(
        total_axes,
        1,
        sharex=True,
        figsize=(10, 3 * total_axes),
        squeeze=False,
    )
    try:
        all_axes = list(axes_grid[:, 0])
        variable_axes = all_axes[1:] if show_counts else all_axes
        x_values = stats.centers

        if show_counts:
            count_ax = all_axes[0]
            count_ax.bar(
                x_values,
                stats.counts.to_numpy(dtype=float),
                width=np.asarray(stats.intervals.length),
                color="lightgray",
                edgecolor="gray",
                align="center",
            )
            count_ax.set_ylabel("Nombre de points")
            count_ax.grid(True, linestyle=":", alpha=0.4)
            count_ax.set_title("Densité des observations par bin")

        # The band label is the same for every panel. ":g" avoids the
        # truncation of int() (0.29 * 100 == 28.999...) and keeps fractional
        # percentages readable.
        low_level = stats.quantile_low_level
        high_level = stats.quantile_high_level
        if low_level is not None and high_level is not None:
            band_label = f"Quantiles {low_level * 100:g}–{high_level * 100:g}%"
        else:
            band_label = "Quantiles"
        has_band = stats.quantile_low is not None and stats.quantile_high is not None

        for ax, var in zip(variable_axes, variables):
            col = var.column
            ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
            ax.plot(
                x_values,
                stats.median[col],
                color="tab:orange",
                linestyle="--",
                label="Médiane",
            )
            if has_band:
                ax.fill_between(
                    x_values,
                    stats.quantile_low[col],
                    stats.quantile_high[col],
                    color="tab:blue",
                    alpha=0.15,
                    label=band_label,
                )
            ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
            ax.set_ylabel(ylabel)
            ax.grid(True, linestyle=":", alpha=0.5)

        # The bottom panel carries the x label; with sharex the limits apply
        # to every panel.
        bottom_ax = all_axes[-1]
        bottom_ax.set_xlabel(xlabel)
        bottom_ax.set_xlim(stats.intervals.left.min(), stats.intervals.right.max())
        if variable_axes:
            variable_axes[0].legend(loc="upper right")

        fig.suptitle(title)
        fig.tight_layout(rect=[0, 0, 1, 0.97])
        fig.savefig(output_path, dpi=150)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
    return output_path.resolve()
def plot_daily_rainfall_hyetograph(
daily_rain: pd.DataFrame,
output_path: str | Path,

66
meteo/solar.py Normal file
View File

@@ -0,0 +1,66 @@
# meteo/solar.py
from __future__ import annotations
import pandas as pd
from astral import Observer
from astral.sun import elevation
def _ensure_datetime_index(index: pd.Index) -> pd.DatetimeIndex:
if not isinstance(index, pd.DatetimeIndex):
raise TypeError("Un DatetimeIndex est requis pour calculer l'élévation solaire.")
return index
def _prepare_index(index: pd.DatetimeIndex) -> pd.DatetimeIndex:
"""
Retourne une version timezone-aware (en UTC) du DatetimeIndex fourni.
"""
if index.tz is None:
return index.tz_localize("UTC")
return index.tz_convert("UTC")
def compute_solar_elevation_series(
    index: pd.Index,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    series_name: str = "sun_elevation",
) -> pd.Series:
    """
    Compute the sun elevation (in degrees) for every timestamp of the index.

    Args:
        index: Timestamps to evaluate; must be a DatetimeIndex. Naive
            timestamps are treated as UTC.
        latitude: Observer latitude.
        longitude: Observer longitude.
        elevation_m: Observer elevation, forwarded to ``astral.Observer``.
        series_name: Name given to the returned Series.

    Returns:
        A float Series aligned on the original ``index`` (not on its UTC
        conversion).

    Raises:
        TypeError: If ``index`` is not a DatetimeIndex.
    """
    dt_index = _ensure_datetime_index(index)
    observer = Observer(latitude=latitude, longitude=longitude, elevation=elevation_m)

    # The sun position is evaluated on UTC timestamps, but the result keeps
    # the caller's original index so it aligns with the source data.
    utc_index = _prepare_index(dt_index)
    values = [float(elevation(observer, ts.to_pydatetime())) for ts in utc_index]

    # Explicit dtype: an empty index would otherwise produce an object-typed
    # Series instead of a numeric one.
    return pd.Series(values, index=dt_index, name=series_name, dtype=float)
def add_solar_elevation_column(
    df: pd.DataFrame,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    column_name: str = "sun_elevation",
) -> pd.DataFrame:
    """
    Add a ``column_name`` column holding the sun elevation in degrees.

    The column is computed from the DataFrame's index and written into ``df``
    itself; the same object is returned for convenience.
    """
    df[column_name] = compute_solar_elevation_series(
        df.index,
        latitude=latitude,
        longitude=longitude,
        elevation_m=elevation_m,
        series_name=column_name,
    )
    return df

View File

@@ -65,6 +65,12 @@ VARIABLES: List[Variable] = [
label="Direction du vent",
unit="°",
),
Variable(
key="sun_elevation",
column="sun_elevation",
label="Élévation solaire",
unit="°",
),
]
# Lookup table from each variable's key to its Variable entry; if two entries
# of VARIABLES share a key, the later one wins.
VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES}