Ajout de l'élévation solaire et visualisations

2025-11-17 22:20:54 +01:00
parent 268d5d65ec
commit 01cf686af3
13 changed files with 487 additions and 1 deletion
--- a/meteo/analysis.py
+++ b/meteo/analysis.py
@@ -134,6 +134,19 @@ class DiurnalCycleStats:
    quantile_high_level: float | None = None


+@dataclass
+class BinnedStatistics:
+    """
+    Per-bin aggregates of one or more columns, as populated by
+    `compute_binned_statistics`.
+
+    The frames hold one row per retained interval and one column per target
+    column. An empty result uses empty containers and `None` quantile frames.
+    """
+    # Midpoint of each retained interval.
+    centers: np.ndarray
+    # Retained intervals (bins that reached the minimum-count threshold).
+    intervals: pd.IntervalIndex
+    # Number of rows that fell into each retained interval.
+    counts: pd.Series
+    # Per-interval mean of every target column.
+    mean: pd.DataFrame
+    # Per-interval median of every target column.
+    median: pd.DataFrame
+    # Per-interval lower / upper quantile frames; None when not computed.
+    quantile_low: pd.DataFrame | None
+    quantile_high: pd.DataFrame | None
+    # Quantile levels used for the two frames above; None when not computed.
+    quantile_low_level: float | None = None
+    quantile_high_level: float | None = None
+
+
 def compute_rolling_correlation_series(
    df: pd.DataFrame,
    var_x: Variable,
@@ -456,3 +469,93 @@ def compute_daily_rainfall_totals(
        }
    )
    return result
+
+
+def _empty_binned_statistics(target_columns: Sequence[str]) -> BinnedStatistics:
+    """Build the result returned when no bin holds enough observations."""
+    return BinnedStatistics(
+        centers=np.array([]),
+        intervals=pd.IntervalIndex([]),
+        counts=pd.Series(dtype=int),
+        # Two distinct frames, so mutating one can never affect the other.
+        mean=pd.DataFrame(columns=target_columns),
+        median=pd.DataFrame(columns=target_columns),
+        quantile_low=None,
+        quantile_high=None,
+    )
+
+
+def compute_binned_statistics(
+    df: pd.DataFrame,
+    *,
+    bin_source_column: str,
+    target_columns: Sequence[str],
+    bins: Sequence[float] | np.ndarray,
+    min_count: int = 30,
+    quantiles: tuple[float, float] | None = (0.25, 0.75),
+) -> BinnedStatistics:
+    """
+    Compute mean / median / quantiles of several columns, grouping the rows
+    by intervals defined on a source column.
+
+    Args:
+        df: Input data; must contain `bin_source_column` and every target.
+        bin_source_column: Column whose values are cut into `bins`.
+        target_columns: Columns to aggregate inside each bin. Any sequence
+            (list, tuple, ...) is accepted.
+        bins: Bin edges handed to `pd.cut` (lowest edge included).
+        min_count: Minimum number of rows a bin needs to be kept; values
+            below 1 are treated as 1.
+        quantiles: `(low, high)` quantile levels, or None to skip quantiles.
+
+    Returns:
+        A `BinnedStatistics` restricted to the bins that reached `min_count`.
+        When the source column has no usable value, or no bin is populated
+        enough, an empty `BinnedStatistics` is returned.
+
+    Raises:
+        KeyError: If the source column or any target column is missing.
+    """
+    if bin_source_column not in df.columns:
+        raise KeyError(f"Colonne source absente : {bin_source_column}")
+
+    # Normalise to a list: recent pandas versions reject a tuple of several
+    # names when selecting columns on a groupby object.
+    columns = list(target_columns)
+
+    missing_targets = [col for col in columns if col not in df.columns]
+    if missing_targets:
+        raise KeyError(f"Colonnes cibles absentes : {missing_targets!r}")
+
+    # Rows without a source value cannot be assigned to any bin.
+    data = df[[bin_source_column, *columns]].dropna(subset=[bin_source_column])
+    if data.empty:
+        return _empty_binned_statistics(columns)
+
+    categories = pd.cut(data[bin_source_column], bins=bins, include_lowest=True)
+    # observed=False keeps empty bins in the result so they can be filtered
+    # out explicitly through the count threshold below.
+    grouped = data.groupby(categories, observed=False)
+
+    counts = grouped.size()
+    valid_mask = counts >= max(1, min_count)
+    valid_intervals = counts.index[valid_mask]
+    if len(valid_intervals) == 0:
+        return _empty_binned_statistics(columns)
+
+    interval_index = pd.IntervalIndex(valid_intervals)
+    selected = grouped[columns]
+
+    mean_df = selected.mean().loc[interval_index]
+    median_df = selected.median().loc[interval_index]
+
+    q_low = q_high = None
+    quantile_low_df: pd.DataFrame | None = None
+    quantile_high_df: pd.DataFrame | None = None
+
+    if quantiles is not None:
+        q_low, q_high = quantiles
+        # Each level may individually be None; only compute what was asked.
+        if q_low is not None:
+            quantile_low_df = selected.quantile(q_low).loc[interval_index]
+        if q_high is not None:
+            quantile_high_df = selected.quantile(q_high).loc[interval_index]
+
+    centers = np.array([interval.mid for interval in interval_index])
+    filtered_counts = counts.loc[interval_index]
+
+    return BinnedStatistics(
+        centers=centers,
+        intervals=interval_index,
+        counts=filtered_counts,
+        mean=mean_df,
+        median=median_df,
+        quantile_low=quantile_low_df,
+        quantile_high=quantile_high_df,
+        quantile_low_level=q_low,
+        quantile_high_level=q_high,
+    )
--- a/meteo/config.py
+++ b/meteo/config.py
@@ -65,3 +65,58 @@ class InfluxSettings:
            org=org,     # type: ignore[arg-type]
            bucket=bucket,  # type: ignore[arg-type]
        )
+
+
+@dataclass(frozen=True)
+class StationLocation:
+    """
+    Geographic position of the weather station.
+    Used for astronomical computations (e.g. solar elevation).
+    """
+
+    latitude: float
+    longitude: float
+    elevation_m: float = 0.0
+
+    @classmethod
+    def from_env(cls, *, optional: bool = False) -> Self | None:
+        """
+        Load the GPS coordinates from environment variables:
+        - STATION_LATITUDE (required, within [-90, 90])
+        - STATION_LONGITUDE (required, within [-180, 180])
+        - STATION_ELEVATION (optional, in metres; defaults to 0)
+
+        Returns None when `optional` is true and the latitude or longitude
+        is missing or empty.
+
+        Raises RuntimeError when a required variable is missing (and
+        `optional` is false), when a value is not a valid number, or when
+        the latitude / longitude is outside its valid range.
+        """
+        load_dotenv()
+
+        lat = os.getenv("STATION_LATITUDE")
+        lon = os.getenv("STATION_LONGITUDE")
+        elev = os.getenv("STATION_ELEVATION")
+
+        if not lat or not lon:
+            if optional:
+                return None
+            raise RuntimeError(
+                "Les variables STATION_LATITUDE et STATION_LONGITUDE doivent être définies "
+                "pour calculer l'élévation solaire."
+            )
+
+        try:
+            latitude = float(lat)
+            longitude = float(lon)
+            elevation = float(elev) if elev else 0.0
+        except ValueError as exc:
+            raise RuntimeError(
+                "STATION_LATITUDE / STATION_LONGITUDE / STATION_ELEVATION doivent être des nombres valides."
+            ) from exc
+
+        # float() happily parses "nan" and "inf"; the chained comparisons are
+        # false for those values, so they are rejected together with
+        # out-of-range coordinates.
+        if not (-90.0 <= latitude <= 90.0 and -180.0 <= longitude <= 180.0):
+            raise RuntimeError(
+                "STATION_LATITUDE doit être comprise entre -90 et 90 "
+                "et STATION_LONGITUDE entre -180 et 180."
+            )
+
+        return cls(latitude=latitude, longitude=longitude, elevation_m=elevation)
+
+    def to_astral_observer_kwargs(self) -> dict[str, float]:
+        """
+        Build the keyword arguments expected by astral.Observer.
+        """
+        return {
+            "latitude": self.latitude,
+            "longitude": self.longitude,
+            "elevation": self.elevation_m,
+        }
--- a/meteo/plots.py
+++ b/meteo/plots.py
@@ -11,7 +11,7 @@ import matplotlib.dates as mdates
 import numpy as np
 import pandas as pd

-from .analysis import DiurnalCycleStats
+from .analysis import DiurnalCycleStats, BinnedStatistics
 from .variables import Variable


@@ -596,6 +596,100 @@ def plot_diurnal_cycle(
    return output_path.resolve()


+def plot_binned_profiles(
+    stats: BinnedStatistics,
+    variables: Sequence[Variable],
+    output_path: str | Path,
+    *,
+    xlabel: str,
+    title: str,
+    show_counts: bool = False,
+) -> Path:
+    """
+    Plot the aggregated statistics of one or more variables against bins.
+
+    One panel is drawn per variable (mean, median and, when both quantile
+    frames are available, a shaded inter-quantile band). With `show_counts`
+    an extra panel at the top shows the number of observations per bin.
+    When `stats` holds no bin, a placeholder figure with a message is
+    written instead.
+
+    Args:
+        stats: Binned statistics to draw.
+        variables: Variables to plot; `var.column` selects the column in the
+            statistics frames, `var.label` / `var.unit` build the y label.
+        output_path: Destination image; parent directories are created.
+        xlabel: Label of the shared x axis.
+        title: Figure title.
+        show_counts: Whether to add the per-bin count panel.
+
+    Returns:
+        The resolved path of the written image.
+
+    Raises:
+        ValueError: If there is nothing to draw (no variable and
+            `show_counts` is false) although `stats` is not empty.
+    """
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if stats.centers.size == 0:
+        fig, ax = plt.subplots()
+        try:
+            ax.text(
+                0.5,
+                0.5,
+                "Aucune donnée suffisante pour ces intervalles.",
+                ha="center",
+                va="center",
+            )
+            ax.set_axis_off()
+            fig.savefig(output_path, dpi=150, bbox_inches="tight")
+        finally:
+            # Always release the figure, even if saving fails.
+            plt.close(fig)
+        return output_path.resolve()
+
+    total_axes = len(variables) + (1 if show_counts else 0)
+    if total_axes == 0:
+        raise ValueError("Aucune variable à tracer et show_counts est désactivé.")
+
+    # squeeze=False always yields a 2-D array of axes, so a single panel
+    # needs no special case.
+    fig, axes_grid = plt.subplots(
+        total_axes,
+        1,
+        sharex=True,
+        figsize=(10, 3 * total_axes),
+        squeeze=False,
+    )
+
+    try:
+        all_axes = list(axes_grid[:, 0])
+        # The count panel, when present, sits on top of the variable panels.
+        variable_axes = all_axes[1:] if show_counts else all_axes
+
+        x_values = stats.centers
+
+        if show_counts:
+            bin_widths = np.array([interval.length for interval in stats.intervals])
+            count_ax = all_axes[0]
+            count_ax.bar(
+                x_values,
+                stats.counts.to_numpy(dtype=float),
+                width=bin_widths,
+                color="lightgray",
+                edgecolor="gray",
+                align="center",
+            )
+            count_ax.set_ylabel("Nombre de points")
+            count_ax.grid(True, linestyle=":", alpha=0.4)
+            count_ax.set_title("Densité des observations par bin")
+
+        low_level = stats.quantile_low_level
+        high_level = stats.quantile_high_level
+        if low_level is not None and high_level is not None:
+            # ":g" avoids the truncation int() suffers from binary floats
+            # (0.29 * 100 == 28.999...) and keeps fractional levels readable.
+            band_label = f"Quantiles {low_level * 100:g}–{high_level * 100:g}%"
+        else:
+            band_label = "Quantiles"
+
+        has_band = stats.quantile_low is not None and stats.quantile_high is not None
+
+        for ax, var in zip(variable_axes, variables):
+            col = var.column
+            ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
+            ax.plot(
+                x_values,
+                stats.median[col],
+                color="tab:orange",
+                linestyle="--",
+                label="Médiane",
+            )
+
+            if has_band:
+                ax.fill_between(
+                    x_values,
+                    stats.quantile_low[col],
+                    stats.quantile_high[col],
+                    color="tab:blue",
+                    alpha=0.15,
+                    label=band_label,
+                )
+
+            ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
+            ax.set_ylabel(ylabel)
+            ax.grid(True, linestyle=":", alpha=0.5)
+
+        bottom_ax = all_axes[-1]
+        bottom_ax.set_xlabel(xlabel)
+        if variable_axes:
+            # Only variable panels carry labelled artists.
+            variable_axes[0].legend(loc="upper right")
+        # The x axis is shared, so limiting one panel limits them all.
+        bottom_ax.set_xlim(stats.intervals.left.min(), stats.intervals.right.max())
+
+        fig.suptitle(title)
+        fig.tight_layout(rect=[0, 0, 1, 0.97])
+        fig.savefig(output_path, dpi=150)
+    finally:
+        # Always release the figure, even if drawing or saving fails.
+        plt.close(fig)
+    return output_path.resolve()
+
+
 def plot_daily_rainfall_hyetograph(
    daily_rain: pd.DataFrame,
    output_path: str | Path,
--- a/meteo/solar.py
+++ b/meteo/solar.py
@@ -0,0 +1,66 @@
+# meteo/solar.py
+from __future__ import annotations
+
+import pandas as pd
+from astral import Observer
+from astral.sun import elevation
+
+
+def _ensure_datetime_index(index: pd.Index) -> pd.DatetimeIndex:
+    """Return `index` unchanged if it is a DatetimeIndex, else raise TypeError."""
+    if isinstance(index, pd.DatetimeIndex):
+        return index
+    raise TypeError("Un DatetimeIndex est requis pour calculer l'élévation solaire.")
+
+
+def _prepare_index(index: pd.DatetimeIndex) -> pd.DatetimeIndex:
+    """
+    Return a timezone-aware UTC version of the given DatetimeIndex.
+
+    A naive index is labelled as UTC as-is; an aware index is converted.
+    """
+    is_naive = index.tz is None
+    return index.tz_localize("UTC") if is_naive else index.tz_convert("UTC")
+
+
+def compute_solar_elevation_series(
+    index: pd.Index,
+    *,
+    latitude: float,
+    longitude: float,
+    elevation_m: float = 0.0,
+    series_name: str = "sun_elevation",
+) -> pd.Series:
+    """
+    Compute the sun elevation (in degrees) for every timestamp of the index.
+
+    Timestamps are expressed in UTC before the computation (a naive index is
+    taken to be UTC already). The returned series keeps the original,
+    unconverted index and is named `series_name`.
+
+    Raises TypeError when `index` is not a DatetimeIndex.
+    """
+    source_index = _ensure_datetime_index(index)
+    site = Observer(latitude=latitude, longitude=longitude, elevation=elevation_m)
+
+    angles: list[float] = []
+    for moment in _prepare_index(source_index):
+        angles.append(float(elevation(site, moment.to_pydatetime())))
+
+    return pd.Series(angles, index=source_index, name=series_name)
+
+
+def add_solar_elevation_column(
+    df: pd.DataFrame,
+    *,
+    latitude: float,
+    longitude: float,
+    elevation_m: float = 0.0,
+    column_name: str = "sun_elevation",
+) -> pd.DataFrame:
+    """
+    Add a `column_name` column holding the sun elevation in degrees.
+
+    The column is computed from the frame's index and written into `df`
+    itself; the same (mutated) frame is returned.
+    """
+    # Assigning on `df` modifies the caller's frame in place.
+    df[column_name] = compute_solar_elevation_series(
+        df.index,
+        latitude=latitude,
+        longitude=longitude,
+        elevation_m=elevation_m,
+        series_name=column_name,
+    )
+    return df
--- a/meteo/variables.py
+++ b/meteo/variables.py
@@ -65,6 +65,12 @@ VARIABLES: List[Variable] = [
        label="Direction du vent",
        unit="°",
    ),
+    Variable(
+        key="sun_elevation",
+        column="sun_elevation",
+        label="Élévation solaire",
+        unit="°",
+    ),
 ]

 VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES}