Ajout de l'élévation solaire et visualisations

2025-11-17 22:20:54 +01:00 · 2025-11-17 22:20:54 +01:00 · 01cf686af3
commit 01cf686af3
parent 268d5d65ec
13 changed files with 487 additions and 1 deletions
--- a/.env.example
+++ b/.env.example
@ -2,3 +2,6 @@ INFLUXDB_URL=http://
 INFLUXDB_TOKEN=
 INFLUXDB_ORG=
 INFLUXDB_BUCKET=weather
 STATION_LATITUDE=
 STATION_LONGITUDE=
 STATION_ELEVATION=
--- a/données.md
+++ b/données.md
@ -0,0 +1,3 @@
 # Enrichissement du jeu de données
 - Élévation du soleil
--- a/figures/sun/hexbin_sun_elevation_vs_illuminance.png
+++ b/figures/sun/hexbin_sun_elevation_vs_illuminance.png
--- a/figures/sun/hexbin_sun_elevation_vs_temperature.png
+++ b/figures/sun/hexbin_sun_elevation_vs_temperature.png
--- a/figures/sun/sun_elevation_profiles.png
+++ b/figures/sun/sun_elevation_profiles.png
--- a/meteo/analysis.py
+++ b/meteo/analysis.py
@ -134,6 +134,19 @@ class DiurnalCycleStats:
    quantile_high_level: float | None = None
@dataclass
 class BinnedStatistics:
    centers: np.ndarray
    intervals: pd.IntervalIndex
    counts: pd.Series
    mean: pd.DataFrame
    median: pd.DataFrame
    quantile_low: pd.DataFrame | None
    quantile_high: pd.DataFrame | None
    quantile_low_level: float | None = None
    quantile_high_level: float | None = None
 def compute_rolling_correlation_series(
    df: pd.DataFrame,
    var_x: Variable,
@ -456,3 +469,93 @@ def compute_daily_rainfall_totals(
        }
    )
    return result
 def compute_binned_statistics(
    df: pd.DataFrame,
    *,
    bin_source_column: str,
    target_columns: Sequence[str],
    bins: Sequence[float] | np.ndarray,
    min_count: int = 30,
    quantiles: tuple[float, float] | None = (0.25, 0.75),
 ) -> BinnedStatistics:
    """
    Calcule des statistiques (mean/median/quantiles) pour plusieurs colonnes
    en regroupant les données selon des intervalles définis sur une colonne source.
    """
    if bin_source_column not in df.columns:
        raise KeyError(f"Colonne source absente : {bin_source_column}")
    missing_targets = [col for col in target_columns if col not in df.columns]
    if missing_targets:
        raise KeyError(f"Colonnes cibles absentes : {missing_targets!r}")
    subset_cols = [bin_source_column, *target_columns]
    data = df[subset_cols].dropna(subset=[bin_source_column])
    if data.empty:
        empty_interval_index = pd.IntervalIndex([])
        empty_df = pd.DataFrame(columns=target_columns)
        empty_counts = pd.Series(dtype=int)
        return BinnedStatistics(
            centers=np.array([]),
            intervals=empty_interval_index,
            counts=empty_counts,
            mean=empty_df,
            median=empty_df,
            quantile_low=None,
            quantile_high=None,
        )
    categories = pd.cut(data[bin_source_column], bins=bins, include_lowest=True)
    grouped = data.groupby(categories, observed=False)
    counts = grouped.size()
    valid_mask = counts >= max(1, min_count)
    valid_intervals = counts.index[valid_mask]
    if len(valid_intervals) == 0:
        empty_interval_index = pd.IntervalIndex([])
        empty_df = pd.DataFrame(columns=target_columns)
        empty_counts = pd.Series(dtype=int)
        return BinnedStatistics(
            centers=np.array([]),
            intervals=empty_interval_index,
            counts=empty_counts,
            mean=empty_df,
            median=empty_df,
            quantile_low=None,
            quantile_high=None,
        )
    interval_index = pd.IntervalIndex(valid_intervals)
    mean_df = grouped[target_columns].mean().loc[interval_index]
    median_df = grouped[target_columns].median().loc[interval_index]
    q_low = q_high = None
    quantile_low_df: pd.DataFrame | None = None
    quantile_high_df: pd.DataFrame | None = None
    if quantiles is not None:
        q_low, q_high = quantiles
        if q_low is not None:
            quantile_low_df = grouped[target_columns].quantile(q_low).loc[interval_index]
        if q_high is not None:
            quantile_high_df = grouped[target_columns].quantile(q_high).loc[interval_index]
    centers = np.array([interval.mid for interval in interval_index])
    filtered_counts = counts.loc[interval_index]
    return BinnedStatistics(
        centers=centers,
        intervals=interval_index,
        counts=filtered_counts,
        mean=mean_df,
        median=median_df,
        quantile_low=quantile_low_df,
        quantile_high=quantile_high_df,
        quantile_low_level=q_low,
        quantile_high_level=q_high,
    )
--- a/meteo/config.py
+++ b/meteo/config.py
@ -65,3 +65,58 @@ class InfluxSettings:
            org=org,     # type: ignore[arg-type]
            bucket=bucket,  # type: ignore[arg-type]
        )
@dataclass(frozen=True)
 class StationLocation:
    """
    Décrit la position géographique de la station météo.
    Utilisée pour les calculs astronomiques (ex: élévation du soleil).
    """
    latitude: float
    longitude: float
    elevation_m: float = 0.0
    @classmethod
    def from_env(cls, *, optional: bool = False) -> Self | None:
        """
        Charge les coordonnées GPS depuis les variables d'environnement :
        - STATION_LATITUDE (obligatoire)
        - STATION_LONGITUDE (obligatoire)
        - STATION_ELEVATION (optionnelle, en mètres)
        """
        load_dotenv()
        lat = os.getenv("STATION_LATITUDE")
        lon = os.getenv("STATION_LONGITUDE")
        elev = os.getenv("STATION_ELEVATION")
        if not lat or not lon:
            if optional:
                return None
            raise RuntimeError(
                "Les variables STATION_LATITUDE et STATION_LONGITUDE doivent être définies "
                "pour calculer l'élévation solaire."
            )
        try:
            latitude = float(lat)
            longitude = float(lon)
            elevation = float(elev) if elev else 0.0
        except ValueError as exc:
            raise RuntimeError(
                "STATION_LATITUDE / STATION_LONGITUDE / STATION_ELEVATION doivent être des nombres valides."
            ) from exc
        return cls(latitude=latitude, longitude=longitude, elevation_m=elevation)
    def to_astral_observer_kwargs(self) -> dict[str, float]:
        """
        Prépare les arguments attendus par astral.Observer.
        """
        return {
            "latitude": self.latitude,
            "longitude": self.longitude,
            "elevation": self.elevation_m,
        }
--- a/meteo/plots.py
+++ b/meteo/plots.py
@ -11,7 +11,7 @@ import matplotlib.dates as mdates
 import numpy as np
 import pandas as pd
-from .analysis import DiurnalCycleStats
+from .analysis import DiurnalCycleStats, BinnedStatistics
 from .variables import Variable
@ -596,6 +596,100 @@ def plot_diurnal_cycle(
    return output_path.resolve()
 def plot_binned_profiles(
    stats: BinnedStatistics,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    xlabel: str,
    title: str,
    show_counts: bool = False,
 ) -> Path:
    """
    Trace les statistiques agrégées d'une ou plusieurs variables en fonction de bins.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    if stats.centers.size == 0:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "Aucune donnée suffisante pour ces intervalles.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()
    base_axes = len(variables)
    total_axes = base_axes + (1 if show_counts else 0)
    fig, axes = plt.subplots(
        total_axes,
        1,
        sharex=True,
        figsize=(10, 3 * total_axes),
    )
    if total_axes == 1:
        axes = [axes]
    else:
        axes = list(axes)
    x_values = stats.centers
    bin_widths = np.array([interval.length for interval in stats.intervals])
    if show_counts:
        count_ax = axes.pop(0)
        count_ax.bar(
            x_values,
            stats.counts.to_numpy(dtype=float),
            width=bin_widths,
            color="lightgray",
            edgecolor="gray",
            align="center",
        )
        count_ax.set_ylabel("Nombre de points")
        count_ax.grid(True, linestyle=":", alpha=0.4)
        count_ax.set_title("Densité des observations par bin")
    for ax, var in zip(axes, variables):
        col = var.column
        ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
        ax.plot(x_values, stats.median[col], color="tab:orange", linestyle="--", label="Médiane")
        if stats.quantile_low is not None and stats.quantile_high is not None:
            ax.fill_between(
                x_values,
                stats.quantile_low[col],
                stats.quantile_high[col],
                color="tab:blue",
                alpha=0.15,
                label=(
                    f"Quantiles {int(stats.quantile_low_level * 100)}–{int(stats.quantile_high_level * 100)}%"
                    if stats.quantile_low_level is not None and stats.quantile_high_level is not None
                    else "Quantiles"
                ),
            )
        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)
    axes[-1].set_xlabel(xlabel)
    axes[0].legend(loc="upper right")
    axes[-1].set_xlim(stats.intervals.left.min(), stats.intervals.right.max())
    fig.suptitle(title)
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
 def plot_daily_rainfall_hyetograph(
    daily_rain: pd.DataFrame,
    output_path: str | Path,
--- a/meteo/solar.py
+++ b/meteo/solar.py
@ -0,0 +1,66 @@
 # meteo/solar.py
 from __future__ import annotations
 import pandas as pd
 from astral import Observer
 from astral.sun import elevation
 def _ensure_datetime_index(index: pd.Index) -> pd.DatetimeIndex:
    if not isinstance(index, pd.DatetimeIndex):
        raise TypeError("Un DatetimeIndex est requis pour calculer l'élévation solaire.")
    return index
 def _prepare_index(index: pd.DatetimeIndex) -> pd.DatetimeIndex:
    """
    Retourne une version timezone-aware (en UTC) du DatetimeIndex fourni.
    """
    if index.tz is None:
        return index.tz_localize("UTC")
    return index.tz_convert("UTC")
 def compute_solar_elevation_series(
    index: pd.Index,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    series_name: str = "sun_elevation",
 ) -> pd.Series:
    """
    Calcule l'élévation du soleil (en degrés) pour chaque timestamp de l'index.
    """
    dt_index = _ensure_datetime_index(index)
    observer = Observer(latitude=latitude, longitude=longitude, elevation=elevation_m)
    utc_index = _prepare_index(dt_index)
    values = [
        float(elevation(observer, ts.to_pydatetime()))
        for ts in utc_index
    ]
    return pd.Series(values, index=dt_index, name=series_name)
 def add_solar_elevation_column(
    df: pd.DataFrame,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    column_name: str = "sun_elevation",
 ) -> pd.DataFrame:
    """
    Ajoute une colonne `column_name` contenant l'élévation du soleil en degrés.
    """
    series = compute_solar_elevation_series(
        df.index,
        latitude=latitude,
        longitude=longitude,
        elevation_m=elevation_m,
        series_name=column_name,
    )
    df[column_name] = series
    return df
--- a/meteo/variables.py
+++ b/meteo/variables.py
@ -65,6 +65,12 @@ VARIABLES: List[Variable] = [
        label="Direction du vent",
        unit="°",
    ),
    Variable(
        key="sun_elevation",
        column="sun_elevation",
        label="Élévation solaire",
        unit="°",
    ),
 ]
 VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES}
--- a/requirements.txt
+++ b/requirements.txt
@ -9,6 +9,9 @@ numpy
 matplotlib
 seaborn
 # Astronomie / position du soleil
 astral
 # Modèles statistiques / ML
 scikit-learn
 statsmodels
--- a/scripts/make_minutely_dataset.py
+++ b/scripts/make_minutely_dataset.py
@ -4,6 +4,8 @@ from __future__ import annotations
 from pathlib import Path
 from meteo.dataset import load_raw_csv, resample_to_minutes
 from meteo.config import StationLocation
 from meteo.solar import add_solar_elevation_column
 FORMATTED_CSV_PATH = Path("data/weather_filled_1s.csv")
@ -23,6 +25,29 @@ def main() -> None:
    df_min = resample_to_minutes(df_1s)
    print(f"Après resampling 60s : {len(df_min)} lignes")
    try:
        location = StationLocation.from_env(optional=True)
    except RuntimeError as exc:
        print(f"⚠ Coordonnées GPS invalides : {exc}")
        location = None
    if location is not None:
        print(
            f"Ajout de l'élévation solaire (lat={location.latitude}, lon={location.longitude}, "
            f"alt={location.elevation_m} m)..."
        )
        add_solar_elevation_column(
            df_min,
            latitude=location.latitude,
            longitude=location.longitude,
            elevation_m=location.elevation_m,
        )
    else:
        print(
            "ℹ Coordonnées GPS non définies (STATION_LATITUDE / STATION_LONGITUDE). "
            "La colonne sun_elevation ne sera pas ajoutée."
        )
    OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    df_min.to_csv(OUTPUT_CSV_PATH, index_label="time")
    print(f"✔ Dataset minuté écrit dans : {OUTPUT_CSV_PATH.resolve()}")
--- a/scripts/plot_sun_elevation_relationships.py
+++ b/scripts/plot_sun_elevation_relationships.py
@ -0,0 +1,128 @@
 # scripts/plot_sun_elevation_relationships.py
 from __future__ import annotations
 from pathlib import Path
 import numpy as np
 from meteo.dataset import load_raw_csv
 from meteo.variables import VARIABLES_BY_KEY
 from meteo.analysis import compute_binned_statistics
 from meteo.plots import plot_binned_profiles, plot_hexbin_with_third_variable
 from meteo.config import StationLocation
 from meteo.solar import add_solar_elevation_column
 CSV_PATH = Path("data/weather_minutely.csv")
 OUTPUT_DIR = Path("figures/sun")
 def ensure_sun_elevation(df):
    if "sun_elevation" in df.columns:
        return df
    print("ℹ La colonne 'sun_elevation' est absente, tentative de calcul à la volée.")
    location = StationLocation.from_env(optional=True)
    if location is None:
        print(
            "⚠ Impossible de calculer l'élévation solaire : définissez STATION_LATITUDE et STATION_LONGITUDE "
            "puis regénérez le dataset (scripts/make_minutely_dataset)."
        )
        return None
    print(
        f"→ Calcul d'élévation solaire avec lat={location.latitude}, lon={location.longitude}, "
        f"alt={location.elevation_m} m."
    )
    add_solar_elevation_column(
        df,
        latitude=location.latitude,
        longitude=location.longitude,
        elevation_m=location.elevation_m,
    )
    return df
 def main() -> None:
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return
    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f"  Lignes   : {len(df)}")
    print(f"  Colonnes : {list(df.columns)}")
    print()
    df = ensure_sun_elevation(df)
    if df is None or "sun_elevation" not in df.columns:
        return
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    profile_keys = ["temperature", "humidity", "illuminance"]
    profile_vars = [VARIABLES_BY_KEY[key] for key in profile_keys]
    bins = np.arange(-90, 95, 5)  # bins de 5°
    stats = compute_binned_statistics(
        df=df,
        bin_source_column="sun_elevation",
        target_columns=[v.column for v in profile_vars],
        bins=bins,
        min_count=100,
        quantiles=(0.2, 0.8),
    )
    profile_output = OUTPUT_DIR / "sun_elevation_profiles.png"
    plot_binned_profiles(
        stats=stats,
        variables=profile_vars,
        output_path=profile_output,
        xlabel="Élévation solaire (°)",
        title="Profils moyens en fonction de l'élévation solaire",
        show_counts=True,
    )
    print(f"✔ Profils sun vs variables : {profile_output}")
    hexbin_scenarios = [
        {
            "x": "sun_elevation",
            "y": "illuminance",
            "color": "temperature",
            "filename": "hexbin_sun_elevation_vs_illuminance.png",
            "description": "Illuminance en fonction de l'élévation du soleil, couleur = température.",
        },
        {
            "x": "sun_elevation",
            "y": "temperature",
            "color": "humidity",
            "filename": "hexbin_sun_elevation_vs_temperature.png",
            "description": "Température en fonction de l'élévation, couleur = humidité relative.",
        },
    ]
    for scenario in hexbin_scenarios:
        var_x = VARIABLES_BY_KEY[scenario["x"]]
        var_y = VARIABLES_BY_KEY[scenario["y"]]
        var_color = VARIABLES_BY_KEY[scenario["color"]]
        output_path = OUTPUT_DIR / scenario["filename"]
        print(f"→ {scenario['description']}")
        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=60,
            mincnt=10,
            reduce_func_label="moyenne",
            cmap="cividis",
        )
        print(f"   ✔ Hexbin enregistré : {output_path}")
    print("✔ Tous les graphiques liés à l'élévation solaire ont été produits.")
 if __name__ == "__main__":
    main()
		`@ -0,0 +1,3 @@`
							`# Enrichissement du jeu de données`

							`- Élévation du soleil`