1

Ajout de l'élévation solaire et visualisations

This commit is contained in:
Richard Dern 2025-11-17 22:20:54 +01:00
parent 268d5d65ec
commit 01cf686af3
13 changed files with 487 additions and 1 deletions

View File

@ -2,3 +2,6 @@ INFLUXDB_URL=http://
INFLUXDB_TOKEN= INFLUXDB_TOKEN=
INFLUXDB_ORG= INFLUXDB_ORG=
INFLUXDB_BUCKET=weather INFLUXDB_BUCKET=weather
STATION_LATITUDE=
STATION_LONGITUDE=
STATION_ELEVATION=

View File

@ -0,0 +1,3 @@
# Enrichissement du jeu de données
- Élévation du soleil

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 195 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

View File

@ -134,6 +134,19 @@ class DiurnalCycleStats:
quantile_high_level: float | None = None quantile_high_level: float | None = None
@dataclass
class BinnedStatistics:
centers: np.ndarray
intervals: pd.IntervalIndex
counts: pd.Series
mean: pd.DataFrame
median: pd.DataFrame
quantile_low: pd.DataFrame | None
quantile_high: pd.DataFrame | None
quantile_low_level: float | None = None
quantile_high_level: float | None = None
def compute_rolling_correlation_series( def compute_rolling_correlation_series(
df: pd.DataFrame, df: pd.DataFrame,
var_x: Variable, var_x: Variable,
@ -456,3 +469,93 @@ def compute_daily_rainfall_totals(
} }
) )
return result return result
def _empty_binned_statistics(target_columns: Sequence[str]) -> BinnedStatistics:
    """Build the placeholder result used when no bin can be reported."""
    columns = list(target_columns)
    return BinnedStatistics(
        centers=np.array([]),
        intervals=pd.IntervalIndex([]),
        counts=pd.Series(dtype=int),
        # Two distinct frames: mutating one must not affect the other.
        mean=pd.DataFrame(columns=columns),
        median=pd.DataFrame(columns=columns),
        quantile_low=None,
        quantile_high=None,
    )


def compute_binned_statistics(
    df: pd.DataFrame,
    *,
    bin_source_column: str,
    target_columns: Sequence[str],
    bins: Sequence[float] | np.ndarray,
    min_count: int = 30,
    quantiles: tuple[float, float] | None = (0.25, 0.75),
) -> BinnedStatistics:
    """
    Compute mean / median / quantiles of several columns, grouped by bins
    defined on a source column.

    Parameters
    ----------
    df:
        Input data.
    bin_source_column:
        Column whose values are cut into bins. Rows where it is NaN are
        dropped before binning.
    target_columns:
        Columns to aggregate inside each bin.
    bins:
        Bin edges, forwarded to ``pd.cut`` (lowest edge included).
    min_count:
        Minimum number of rows a bin must contain to be kept. A bin always
        needs at least one row, whatever the value given here.
    quantiles:
        ``(low, high)`` quantile levels, or ``None`` to skip quantiles.

    Returns
    -------
    BinnedStatistics
        Statistics restricted to the bins that satisfy ``min_count``. All
        fields are empty (and quantiles ``None``) when no bin qualifies.

    Raises
    ------
    KeyError
        If the source column or one of the target columns is absent.
    """
    # Groupby column selection rejects tuples, so normalise any sequence
    # (tuple, Index, ...) to a plain list once.
    columns = list(target_columns)

    if bin_source_column not in df.columns:
        raise KeyError(f"Colonne source absente : {bin_source_column}")

    missing_targets = [col for col in columns if col not in df.columns]
    if missing_targets:
        raise KeyError(f"Colonnes cibles absentes : {missing_targets!r}")

    data = df[[bin_source_column, *columns]].dropna(subset=[bin_source_column])
    if data.empty:
        return _empty_binned_statistics(columns)

    categories = pd.cut(data[bin_source_column], bins=bins, include_lowest=True)
    # observed=False keeps empty bins in the result so that counts are
    # defined for every bin before filtering.
    grouped = data.groupby(categories, observed=False)
    counts = grouped.size()

    valid_mask = counts >= max(1, min_count)
    valid_intervals = counts.index[valid_mask]
    if len(valid_intervals) == 0:
        return _empty_binned_statistics(columns)

    interval_index = pd.IntervalIndex(valid_intervals)
    selected = grouped[columns]

    mean_df = selected.mean().loc[interval_index]
    median_df = selected.median().loc[interval_index]

    q_low = q_high = None
    quantile_low_df: pd.DataFrame | None = None
    quantile_high_df: pd.DataFrame | None = None

    if quantiles is not None:
        q_low, q_high = quantiles
        if q_low is not None:
            quantile_low_df = selected.quantile(q_low).loc[interval_index]
        if q_high is not None:
            quantile_high_df = selected.quantile(q_high).loc[interval_index]

    return BinnedStatistics(
        centers=interval_index.mid.to_numpy(),
        intervals=interval_index,
        counts=counts.loc[interval_index],
        mean=mean_df,
        median=median_df,
        quantile_low=quantile_low_df,
        quantile_high=quantile_high_df,
        quantile_low_level=q_low,
        quantile_high_level=q_high,
    )

View File

@ -65,3 +65,58 @@ class InfluxSettings:
org=org, # type: ignore[arg-type] org=org, # type: ignore[arg-type]
bucket=bucket, # type: ignore[arg-type] bucket=bucket, # type: ignore[arg-type]
) )
@dataclass(frozen=True)
class StationLocation:
    """
    Geographic position of the weather station.

    Used for astronomical computations (e.g. solar elevation).
    """

    latitude: float
    longitude: float
    elevation_m: float = 0.0

    @classmethod
    def from_env(cls, *, optional: bool = False) -> Self | None:
        """
        Load the GPS coordinates from environment variables:

        - STATION_LATITUDE (required, degrees in [-90, 90])
        - STATION_LONGITUDE (required, degrees in [-180, 180])
        - STATION_ELEVATION (optional, metres; defaults to 0.0)

        When latitude or longitude is missing, returns ``None`` if
        ``optional`` is true and raises ``RuntimeError`` otherwise.
        Values that are present but invalid always raise ``RuntimeError``,
        regardless of ``optional``.
        """
        import math

        load_dotenv()

        lat = os.getenv("STATION_LATITUDE")
        lon = os.getenv("STATION_LONGITUDE")
        elev = os.getenv("STATION_ELEVATION")

        if not lat or not lon:
            if optional:
                return None
            raise RuntimeError(
                "Les variables STATION_LATITUDE et STATION_LONGITUDE doivent être définies "
                "pour calculer l'élévation solaire."
            )

        try:
            latitude = float(lat)
            longitude = float(lon)
            elevation = float(elev) if elev else 0.0
            # float() happily parses "nan" and "inf"; treat them like any
            # other unparsable value.
            if not all(math.isfinite(v) for v in (latitude, longitude, elevation)):
                raise ValueError("non-finite coordinate")
        except ValueError as exc:
            raise RuntimeError(
                "STATION_LATITUDE / STATION_LONGITUDE / STATION_ELEVATION doivent être des nombres valides."
            ) from exc

        if not -90.0 <= latitude <= 90.0 or not -180.0 <= longitude <= 180.0:
            raise RuntimeError(
                "STATION_LATITUDE doit être comprise entre -90 et 90 "
                "et STATION_LONGITUDE entre -180 et 180."
            )

        return cls(latitude=latitude, longitude=longitude, elevation_m=elevation)

    def to_astral_observer_kwargs(self) -> dict[str, float]:
        """
        Return the keyword arguments expected by ``astral.Observer``.
        """
        return {
            "latitude": self.latitude,
            "longitude": self.longitude,
            "elevation": self.elevation_m,
        }

View File

@ -11,7 +11,7 @@ import matplotlib.dates as mdates
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from .analysis import DiurnalCycleStats from .analysis import DiurnalCycleStats, BinnedStatistics
from .variables import Variable from .variables import Variable
@ -596,6 +596,100 @@ def plot_diurnal_cycle(
return output_path.resolve() return output_path.resolve()
def plot_binned_profiles(
    stats: BinnedStatistics,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    xlabel: str,
    title: str,
    show_counts: bool = False,
) -> Path:
    """
    Plot the aggregated statistics of one or more variables against bins.

    One subplot is drawn per variable (mean, median and, when available, the
    band between the low and high quantiles). When ``show_counts`` is true an
    extra subplot on top shows the number of observations per bin.

    If ``stats`` contains no bin, a placeholder figure with an explanatory
    message is written instead.

    Returns the resolved path of the written image.

    Raises ``ValueError`` when there is nothing to draw (no variable and
    ``show_counts`` disabled).
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if stats.centers.size == 0:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "Aucune donnée suffisante pour ces intervalles.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    total_axes = len(variables) + (1 if show_counts else 0)
    if total_axes == 0:
        raise ValueError("Aucune variable à tracer et show_counts est désactivé.")

    # squeeze=False always yields a 2-D array of axes, so the single-subplot
    # case needs no special handling.
    fig, axes_grid = plt.subplots(
        total_axes,
        1,
        sharex=True,
        figsize=(10, 3 * total_axes),
        squeeze=False,
    )
    all_axes = list(axes_grid[:, 0])
    variable_axes = all_axes[1:] if show_counts else all_axes

    x_values = stats.centers

    if show_counts:
        count_ax = all_axes[0]
        bin_widths = np.array([interval.length for interval in stats.intervals])
        count_ax.bar(
            x_values,
            stats.counts.to_numpy(dtype=float),
            width=bin_widths,
            color="lightgray",
            edgecolor="gray",
            align="center",
        )
        count_ax.set_ylabel("Nombre de points")
        count_ax.grid(True, linestyle=":", alpha=0.4)
        count_ax.set_title("Densité des observations par bin")

    band_label = "Quantiles"
    if stats.quantile_low_level is not None and stats.quantile_high_level is not None:
        # round() rather than int(): 0.29 * 100 is 28.999... and would
        # otherwise be displayed as 28.
        low_pct = round(stats.quantile_low_level * 100)
        high_pct = round(stats.quantile_high_level * 100)
        band_label = f"Quantiles {low_pct}–{high_pct}%"

    for ax, var in zip(variable_axes, variables):
        col = var.column
        ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
        ax.plot(x_values, stats.median[col], color="tab:orange", linestyle="--", label="Médiane")

        if stats.quantile_low is not None and stats.quantile_high is not None:
            ax.fill_between(
                x_values,
                stats.quantile_low[col],
                stats.quantile_high[col],
                color="tab:blue",
                alpha=0.15,
                label=band_label,
            )

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    all_axes[-1].set_xlabel(xlabel)
    if variable_axes:
        # The legend is identical on every variable subplot; show it once.
        variable_axes[0].legend(loc="upper right")
    all_axes[-1].set_xlim(stats.intervals.left.min(), stats.intervals.right.max())

    fig.suptitle(title)
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)
    return output_path.resolve()
def plot_daily_rainfall_hyetograph( def plot_daily_rainfall_hyetograph(
daily_rain: pd.DataFrame, daily_rain: pd.DataFrame,
output_path: str | Path, output_path: str | Path,

66
meteo/solar.py Normal file
View File

@ -0,0 +1,66 @@
# meteo/solar.py
from __future__ import annotations
import pandas as pd
from astral import Observer
from astral.sun import elevation
def _ensure_datetime_index(index: pd.Index) -> pd.DatetimeIndex:
    """Return ``index`` unchanged if it is a DatetimeIndex, else raise TypeError."""
    if isinstance(index, pd.DatetimeIndex):
        return index
    raise TypeError("Un DatetimeIndex est requis pour calculer l'élévation solaire.")
def _prepare_index(index: pd.DatetimeIndex) -> pd.DatetimeIndex:
    """
    Return a timezone-aware (UTC) version of the given DatetimeIndex.

    A naive index is labelled as UTC without shifting its values; an aware
    index is converted to UTC.
    """
    is_naive = index.tz is None
    return index.tz_localize("UTC") if is_naive else index.tz_convert("UTC")
def compute_solar_elevation_series(
    index: pd.Index,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    series_name: str = "sun_elevation",
) -> pd.Series:
    """
    Compute the sun elevation (in degrees) for every timestamp of ``index``.

    The returned Series is named ``series_name`` and keeps the caller's
    original index; timestamps are only converted to UTC internally for the
    astronomical computation.
    """
    dt_index = _ensure_datetime_index(index)
    observer = Observer(latitude=latitude, longitude=longitude, elevation=elevation_m)

    def _elevation_at(timestamp: pd.Timestamp) -> float:
        return float(elevation(observer, timestamp.to_pydatetime()))

    elevations = [_elevation_at(timestamp) for timestamp in _prepare_index(dt_index)]
    return pd.Series(elevations, index=dt_index, name=series_name)
def add_solar_elevation_column(
    df: pd.DataFrame,
    *,
    latitude: float,
    longitude: float,
    elevation_m: float = 0.0,
    column_name: str = "sun_elevation",
) -> pd.DataFrame:
    """
    Add a ``column_name`` column holding the sun elevation in degrees.

    The DataFrame is modified in place and also returned for convenience.
    """
    df[column_name] = compute_solar_elevation_series(
        df.index,
        latitude=latitude,
        longitude=longitude,
        elevation_m=elevation_m,
        series_name=column_name,
    )
    return df

View File

@ -65,6 +65,12 @@ VARIABLES: List[Variable] = [
label="Direction du vent", label="Direction du vent",
unit="°", unit="°",
), ),
Variable(
key="sun_elevation",
column="sun_elevation",
label="Élévation solaire",
unit="°",
),
] ]
VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES} VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES}

View File

@ -9,6 +9,9 @@ numpy
matplotlib matplotlib
seaborn seaborn
# Astronomie / position du soleil
astral
# Modèles statistiques / ML # Modèles statistiques / ML
scikit-learn scikit-learn
statsmodels statsmodels

View File

@ -4,6 +4,8 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
from meteo.dataset import load_raw_csv, resample_to_minutes from meteo.dataset import load_raw_csv, resample_to_minutes
from meteo.config import StationLocation
from meteo.solar import add_solar_elevation_column
FORMATTED_CSV_PATH = Path("data/weather_filled_1s.csv") FORMATTED_CSV_PATH = Path("data/weather_filled_1s.csv")
@ -23,6 +25,29 @@ def main() -> None:
df_min = resample_to_minutes(df_1s) df_min = resample_to_minutes(df_1s)
print(f"Après resampling 60s : {len(df_min)} lignes") print(f"Après resampling 60s : {len(df_min)} lignes")
try:
location = StationLocation.from_env(optional=True)
except RuntimeError as exc:
print(f"⚠ Coordonnées GPS invalides : {exc}")
location = None
if location is not None:
print(
f"Ajout de l'élévation solaire (lat={location.latitude}, lon={location.longitude}, "
f"alt={location.elevation_m} m)..."
)
add_solar_elevation_column(
df_min,
latitude=location.latitude,
longitude=location.longitude,
elevation_m=location.elevation_m,
)
else:
print(
" Coordonnées GPS non définies (STATION_LATITUDE / STATION_LONGITUDE). "
"La colonne sun_elevation ne sera pas ajoutée."
)
OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True) OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
df_min.to_csv(OUTPUT_CSV_PATH, index_label="time") df_min.to_csv(OUTPUT_CSV_PATH, index_label="time")
print(f"✔ Dataset minuté écrit dans : {OUTPUT_CSV_PATH.resolve()}") print(f"✔ Dataset minuté écrit dans : {OUTPUT_CSV_PATH.resolve()}")

View File

@ -0,0 +1,128 @@
# scripts/plot_sun_elevation_relationships.py
from __future__ import annotations
from pathlib import Path
import numpy as np
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_binned_statistics
from meteo.plots import plot_binned_profiles, plot_hexbin_with_third_variable
from meteo.config import StationLocation
from meteo.solar import add_solar_elevation_column
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_DIR = Path("figures/sun")
def ensure_sun_elevation(df):
    """
    Make sure ``df`` has a ``sun_elevation`` column.

    Returns ``df`` untouched when the column already exists. Otherwise the
    station coordinates are read from the environment and the column is
    computed in place. Returns ``None`` (after printing a warning) when the
    coordinates are missing or invalid, so the caller can stop cleanly.
    """
    if "sun_elevation" in df.columns:
        return df

    print(" La colonne 'sun_elevation' est absente, tentative de calcul à la volée.")
    try:
        location = StationLocation.from_env(optional=True)
    except RuntimeError as exc:
        # optional=True only covers *missing* coordinates; malformed values
        # still raise, and must not crash the script with a traceback.
        print(f"⚠ Coordonnées GPS invalides : {exc}")
        return None

    if location is None:
        print(
            "⚠ Impossible de calculer l'élévation solaire : définissez STATION_LATITUDE et STATION_LONGITUDE "
            "puis regénérez le dataset (scripts/make_minutely_dataset)."
        )
        return None

    print(
        f"→ Calcul d'élévation solaire avec lat={location.latitude}, lon={location.longitude}, "
        f"alt={location.elevation_m} m."
    )
    add_solar_elevation_column(
        df,
        latitude=location.latitude,
        longitude=location.longitude,
        elevation_m=location.elevation_m,
    )
    return df
def main() -> None:
    """
    Produce the figures relating solar elevation to the other variables.

    Loads the minutely dataset, makes sure the ``sun_elevation`` column is
    available, then writes the binned profiles and the hexbin plots into
    ``OUTPUT_DIR``. Returns early (after a message) when the CSV is missing
    or the solar elevation cannot be obtained.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    loaded = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(loaded)}")
    print(f" Colonnes : {list(loaded.columns)}")
    print()

    df = ensure_sun_elevation(loaded)
    if df is None:
        return
    if "sun_elevation" not in df.columns:
        return

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # --- Binned profiles -------------------------------------------------
    profile_vars = [
        VARIABLES_BY_KEY[key] for key in ("temperature", "humidity", "illuminance")
    ]
    elevation_bins = np.arange(-90, 95, 5)  # 5° wide bins
    stats = compute_binned_statistics(
        df=df,
        bin_source_column="sun_elevation",
        target_columns=[variable.column for variable in profile_vars],
        bins=elevation_bins,
        min_count=100,
        quantiles=(0.2, 0.8),
    )

    profile_output = OUTPUT_DIR / "sun_elevation_profiles.png"
    plot_binned_profiles(
        stats=stats,
        variables=profile_vars,
        output_path=profile_output,
        xlabel="Élévation solaire (°)",
        title="Profils moyens en fonction de l'élévation solaire",
        show_counts=True,
    )
    print(f"✔ Profils sun vs variables : {profile_output}")

    # --- Hexbin plots: (x, y, colour, file name, description) -------------
    hexbin_scenarios = (
        (
            "sun_elevation",
            "illuminance",
            "temperature",
            "hexbin_sun_elevation_vs_illuminance.png",
            "Illuminance en fonction de l'élévation du soleil, couleur = température.",
        ),
        (
            "sun_elevation",
            "temperature",
            "humidity",
            "hexbin_sun_elevation_vs_temperature.png",
            "Température en fonction de l'élévation, couleur = humidité relative.",
        ),
    )

    for x_key, y_key, color_key, filename, description in hexbin_scenarios:
        var_x = VARIABLES_BY_KEY[x_key]
        var_y = VARIABLES_BY_KEY[y_key]
        var_color = VARIABLES_BY_KEY[color_key]
        output_path = OUTPUT_DIR / filename

        print(f"{description}")
        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=60,
            mincnt=10,
            reduce_func_label="moyenne",
            cmap="cividis",
        )
        print(f" ✔ Hexbin enregistré : {output_path}")

    print("✔ Tous les graphiques liés à l'élévation solaire ont été produits.")
if __name__ == "__main__":
main()