Ajout de l'élévation solaire et visualisations
This commit is contained in:
parent
268d5d65ec
commit
01cf686af3
@ -2,3 +2,6 @@ INFLUXDB_URL=http://
|
||||
INFLUXDB_TOKEN=
|
||||
INFLUXDB_ORG=
|
||||
INFLUXDB_BUCKET=weather
|
||||
STATION_LATITUDE=
|
||||
STATION_LONGITUDE=
|
||||
STATION_ELEVATION=
|
||||
3
docs/08 - Enrichissement du jeu de données.md
Normal file
3
docs/08 - Enrichissement du jeu de données.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Enrichissement du jeu de données
|
||||
|
||||
- Élévation du soleil
|
||||
BIN
figures/sun/hexbin_sun_elevation_vs_illuminance.png
Normal file
BIN
figures/sun/hexbin_sun_elevation_vs_illuminance.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 107 KiB |
BIN
figures/sun/hexbin_sun_elevation_vs_temperature.png
Normal file
BIN
figures/sun/hexbin_sun_elevation_vs_temperature.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 195 KiB |
BIN
figures/sun/sun_elevation_profiles.png
Normal file
BIN
figures/sun/sun_elevation_profiles.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 224 KiB |
@ -134,6 +134,19 @@ class DiurnalCycleStats:
|
||||
quantile_high_level: float | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class BinnedStatistics:
|
||||
centers: np.ndarray
|
||||
intervals: pd.IntervalIndex
|
||||
counts: pd.Series
|
||||
mean: pd.DataFrame
|
||||
median: pd.DataFrame
|
||||
quantile_low: pd.DataFrame | None
|
||||
quantile_high: pd.DataFrame | None
|
||||
quantile_low_level: float | None = None
|
||||
quantile_high_level: float | None = None
|
||||
|
||||
|
||||
def compute_rolling_correlation_series(
|
||||
df: pd.DataFrame,
|
||||
var_x: Variable,
|
||||
@ -456,3 +469,93 @@ def compute_daily_rainfall_totals(
|
||||
}
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def compute_binned_statistics(
|
||||
df: pd.DataFrame,
|
||||
*,
|
||||
bin_source_column: str,
|
||||
target_columns: Sequence[str],
|
||||
bins: Sequence[float] | np.ndarray,
|
||||
min_count: int = 30,
|
||||
quantiles: tuple[float, float] | None = (0.25, 0.75),
|
||||
) -> BinnedStatistics:
|
||||
"""
|
||||
Calcule des statistiques (mean/median/quantiles) pour plusieurs colonnes
|
||||
en regroupant les données selon des intervalles définis sur une colonne source.
|
||||
"""
|
||||
if bin_source_column not in df.columns:
|
||||
raise KeyError(f"Colonne source absente : {bin_source_column}")
|
||||
|
||||
missing_targets = [col for col in target_columns if col not in df.columns]
|
||||
if missing_targets:
|
||||
raise KeyError(f"Colonnes cibles absentes : {missing_targets!r}")
|
||||
|
||||
subset_cols = [bin_source_column, *target_columns]
|
||||
data = df[subset_cols].dropna(subset=[bin_source_column])
|
||||
|
||||
if data.empty:
|
||||
empty_interval_index = pd.IntervalIndex([])
|
||||
empty_df = pd.DataFrame(columns=target_columns)
|
||||
empty_counts = pd.Series(dtype=int)
|
||||
return BinnedStatistics(
|
||||
centers=np.array([]),
|
||||
intervals=empty_interval_index,
|
||||
counts=empty_counts,
|
||||
mean=empty_df,
|
||||
median=empty_df,
|
||||
quantile_low=None,
|
||||
quantile_high=None,
|
||||
)
|
||||
|
||||
categories = pd.cut(data[bin_source_column], bins=bins, include_lowest=True)
|
||||
grouped = data.groupby(categories, observed=False)
|
||||
|
||||
counts = grouped.size()
|
||||
valid_mask = counts >= max(1, min_count)
|
||||
valid_intervals = counts.index[valid_mask]
|
||||
|
||||
if len(valid_intervals) == 0:
|
||||
empty_interval_index = pd.IntervalIndex([])
|
||||
empty_df = pd.DataFrame(columns=target_columns)
|
||||
empty_counts = pd.Series(dtype=int)
|
||||
return BinnedStatistics(
|
||||
centers=np.array([]),
|
||||
intervals=empty_interval_index,
|
||||
counts=empty_counts,
|
||||
mean=empty_df,
|
||||
median=empty_df,
|
||||
quantile_low=None,
|
||||
quantile_high=None,
|
||||
)
|
||||
|
||||
interval_index = pd.IntervalIndex(valid_intervals)
|
||||
|
||||
mean_df = grouped[target_columns].mean().loc[interval_index]
|
||||
median_df = grouped[target_columns].median().loc[interval_index]
|
||||
|
||||
q_low = q_high = None
|
||||
quantile_low_df: pd.DataFrame | None = None
|
||||
quantile_high_df: pd.DataFrame | None = None
|
||||
|
||||
if quantiles is not None:
|
||||
q_low, q_high = quantiles
|
||||
if q_low is not None:
|
||||
quantile_low_df = grouped[target_columns].quantile(q_low).loc[interval_index]
|
||||
if q_high is not None:
|
||||
quantile_high_df = grouped[target_columns].quantile(q_high).loc[interval_index]
|
||||
|
||||
centers = np.array([interval.mid for interval in interval_index])
|
||||
filtered_counts = counts.loc[interval_index]
|
||||
|
||||
return BinnedStatistics(
|
||||
centers=centers,
|
||||
intervals=interval_index,
|
||||
counts=filtered_counts,
|
||||
mean=mean_df,
|
||||
median=median_df,
|
||||
quantile_low=quantile_low_df,
|
||||
quantile_high=quantile_high_df,
|
||||
quantile_low_level=q_low,
|
||||
quantile_high_level=q_high,
|
||||
)
|
||||
|
||||
@ -65,3 +65,58 @@ class InfluxSettings:
|
||||
org=org, # type: ignore[arg-type]
|
||||
bucket=bucket, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class StationLocation:
|
||||
"""
|
||||
Décrit la position géographique de la station météo.
|
||||
Utilisée pour les calculs astronomiques (ex: élévation du soleil).
|
||||
"""
|
||||
|
||||
latitude: float
|
||||
longitude: float
|
||||
elevation_m: float = 0.0
|
||||
|
||||
@classmethod
|
||||
def from_env(cls, *, optional: bool = False) -> Self | None:
|
||||
"""
|
||||
Charge les coordonnées GPS depuis les variables d'environnement :
|
||||
- STATION_LATITUDE (obligatoire)
|
||||
- STATION_LONGITUDE (obligatoire)
|
||||
- STATION_ELEVATION (optionnelle, en mètres)
|
||||
"""
|
||||
load_dotenv()
|
||||
|
||||
lat = os.getenv("STATION_LATITUDE")
|
||||
lon = os.getenv("STATION_LONGITUDE")
|
||||
elev = os.getenv("STATION_ELEVATION")
|
||||
|
||||
if not lat or not lon:
|
||||
if optional:
|
||||
return None
|
||||
raise RuntimeError(
|
||||
"Les variables STATION_LATITUDE et STATION_LONGITUDE doivent être définies "
|
||||
"pour calculer l'élévation solaire."
|
||||
)
|
||||
|
||||
try:
|
||||
latitude = float(lat)
|
||||
longitude = float(lon)
|
||||
elevation = float(elev) if elev else 0.0
|
||||
except ValueError as exc:
|
||||
raise RuntimeError(
|
||||
"STATION_LATITUDE / STATION_LONGITUDE / STATION_ELEVATION doivent être des nombres valides."
|
||||
) from exc
|
||||
|
||||
return cls(latitude=latitude, longitude=longitude, elevation_m=elevation)
|
||||
|
||||
def to_astral_observer_kwargs(self) -> dict[str, float]:
|
||||
"""
|
||||
Prépare les arguments attendus par astral.Observer.
|
||||
"""
|
||||
return {
|
||||
"latitude": self.latitude,
|
||||
"longitude": self.longitude,
|
||||
"elevation": self.elevation_m,
|
||||
}
|
||||
|
||||
@ -11,7 +11,7 @@ import matplotlib.dates as mdates
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .analysis import DiurnalCycleStats
|
||||
from .analysis import DiurnalCycleStats, BinnedStatistics
|
||||
from .variables import Variable
|
||||
|
||||
|
||||
@ -596,6 +596,100 @@ def plot_diurnal_cycle(
|
||||
return output_path.resolve()
|
||||
|
||||
|
||||
def plot_binned_profiles(
|
||||
stats: BinnedStatistics,
|
||||
variables: Sequence[Variable],
|
||||
output_path: str | Path,
|
||||
*,
|
||||
xlabel: str,
|
||||
title: str,
|
||||
show_counts: bool = False,
|
||||
) -> Path:
|
||||
"""
|
||||
Trace les statistiques agrégées d'une ou plusieurs variables en fonction de bins.
|
||||
"""
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if stats.centers.size == 0:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(
|
||||
0.5,
|
||||
0.5,
|
||||
"Aucune donnée suffisante pour ces intervalles.",
|
||||
ha="center",
|
||||
va="center",
|
||||
)
|
||||
ax.set_axis_off()
|
||||
fig.savefig(output_path, dpi=150, bbox_inches="tight")
|
||||
plt.close(fig)
|
||||
return output_path.resolve()
|
||||
|
||||
base_axes = len(variables)
|
||||
total_axes = base_axes + (1 if show_counts else 0)
|
||||
fig, axes = plt.subplots(
|
||||
total_axes,
|
||||
1,
|
||||
sharex=True,
|
||||
figsize=(10, 3 * total_axes),
|
||||
)
|
||||
|
||||
if total_axes == 1:
|
||||
axes = [axes]
|
||||
else:
|
||||
axes = list(axes)
|
||||
|
||||
x_values = stats.centers
|
||||
bin_widths = np.array([interval.length for interval in stats.intervals])
|
||||
|
||||
if show_counts:
|
||||
count_ax = axes.pop(0)
|
||||
count_ax.bar(
|
||||
x_values,
|
||||
stats.counts.to_numpy(dtype=float),
|
||||
width=bin_widths,
|
||||
color="lightgray",
|
||||
edgecolor="gray",
|
||||
align="center",
|
||||
)
|
||||
count_ax.set_ylabel("Nombre de points")
|
||||
count_ax.grid(True, linestyle=":", alpha=0.4)
|
||||
count_ax.set_title("Densité des observations par bin")
|
||||
|
||||
for ax, var in zip(axes, variables):
|
||||
col = var.column
|
||||
ax.plot(x_values, stats.mean[col], color="tab:blue", label="Moyenne")
|
||||
ax.plot(x_values, stats.median[col], color="tab:orange", linestyle="--", label="Médiane")
|
||||
|
||||
if stats.quantile_low is not None and stats.quantile_high is not None:
|
||||
ax.fill_between(
|
||||
x_values,
|
||||
stats.quantile_low[col],
|
||||
stats.quantile_high[col],
|
||||
color="tab:blue",
|
||||
alpha=0.15,
|
||||
label=(
|
||||
f"Quantiles {int(stats.quantile_low_level * 100)}–{int(stats.quantile_high_level * 100)}%"
|
||||
if stats.quantile_low_level is not None and stats.quantile_high_level is not None
|
||||
else "Quantiles"
|
||||
),
|
||||
)
|
||||
|
||||
ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
|
||||
ax.set_ylabel(ylabel)
|
||||
ax.grid(True, linestyle=":", alpha=0.5)
|
||||
|
||||
axes[-1].set_xlabel(xlabel)
|
||||
axes[0].legend(loc="upper right")
|
||||
axes[-1].set_xlim(stats.intervals.left.min(), stats.intervals.right.max())
|
||||
|
||||
fig.suptitle(title)
|
||||
fig.tight_layout(rect=[0, 0, 1, 0.97])
|
||||
fig.savefig(output_path, dpi=150)
|
||||
plt.close(fig)
|
||||
return output_path.resolve()
|
||||
|
||||
|
||||
def plot_daily_rainfall_hyetograph(
|
||||
daily_rain: pd.DataFrame,
|
||||
output_path: str | Path,
|
||||
|
||||
66
meteo/solar.py
Normal file
66
meteo/solar.py
Normal file
@ -0,0 +1,66 @@
|
||||
# meteo/solar.py
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
from astral import Observer
|
||||
from astral.sun import elevation
|
||||
|
||||
|
||||
def _ensure_datetime_index(index: pd.Index) -> pd.DatetimeIndex:
|
||||
if not isinstance(index, pd.DatetimeIndex):
|
||||
raise TypeError("Un DatetimeIndex est requis pour calculer l'élévation solaire.")
|
||||
return index
|
||||
|
||||
|
||||
def _prepare_index(index: pd.DatetimeIndex) -> pd.DatetimeIndex:
|
||||
"""
|
||||
Retourne une version timezone-aware (en UTC) du DatetimeIndex fourni.
|
||||
"""
|
||||
if index.tz is None:
|
||||
return index.tz_localize("UTC")
|
||||
return index.tz_convert("UTC")
|
||||
|
||||
|
||||
def compute_solar_elevation_series(
|
||||
index: pd.Index,
|
||||
*,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
elevation_m: float = 0.0,
|
||||
series_name: str = "sun_elevation",
|
||||
) -> pd.Series:
|
||||
"""
|
||||
Calcule l'élévation du soleil (en degrés) pour chaque timestamp de l'index.
|
||||
"""
|
||||
dt_index = _ensure_datetime_index(index)
|
||||
observer = Observer(latitude=latitude, longitude=longitude, elevation=elevation_m)
|
||||
utc_index = _prepare_index(dt_index)
|
||||
|
||||
values = [
|
||||
float(elevation(observer, ts.to_pydatetime()))
|
||||
for ts in utc_index
|
||||
]
|
||||
|
||||
return pd.Series(values, index=dt_index, name=series_name)
|
||||
|
||||
|
||||
def add_solar_elevation_column(
|
||||
df: pd.DataFrame,
|
||||
*,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
elevation_m: float = 0.0,
|
||||
column_name: str = "sun_elevation",
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Ajoute une colonne `column_name` contenant l'élévation du soleil en degrés.
|
||||
"""
|
||||
series = compute_solar_elevation_series(
|
||||
df.index,
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
elevation_m=elevation_m,
|
||||
series_name=column_name,
|
||||
)
|
||||
df[column_name] = series
|
||||
return df
|
||||
@ -65,6 +65,12 @@ VARIABLES: List[Variable] = [
|
||||
label="Direction du vent",
|
||||
unit="°",
|
||||
),
|
||||
Variable(
|
||||
key="sun_elevation",
|
||||
column="sun_elevation",
|
||||
label="Élévation solaire",
|
||||
unit="°",
|
||||
),
|
||||
]
|
||||
|
||||
VARIABLES_BY_KEY: Dict[str, Variable] = {v.key: v for v in VARIABLES}
|
||||
|
||||
@ -9,6 +9,9 @@ numpy
|
||||
matplotlib
|
||||
seaborn
|
||||
|
||||
# Astronomie / position du soleil
|
||||
astral
|
||||
|
||||
# Modèles statistiques / ML
|
||||
scikit-learn
|
||||
statsmodels
|
||||
|
||||
@ -4,6 +4,8 @@ from __future__ import annotations
|
||||
from pathlib import Path
|
||||
|
||||
from meteo.dataset import load_raw_csv, resample_to_minutes
|
||||
from meteo.config import StationLocation
|
||||
from meteo.solar import add_solar_elevation_column
|
||||
|
||||
|
||||
FORMATTED_CSV_PATH = Path("data/weather_filled_1s.csv")
|
||||
@ -23,6 +25,29 @@ def main() -> None:
|
||||
df_min = resample_to_minutes(df_1s)
|
||||
print(f"Après resampling 60s : {len(df_min)} lignes")
|
||||
|
||||
try:
|
||||
location = StationLocation.from_env(optional=True)
|
||||
except RuntimeError as exc:
|
||||
print(f"⚠ Coordonnées GPS invalides : {exc}")
|
||||
location = None
|
||||
|
||||
if location is not None:
|
||||
print(
|
||||
f"Ajout de l'élévation solaire (lat={location.latitude}, lon={location.longitude}, "
|
||||
f"alt={location.elevation_m} m)..."
|
||||
)
|
||||
add_solar_elevation_column(
|
||||
df_min,
|
||||
latitude=location.latitude,
|
||||
longitude=location.longitude,
|
||||
elevation_m=location.elevation_m,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"ℹ Coordonnées GPS non définies (STATION_LATITUDE / STATION_LONGITUDE). "
|
||||
"La colonne sun_elevation ne sera pas ajoutée."
|
||||
)
|
||||
|
||||
OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
df_min.to_csv(OUTPUT_CSV_PATH, index_label="time")
|
||||
print(f"✔ Dataset minuté écrit dans : {OUTPUT_CSV_PATH.resolve()}")
|
||||
|
||||
128
scripts/plot_sun_elevation_relationships.py
Normal file
128
scripts/plot_sun_elevation_relationships.py
Normal file
@ -0,0 +1,128 @@
|
||||
# scripts/plot_sun_elevation_relationships.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES_BY_KEY
|
||||
from meteo.analysis import compute_binned_statistics
|
||||
from meteo.plots import plot_binned_profiles, plot_hexbin_with_third_variable
|
||||
from meteo.config import StationLocation
|
||||
from meteo.solar import add_solar_elevation_column
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
OUTPUT_DIR = Path("figures/sun")
|
||||
|
||||
|
||||
def ensure_sun_elevation(df):
|
||||
if "sun_elevation" in df.columns:
|
||||
return df
|
||||
|
||||
print("ℹ La colonne 'sun_elevation' est absente, tentative de calcul à la volée.")
|
||||
location = StationLocation.from_env(optional=True)
|
||||
if location is None:
|
||||
print(
|
||||
"⚠ Impossible de calculer l'élévation solaire : définissez STATION_LATITUDE et STATION_LONGITUDE "
|
||||
"puis regénérez le dataset (scripts/make_minutely_dataset)."
|
||||
)
|
||||
return None
|
||||
|
||||
print(
|
||||
f"→ Calcul d'élévation solaire avec lat={location.latitude}, lon={location.longitude}, "
|
||||
f"alt={location.elevation_m} m."
|
||||
)
|
||||
add_solar_elevation_column(
|
||||
df,
|
||||
latitude=location.latitude,
|
||||
longitude=location.longitude,
|
||||
elevation_m=location.elevation_m,
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not CSV_PATH.exists():
|
||||
print(f"⚠ Fichier introuvable : {CSV_PATH}")
|
||||
return
|
||||
|
||||
df = load_raw_csv(CSV_PATH)
|
||||
print(f"Dataset minuté chargé : {CSV_PATH}")
|
||||
print(f" Lignes : {len(df)}")
|
||||
print(f" Colonnes : {list(df.columns)}")
|
||||
print()
|
||||
|
||||
df = ensure_sun_elevation(df)
|
||||
if df is None or "sun_elevation" not in df.columns:
|
||||
return
|
||||
|
||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
profile_keys = ["temperature", "humidity", "illuminance"]
|
||||
profile_vars = [VARIABLES_BY_KEY[key] for key in profile_keys]
|
||||
bins = np.arange(-90, 95, 5) # bins de 5°
|
||||
|
||||
stats = compute_binned_statistics(
|
||||
df=df,
|
||||
bin_source_column="sun_elevation",
|
||||
target_columns=[v.column for v in profile_vars],
|
||||
bins=bins,
|
||||
min_count=100,
|
||||
quantiles=(0.2, 0.8),
|
||||
)
|
||||
|
||||
profile_output = OUTPUT_DIR / "sun_elevation_profiles.png"
|
||||
plot_binned_profiles(
|
||||
stats=stats,
|
||||
variables=profile_vars,
|
||||
output_path=profile_output,
|
||||
xlabel="Élévation solaire (°)",
|
||||
title="Profils moyens en fonction de l'élévation solaire",
|
||||
show_counts=True,
|
||||
)
|
||||
print(f"✔ Profils sun vs variables : {profile_output}")
|
||||
|
||||
hexbin_scenarios = [
|
||||
{
|
||||
"x": "sun_elevation",
|
||||
"y": "illuminance",
|
||||
"color": "temperature",
|
||||
"filename": "hexbin_sun_elevation_vs_illuminance.png",
|
||||
"description": "Illuminance en fonction de l'élévation du soleil, couleur = température.",
|
||||
},
|
||||
{
|
||||
"x": "sun_elevation",
|
||||
"y": "temperature",
|
||||
"color": "humidity",
|
||||
"filename": "hexbin_sun_elevation_vs_temperature.png",
|
||||
"description": "Température en fonction de l'élévation, couleur = humidité relative.",
|
||||
},
|
||||
]
|
||||
|
||||
for scenario in hexbin_scenarios:
|
||||
var_x = VARIABLES_BY_KEY[scenario["x"]]
|
||||
var_y = VARIABLES_BY_KEY[scenario["y"]]
|
||||
var_color = VARIABLES_BY_KEY[scenario["color"]]
|
||||
output_path = OUTPUT_DIR / scenario["filename"]
|
||||
|
||||
print(f"→ {scenario['description']}")
|
||||
plot_hexbin_with_third_variable(
|
||||
df=df,
|
||||
var_x=var_x,
|
||||
var_y=var_y,
|
||||
var_color=var_color,
|
||||
output_path=output_path,
|
||||
gridsize=60,
|
||||
mincnt=10,
|
||||
reduce_func_label="moyenne",
|
||||
cmap="cividis",
|
||||
)
|
||||
print(f" ✔ Hexbin enregistré : {output_path}")
|
||||
|
||||
print("✔ Tous les graphiques liés à l'élévation solaire ont été produits.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
x
Reference in New Issue
Block a user