1
2025-11-19 17:01:45 +01:00

66 lines
2.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# scripts/make_minutely_dataset.py
from __future__ import annotations
from pathlib import Path
import sys
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv, resample_to_minutes
from meteo.config import StationLocation
from meteo.solar import add_solar_elevation_column
from meteo.season import add_season_column
# Input read by main(): the 1-second CSV. The path is relative, so it is
# resolved against the current working directory at run time.
FORMATTED_CSV_PATH = Path("data", "weather_filled_1s.csv")

# Output written by main(): the per-minute CSV (also relative to the CWD).
OUTPUT_CSV_PATH = Path("data", "weather_minutely.csv")
def main() -> None:
    """Build the per-minute weather CSV from the 1-second CSV.

    Steps, in order:

    1. If ``FORMATTED_CSV_PATH`` does not exist, print a hint naming the
       script to run first and return without doing anything else.
    2. Load the file with ``load_raw_csv`` and resample it with
       ``resample_to_minutes``.
    3. Ask ``StationLocation.from_env(optional=True)`` for the station
       coordinates. When a location is returned, call
       ``add_solar_elevation_column`` with it and pick the hemisphere from
       the sign of the latitude; when it is ``None``, skip that step and
       assume the northern hemisphere.
    4. Call ``add_season_column`` and write the frame to
       ``OUTPUT_CSV_PATH`` with the index labelled ``"time"``.

    NOTE(review): the return values of ``add_solar_elevation_column`` and
    ``add_season_column`` are discarded, so both presumably modify the
    frame in place -- confirm in ``meteo.solar`` / ``meteo.season``.
    """
    # Guard clause: nothing can be done without the 1-second input file.
    if not FORMATTED_CSV_PATH.exists():
        print(f"⚠ Fichier formaté introuvable : {FORMATTED_CSV_PATH}")
        print(' Lancez d\'abord : python "docs/02 - Préparation des données/scripts/fill_formatted_1s.py"')
        return

    per_second = load_raw_csv(FORMATTED_CSV_PATH)
    print(f"Fichier 1s chargé : {FORMATTED_CSV_PATH}")
    print(f" Lignes : {len(per_second)}, colonnes : {list(per_second.columns)}")

    per_minute = resample_to_minutes(per_second)
    print(f"Après resampling 60s : {len(per_minute)} lignes")

    station = StationLocation.from_env(optional=True)
    if station is None:
        # No coordinates available: skip the solar step, default the season
        # computation to the northern hemisphere, and say so.
        hemisphere = "north"
        print(
            " Coordonnées GPS non définies (STATION_LATITUDE / STATION_LONGITUDE). "
            "La colonne sun_elevation ne sera pas ajoutée."
        )
        print(" Saison : hypothèse par défaut = hémisphère nord. Définissez STATION_LATITUDE pour adapter.")
    else:
        # Only a strictly negative latitude selects the southern hemisphere.
        if station.latitude < 0:
            hemisphere = "south"
        else:
            hemisphere = "north"
        print(
            f"Ajout de l'élévation solaire (lat={station.latitude}, lon={station.longitude}, "
            f"alt={station.elevation_m} m)..."
        )
        add_solar_elevation_column(
            per_minute,
            latitude=station.latitude,
            longitude=station.longitude,
            elevation_m=station.elevation_m,
        )

    add_season_column(per_minute, hemisphere=hemisphere)

    # Create the output folder if needed before writing the CSV.
    OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    per_minute.to_csv(OUTPUT_CSV_PATH, index_label="time")
    print(f"✔ Dataset minuté écrit dans : {OUTPUT_CSV_PATH.resolve()}")
# Run the pipeline only when this file is executed as a script; importing
# the module must not trigger it.
if __name__ == "__main__":
    main()