You've already forked donnees_meteo
Réorganisation
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
# scripts/check_missing_values.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.quality import summarize_missing_values
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """Print a missing-value report for the minutely dataset.

    The CSV at ``CSV_PATH`` is loaded with ``load_raw_csv`` and handed to
    ``summarize_missing_values``; the resulting counters are printed globally
    and per column.  When at least one cell is missing, the first ten
    incomplete rows are shown as an example.  If the CSV file does not exist,
    a hint is printed and the function returns without doing anything else.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        print(" Assurez-vous d'avoir généré le dataset minuté.")
        return

    frame = load_raw_csv(CSV_PATH)
    print(f"Dataset chargé : {CSV_PATH}")
    print(f" Lignes : {len(frame)}")
    print(f" Colonnes : {list(frame.columns)}")

    report = summarize_missing_values(frame)

    # Global counters.
    print()
    print("=== Synthèse des valeurs manquantes ===")
    print(f"Total de cellules : {report.total_cells}")
    print(f"Cellules manquantes : {report.missing_cells}")
    print(f"Fraction manquante : {report.fraction_missing:.6f}")
    print(f"Lignes complètes : {report.rows_fully_complete}")
    print(f"Lignes avec des trous : {report.rows_with_missing}")
    print(f"Fraction lignes complètes : {report.fraction_rows_complete:.6f}")

    # Per-column breakdown.
    print()
    print("Valeurs manquantes par colonne :")
    for column_name, missing_count in report.missing_by_column.items():
        print(f" - {column_name:13s} : {missing_count}")

    # Final verdict; both outcomes are preceded by one blank line.
    nothing_missing = report.missing_cells == 0
    print()
    if nothing_missing:
        print("✔ Aucune valeur manquante dans le dataset minuté.")
        return

    print("⚠ Il reste des valeurs manquantes.")
    print(" Exemple de lignes concernées :")
    incomplete_mask = frame.isna().any(axis=1)
    print(frame[incomplete_mask].head(10))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,67 @@
|
||||
# scripts/describe_minutely_dataset.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """Print a descriptive overview of the minutely dataset at ``CSV_PATH``.

    The report contains, in order:

    1. the number of rows, the column names and the first/last index labels;
    2. the output of ``DataFrame.describe()``;
    3. for every column, its minimum and maximum together with the index
       label at which each occurs;
    4. a check of the spacing between consecutive index labels, counting the
       intervals that differ from one minute.

    The function returns early (after printing a message) when the CSV file
    does not exist or when the loaded dataset has no rows.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        print(" Assurez-vous d'avoir généré le dataset minuté.")
        return

    df = load_raw_csv(CSV_PATH)

    # Everything below indexes the first/last label and calls idxmin/idxmax,
    # all of which raise on a frame without rows, so stop cleanly here.
    if len(df) == 0:
        print(f"⚠ Le dataset ne contient aucune ligne : {CSV_PATH}")
        return

    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print(f" Période : {df.index[0]} → {df.index[-1]}")
    print()

    # 1. Classic statistical summary
    print("=== describe() ===")
    print(df.describe())
    print()

    # 2. Min / max of each variable, with the index label where it occurs
    print("=== Min / max avec dates ===")
    for col in df.columns:
        # Drop NaN explicitly: idxmin()/idxmax() on a column with no valid
        # value is deprecated or raises, depending on the pandas version.
        valid = df[col].dropna()

        print(f"- {col}:")
        if valid.empty:
            print(" aucune valeur disponible")
            continue

        min_val = valid.min()
        max_val = valid.max()
        min_ts = valid.idxmin()
        max_ts = valid.idxmax()

        print(f" min = {min_val} à {min_ts}")
        print(f" max = {max_val} à {max_ts}")
    print()

    # 3. Quick check of temporal continuity
    print("=== Vérification de la continuité temporelle ===")
    diffs = df.index.to_series().diff().dropna()
    # Sorted by interval length, so head() shows the five shortest intervals.
    counts = diffs.value_counts().sort_index()

    print("Différences d'intervalle (top 5):")
    print(counts.head())
    print()

    nb_not_60s = (diffs != pd.Timedelta(minutes=1)).sum()
    print(f"Nombre d'intervalles ≠ 60s : {nb_not_60s}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,51 @@
|
||||
# tests/export_station_data.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from contextlib import closing
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.config import InfluxSettings
|
||||
from meteo.influx_client import create_influx_client
|
||||
from meteo.station_config import default_station_config
|
||||
from meteo.export import export_station_data
|
||||
|
||||
|
||||
def main() -> None:
    """Export the weather station data to a raw CSV file.

    The InfluxDB settings are read from the environment and echoed, then the
    last 7 days are exported to ``data/weather_raw_7d.csv``.
    """
    influx = InfluxSettings.from_env()
    station = default_station_config()

    print("Configuration InfluxDB :")
    print(f" URL : {influx.url}")
    print(f" Org : {influx.org}")
    print(f" Bucket : {influx.bucket}")
    print()

    # closing() guarantees client.close() is called even if the export fails.
    with closing(create_influx_client(influx)) as client:
        print("→ Export des 7 derniers jours…")
        export_kwargs = {
            "client": client,
            "bucket": influx.bucket,
            "config": station,
            "start": "-7d",  # adjust later if needed
            "stop": None,  # now()
            "output_path": "data/weather_raw_7d.csv",
            "file_format": "csv",
        }
        written_to = export_station_data(**export_kwargs)

    print()
    print(f"✔ Export terminé : {written_to}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,56 @@
|
||||
# tests/export_station_data_full.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from contextlib import closing
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.config import InfluxSettings
|
||||
from meteo.influx_client import create_influx_client
|
||||
from meteo.station_config import default_station_config
|
||||
from meteo.export import export_station_data
|
||||
|
||||
|
||||
def main() -> None:
    """Export the full history of the weather station to a CSV file.

    ``start="0"`` is passed to the exporter, meaning "from the beginning of
    the data" (in practice from the epoch, i.e. everything the bucket holds).
    The result is written to ``data/weather_raw_full.csv``.
    """
    influx = InfluxSettings.from_env()
    station = default_station_config()

    print("Configuration InfluxDB :")
    print(f" URL : {influx.url}")
    print(f" Org : {influx.org}")
    print(f" Bucket : {influx.bucket}")
    print()

    size_warning = (
        "⚠ Attention : un export complet peut produire un fichier volumineux "
        "et prendre un certain temps si l'historique est long."
    )
    print(size_warning)
    print()

    # closing() guarantees client.close() is called even if the export fails.
    with closing(create_influx_client(influx)) as client:
        print("→ Export de l'historique complet…")
        export_kwargs = {
            "client": client,
            "bucket": influx.bucket,
            "config": station,
            "start": "0",  # from the beginning of the data
            "stop": None,  # up to now
            "output_path": "data/weather_raw_full.csv",
            "file_format": "csv",  # "parquet" can be used instead if preferred
        }
        written_to = export_station_data(**export_kwargs)

    print()
    print(f"✔ Export terminé : {written_to}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,38 @@
|
||||
# scripts/fill_formatted_1s.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv, fill_missing_with_previous
|
||||
|
||||
|
||||
INPUT_CSV_PATH = Path("data/weather_formatted_1s.csv")
|
||||
OUTPUT_CSV_PATH = Path("data/weather_filled_1s.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """Fill the gaps of the formatted 1-second CSV and write the result.

    Reads ``INPUT_CSV_PATH``, passes the frame through
    ``fill_missing_with_previous`` and writes the outcome to
    ``OUTPUT_CSV_PATH`` (the index is stored under the ``time`` label).
    When the input file is missing, a hint naming the script to run first is
    printed and nothing is written.
    """
    source_path = INPUT_CSV_PATH
    target_path = OUTPUT_CSV_PATH

    if not source_path.exists():
        print(f"⚠ Fichier introuvable : {source_path}")
        print(' Lancez d\'abord : python "docs/02 - Préparation des données/scripts/format_raw_csv.py"')
        return

    formatted = load_raw_csv(source_path)
    column_names = list(formatted.columns)
    print(f"Fichier 1s formaté chargé : {source_path}")
    print(f" Lignes : {len(formatted)}, colonnes : {column_names}")

    filled = fill_missing_with_previous(formatted)
    print(f"Après propagation des dernières valeurs connues : {len(filled)} lignes")

    # Make sure the destination directory exists before writing.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    filled.to_csv(target_path, index_label="time")
    print(f"✔ Fichier 1s 'complet' écrit dans : {target_path.resolve()}")


if __name__ == "__main__":
    main()
|
||||
37
docs/02 - Préparation des données/scripts/format_raw_csv.py
Normal file
37
docs/02 - Préparation des données/scripts/format_raw_csv.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv, combine_close_observations
|
||||
|
||||
|
||||
RAW_CSV_PATH = Path("data/weather_raw_full.csv")
|
||||
OUTPUT_CSV_PATH = Path("data/weather_formatted_1s.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """Build the formatted 1-second CSV from the raw export.

    Reads ``RAW_CSV_PATH``, combines the observations with
    ``combine_close_observations(freq="1s", agg="mean")`` and writes the
    result to ``OUTPUT_CSV_PATH`` (the index is stored under the ``time``
    label).  When the raw file is missing, a message is printed and nothing
    is written.
    """
    source_path = RAW_CSV_PATH
    target_path = OUTPUT_CSV_PATH

    if not source_path.exists():
        print(f"⚠ Fichier brut introuvable : {source_path}")
        return

    raw = load_raw_csv(source_path)
    column_names = list(raw.columns)
    print(f"Fichier brut chargé : {source_path}")
    print(f" Lignes : {len(raw)}, colonnes : {column_names}")
    print(f" Type d'index : {type(raw.index)}")

    combined = combine_close_observations(raw, freq="1s", agg="mean")
    print(f"Après combinaison (1s) : {len(combined)} lignes")

    # Make sure the destination directory exists before writing.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    combined.to_csv(target_path, index_label="time")
    print(f"✔ Fichier formaté écrit dans : {target_path.resolve()}")


if __name__ == "__main__":
    main()
|
||||
52
docs/02 - Préparation des données/scripts/list_time_gaps.py
Normal file
52
docs/02 - Préparation des données/scripts/list_time_gaps.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# docs/02 - Préparation des données/scripts/list_time_gaps.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.gaps import find_time_gaps
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """List the time gaps found in the minutely dataset.

    Loads ``CSV_PATH``, runs ``find_time_gaps`` on it and prints the number of
    gaps and the sum of their ``missing_intervals``.  When gaps exist, the ten
    with the most missing intervals are listed.  When the CSV file is missing,
    a message is printed and the function returns.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    frame = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(frame)}")

    gaps = find_time_gaps(frame)
    missing_total = sum(gap.missing_intervals for gap in gaps)

    print()
    print("=== Gaps temporels détectés ===")
    print(f"Nombre de gaps : {len(gaps)}")
    print(f"Total minutes manquantes (théoriques) : {missing_total}")
    print()

    if not gaps:
        print("✔ Aucun gap détecté, la série est parfaitement régulière.")
        return

    def by_missing_intervals(gap):
        """Sort key: number of intervals missing inside the gap."""
        return gap.missing_intervals

    print("Top 10 des gaps les plus longs :")
    longest_first = sorted(gaps, key=by_missing_intervals, reverse=True)
    for gap in longest_first[:10]:
        description = (
            f"- De {gap.before} à {gap.after} "
            f"(durée: {gap.duration}, manquants: {gap.missing_intervals}, "
            f"de {gap.missing_start} à {gap.missing_end})"
        )
        print(description)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,65 @@
|
||||
# scripts/make_minutely_dataset.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv, resample_to_minutes
|
||||
from meteo.config import StationLocation
|
||||
from meteo.solar import add_solar_elevation_column
|
||||
from meteo.season import add_season_column
|
||||
|
||||
|
||||
FORMATTED_CSV_PATH = Path("data/weather_filled_1s.csv")
|
||||
OUTPUT_CSV_PATH = Path("data/weather_minutely.csv")
|
||||
|
||||
|
||||
def main() -> None:
    """Build the minutely dataset from the filled 1-second CSV.

    Reads ``FORMATTED_CSV_PATH``, resamples it with ``resample_to_minutes``,
    then enriches the frame before writing it to ``OUTPUT_CSV_PATH`` (the
    index is stored under the ``time`` label):

    * when a station location is available from the environment,
      ``add_solar_elevation_column`` is called with its coordinates and the
      hemisphere is derived from the sign of the latitude;
    * otherwise the solar step is skipped and the northern hemisphere is
      assumed;
    * ``add_season_column`` is always called with the chosen hemisphere.

    Both helpers are called for their effect on the frame; their return
    values are not used.  When the input file is missing, a hint naming the
    script to run first is printed and nothing is written.
    """
    source_path = FORMATTED_CSV_PATH
    target_path = OUTPUT_CSV_PATH

    if not source_path.exists():
        print(f"⚠ Fichier formaté introuvable : {source_path}")
        print(' Lancez d\'abord : python "docs/02 - Préparation des données/scripts/fill_formatted_1s.py"')
        return

    per_second = load_raw_csv(source_path)
    print(f"Fichier 1s chargé : {source_path}")
    print(f" Lignes : {len(per_second)}, colonnes : {list(per_second.columns)}")

    per_minute = resample_to_minutes(per_second)
    print(f"Après resampling 60s : {len(per_minute)} lignes")

    location = StationLocation.from_env(optional=True)

    if location is None:
        # No coordinates: skip the solar column and assume the north.
        hemisphere = "north"
        print(
            "ℹ Coordonnées GPS non définies (STATION_LATITUDE / STATION_LONGITUDE). "
            "La colonne sun_elevation ne sera pas ajoutée."
        )
        print("ℹ Saison : hypothèse par défaut = hémisphère nord. Définissez STATION_LATITUDE pour adapter.")
    else:
        if location.latitude < 0:
            hemisphere = "south"
        else:
            hemisphere = "north"
        print(
            f"Ajout de l'élévation solaire (lat={location.latitude}, lon={location.longitude}, "
            f"alt={location.elevation_m} m)..."
        )
        add_solar_elevation_column(
            per_minute,
            latitude=location.latitude,
            longitude=location.longitude,
            elevation_m=location.elevation_m,
        )

    add_season_column(per_minute, hemisphere=hemisphere)

    # Make sure the destination directory exists before writing.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    per_minute.to_csv(target_path, index_label="time")
    print(f"✔ Dataset minuté écrit dans : {target_path.resolve()}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user