# scripts/describe_minutely_dataset.py
# Command-line helper that prints summary statistics for the minutely weather dataset.

from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
import pandas as pd
|
|
|
|
|
|
# Repository root, taken as the fourth ancestor of this file's resolved path.
# NOTE(review): if this script really lives directly under `scripts/`, the
# root would be `parents[1]` -- confirm the depth against the actual layout.
PROJECT_ROOT = Path(__file__).resolve().parents[3]

# Put the root at the front of the import path (only once) so that the
# project-local `meteo` package can be imported when the script is run directly.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
|
|
|
|
from meteo.dataset import load_raw_csv
|
|
|
|
|
|
# Default location of the minutely dataset; the path is relative, so it is
# resolved against the current working directory when it is opened.
CSV_PATH = Path("data") / "weather_minutely.csv"
|
|
|
|
|
|
def main() -> None:
|
|
if not CSV_PATH.exists():
|
|
print(f"⚠ Fichier introuvable : {CSV_PATH}")
|
|
print(" Assurez-vous d'avoir généré le dataset minuté.")
|
|
return
|
|
|
|
df = load_raw_csv(CSV_PATH)
|
|
print(f"Dataset minuté chargé : {CSV_PATH}")
|
|
print(f" Lignes : {len(df)}")
|
|
print(f" Colonnes : {list(df.columns)}")
|
|
print(f" Période : {df.index[0]} → {df.index[-1]}")
|
|
print()
|
|
|
|
# 1. Résumé statistique classique
|
|
print("=== describe() ===")
|
|
print(df.describe())
|
|
print()
|
|
|
|
# 2. Min / max par variable avec leurs dates
|
|
print("=== Min / max avec dates ===")
|
|
for col in df.columns:
|
|
series = df[col]
|
|
|
|
min_val = series.min()
|
|
max_val = series.max()
|
|
min_ts = series.idxmin()
|
|
max_ts = series.idxmax()
|
|
|
|
print(f"- {col}:")
|
|
print(f" min = {min_val} à {min_ts}")
|
|
print(f" max = {max_val} à {max_ts}")
|
|
print()
|
|
|
|
# 3. Vérification rapide de la continuité temporelle
|
|
print("=== Vérification de la continuité temporelle ===")
|
|
diffs = df.index.to_series().diff().dropna()
|
|
counts = diffs.value_counts().sort_index()
|
|
|
|
print("Différences d'intervalle (top 5):")
|
|
print(counts.head())
|
|
print()
|
|
|
|
nb_not_60s = (diffs != pd.Timedelta(minutes=1)).sum()
|
|
print(f"Nombre d'intervalles ≠ 60s : {nb_not_60s}")
|
|
|
|
|
|
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|