53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
# scripts/check_missing_values.py
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from meteo.dataset import load_raw_csv
|
|
from meteo.quality import summarize_missing_values
|
|
|
|
|
|
# Input CSV that main() checks for existence and then loads.
# The path is relative, so it is resolved against the current working directory.
CSV_PATH = Path("data/weather_minutely.csv")
|
|
|
|
|
|
def main() -> None:
    """Print a missing-value report for the CSV file at ``CSV_PATH``.

    When the file does not exist, a two-line warning is printed and the
    function returns without loading anything. Otherwise the file is
    loaded with ``load_raw_csv``, summarised with
    ``summarize_missing_values``, and the following is written to standard
    output: the row count and column list, the overall totals, the
    per-column missing counts, and finally either a success message or the
    first ten rows that contain at least one missing value.
    """
    source = CSV_PATH

    # Guard clause: without the input file there is nothing to analyse.
    if not source.exists():
        print(f"⚠ Fichier introuvable : {source}")
        print(" Assurez-vous d'avoir généré le dataset minuté.")
        return

    frame = load_raw_csv(source)
    print(f"Dataset chargé : {source}")
    print(f" Lignes : {len(frame)}")
    print(f" Colonnes : {list(frame.columns)}")

    report = summarize_missing_values(frame)

    # Overall totals.
    print()
    print("=== Synthèse des valeurs manquantes ===")
    print(f"Total de cellules : {report.total_cells}")
    print(f"Cellules manquantes : {report.missing_cells}")
    print(f"Fraction manquante : {report.fraction_missing:.6f}")
    print(f"Lignes complètes : {report.rows_fully_complete}")
    print(f"Lignes avec des trous : {report.rows_with_missing}")
    print(f"Fraction lignes complètes : {report.fraction_rows_complete:.6f}")

    # Per-column breakdown; column names are padded to 13 characters.
    print()
    print("Valeurs manquantes par colonne :")
    for column, missing_count in report.missing_by_column.items():
        print(f" - {column:13s} : {missing_count}")

    # Both outcomes begin with a blank separator line.
    nothing_missing = report.missing_cells == 0
    print()
    if nothing_missing:
        print("✔ Aucune valeur manquante dans le dataset minuté.")
        return

    print("⚠ Il reste des valeurs manquantes.")
    print(" Exemple de lignes concernées :")
    # Boolean mask: True for every row holding at least one NaN/None cell.
    row_has_gap = frame.isna().any(axis=1)
    print(frame[row_has_gap].head(10))
|
|
|
|
|
|
# Run the report only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|