1
donnees_meteo/meteo/quality.py
2025-11-17 02:00:28 +01:00

58 lines
1.5 KiB
Python

# meteo/quality.py
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict
import pandas as pd
@dataclass(frozen=True)
class MissingValuesSummary:
    """
    Summary of the missing values found in a DataFrame.

    Instances are immutable (frozen dataclass). The two ratio properties
    are derived from the stored counts and fall back to 0.0 when their
    denominator is zero.
    """

    # Shape of the inspected DataFrame and its overall cell count.
    total_rows: int
    total_columns: int
    total_cells: int
    # Missing-cell counts: overall, then per column.
    missing_cells: int
    missing_by_column: Dict[str, int]
    # Row counts: rows with at least one missing cell, and rows with none.
    rows_with_missing: int
    rows_fully_complete: int

    @property
    def fraction_missing(self) -> float:
        """Return ``missing_cells / total_cells``, or 0.0 if there are no cells."""
        if not self.total_cells:
            return 0.0
        return self.missing_cells / self.total_cells

    @property
    def fraction_rows_complete(self) -> float:
        """Return ``rows_fully_complete / total_rows``, or 0.0 if there are no rows."""
        if not self.total_rows:
            return 0.0
        return self.rows_fully_complete / self.total_rows
def summarize_missing_values(df: pd.DataFrame) -> MissingValuesSummary:
    """
    Compute a summary of the missing values in a DataFrame.

    The input DataFrame is not modified.

    Args:
        df: DataFrame to inspect; cells are classified with ``df.isna()``.

    Returns:
        A ``MissingValuesSummary`` holding the shape of ``df``, the number
        of missing cells (overall and per column), and the number of rows
        with and without at least one missing cell.
    """
    missing_mask = df.isna()
    total_rows, total_columns = df.shape

    # Reduce the boolean mask once per axis and derive the remaining figures
    # from those results instead of repeating the same full-frame reductions.
    missing_per_column = missing_mask.sum().astype(int)
    rows_with_missing = int(missing_mask.any(axis=1).sum())

    return MissingValuesSummary(
        total_rows=total_rows,
        total_columns=total_columns,
        total_cells=int(df.size),
        missing_cells=int(missing_per_column.sum()),
        missing_by_column=missing_per_column.to_dict(),
        rows_with_missing=rows_with_missing,
        # Each row either has a missing cell or is fully complete.
        rows_fully_complete=total_rows - rows_with_missing,
    )