# Python, 58 lines, 1.5 KiB
# meteo/quality.py
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict
|
|
|
|
import pandas as pd
|
|
|
|
|
|
@dataclass(frozen=True)
class MissingValuesSummary:
    """Immutable summary of the missing values in a DataFrame.

    The stored fields are plain counts; the ``fraction_*`` properties are
    ratios derived from them on access.
    """

    # Overall dimensions of the inspected frame.
    total_rows: int
    total_columns: int
    total_cells: int

    # Missing-value counts: overall, per column label, and per row status.
    missing_cells: int
    missing_by_column: Dict[str, int]
    rows_with_missing: int
    rows_fully_complete: int

    @property
    def fraction_missing(self) -> float:
        """Return ``missing_cells / total_cells``, or 0.0 if there are no cells."""
        if not self.total_cells:
            # Guard against division by zero.
            return 0.0
        return self.missing_cells / self.total_cells

    @property
    def fraction_rows_complete(self) -> float:
        """Return ``rows_fully_complete / total_rows``, or 0.0 if there are no rows."""
        if not self.total_rows:
            # Guard against division by zero.
            return 0.0
        return self.rows_fully_complete / self.total_rows
|
|
|
|
|
|
def summarize_missing_values(df: pd.DataFrame) -> MissingValuesSummary:
    """Compute a summary of the missing values in a DataFrame.

    The DataFrame is only read, never modified.

    Args:
        df: DataFrame to inspect. A cell counts as missing when
            ``df.isna()`` is true for it.

    Returns:
        A ``MissingValuesSummary`` holding the frame's dimensions, the total
        and per-column missing-cell counts, and the number of rows with and
        without at least one missing cell.

    Note:
        ``missing_by_column`` is keyed by column label, so duplicated labels
        share a single entry; ``missing_cells`` still counts every column.
    """
    missing_mask = df.isna()
    total_rows, total_columns = df.shape

    # Reduce the mask once per axis and derive the remaining figures from
    # those two results instead of re-scanning the mask for each of them.
    missing_per_column = missing_mask.sum()
    rows_with_missing = int(missing_mask.any(axis=1).sum())

    return MissingValuesSummary(
        total_rows=total_rows,
        total_columns=total_columns,
        total_cells=int(df.size),
        missing_cells=int(missing_per_column.sum()),
        missing_by_column=missing_per_column.astype(int).to_dict(),
        rows_with_missing=rows_with_missing,
        # Every row either has a missing cell or is fully complete.
        rows_fully_complete=total_rows - rows_with_missing,
    )
|