30 lines
810 B
Python
30 lines
810 B
Python
# scripts/inspect_time_column.py
|
|
from __future__ import annotations
|
|
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
|
|
# Raw weather export inspected by this script; the path is relative, so it is
# resolved against the current working directory at run time.
CSV_PATH = Path("data") / "weather_raw_full.csv"
|
|
|
|
|
|
def main() -> None:
    """Print a quick diagnostic of the raw ``time`` column of ``CSV_PATH``.

    The column is read as text, a preview of its first rows is printed, and
    every value is then parsed as ISO 8601 with ``errors="coerce"`` so that
    failures become ``NaT`` instead of raising. The function reports how many
    values could not be parsed, how many of those are simply missing cells,
    and up to 20 distinct examples of genuinely malformed strings.

    Raises:
        FileNotFoundError: if ``CSV_PATH`` does not exist.
        ValueError: if the CSV has no ``time`` column (raised by ``usecols``).
    """
    # Only the "time" column is inspected, so skip loading the other columns.
    # Reading it as "string" keeps the raw text exactly as written in the file.
    df = pd.read_csv(CSV_PATH, usecols=["time"], dtype={"time": "string"})
    times = df["time"]

    print("Aperçu brut de la colonne 'time' :")
    print(times.head(10))
    print()

    # Try to parse as ISO 8601 without raising: failures are coerced to NaT.
    parsed = pd.to_datetime(times, format="ISO8601", errors="coerce")

    invalid_mask = parsed.isna()
    nb_invalid = int(invalid_mask.sum())
    print(f"Nombre de valeurs 'time' non parsables en ISO8601 : {nb_invalid}")

    # Missing cells also come out as NaT; report them separately so they are
    # not mistaken for badly formatted timestamps.
    missing_mask = times.isna()
    nb_missing = int(missing_mask.sum())
    if nb_missing > 0:
        print(f"  dont valeurs manquantes : {nb_missing}")

    # Show only values that are present but failed to parse.
    malformed = times[invalid_mask & ~missing_mask]
    if not malformed.empty:
        print("Exemples de valeurs problématiques :")
        print(malformed.drop_duplicates().head(20))
|
|
|
|
|
|
# Run the inspection only when the file is executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
|