1
donnees_meteo/docs/12 - Conclusion/scripts/plot_weekday_profiles.py

376 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Directory three levels above the folder that contains this script
# (taken from the resolved, absolute path of the file).
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Put that directory at the front of sys.path, once, before the `meteo`
# imports that follow - presumably so the package resolves when this file
# is run directly as a script.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.plots import export_plot_dataset, plot_weekday_profiles
from meteo.variables import VARIABLES_BY_KEY, Variable
# Input dataset read by main().
CSV_PATH = PROJECT_ROOT.joinpath("data", "weather_minutely.csv")
# Folder one level above the directory that holds this script; every figure
# is written below its "figures" sub-folder.
DOC_DIR = Path(__file__).resolve().parent.parent
OUTPUT_PROFILES_PATH = DOC_DIR.joinpath("figures", "weekday_profiles.png")
OUTPUT_SCORES_PATH = DOC_DIR.joinpath("figures", "weekday_scores.png")
OUTPUT_RADAR_DIR = DOC_DIR.joinpath("figures", "weekday_radars")
# Variables that feed the "pleasantness" rating, and the direction in which
# each one is considered pleasant:
#   - temperature: higher is better,
#   - humidity: lower is better,
#   - atmospheric pressure: best inside a "comfortable" range,
#   - illuminance: higher is better,
#   - wind speed: lower is better.
VARIABLE_KEYS = [
    "temperature",
    "humidity",
    "pressure",
    "illuminance",
    "wind_speed",
]
# Scoring rule applied to each variable; the keys are Variable.key values.
COMFORT_SCORING: dict[str, str] = {
    "temperature": "higher_better",  # warmer weekly mean -> better score
    "humidity": "lower_better",  # drier -> better score
    "pressure": "midrange_better",  # middle of the observed range -> best score
    "illuminance": "higher_better",  # brighter -> better score
    "wind_speed": "lower_better",  # calmer -> better score
}
def compute_weekday_means(df: pd.DataFrame, variables: list[Variable]) -> pd.DataFrame:
    """
    Average each requested variable per day of the week.

    Args:
        df: Observations with a datetime-like index (its ``dayofweek`` is used
            as the grouping key). Must contain the ``column`` of every entry
            in ``variables`` unless it is empty.
        variables: Variables to average; only their ``column`` attribute is read.

    Returns:
        A frame indexed by ``weekday`` (0 = Monday, ..., 6 = Sunday) with one
        column per variable. The seven days are always present; a day without
        observations holds NaN. An empty ``df`` yields the same index and
        columns filled with NaN, so callers can still select every variable.
    """
    columns = [var.column for var in variables]

    if df.empty:
        # Keep the schema of the regular result instead of a column-less frame,
        # which would make `result[var.column]` fail downstream.
        placeholder = pd.DataFrame(index=range(7), columns=columns, dtype=float)
        placeholder.index.name = "weekday"
        return placeholder

    weekday_means = df.groupby(df.index.dayofweek)[columns].mean()
    # Make sure all values 0-6 are present even when some weekdays never occur.
    weekday_means = weekday_means.reindex(range(7))
    weekday_means.index.name = "weekday"
    return weekday_means
def compute_weekday_scores(weekday_means: pd.DataFrame, variables: list[Variable]) -> pd.DataFrame:
    """
    Turn per-weekday means into comfort scores between 0 and 1.

    Every variable is rescaled against its own minimum and maximum over the
    week, using the rule registered for its ``key`` in ``COMFORT_SCORING``
    (an unregistered key or an unknown rule falls back to ``higher_better``):

    - ``higher_better``: 0 at the weekly minimum, 1 at the weekly maximum;
    - ``lower_better``: the reverse;
    - ``midrange_better``: 1 at the midpoint between minimum and maximum,
      decreasing linearly to 0 at both extremes.

    When a variable has (almost) no spread over the week, every day that has
    data scores 1. Days without data keep NaN in all cases.

    Args:
        weekday_means: One row per weekday, one column per variable.
        variables: Variables to score; ``column`` and ``key`` are read.
            Variables whose column is absent or entirely NaN are skipped.

    Returns:
        One score column per scored variable plus ``overall_score`` (row-wise
        mean of the available scores), indexed like ``weekday_means``. If the
        input is empty a column-less frame indexed 0-6 is returned; if no
        variable could be scored, a column-less frame with the input's index.
    """
    if weekday_means.empty:
        return pd.DataFrame(index=range(7))

    def _flat_score(series: pd.Series) -> pd.Series:
        # No spread: every observed day is equally good. Days without data must
        # stay NaN rather than silently receiving a perfect score.
        return series.mask(series.notna(), 1.0)

    def _normalize_monotonic(series: pd.Series, *, higher_is_better: bool) -> pd.Series:
        vmin = float(series.min(skipna=True))
        vmax = float(series.max(skipna=True))
        if np.isclose(vmax, vmin):
            return _flat_score(series)
        norm = (series - vmin) / (vmax - vmin)
        return norm if higher_is_better else 1.0 - norm

    def _normalize_midrange(series: pd.Series) -> pd.Series:
        vmin = float(series.min(skipna=True))
        vmax = float(series.max(skipna=True))
        if np.isclose(vmax, vmin):
            return _flat_score(series)
        mid = 0.5 * (vmin + vmax)
        half_range = 0.5 * (vmax - vmin)
        # 1 at the centre, 0 at the extremes (vmin / vmax), positive in between.
        norm = 1.0 - (series - mid).abs() / half_range
        return norm.clip(lower=0.0, upper=1.0)

    normalizers = {
        "higher_better": lambda s: _normalize_monotonic(s, higher_is_better=True),
        "lower_better": lambda s: _normalize_monotonic(s, higher_is_better=False),
        "midrange_better": _normalize_midrange,
    }
    fallback = normalizers["higher_better"]

    scores = pd.DataFrame(index=weekday_means.index)
    for var in variables:
        col = var.column
        if col not in weekday_means.columns:
            continue
        series = weekday_means[col]
        if series.isna().all():
            continue
        rule = COMFORT_SCORING.get(var.key, "higher_better")
        scores[col] = normalizers.get(rule, fallback)(series)

    if scores.empty:
        return scores

    scores["overall_score"] = scores.mean(axis=1, skipna=True)
    scores.index.name = "weekday"
    return scores
def plot_overall_weekday_score(
    scores: pd.DataFrame,
    weekday_labels: list[str],
    output_path: Path,
) -> Path | None:
    """
    Draw a bar chart of the overall score per day of the week.

    The best day is drawn in a darker colour. Before plotting, the score
    table is passed to ``export_plot_dataset`` together with ``output_path``.

    Args:
        scores: Score table with an ``overall_score`` column; expected to hold
            one row per entry of ``weekday_labels``, in the same order.
        weekday_labels: Tick labels, one per bar.
        output_path: Destination image; its parent directory is created if
            it does not exist.

    Returns:
        The resolved path of the saved figure, or ``None`` (nothing is
        written) when ``overall_score`` is missing or entirely NaN.
    """
    if "overall_score" not in scores.columns or scores["overall_score"].isna().all():
        return None

    output_path.parent.mkdir(parents=True, exist_ok=True)
    export_plot_dataset(scores, output_path)

    values = scores["overall_score"].to_numpy(dtype=float)
    positions = np.arange(len(weekday_labels))
    # nanargmax cannot fail here: the all-NaN case has already returned above.
    colors = ["#9ecae1"] * len(values)
    colors[int(np.nanargmax(values))] = "#08519c"

    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.bar(positions, values, color=colors)
        ax.set_xticks(positions)
        ax.set_xticklabels(weekday_labels)
        ax.set_ylabel("Score global (0-1)")
        ax.set_ylim(0, 1.05)
        ax.set_title("Score global d'agrément par jour de semaine")
        ax.grid(True, axis="y", linestyle=":", alpha=0.5)
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
    return output_path.resolve()
def plot_weekday_radars(
    scores: pd.DataFrame,
    variables: list[Variable],
    weekday_labels_long: list[str],
    output_dir: Path,
) -> list[Path]:
    """
    Write one radar (polar) chart per day of the week, with one axis per variable.

    Only variables whose ``column`` exists in ``scores`` are drawn. A day whose
    scores are all NaN is skipped; isolated NaN values are drawn as 0. Each
    figure is saved as ``weekday_radar_<n>.png`` in ``output_dir`` and the
    day's scores are passed to ``export_plot_dataset`` with that same path.

    Args:
        scores: Score table indexed by weekday number.
        variables: Candidate variables; ``column`` and ``label`` are read.
        weekday_labels_long: Day names indexed by weekday number; a weekday
            outside this list is labelled with its raw index value.
        output_dir: Destination directory, created if needed.

    Returns:
        Resolved paths of the figures written (empty when ``scores`` is empty
        or none of the variables is present).
    """
    paths: list[Path] = []
    if scores.empty:
        return paths

    output_dir.mkdir(parents=True, exist_ok=True)

    plotted = [v for v in variables if v.column in scores.columns]
    if not plotted:
        return paths
    var_cols = [v.column for v in plotted]
    labels = [v.label for v in plotted]

    spokes = np.linspace(0, 2 * np.pi, len(plotted), endpoint=False)
    # Repeat the first angle (and, below, the first value) to close the polygon.
    angles = np.concatenate([spokes, spokes[:1]])

    for weekday in scores.index:
        day_scores = scores.loc[weekday, var_cols]
        if day_scores.isna().all():
            continue

        values = np.nan_to_num(day_scores.to_numpy(dtype=float), nan=0.0)
        values = np.concatenate([values, values[:1]])

        day_number = int(weekday)
        if 0 <= day_number < len(weekday_labels_long):
            day_label = weekday_labels_long[day_number]
        else:
            day_label = str(weekday)
        output_path = output_dir / f"weekday_radar_{day_number}.png"

        fig, ax = plt.subplots(subplot_kw={"projection": "polar"}, figsize=(6, 6))
        try:
            ax.plot(angles, values, marker="o")
            ax.fill(angles, values, alpha=0.25)
            ax.set_xticks(angles[:-1])
            ax.set_xticklabels(labels)
            ax.set_yticks([0.25, 0.5, 0.75, 1.0])
            ax.set_ylim(0, 1.05)
            ax.set_title(f"Profil radar des scores {day_label}")
            fig.tight_layout()
            # Export the raw data behind this radar next to the figure.
            export_plot_dataset(day_scores.to_frame().T, output_path)
            fig.savefig(output_path, dpi=150)
        finally:
            # One figure per day: close it even on failure so they do not pile up.
            plt.close(fig)
        paths.append(output_path.resolve())

    return paths
def plot_weekday_radar_all(
    scores: pd.DataFrame,
    variables: list[Variable],
    weekday_labels_long: list[str],
    output_path: Path,
) -> Path | None:
    """
    Write a single radar (polar) chart that overlays every day of the week.

    Only variables whose ``column`` exists in ``scores`` are drawn. A day whose
    scores are all NaN is left out; isolated NaN values are drawn as 0. The
    plotted score columns are passed to ``export_plot_dataset`` with
    ``output_path``.

    Args:
        scores: Score table indexed by weekday number.
        variables: Candidate variables; ``column`` and ``label`` are read.
        weekday_labels_long: Day names indexed by weekday number, used in the
            legend; a weekday outside this list is labelled with its raw
            index value.
        output_path: Destination image; its parent directory is created if
            it does not exist.

    Returns:
        The resolved path of the saved figure, or ``None`` when ``scores`` is
        empty or none of the variables is present.
    """
    if scores.empty:
        return None

    output_path.parent.mkdir(parents=True, exist_ok=True)

    plotted = [v for v in variables if v.column in scores.columns]
    if not plotted:
        return None
    var_cols = [v.column for v in plotted]
    labels = [v.label for v in plotted]

    spokes = np.linspace(0, 2 * np.pi, len(plotted), endpoint=False)
    # Repeat the first angle (and, below, the first value) to close each polygon.
    angles = np.concatenate([spokes, spokes[:1]])

    fig, ax = plt.subplots(subplot_kw={"projection": "polar"}, figsize=(7, 7))
    try:
        cmap = plt.get_cmap("tab10")
        for idx, weekday in enumerate(scores.index):
            day_scores = scores.loc[weekday, var_cols]
            if day_scores.isna().all():
                continue

            values = np.nan_to_num(day_scores.to_numpy(dtype=float), nan=0.0)
            values = np.concatenate([values, values[:1]])

            day_number = int(weekday)
            if 0 <= day_number < len(weekday_labels_long):
                day_label = weekday_labels_long[day_number]
            else:
                day_label = str(weekday)

            # The colour follows the row position, so it stays stable even
            # when some days are skipped.
            color = cmap(idx % 10)
            ax.plot(angles, values, marker="o", color=color, label=day_label)
            ax.fill(angles, values, color=color, alpha=0.15)

        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(labels)
        ax.set_yticks([0.25, 0.5, 0.75, 1.0])
        ax.set_ylim(0, 1.05)
        ax.set_title("Profils radar des scores tous les jours")
        ax.grid(True, linestyle=":", alpha=0.4)
        ax.legend(loc="upper right", bbox_to_anchor=(1.35, 1.05), borderaxespad=0.0)

        # Export the raw data used for this combined radar.
        export_plot_dataset(scores[var_cols], output_path)

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Release the figure even if drawing, exporting or saving fails.
        plt.close(fig)
    return output_path.resolve()
def main() -> None:
    """
    Run the weekday analysis end to end: load the CSV, print a summary and export the figures.

    Steps:
      1. Load ``CSV_PATH``; if the file is missing, print a warning and stop.
      2. Compute the per-weekday means and export the profile figure.
      3. Print, for each variable, the weekday with the highest mean.
      4. Compute the normalised scores, print them with the best day, then
         export the bar chart, the per-day radars and the combined radar.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    variables = [VARIABLES_BY_KEY[key] for key in VARIABLE_KEYS]
    weekday_means = compute_weekday_means(df, variables)
    output_path = plot_weekday_profiles(
        weekday_df=weekday_means,
        variables=variables,
        output_path=OUTPUT_PROFILES_PATH,
        title="Moyennes par jour de semaine",
    )
    print(f"✔ Profils hebdomadaires exportés : {output_path}")

    weekday_labels_long = ["lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"]
    for var in variables:
        # A variable may have no column in the means (e.g. nothing to average);
        # skip it instead of failing on the lookup.
        if var.column not in weekday_means.columns:
            continue
        series = weekday_means[var.column]
        if series.isna().all():
            continue
        best_idx = int(series.idxmax())
        best_label = weekday_labels_long[best_idx]
        best_value = series.max()
        unit = f" {var.unit}" if var.unit else ""
        print(f"{var.label} maximale en moyenne le {best_label} (≈{best_value:.2f}{unit})")

    # Normalised per-variable scores and the overall score.
    scores = compute_weekday_scores(weekday_means, variables)
    if not scores.empty and "overall_score" in scores.columns:
        print()
        print("Scores globaux (0-1) par jour de semaine :")
        overall = scores["overall_score"]
        for idx, label in enumerate(weekday_labels_long):
            value = overall.get(idx)
            # Missing weekday (None) or a day without any score (NaN).
            if pd.isna(value):
                continue
            print(f" - {label:<9} : {value:.3f}")
        best_idx = int(overall.idxmax())
        best_label = weekday_labels_long[best_idx]
        best_score = overall.max()
        print()
        print(
            f"⇒ Jour le plus « agréable » au sens de ce score normalisé : "
            f"{best_label} (score global ≈{best_score:.3f})."
        )

    # Summary bar chart of the overall scores.
    weekday_labels_short = ["Lun", "Mar", "Mer", "Jeu", "Ven", "Sam", "Dim"]
    scores_path = plot_overall_weekday_score(
        scores=scores,
        weekday_labels=weekday_labels_short,
        output_path=OUTPUT_SCORES_PATH,
    )
    if scores_path is not None:
        print(f"✔ Graphique des scores globaux exporté : {scores_path}")

    # One radar chart per day.
    radar_paths = plot_weekday_radars(
        scores=scores,
        variables=variables,
        weekday_labels_long=weekday_labels_long,
        output_dir=OUTPUT_RADAR_DIR,
    )
    if radar_paths:
        print(f"✔ Graphiques radar exportés ({len(radar_paths)}) dans : {OUTPUT_RADAR_DIR}")

    # Combined radar chart overlaying every day.
    radar_all_path = plot_weekday_radar_all(
        scores=scores,
        variables=variables,
        weekday_labels_long=weekday_labels_long,
        output_path=DOC_DIR / "figures" / "weekday_radar_all.png",
    )
    if radar_all_path is not None:
        print(f"✔ Graphique radar global exporté : {radar_all_path}")
# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()