1

Compare commits

...

2 Commits

Author SHA1 Message Date
a36157b52f Graphiques de corrélations binaires simples 2025-11-20 21:45:24 +01:00
df7fbf07ed Amélioration des vues basiques 2025-11-20 21:12:02 +01:00
42 changed files with 682 additions and 53 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

View File

@ -4,27 +4,29 @@ On peut désormais tracer nos premiers graphiques simples et bruts.
S'ils ne sont pas très instructifs par rapport à ce que nous fournissent Home Assistant et InfluxDB, ils nous permettent au moins de nous assurer que tout fonctionne, et que les données semblent cohérentes. S'ils ne sont pas très instructifs par rapport à ce que nous fournissent Home Assistant et InfluxDB, ils nous permettent au moins de nous assurer que tout fonctionne, et que les données semblent cohérentes.
Les fichiers CSV correspondant à chaque figure sont conservés dans `data/` dans ce dossier. Les fichiers CSV correspondant à chaque figure sont conservés dans `data/` dans ce dossier.
On se limite dans un premier temps aux 7 derniers jours. Les graphiques couvrent maintenant toute la période disponible dans `data/weather_minutely.csv`.
Une agrégation automatique réduit le nombre de points pour rester lisible (plus de courbes "peignes"), et l'axe des dates utilise un format compact qui évite tout chevauchement de labels.
On peut au besoin restreindre la période avec `--days` ou imposer une fréquence d'agrégation avec `--resample`.
```shell ```shell
python "docs/03 - Premiers graphiques/scripts/plot_basic_variables.py" python "docs/03 - Premiers graphiques/scripts/plot_basic_variables.py"
``` ```
![](figures/temperature_last_7_days.png) ![](figures/temperature_overview.png)
![](figures/pressure_last_7_days.png) ![](figures/pressure_overview.png)
![](figures/humidity_last_7_days.png) ![](figures/humidity_overview.png)
![](figures/rain_rate_last_7_days.png) ![](figures/rain_rate_overview.png)
![](figures/illuminance_last_7_days.png) ![](figures/wind_speed_overview.png)
![](figures/wind_speed_last_7_days.png) ![](figures/wind_direction_overview.png)
![](figures/wind_direction_last_7_days.png) ![](figures/illuminance_overview.png)
![](figures/sun_elevation_last_7_days.png) ![](figures/sun_elevation_overview.png)
## Vues calendrier ## Vues calendrier

View File

@ -1,5 +1,5 @@
# scripts/plot_basic_variables.py # scripts/plot_basic_variables.py
"""Génère des séries temporelles simples (7 jours) pour chaque variable météo.""" """Génère des séries temporelles simples pour chaque variable météo."""
from __future__ import annotations from __future__ import annotations
@ -7,7 +7,6 @@ import argparse
from pathlib import Path from pathlib import Path
import sys import sys
import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
@ -16,7 +15,7 @@ if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT)) sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv from meteo.dataset import load_raw_csv
from meteo.plots import export_plot_dataset from meteo.plots import PlotChoice, PlotStyle, plot_basic_series, recommended_style, resample_series_for_plot
from meteo.variables import Variable, VARIABLES from meteo.variables import Variable, VARIABLES
@ -25,47 +24,32 @@ DOC_DIR = Path(__file__).resolve().parent.parent
DEFAULT_OUTPUT_DIR = DOC_DIR / "figures" DEFAULT_OUTPUT_DIR = DOC_DIR / "figures"
def _prepare_slice(df: pd.DataFrame, *, last_days: int) -> pd.DataFrame: def _select_window(df: pd.DataFrame, *, last_days: int | None) -> pd.DataFrame:
"""Extrait la fenêtre temporelle souhaitée et applique une moyenne horaire pour lisser la courbe.""" """Extrait la fenêtre temporelle souhaitée (ou la totalité si None)."""
if last_days is None:
return df
end = df.index.max() end = df.index.max()
start = end - pd.Timedelta(days=last_days) start = end - pd.Timedelta(days=last_days)
df_slice = df.loc[start:end] return df.loc[start:end]
numeric_slice = df_slice.select_dtypes(include="number")
if numeric_slice.empty:
raise RuntimeError("Aucune colonne numérique disponible pour les moyennes horaires.")
return numeric_slice.resample("1h").mean()
def _plot_variable(df_hourly: pd.DataFrame, var: Variable, output_dir: Path) -> Path | None: def _format_ylabel(var: Variable) -> str:
"""Trace la série pour une variable et retourne le chemin de l'image générée."""
if var.column not in df_hourly.columns:
print(f"⚠ Colonne absente pour {var.key} ({var.column}).")
return None
series = df_hourly[var.column].dropna()
if series.empty:
print(f"⚠ Aucun point valide pour {var.key} dans l'intervalle choisi.")
return None
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / f"{var.key}_last_7_days.png"
export_plot_dataset(series.to_frame(name=var.column), output_path)
plt.figure()
plt.plot(series.index, series)
plt.xlabel("Temps (UTC)")
unit_text = f" ({var.unit})" if var.unit else "" unit_text = f" ({var.unit})" if var.unit else ""
plt.ylabel(f"{var.label}{unit_text}") return f"{var.label}{unit_text}"
plt.title(f"{var.label} - Moyenne horaire sur les 7 derniers jours")
plt.grid(True)
plt.tight_layout() def _aggregation_label(choice: PlotChoice, freq: str) -> str:
plt.savefig(output_path, dpi=150) """Texte court pour indiquer l'agrégation appliquée."""
plt.close()
print(f"✔ Graphique généré : {output_path}") base = "moyenne"
return output_path if callable(choice.agg) and getattr(choice.agg, "__name__", "") == "_circular_mean_deg":
base = "moyenne circulaire"
elif choice.agg == "sum":
base = "somme"
elif choice.agg == "median":
base = "médiane"
return f"{base} {freq}"
def main(argv: list[str] | None = None) -> None: def main(argv: list[str] | None = None) -> None:
@ -78,8 +62,23 @@ def main(argv: list[str] | None = None) -> None:
parser.add_argument( parser.add_argument(
"--days", "--days",
type=int, type=int,
default=7, default=None,
help="Nombre de jours à afficher (par défaut : 7).", help="Nombre de jours à afficher (par défaut : toute la période disponible).",
)
parser.add_argument(
"--style",
choices=[style.value for style in PlotStyle],
help="Style de représentation à utiliser pour toutes les variables (par défaut : recommandations par variable).",
)
parser.add_argument(
"--resample",
help="Fréquence pandas à utiliser pour l'agrégation temporelle (par défaut : calcul automatique).",
)
parser.add_argument(
"--max-points",
type=int,
default=420,
help="Nombre de points cible après agrégation automatique (par défaut : 420).",
) )
parser.add_argument( parser.add_argument(
"--output-dir", "--output-dir",
@ -93,7 +92,7 @@ def main(argv: list[str] | None = None) -> None:
raise FileNotFoundError(f"Dataset introuvable : {CSV_PATH}") raise FileNotFoundError(f"Dataset introuvable : {CSV_PATH}")
df = load_raw_csv(CSV_PATH) df = load_raw_csv(CSV_PATH)
df_hourly = _prepare_slice(df, last_days=args.days) df_window = _select_window(df, last_days=args.days)
selected: list[Variable] selected: list[Variable]
if args.only: if args.only:
@ -105,8 +104,44 @@ def main(argv: list[str] | None = None) -> None:
else: else:
selected = list(VARIABLES) selected = list(VARIABLES)
output_dir: Path = args.output_dir
output_dir.mkdir(parents=True, exist_ok=True)
for variable in selected: for variable in selected:
_plot_variable(df_hourly, variable, args.output_dir) if variable.column not in df_window.columns:
print(f"⚠ Colonne absente pour {variable.key} ({variable.column}).")
continue
series = df_window[variable.column].dropna()
if series.empty:
print(f"⚠ Aucun point valide pour {variable.key} sur la période choisie.")
continue
style_choice = recommended_style(variable, args.style)
aggregated, freq_used = resample_series_for_plot(
series,
variable=variable,
freq=args.resample,
target_points=args.max_points,
)
if aggregated.empty:
print(f"⚠ Pas de points après agrégation pour {variable.key}.")
continue
output_path = output_dir / f"{variable.key}_overview.png"
annotate_freq = _aggregation_label(style_choice, freq_used)
plot_basic_series(
aggregated,
variable=variable,
output_path=output_path,
style=style_choice.style,
title=f"{variable.label} — évolution temporelle",
ylabel=_format_ylabel(variable),
annotate_freq=annotate_freq,
)
print(f"✔ Graphique généré : {output_path}")
if __name__ == "__main__": if __name__ == "__main__":

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 164 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 219 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 154 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 127 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 205 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 179 KiB

View File

@ -1,7 +1,66 @@
# Corrélations binaires # Corrélations binaires
Cette étape regroupe l'ensemble des scripts dédiés aux corrélations et comparaisons directes entre variables. ## Superpositions simples
Chaque figure déposée dans `figures/` possède son CSV compagnon exporté dans le dossier `data/` au même emplacement.
```shell
python "docs/04 - Corrélations binaires/scripts/plot_pairwise_time_series.py"
```
![](figures/pairwise_timeseries/timeseries_temperature_vs_humidity.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_pressure.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_pressure.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_wind_speed_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_wind_speed_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_wind_direction_vs_sun_elevation.png)
## Nuages de points
```shell ```shell
python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py" python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"

View File

@ -0,0 +1,169 @@
# scripts/plot_pairwise_time_series.py
from __future__ import annotations
from pathlib import Path
import sys
import argparse
import pandas as pd
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.plots import (
PlotChoice,
PlotStyle,
plot_dual_time_series,
recommended_style,
resample_series_for_plot,
)
from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
# Input dataset. The path is relative, so it is resolved against the current
# working directory when the script runs.
CSV_PATH = Path("data/weather_minutely.csv")
# Grandparent directory of this script file (two `.parent` hops from the
# resolved script path).
DOC_DIR = Path(__file__).resolve().parent.parent
# Default destination of the generated figures (overridable with --output-dir).
OUTPUT_DIR = DOC_DIR / "figures" / "pairwise_timeseries"
def _select_variables(keys: list[str] | None) -> list[Variable]:
    """Resolve variable keys into ``Variable`` objects.

    Args:
        keys: Keys to look up in ``VARIABLES_BY_KEY``. ``None`` or an empty
            list selects every entry of ``VARIABLES``.

    Returns:
        The matching variables, in the order the keys were given.

    Raises:
        KeyError: If at least one key is absent from ``VARIABLES_BY_KEY``.
    """
    if keys:
        unknown = [name for name in keys if name not in VARIABLES_BY_KEY]
        if unknown:
            raise KeyError(f"Variables inconnues : {', '.join(unknown)}")
        return [VARIABLES_BY_KEY[name] for name in keys]
    return list(VARIABLES)
def _aggregation_label(choice_a: PlotChoice, choice_b: PlotChoice, freq: str) -> str:
agg_labels = set()
for choice in (choice_a, choice_b):
base = "moyenne"
if isinstance(choice.agg, str):
if choice.agg == "sum":
base = "somme"
elif choice.agg == "median":
base = "médiane"
elif getattr(choice.agg, "__name__", "") == "_circular_mean_deg":
base = "moyenne circulaire"
agg_labels.add(base)
if len(agg_labels) == 1:
label = agg_labels.pop()
else:
label = "agrégations mixtes"
return f"{label} {freq}"
def _build_parser() -> argparse.ArgumentParser:
    """Declare the command-line options accepted by this script."""
    parser = argparse.ArgumentParser(
        description="Superpose les séries temporelles de toutes les paires de variables.",
    )
    parser.add_argument(
        "--only",
        nargs="*",
        help="Clés de variables à inclure (par défaut : toutes).",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="Limiter aux N derniers jours (par défaut : période complète).",
    )
    parser.add_argument(
        "--style",
        choices=[member.value for member in PlotStyle],
        help="Style à imposer à toutes les variables (par défaut : style recommandé par variable).",
    )
    parser.add_argument(
        "--resample",
        help="Fréquence pandas pour l'agrégation temporelle (par défaut : calcul automatique).",
    )
    parser.add_argument(
        "--max-points",
        type=int,
        default=420,
        help="Nombre de points cible après agrégation automatique (par défaut : 420).",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=OUTPUT_DIR,
        help="Dossier où stocker les figures.",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """Plot every selected pair of variables as two overlaid time series.

    Steps: parse the options, load ``CSV_PATH``, optionally keep only the
    last ``--days`` days, then for each pair resample both series at the
    same frequency and write one figure per pair into ``--output-dir``.
    A missing dataset, an empty pair list, absent columns or empty series
    are reported on stdout and skipped rather than raised.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.
    """
    args = _build_parser().parse_args(argv)

    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    frame = load_raw_csv(CSV_PATH)
    if args.days is not None:
        # The window is anchored on the most recent timestamp of the dataset.
        window_end = frame.index.max()
        window_start = window_end - pd.Timedelta(days=args.days)
        frame = frame.loc[window_start:window_end]

    wanted = _select_variables(args.only)
    pairs = [
        (first, second)
        for first, second in iter_variable_pairs()
        if first in wanted and second in wanted
    ]
    if not pairs:
        print("⚠ Aucune paire à tracer.")
        return

    output_dir: Path = args.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Dataset chargé ({len(frame)} lignes) → génération de {len(pairs)} paires.")

    for var_a, var_b in pairs:
        missing = [col for col in (var_a.column, var_b.column) if col not in frame.columns]
        if missing:
            print(f"⚠ Colonnes absentes, on passe : {', '.join(missing)}")
            continue

        series_a = frame[var_a.column].dropna()
        series_b = frame[var_b.column].dropna()
        if series_a.empty or series_b.empty:
            print(f"⚠ Données insuffisantes pour {var_a.key} / {var_b.key}, on passe.")
            continue

        choice_a = recommended_style(var_a, args.style)
        choice_b = recommended_style(var_b, args.style)

        aggregated_a, freq_used = resample_series_for_plot(
            series_a,
            variable=var_a,
            freq=args.resample,
            target_points=args.max_points,
        )
        # The second series reuses the frequency chosen for the first one so
        # that both curves are aggregated on the same time step.
        aggregated_b, _ = resample_series_for_plot(
            series_b,
            variable=var_b,
            freq=freq_used,
            target_points=args.max_points,
        )
        if aggregated_a.empty or aggregated_b.empty:
            print(f"⚠ Pas de points après agrégation pour {var_a.key} / {var_b.key}.")
            continue

        output_path = output_dir / f"timeseries_{var_a.key}_vs_{var_b.key}.png"
        label_freq = _aggregation_label(choice_a, choice_b, freq_used)
        print(f"{var_a.key} vs {var_b.key} ({freq_used}) → {output_path}")
        plot_dual_time_series(
            aggregated_a,
            var_a,
            choice_a,
            aggregated_b,
            var_b,
            choice_b,
            output_path=output_path,
            title=f"{var_a.label} et {var_b.label} — évolution temporelle",
            annotate_freq=label_freq,
        )

    print("✔ Superpositions temporelles générées.")


if __name__ == "__main__":
    main()

View File

@ -22,6 +22,14 @@ from .relationships import (
plot_pairwise_relationship_grid, plot_pairwise_relationship_grid,
plot_scatter_pair, plot_scatter_pair,
) )
from .basic_series import (
PlotChoice,
PlotStyle,
plot_basic_series,
plot_dual_time_series,
recommended_style,
resample_series_for_plot,
)
from .seasonal_profiles import ( from .seasonal_profiles import (
plot_daylight_hours, plot_daylight_hours,
plot_diurnal_cycle, plot_diurnal_cycle,
@ -54,6 +62,12 @@ __all__ = [
"plot_hexbin_with_third_variable", "plot_hexbin_with_third_variable",
"plot_pairwise_relationship_grid", "plot_pairwise_relationship_grid",
"plot_scatter_pair", "plot_scatter_pair",
"PlotChoice",
"PlotStyle",
"plot_basic_series",
"plot_dual_time_series",
"recommended_style",
"resample_series_for_plot",
"plot_daylight_hours", "plot_daylight_hours",
"plot_diurnal_cycle", "plot_diurnal_cycle",
"plot_seasonal_hourly_profiles", "plot_seasonal_hourly_profiles",

350
meteo/plots/basic_series.py Normal file
View File

@ -0,0 +1,350 @@
"""Tracés simples et réutilisables pour les séries temporelles de base."""
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Callable
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from meteo.dataset import _circular_mean_deg
from meteo.variables import Variable
from .base import export_plot_dataset
__all__ = [
"PlotStyle",
"PlotChoice",
"recommended_style",
"resample_series_for_plot",
"plot_basic_series",
"plot_dual_time_series",
]
class PlotStyle(str, Enum):
    """Rendering styles available for a time series.

    Members also inherit from ``str``, so each one compares equal to its
    lowercase value (e.g. ``PlotStyle.LINE == "line"``).
    """

    LINE = "line"
    AREA = "area"
    BAR = "bar"
    SCATTER = "scatter"
@dataclass(frozen=True)
class PlotChoice:
    """Per-variable configuration: plot style and aggregation function."""

    # Rendering style used to draw the variable.
    style: PlotStyle
    # Aggregation passed to the pandas resampler's ``agg``: either a callable
    # reducing a Series to a float, or an aggregation name such as "mean".
    agg: Callable[[pd.Series], float] | str = "mean"
# Default style and aggregation, keyed by variable key.
DEFAULT_CHOICES: dict[str, PlotChoice] = {
    # Continuous variations: lines or areas.
    "temperature": PlotChoice(PlotStyle.LINE, "mean"),
    "pressure": PlotChoice(PlotStyle.LINE, "mean"),
    "humidity": PlotChoice(PlotStyle.AREA, "mean"),
    "illuminance": PlotChoice(PlotStyle.AREA, "mean"),
    "sun_elevation": PlotChoice(PlotStyle.AREA, "mean"),
    # Variables that read better with other representations.
    "rain_rate": PlotChoice(PlotStyle.BAR, "mean"),
    "wind_speed": PlotChoice(PlotStyle.LINE, "mean"),
    "wind_direction": PlotChoice(PlotStyle.SCATTER, _circular_mean_deg),
}
# Soft yet contrasted palette, keyed by variable.
PALETTE = {
    "temperature": "#d1495b",
    "pressure": "#5c677d",
    "humidity": "#2c7bb6",
    "rain_rate": "#1b9e77",
    "illuminance": "#f4a259",
    "wind_speed": "#118ab2",
    "wind_direction": "#8e6c8a",
    "sun_elevation": "#f08c42",
}
# Fallback color used by _series_color when a variable has no palette entry.
DEFAULT_COLOR = "#386cb0"
def recommended_style(variable: Variable, override: str | None = None) -> PlotChoice:
    """Return the plot choice for a variable, optionally forcing the style.

    Args:
        variable: Variable whose ``key`` is looked up in ``DEFAULT_CHOICES``.
        override: Style value (e.g. ``"line"``) imposed by the user. The
            aggregation still comes from the variable's default choice.

    Returns:
        The default choice, a line/mean choice for unknown variables, or the
        forced style combined with the default aggregation.

    Raises:
        ValueError: If ``override`` is not a valid ``PlotStyle`` value.
    """
    default = DEFAULT_CHOICES.get(variable.key)
    if not override:
        return default if default is not None else PlotChoice(PlotStyle.LINE)

    forced = PlotStyle(override)
    # Only the style is overridden; the aggregation stays variable-specific.
    aggregation = default.agg if default is not None else PlotChoice(forced).agg
    return PlotChoice(forced, aggregation)
def _nice_frequencies() -> list[tuple[str, pd.Timedelta]]:
return [
("5min", pd.Timedelta(minutes=5)),
("10min", pd.Timedelta(minutes=10)),
("15min", pd.Timedelta(minutes=15)),
("30min", pd.Timedelta(minutes=30)),
("1h", pd.Timedelta(hours=1)),
("3h", pd.Timedelta(hours=3)),
("6h", pd.Timedelta(hours=6)),
("12h", pd.Timedelta(hours=12)),
("1d", pd.Timedelta(days=1)),
("3d", pd.Timedelta(days=3)),
("7d", pd.Timedelta(days=7)),
]
def _auto_resample_frequency(index: pd.DatetimeIndex, *, target_points: int = 420) -> str:
    """Pick a frequency that caps the number of points while keeping the overall shape.

    Args:
        index: Timestamps of the series to resample.
        target_points: Maximum number of steps the covered span may contain.

    Returns:
        The finest label from ``_nice_frequencies()`` whose step divides the
        span into at most ``target_points`` parts, the coarsest label when
        none qualifies, or ``"1h"`` when the index has fewer than two entries
        or a non-positive span.
    """
    fallback = "1h"
    if len(index) < 2:
        return fallback

    span = index.max() - index.min()
    if span <= pd.Timedelta(0):
        return fallback

    candidates = _nice_frequencies()
    coarsest_label = candidates[-1][0]
    return next(
        (label for label, step in candidates if span / step <= target_points),
        coarsest_label,
    )
def _format_time_axis(ax: plt.Axes) -> None:
    """Install a compact date locator and formatter on the x axis of ``ax``."""
    tick_locator = mdates.AutoDateLocator(minticks=4, maxticks=8)
    concise_formats = ["%Y", "%b", "%d", "%d %H:%M", "%H:%M", "%S"]
    x_axis = ax.xaxis
    x_axis.set_major_locator(tick_locator)
    x_axis.set_major_formatter(
        mdates.ConciseDateFormatter(tick_locator, formats=concise_formats)
    )
def _infer_bar_width(index: pd.DatetimeIndex) -> float:
"""
Calcule une largeur de barre raisonnable (en jours) pour les histogrammes temporels.
"""
if len(index) < 2:
return 0.3 # ~7 heures, pour rendre le point visible même isolé
diffs = np.diff(index.asi8) # nanosecondes
median_ns = float(np.median(diffs))
if not np.isfinite(median_ns) or median_ns <= 0:
return 0.1
return pd.to_timedelta(median_ns, unit="ns") / pd.Timedelta(days=1) * 0.8
def _ensure_datetime_index(series: pd.Series) -> pd.Series:
if not isinstance(series.index, pd.DatetimeIndex):
raise TypeError("Une série temporelle (DatetimeIndex) est attendue pour le tracé.")
return series
def _series_color(variable: Variable) -> str:
    """Return the color of a variable.

    The palette is searched by ``variable.key`` first, then by
    ``variable.column``; ``DEFAULT_COLOR`` is used when neither is listed.
    """
    try:
        return PALETTE[variable.key]
    except KeyError:
        return PALETTE.get(variable.column, DEFAULT_COLOR)
def _format_label(var: Variable) -> str:
unit_text = f" ({var.unit})" if var.unit else ""
return f"{var.label}{unit_text}"
def resample_series_for_plot(
    series: pd.Series,
    *,
    variable: Variable,
    freq: str | None = None,
    target_points: int = 420,
) -> tuple[pd.Series, str]:
    """Prepare a series for display: resample it with the variable's aggregation.

    Args:
        series: Time series indexed by a ``DatetimeIndex``.
        variable: Variable whose ``key`` selects the aggregation in
            ``DEFAULT_CHOICES`` (the default ``PlotChoice`` aggregation is
            used for unknown keys).
        freq: Resampling frequency; computed from the index and
            ``target_points`` when ``None``.
        target_points: Point budget used by the automatic frequency choice.

    Returns:
        The aggregated series without missing values, and the frequency used.

    Raises:
        TypeError: If ``series`` is not indexed by a ``DatetimeIndex``.
    """
    _ensure_datetime_index(series)

    chosen_freq = (
        freq
        if freq is not None
        else _auto_resample_frequency(series.index, target_points=target_points)
    )
    default_choice = DEFAULT_CHOICES.get(variable.key, PlotChoice(PlotStyle.LINE))
    aggregated = series.resample(chosen_freq).agg(default_choice.agg)
    return aggregated.dropna(), chosen_freq
def plot_basic_series(
    series: pd.Series,
    *,
    variable: Variable,
    output_path: str | Path,
    style: PlotStyle,
    title: str,
    ylabel: str,
    annotate_freq: str | None = None,
) -> Path:
    """Plot one time series with a simple style (line, area, bars or scatter).

    The figure is saved to ``output_path`` (parent directories are created)
    and the plotted values are then handed to ``export_plot_dataset`` with
    the same path (presumably to write the companion dataset; confirm
    against that helper).

    Args:
        series: Values to plot, indexed by a ``DatetimeIndex``.
        variable: Variable being plotted; provides the color, the legend
            label and the exported column name.
        output_path: Destination of the image file.
        style: Rendering style.
        title: Figure title.
        ylabel: Label of the y axis.
        annotate_freq: Optional aggregation description written in the
            bottom-right corner of the axes.

    Returns:
        The resolved path of the saved image.

    Raises:
        TypeError: If ``series`` is not indexed by a ``DatetimeIndex``.
        ValueError: If ``series`` is empty or ``style`` is not supported.
    """
    _ensure_datetime_index(series)
    if series.empty:
        raise ValueError(f"Aucune donnée disponible pour {variable.key} après filtrage.")

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    color = _series_color(variable)
    x = mdates.date2num(series.index)
    values = series.to_numpy(dtype=float)

    fig, ax = plt.subplots(figsize=(11, 4.2))
    # The figure is closed in all cases so that an unknown style or a failed
    # save does not leave it registered in pyplot.
    try:
        if style is PlotStyle.LINE:
            # Axes.plot_date is deprecated since Matplotlib 3.9; xaxis_date()
            # followed by plot() is what it did internally.
            ax.xaxis_date()
            ax.plot(x, values, "-", linewidth=1.8, color=color, label=variable.label)
        elif style is PlotStyle.AREA:
            ax.fill_between(x, values, step="mid", color=color, alpha=0.2)
            ax.xaxis_date()
            ax.plot(x, values, "-", linewidth=1.6, color=color)
        elif style is PlotStyle.BAR:
            width = _infer_bar_width(series.index)
            ax.bar(x, values, width=width, color=color, edgecolor=color, linewidth=0.5, alpha=0.85)
        elif style is PlotStyle.SCATTER:
            ax.scatter(x, values, s=16, color=color, alpha=0.9)
        else:
            raise ValueError(f"Style inconnu : {style}")

        ax.set_title(title)
        ax.set_ylabel(ylabel)
        _format_time_axis(ax)
        ax.grid(True, color="#e0e0e0", linewidth=0.8, alpha=0.7)
        ax.margins(x=0.02, y=0.05)

        if annotate_freq:
            ax.text(
                0.99,
                0.02,
                f"Agrégation : {annotate_freq}",
                transform=ax.transAxes,
                ha="right",
                va="bottom",
                fontsize=9,
                color="#555555",
            )

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)

    export_plot_dataset(series.to_frame(name=variable.column), output_path)
    return output_path.resolve()
def _draw_series(ax: plt.Axes, series: pd.Series, *, choice: PlotChoice, color: str, label: str):
    """Draw ``series`` on ``ax`` with the style of ``choice`` and return the artists.

    Args:
        ax: Axes receiving the drawing.
        series: Values to plot; the index is converted with ``mdates.date2num``.
        choice: Plot configuration; only ``choice.style`` is read here.
        color: Color applied to every artist.
        label: Legend label attached to the returned artist(s).

    Returns:
        Whatever the underlying Matplotlib call returns: the result of
        ``ax.plot`` for lines and areas, of ``ax.bar`` for bars, and of
        ``ax.scatter`` for scatter plots.

    Raises:
        ValueError: If ``choice.style`` is not supported.
    """
    x = mdates.date2num(series.index)
    values = series.to_numpy(dtype=float)

    if choice.style is PlotStyle.LINE:
        # Axes.plot_date is deprecated since Matplotlib 3.9; xaxis_date()
        # followed by plot() is what it did internally.
        ax.xaxis_date()
        return ax.plot(x, values, "-", linewidth=1.8, color=color, label=label)
    if choice.style is PlotStyle.AREA:
        ax.fill_between(x, values, step="mid", color=color, alpha=0.15)
        ax.xaxis_date()
        return ax.plot(x, values, "-", linewidth=1.6, color=color, label=label)
    if choice.style is PlotStyle.BAR:
        width = _infer_bar_width(series.index) * 0.9
        return ax.bar(x, values, width=width, color=color, edgecolor=color, linewidth=0.5, alpha=0.75, label=label)
    if choice.style is PlotStyle.SCATTER:
        return ax.scatter(x, values, s=16, color=color, alpha=0.9, label=label)
    raise ValueError(f"Style inconnu : {choice.style}")
def plot_dual_time_series(
    series_left: pd.Series,
    variable_left: Variable,
    choice_left: PlotChoice,
    series_right: pd.Series,
    variable_right: Variable,
    choice_right: PlotChoice,
    *,
    output_path: str | Path,
    title: str,
    annotate_freq: str | None = None,
) -> Path:
    """Overlay two time series on separate y axes, each with its own style.

    The left series is drawn on the main axes and the right one on a twin
    axes sharing the x axis. The figure is saved to ``output_path`` (parent
    directories are created) and both series, joined column-wise, are then
    handed to ``export_plot_dataset`` with the same path.

    Args:
        series_left: Series drawn against the left y axis.
        variable_left: Variable of the left series (color, label, column).
        choice_left: Plot configuration of the left series.
        series_right: Series drawn against the right y axis.
        variable_right: Variable of the right series (color, label, column).
        choice_right: Plot configuration of the right series.
        output_path: Destination of the image file.
        title: Figure title.
        annotate_freq: Optional aggregation description written in the
            bottom-right corner of the axes.

    Returns:
        The resolved path of the saved image.

    Raises:
        TypeError: If either series is not indexed by a ``DatetimeIndex``.
        ValueError: If either series is empty or a style is not supported.
    """
    _ensure_datetime_index(series_left)
    _ensure_datetime_index(series_right)
    if series_left.empty or series_right.empty:
        raise ValueError("Les séries à tracer ne peuvent pas être vides.")

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    color_left = _series_color(variable_left)
    color_right = _series_color(variable_right)
    label_left = _format_label(variable_left)
    label_right = _format_label(variable_right)

    fig, ax_left = plt.subplots(figsize=(11, 4.6))
    # The figure is closed in all cases so that a drawing or saving error
    # does not leave it registered in pyplot.
    try:
        ax_right = ax_left.twinx()

        artists_left = _draw_series(
            ax_left,
            series_left,
            choice=choice_left,
            color=color_left,
            label=label_left,
        )
        artists_right = _draw_series(
            ax_right,
            series_right,
            choice=choice_right,
            color=color_right,
            label=label_right,
        )

        ax_left.set_ylabel(label_left, color=color_left)
        ax_right.set_ylabel(label_right, color=color_right)
        ax_left.tick_params(axis="y", labelcolor=color_left)
        ax_right.tick_params(axis="y", labelcolor=color_right)

        _format_time_axis(ax_left)
        ax_left.grid(True, color="#e0e0e0", linewidth=0.8, alpha=0.7)
        ax_left.margins(x=0.02, y=0.05)
        ax_right.margins(x=0.02, y=0.05)
        ax_left.set_title(title)

        # _draw_series returns either a list of artists or a single artist or
        # container; both shapes are flattened into one legend, left first.
        handles = []
        for drawn in (artists_left, artists_right):
            handles.extend(drawn if isinstance(drawn, list) else [drawn])
        labels = [handle.get_label() for handle in handles]
        ax_left.legend(handles, labels, loc="upper left")

        if annotate_freq:
            ax_left.text(
                0.99,
                0.02,
                f"Agrégation : {annotate_freq}",
                transform=ax_left.transAxes,
                ha="right",
                va="bottom",
                fontsize=9,
                color="#555555",
            )

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)

    export_plot_dataset(
        pd.concat(
            {variable_left.column: series_left, variable_right.column: series_right},
            axis=1,
        ),
        output_path,
    )
    return output_path.resolve()