1

Graphiques de corrélations binaires simples

This commit is contained in:
Richard Dern 2025-11-20 21:45:24 +01:00
parent df7fbf07ed
commit a36157b52f
32 changed files with 350 additions and 2 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 164 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 219 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 154 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 127 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 205 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 179 KiB

View File

@ -1,7 +1,66 @@
# Corrélations binaires
Cette étape regroupe l'ensemble des scripts dédiés aux corrélations et comparaisons directes entre variables.
Chaque figure déposée dans `figures/` possède son CSV compagnon exporté dans le dossier `data/` au même emplacement.
## Superpositions simples
```shell
python "docs/04 - Corrélations binaires/scripts/plot_pairwise_time_series.py"
```
![](figures/pairwise_timeseries/timeseries_temperature_vs_humidity.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_pressure.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_temperature_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_pressure.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_humidity_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_rain_rate.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_pressure_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_illuminance.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_rain_rate_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_wind_speed.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_illuminance_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_wind_speed_vs_wind_direction.png)
![](figures/pairwise_timeseries/timeseries_wind_speed_vs_sun_elevation.png)
![](figures/pairwise_timeseries/timeseries_wind_direction_vs_sun_elevation.png)
## Nuages de points
```shell
python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"

View File

@ -0,0 +1,169 @@
# scripts/plot_pairwise_time_series.py
from __future__ import annotations
from pathlib import Path
import sys
import argparse
import pandas as pd
# Repository root: three levels above the directory holding this script
# (scripts/ -> "04 - Corrélations binaires"/ -> docs/ -> root).
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Put the root on sys.path (once) so the `meteo` imports that follow resolve
# when this file is executed directly as a script.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from meteo.dataset import load_raw_csv
from meteo.plots import (
PlotChoice,
PlotStyle,
plot_dual_time_series,
recommended_style,
resample_series_for_plot,
)
from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
# Input dataset. The path is relative, so it is resolved against the current
# working directory of the process, not against PROJECT_ROOT.
CSV_PATH = Path("data/weather_minutely.csv")
# Directory that contains this script's `scripts/` folder.
DOC_DIR = Path(__file__).resolve().parent.parent
# Default destination of the generated figures (overridable with --output-dir).
OUTPUT_DIR = DOC_DIR / "figures" / "pairwise_timeseries"
def _select_variables(keys: list[str] | None) -> list[Variable]:
    """Resolve the requested variable keys to ``Variable`` objects.

    Args:
        keys: Variable keys, in the order they should be returned. ``None`` or
            an empty list selects every entry of ``VARIABLES``.

    Returns:
        A new list with one variable per requested key (order and duplicates
        of ``keys`` are preserved).

    Raises:
        KeyError: If at least one key is absent from ``VARIABLES_BY_KEY``; the
            message lists every unknown key.
    """
    if not keys:
        return [*VARIABLES]

    # Single pass: collect the resolved variables and the unknown keys together.
    resolved: list[Variable] = []
    unknown: list[str] = []
    for name in keys:
        if name in VARIABLES_BY_KEY:
            resolved.append(VARIABLES_BY_KEY[name])
        else:
            unknown.append(name)

    if unknown:
        raise KeyError(f"Variables inconnues : {', '.join(unknown)}")
    return resolved
def _aggregation_label(choice_a: PlotChoice, choice_b: PlotChoice, freq: str) -> str:
    """Build the aggregation caption for a pair of plot choices.

    Each choice is described from its ``agg`` attribute:

    * the strings ``"sum"`` and ``"median"`` map to "somme" and "médiane";
    * a non-string object whose ``__name__`` is ``"_circular_mean_deg"`` maps
      to "moyenne circulaire";
    * anything else falls back to "moyenne".

    Args:
        choice_a: Plot choice of the first variable.
        choice_b: Plot choice of the second variable.
        freq: Frequency text appended after the description.

    Returns:
        ``"<description> <freq>"``, where the description is the common one
        when both choices agree and "agrégations mixtes" otherwise.
    """
    named_aggregations = {"sum": "somme", "median": "médiane"}

    def describe(agg) -> str:
        if isinstance(agg, str):
            return named_aggregations.get(agg, "moyenne")
        if getattr(agg, "__name__", "") == "_circular_mean_deg":
            return "moyenne circulaire"
        return "moyenne"

    descriptions = {describe(choice.agg) for choice in (choice_a, choice_b)}
    summary = next(iter(descriptions)) if len(descriptions) == 1 else "agrégations mixtes"
    return f"{summary} {freq}"
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser of the pairwise time-series script."""
    cli = argparse.ArgumentParser(
        description="Superpose les séries temporelles de toutes les paires de variables.",
    )
    cli.add_argument(
        "--only",
        nargs="*",
        help="Clés de variables à inclure (par défaut : toutes).",
    )
    cli.add_argument(
        "--days",
        type=int,
        default=None,
        help="Limiter aux N derniers jours (par défaut : période complète).",
    )
    cli.add_argument(
        "--style",
        choices=[style.value for style in PlotStyle],
        help="Style à imposer à toutes les variables (par défaut : style recommandé par variable).",
    )
    cli.add_argument(
        "--resample",
        help="Fréquence pandas pour l'agrégation temporelle (par défaut : calcul automatique).",
    )
    cli.add_argument(
        "--max-points",
        type=int,
        default=420,
        help="Nombre de points cible après agrégation automatique (par défaut : 420).",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=OUTPUT_DIR,
        help="Dossier où stocker les figures.",
    )
    return cli


def main(argv: list[str] | None = None) -> None:
    """Generate one overlaid time-series figure per pair of selected variables.

    The dataset is read from ``CSV_PATH`` with ``load_raw_csv`` and, when
    ``--days`` is given, restricted to the window ending at the largest index
    value. Every pair yielded by ``iter_variable_pairs()`` whose two members
    are both selected is then drawn with ``plot_dual_time_series`` into
    ``timeseries_<a>_vs_<b>.png`` inside the output directory.

    A pair is skipped, with a printed warning, when one of its columns is
    missing from the dataset, when either series is empty once NaN values are
    dropped, or when aggregation returns an empty series. The second series is
    resampled with the frequency returned for the first one.

    Args:
        argv: Command-line arguments; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``None``. The function returns early, after printing a warning, when
        ``CSV_PATH`` does not exist or when no pair remains to plot.
    """
    options = _build_parser().parse_args(argv)

    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    frame = load_raw_csv(CSV_PATH)
    if options.days is not None:
        window_end = frame.index.max()
        window_start = window_end - pd.Timedelta(days=options.days)
        frame = frame.loc[window_start:window_end]

    selected = _select_variables(options.only)
    pairs = [
        (first, second)
        for first, second in iter_variable_pairs()
        if first in selected and second in selected
    ]
    if not pairs:
        print("⚠ Aucune paire à tracer.")
        return

    target_dir: Path = options.output_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    print(f"Dataset chargé ({len(frame)} lignes) → génération de {len(pairs)} paires.")

    for first, second in pairs:
        absent = [name for name in (first.column, second.column) if name not in frame.columns]
        if absent:
            print(f"⚠ Colonnes absentes, on passe : {', '.join(absent)}")
            continue

        raw_first = frame[first.column].dropna()
        raw_second = frame[second.column].dropna()
        if raw_first.empty or raw_second.empty:
            print(f"⚠ Données insuffisantes pour {first.key} / {second.key}, on passe.")
            continue

        style_first = recommended_style(first, options.style)
        style_second = recommended_style(second, options.style)

        # The frequency chosen for the first series is reused for the second.
        plotted_first, frequency = resample_series_for_plot(
            raw_first,
            variable=first,
            freq=options.resample,
            target_points=options.max_points,
        )
        plotted_second, _ = resample_series_for_plot(
            raw_second,
            variable=second,
            freq=frequency,
            target_points=options.max_points,
        )
        if plotted_first.empty or plotted_second.empty:
            print(f"⚠ Pas de points après agrégation pour {first.key} / {second.key}.")
            continue

        figure_path = target_dir / f"timeseries_{first.key}_vs_{second.key}.png"
        caption = _aggregation_label(style_first, style_second, frequency)

        print(f"{first.key} vs {second.key} ({frequency}) → {figure_path}")
        plot_dual_time_series(
            plotted_first,
            first,
            style_first,
            plotted_second,
            second,
            style_second,
            output_path=figure_path,
            title=f"{first.label} et {second.label} — évolution temporelle",
            annotate_freq=caption,
        )

    print("✔ Superpositions temporelles générées.")


if __name__ == "__main__":
    main()

View File

@ -26,6 +26,7 @@ from .basic_series import (
PlotChoice,
PlotStyle,
plot_basic_series,
plot_dual_time_series,
recommended_style,
resample_series_for_plot,
)
@ -64,6 +65,7 @@ __all__ = [
"PlotChoice",
"PlotStyle",
"plot_basic_series",
"plot_dual_time_series",
"recommended_style",
"resample_series_for_plot",
"plot_daylight_hours",

View File

@ -23,6 +23,7 @@ __all__ = [
"recommended_style",
"resample_series_for_plot",
"plot_basic_series",
"plot_dual_time_series",
]
@ -145,6 +146,11 @@ def _series_color(variable: Variable) -> str:
return PALETTE.get(variable.column, DEFAULT_COLOR)
def _format_label(var: Variable) -> str:
    """Return the variable label, followed by its unit in parentheses if it has one."""
    if not var.unit:
        return f"{var.label}"
    return f"{var.label} ({var.unit})"
def resample_series_for_plot(
series: pd.Series,
*,
@ -230,3 +236,115 @@ def plot_basic_series(
export_plot_dataset(series.to_frame(name=variable.column), output_path)
return output_path.resolve()
def _draw_series(ax: plt.Axes, series: pd.Series, *, choice: PlotChoice, color: str, label: str):
    """Draw ``series`` on ``ax`` with the style carried by ``choice``.

    Args:
        ax: Axes receiving the artists.
        series: Values to draw; its index is converted with
            ``mdates.date2num`` to obtain the x coordinates.
        choice: Plot choice whose ``style`` selects the rendering.
        color: Colour applied to every artist created here.
        label: Legend label attached to the returned artist(s).

    Returns:
        What the underlying Matplotlib call returns: the list of lines from
        ``ax.plot`` for LINE and AREA, the container from ``ax.bar`` for BAR,
        the collection from ``ax.scatter`` for SCATTER.

    Raises:
        ValueError: If ``choice.style`` is none of the four handled styles.
    """
    x = mdates.date2num(series.index)
    values = series.to_numpy(dtype=float)

    # `Axes.plot_date` is deprecated (Matplotlib 3.9). It only called
    # `xaxis_date()` and then `plot()`, so both calls are made explicitly here.
    if choice.style is PlotStyle.LINE:
        ax.xaxis_date()
        return ax.plot(x, values, "-", linewidth=1.8, color=color, label=label)

    if choice.style is PlotStyle.AREA:
        # NOTE(review): the fill is stepped ("mid") while the outline is a
        # plain line — confirm this is the intended look.
        ax.fill_between(x, values, step="mid", color=color, alpha=0.15)
        ax.xaxis_date()
        return ax.plot(x, values, "-", linewidth=1.6, color=color, label=label)

    if choice.style is PlotStyle.BAR:
        # Bars take 90 % of the inferred spacing so neighbours do not touch.
        width = _infer_bar_width(series.index) * 0.9
        return ax.bar(x, values, width=width, color=color, edgecolor=color, linewidth=0.5, alpha=0.75, label=label)

    if choice.style is PlotStyle.SCATTER:
        return ax.scatter(x, values, s=16, color=color, alpha=0.9, label=label)

    raise ValueError(f"Style inconnu : {choice.style}")
def plot_dual_time_series(
    series_left: pd.Series,
    variable_left: Variable,
    choice_left: PlotChoice,
    series_right: pd.Series,
    variable_right: Variable,
    choice_right: PlotChoice,
    *,
    output_path: str | Path,
    title: str,
    annotate_freq: str | None = None,
) -> Path:
    """Overlay two time series on one figure, each with its own Y axis.

    The left series is drawn on the primary axes and the right one on a
    ``twinx`` axes; each axis label and its tick labels take the colour of
    the corresponding series. A single legend, placed on the left axes,
    lists the artists of both series.

    Args:
        series_left: Series drawn against the left Y axis.
        variable_left: Variable describing ``series_left`` (label, unit,
            column, colour lookup).
        choice_left: Plot choice (style) for the left series.
        series_right: Series drawn against the right Y axis.
        variable_right: Variable describing ``series_right``.
        choice_right: Plot choice (style) for the right series.
        output_path: Destination of the image; missing parent directories are
            created.
        title: Figure title.
        annotate_freq: Optional text shown bottom-right as
            ``"Agrégation : <text>"``.

    Returns:
        The resolved path of the written image.

    Raises:
        ValueError: If either series is empty.
        Both series are first passed to ``_ensure_datetime_index``, which
        presumably rejects a non-datetime index — confirm in that helper.
    """
    _ensure_datetime_index(series_left)
    _ensure_datetime_index(series_right)

    if series_left.empty or series_right.empty:
        raise ValueError("Les séries à tracer ne peuvent pas être vides.")

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    color_left = _series_color(variable_left)
    color_right = _series_color(variable_right)
    label_left = _format_label(variable_left)
    label_right = _format_label(variable_right)

    fig, ax_left = plt.subplots(figsize=(11, 4.6))
    # Close the figure even when drawing or saving fails, so a failing call
    # does not leave an open figure behind in pyplot's registry.
    try:
        ax_right = ax_left.twinx()

        artists_left = _draw_series(
            ax_left,
            series_left,
            choice=choice_left,
            color=color_left,
            label=label_left,
        )
        artists_right = _draw_series(
            ax_right,
            series_right,
            choice=choice_right,
            color=color_right,
            label=label_right,
        )

        ax_left.set_ylabel(label_left, color=color_left)
        ax_right.set_ylabel(label_right, color=color_right)
        ax_left.tick_params(axis="y", labelcolor=color_left)
        ax_right.tick_params(axis="y", labelcolor=color_right)

        _format_time_axis(ax_left)
        ax_left.grid(True, color="#e0e0e0", linewidth=0.8, alpha=0.7)
        ax_left.margins(x=0.02, y=0.05)
        ax_right.margins(x=0.02, y=0.05)
        ax_left.set_title(title)

        # `_draw_series` returns either a list of artists or a single
        # artist/container; flatten both results into one legend.
        handles = []
        for drawn in (artists_left, artists_right):
            handles.extend(drawn if isinstance(drawn, list) else [drawn])
        labels = [handle.get_label() for handle in handles]
        ax_left.legend(handles, labels, loc="upper left")

        if annotate_freq:
            ax_left.text(
                0.99,
                0.02,
                f"Agrégation : {annotate_freq}",
                transform=ax_left.transAxes,
                ha="right",
                va="bottom",
                fontsize=9,
                color="#555555",
            )

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        plt.close(fig)

    # Hand the plotted values (one column per variable) to the dataset export.
    export_plot_dataset(
        pd.concat(
            {variable_left.column: series_left, variable_right.column: series_right},
            axis=1,
        ),
        output_path,
    )
    return output_path.resolve()