Graphiques de corrélations binaires simples
|
After Width: | Height: | Size: 206 KiB |
|
After Width: | Height: | Size: 192 KiB |
|
After Width: | Height: | Size: 135 KiB |
|
After Width: | Height: | Size: 184 KiB |
|
After Width: | Height: | Size: 164 KiB |
|
After Width: | Height: | Size: 216 KiB |
|
After Width: | Height: | Size: 185 KiB |
|
After Width: | Height: | Size: 176 KiB |
|
After Width: | Height: | Size: 219 KiB |
|
After Width: | Height: | Size: 199 KiB |
|
After Width: | Height: | Size: 129 KiB |
|
After Width: | Height: | Size: 180 KiB |
|
After Width: | Height: | Size: 153 KiB |
|
After Width: | Height: | Size: 209 KiB |
|
After Width: | Height: | Size: 150 KiB |
|
After Width: | Height: | Size: 120 KiB |
|
After Width: | Height: | Size: 90 KiB |
|
After Width: | Height: | Size: 154 KiB |
|
After Width: | Height: | Size: 192 KiB |
|
After Width: | Height: | Size: 194 KiB |
|
After Width: | Height: | Size: 182 KiB |
|
After Width: | Height: | Size: 127 KiB |
|
After Width: | Height: | Size: 174 KiB |
|
After Width: | Height: | Size: 153 KiB |
|
After Width: | Height: | Size: 206 KiB |
|
After Width: | Height: | Size: 146 KiB |
|
After Width: | Height: | Size: 205 KiB |
|
After Width: | Height: | Size: 179 KiB |
@@ -1,7 +1,66 @@
|
||||
# Corrélations binaires
|
||||
|
||||
Cette étape regroupe l'ensemble des scripts dédiés aux corrélations et comparaisons directes entre variables.
|
||||
Chaque figure déposée dans `figures/` possède un CSV compagnon, exporté au même emplacement relatif dans le dossier `data/`.
|
||||
## Superpositions simples
|
||||
|
||||
```shell
|
||||
python "docs/04 - Corrélations binaires/scripts/plot_pairwise_time_series.py"
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## Nuages de points
|
||||
|
||||
```shell
|
||||
python "docs/04 - Corrélations binaires/scripts/plot_all_pairwise_scatter.py"
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
# scripts/plot_pairwise_time_series.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import argparse
|
||||
import pandas as pd
|
||||
|
||||
# The repository root is taken three levels above the directory containing
# this script. It is placed at the front of sys.path (once) so that the
# project-local ``meteo`` imports that follow resolve when the file is run
# directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.plots import (
|
||||
PlotChoice,
|
||||
PlotStyle,
|
||||
plot_dual_time_series,
|
||||
recommended_style,
|
||||
resample_series_for_plot,
|
||||
)
|
||||
from meteo.variables import Variable, VARIABLES, VARIABLES_BY_KEY, iter_variable_pairs
|
||||
|
||||
|
||||
# Input dataset. The path is relative, so it is resolved against the current
# working directory at run time.
CSV_PATH = Path("data/weather_minutely.csv")

# Directory two levels above this file (the parent of the folder that holds
# the script).
DOC_DIR = Path(__file__).resolve().parent.parent

# Default destination of the generated figures (see the --output-dir option).
OUTPUT_DIR = DOC_DIR.joinpath("figures", "pairwise_timeseries")
|
||||
|
||||
|
||||
def _select_variables(keys: list[str] | None) -> list[Variable]:
    """Resolve a list of variable keys into variable definitions.

    Args:
        keys: Variable keys to keep. ``None`` or an empty list selects every
            entry of ``VARIABLES``.

    Returns:
        The variables looked up in ``VARIABLES_BY_KEY``, in the order the
        keys were given (or a fresh list of all ``VARIABLES``).

    Raises:
        KeyError: If at least one key is absent from ``VARIABLES_BY_KEY``;
            the message lists every unknown key.
    """
    if keys:
        # Collect every unknown key first so the error reports all of them.
        unknown = [name for name in keys if name not in VARIABLES_BY_KEY]
        if unknown:
            raise KeyError(f"Variables inconnues : {', '.join(unknown)}")
        return [VARIABLES_BY_KEY[name] for name in keys]
    return list(VARIABLES)
|
||||
|
||||
|
||||
def _aggregation_label(choice_a: PlotChoice, choice_b: PlotChoice, freq: str) -> str:
|
||||
agg_labels = set()
|
||||
for choice in (choice_a, choice_b):
|
||||
base = "moyenne"
|
||||
if isinstance(choice.agg, str):
|
||||
if choice.agg == "sum":
|
||||
base = "somme"
|
||||
elif choice.agg == "median":
|
||||
base = "médiane"
|
||||
elif getattr(choice.agg, "__name__", "") == "_circular_mean_deg":
|
||||
base = "moyenne circulaire"
|
||||
agg_labels.add(base)
|
||||
if len(agg_labels) == 1:
|
||||
label = agg_labels.pop()
|
||||
else:
|
||||
label = "agrégations mixtes"
|
||||
return f"{label} {freq}"
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser of the pairwise time-series script."""
    parser = argparse.ArgumentParser(description="Superpose les séries temporelles de toutes les paires de variables.")
    parser.add_argument(
        "--only",
        nargs="*",
        help="Clés de variables à inclure (par défaut : toutes).",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="Limiter aux N derniers jours (par défaut : période complète).",
    )
    parser.add_argument(
        "--style",
        choices=[style.value for style in PlotStyle],
        help="Style à imposer à toutes les variables (par défaut : style recommandé par variable).",
    )
    parser.add_argument(
        "--resample",
        help="Fréquence pandas pour l'agrégation temporelle (par défaut : calcul automatique).",
    )
    parser.add_argument(
        "--max-points",
        type=int,
        default=420,
        help="Nombre de points cible après agrégation automatique (par défaut : 420).",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=OUTPUT_DIR,
        help="Dossier où stocker les figures.",
    )
    return parser


def _plot_pair(
    df: pd.DataFrame,
    var_a: Variable,
    var_b: Variable,
    *,
    style: str | None,
    resample: str | None,
    max_points: int,
    output_dir: Path,
) -> None:
    """Resample and plot one pair of variables, or skip it with a warning.

    The pair is skipped (a message is printed, nothing is plotted) when one
    of the columns is absent from ``df``, when either series is empty after
    dropping missing values, or when either series is empty after resampling.
    """
    missing = [col for col in (var_a.column, var_b.column) if col not in df.columns]
    if missing:
        print(f"⚠ Colonnes absentes, on passe : {', '.join(missing)}")
        return

    series_a = df[var_a.column].dropna()
    series_b = df[var_b.column].dropna()
    if series_a.empty or series_b.empty:
        print(f"⚠ Données insuffisantes pour {var_a.key} / {var_b.key}, on passe.")
        return

    choice_a = recommended_style(var_a, style)
    choice_b = recommended_style(var_b, style)

    aggregated_a, freq_used = resample_series_for_plot(
        series_a,
        variable=var_a,
        freq=resample,
        target_points=max_points,
    )
    # The second series reuses the frequency returned for the first one
    # rather than the raw --resample value.
    aggregated_b, _ = resample_series_for_plot(
        series_b,
        variable=var_b,
        freq=freq_used,
        target_points=max_points,
    )
    if aggregated_a.empty or aggregated_b.empty:
        print(f"⚠ Pas de points après agrégation pour {var_a.key} / {var_b.key}.")
        return

    output_path = output_dir / f"timeseries_{var_a.key}_vs_{var_b.key}.png"
    label_freq = _aggregation_label(choice_a, choice_b, freq_used)

    print(f"→ {var_a.key} vs {var_b.key} ({freq_used}) → {output_path}")
    plot_dual_time_series(
        aggregated_a,
        var_a,
        choice_a,
        aggregated_b,
        var_b,
        choice_b,
        output_path=output_path,
        title=f"{var_a.label} et {var_b.label} — évolution temporelle",
        annotate_freq=label_freq,
    )


def main(argv: list[str] | None = None) -> None:
    """Plot the superposed time series of every selected pair of variables.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read
            ``sys.argv``.

    The function prints a warning and returns without plotting when
    ``CSV_PATH`` does not exist or when no pair of variables is selected.

    Raises:
        KeyError: Propagated from ``_select_variables`` when ``--only``
            contains an unknown variable key.
    """
    args = _build_parser().parse_args(argv)

    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    # Restrict to the last N days. Skipped on an empty dataset, where
    # index.max() is NaN/NaT and the window bounds would be meaningless.
    # NOTE(review): assumes load_raw_csv returns a time-indexed frame — confirm.
    if args.days is not None and not df.empty:
        end = df.index.max()
        start = end - pd.Timedelta(days=args.days)
        df = df.loc[start:end]

    variables = _select_variables(args.only)
    pairs = [(vx, vy) for (vx, vy) in iter_variable_pairs() if vx in variables and vy in variables]
    if not pairs:
        print("⚠ Aucune paire à tracer.")
        return

    output_dir: Path = args.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Dataset chargé ({len(df)} lignes) → génération de {len(pairs)} paires.")

    for var_a, var_b in pairs:
        _plot_pair(
            df,
            var_a,
            var_b,
            style=args.style,
            resample=args.resample,
            max_points=args.max_points,
            output_dir=output_dir,
        )

    print("✔ Superpositions temporelles générées.")
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||