You've already forked donnees_meteo
Nouvelles visualisations exploratoires
This commit is contained in:
128
scripts/plot_hexbin_explorations.py
Normal file
128
scripts/plot_hexbin_explorations.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# scripts/plot_hexbin_explorations.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
import numpy as np
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES_BY_KEY
|
||||
from meteo.plots import plot_hexbin_with_third_variable
|
||||
|
||||
|
||||
# Input dataset and destination folder for the generated figures.
# Both paths are relative, so they resolve against the current working
# directory of the process that runs the script.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_DIR = Path("figures/hexbin_explorations")
|
||||
|
||||
|
||||
# Aggregation callables selectable through a scenario's "reduce" field.
# The chosen callable is handed to the plotting helper as `reduce_func`
# (presumably applied to the colour variable's values bin by bin — confirm
# against `plot_hexbin_with_third_variable`).
REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
    "mean": np.mean,
    "median": np.median,
    "max": np.max,
}
|
||||
|
||||
# French display label for each aggregation name; forwarded to the plotting
# helper as `reduce_func_label`.
REDUCE_LABEL_FR: dict[str, str] = {
    "mean": "moyenne",
    "median": "médiane",
    "max": "maximum",
}
|
||||
|
||||
# Each scenario illustrates either a well-known correlation or the absence
# of structure between variables.
#
# Fields read by `main`:
#   x, y, color  -> keys into VARIABLES_BY_KEY
#   filename     -> file name created under OUTPUT_DIR
#   description  -> text printed before the figure is generated
#   reduce       -> key into REDUCE_FUNCTIONS / REDUCE_LABEL_FR
#   gridsize, mincnt -> integers forwarded to the plotting helper
HEXBIN_SCENARIOS: list[dict[str, object]] = [
    {
        "x": "temperature",
        "y": "humidity",
        "color": "rain_rate",
        "filename": "hexbin_temp_humidity_color_rain.png",
        "description": (
            "Mettre en évidence comment l'humidité relative plafonne lorsque la température chute "
            "et comment les épisodes de pluie se situent dans une bande restreinte."
        ),
        "reduce": "max",
        "gridsize": 50,
        "mincnt": 8,
    },
    {
        "x": "pressure",
        "y": "rain_rate",
        "color": "wind_speed",
        "filename": "hexbin_pressure_rain_color_wind.png",
        "description": (
            "Vérifier si des rafales accompagnent vraiment les chutes de pression. "
            "On s'attend à voir beaucoup de cases vides : la corrélation est loin d'être systématique."
        ),
        "reduce": "median",
        "gridsize": 45,
        "mincnt": 5,
    },
    {
        "x": "illuminance",
        "y": "humidity",
        "color": "temperature",
        "filename": "hexbin_lux_humidity_color_temp.png",
        "description": (
            "Explorer le cycle jour/nuit : l'humidité monte quand l'illuminance chute, "
            "mais cela n'implique pas toujours une baisse rapide de température."
        ),
        "reduce": "mean",
        "gridsize": 55,
        "mincnt": 6,
    },
]
|
||||
|
||||
|
||||
def main() -> None:
    """Generate one hexbin figure per entry of ``HEXBIN_SCENARIOS``.

    Loads the CSV at ``CSV_PATH`` with ``load_raw_csv``, prints a short
    summary of the dataset, then for each scenario resolves its three
    variables through ``VARIABLES_BY_KEY`` and calls
    ``plot_hexbin_with_third_variable`` with the scenario's settings.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without plotting anything.

    Per-scenario defaults when a field is absent: ``reduce="mean"``,
    ``gridsize=60``, ``mincnt=5``. An unknown ``reduce`` name falls back to
    ``"mean"`` for both the function and its label, with a warning.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    for scenario in HEXBIN_SCENARIOS:
        var_x = VARIABLES_BY_KEY[scenario["x"]]
        var_y = VARIABLES_BY_KEY[scenario["y"]]
        var_color = VARIABLES_BY_KEY[scenario["color"]]

        output_path = OUTPUT_DIR / scenario["filename"]

        reduce_name = scenario.get("reduce", "mean")
        if reduce_name not in REDUCE_FUNCTIONS:
            # Fall back for the function AND the label together: previously
            # the mean was applied while the label still showed the unknown
            # name, so the figure claimed an aggregation it did not use.
            print(f" ⚠ Agrégation inconnue '{reduce_name}' : utilisation de la moyenne.")
            reduce_name = "mean"
        reduce_func = REDUCE_FUNCTIONS[reduce_name]
        reduce_label = REDUCE_LABEL_FR.get(reduce_name, reduce_name)

        gridsize = int(scenario.get("gridsize", 60))
        mincnt = int(scenario.get("mincnt", 5))

        description = scenario["description"]
        print(f"→ Hexbin {var_y.key} vs {var_x.key} (couleur = {var_color.key})")
        print(f" {description}")

        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=gridsize,
            mincnt=mincnt,
            reduce_func=reduce_func,
            reduce_func_label=reduce_label,
            cmap="magma",
        )
        print(f" ✔ Graphique enregistré : {output_path}")
        print()

    print("✔ Tous les graphiques hexbin ont été générés.")


if __name__ == "__main__":
    main()
|
||||
85
scripts/plot_rain_event_composites.py
Normal file
85
scripts/plot_rain_event_composites.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# scripts/plot_rain_event_composites.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import Variable, VARIABLES_BY_KEY
|
||||
from meteo.analysis import detect_threshold_events, build_event_aligned_segments
|
||||
from meteo.plots import plot_event_composite
|
||||
|
||||
|
||||
# Input dataset and output image for the rain-event composite figure.
# Both paths are relative to the current working directory.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/event_composites/rain_event_composites.png")
|
||||
|
||||
RAIN_THRESHOLD = 0.2  # mm/h: below this it is not considered to be really raining
MIN_EVENT_DURATION = 5  # minutes
MIN_EVENT_GAP = 20  # minutes required before a new event is considered
WINDOW_BEFORE = 120  # minutes shown before the rain starts
WINDOW_AFTER = 240  # minutes after the onset
|
||||
|
||||
# Keys (into VARIABLES_BY_KEY) of the variables included in the composite,
# in the order they are passed to the alignment and plotting helpers.
COMPOSITE_VARIABLE_KEYS: Sequence[str] = [
    "pressure",
    "temperature",
    "humidity",
    "wind_speed",
]
|
||||
|
||||
|
||||
def main() -> None:
    """Plot a composite of several variables aligned on rain-event onsets.

    Steps:
      1. Load the CSV at ``CSV_PATH`` with ``load_raw_csv`` and print a
         short summary.
      2. Detect events on the rain-rate series with
         ``detect_threshold_events`` (threshold, minimum duration and
         minimum gap come from the module constants).
      3. Build event-aligned segments for ``COMPOSITE_VARIABLE_KEYS`` with
         ``build_event_aligned_segments``.
      4. Render the figure with ``plot_event_composite`` and print the path
         it returns.

    The function prints a warning and returns early when the CSV is
    missing, when no event is detected, or when the aligned segments are
    empty.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    # Resolve the rain column through the variable registry, exactly as the
    # composite variables below are, instead of hard-coding the column name.
    rain_series = df[VARIABLES_BY_KEY["rain_rate"].column]
    events = detect_threshold_events(
        rain_series,
        threshold=RAIN_THRESHOLD,
        min_duration=pd.Timedelta(minutes=MIN_EVENT_DURATION),
        min_gap=pd.Timedelta(minutes=MIN_EVENT_GAP),
    )

    if not events:
        print("⚠ Aucun événement de pluie détecté avec les paramètres actuels.")
        return

    print(f"Nombre d'événements détectés : {len(events)}")

    variables: list[Variable] = [VARIABLES_BY_KEY[key] for key in COMPOSITE_VARIABLE_KEYS]
    columns = [v.column for v in variables]

    aligned_segments = build_event_aligned_segments(
        df=df,
        events=events,
        columns=columns,
        window_before_minutes=WINDOW_BEFORE,
        window_after_minutes=WINDOW_AFTER,
        resample_minutes=1,
    )

    if aligned_segments.empty:
        print("⚠ Les segments alignés sont vides (période manquante ?).")
        return

    output_path = plot_event_composite(
        aligned_segments=aligned_segments,
        variables=variables,
        output_path=OUTPUT_PATH,
        quantiles=(0.2, 0.8),
        baseline_label="Début de la pluie",
    )

    print(f"✔ Graphique composite pluie sauvegardé : {output_path}")


if __name__ == "__main__":
    main()
|
||||
65
scripts/plot_rolling_correlation_heatmap.py
Normal file
65
scripts/plot_rolling_correlation_heatmap.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# scripts/plot_rolling_correlation_heatmap.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES_BY_KEY
|
||||
from meteo.analysis import compute_rolling_correlations_for_pairs
|
||||
from meteo.plots import plot_rolling_correlation_heatmap
|
||||
|
||||
|
||||
# Input dataset and output image for the rolling-correlation heatmap.
# Both paths are relative to the current working directory.
CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")
|
||||
|
||||
# Pairs of variable keys (into VARIABLES_BY_KEY) whose rolling correlation
# is computed; each pair is resolved to variable objects in `main`.
ROLLING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]
|
||||
|
||||
WINDOW_MINUTES = 180  # 3 hours, to observe synoptic trends
STEP_MINUTES = 30  # sample only one point every 30 minutes
|
||||
|
||||
|
||||
def main() -> None:
    """Compute rolling correlations for ``ROLLING_PAIRS`` and plot a heatmap.

    Loads the CSV at ``CSV_PATH`` with ``load_raw_csv``, prints a short
    summary, resolves each key pair through ``VARIABLES_BY_KEY``, calls
    ``compute_rolling_correlations_for_pairs`` (Pearson, window and step
    taken from the module constants) and passes the result to
    ``plot_rolling_correlation_heatmap`` with a colour scale fixed to
    [-1, 1].

    The function prints a warning and returns early when the CSV is missing
    or when the computed correlation frame is empty.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    pairs = [(VARIABLES_BY_KEY[a], VARIABLES_BY_KEY[b]) for a, b in ROLLING_PAIRS]

    # Forwarded as `min_valid_fraction`; named here so the value is not an
    # anonymous literal in the call (exact semantics are defined by the
    # helper — presumably the share of valid samples required per window).
    min_valid_fraction = 0.7

    rolling_df = compute_rolling_correlations_for_pairs(
        df=df,
        pairs=pairs,
        window_minutes=WINDOW_MINUTES,
        min_valid_fraction=min_valid_fraction,
        step_minutes=STEP_MINUTES,
        method="pearson",
    )

    if rolling_df.empty:
        print("⚠ Impossible de calculer les corrélations glissantes (données insuffisantes).")
        return

    output_path = plot_rolling_correlation_heatmap(
        rolling_corr=rolling_df,
        output_path=OUTPUT_PATH,
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
    )

    print(f"✔ Heatmap de corrélations glissantes enregistrée : {output_path}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user