New exploratory visualizations

Parent: b72349a369
Commit: fd42a692d9
New binary files:

figures/event_composites/rain_event_composites.png (362 KiB)
figures/hexbin_explorations/hexbin_lux_humidity_color_temp.png (173 KiB)
figures/hexbin_explorations/hexbin_pressure_rain_color_wind.png (83 KiB)
figures/hexbin_explorations/hexbin_temp_humidity_color_rain.png (93 KiB)
figures/rolling_correlations/rolling_correlation_heatmap.png (255 KiB)
meteo/analysis.py
@@ -1,7 +1,7 @@
# meteo/analysis.py
from __future__ import annotations

-from typing import Literal
+from typing import Literal, Sequence

import numpy as np
import pandas as pd
@@ -115,3 +115,199 @@ def compute_lagged_correlation(
    lag_df = lag_df.set_index("lag_minutes")

    return lag_df


def _ensure_datetime_index(df: pd.DataFrame) -> pd.DatetimeIndex:
    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError("This function requires a time-indexed DataFrame.")
    return df.index


def compute_rolling_correlation_series(
    df: pd.DataFrame,
    var_x: Variable,
    var_y: Variable,
    *,
    window_minutes: int,
    min_valid_fraction: float = 0.6,
    step_minutes: int | None = None,
    method: Literal["pearson", "spearman"] = "pearson",
) -> pd.Series:
    """
    Compute the rolling X/Y correlation over a time window.
    Returns a series indexed by the end of each window.
    Only 'pearson' is implemented for now.
    """
    if not 0 < min_valid_fraction <= 1:
        raise ValueError("min_valid_fraction must be in the interval (0, 1].")

    for col in (var_x.column, var_y.column):
        if col not in df.columns:
            raise KeyError(f"Column missing from DataFrame: {col}")

    _ensure_datetime_index(df)
    pair = df[[var_x.column, var_y.column]].dropna().sort_index()

    if pair.empty:
        return pd.Series(dtype=float, name=f"{var_x.key}→{var_y.key}")

    window = f"{window_minutes}min"
    # min_periods counts samples, so this assumes roughly one row per minute.
    min_periods = max(1, int(window_minutes * min_valid_fraction))
    if method not in {"pearson"}:
        raise NotImplementedError(
            "Rolling correlations currently support only the 'pearson' method."
        )

    rolling_corr = pair[var_x.column].rolling(
        window=window,
        min_periods=min_periods,
    ).corr(pair[var_y.column])

    rolling_corr = rolling_corr.dropna()
    rolling_corr.name = f"{var_x.key}→{var_y.key}"

    if step_minutes and step_minutes > 1:
        rolling_corr = rolling_corr.resample(f"{step_minutes}min").mean().dropna()

    return rolling_corr


def compute_rolling_correlations_for_pairs(
    df: pd.DataFrame,
    pairs: Sequence[tuple[Variable, Variable]],
    *,
    window_minutes: int,
    min_valid_fraction: float = 0.6,
    step_minutes: int | None = None,
    method: Literal["pearson", "spearman"] = "pearson",
) -> pd.DataFrame:
    """
    Compute rolling correlations for several pairs and align the results
    in a DataFrame (time index, one 'x→y' column per pair).
    """
    series_list: list[pd.Series] = []
    for var_x, var_y in pairs:
        corr = compute_rolling_correlation_series(
            df=df,
            var_x=var_x,
            var_y=var_y,
            window_minutes=window_minutes,
            min_valid_fraction=min_valid_fraction,
            step_minutes=step_minutes,
            method=method,
        )
        if not corr.empty:
            series_list.append(corr)

    if not series_list:
        return pd.DataFrame()

    result = pd.concat(series_list, axis=1)
    result = result.sort_index()
    return result
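
# Usage sketch (added for illustration, not part of the committed module):
# sanity-check the rolling correlation on synthetic minutely data whose
# relationship flips sign halfway through. This assumes Variable is the small
# dataclass exposed by meteo.variables with key/column/label/unit fields;
# adjust the constructor if the real signature differs.
#
#   import numpy as np
#   import pandas as pd
#   from meteo.variables import Variable
#   from meteo.analysis import compute_rolling_correlation_series
#
#   idx = pd.date_range("2024-01-01", periods=720, freq="min")
#   x = pd.Series(np.random.default_rng(0).normal(size=720), index=idx)
#   y = x.where(x.index < idx[360], -x)  # anti-correlated second half
#   demo = pd.DataFrame({"x": x, "y": y})
#   vx = Variable(key="x", column="x", label="X", unit="")
#   vy = Variable(key="y", column="y", label="Y", unit="")
#   corr = compute_rolling_correlation_series(demo, vx, vy, window_minutes=60)
#   # corr sits near +1 early on and swings toward -1 after the flip.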


def _infer_time_step(index: pd.DatetimeIndex) -> pd.Timedelta:
    diffs = index.to_series().diff().dropna()
    if diffs.empty:
        return pd.Timedelta(minutes=1)
    return diffs.median()


def detect_threshold_events(
    series: pd.Series,
    *,
    threshold: float,
    min_duration: pd.Timedelta,
    min_gap: pd.Timedelta,
) -> list[tuple[pd.Timestamp, pd.Timestamp]]:
    """
    Detect events where `series > threshold` (NaN treated as False) lasting
    at least `min_duration`. Events separated by a gap shorter than `min_gap`
    are merged.
    """
    if not isinstance(series.index, pd.DatetimeIndex):
        raise TypeError("series must be time-indexed.")

    mask = (series > threshold).fillna(False)
    if not mask.any():
        return []

    # Consecutive runs of identical mask values share a group id.
    groups = (mask != mask.shift()).cumsum()
    time_step = _infer_time_step(series.index)
    raw_events: list[tuple[pd.Timestamp, pd.Timestamp]] = []

    for _, group_mask in mask.groupby(groups):
        if not group_mask.iloc[0]:
            continue
        start = group_mask.index[0]
        end = group_mask.index[-1] + time_step
        duration = end - start
        if duration >= min_duration:
            raw_events.append((start, end))

    if not raw_events:
        return []

    merged: list[tuple[pd.Timestamp, pd.Timestamp]] = []
    for start, end in raw_events:
        if not merged:
            merged.append((start, end))
            continue

        prev_start, prev_end = merged[-1]
        if start - prev_end < min_gap:
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))

    return merged


def build_event_aligned_segments(
    df: pd.DataFrame,
    events: Sequence[tuple[pd.Timestamp, pd.Timestamp]],
    columns: Sequence[str],
    *,
    window_before_minutes: int,
    window_after_minutes: int,
    resample_minutes: int = 1,
) -> pd.DataFrame:
    """
    For each event, extract the series centred on its start and return a
    DataFrame with an (event_id, offset_minutes) MultiIndex.
    """
    if not events:
        return pd.DataFrame(columns=columns)

    _ensure_datetime_index(df)
    data = df[columns].sort_index()

    freq = pd.Timedelta(minutes=resample_minutes)
    if resample_minutes > 1:
        data = data.resample(freq).mean()

    before = pd.Timedelta(minutes=window_before_minutes)
    after = pd.Timedelta(minutes=window_after_minutes)

    segments: list[pd.DataFrame] = []

    for event_id, (start, _end) in enumerate(events):
        window_start = start - before
        window_end = start + after
        window_index = pd.date_range(window_start, window_end, freq=freq)
        segment = data.reindex(window_index)
        if segment.empty:
            continue
        offsets = ((segment.index - start) / pd.Timedelta(minutes=1)).astype(float)
        multi_index = pd.MultiIndex.from_arrays(
            [np.full(len(segment), event_id), offsets],
            names=["event_id", "offset_minutes"],
        )
        segment.index = multi_index
        segments.append(segment)

    if not segments:
        return pd.DataFrame(columns=columns)

    aligned = pd.concat(segments)
    return aligned
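
Taken together, detect_threshold_events and build_event_aligned_segments form a compact event pipeline. A minimal sketch on synthetic data (the column name rain_rate mirrors the scripts below), showing how min_gap merges nearby bursts and what the aligned output looks like:

import pandas as pd
from meteo.analysis import detect_threshold_events, build_event_aligned_segments

idx = pd.date_range("2024-06-01", periods=240, freq="min")
rain = pd.Series(0.0, index=idx)
rain.iloc[30:45] = 1.0    # first burst
rain.iloc[50:70] = 2.0    # second burst, only 5 min later
rain.iloc[150:160] = 1.5  # isolated shower

events = detect_threshold_events(
    rain,
    threshold=0.2,
    min_duration=pd.Timedelta(minutes=5),
    min_gap=pd.Timedelta(minutes=20),
)
# Two events: the first two bursts merge because their 5 min gap < min_gap.

segments = build_event_aligned_segments(
    pd.DataFrame({"rain_rate": rain}),
    events,
    ["rain_rate"],
    window_before_minutes=15,
    window_after_minutes=30,
)
# segments.loc[0] is the first event's window, indexed by offset_minutes.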
meteo/plots.py (205 lines changed)
@@ -2,7 +2,7 @@
from __future__ import annotations

from pathlib import Path
-from typing import Sequence
+from typing import Callable, Sequence

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
@@ -168,6 +168,71 @@ def plot_scatter_pair(
    return output_path.resolve()


def plot_hexbin_with_third_variable(
    df: pd.DataFrame,
    var_x: Variable,
    var_y: Variable,
    var_color: Variable,
    output_path: str | Path,
    *,
    gridsize: int = 60,
    mincnt: int = 5,
    reduce_func: Callable[[np.ndarray], float] | None = None,
    reduce_func_label: str | None = None,
    cmap: str = "viridis",
) -> Path:
    """
    Draw a hexbin density map where the color encodes a third variable.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    reduce_func = reduce_func or np.mean

    df_xyz = df[[var_x.column, var_y.column, var_color.column]].dropna()
    if df_xyz.empty:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "No valid data for this combination.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    fig, ax = plt.subplots()
    hb = ax.hexbin(
        df_xyz[var_x.column],
        df_xyz[var_y.column],
        C=df_xyz[var_color.column],
        reduce_C_function=reduce_func,
        gridsize=gridsize,
        cmap=cmap,
        mincnt=mincnt,
    )

    func_label = reduce_func_label or getattr(reduce_func, "__name__", "statistic")
    colorbar_label = f"{func_label.capitalize()} of {var_color.label}"
    cbar = fig.colorbar(hb, ax=ax)
    cbar.set_label(colorbar_label)

    ax.set_xlabel(f"{var_x.label} ({var_x.unit})")
    ax.set_ylabel(f"{var_y.label} ({var_y.unit})")
    ax.set_title(
        f"{var_y.label} vs {var_x.label}\nColor: {func_label} of {var_color.label}"
    )
    ax.grid(False)
    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)

    return output_path.resolve()


def plot_lagged_correlation(
    lag_df: pd.DataFrame,
    var_x: Variable,
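
Because reduce_func accepts any callable applied to each bin's samples, the statistic is not limited to mean, median, or max: a high percentile, for instance, highlights extremes. A sketch under the same data layout as the scripts below (the output filename is illustrative):

import numpy as np
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.plots import plot_hexbin_with_third_variable
from meteo.variables import VARIABLES_BY_KEY

df = load_raw_csv(Path("data/weather_minutely.csv"))
plot_hexbin_with_third_variable(
    df=df,
    var_x=VARIABLES_BY_KEY["temperature"],
    var_y=VARIABLES_BY_KEY["humidity"],
    var_color=VARIABLES_BY_KEY["wind_speed"],
    output_path="figures/hexbin_explorations/hexbin_temp_humidity_color_wind_p90.png",
    reduce_func=lambda values: float(np.percentile(values, 90)),
    reduce_func_label="90th percentile",
    mincnt=10,
)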
@@ -270,3 +335,141 @@ def plot_correlation_heatmap(
    plt.close(fig)

    return output_path.resolve()


def plot_rolling_correlation_heatmap(
    rolling_corr: pd.DataFrame,
    output_path: str | Path,
    *,
    cmap: str = "coolwarm",
    vmin: float = -1.0,
    vmax: float = 1.0,
    time_tick_count: int = 6,
) -> Path:
    """
    Visualize how rolling correlations evolve for several variable pairs.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if rolling_corr.empty:
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, "No rolling-correlation data.", ha="center", va="center")
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    labels = list(rolling_corr.columns)
    data = rolling_corr.to_numpy().T

    height = max(3.0, 0.6 * len(labels))
    fig, ax = plt.subplots(figsize=(10, height))
    im = ax.imshow(data, aspect="auto", cmap=cmap, vmin=vmin, vmax=vmax)

    ax.set_yticks(np.arange(len(labels)))
    ax.set_yticklabels(labels)

    if isinstance(rolling_corr.index, pd.DatetimeIndex):
        times = rolling_corr.index
        if len(times) > 1:
            tick_idx = np.linspace(0, len(times) - 1, num=min(time_tick_count, len(times)), dtype=int)
        else:
            tick_idx = np.array([0])
        tick_labels = [times[i].strftime("%Y-%m-%d\n%H:%M") for i in tick_idx]
    else:
        tick_idx = np.linspace(0, len(rolling_corr.index) - 1, num=min(time_tick_count, len(rolling_corr.index)), dtype=int)
        tick_labels = [str(rolling_corr.index[i]) for i in tick_idx]

    ax.set_xticks(tick_idx)
    ax.set_xticklabels(tick_labels, rotation=30, ha="right")

    ax.set_xlabel("Time (window end)")
    ax.set_ylabel("Variable pair")
    ax.set_title("Rolling correlations")

    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label("Correlation coefficient")

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)

    return output_path.resolve()


def plot_event_composite(
    aligned_segments: pd.DataFrame,
    variables: Sequence[Variable],
    output_path: str | Path,
    *,
    quantiles: tuple[float, float] = (0.25, 0.75),
    baseline_label: str = "Event start",
) -> Path:
    """
    Plot mean/median traces around detected events, with an inter-quantile band.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if aligned_segments.empty:
        fig, ax = plt.subplots()
        ax.text(
            0.5,
            0.5,
            "No aligned events to plot.",
            ha="center",
            va="center",
        )
        ax.set_axis_off()
        fig.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close(fig)
        return output_path.resolve()

    if "offset_minutes" not in aligned_segments.index.names:
        raise ValueError("aligned_segments must have an 'offset_minutes' index level.")

    group = aligned_segments.groupby(level="offset_minutes")
    mean_df = group.mean()
    median_df = group.median()

    q_low, q_high = quantiles
    quantile_low = group.quantile(q_low) if q_low is not None else None
    quantile_high = group.quantile(q_high) if q_high is not None else None

    offsets = mean_df.index.to_numpy(dtype=float)
    n_vars = len(variables)
    fig, axes = plt.subplots(n_vars, 1, figsize=(10, 3 * n_vars), sharex=True)
    if n_vars == 1:
        axes = [axes]

    for ax, var in zip(axes, variables):
        col = var.column
        ax.axvline(0, color="black", linestyle="--", linewidth=1, label=baseline_label)
        ax.plot(offsets, mean_df[col], color="tab:blue", label="Mean")
        ax.plot(offsets, median_df[col], color="tab:orange", linestyle="--", label="Median")

        if quantile_low is not None and quantile_high is not None:
            ax.fill_between(
                offsets,
                quantile_low[col],
                quantile_high[col],
                color="tab:blue",
                alpha=0.2,
                label=f"IQR {int(q_low*100)}–{int(q_high*100)}%",
            )

        ylabel = f"{var.label} ({var.unit})" if var.unit else var.label
        ax.set_ylabel(ylabel)
        ax.grid(True, linestyle=":", alpha=0.5)

    axes[-1].set_xlabel("Minutes around event start")
    axes[0].legend(loc="upper right")
    total_events = len(aligned_segments.index.get_level_values("event_id").unique())
    fig.suptitle(f"Event composites ({total_events} occurrences)")

    fig.tight_layout(rect=[0, 0, 1, 0.97])
    fig.savefig(output_path, dpi=150)
    plt.close(fig)

    return output_path.resolve()
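
plot_rolling_correlation_heatmap assumes nothing beyond the shape produced by compute_rolling_correlations_for_pairs: one 'x→y' column per pair, rows indexed by window end. A minimal sketch with a synthetic stand-in DataFrame (filename illustrative):

import numpy as np
import pandas as pd
from meteo.plots import plot_rolling_correlation_heatmap

rng = np.random.default_rng(0)
idx = pd.date_range("2024-01-01", periods=48, freq="30min")
rolling_df = pd.DataFrame(
    {
        "temperature→humidity": np.clip(-0.8 + 0.1 * rng.normal(size=48), -1.0, 1.0),
        "pressure→wind_speed": np.clip(0.2 * rng.normal(size=48), -1.0, 1.0),
    },
    index=idx,
)
plot_rolling_correlation_heatmap(rolling_df, "figures/rolling_correlations/demo_heatmap.png")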
scripts/plot_hexbin_explorations.py (new file, 128 lines)
@@ -0,0 +1,128 @@
# scripts/plot_hexbin_explorations.py
from __future__ import annotations

from pathlib import Path
from typing import Callable

import numpy as np

from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.plots import plot_hexbin_with_third_variable


CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_DIR = Path("figures/hexbin_explorations")


REDUCE_FUNCTIONS: dict[str, Callable[[np.ndarray], float]] = {
    "mean": np.mean,
    "median": np.median,
    "max": np.max,
}

REDUCE_LABEL_EN: dict[str, str] = {
    "mean": "mean",
    "median": "median",
    "max": "maximum",
}

# Each scenario illustrates either a well-known correlation
# or the absence of any structure between variables.
HEXBIN_SCENARIOS: list[dict[str, object]] = [
    {
        "x": "temperature",
        "y": "humidity",
        "color": "rain_rate",
        "filename": "hexbin_temp_humidity_color_rain.png",
        "description": (
            "Highlight how relative humidity tops out as the temperature drops "
            "and how rain episodes sit within a narrow band."
        ),
        "reduce": "max",
        "gridsize": 50,
        "mincnt": 8,
    },
    {
        "x": "pressure",
        "y": "rain_rate",
        "color": "wind_speed",
        "filename": "hexbin_pressure_rain_color_wind.png",
        "description": (
            "Check whether gusts really accompany pressure drops. "
            "Expect many empty cells: the correlation is far from systematic."
        ),
        "reduce": "median",
        "gridsize": 45,
        "mincnt": 5,
    },
    {
        "x": "illuminance",
        "y": "humidity",
        "color": "temperature",
        "filename": "hexbin_lux_humidity_color_temp.png",
        "description": (
            "Explore the day/night cycle: humidity rises as illuminance falls, "
            "but that does not always imply a rapid temperature drop."
        ),
        "reduce": "mean",
        "gridsize": 55,
        "mincnt": 6,
    },
]


def main() -> None:
    if not CSV_PATH.exists():
        print(f"⚠ File not found: {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Minutely dataset loaded: {CSV_PATH}")
    print(f"  Rows: {len(df)}")
    print(f"  Columns: {list(df.columns)}")
    print()

    for scenario in HEXBIN_SCENARIOS:
        key_x = scenario["x"]
        key_y = scenario["y"]
        key_color = scenario["color"]

        var_x = VARIABLES_BY_KEY[key_x]
        var_y = VARIABLES_BY_KEY[key_y]
        var_color = VARIABLES_BY_KEY[key_color]

        filename = scenario["filename"]
        output_path = OUTPUT_DIR / filename

        reduce_name = scenario.get("reduce", "mean")
        reduce_func = REDUCE_FUNCTIONS.get(reduce_name, np.mean)
        reduce_label = REDUCE_LABEL_EN.get(reduce_name, reduce_name)

        gridsize = int(scenario.get("gridsize", 60))
        mincnt = int(scenario.get("mincnt", 5))

        description = scenario["description"]
        print(f"→ Hexbin {var_y.key} vs {var_x.key} (color = {var_color.key})")
        print(f"  {description}")

        plot_hexbin_with_third_variable(
            df=df,
            var_x=var_x,
            var_y=var_y,
            var_color=var_color,
            output_path=output_path,
            gridsize=gridsize,
            mincnt=mincnt,
            reduce_func=reduce_func,
            reduce_func_label=reduce_label,
            cmap="magma",
        )
        print(f"  ✔ Figure saved: {output_path}")
        print()

    print("✔ All hexbin figures have been generated.")


if __name__ == "__main__":
    main()
scripts/plot_rain_event_composites.py (new file, 85 lines)
@@ -0,0 +1,85 @@
# scripts/plot_rain_event_composites.py
from __future__ import annotations

from pathlib import Path
from typing import Sequence

import pandas as pd

from meteo.dataset import load_raw_csv
from meteo.variables import Variable, VARIABLES_BY_KEY
from meteo.analysis import detect_threshold_events, build_event_aligned_segments
from meteo.plots import plot_event_composite


CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/event_composites/rain_event_composites.png")

RAIN_THRESHOLD = 0.2    # mm/h: below this we consider it is not really raining
MIN_EVENT_DURATION = 5  # minutes
MIN_EVENT_GAP = 20      # minutes of gap required to count a new event
WINDOW_BEFORE = 120     # minutes shown before rain onset
WINDOW_AFTER = 240      # minutes shown after onset

COMPOSITE_VARIABLE_KEYS: Sequence[str] = [
    "pressure",
    "temperature",
    "humidity",
    "wind_speed",
]


def main() -> None:
    if not CSV_PATH.exists():
        print(f"⚠ File not found: {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Minutely dataset loaded: {CSV_PATH}")
    print(f"  Rows: {len(df)}")
    print(f"  Columns: {list(df.columns)}")
    print()

    rain_series = df["rain_rate"]
    events = detect_threshold_events(
        rain_series,
        threshold=RAIN_THRESHOLD,
        min_duration=pd.Timedelta(minutes=MIN_EVENT_DURATION),
        min_gap=pd.Timedelta(minutes=MIN_EVENT_GAP),
    )

    if not events:
        print("⚠ No rain events detected with the current parameters.")
        return

    print(f"Number of detected events: {len(events)}")

    variables: list[Variable] = [VARIABLES_BY_KEY[key] for key in COMPOSITE_VARIABLE_KEYS]
    columns = [v.column for v in variables]

    aligned_segments = build_event_aligned_segments(
        df=df,
        events=events,
        columns=columns,
        window_before_minutes=WINDOW_BEFORE,
        window_after_minutes=WINDOW_AFTER,
        resample_minutes=1,
    )

    if aligned_segments.empty:
        print("⚠ Aligned segments are empty (missing period?).")
        return

    output_path = plot_event_composite(
        aligned_segments=aligned_segments,
        variables=variables,
        output_path=OUTPUT_PATH,
        quantiles=(0.2, 0.8),
        baseline_label="Rain onset",
    )

    print(f"✔ Rain composite figure saved: {output_path}")


if __name__ == "__main__":
    main()
scripts/plot_rolling_correlation_heatmap.py (new file, 65 lines)
@@ -0,0 +1,65 @@
# scripts/plot_rolling_correlation_heatmap.py
from __future__ import annotations

from pathlib import Path

from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_rolling_correlations_for_pairs
from meteo.plots import plot_rolling_correlation_heatmap


CSV_PATH = Path("data/weather_minutely.csv")
OUTPUT_PATH = Path("figures/rolling_correlations/rolling_correlation_heatmap.png")

ROLLING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]

WINDOW_MINUTES = 180  # 3 hours, to capture synoptic-scale trends
STEP_MINUTES = 30     # keep only one point every 30 minutes


def main() -> None:
    if not CSV_PATH.exists():
        print(f"⚠ File not found: {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Minutely dataset loaded: {CSV_PATH}")
    print(f"  Rows: {len(df)}")
    print(f"  Columns: {list(df.columns)}")
    print()

    pairs = [(VARIABLES_BY_KEY[a], VARIABLES_BY_KEY[b]) for a, b in ROLLING_PAIRS]

    rolling_df = compute_rolling_correlations_for_pairs(
        df=df,
        pairs=pairs,
        # With minutely data, each window needs 180 * 0.7 = 126 valid samples.
        window_minutes=WINDOW_MINUTES,
        min_valid_fraction=0.7,
        step_minutes=STEP_MINUTES,
        method="pearson",
    )

    if rolling_df.empty:
        print("⚠ Could not compute rolling correlations (insufficient data).")
        return

    output_path = plot_rolling_correlation_heatmap(
        rolling_corr=rolling_df,
        output_path=OUTPUT_PATH,
        cmap="coolwarm",
        vmin=-1.0,
        vmax=1.0,
    )

    print(f"✔ Rolling-correlation heatmap saved: {output_path}")


if __name__ == "__main__":
    main()