1

Premières analyses de corrélation

This commit is contained in:
2025-11-17 14:59:59 +01:00
parent 62a928ec85
commit 5a546688f1
36 changed files with 631 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
# scripts/plot_all_pairwise_scatter.py
from __future__ import annotations
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.variables import iter_variable_pairs
from meteo.plots import plot_scatter_pair
# Input dataset checked and loaded by main(), and the directory under which
# each scatter figure's output path is built.
CSV_PATH = Path("data") / "weather_minutely.csv"
OUTPUT_DIR = Path("figures") / "pairwise_scatter"
def main() -> None:
    """Request one scatter plot per pair of variables of the minutely dataset.

    Loads ``CSV_PATH`` with ``load_raw_csv``, prints the row count and column
    names of the loaded frame, then calls ``plot_scatter_pair`` for every pair
    yielded by ``iter_variable_pairs``. Each call targets
    ``OUTPUT_DIR / "scatter_<x.key>_vs_<y.key>.png"``.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns without loading or plotting anything.
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")

    # The pairs are counted and then iterated; materialising them keeps both
    # steps valid even if the helper hands back a one-shot iterator.
    pairs = list(iter_variable_pairs())
    print(f"Nombre de paires de variables : {len(pairs)}")

    for var_x, var_y in pairs:
        output_path = OUTPUT_DIR / f"scatter_{var_x.key}_vs_{var_y.key}.png"
        # Keep the variable key and the destination path visually separated.
        print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
        plot_scatter_pair(
            df=df,
            var_x=var_x,
            var_y=var_y,
            output_path=output_path,
            # Author's note: one point out of 10, ≈ 32k points instead of 320k.
            sample_step=10,
        )

    print("✔ Tous les graphiques de nuages de points ont été générés.")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,45 @@
# scripts/plot_correlation_heatmap.py
from __future__ import annotations
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES
from meteo.analysis import compute_correlation_matrix_for_variables
from meteo.plots import plot_correlation_heatmap
# Input dataset checked and loaded by main(), and the image path handed to the
# heatmap plotting helper.
CSV_PATH = Path("data") / "weather_minutely.csv"
OUTPUT_PATH = Path("figures") / "correlation_heatmap.png"
def main() -> None:
    """Print the correlation matrix of ``VARIABLES`` and request its heatmap.

    Loads ``CSV_PATH`` with ``load_raw_csv``, prints the row count and column
    names, computes the matrix with
    ``compute_correlation_matrix_for_variables`` (``method="pearson"``),
    prints it, and passes it to ``plot_correlation_heatmap`` together with
    ``OUTPUT_PATH``. The value returned by the plotting helper is echoed in
    the final message.

    If ``CSV_PATH`` does not exist, two warning lines are printed and the
    function returns early.
    """
    dataset_missing = not CSV_PATH.exists()
    if dataset_missing:
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        print(" Assurez-vous d'avoir généré le dataset minuté.")
        return

    frame = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(frame)}")
    # The doubled newline stands in for the blank separator line.
    print(f" Colonnes : {list(frame.columns)}", end="\n\n")

    matrix = compute_correlation_matrix_for_variables(
        frame, VARIABLES, method="pearson"
    )
    # Heading, matrix, then a blank line, emitted in a single call.
    print("Matrice de corrélation (aperçu) :", matrix, sep="\n", end="\n\n")

    saved_to = plot_correlation_heatmap(
        corr=matrix,
        variables=VARIABLES,
        output_path=OUTPUT_PATH,
        annotate=True,
    )
    print(f"✔ Heatmap de corrélation sauvegardée dans : {saved_to}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,69 @@
# scripts/plot_lagged_correlations.py
from __future__ import annotations
from pathlib import Path
from meteo.dataset import load_raw_csv
from meteo.variables import VARIABLES_BY_KEY
from meteo.analysis import compute_lagged_correlation
from meteo.plots import plot_lagged_correlation
# Input dataset checked and loaded by main(), and the directory under which
# each lagged-correlation figure's output path is built.
CSV_PATH = Path("data") / "weather_minutely.csv"
OUTPUT_DIR = Path("figures") / "lagged_correlations"

# Pairs to analyse, as (key of variable X, key of variable Y).
# Convention: X potentially precedes Y.
INTERESTING_PAIRS: list[tuple[str, str]] = [
    ("temperature", "humidity"),
    ("temperature", "rain_rate"),
    ("pressure", "rain_rate"),
    ("pressure", "wind_speed"),
    ("pressure", "illuminance"),
    ("illuminance", "temperature"),
    ("humidity", "rain_rate"),
]
def main() -> None:
    """Compute and plot the lagged correlation of each pair of interest.

    Loads ``CSV_PATH`` with ``load_raw_csv`` and prints the row count and
    column names. For every ``(key_x, key_y)`` in ``INTERESTING_PAIRS`` it
    looks both variables up in ``VARIABLES_BY_KEY``, calls
    ``compute_lagged_correlation`` (``max_lag_minutes=360``,
    ``step_minutes=10``, ``method="pearson"``) and hands the result to
    ``plot_lagged_correlation`` with the output path
    ``OUTPUT_DIR / "lagcorr_<x.key>_to_<y.key>.png"``.

    If ``CSV_PATH`` does not exist, a warning is printed and the function
    returns early. A key missing from ``VARIABLES_BY_KEY`` is not handled
    here; the lookup error propagates (``KeyError`` if it is a plain dict).
    """
    if not CSV_PATH.exists():
        print(f"⚠ Fichier introuvable : {CSV_PATH}")
        return

    df = load_raw_csv(CSV_PATH)
    print(f"Dataset minuté chargé : {CSV_PATH}")
    print(f" Lignes : {len(df)}")
    print(f" Colonnes : {list(df.columns)}")
    print()

    for key_x, key_y in INTERESTING_PAIRS:
        var_x = VARIABLES_BY_KEY[key_x]
        var_y = VARIABLES_BY_KEY[key_y]

        # Separate the two keys so the pair stays readable in the log.
        print(f"→ Corrélation décalée : {var_x.key} → {var_y.key}")

        lag_df = compute_lagged_correlation(
            df=df,
            var_x=var_x,
            var_y=var_y,
            max_lag_minutes=360,  # ± 6 hours
            step_minutes=10,  # 10-minute step
            method="pearson",
        )

        output_path = OUTPUT_DIR / f"lagcorr_{var_x.key}_to_{var_y.key}.png"
        plot_lagged_correlation(
            lag_df=lag_df,
            var_x=var_x,
            var_y=var_y,
            output_path=output_path,
        )

    print("✔ Graphiques de corrélation décalée générés.")


if __name__ == "__main__":
    main()