You've already forked donnees_meteo
Premières analyses de corrélation
This commit is contained in:
45
scripts/plot_all_pairwise_scatter.py
Normal file
45
scripts/plot_all_pairwise_scatter.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# scripts/plot_all_pairwise_scatter.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import iter_variable_pairs
|
||||
from meteo.plots import plot_scatter_pair
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
OUTPUT_DIR = Path("figures/pairwise_scatter")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not CSV_PATH.exists():
|
||||
print(f"⚠ Fichier introuvable : {CSV_PATH}")
|
||||
return
|
||||
|
||||
df = load_raw_csv(CSV_PATH)
|
||||
print(f"Dataset minuté chargé : {CSV_PATH}")
|
||||
print(f" Lignes : {len(df)}")
|
||||
print(f" Colonnes : {list(df.columns)}")
|
||||
|
||||
pairs = iter_variable_pairs()
|
||||
print(f"Nombre de paires de variables : {len(pairs)}")
|
||||
|
||||
for var_x, var_y in pairs:
|
||||
filename = f"scatter_{var_x.key}_vs_{var_y.key}.png"
|
||||
output_path = OUTPUT_DIR / filename
|
||||
|
||||
print(f"→ Trace {var_y.key} en fonction de {var_x.key} → {output_path}")
|
||||
plot_scatter_pair(
|
||||
df=df,
|
||||
var_x=var_x,
|
||||
var_y=var_y,
|
||||
output_path=output_path,
|
||||
sample_step=10, # un point sur 10 : ≈ 32k points au lieu de 320k
|
||||
)
|
||||
|
||||
print("✔ Tous les graphiques de nuages de points ont été générés.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
45
scripts/plot_correlation_heatmap.py
Normal file
45
scripts/plot_correlation_heatmap.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# scripts/plot_correlation_heatmap.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES
|
||||
from meteo.analysis import compute_correlation_matrix_for_variables
|
||||
from meteo.plots import plot_correlation_heatmap
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
OUTPUT_PATH = Path("figures/correlation_heatmap.png")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not CSV_PATH.exists():
|
||||
print(f"⚠ Fichier introuvable : {CSV_PATH}")
|
||||
print(" Assurez-vous d'avoir généré le dataset minuté.")
|
||||
return
|
||||
|
||||
df = load_raw_csv(CSV_PATH)
|
||||
print(f"Dataset minuté chargé : {CSV_PATH}")
|
||||
print(f" Lignes : {len(df)}")
|
||||
print(f" Colonnes : {list(df.columns)}")
|
||||
print()
|
||||
|
||||
corr = compute_correlation_matrix_for_variables(df, VARIABLES, method="pearson")
|
||||
|
||||
print("Matrice de corrélation (aperçu) :")
|
||||
print(corr)
|
||||
print()
|
||||
|
||||
output_path = plot_correlation_heatmap(
|
||||
corr=corr,
|
||||
variables=VARIABLES,
|
||||
output_path=OUTPUT_PATH,
|
||||
annotate=True,
|
||||
)
|
||||
|
||||
print(f"✔ Heatmap de corrélation sauvegardée dans : {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
69
scripts/plot_lagged_correlations.py
Normal file
69
scripts/plot_lagged_correlations.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# scripts/plot_lagged_correlations.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from meteo.dataset import load_raw_csv
|
||||
from meteo.variables import VARIABLES_BY_KEY
|
||||
from meteo.analysis import compute_lagged_correlation
|
||||
from meteo.plots import plot_lagged_correlation
|
||||
|
||||
|
||||
CSV_PATH = Path("data/weather_minutely.csv")
|
||||
OUTPUT_DIR = Path("figures/lagged_correlations")
|
||||
|
||||
|
||||
# Paires à analyser (clé de variable X, clé de variable Y)
|
||||
# Convention : X précède potentiellement Y
|
||||
INTERESTING_PAIRS: list[tuple[str, str]] = [
|
||||
("temperature", "humidity"),
|
||||
("temperature", "rain_rate"),
|
||||
("pressure", "rain_rate"),
|
||||
("pressure", "wind_speed"),
|
||||
("pressure", "illuminance"),
|
||||
("illuminance", "temperature"),
|
||||
("humidity", "rain_rate"),
|
||||
]
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not CSV_PATH.exists():
|
||||
print(f"⚠ Fichier introuvable : {CSV_PATH}")
|
||||
return
|
||||
|
||||
df = load_raw_csv(CSV_PATH)
|
||||
print(f"Dataset minuté chargé : {CSV_PATH}")
|
||||
print(f" Lignes : {len(df)}")
|
||||
print(f" Colonnes : {list(df.columns)}")
|
||||
print()
|
||||
|
||||
for key_x, key_y in INTERESTING_PAIRS:
|
||||
var_x = VARIABLES_BY_KEY[key_x]
|
||||
var_y = VARIABLES_BY_KEY[key_y]
|
||||
|
||||
print(f"→ Corrélation décalée : {var_x.key} → {var_y.key}")
|
||||
|
||||
lag_df = compute_lagged_correlation(
|
||||
df=df,
|
||||
var_x=var_x,
|
||||
var_y=var_y,
|
||||
max_lag_minutes=360, # ± 6 heures
|
||||
step_minutes=10, # pas de 10 minutes
|
||||
method="pearson",
|
||||
)
|
||||
|
||||
filename = f"lagcorr_{var_x.key}_to_{var_y.key}.png"
|
||||
output_path = OUTPUT_DIR / filename
|
||||
|
||||
plot_lagged_correlation(
|
||||
lag_df=lag_df,
|
||||
var_x=var_x,
|
||||
var_y=var_y,
|
||||
output_path=output_path,
|
||||
)
|
||||
|
||||
print("✔ Graphiques de corrélation décalée générés.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user