1

Premiers éléments de l'étude

This commit is contained in:
2025-12-01 21:57:05 +01:00
commit 6494948b3c
50 changed files with 2595 additions and 0 deletions

1
scripts/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Scripts d'orchestration pour préparer les données LEGO."""

View File

@@ -0,0 +1,31 @@
"""Assemble un inventaire des pièces par set et par couleur."""
from pathlib import Path
from lib.rebrickable.parts_inventory import write_parts_filtered
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
INVENTORIES_PATH = Path("data/raw/inventories.csv")
INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
COLORS_PATH = Path("data/raw/colors.csv")
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
MINIFIGS_PATH = Path("data/raw/minifigs.csv")
DESTINATION_PATH = Path("data/intermediate/parts_filtered.csv")


def main() -> None:
    """Generate the parts_filtered.csv file, ready for the subsequent analyses.

    Forwards the six input paths, followed by DESTINATION_PATH, to
    write_parts_filtered as positional arguments.
    """
    # Order matters: the helper receives these positionally.
    input_paths = (
        SETS_PATH,
        INVENTORIES_PATH,
        INVENTORY_PARTS_PATH,
        COLORS_PATH,
        INVENTORY_MINIFIGS_PATH,
        MINIFIGS_PATH,
    )
    write_parts_filtered(*input_paths, DESTINATION_PATH)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,22 @@
"""Calcule des statistiques simples sur les pièces filtrées."""
from pathlib import Path
from lib.rebrickable.parts_stats import read_rows, build_stats, write_parts_stats
PARTS_PATH = Path("data/intermediate/parts_filtered.csv")
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
STATS_PATH = Path("data/final/stats.csv")
DESTINATION_PATH = Path("data/final/parts_stats.csv")


def main() -> None:
    """Load the filtered parts and write the associated statistics.

    Rows read from PARTS_PATH are handed to build_stats together with
    SETS_PATH, PARTS_PATH and STATS_PATH; the result is written to
    DESTINATION_PATH by write_parts_stats.
    """
    parts_stats = build_stats(
        read_rows(PARTS_PATH),
        SETS_PATH,
        PARTS_PATH,
        STATS_PATH,
    )
    write_parts_stats(DESTINATION_PATH, parts_stats)


if __name__ == "__main__":
    main()

26
scripts/compute_stats.py Normal file
View File

@@ -0,0 +1,26 @@
"""Calcule et exporte les statistiques principales sur les sets LEGO filtrés."""
from pathlib import Path
from lib.rebrickable.stats import compute_basic_stats, read_rows, write_stats_csv
THEMES_PATH = Path("data/raw/themes.csv")
ALL_SETS_PATH = Path("data/raw/sets.csv")
FILTERED_SETS_PATH = Path("data/intermediate/sets_filtered.csv")
ENRICHED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
DESTINATION_PATH = Path("data/final/stats.csv")


def main() -> None:
    """Load the data, compute the statistics and export the CSV.

    The four inputs are read with read_rows in the order themes, all sets,
    filtered sets, enriched sets, and passed in that order to
    compute_basic_stats. The result is written to DESTINATION_PATH.
    """
    input_paths = (
        THEMES_PATH,
        ALL_SETS_PATH,
        FILTERED_SETS_PATH,
        ENRICHED_SETS_PATH,
    )
    # A list (not a generator) so every file is read before stats are computed.
    tables = [read_rows(path) for path in input_paths]
    write_stats_csv(DESTINATION_PATH, compute_basic_stats(*tables))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,25 @@
"""Télécharge les fichiers nécessaires aux pièces LEGO depuis Rebrickable."""
from pathlib import Path
from lib.rebrickable.downloader import download_rebrickable_files
FILES_TO_DOWNLOAD = [
    "inventories.csv.gz",
    "inventory_parts.csv.gz",
    "parts.csv.gz",
    "colors.csv.gz",
    "inventory_minifigs.csv.gz",
    "minifigs.csv.gz",
]
DESTINATION_DIR = Path("data/raw")


def main() -> None:
    """Start the download of the files related to LEGO parts.

    Hands the FILES_TO_DOWNLOAD list and DESTINATION_DIR to
    download_rebrickable_files.
    """
    download_rebrickable_files(FILES_TO_DOWNLOAD, DESTINATION_DIR)


if __name__ == "__main__":
    main()

18
scripts/download_sets.py Normal file
View File

@@ -0,0 +1,18 @@
"""Télécharge le catalogue des sets LEGO depuis Rebrickable."""
from pathlib import Path
from lib.rebrickable.downloader import download_rebrickable_file
SETS_FILE_NAME = "sets.csv.gz"
DESTINATION_DIR = Path("data/raw")


def main() -> None:
    """Start the download of the sets file.

    Hands SETS_FILE_NAME and DESTINATION_DIR to download_rebrickable_file.
    """
    download_rebrickable_file(SETS_FILE_NAME, DESTINATION_DIR)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,18 @@
"""Télécharge le catalogue des thèmes LEGO depuis Rebrickable."""
from pathlib import Path
from lib.rebrickable.downloader import download_rebrickable_file
THEMES_FILE_NAME = "themes.csv.gz"
DESTINATION_DIR = Path("data/raw")


def main() -> None:
    """Start the download of the themes file.

    Hands THEMES_FILE_NAME and DESTINATION_DIR to download_rebrickable_file.
    """
    download_rebrickable_file(THEMES_FILE_NAME, DESTINATION_DIR)


if __name__ == "__main__":
    main()

31
scripts/enrich_sets.py Normal file
View File

@@ -0,0 +1,31 @@
"""Enrichit les sets filtrés avec des métadonnées et la présence en collection."""
import os
from pathlib import Path
from dotenv import load_dotenv
from lib.rebrickable.enrich_sets import (
enrich_sets,
load_owned_set_ids,
parse_set_collection_root,
write_missing_sets_markdown,
)
SOURCE_PATH = Path("data/intermediate/sets_filtered.csv")
DESTINATION_PATH = Path("data/intermediate/sets_enriched.csv")
MISSING_MARKDOWN_PATH = Path("data/final/sets_missing.md")


def main() -> None:
    """Run the enrichment of the filtered sets.

    After load_dotenv() has run, the MY_SETS environment variable (empty
    string when unset) is passed to parse_set_collection_root. When that
    returns None, an empty set of owned ids is used; otherwise the ids come
    from load_owned_set_ids. The enriched file is written to
    DESTINATION_PATH, then write_missing_sets_markdown is called with that
    file and MISSING_MARKDOWN_PATH.
    """
    load_dotenv()

    collection_root = parse_set_collection_root(os.environ.get("MY_SETS", ""))
    if collection_root is None:
        owned_set_ids = set()
    else:
        owned_set_ids = load_owned_set_ids(collection_root)

    enrich_sets(SOURCE_PATH, DESTINATION_PATH, owned_set_ids)
    write_missing_sets_markdown(DESTINATION_PATH, MISSING_MARKDOWN_PATH)


if __name__ == "__main__":
    main()

24
scripts/filter_sets.py Normal file
View File

@@ -0,0 +1,24 @@
"""Filtre les sets LEGO pour ne conserver que les thèmes ciblés."""
import os
from pathlib import Path
from dotenv import load_dotenv
from lib.rebrickable.filter_sets import filter_sets_by_theme, parse_theme_ids
SOURCE_PATH = Path("data/raw/sets.csv")
DESTINATION_PATH = Path("data/intermediate/sets_filtered.csv")
OVERRIDES_PATH = Path("config/num_parts_overrides.csv")


def main() -> None:
    """Run the set filtering from the theme identifiers defined in the environment.

    After load_dotenv() has run, the THEME_IDS environment variable is parsed
    with parse_theme_ids and the result is passed to filter_sets_by_theme
    along with SOURCE_PATH, DESTINATION_PATH and OVERRIDES_PATH.

    Raises:
        KeyError: if THEME_IDS is not defined in the environment.
    """
    load_dotenv()
    try:
        raw_theme_ids = os.environ["THEME_IDS"]
    except KeyError:
        # Same exception type as before, but with an actionable message
        # instead of the bare variable name.
        raise KeyError(
            "THEME_IDS is not set; define it in the environment or in a .env file"
        ) from None

    theme_ids = parse_theme_ids(raw_theme_ids)
    filter_sets_by_theme(SOURCE_PATH, DESTINATION_PATH, theme_ids, OVERRIDES_PATH)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,21 @@
"""Génère une grille artistique des couleurs utilisées."""
from pathlib import Path
from lib.plots.colors_grid import plot_colors_grid
PARTS_PATH = Path("data/intermediate/parts_filtered.csv")
COLORS_PATH = Path("data/raw/colors.csv")
DESTINATION_PATH = Path("figures/step12/colors_grid.png")
MINIFIG_DESTINATION_PATH = Path("figures/step12/colors_grid_minifigs.png")


def main() -> None:
    """Build the visuals of the colour palettes in use.

    Calls plot_colors_grid twice with the same inputs: first with
    minifig_only=False writing to DESTINATION_PATH, then with
    minifig_only=True writing to MINIFIG_DESTINATION_PATH.
    """
    renders = (
        (DESTINATION_PATH, False),
        (MINIFIG_DESTINATION_PATH, True),
    )
    for destination, minifig_only in renders:
        plot_colors_grid(
            PARTS_PATH,
            COLORS_PATH,
            destination,
            minifig_only=minifig_only,
        )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,19 @@
"""Trace la moyenne annuelle et glissante des pièces par set."""
from pathlib import Path
from lib.plots.parts_per_set import plot_parts_per_set
ENRICHED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
MILESTONES_PATH = Path("config/milestones.csv")
DESTINATION_PATH = Path("figures/step07/avg_parts_per_set.png")


def main() -> None:
    """Generate the chart of average set sizes.

    Hands ENRICHED_SETS_PATH, MILESTONES_PATH and DESTINATION_PATH to
    plot_parts_per_set.
    """
    plot_parts_per_set(
        ENRICHED_SETS_PATH,
        MILESTONES_PATH,
        DESTINATION_PATH,
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,19 @@
"""Trace l'évolution du nombre de sets par année (thèmes filtrés) avec jalons."""
from pathlib import Path
from lib.plots.sets_per_year import plot_sets_per_year
ENRICHED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
MILESTONES_PATH = Path("config/milestones.csv")
DESTINATION_PATH = Path("figures/step07/sets_per_year.png")


def main() -> None:
    """Generate the chart of sets per year.

    Hands ENRICHED_SETS_PATH, MILESTONES_PATH and DESTINATION_PATH to
    plot_sets_per_year.
    """
    plot_sets_per_year(
        ENRICHED_SETS_PATH,
        MILESTONES_PATH,
        DESTINATION_PATH,
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,30 @@
"""Produit un rapport des écarts entre inventaires et catalogue."""
from pathlib import Path
from lib.rebrickable.inventory_reconciliation import (
compute_inventory_gaps,
index_sets_by_num,
load_sets,
write_inventory_gaps_csv,
write_inventory_gaps_markdown,
)
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
PARTS_PATH = Path("data/intermediate/parts_filtered.csv")
DESTINATION_PATH = Path("data/final/inventory_gaps.csv")
MARKDOWN_PATH = Path("data/final/inventory_gaps.md")


def main() -> None:
    """Generate the inventory gaps file.

    Sets loaded from SETS_PATH are indexed with index_sets_by_num. The gaps
    returned by compute_inventory_gaps(SETS_PATH, PARTS_PATH) are written as
    CSV to DESTINATION_PATH and as Markdown to MARKDOWN_PATH; only the
    Markdown writer also receives the index.
    """
    # The index is built before the gaps are computed, as in the original order.
    sets_by_num = index_sets_by_num(load_sets(SETS_PATH))
    inventory_gaps = compute_inventory_gaps(SETS_PATH, PARTS_PATH)

    write_inventory_gaps_csv(DESTINATION_PATH, inventory_gaps)
    write_inventory_gaps_markdown(MARKDOWN_PATH, inventory_gaps, sets_by_num)


if __name__ == "__main__":
    main()