From 03d69ff6c88892324d41b434b45fd82cacdccd22 Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Tue, 2 Dec 2025 00:31:22 +0100 Subject: [PATCH] Ajoute le graphique du nombre de minifigs par set --- README.md | 10 +++++ lib/plots/minifig_counts.py | 42 ++++++++++++++++++ lib/rebrickable/minifig_counts.py | 71 +++++++++++++++++++++++++++++++ scripts/plot_minifigs_per_set.py | 24 +++++++++++ tests/test_minifig_counts.py | 47 ++++++++++++++++++++ tests/test_minifig_counts_plot.py | 25 +++++++++++ 6 files changed, 219 insertions(+) create mode 100644 lib/plots/minifig_counts.py create mode 100644 lib/rebrickable/minifig_counts.py create mode 100644 scripts/plot_minifigs_per_set.py create mode 100644 tests/test_minifig_counts.py create mode 100644 tests/test_minifig_counts_plot.py diff --git a/README.md b/README.md index 4e5095e..1d5b133 100644 --- a/README.md +++ b/README.md @@ -223,3 +223,13 @@ Cette étape se lance après le téléchargement des données d'inventaire (éta 2. `python -m scripts.compute_minifigs_by_set` Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv` ainsi que le catalogue des pièces (`data/raw/parts.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Le CSV `data/intermediate/minifigs_by_set.csv` contient une ligne par set et par référence de tête : `set_num`, `part_num`, `part_name`. + +### Étape 21 : visualiser le nombre de minifigs par set + +1. `source .venv/bin/activate` +2. 
`python -m scripts.plot_minifigs_per_set` + +Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_filtered.csv` et `data/raw/parts.csv`, compte les têtes de minifigs hors rechanges et produit deux sorties : + +- `data/intermediate/minifig_counts_by_set.csv` : `set_num`, `set_id`, `name`, `year`, `minifig_count` +- `figures/step20/minifigs_per_set.png` : diagramme en barres horizontales (ordre décroissant) du nombre de minifigs par set filtré diff --git a/lib/plots/minifig_counts.py b/lib/plots/minifig_counts.py new file mode 100644 index 0000000..d9355aa --- /dev/null +++ b/lib/plots/minifig_counts.py @@ -0,0 +1,42 @@ +"""Graphique du nombre de minifigs par set.""" + +from pathlib import Path +from typing import List + +import matplotlib.pyplot as plt + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +def load_counts(path: Path) -> List[dict]: + """Charge le CSV des comptes de minifigs par set.""" + return read_rows(path) + + +def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None: + """Trace un diagramme en barres du nombre de minifigs par set (thèmes filtrés).""" + rows = load_counts(counts_path) + labels = [f"{row['set_num']} - {row['name']}" for row in rows] + values = [int(row["minifig_count"]) for row in rows] + positions = list(range(len(rows))) + max_value = max(values) + + height = max(6, len(rows) * 0.18) + fig, ax = plt.subplots(figsize=(14, height)) + bars = ax.barh(positions, values, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.6) + ax.set_yticks(positions) + ax.set_yticklabels(labels) + ax.invert_yaxis() + ax.set_xlabel("Nombre de minifigs") + ax.set_title("Minifigs par set (thèmes filtrés)") + ax.set_xlim(0, max_value + 0.8) + ax.grid(True, axis="x", linestyle="--", alpha=0.25) + for index, bar in enumerate(bars): + value = values[index] + ax.text(value + 0.2, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8) + + 
def count_heads_by_set(
    sets_rows: Iterable[dict],
    parts_rows: Iterable[dict],
    head_parts: Set[str],
) -> List[dict]:
    """Count the minifig heads found in each set, ignoring spare parts.

    Args:
        sets_rows: Set rows; each must provide ``set_num``, ``set_id``,
            ``name`` and ``year``. Any iterable is accepted, including a
            one-shot iterator or generator.
        parts_rows: Inventory rows; each head row must provide ``part_num``,
            ``is_spare``, ``set_num`` and ``quantity_in_set``.
        head_parts: Part numbers that identify a minifig head.

    Returns:
        One dict per set with the keys ``set_num``, ``set_id``, ``name``,
        ``year`` and ``minifig_count``, sorted by descending count and then
        by ``set_num``. Sets without any head are kept with a count of 0.

    Raises:
        KeyError: If a row lacks one of the keys listed above.
        ValueError: If ``quantity_in_set`` is not an integer literal.
    """
    # The set rows are walked twice (once to seed the counters, once to build
    # the result), so a one-shot iterator must be materialised first.
    known_sets = list(sets_rows)
    counts: Dict[str, int] = {row["set_num"]: 0 for row in known_sets}

    for part in parts_rows:
        # Only non-spare heads count; the flag is compared with the literal
        # string "true".
        if part["part_num"] not in head_parts or part["is_spare"] == "true":
            continue
        set_num = part["set_num"]
        if set_num not in counts:
            # The report only covers the sets passed in; heads that belong to
            # any other set are ignored instead of raising KeyError.
            continue
        counts[set_num] += int(part["quantity_in_set"])

    results: List[dict] = [
        {
            "set_num": row["set_num"],
            "set_id": row["set_id"],
            "name": row["name"],
            "year": row["year"],
            "minifig_count": counts[row["set_num"]],
        }
        for row in known_sets
    ]
    # Largest counts first; ties are broken by set number for a stable output.
    results.sort(key=lambda r: (-r["minifig_count"], r["set_num"]))
    return results
def write_minifig_counts(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the per-set minifig counts to a CSV file.

    The parent directory is prepared through ``ensure_parent_dir`` before the
    file is opened. The header is always written, followed by one line per
    entry of ``rows`` with the columns ``set_num``, ``set_id``, ``name``,
    ``year`` and ``minifig_count``.
    """
    columns = ["set_num", "set_id", "name", "year", "minifig_count"]
    ensure_parent_dir(destination_path)
    # newline="" lets the csv module control line endings itself.
    with destination_path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def test_build_minifig_counts_by_set_counts_heads_without_spares(tmp_path: Path) -> None:
    """Count non-spare heads for every set and keep sets without heads at 0."""
    sets_path = tmp_path / "sets_enriched.csv"
    parts_filtered_path = tmp_path / "parts_filtered.csv"
    parts_catalog_path = tmp_path / "parts.csv"

    # Three sets: A has one real head plus spares, B has heads and another
    # part, C has no inventory row at all.
    write_csv(
        sets_path,
        "set_num,name,year,set_id\n"
        "123-1,Set A,2020,123\n"
        "124-1,Set B,2021,124\n"
        "125-1,Set C,2022,125\n",
    )
    write_csv(
        parts_filtered_path,
        "part_num,color_rgb,is_translucent,set_num,set_id,year,quantity_in_set,is_spare,is_minifig_part\n"
        "head-a,ffffff,false,123-1,123,2020,1,false,true\n"
        "head-a,ffffff,false,123-1,123,2020,2,true,true\n"
        "head-b,ffffff,false,124-1,124,2021,3,false,true\n"
        "other,000000,false,124-1,124,2021,1,false,false\n",
    )
    # Category 59 marks the two head references; "other" is a regular part.
    write_csv(
        parts_catalog_path,
        "part_num,name,part_cat_id\n"
        "head-a,Head A,59\n"
        "head-b,Head B,59\n"
        "other,Other,1\n",
    )

    expected = [
        {"set_num": "124-1", "set_id": "124", "name": "Set B", "year": "2021", "minifig_count": 3},
        {"set_num": "123-1", "set_id": "123", "name": "Set A", "year": "2020", "minifig_count": 1},
        {"set_num": "125-1", "set_id": "125", "name": "Set C", "year": "2022", "minifig_count": 0},
    ]
    result = build_minifig_counts_by_set(sets_path, parts_filtered_path, parts_catalog_path)

    assert result == expected
tmp_path / "figures" / "step20" / "minifigs_per_set.png" + counts_path.write_text( + "set_num,set_id,name,year,minifig_count\n" + "123-1,123,Set A,2020,2\n" + "124-1,124,Set B,2021,1\n" + ) + + plot_minifigs_per_set(counts_path, destination_path) + + assert destination_path.exists() + assert destination_path.stat().st_size > 0