1

Ajoute le graphique du nombre de minifigs par set

This commit is contained in:
Richard Dern 2025-12-02 00:31:22 +01:00
parent 5b1a94023b
commit 03d69ff6c8
6 changed files with 219 additions and 0 deletions

View File

@ -223,3 +223,13 @@ Cette étape se lance après le téléchargement des données d'inventaire (éta
2. `python -m scripts.compute_minifigs_by_set`
Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv` ainsi que le catalogue des pièces (`data/raw/parts.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Le CSV `data/intermediate/minifigs_by_set.csv` contient une ligne par set et par référence de tête : `set_num`, `part_num`, `part_name`.
### Étape 21 : visualiser le nombre de minifigs par set
1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifigs_per_set`
Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_filtered.csv` et `data/raw/parts.csv`, compte les têtes de minifigs hors rechanges et produit deux sorties :
- `data/intermediate/minifig_counts_by_set.csv` : `set_num`, `set_id`, `name`, `year`, `minifig_count`
- `figures/step20/minifigs_per_set.png` : diagramme en barres horizontales (ordre décroissant) du nombre de minifigs par set filtré

View File

@ -0,0 +1,42 @@
"""Graphique du nombre de minifigs par set."""
from pathlib import Path
from typing import List
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_counts(path: Path) -> List[dict]:
    """Return the rows of the per-set minifig count CSV found at ``path``.

    Reading is delegated entirely to ``read_rows``.
    """
    count_rows = read_rows(path)
    return count_rows
def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None:
    """Draw a horizontal bar chart of the minifig count of each set (filtered themes).

    Rows are loaded from ``counts_path`` with ``load_counts`` and plotted in
    file order, the first row at the top of the chart. Every row must provide
    ``set_num``, ``name`` and a ``minifig_count`` that ``int()`` can parse.
    ``ensure_parent_dir`` is called on ``destination_path`` before the figure
    is saved there.

    A file without any data row produces an empty chart instead of raising.
    """
    rows = load_counts(counts_path)
    labels = [f"{row['set_num']} - {row['name']}" for row in rows]
    values = [int(row["minifig_count"]) for row in rows]
    positions = list(range(len(rows)))
    # max() raises ValueError on an empty sequence; default to 0 so that an
    # empty counts file still yields a (blank) figure.
    max_value = max(values, default=0)
    # Figure height scales with the number of bars, never below 6.
    height = max(6, len(rows) * 0.18)

    fig, ax = plt.subplots(figsize=(14, height))
    bars = ax.barh(positions, values, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    # Flip the y axis so position 0 (first row) is drawn at the top.
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de minifigs")
    ax.set_title("Minifigs par set (thèmes filtrés)")
    # Extra room on the right for the value written next to the longest bar.
    ax.set_xlim(0, max_value + 0.8)
    ax.grid(True, axis="x", linestyle="--", alpha=0.25)
    # Write each count just past the end of its bar, vertically centred on it.
    for bar, value in zip(bars, values):
        ax.text(value + 0.2, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8)

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    # Close explicitly so the figure does not stay registered in pyplot.
    plt.close(fig)

View File

@ -0,0 +1,71 @@
"""Comptage des minifigs par set filtré."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
from lib.rebrickable.stats import read_rows
def load_sets(path: Path) -> List[dict]:
    """Return the rows of the enriched sets CSV found at ``path``.

    Reading is delegated entirely to ``read_rows``.
    """
    set_rows = read_rows(path)
    return set_rows
def load_parts_filtered(path: Path) -> List[dict]:
    """Return every row of ``parts_filtered.csv`` (at ``path``) as an in-memory list.

    Reading is delegated entirely to ``read_rows``.
    """
    inventory_rows = read_rows(path)
    return inventory_rows
def count_heads_by_set(
    sets_rows: Iterable[dict],
    parts_rows: Iterable[dict],
    head_parts: Set[str],
) -> List[dict]:
    """Count the minifig heads found in each set, spare parts excluded.

    Args:
        sets_rows: One dict per set with the keys ``set_num``, ``set_id``,
            ``name`` and ``year``. Any iterable is accepted, including a
            one-shot iterator or generator.
        parts_rows: Inventory rows with the keys ``part_num``, ``set_num``,
            ``is_spare`` and ``quantity_in_set``.
        head_parts: Part numbers that are counted as minifig heads.

    Returns:
        One dict per entry of ``sets_rows`` with the keys ``set_num``,
        ``set_id``, ``name``, ``year`` and ``minifig_count``, sorted by
        descending ``minifig_count`` and then ascending ``set_num``. Sets
        without any counted head are kept with a count of 0.

    Raises:
        KeyError: If a non-spare head row refers to a ``set_num`` that is
            absent from ``sets_rows``.
    """
    # sets_rows is walked twice (to seed the counters, then to build the
    # output); materialise it so a one-shot iterator is not exhausted by the
    # first pass, which would silently produce an empty result.
    known_sets = list(sets_rows)
    counts: Dict[str, int] = {row["set_num"]: 0 for row in known_sets}
    for row in parts_rows:
        if row["part_num"] not in head_parts:
            continue
        # Spare flags are compared against the literal text "true".
        if row["is_spare"] == "true":
            continue
        counts[row["set_num"]] += int(row["quantity_in_set"])
    results: List[dict] = [
        {
            "set_num": row["set_num"],
            "set_id": row["set_id"],
            "name": row["name"],
            "year": row["year"],
            "minifig_count": counts[row["set_num"]],
        }
        for row in known_sets
    ]
    # Largest counts first; ties are broken by set number for a stable order.
    results.sort(key=lambda r: (-r["minifig_count"], r["set_num"]))
    return results
def build_minifig_counts_by_set(
    sets_path: Path,
    parts_filtered_path: Path,
    parts_catalog_path: Path,
) -> List[dict]:
    """Build the list of sets together with their minifig count.

    The sets and the filtered inventory are loaded first, then the parts
    catalog is reduced to the set of head part numbers via
    ``select_head_parts``; the three are handed to ``count_heads_by_set``,
    whose result is returned unchanged.
    """
    enriched_sets = load_sets(sets_path)
    inventory_rows = load_parts_filtered(parts_filtered_path)
    head_part_nums = select_head_parts(load_parts_catalog(parts_catalog_path))
    return count_heads_by_set(
        sets_rows=enriched_sets,
        parts_rows=inventory_rows,
        head_parts=head_part_nums,
    )
def write_minifig_counts(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the per-set minifig counts to a CSV file at ``destination_path``.

    ``ensure_parent_dir`` is called on the destination first. The file starts
    with a header line and has the columns ``set_num``, ``set_id``, ``name``,
    ``year`` and ``minifig_count``, in that order; ``rows`` are written in the
    order given.
    """
    columns = ["set_num", "set_id", "name", "year", "minifig_count"]
    ensure_parent_dir(destination_path)
    # newline="" leaves line-ending handling to the csv module.
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)

View File

@ -0,0 +1,24 @@
"""Trace le nombre de minifigs par set filtré."""
from pathlib import Path
from lib.plots.minifig_counts import plot_minifigs_per_set
from lib.rebrickable.minifig_counts import build_minifig_counts_by_set, write_minifig_counts
# Inputs handed to build_minifig_counts_by_set() by main():
# enriched sets, filtered parts inventory and raw parts catalog.
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
PARTS_FILTERED_PATH = Path("data/intermediate/parts_filtered.csv")
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
# CSV written by main() and then read back to draw the chart.
COUNTS_PATH = Path("data/intermediate/minifig_counts_by_set.csv")
# Image file the chart is saved to.
DESTINATION_PATH = Path("figures/step20/minifigs_per_set.png")
def main() -> None:
    """Compute the per-set minifig counts, save them as CSV and plot that CSV."""
    minifig_counts = build_minifig_counts_by_set(
        SETS_PATH,
        PARTS_FILTERED_PATH,
        PARTS_CATALOG_PATH,
    )
    write_minifig_counts(COUNTS_PATH, minifig_counts)
    # The chart is drawn from the CSV that was just written, not from memory.
    plot_minifigs_per_set(COUNTS_PATH, DESTINATION_PATH)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,47 @@
"""Tests du comptage de minifigs par set."""
from pathlib import Path
from lib.rebrickable.minifig_counts import build_minifig_counts_by_set
def write_csv(path: Path, content: str) -> None:
    """Write ``content`` verbatim to ``path`` as text, replacing any existing file."""
    with path.open("w") as handle:
        handle.write(content)
def test_build_minifig_counts_by_set_counts_heads_without_spares(tmp_path: Path) -> None:
    """Count non-spare heads for every set and keep the sets that end up at 0."""
    sets_csv = tmp_path / "sets_enriched.csv"
    inventory_csv = tmp_path / "parts_filtered.csv"
    catalog_csv = tmp_path / "parts.csv"

    # Three sets; 125-1 has no inventory row at all.
    write_csv(
        sets_csv,
        "set_num,name,year,set_id\n"
        "123-1,Set A,2020,123\n"
        "124-1,Set B,2021,124\n"
        "125-1,Set C,2022,125\n",
    )
    # 123-1 holds one regular head plus two spare ones; 124-1 holds three
    # heads and one part that is not a head.
    write_csv(
        inventory_csv,
        "part_num,color_rgb,is_translucent,set_num,set_id,year,quantity_in_set,is_spare,is_minifig_part\n"
        "head-a,ffffff,false,123-1,123,2020,1,false,true\n"
        "head-a,ffffff,false,123-1,123,2020,2,true,true\n"
        "head-b,ffffff,false,124-1,124,2021,3,false,true\n"
        "other,000000,false,124-1,124,2021,1,false,false\n",
    )
    write_csv(
        catalog_csv,
        "part_num,name,part_cat_id\n"
        "head-a,Head A,59\n"
        "head-b,Head B,59\n"
        "other,Other,1\n",
    )

    expected = [
        {"set_num": "124-1", "set_id": "124", "name": "Set B", "year": "2021", "minifig_count": 3},
        {"set_num": "123-1", "set_id": "123", "name": "Set A", "year": "2020", "minifig_count": 1},
        {"set_num": "125-1", "set_id": "125", "name": "Set C", "year": "2022", "minifig_count": 0},
    ]
    actual = build_minifig_counts_by_set(sets_csv, inventory_csv, catalog_csv)

    assert actual == expected

View File

@ -0,0 +1,25 @@
"""Tests du graphique des minifigs par set."""
import matplotlib
from pathlib import Path
from lib.plots.minifig_counts import plot_minifigs_per_set
matplotlib.use("Agg")
def test_plot_minifigs_per_set_outputs_image(tmp_path: Path) -> None:
    """Plotting a small counts CSV creates a non-empty image file."""
    source_csv = tmp_path / "minifig_counts_by_set.csv"
    # The nested target directories do not exist yet when the plot is requested.
    image_path = tmp_path.joinpath("figures", "step20", "minifigs_per_set.png")
    csv_text = (
        "set_num,set_id,name,year,minifig_count\n"
        "123-1,123,Set A,2020,2\n"
        "124-1,124,Set B,2021,1\n"
    )
    source_csv.write_text(csv_text)

    plot_minifigs_per_set(source_csv, image_path)

    assert image_path.exists()
    assert image_path.stat().st_size > 0