From f94669d82e2aa9ad4586ea4f37d772ca96a5ca17 Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Tue, 2 Dec 2025 16:52:42 +0100 Subject: [PATCH] =?UTF-8?q?Ajoute=20l'analyse=20des=20pi=C3=A8ces=20rares?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 13 +++ lib/plots/rare_parts.py | 77 +++++++++++++ lib/rebrickable/rare_parts.py | 204 ++++++++++++++++++++++++++++++++++ scripts/compute_rare_parts.py | 28 +++++ scripts/plot_rare_parts.py | 18 +++ tests/test_rare_parts.py | 169 ++++++++++++++++++++++++++++ tests/test_rare_parts_plot.py | 25 +++++ 7 files changed, 534 insertions(+) create mode 100644 lib/plots/rare_parts.py create mode 100644 lib/rebrickable/rare_parts.py create mode 100644 scripts/compute_rare_parts.py create mode 100644 scripts/plot_rare_parts.py create mode 100644 tests/test_rare_parts.py create mode 100644 tests/test_rare_parts_plot.py diff --git a/README.md b/README.md index 321684e..c4182fa 100644 --- a/README.md +++ b/README.md @@ -272,3 +272,16 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires. Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés). + +### Étape 27 : pièces rares (variantes exclusives) + +1. `source .venv/bin/activate` +2. `python -m scripts.compute_rare_parts` +3. `python -m scripts.plot_rare_parts` + +Le calcul lit `data/intermediate/parts_filtered.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/parts.csv` et `data/raw/colors.csv` pour identifier les combinaisons pièce + couleur présentes dans un seul set (rechanges exclues). Il produit : + +- `data/intermediate/rare_parts.csv` : liste détaillée des pièces rares avec set, couleur, nature minifig/hors minifig et possession. +- `data/intermediate/rare_parts_by_set.csv` : agrégat par set (comptes distincts, quantités, focus minifigs). + +Le tracé `figures/step27/rare_parts_per_set.png` met en scène le top des sets contenant le plus de variantes exclusives, en distinguant les pièces de minifigs et l’état de possession. diff --git a/lib/plots/rare_parts.py b/lib/plots/rare_parts.py new file mode 100644 index 0000000..a9269c5 --- /dev/null +++ b/lib/plots/rare_parts.py @@ -0,0 +1,77 @@ +"""Graphique des pièces rares par set.""" + +from pathlib import Path +from typing import List, Tuple + +import matplotlib.pyplot as plt +from matplotlib.patches import Patch + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +def load_top_sets(path: Path, limit: int = 15) -> List[dict]: + """Charge les sets triés par nombre de pièces rares et limite le top.""" + rows = read_rows(path) + sorted_rows = sorted( + rows, + key=lambda row: ( + -int(row["rare_parts_distinct"]), + -int(row["rare_parts_quantity"]), + row["set_num"], + ), + ) + return sorted_rows[:limit] + + +def split_counts(rows: List[dict]) -> Tuple[List[int], List[int]]: + """Sépare les comptages minifig vs hors minifig.""" + non_minifig: List[int] = [] + minifig: List[int] = [] + for row in rows: + total = int(row["rare_parts_distinct"]) + minifig_count = int(row["rare_minifig_parts_distinct"]) + non_minifig.append(total - minifig_count) + minifig.append(minifig_count) + return non_minifig, minifig + + +def plot_rare_parts_per_set(rare_by_set_path: Path, destination_path: Path) -> None: + """Trace le top des sets contenant des pièces exclusives.""" + rows = load_top_sets(rare_by_set_path) + if not rows: + return + non_minifig, minifig = split_counts(rows) + y_positions = list(range(len(rows))) + labels = [f"{row['set_num']} · {row['name']} ({row['year']})" for row in rows] + owned_mask = [row["in_collection"] == "true" for row in rows] + + base_color = "#1f77b4" + accent_color = "#f28e2b" + fig, ax = plt.subplots(figsize=(11, 8)) + for y, value, is_owned in zip(y_positions, non_minifig, owned_mask): + alpha = 0.92 if is_owned else 0.45 + ax.barh(y, value, color=base_color, alpha=alpha, label=None) + for y, value, offset, is_owned in zip(y_positions, minifig, non_minifig, owned_mask): + alpha = 0.92 if is_owned else 0.45 + ax.barh(y, value, left=offset, color=accent_color, alpha=alpha, label=None) + + ax.set_yticks(y_positions) + ax.set_yticklabels(labels) + ax.invert_yaxis() + ax.set_xlabel("Variantes de pièces exclusives (hors rechanges)") + ax.set_title("Pièces rares par set (top)") + ax.grid(axis="x", linestyle="--", alpha=0.35) + + handles = [ + Patch(facecolor=base_color, edgecolor="none", label="Pièces hors minifigs"), + Patch(facecolor=accent_color, edgecolor="none", label="Pièces de minifigs"), + Patch(facecolor="#000000", edgecolor="none", alpha=0.92, label="Set possédé"), + Patch(facecolor="#000000", edgecolor="none", alpha=0.45, label="Set manquant"), + ] + ax.legend(handles=handles, loc="lower right", frameon=False) + + ensure_parent_dir(destination_path) + fig.tight_layout() + fig.savefig(destination_path, dpi=170) + plt.close(fig) diff --git a/lib/rebrickable/rare_parts.py b/lib/rebrickable/rare_parts.py new file mode 100644 index 0000000..6bd54e0 --- /dev/null +++ b/lib/rebrickable/rare_parts.py @@ -0,0 +1,204 @@ +"""Identification des pièces rares (variantes exclusives à un set).""" + +import csv +from pathlib import Path +from typing import Dict, Iterable, List, Sequence, Set, Tuple + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +def load_parts_catalog(path: Path) -> Dict[str, dict]: + """Charge le catalogue des pièces et l'indexe par référence.""" + catalog: Dict[str, dict] = {} + with path.open() as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + catalog[row["part_num"]] = row + return catalog + + +def load_colors_lookup(path: Path) -> Dict[Tuple[str, str], str]: + """Associe un couple (rgb, is_trans) au nom de couleur.""" + lookup: Dict[Tuple[str, str], str] = {} + for row in read_rows(path): + lookup[(row["rgb"], row["is_trans"].lower())] = row["name"] + return lookup + + +def load_sets_enriched(path: Path) -> Dict[str, dict]: + """Indexe les sets enrichis par numéro complet.""" + sets: Dict[str, dict] = {} + for row in read_rows(path): + sets[row["set_num"]] = row + return sets + + +def aggregate_non_spare_parts(rows: Iterable[dict]) -> List[dict]: + """Agrège les pièces hors rechanges par set et variation couleur.""" + aggregated: Dict[Tuple[str, str, str, str, str, str, str], int] = {} + for row in rows: + if row["is_spare"] == "true": + continue + key = ( + row["set_num"], + row["part_num"], + row["color_rgb"], + row["is_translucent"], + row["is_minifig_part"], + row["set_id"], + row["year"], + ) + aggregated[key] = aggregated.get(key, 0) + int(row["quantity_in_set"]) + result: List[dict] = [] + for key, quantity in aggregated.items(): + set_num, part_num, color_rgb, is_translucent, is_minifig_part, set_id, year = key + result.append( + { + "set_num": set_num, + "part_num": part_num, + "color_rgb": color_rgb, + "is_translucent": is_translucent, + "is_minifig_part": is_minifig_part, + "set_id": set_id, + "year": year, + "quantity_in_set": str(quantity), + } + ) + result.sort(key=lambda row: (row["set_num"], row["part_num"], row["color_rgb"])) + return result + + +def compute_combo_set_counts(rows: Iterable[dict]) -> Dict[Tuple[str, str, str], Set[str]]: + """Compte les sets distincts par combinaison pièce+couleur.""" + combos: Dict[Tuple[str, str, str], Set[str]] = {} + for row in rows: + key = (row["part_num"], row["color_rgb"], row["is_translucent"]) + if key not in combos: + combos[key] = set() + combos[key].add(row["set_num"]) + return combos + + +def build_rare_parts( + parts_filtered_path: Path, + sets_enriched_path: Path, + parts_catalog_path: Path, + colors_path: Path, +) -> Tuple[List[dict], List[dict]]: + """Construit les listes des pièces rares et leur répartition par set.""" + parts_rows = read_rows(parts_filtered_path) + aggregated = aggregate_non_spare_parts(parts_rows) + combo_sets = compute_combo_set_counts(aggregated) + parts_catalog = load_parts_catalog(parts_catalog_path) + color_names = load_colors_lookup(colors_path) + sets_lookup = load_sets_enriched(sets_enriched_path) + + rare_parts: List[dict] = [] + for row in aggregated: + combo_key = (row["part_num"], row["color_rgb"], row["is_translucent"]) + if len(combo_sets[combo_key]) != 1: + continue + set_row = sets_lookup[row["set_num"]] + part = parts_catalog[row["part_num"]] + color_name = color_names[(row["color_rgb"], row["is_translucent"])] + rare_parts.append( + { + "set_num": row["set_num"], + "set_id": row["set_id"], + "set_name": set_row["name"], + "year": set_row["year"], + "part_num": row["part_num"], + "part_name": part["name"], + "part_cat_id": part["part_cat_id"], + "color_rgb": row["color_rgb"], + "color_name": color_name, + "is_translucent": row["is_translucent"], + "is_minifig_part": row["is_minifig_part"], + "quantity_in_set": row["quantity_in_set"], + "in_collection": set_row["in_collection"], + } + ) + rare_parts.sort(key=lambda row: (row["set_num"], row["part_num"], row["color_rgb"])) + + rare_by_set: Dict[str, dict] = {} + for row in rare_parts: + record = rare_by_set.get(row["set_num"]) + if record is None: + record = { + "set_num": row["set_num"], + "set_id": row["set_id"], + "name": row["set_name"], + "year": row["year"], + "in_collection": row["in_collection"], + "rare_parts_distinct": 0, + "rare_parts_quantity": 0, + "rare_minifig_parts_distinct": 0, + "rare_minifig_quantity": 0, + } + rare_by_set[row["set_num"]] = record + record["rare_parts_distinct"] += 1 + record["rare_parts_quantity"] += int(row["quantity_in_set"]) + if row["is_minifig_part"] == "true": + record["rare_minifig_parts_distinct"] += 1 + record["rare_minifig_quantity"] += int(row["quantity_in_set"]) + rare_by_set_rows = list(rare_by_set.values()) + rare_by_set_rows.sort( + key=lambda row: ( + -row["rare_parts_distinct"], + -row["rare_parts_quantity"], + row["set_num"], + ) + ) + for row in rare_by_set_rows: + row["rare_parts_distinct"] = str(row["rare_parts_distinct"]) + row["rare_parts_quantity"] = str(row["rare_parts_quantity"]) + row["rare_minifig_parts_distinct"] = str(row["rare_minifig_parts_distinct"]) + row["rare_minifig_quantity"] = str(row["rare_minifig_quantity"]) + return rare_parts, rare_by_set_rows + + +def write_rare_parts_list(destination_path: Path, rows: Sequence[dict]) -> None: + """Écrit le détail des pièces rares avec leur set et leur couleur.""" + ensure_parent_dir(destination_path) + fieldnames = [ + "set_num", + "set_id", + "set_name", + "year", + "part_num", + "part_name", + "part_cat_id", + "color_rgb", + "color_name", + "is_translucent", + "is_minifig_part", + "quantity_in_set", + "in_collection", + ] + with destination_path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + + +def write_rare_parts_by_set(destination_path: Path, rows: Sequence[dict]) -> None: + """Écrit l'agrégat des pièces rares par set.""" + ensure_parent_dir(destination_path) + fieldnames = [ + "set_num", + "set_id", + "name", + "year", + "in_collection", + "rare_parts_distinct", + "rare_parts_quantity", + "rare_minifig_parts_distinct", + "rare_minifig_quantity", + ] + with destination_path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) diff --git a/scripts/compute_rare_parts.py b/scripts/compute_rare_parts.py new file mode 100644 index 0000000..c2b5847 --- /dev/null +++ b/scripts/compute_rare_parts.py @@ -0,0 +1,28 @@ +"""Calcule les pièces rares (variantes exclusives) et leurs agrégats.""" + +from pathlib import Path + +from lib.rebrickable.rare_parts import ( + build_rare_parts, + write_rare_parts_by_set, + write_rare_parts_list, +) + + +PARTS_PATH = Path("data/intermediate/parts_filtered.csv") +SETS_PATH = Path("data/intermediate/sets_enriched.csv") +PARTS_CATALOG_PATH = Path("data/raw/parts.csv") +COLORS_PATH = Path("data/raw/colors.csv") +RARE_PARTS_PATH = Path("data/intermediate/rare_parts.csv") +RARE_PARTS_BY_SET_PATH = Path("data/intermediate/rare_parts_by_set.csv") + + +def main() -> None: + """Construit les fichiers listant les pièces rares et leur répartition par set.""" + rare_parts, rare_by_set = build_rare_parts(PARTS_PATH, SETS_PATH, PARTS_CATALOG_PATH, COLORS_PATH) + write_rare_parts_list(RARE_PARTS_PATH, rare_parts) + write_rare_parts_by_set(RARE_PARTS_BY_SET_PATH, rare_by_set) + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_rare_parts.py b/scripts/plot_rare_parts.py new file mode 100644 index 0000000..ef16ca7 --- /dev/null +++ b/scripts/plot_rare_parts.py @@ -0,0 +1,18 @@ +"""Trace le top des sets avec pièces exclusives.""" + +from pathlib import Path + +from lib.plots.rare_parts import plot_rare_parts_per_set + + +RARE_PARTS_BY_SET_PATH = Path("data/intermediate/rare_parts_by_set.csv") +DESTINATION_PATH = Path("figures/step27/rare_parts_per_set.png") + + +def main() -> None: + """Génère le visuel des pièces rares par set.""" + plot_rare_parts_per_set(RARE_PARTS_BY_SET_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/tests/test_rare_parts.py b/tests/test_rare_parts.py new file mode 100644 index 0000000..9a038be --- /dev/null +++ b/tests/test_rare_parts.py @@ -0,0 +1,169 @@ +"""Tests du calcul des pièces rares.""" + +import csv +from pathlib import Path + +from lib.rebrickable.rare_parts import build_rare_parts, write_rare_parts_by_set, write_rare_parts_list + + +def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None: + """Écrit un CSV simple pour les besoins de tests.""" + with path.open("w", newline="") as csv_file: + writer = csv.writer(csv_file) + writer.writerow(headers) + writer.writerows(rows) + + +def test_build_rare_parts_detects_exclusive_variations(tmp_path: Path) -> None: + """Identifie les combinaisons pièce+couleur présentes dans un seul set.""" + parts_filtered = tmp_path / "parts_filtered.csv" + write_csv( + parts_filtered, + [ + "part_num", + "color_rgb", + "is_translucent", + "set_num", + "set_id", + "year", + "quantity_in_set", + "is_spare", + "is_minifig_part", + ], + [ + ["p1", "AAAAAA", "false", "1000-1", "1000", "2020", "2", "false", "false"], + ["p1", "AAAAAA", "false", "2000-1", "2000", "2021", "3", "false", "false"], + ["p2", "BBBBBB", "false", "1000-1", "1000", "2020", "1", "false", "true"], + ["p3", "CCCCCC", "true", "2000-1", "2000", "2021", "4", "false", "false"], + ], + ) + sets_enriched = tmp_path / "sets_enriched.csv" + write_csv( + sets_enriched, + ["set_num", "set_id", "name", "year", "in_collection"], + [ + ["1000-1", "1000", "Set A", "2020", "true"], + ["2000-1", "2000", "Set B", "2021", "false"], + ], + ) + parts_catalog = tmp_path / "parts.csv" + write_csv( + parts_catalog, + ["part_num", "name", "part_cat_id"], + [ + ["p1", "Brick 1x1", "1"], + ["p2", "Head Custom", "59"], + ["p3", "Slope 45", "2"], + ], + ) + colors = tmp_path / "colors.csv" + write_csv( + colors, + ["id", "name", "rgb", "is_trans", "num_parts", "num_sets", "y1", "y2"], + [ + ["1", "Gray", "AAAAAA", "false", "0", "0", "0", "0"], + ["2", "Blue", "BBBBBB", "false", "0", "0", "0", "0"], + ["3", "Trans-Clear", "CCCCCC", "true", "0", "0", "0", "0"], + ], + ) + + rare_parts, rare_by_set = build_rare_parts(parts_filtered, sets_enriched, parts_catalog, colors) + + assert rare_parts == [ + { + "set_num": "1000-1", + "set_id": "1000", + "set_name": "Set A", + "year": "2020", + "part_num": "p2", + "part_name": "Head Custom", + "part_cat_id": "59", + "color_rgb": "BBBBBB", + "color_name": "Blue", + "is_translucent": "false", + "is_minifig_part": "true", + "quantity_in_set": "1", + "in_collection": "true", + }, + { + "set_num": "2000-1", + "set_id": "2000", + "set_name": "Set B", + "year": "2021", + "part_num": "p3", + "part_name": "Slope 45", + "part_cat_id": "2", + "color_rgb": "CCCCCC", + "color_name": "Trans-Clear", + "is_translucent": "true", + "is_minifig_part": "false", + "quantity_in_set": "4", + "in_collection": "false", + }, + ] + assert rare_by_set == [ + { + "set_num": "1000-1", + "set_id": "1000", + "name": "Set A", + "year": "2020", + "in_collection": "true", + "rare_parts_distinct": "1", + "rare_parts_quantity": "1", + "rare_minifig_parts_distinct": "1", + "rare_minifig_quantity": "1", + }, + { + "set_num": "2000-1", + "set_id": "2000", + "name": "Set B", + "year": "2021", + "in_collection": "false", + "rare_parts_distinct": "1", + "rare_parts_quantity": "4", + "rare_minifig_parts_distinct": "0", + "rare_minifig_quantity": "0", + }, + ] + + +def test_write_rare_parts_outputs_csv(tmp_path: Path) -> None: + """Sérialise les pièces rares et l’agrégat par set.""" + rare_parts_path = tmp_path / "rare_parts.csv" + rare_by_set_path = tmp_path / "rare_parts_by_set.csv" + rare_parts_sample = [ + { + "set_num": "123-1", + "set_id": "123", + "set_name": "Sample", + "year": "2020", + "part_num": "p1", + "part_name": "Brick", + "part_cat_id": "1", + "color_rgb": "FFFFFF", + "color_name": "White", + "is_translucent": "false", + "is_minifig_part": "false", + "quantity_in_set": "2", + "in_collection": "true", + } + ] + rare_by_set_sample = [ + { + "set_num": "123-1", + "set_id": "123", + "name": "Sample", + "year": "2020", + "in_collection": "true", + "rare_parts_distinct": "1", + "rare_parts_quantity": "2", + "rare_minifig_parts_distinct": "0", + "rare_minifig_quantity": "0", + } + ] + + write_rare_parts_list(rare_parts_path, rare_parts_sample) + write_rare_parts_by_set(rare_by_set_path, rare_by_set_sample) + + assert rare_parts_path.exists() + assert rare_by_set_path.exists() diff --git a/tests/test_rare_parts_plot.py b/tests/test_rare_parts_plot.py new file mode 100644 index 0000000..db5db42 --- /dev/null +++ b/tests/test_rare_parts_plot.py @@ -0,0 +1,25 @@ +"""Tests du graphique des pièces rares par set.""" + +import matplotlib +from pathlib import Path + +from lib.plots.rare_parts import plot_rare_parts_per_set + + +matplotlib.use("Agg") + + +def test_plot_rare_parts_per_set_outputs_image(tmp_path: Path) -> None: + """Génère l'image du top des sets avec pièces exclusives.""" + rare_by_set_path = tmp_path / "rare_parts_by_set.csv" + destination_path = tmp_path / "figures" / "step27" / "rare_parts_per_set.png" + rare_by_set_path.write_text( + "set_num,set_id,name,year,in_collection,rare_parts_distinct,rare_parts_quantity,rare_minifig_parts_distinct,rare_minifig_quantity\n" + "1000-1,1000,Set A,2020,true,3,5,1,2\n" + "2000-1,2000,Set B,2021,false,2,4,0,0\n" + ) + + plot_rare_parts_per_set(rare_by_set_path, destination_path) + + assert destination_path.exists() + assert destination_path.stat().st_size > 0