diff --git a/README.md b/README.md index ef69112..bae399b 100644 --- a/README.md +++ b/README.md @@ -312,3 +312,18 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `dat - `data/intermediate/part_categories_global.csv` : parts globales par catégorie. Les visuels `figures/step29/top_part_categories_area.png`, `figures/step29/part_categories_heatmap.png` et `figures/step29/structural_share_timeline.png` montrent respectivement l’évolution des principales catégories (aire empilée), une heatmap exhaustive catégorie × année, et la trajectoire de la part des pièces structurelles. + +### Étape 30 : visages multiples sur les têtes de minifigs + +1. `source .venv/bin/activate` +2. `python -m scripts.compute_minifig_head_faces` +3. `python -m scripts.plot_minifig_head_faces` + +Le calcul lit `data/intermediate/minifigs_by_set.csv`, `data/raw/parts.csv` et `data/intermediate/sets_enriched.csv` pour annoter les têtes de minifigs contenant plusieurs visages (détection par mots-clés dans le nom de pièce). Il produit : + +- `data/intermediate/minifig_head_faces.csv` : têtes annotées (set, personnage, genre, indicateur dual-face). +- `data/intermediate/minifig_head_faces_by_year.csv` : volume et part des dual-face par année. +- `data/intermediate/minifig_head_faces_by_set.csv` : volume et part par set (possession incluse). +- `data/intermediate/minifig_head_faces_by_character.csv` : volume et part par personnage. + +Les visuels `figures/step30/minifig_head_faces_timeline.png`, `figures/step30/minifig_head_faces_top_sets.png` et `figures/step30/minifig_head_faces_characters.png` montrent respectivement la trajectoire annuelle, le top des sets concernés et les personnages dotés de têtes dual-face. 
def load_rows(path: Path) -> List[dict]:
    """Load all rows of the CSV at ``path`` by delegating to ``read_rows``."""
    return read_rows(path)


def plot_dual_faces_timeline(by_year_path: Path, destination_path: Path) -> None:
    """Plot yearly head volumes and the dual-face share, then save the figure.

    Total heads are drawn as bars, dual-face heads as a solid line, and the
    dual-face share as a dashed line rescaled onto the volume axis.

    Args:
        by_year_path: CSV providing the ``year``, ``total_heads``,
            ``dual_heads`` and ``share_dual`` columns, one row per year.
        destination_path: Image file to write. It is handed to
            ``ensure_parent_dir`` before the figure is saved.

    Nothing is drawn or written when the CSV contains no rows.
    """
    rows = load_rows(by_year_path)
    if not rows:
        return
    years = [row["year"] for row in rows]
    totals = [int(row["total_heads"]) for row in rows]
    duals = [int(row["dual_heads"]) for row in rows]
    # The share shares the volume axis, so it is stretched to the tallest bar.
    # The peak is computed once rather than once per plotted point.
    peak_total = max(totals)
    scaled_shares = [float(row["share_dual"]) * peak_total for row in rows]
    x = np.arange(len(years))

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(x, totals, color="#dddddd", alpha=0.4, label="Têtes totales")
        ax.plot(x, duals, color="#1f77b4", linewidth=2.0, label="Têtes dual-face (volume)")
        ax.plot(
            x,
            scaled_shares,
            color="#d62728",
            linestyle="--",
            linewidth=1.6,
            label="Part dual-face (échelle volume)",
        )
        ax.set_xticks(x)
        ax.set_xticklabels(years, rotation=45, ha="right")
        ax.set_ylabel("Volume de têtes")
        ax.set_title("Têtes de minifigs : volume et part des dual-face par année")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.legend(loc="upper left", frameon=False)

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=170)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
def plot_dual_faces_top_sets(by_set_path: Path, destination_path: Path) -> None:
    """Draw a horizontal bar chart of the sets with the most dual-face heads.

    Rows are ranked by ``select_top_sets``. Bars of sets whose
    ``in_collection`` column equals ``"true"`` are drawn more opaque than the
    others, and the legend explains the two opacities.

    Args:
        by_set_path: CSV providing ``set_num``, ``name``, ``year``,
            ``in_collection`` and ``dual_heads`` columns (``select_top_sets``
            additionally reads ``share_dual``).
        destination_path: Image file to write. It is handed to
            ``ensure_parent_dir`` before the figure is saved.

    Nothing is drawn or written when the CSV contains no rows.
    """
    rows = load_rows(by_set_path)
    if not rows:
        return
    top_rows = select_top_sets(rows)
    y = np.arange(len(top_rows))
    duals = [int(row["dual_heads"]) for row in top_rows]
    labels = [f"{row['set_num']} · {row['name']} ({row['year']})" for row in top_rows]
    owned_mask = [row["in_collection"] == "true" for row in top_rows]

    fig, ax = plt.subplots(figsize=(11, 8))
    try:
        # One call per bar because the opacity differs between owned and missing sets.
        for pos, value, owned in zip(y, duals, owned_mask):
            ax.barh(pos, value, color="#9467bd", alpha=0.9 if owned else 0.45)
        ax.set_yticks(y)
        ax.set_yticklabels(labels)
        ax.invert_yaxis()
        ax.set_xlabel("Nombre de têtes dual-face")
        ax.set_title("Top des sets avec têtes dual-face")
        ax.grid(axis="x", linestyle="--", alpha=0.3)
        legend = [
            Patch(facecolor="#9467bd", edgecolor="none", alpha=0.9, label="Set possédé"),
            Patch(facecolor="#9467bd", edgecolor="none", alpha=0.45, label="Set manquant"),
        ]
        ax.legend(handles=legend, loc="lower right", frameon=False)

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=170)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)


def select_top_characters(rows: Iterable[dict], limit: int = 12) -> List[dict]:
    """Return the ``limit`` characters with the most dual-face heads.

    Rows are ordered by descending ``dual_heads``, then descending
    ``share_dual``, then ascending ``known_character`` as the tie-breaker.

    Args:
        rows: Mappings with ``dual_heads``, ``share_dual`` and
            ``known_character`` entries (numeric values stored as strings).
        limit: Maximum number of rows to return.

    Returns:
        A new list holding at most ``limit`` of the input rows.
    """
    ranked = sorted(
        rows,
        key=lambda row: (
            -int(row["dual_heads"]),
            -float(row["share_dual"]),
            row["known_character"],
        ),
    )
    return ranked[:limit]
# Lower-case substrings whose presence in a part name marks a multi-face head.
DUAL_FACE_KEYWORDS = [
    "dual sided",
    "dual-sided",
    "double sided",
    "double-sided",
    "2 sided",
    "2-sided",
    "two sided",
    "two-sided",
    "dual print",
    "dual face",
    "double face",
    "two faces",
    "alt face",
    "alternate face",
]


def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the rows of a parts CSV by their ``part_num`` column.

    Args:
        path: CSV file with a header line that includes ``part_num``.

    Returns:
        A mapping from part number to the full row. When a part number
        appears several times, the last row wins.

    Raises:
        KeyError: If a row has no ``part_num`` column.
    """
    # newline="" lets the csv module handle line endings itself, which keeps
    # line breaks embedded in quoted fields intact. The explicit encoding
    # avoids depending on the platform's locale.
    with path.open(newline="", encoding="utf-8") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
def detect_dual_face(name: str, keywords: "Iterable[str] | None" = None) -> str:
    """Flag a head as dual-face when its part name contains a known keyword.

    Args:
        name: Part name; it is lower-cased before matching.
        keywords: Lower-case substrings to search for. When omitted, the
            module-level ``DUAL_FACE_KEYWORDS`` list is used.

    Returns:
        The string ``"true"`` when at least one keyword occurs in ``name``,
        otherwise the string ``"false"``.
    """
    candidates = DUAL_FACE_KEYWORDS if keywords is None else keywords
    lowered = name.lower()
    return "true" if any(keyword in lowered for keyword in candidates) else "false"


def build_head_faces(
    minifigs_by_set_path: Path,
    parts_catalog_path: Path,
    sets_enriched_path: Path,
) -> List[dict]:
    """Annotate every minifig head with set metadata and a dual-face flag.

    Args:
        minifigs_by_set_path: CSV of heads providing ``set_num``,
            ``part_num``, ``fig_num``, ``known_character`` and ``gender``.
        parts_catalog_path: Parts CSV, indexed through ``load_parts_catalog``.
        sets_enriched_path: Sets CSV, indexed through ``load_sets``.

    Returns:
        One dict per head, sorted by ``set_num`` then ``part_num``. The
        ``is_dual_face`` entry is the string returned by ``detect_dual_face``
        for the part name.

    Raises:
        KeyError: If a head references a part or a set missing from the
            corresponding lookup.
    """
    heads = read_rows(minifigs_by_set_path)
    catalog = load_parts_catalog(parts_catalog_path)
    sets_lookup = load_sets(sets_enriched_path)
    annotated: List[dict] = []
    for head in heads:
        part = catalog[head["part_num"]]
        set_row = sets_lookup[head["set_num"]]
        annotated.append(
            {
                "set_num": head["set_num"],
                "set_id": set_row["set_id"],
                "year": set_row["year"],
                "name": set_row["name"],
                "in_collection": set_row["in_collection"],
                "part_num": head["part_num"],
                "part_name": part["name"],
                "fig_num": head["fig_num"],
                "known_character": head["known_character"],
                "gender": head["gender"],
                "is_dual_face": detect_dual_face(part["name"]),
            }
        )
    annotated.sort(key=lambda entry: (entry["set_num"], entry["part_num"]))
    return annotated
def aggregate_by_set(rows: Iterable[dict]) -> List[dict]:
    """Summarise head counts and the dual-face share for each set.

    Set metadata (``set_id``, ``name``, ``year``, ``in_collection``) is taken
    from the first row seen for a given ``set_num``. A head counts as
    dual-face when its ``is_dual_face`` entry equals ``"true"``.

    Returns:
        One dict per set with the counts rendered as strings and
        ``share_dual`` formatted with four decimals, ordered by descending
        ``dual_heads``, then descending ``share_dual``, then ``set_num``.
    """
    tallies: Dict[str, dict] = {}
    for head in rows:
        set_num = head["set_num"]
        if set_num not in tallies:
            tallies[set_num] = {
                "set_num": set_num,
                "set_id": head["set_id"],
                "name": head["name"],
                "year": head["year"],
                "in_collection": head["in_collection"],
                "total_heads": 0,
                "dual_heads": 0,
            }
        bucket = tallies[set_num]
        bucket["total_heads"] += 1
        if head["is_dual_face"] == "true":
            bucket["dual_heads"] += 1

    # Spreading the bucket keeps its key order; the counters are then
    # overwritten with their string form and the share is appended last.
    summary = [
        {
            **bucket,
            "total_heads": str(bucket["total_heads"]),
            "dual_heads": str(bucket["dual_heads"]),
            "share_dual": f"{bucket['dual_heads'] / bucket['total_heads']:.4f}",
        }
        for bucket in tallies.values()
    ]
    return sorted(
        summary,
        key=lambda item: (-int(item["dual_heads"]), -float(item["share_dual"]), item["set_num"]),
    )
def write_csv(destination_path: Path, rows: Sequence[dict], fieldnames: Sequence[str]) -> None:
    """Write ``rows`` to ``destination_path`` as a CSV file with a header line.

    Args:
        destination_path: File to create or overwrite. It is handed to
            ``ensure_parent_dir`` before being opened.
        rows: Mappings to serialise, one per output line.
        fieldnames: Column names, in output order.
    """
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=fieldnames)
        output.writeheader()
        output.writerows(rows)
# Aggregate CSVs read by the plotting step and the figures it produces.
BY_YEAR_PATH = Path("data/intermediate/minifig_head_faces_by_year.csv")
BY_SET_PATH = Path("data/intermediate/minifig_head_faces_by_set.csv")
BY_CHARACTER_PATH = Path("data/intermediate/minifig_head_faces_by_character.csv")
TIMELINE_DESTINATION = Path("figures/step30/minifig_head_faces_timeline.png")
TOP_SETS_DESTINATION = Path("figures/step30/minifig_head_faces_top_sets.png")
CHARACTERS_DESTINATION = Path("figures/step30/minifig_head_faces_characters.png")


def main() -> None:
    """Render the timeline, top-sets and characters figures, in that order."""
    jobs = (
        (plot_dual_faces_timeline, BY_YEAR_PATH, TIMELINE_DESTINATION),
        (plot_dual_faces_top_sets, BY_SET_PATH, TOP_SETS_DESTINATION),
        (plot_dual_faces_characters, BY_CHARACTER_PATH, CHARACTERS_DESTINATION),
    )
    for render, source_path, figure_path in jobs:
        render(source_path, figure_path)
headers: list[str], rows: list[list[str]]) -> None: + """Écrit un CSV simple pour les besoins de tests.""" + with path.open("w", newline="") as csv_file: + writer = csv.writer(csv_file) + writer.writerow(headers) + writer.writerows(rows) + + +def test_detect_dual_face_matches_keywords() -> None: + """Reconnaît les mentions dual-face dans le nom.""" + assert detect_dual_face("Minifig Head Dual Sided Smile") == "true" + assert detect_dual_face("Minifig Head w/ Alt Face") == "true" + assert detect_dual_face("Minifig Head Angry") == "false" + + +def test_build_head_faces_and_aggregates(tmp_path: Path) -> None: + """Construit l'annotation dual-face puis agrège par set/année/personnage.""" + minifigs_by_set = tmp_path / "minifigs_by_set.csv" + write_csv( + minifigs_by_set, + ["set_num", "part_num", "known_character", "fig_num", "gender"], + [ + ["1000-1", "p1", "Alice", "fig-1", "female"], + ["1000-1", "p2", "Bob", "fig-2", "male"], + ["2000-1", "p1", "Alice", "fig-1", "female"], + ], + ) + parts_catalog = tmp_path / "parts.csv" + write_csv( + parts_catalog, + ["part_num", "name", "part_cat_id"], + [ + ["p1", "Minifig Head Dual Sided Grin", "59"], + ["p2", "Minifig Head Serious", "59"], + ], + ) + sets_enriched = tmp_path / "sets_enriched.csv" + write_csv( + sets_enriched, + ["set_num", "set_id", "name", "year", "in_collection"], + [ + ["1000-1", "1000", "Set A", "2020", "true"], + ["2000-1", "2000", "Set B", "2021", "false"], + ], + ) + + heads = build_head_faces(minifigs_by_set, parts_catalog, sets_enriched) + by_year = aggregate_by_year(heads) + by_set = aggregate_by_set(heads) + by_character = aggregate_by_character(heads) + + assert heads == [ + { + "set_num": "1000-1", + "set_id": "1000", + "year": "2020", + "name": "Set A", + "in_collection": "true", + "part_num": "p1", + "part_name": "Minifig Head Dual Sided Grin", + "fig_num": "fig-1", + "known_character": "Alice", + "gender": "female", + "is_dual_face": "true", + }, + { + "set_num": "1000-1", + "set_id": 
"1000", + "year": "2020", + "name": "Set A", + "in_collection": "true", + "part_num": "p2", + "part_name": "Minifig Head Serious", + "fig_num": "fig-2", + "known_character": "Bob", + "gender": "male", + "is_dual_face": "false", + }, + { + "set_num": "2000-1", + "set_id": "2000", + "year": "2021", + "name": "Set B", + "in_collection": "false", + "part_num": "p1", + "part_name": "Minifig Head Dual Sided Grin", + "fig_num": "fig-1", + "known_character": "Alice", + "gender": "female", + "is_dual_face": "true", + }, + ] + assert by_year == [ + {"year": "2020", "total_heads": "2", "dual_heads": "1", "share_dual": "0.5000"}, + {"year": "2021", "total_heads": "1", "dual_heads": "1", "share_dual": "1.0000"}, + ] + assert by_set == [ + { + "set_num": "2000-1", + "set_id": "2000", + "name": "Set B", + "year": "2021", + "in_collection": "false", + "total_heads": "1", + "dual_heads": "1", + "share_dual": "1.0000", + }, + { + "set_num": "1000-1", + "set_id": "1000", + "name": "Set A", + "year": "2020", + "in_collection": "true", + "total_heads": "2", + "dual_heads": "1", + "share_dual": "0.5000", + }, + ] + assert by_character == [ + { + "known_character": "Alice", + "gender": "female", + "total_heads": "2", + "dual_heads": "2", + "share_dual": "1.0000", + }, + { + "known_character": "Bob", + "gender": "male", + "total_heads": "1", + "dual_heads": "0", + "share_dual": "0.0000", + }, + ] diff --git a/tests/test_minifig_head_faces_plot.py b/tests/test_minifig_head_faces_plot.py new file mode 100644 index 0000000..ade19cc --- /dev/null +++ b/tests/test_minifig_head_faces_plot.py @@ -0,0 +1,45 @@ +"""Tests des graphiques des têtes dual-face.""" + +import matplotlib +from pathlib import Path + +from lib.plots.minifig_head_faces import ( + plot_dual_faces_characters, + plot_dual_faces_timeline, + plot_dual_faces_top_sets, +) + + +matplotlib.use("Agg") + + +def test_plot_minifig_head_faces_outputs_images(tmp_path: Path) -> None: + """Génère les visuels dual-face.""" + by_year = 
tmp_path / "minifig_head_faces_by_year.csv" + by_year.write_text("year,total_heads,dual_heads,share_dual\n2020,2,1,0.5\n2021,3,2,0.6667\n") + by_set = tmp_path / "minifig_head_faces_by_set.csv" + by_set.write_text( + "set_num,set_id,name,year,in_collection,total_heads,dual_heads,share_dual\n" + "1000-1,1000,Set A,2020,true,2,1,0.5\n" + "2000-1,2000,Set B,2021,false,3,2,0.6667\n" + ) + by_character = tmp_path / "minifig_head_faces_by_character.csv" + by_character.write_text( + "known_character,gender,total_heads,dual_heads,share_dual\n" + "Alice,female,2,2,1.0\n" + "Bob,male,1,0,0.0\n" + ) + timeline_dest = tmp_path / "figures" / "step30" / "minifig_head_faces_timeline.png" + top_sets_dest = tmp_path / "figures" / "step30" / "minifig_head_faces_top_sets.png" + characters_dest = tmp_path / "figures" / "step30" / "minifig_head_faces_characters.png" + + plot_dual_faces_timeline(by_year, timeline_dest) + plot_dual_faces_top_sets(by_set, top_sets_dest) + plot_dual_faces_characters(by_character, characters_dest) + + assert timeline_dest.exists() + assert top_sets_dest.exists() + assert characters_dest.exists() + assert timeline_dest.stat().st_size > 0 + assert top_sets_dest.stat().st_size > 0 + assert characters_dest.stat().st_size > 0