diff --git a/lib/plots/head_reuse.py b/lib/plots/head_reuse.py
index 8eb9aad..671e787 100644
--- a/lib/plots/head_reuse.py
+++ b/lib/plots/head_reuse.py
@@ -5,8 +5,11 @@ from pathlib import Path
 from typing import List
 
 import matplotlib.pyplot as plt
+from matplotlib.offsetbox import AnnotationBbox, OffsetImage
+from PIL import Image
 
 from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.resources import sanitize_name
 
 
 def load_head_reuse(path: Path) -> List[dict]:
@@ -27,25 +30,79 @@ def format_label(row: dict) -> str:
     return row["part_num"]
 
 
-def plot_head_reuse(path: Path, destination_path: Path, top: int = 30) -> None:
+def load_head_image(row: dict, resources_dir: Path) -> Image.Image | None:
+    """Load the head image for a row when it is available locally.
+
+    The file is looked up at
+    ``resources_dir/<sample_set_id>/<sanitize_name(known_character)>/head.jpg``.
+    Returns ``None`` when either field is blank or the file does not exist.
+    """
+    set_id = (row.get("sample_set_id") or "").strip()
+    character = (row.get("known_character") or "").strip()
+    if not set_id or not character:
+        return None
+    path = resources_dir / set_id / sanitize_name(character) / "head.jpg"
+    if not path.exists():
+        return None
+    # Image.open() is lazy and keeps the file open; copy the pixels so the
+    # handle is closed before returning.
+    with Image.open(path) as handle:
+        return handle.copy()
+
+
+def plot_head_reuse(
+    path: Path,
+    destination_path: Path,
+    top: int | None = None,
+    resources_dir: Path = Path("figures/rebrickable"),
+    show_images: bool = True,
+) -> None:
     """Trace un bar chart horizontal mettant en avant les têtes exclusives ou rares."""
     rows = load_head_reuse(path)
-    rows.sort(key=lambda r: (int(r["other_sets"]), -int(r["filtered_sets"]), r["part_num"]))
-    selected = rows[:top]
+    rows.sort(key=lambda r: (int(r["total_sets"]), int(r["other_sets"]), r["part_num"]))
+    selected = rows if top is None else rows[:top]
     labels = [format_label(r) for r in selected]
     filtered_counts = [int(r["filtered_sets"]) for r in selected]
     other_counts = [int(r["other_sets"]) for r in selected]
-    positions = list(reversed(range(len(selected))))
+    positions = list(range(len(selected)))
 
-    fig, ax = plt.subplots(figsize=(12, 0.5 * len(selected) + 1.5))
+    fig, ax = plt.subplots(figsize=(13, 0.5 * len(selected) + 1.5))
     ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
     ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
     ax.set_yticks(positions)
-    ax.set_yticklabels(reversed(labels))
+    ax.set_yticklabels(labels)
     ax.set_xlabel("Nombre de sets contenant la tête")
-    ax.invert_yaxis()
     ax.grid(axis="x", linestyle="--", alpha=0.4)
     ax.legend()
+
+    if show_images:
+        # default=0 keeps an empty selection from raising ValueError in max().
+        max_count = max((f + o for f, o in zip(filtered_counts, other_counts)), default=0)
+        pad = max_count * 0.15 if max_count > 0 else 1.0
+        # Extend the x range below zero so the thumbnails fit left of the bars.
+        ax.set_xlim(left=-pad, right=max_count + pad * 0.2)
+        target_height = 24
+        for row, pos in zip(selected, positions):
+            image = load_head_image(row, resources_dir)
+            if image is None:
+                continue
+            ratio = target_height / image.height
+            # Clamp to 1 px so a very tall image never yields a zero width.
+            target_width = max(1, int(image.width * ratio))
+            resized = image.resize((target_width, target_height))
+            imagebox = OffsetImage(resized)
+            ab = AnnotationBbox(
+                imagebox,
+                (-pad * 0.4, pos),
+                xycoords=("data", "data"),
+                box_alignment=(0.5, 0.5),
+                frameon=False,
+            )
+            ax.add_artist(ab)
+
+    # NOTE(review): tight_layout() below recomputes the subplot margins, so
+    # this left margin is probably overridden; confirm the intended order.
+    fig.subplots_adjust(left=0.42)
     fig.tight_layout()
 
     ensure_parent_dir(destination_path)
diff --git a/lib/rebrickable/head_reuse.py b/lib/rebrickable/head_reuse.py
index 7495c78..3c96b14 100644
--- a/lib/rebrickable/head_reuse.py
+++ b/lib/rebrickable/head_reuse.py
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Dict, Iterable, List, Sequence, Set
 
 from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.minifig_character_sets import load_sets
 from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
 from lib.rebrickable.parts_inventory import (
     index_inventory_parts_by_inventory,
@@ -76,19 +77,26 @@ def aggregate_head_reuse(
     minifigs_rows: Iterable[dict],
     parts_catalog: Dict[str, dict],
     head_presence: Dict[str, Set[str]],
+    sets_lookup: Dict[str, dict],
 ) -> List[dict]:
     """Construit le tableau des têtes présentes dans les sets filtrés avec leur réutilisation globale."""
     filtered_presence = build_filtered_presence(minifigs_rows)
     labels = build_character_labels(minifigs_rows)
     aggregates: List[dict] = []
     for part_num, filtered_sets in filtered_presence.items():
-        all_sets = head_presence.get(part_num, set())
+        # Union into a new set: head_presence is left untouched and sets seen only in the filtered rows still count.
+        all_sets = head_presence.get(part_num, set()) | filtered_sets
         other_sets = all_sets - filtered_sets
+        # The lexicographically smallest filtered set is used as the sample.
+        sample_set = min(filtered_sets)
+        # Prefer the lookup's set_id; otherwise use the text before the first "-" of the key.
+        sample_set_id = sets_lookup.get(sample_set, {}).get("set_id", sample_set.split("-")[0])
         aggregates.append(
             {
                 "part_num": part_num,
                 "part_name": parts_catalog[part_num]["name"],
                 "known_character": labels.get(part_num, ""),
+                "sample_set_id": sample_set_id,
                 "filtered_sets": str(len(filtered_sets)),
                 "other_sets": str(len(other_sets)),
                 "total_sets": str(len(all_sets)),
@@ -101,7 +109,7 @@ def aggregate_head_reuse(
 def write_head_reuse(destination_path: Path, rows: Sequence[dict]) -> None:
     """Écrit le CSV des usages de têtes filtrées vs reste du catalogue."""
     ensure_parent_dir(destination_path)
-    fieldnames = ["part_num", "part_name", "known_character", "filtered_sets", "other_sets", "total_sets"]
+    fieldnames = ["part_num", "part_name", "known_character", "sample_set_id", "filtered_sets", "other_sets", "total_sets"]
     with destination_path.open("w", newline="") as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
         writer.writeheader()
diff --git a/scripts/compute_head_reuse.py b/scripts/compute_head_reuse.py
index 592057e..e62a309 100644
--- a/scripts/compute_head_reuse.py
+++ b/scripts/compute_head_reuse.py
@@ -4,12 +4,14 @@ from pathlib import Path
 
 from lib.rebrickable.head_reuse import aggregate_head_reuse, build_head_presence, load_minifigs_by_set, write_head_reuse
+from lib.rebrickable.minifig_character_sets import load_sets
 from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
 
 
 MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
 PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
 INVENTORIES_PATH = Path("data/raw/inventories.csv")
 INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
+SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
 DESTINATION_PATH = Path("data/intermediate/head_reuse.csv")
 
 
@@ -18,8 +20,9 @@ def main() -> None:
     minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
     parts_catalog = load_parts_catalog(PARTS_CATALOG_PATH)
     head_parts = select_head_parts(parts_catalog)
+    sets_lookup = load_sets(SETS_ENRICHED_PATH)
     presence = build_head_presence(INVENTORIES_PATH, INVENTORY_PARTS_PATH, head_parts)
-    reuse = aggregate_head_reuse(minifigs, parts_catalog, presence)
+    reuse = aggregate_head_reuse(minifigs, parts_catalog, presence, sets_lookup)
     write_head_reuse(DESTINATION_PATH, reuse)
 
 
diff --git a/scripts/plot_head_reuse.py b/scripts/plot_head_reuse.py
index 62f1f4e..054d739 100644
--- a/scripts/plot_head_reuse.py
+++ b/scripts/plot_head_reuse.py
@@ -7,11 +7,12 @@ from lib.plots.head_reuse import plot_head_reuse
 
 HEAD_REUSE_PATH = Path("data/intermediate/head_reuse.csv")
 DESTINATION_PATH = Path("figures/step33/head_reuse.png")
+RESOURCES_DIR = Path("figures/rebrickable")
 
 
 def main() -> None:
     """Charge les données d'usage des têtes et produit le graphique associé."""
-    plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH)
+    plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_head_reuse.py b/tests/test_head_reuse.py
index c883803..7dd6974 100644
--- a/tests/test_head_reuse.py
+++ b/tests/test_head_reuse.py
@@ -75,12 +75,19 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
         ],
         load_parts_catalog(parts_catalog),
         presence,
+        {
+            "s1-1": {"set_id": "s1"},
+            "s2-1": {"set_id": "s2"},
+            "s3-1": {"set_id": "s3"},
+            "s4-1": {"set_id": "s4"},
+        },
     )
     assert reuse == [
         {
             "part_num": "p1",
             "part_name": "Head 1",
             "known_character": "Alice",
+            "sample_set_id": "s1",
             "filtered_sets": "1",
             "other_sets": "0",
             "total_sets": "1",
@@ -89,8 +96,9 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
             "part_num": "p2",
             "part_name": "Head 2",
             "known_character": "Bob",
+            "sample_set_id": "s1",
             "filtered_sets": "2",
             "other_sets": "1",
-            "total_sets": "2",
+            "total_sets": "3",
         },
     ]