1

Trie le graphique de réutilisation des têtes par usage décroissant

This commit is contained in:
Richard Dern 2025-12-02 22:51:08 +01:00
parent 14a7dc8561
commit 6dc1f1cac5
5 changed files with 73 additions and 12 deletions

View File

@@ -5,8 +5,11 @@ from pathlib import Path
from typing import List from typing import List
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from PIL import Image
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.rebrickable.resources import sanitize_name
def load_head_reuse(path: Path) -> List[dict]: def load_head_reuse(path: Path) -> List[dict]:
@@ -27,25 +30,65 @@ def format_label(row: dict) -> str:
return row["part_num"] return row["part_num"]
def plot_head_reuse(path: Path, destination_path: Path, top: int = 30) -> None: def load_head_image(row: dict, resources_dir: Path) -> Image.Image | None:
"""Charge l'image d'une tête si disponible localement."""
set_id = row.get("sample_set_id", "").strip()
character = row.get("known_character", "").strip()
if set_id == "" or character == "":
return None
path = resources_dir / set_id / sanitize_name(character) / "head.jpg"
if not path.exists():
return None
return Image.open(path)
def plot_head_reuse(
path: Path,
destination_path: Path,
top: int | None = None,
resources_dir: Path = Path("figures/rebrickable"),
show_images: bool = True,
) -> None:
"""Trace un bar chart horizontal mettant en avant les têtes exclusives ou rares.""" """Trace un bar chart horizontal mettant en avant les têtes exclusives ou rares."""
rows = load_head_reuse(path) rows = load_head_reuse(path)
rows.sort(key=lambda r: (int(r["other_sets"]), -int(r["filtered_sets"]), r["part_num"])) rows.sort(key=lambda r: (int(r["total_sets"]), int(r["other_sets"]), r["part_num"]))
selected = rows[:top] selected = rows if top is None else rows[:top]
labels = [format_label(r) for r in selected] labels = [format_label(r) for r in selected]
filtered_counts = [int(r["filtered_sets"]) for r in selected] filtered_counts = [int(r["filtered_sets"]) for r in selected]
other_counts = [int(r["other_sets"]) for r in selected] other_counts = [int(r["other_sets"]) for r in selected]
positions = list(reversed(range(len(selected)))) positions = list(range(len(selected)))
fig, ax = plt.subplots(figsize=(12, 0.5 * len(selected) + 1.5)) fig, ax = plt.subplots(figsize=(13, 0.5 * len(selected) + 1.5))
ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés") ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets") ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
ax.set_yticks(positions) ax.set_yticks(positions)
ax.set_yticklabels(reversed(labels)) ax.set_yticklabels(labels)
ax.set_xlabel("Nombre de sets contenant la tête") ax.set_xlabel("Nombre de sets contenant la tête")
ax.invert_yaxis()
ax.grid(axis="x", linestyle="--", alpha=0.4) ax.grid(axis="x", linestyle="--", alpha=0.4)
ax.legend() ax.legend()
if show_images:
max_count = max((f + o) for f, o in zip(filtered_counts, other_counts))
pad = max_count * 0.15 if max_count > 0 else 1.0
ax.set_xlim(left=-pad, right=max_count + pad * 0.2)
for row, pos in zip(selected, positions):
image = load_head_image(row, resources_dir)
if image is None:
continue
target_height = 24
ratio = target_height / image.height
resized = image.resize((int(image.width * ratio), target_height))
imagebox = OffsetImage(resized)
ab = AnnotationBbox(
imagebox,
(-pad * 0.4, pos),
xycoords=("data", "data"),
box_alignment=(0.5, 0.5),
frameon=False,
)
ax.add_artist(ab)
fig.subplots_adjust(left=0.42)
fig.tight_layout() fig.tight_layout()
ensure_parent_dir(destination_path) ensure_parent_dir(destination_path)

View File

@@ -6,6 +6,7 @@ from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set from typing import Dict, Iterable, List, Sequence, Set
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.rebrickable.minifig_character_sets import load_sets
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
from lib.rebrickable.parts_inventory import ( from lib.rebrickable.parts_inventory import (
index_inventory_parts_by_inventory, index_inventory_parts_by_inventory,
@@ -76,19 +77,24 @@ def aggregate_head_reuse(
minifigs_rows: Iterable[dict], minifigs_rows: Iterable[dict],
parts_catalog: Dict[str, dict], parts_catalog: Dict[str, dict],
head_presence: Dict[str, Set[str]], head_presence: Dict[str, Set[str]],
sets_lookup: Dict[str, dict],
) -> List[dict]: ) -> List[dict]:
"""Construit le tableau des têtes présentes dans les sets filtrés avec leur réutilisation globale.""" """Construit le tableau des têtes présentes dans les sets filtrés avec leur réutilisation globale."""
filtered_presence = build_filtered_presence(minifigs_rows) filtered_presence = build_filtered_presence(minifigs_rows)
labels = build_character_labels(minifigs_rows) labels = build_character_labels(minifigs_rows)
aggregates: List[dict] = [] aggregates: List[dict] = []
for part_num, filtered_sets in filtered_presence.items(): for part_num, filtered_sets in filtered_presence.items():
all_sets = head_presence.get(part_num, set()) all_sets = set(head_presence.get(part_num, set()))
all_sets.update(filtered_sets)
other_sets = all_sets - filtered_sets other_sets = all_sets - filtered_sets
sample_set = sorted(filtered_sets)[0]
sample_set_id = sets_lookup.get(sample_set, {}).get("set_id", sample_set.split("-")[0])
aggregates.append( aggregates.append(
{ {
"part_num": part_num, "part_num": part_num,
"part_name": parts_catalog[part_num]["name"], "part_name": parts_catalog[part_num]["name"],
"known_character": labels.get(part_num, ""), "known_character": labels.get(part_num, ""),
"sample_set_id": sample_set_id,
"filtered_sets": str(len(filtered_sets)), "filtered_sets": str(len(filtered_sets)),
"other_sets": str(len(other_sets)), "other_sets": str(len(other_sets)),
"total_sets": str(len(all_sets)), "total_sets": str(len(all_sets)),
@@ -101,7 +107,7 @@ def aggregate_head_reuse(
def write_head_reuse(destination_path: Path, rows: Sequence[dict]) -> None: def write_head_reuse(destination_path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des usages de têtes filtrées vs reste du catalogue.""" """Écrit le CSV des usages de têtes filtrées vs reste du catalogue."""
ensure_parent_dir(destination_path) ensure_parent_dir(destination_path)
fieldnames = ["part_num", "part_name", "known_character", "filtered_sets", "other_sets", "total_sets"] fieldnames = ["part_num", "part_name", "known_character", "sample_set_id", "filtered_sets", "other_sets", "total_sets"]
with destination_path.open("w", newline="") as csv_file: with destination_path.open("w", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader() writer.writeheader()

View File

@@ -4,12 +4,14 @@ from pathlib import Path
from lib.rebrickable.head_reuse import aggregate_head_reuse, build_head_presence, load_minifigs_by_set, write_head_reuse from lib.rebrickable.head_reuse import aggregate_head_reuse, build_head_presence, load_minifigs_by_set, write_head_reuse
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
from lib.rebrickable.minifig_character_sets import load_sets
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
PARTS_CATALOG_PATH = Path("data/raw/parts.csv") PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
INVENTORIES_PATH = Path("data/raw/inventories.csv") INVENTORIES_PATH = Path("data/raw/inventories.csv")
INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv") INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
DESTINATION_PATH = Path("data/intermediate/head_reuse.csv") DESTINATION_PATH = Path("data/intermediate/head_reuse.csv")
@@ -18,8 +20,9 @@ def main() -> None:
minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
parts_catalog = load_parts_catalog(PARTS_CATALOG_PATH) parts_catalog = load_parts_catalog(PARTS_CATALOG_PATH)
head_parts = select_head_parts(parts_catalog) head_parts = select_head_parts(parts_catalog)
sets_lookup = load_sets(SETS_ENRICHED_PATH)
presence = build_head_presence(INVENTORIES_PATH, INVENTORY_PARTS_PATH, head_parts) presence = build_head_presence(INVENTORIES_PATH, INVENTORY_PARTS_PATH, head_parts)
reuse = aggregate_head_reuse(minifigs, parts_catalog, presence) reuse = aggregate_head_reuse(minifigs, parts_catalog, presence, sets_lookup)
write_head_reuse(DESTINATION_PATH, reuse) write_head_reuse(DESTINATION_PATH, reuse)

View File

@@ -7,11 +7,12 @@ from lib.plots.head_reuse import plot_head_reuse
HEAD_REUSE_PATH = Path("data/intermediate/head_reuse.csv") HEAD_REUSE_PATH = Path("data/intermediate/head_reuse.csv")
DESTINATION_PATH = Path("figures/step33/head_reuse.png") DESTINATION_PATH = Path("figures/step33/head_reuse.png")
RESOURCES_DIR = Path("figures/rebrickable")
def main() -> None: def main() -> None:
"""Charge les données d'usage des têtes et produit le graphique associé.""" """Charge les données d'usage des têtes et produit le graphique associé."""
plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH) plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -75,12 +75,19 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
], ],
load_parts_catalog(parts_catalog), load_parts_catalog(parts_catalog),
presence, presence,
{
"s1-1": {"set_id": "s1"},
"s2-1": {"set_id": "s2"},
"s3-1": {"set_id": "s3"},
"s4-1": {"set_id": "s4"},
},
) )
assert reuse == [ assert reuse == [
{ {
"part_num": "p1", "part_num": "p1",
"part_name": "Head 1", "part_name": "Head 1",
"known_character": "Alice", "known_character": "Alice",
"sample_set_id": "s1",
"filtered_sets": "1", "filtered_sets": "1",
"other_sets": "0", "other_sets": "0",
"total_sets": "1", "total_sets": "1",
@@ -89,8 +96,9 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
"part_num": "p2", "part_num": "p2",
"part_name": "Head 2", "part_name": "Head 2",
"known_character": "Bob", "known_character": "Bob",
"sample_set_id": "s1",
"filtered_sets": "2", "filtered_sets": "2",
"other_sets": "1", "other_sets": "1",
"total_sets": "2", "total_sets": "3",
}, },
] ]