Analyse des têtes dual-face

2025-12-02 17:15:43 +01:00
parent 9d1f2c3089
commit 41798ea3df
7 changed files with 627 additions and 0 deletions
--- a/lib/plots/minifig_head_faces.py
+++ b/lib/plots/minifig_head_faces.py
@@ -0,0 +1,133 @@
+"""Visualisations des têtes dual-face."""
+
+from pathlib import Path
+from typing import Iterable, List, Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.patches import Patch
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_rows(path: Path) -> List[dict]:
+    """Load every row of the CSV at ``path`` into memory via ``read_rows``."""
+    rows = read_rows(path)
+    return rows
+
+
+def plot_dual_faces_timeline(by_year_path: Path, destination_path: Path) -> None:
+    """Plot yearly head volumes together with the dual-face share.
+
+    Reads the rows of ``by_year_path`` (columns ``year``, ``total_heads``,
+    ``dual_heads`` and ``share_dual``) and saves the chart to
+    ``destination_path``. Nothing is drawn or written when the CSV has no rows.
+
+    The share series is drawn on the volume axis: each share is multiplied by
+    the largest yearly total so that a share of 1.0 reaches the tallest bar.
+    """
+    rows = load_rows(by_year_path)
+    if not rows:
+        return
+    years = [row["year"] for row in rows]
+    totals = [int(row["total_heads"]) for row in rows]
+    duals = [int(row["dual_heads"]) for row in rows]
+    shares = [float(row["share_dual"]) for row in rows]
+    # Computed once: the scaling factor is the same for every year.
+    peak_total = max(totals)
+    scaled_shares = [share * peak_total for share in shares]
+    x = np.arange(len(years))
+    fig, ax = plt.subplots(figsize=(10, 6))
+    try:
+        ax.bar(x, totals, color="#dddddd", alpha=0.4, label="Têtes totales")
+        ax.plot(x, duals, color="#1f77b4", linewidth=2.0, label="Têtes dual-face (volume)")
+        ax.plot(
+            x,
+            scaled_shares,
+            color="#d62728",
+            linestyle="--",
+            linewidth=1.6,
+            label="Part dual-face (échelle volume)",
+        )
+        ax.set_xticks(x)
+        ax.set_xticklabels(years, rotation=45, ha="right")
+        ax.set_ylabel("Volume de têtes")
+        ax.set_title("Têtes de minifigs : volume et part des dual-face par année")
+        ax.grid(True, linestyle="--", alpha=0.3)
+        ax.legend(loc="upper left", frameon=False)
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=170)
+    finally:
+        # Release the figure even when drawing or saving fails, so pyplot
+        # does not keep it registered.
+        plt.close(fig)
+
+
+def select_top_sets(rows: Iterable[dict], limit: int = 15) -> List[dict]:
+    """Return the first ``limit`` sets ranked by dual-face head count.
+
+    Rows are ordered by descending ``dual_heads``, then descending
+    ``share_dual``, with ascending ``set_num`` as the final tie-breaker.
+    """
+
+    def ranking(row: dict) -> Tuple[int, float, str]:
+        return (-int(row["dual_heads"]), -float(row["share_dual"]), row["set_num"])
+
+    ranked = list(rows)
+    ranked.sort(key=ranking)
+    return ranked[:limit]
+
+
+def plot_dual_faces_top_sets(by_set_path: Path, destination_path: Path) -> None:
+    """Plot the sets holding the most dual-face heads as horizontal bars.
+
+    Reads the rows of ``by_set_path``, keeps the rows returned by
+    ``select_top_sets`` and saves the chart to ``destination_path``. Bars of
+    sets whose ``in_collection`` column equals ``"true"`` are drawn more
+    opaque than the others. Nothing is drawn or written when the CSV has no
+    rows.
+    """
+    rows = load_rows(by_set_path)
+    if not rows:
+        return
+    top_rows = select_top_sets(rows)
+    y = np.arange(len(top_rows))
+    duals = [int(row["dual_heads"]) for row in top_rows]
+    labels = [f"{row['set_num']} · {row['name']} ({row['year']})" for row in top_rows]
+    owned_mask = [row["in_collection"] == "true" for row in top_rows]
+
+    # Shared by the bars and the legend so both always agree.
+    bar_color = "#9467bd"
+    owned_alpha = 0.9
+    missing_alpha = 0.45
+
+    fig, ax = plt.subplots(figsize=(11, 8))
+    try:
+        for pos, value, owned in zip(y, duals, owned_mask):
+            alpha = owned_alpha if owned else missing_alpha
+            ax.barh(pos, value, color=bar_color, alpha=alpha)
+        ax.set_yticks(y)
+        ax.set_yticklabels(labels)
+        # Put the best-ranked set at the top of the chart.
+        ax.invert_yaxis()
+        ax.set_xlabel("Nombre de têtes dual-face")
+        ax.set_title("Top des sets avec têtes dual-face")
+        ax.grid(axis="x", linestyle="--", alpha=0.3)
+        legend = [
+            Patch(facecolor=bar_color, edgecolor="none", alpha=owned_alpha, label="Set possédé"),
+            Patch(facecolor=bar_color, edgecolor="none", alpha=missing_alpha, label="Set manquant"),
+        ]
+        ax.legend(handles=legend, loc="lower right", frameon=False)
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=170)
+    finally:
+        # Release the figure even when drawing or saving fails, so pyplot
+        # does not keep it registered.
+        plt.close(fig)
+
+
+def select_top_characters(rows: Iterable[dict], limit: int = 12) -> List[dict]:
+    """Return the first ``limit`` characters ranked by dual-face head count.
+
+    Rows are ordered by descending ``dual_heads``, then descending
+    ``share_dual``, with ascending ``known_character`` as the final
+    tie-breaker.
+    """
+
+    def ranking(row: dict) -> Tuple[int, float, str]:
+        return (-int(row["dual_heads"]), -float(row["share_dual"]), row["known_character"])
+
+    ranked = list(rows)
+    ranked.sort(key=ranking)
+    return ranked[:limit]
+
+
+def plot_dual_faces_characters(by_character_path: Path, destination_path: Path) -> None:
+    """Plot total versus dual-face head counts for the top characters.
+
+    Reads the rows of ``by_character_path``, keeps the rows returned by
+    ``select_top_characters`` and saves the chart to ``destination_path``.
+    Each character gets a bar for its total heads, overlaid with a bar for its
+    dual-face heads, and its ``share_dual`` value is printed as a percentage
+    to the right of the total bar. Nothing is drawn or written when the CSV
+    has no rows.
+    """
+    rows = load_rows(by_character_path)
+    if not rows:
+        return
+    top_rows = select_top_characters(rows)
+    y = np.arange(len(top_rows))
+    duals = [int(row["dual_heads"]) for row in top_rows]
+    totals = [int(row["total_heads"]) for row in top_rows]
+    shares = [float(row["share_dual"]) for row in top_rows]
+    labels = [row["known_character"] for row in top_rows]
+
+    fig, ax = plt.subplots(figsize=(11, 8))
+    try:
+        ax.barh(y, totals, color="#cccccc", alpha=0.4, label="Têtes totales")
+        ax.barh(y, duals, color="#e15759", alpha=0.9, label="Têtes dual-face")
+        # Iterate the three series in lockstep instead of indexing ``totals``
+        # with the NumPy position value.
+        for pos, total, share in zip(y, totals, shares):
+            ax.text(
+                total + 0.1,
+                pos,
+                f"{share*100:.1f}%",
+                va="center",
+                ha="left",
+                fontsize=9,
+                color="#333333",
+            )
+        ax.set_yticks(y)
+        ax.set_yticklabels(labels)
+        # Put the best-ranked character at the top of the chart.
+        ax.invert_yaxis()
+        ax.set_xlabel("Nombre de têtes")
+        ax.set_title("Personnages dotés de têtes dual-face")
+        ax.grid(axis="x", linestyle="--", alpha=0.3)
+        ax.legend(loc="lower right", frameon=False)
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=170)
+    finally:
+        # Release the figure even when drawing or saving fails, so pyplot
+        # does not keep it registered.
+        plt.close(fig)
--- a/lib/rebrickable/minifig_head_faces.py
+++ b/lib/rebrickable/minifig_head_faces.py
@@ -0,0 +1,194 @@
+"""Détection des têtes de minifigs à plusieurs visages et agrégats associés."""
+
+import csv
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+# Lowercase phrases that mark a part name as describing a head with more than
+# one face. detect_dual_face lowercases the part name and reports a match as
+# soon as any of these phrases occurs in it as a substring.
+DUAL_FACE_KEYWORDS = [
+    "dual sided",
+    "dual-sided",
+    "double sided",
+    "double-sided",
+    "2 sided",
+    "2-sided",
+    "two sided",
+    "two-sided",
+    "dual print",
+    "dual face",
+    "double face",
+    "two faces",
+    "alt face",
+    "alternate face",
+]
+
+
+def load_parts_catalog(path: Path) -> Dict[str, dict]:
+    """Index the rows of the parts catalog CSV by their ``part_num`` column.
+
+    Args:
+        path: Location of a CSV file with a header row containing ``part_num``.
+
+    Returns:
+        A mapping from part number to the full CSV row. When a part number
+        appears several times, the last row wins.
+
+    Raises:
+        KeyError: If a row has no ``part_num`` column.
+    """
+    # newline="" hands line endings to the csv module untouched, so line
+    # breaks embedded in quoted fields are preserved as written.
+    # NOTE(review): the text encoding is still the platform default — confirm
+    # that this matches how the catalog file is produced.
+    with path.open(newline="") as csv_file:
+        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
+
+
+def load_sets(path: Path) -> Dict[str, dict]:
+    """Index the rows returned by ``read_rows(path)`` by their ``set_num`` column.
+
+    When the same ``set_num`` appears more than once, the last row is kept.
+    """
+    return {row["set_num"]: row for row in read_rows(path)}
+
+
+def detect_dual_face(name: str) -> str:
+    """Flag a part name that contains one of the dual-face keywords.
+
+    The comparison is case-insensitive. The result is the string ``"true"``
+    or ``"false"`` rather than a boolean.
+    """
+    haystack = name.lower()
+    matched = any(keyword in haystack for keyword in DUAL_FACE_KEYWORDS)
+    return "true" if matched else "false"
+
+
+def build_head_faces(
+    minifigs_by_set_path: Path,
+    parts_catalog_path: Path,
+    sets_enriched_path: Path,
+) -> List[dict]:
+    """Build one annotated record per head, flagged for multiple faces.
+
+    Each row read from ``minifigs_by_set_path`` is joined with its part in
+    the parts catalog and with its set in the enriched sets file, then tagged
+    with ``is_dual_face`` based on the part name. The result is ordered by
+    ``set_num`` and then ``part_num``.
+
+    Raises:
+        KeyError: If a head refers to a part or a set missing from the lookups.
+    """
+    head_rows = read_rows(minifigs_by_set_path)
+    parts_by_num = load_parts_catalog(parts_catalog_path)
+    sets_by_num = load_sets(sets_enriched_path)
+
+    def annotate(head: dict) -> dict:
+        # Resolve the part first and the set second, then classify the part name.
+        part = parts_by_num[head["part_num"]]
+        owning_set = sets_by_num[head["set_num"]]
+        dual_flag = detect_dual_face(part["name"])
+        return {
+            "set_num": head["set_num"],
+            "set_id": owning_set["set_id"],
+            "year": owning_set["year"],
+            "name": owning_set["name"],
+            "in_collection": owning_set["in_collection"],
+            "part_num": head["part_num"],
+            "part_name": part["name"],
+            "fig_num": head["fig_num"],
+            "known_character": head["known_character"],
+            "gender": head["gender"],
+            "is_dual_face": dual_flag,
+        }
+
+    return sorted(
+        (annotate(head) for head in head_rows),
+        key=lambda record: (record["set_num"], record["part_num"]),
+    )
+
+
+def aggregate_by_year(rows: Iterable[dict]) -> List[dict]:
+    """Count total and dual-face heads per year.
+
+    Returns one dict per year, ordered by the year's integer value, with
+    ``total_heads`` and ``dual_heads`` as strings and ``share_dual`` formatted
+    with four decimals.
+    """
+    # year -> [total heads, dual-face heads]
+    tallies: Dict[str, List[int]] = {}
+    for head in rows:
+        tally = tallies.setdefault(head["year"], [0, 0])
+        tally[0] += 1
+        if head["is_dual_face"] == "true":
+            tally[1] += 1
+    summary = [
+        {
+            "year": year,
+            "total_heads": str(total),
+            "dual_heads": str(dual),
+            "share_dual": f"{dual / total:.4f}",
+        }
+        for year, (total, dual) in tallies.items()
+    ]
+    return sorted(summary, key=lambda item: int(item["year"]))
+
+
+def aggregate_by_set(rows: Iterable[dict]) -> List[dict]:
+    """Count total and dual-face heads per set.
+
+    Set metadata (``set_id``, ``name``, ``year``, ``in_collection``) is taken
+    from the first head seen for each ``set_num``. The result is ordered by
+    descending ``dual_heads``, then descending ``share_dual``, then ascending
+    ``set_num``; counts are returned as strings.
+    """
+    per_set: Dict[str, dict] = {}
+    for head in rows:
+        set_num = head["set_num"]
+        if set_num not in per_set:
+            per_set[set_num] = {
+                "set_num": set_num,
+                "set_id": head["set_id"],
+                "name": head["name"],
+                "year": head["year"],
+                "in_collection": head["in_collection"],
+                "total_heads": 0,
+                "dual_heads": 0,
+            }
+        bucket = per_set[set_num]
+        bucket["total_heads"] += 1
+        if head["is_dual_face"] == "true":
+            bucket["dual_heads"] += 1
+
+    summary = [
+        {
+            **bucket,
+            "total_heads": str(bucket["total_heads"]),
+            "dual_heads": str(bucket["dual_heads"]),
+            "share_dual": f"{bucket['dual_heads'] / bucket['total_heads']:.4f}",
+        }
+        for bucket in per_set.values()
+    ]
+
+    def ranking(item: dict):
+        return (-int(item["dual_heads"]), -float(item["share_dual"]), item["set_num"])
+
+    return sorted(summary, key=ranking)
+
+
+def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
+    """Count total and dual-face heads per known character.
+
+    Heads with an empty ``known_character`` are grouped under ``"Inconnu"``.
+    The ``gender`` reported for a character is the one of its first head
+    seen. The result is ordered by descending ``dual_heads`` and then by
+    character name; counts are returned as strings.
+    """
+    per_character: Dict[str, dict] = {}
+    for head in rows:
+        label = head["known_character"] or "Inconnu"
+        if label not in per_character:
+            per_character[label] = {
+                "known_character": label,
+                "gender": head["gender"],
+                "total_heads": 0,
+                "dual_heads": 0,
+            }
+        bucket = per_character[label]
+        bucket["total_heads"] += 1
+        if head["is_dual_face"] == "true":
+            bucket["dual_heads"] += 1
+
+    summary = [
+        {
+            **bucket,
+            "total_heads": str(bucket["total_heads"]),
+            "dual_heads": str(bucket["dual_heads"]),
+            "share_dual": f"{bucket['dual_heads'] / bucket['total_heads']:.4f}",
+        }
+        for bucket in per_character.values()
+    ]
+    return sorted(
+        summary,
+        key=lambda item: (-int(item["dual_heads"]), item["known_character"]),
+    )
+
+
+def write_csv(destination_path: Path, rows: Sequence[dict], fieldnames: Sequence[str]) -> None:
+    """Write ``rows`` to ``destination_path`` as CSV with a header line.
+
+    ``ensure_parent_dir`` is called on the destination before the file is
+    opened; columns are emitted in the order given by ``fieldnames``.
+    """
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as handle:
+        out = csv.DictWriter(handle, fieldnames=fieldnames)
+        out.writeheader()
+        out.writerows(rows)