1

Analyse des têtes dual-face

This commit is contained in:
Richard Dern 2025-12-02 17:15:43 +01:00
parent 9d1f2c3089
commit 41798ea3df
7 changed files with 627 additions and 0 deletions

View File

@ -312,3 +312,18 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `dat
- `data/intermediate/part_categories_global.csv` : parts globales par catégorie.
Les visuels `figures/step29/top_part_categories_area.png`, `figures/step29/part_categories_heatmap.png` et `figures/step29/structural_share_timeline.png` montrent respectivement l'évolution des principales catégories (aire empilée), une heatmap exhaustive catégorie × année, et la trajectoire de la part des pièces structurelles.
### Étape 30 : visages multiples sur les têtes de minifigs
1. `source .venv/bin/activate`
2. `python -m scripts.compute_minifig_head_faces`
3. `python -m scripts.plot_minifig_head_faces`
Le calcul lit `data/intermediate/minifigs_by_set.csv`, `data/raw/parts.csv` et `data/intermediate/sets_enriched.csv` pour annoter les têtes de minifigs contenant plusieurs visages (détection par mots-clés dans le nom de pièce). Il produit :
- `data/intermediate/minifig_head_faces.csv` : têtes annotées (set, personnage, genre, indicateur dual-face).
- `data/intermediate/minifig_head_faces_by_year.csv` : volume et part des dual-face par année.
- `data/intermediate/minifig_head_faces_by_set.csv` : volume et part par set (possession incluse).
- `data/intermediate/minifig_head_faces_by_character.csv` : volume et part par personnage.
Les visuels `figures/step30/minifig_head_faces_timeline.png`, `figures/step30/minifig_head_faces_top_sets.png` et `figures/step30/minifig_head_faces_characters.png` montrent respectivement la trajectoire annuelle, le top des sets concernés et les personnages dotés de têtes dual-face.

View File

@ -0,0 +1,133 @@
"""Visualisations des têtes dual-face."""
from pathlib import Path
from typing import Iterable, List, Tuple
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_rows(path: Path) -> List[dict]:
    """Return the rows of the CSV file at ``path``.

    Thin wrapper that delegates entirely to ``read_rows``.
    """
    loaded = read_rows(path)
    return loaded
def plot_dual_faces_timeline(by_year_path: Path, destination_path: Path) -> None:
    """Plot yearly head volumes together with the dual-face share.

    The figure combines grey bars for the total number of heads, a solid line
    for the number of dual-face heads and a dashed line for the dual-face
    share, rescaled so it can be drawn on the same volume axis.

    Args:
        by_year_path: CSV with the columns ``year``, ``total_heads``,
            ``dual_heads`` and ``share_dual`` (one row per year).
        destination_path: Image file to save; ``ensure_parent_dir`` is called
            on it before saving.

    Returns:
        None. When the CSV yields no rows the function returns early and no
        file is written.
    """
    rows = load_rows(by_year_path)
    if not rows:
        return
    years = [row["year"] for row in rows]
    totals = [int(row["total_heads"]) for row in rows]
    duals = [int(row["dual_heads"]) for row in rows]
    shares = [float(row["share_dual"]) for row in rows]
    # The share is stretched by the tallest bar so it fits the volume axis.
    # The peak is computed once instead of once per plotted point.
    peak_total = max(totals)
    scaled_shares = [share * peak_total for share in shares]
    x = np.arange(len(years))
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x, totals, color="#dddddd", alpha=0.4, label="Têtes totales")
    ax.plot(x, duals, color="#1f77b4", linewidth=2.0, label="Têtes dual-face (volume)")
    ax.plot(
        x,
        scaled_shares,
        color="#d62728",
        linestyle="--",
        linewidth=1.6,
        label="Part dual-face (échelle volume)",
    )
    ax.set_xticks(x)
    ax.set_xticklabels(years, rotation=45, ha="right")
    ax.set_ylabel("Volume de têtes")
    ax.set_title("Têtes de minifigs : volume et part des dual-face par année")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.legend(loc="upper left", frameon=False)
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=170)
    plt.close(fig)
def select_top_sets(rows: Iterable[dict], limit: int = 15) -> List[dict]:
    """Return the ``limit`` sets holding the most dual-face heads.

    Rows are ranked by dual-face head count (descending), then by dual-face
    share (descending), then by ``set_num`` (ascending) as a final tie-break.
    """

    def ranking(row: dict) -> Tuple[int, float, str]:
        return (-int(row["dual_heads"]), -float(row["share_dual"]), row["set_num"])

    ranked = list(rows)
    ranked.sort(key=ranking)
    return ranked[:limit]
def plot_dual_faces_top_sets(by_set_path: Path, destination_path: Path) -> None:
    """Draw a horizontal bar chart of the sets with the most dual-face heads.

    The rows read from ``by_set_path`` are ranked with ``select_top_sets``.
    Bars of sets whose ``in_collection`` column equals ``"true"`` are drawn
    more opaque than the others. Nothing is written when there are no rows.
    """
    records = load_rows(by_set_path)
    if not records:
        return
    ranked = select_top_sets(records)
    positions = np.arange(len(ranked))
    dual_counts = [int(record["dual_heads"]) for record in ranked]
    tick_labels = [
        f"{record['set_num']} · {record['name']} ({record['year']})" for record in ranked
    ]
    owned_flags = [record["in_collection"] == "true" for record in ranked]
    bar_color = "#9467bd"
    owned_alpha, missing_alpha = 0.9, 0.45

    fig, ax = plt.subplots(figsize=(11, 8))
    # One barh call per set: each bar needs its own opacity.
    for position, count, is_owned in zip(positions, dual_counts, owned_flags):
        ax.barh(
            position,
            count,
            color=bar_color,
            alpha=owned_alpha if is_owned else missing_alpha,
        )
    ax.set_yticks(positions)
    ax.set_yticklabels(tick_labels)
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de têtes dual-face")
    ax.set_title("Top des sets avec têtes dual-face")
    ax.grid(axis="x", linestyle="--", alpha=0.3)
    legend_handles = [
        Patch(facecolor=bar_color, edgecolor="none", alpha=opacity, label=caption)
        for opacity, caption in ((owned_alpha, "Set possédé"), (missing_alpha, "Set manquant"))
    ]
    ax.legend(handles=legend_handles, loc="lower right", frameon=False)
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=170)
    plt.close(fig)
def select_top_characters(rows: Iterable[dict], limit: int = 12) -> List[dict]:
    """Return the ``limit`` characters having the most dual-face heads.

    Ranking: dual-face head count descending, dual-face share descending,
    then character name ascending.
    """

    def ranking(row: dict) -> Tuple[int, float, str]:
        dual_count = int(row["dual_heads"])
        dual_share = float(row["share_dual"])
        return (-dual_count, -dual_share, row["known_character"])

    ordered = sorted(rows, key=ranking)
    del ordered[limit:]
    return ordered
def plot_dual_faces_characters(by_character_path: Path, destination_path: Path) -> None:
    """Draw the characters with the most dual-face heads as horizontal bars.

    For each character picked by ``select_top_characters`` the chart overlays
    the dual-face head count on the total head count and prints the dual-face
    share as a percentage to the right of the total bar. Nothing is written
    when the CSV yields no rows.
    """
    records = load_rows(by_character_path)
    if not records:
        return
    ranked = select_top_characters(records)
    positions = np.arange(len(ranked))
    dual_counts = [int(record["dual_heads"]) for record in ranked]
    total_counts = [int(record["total_heads"]) for record in ranked]
    dual_shares = [float(record["share_dual"]) for record in ranked]
    names = [record["known_character"] for record in ranked]

    fig, ax = plt.subplots(figsize=(11, 8))
    ax.barh(positions, total_counts, color="#cccccc", alpha=0.4, label="Têtes totales")
    ax.barh(positions, dual_counts, color="#e15759", alpha=0.9, label="Têtes dual-face")
    annotation_style = {
        "va": "center",
        "ha": "left",
        "fontsize": 9,
        "color": "#333333",
    }
    for position, total, share in zip(positions, total_counts, dual_shares):
        # Small offset keeps the percentage clear of the end of the bar.
        ax.text(total + 0.1, position, f"{share*100:.1f}%", **annotation_style)
    ax.set_yticks(positions)
    ax.set_yticklabels(names)
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de têtes")
    ax.set_title("Personnages dotés de têtes dual-face")
    ax.grid(axis="x", linestyle="--", alpha=0.3)
    ax.legend(loc="lower right", frameon=False)
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=170)
    plt.close(fig)

View File

@ -0,0 +1,194 @@
"""Détection des têtes de minifigs à plusieurs visages et agrégats associés."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
# Lowercase substrings that flag a part name as a dual-face head.
# detect_dual_face() lowercases the name before looking for them, so every
# entry here must stay lowercase.
DUAL_FACE_KEYWORDS = [
    "dual sided",
    "dual-sided",
    "double sided",
    "double-sided",
    "2 sided",
    "2-sided",
    "two sided",
    "two-sided",
    "dual print",
    "dual face",
    "double face",
    "two faces",
    "alt face",
    "alternate face",
]
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the parts catalogue by part number.

    Args:
        path: CSV file exposing at least a ``part_num`` column.

    Returns:
        Mapping of ``part_num`` to the full CSV row. When a part number
        appears several times, the last row wins.

    Raises:
        KeyError: if the file has no ``part_num`` column.
    """
    # newline="" is what the csv module expects: it handles line endings
    # itself, so quoted fields containing line breaks are read back unchanged.
    with path.open(newline="") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
def load_sets(path: Path) -> Dict[str, dict]:
    """Build a ``set_num`` -> row lookup from the enriched sets file.

    Rows come from ``read_rows``; a later row replaces an earlier one that
    carries the same ``set_num``.
    """
    return {record["set_num"]: record for record in read_rows(path)}
def detect_dual_face(name: str) -> str:
    """Tell whether a part name mentions a dual-face head.

    The match is a case-insensitive substring search against
    ``DUAL_FACE_KEYWORDS``. The answer is the string ``"true"`` or
    ``"false"`` rather than a boolean.
    """
    haystack = name.lower()
    matched = any(keyword in haystack for keyword in DUAL_FACE_KEYWORDS)
    return "true" if matched else "false"
def build_head_faces(
    minifigs_by_set_path: Path,
    parts_catalog_path: Path,
    sets_enriched_path: Path,
) -> List[dict]:
    """Annotate every minifig head with its set data and a dual-face flag.

    Each row of ``minifigs_by_set_path`` is joined with the parts catalogue
    (for the part name) and with the enriched sets (for id, year, name and
    ownership). The result is sorted by ``set_num`` then ``part_num``.

    Raises:
        KeyError: if a row references a part or a set absent from the lookups.
    """
    head_rows = read_rows(minifigs_by_set_path)
    parts_by_num = load_parts_catalog(parts_catalog_path)
    sets_by_num = load_sets(sets_enriched_path)

    def annotate(head: dict) -> dict:
        part = parts_by_num[head["part_num"]]
        owning_set = sets_by_num[head["set_num"]]
        dual_flag = detect_dual_face(part["name"])
        return {
            "set_num": head["set_num"],
            "set_id": owning_set["set_id"],
            "year": owning_set["year"],
            "name": owning_set["name"],
            "in_collection": owning_set["in_collection"],
            "part_num": head["part_num"],
            "part_name": part["name"],
            "fig_num": head["fig_num"],
            "known_character": head["known_character"],
            "gender": head["gender"],
            "is_dual_face": dual_flag,
        }

    return sorted(
        (annotate(head) for head in head_rows),
        key=lambda entry: (entry["set_num"], entry["part_num"]),
    )
def aggregate_by_year(rows: Iterable[dict]) -> List[dict]:
    """Count total and dual-face heads per year.

    Returns one dict per year, sorted by the year's integer value. All values
    are strings; ``share_dual`` is the dual/total ratio with four decimals.
    """
    totals: Dict[str, int] = {}
    duals: Dict[str, int] = {}
    for head in rows:
        year = head["year"]
        totals[year] = totals.get(year, 0) + 1
        duals[year] = duals.get(year, 0) + (1 if head["is_dual_face"] == "true" else 0)

    return [
        {
            "year": year,
            "total_heads": str(totals[year]),
            "dual_heads": str(duals[year]),
            "share_dual": f"{duals[year] / totals[year]:.4f}",
        }
        for year in sorted(totals, key=int)
    ]
def aggregate_by_set(rows: Iterable[dict]) -> List[dict]:
    """Count total and dual-face heads per set.

    Set metadata (id, name, year, ownership) is taken from the first row seen
    for each ``set_num``. The result is ranked by dual-face count descending,
    dual-face share descending, then ``set_num`` ascending. Counts and share
    are returned as strings, the share with four decimals.
    """
    per_set: Dict[str, dict] = {}
    for head in rows:
        set_num = head["set_num"]
        if set_num not in per_set:
            per_set[set_num] = {
                "set_num": set_num,
                "set_id": head["set_id"],
                "name": head["name"],
                "year": head["year"],
                "in_collection": head["in_collection"],
                "total_heads": 0,
                "dual_heads": 0,
            }
        bucket = per_set[set_num]
        bucket["total_heads"] += 1
        if head["is_dual_face"] == "true":
            bucket["dual_heads"] += 1

    copied_fields = ("set_num", "set_id", "name", "year", "in_collection")
    summary = [
        {
            **{field: bucket[field] for field in copied_fields},
            "total_heads": str(bucket["total_heads"]),
            "dual_heads": str(bucket["dual_heads"]),
            "share_dual": f"{bucket['dual_heads'] / bucket['total_heads']:.4f}",
        }
        for bucket in per_set.values()
    ]

    def ranking(entry: dict):
        return (-int(entry["dual_heads"]), -float(entry["share_dual"]), entry["set_num"])

    return sorted(summary, key=ranking)
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count total and dual-face heads per known character.

    Rows whose ``known_character`` is empty are grouped under ``"Inconnu"``.
    The ``gender`` reported for a character is the one carried by the first
    row seen for it.

    Args:
        rows: Annotated heads exposing ``known_character``, ``gender`` and
            ``is_dual_face``.

    Returns:
        One dict per character with string values (``share_dual`` uses four
        decimals), ranked by dual-face count descending, dual-face share
        descending, then character name ascending. This is the same
        tie-breaking as the per-set aggregate.
    """
    counts: Dict[str, dict] = {}
    for row in rows:
        character = row["known_character"] or "Inconnu"
        entry = counts.get(character)
        if entry is None:
            entry = {
                "known_character": character,
                "gender": row["gender"],
                "total_heads": 0,
                "dual_heads": 0,
            }
            counts[character] = entry
        entry["total_heads"] += 1
        if row["is_dual_face"] == "true":
            entry["dual_heads"] += 1

    aggregated = [
        {
            "known_character": character,
            "gender": entry["gender"],
            "total_heads": str(entry["total_heads"]),
            "dual_heads": str(entry["dual_heads"]),
            "share_dual": f"{entry['dual_heads'] / entry['total_heads']:.4f}",
        }
        for character, entry in counts.items()
    ]
    # Break ties on the share before the name, as the other rankings do.
    aggregated.sort(
        key=lambda row: (
            -int(row["dual_heads"]),
            -float(row["share_dual"]),
            row["known_character"],
        )
    )
    return aggregated
def write_csv(destination_path: Path, rows: Sequence[dict], fieldnames: Sequence[str]) -> None:
    """Write ``rows`` to ``destination_path`` as CSV with a header line.

    ``fieldnames`` fixes both the header and the column order.
    ``ensure_parent_dir`` is called on the destination before it is opened.
    """
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as handle:
        dict_writer = csv.DictWriter(handle, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(rows)

View File

@ -0,0 +1,64 @@
"""Calcule les têtes dual-face des minifigs et leurs agrégats."""
from pathlib import Path
from lib.rebrickable.minifig_head_faces import (
aggregate_by_character,
aggregate_by_set,
aggregate_by_year,
build_head_faces,
write_csv,
)
# Input files handed to build_head_faces().
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Output files: the annotated heads, then one aggregate per axis.
HEAD_FACES_PATH = Path("data/intermediate/minifig_head_faces.csv")
HEAD_FACES_BY_YEAR_PATH = Path("data/intermediate/minifig_head_faces_by_year.csv")
HEAD_FACES_BY_SET_PATH = Path("data/intermediate/minifig_head_faces_by_set.csv")
HEAD_FACES_BY_CHARACTER_PATH = Path("data/intermediate/minifig_head_faces_by_character.csv")
def main() -> None:
    """Build the annotated heads and write them with their three aggregates.

    All aggregates are computed before the first file is written.
    """
    heads = build_head_faces(MINIFIGS_BY_SET_PATH, PARTS_CATALOG_PATH, SETS_ENRICHED_PATH)
    by_year = aggregate_by_year(heads)
    by_set = aggregate_by_set(heads)
    by_character = aggregate_by_character(heads)

    # Columns shared by every aggregate file.
    count_columns = ["total_heads", "dual_heads", "share_dual"]
    outputs = [
        (
            HEAD_FACES_PATH,
            heads,
            [
                "set_num",
                "set_id",
                "year",
                "name",
                "in_collection",
                "part_num",
                "part_name",
                "fig_num",
                "known_character",
                "gender",
                "is_dual_face",
            ],
        ),
        (HEAD_FACES_BY_YEAR_PATH, by_year, ["year", *count_columns]),
        (
            HEAD_FACES_BY_SET_PATH,
            by_set,
            ["set_num", "set_id", "name", "year", "in_collection", *count_columns],
        ),
        (
            HEAD_FACES_BY_CHARACTER_PATH,
            by_character,
            ["known_character", "gender", *count_columns],
        ),
    ]
    for destination, rows, columns in outputs:
        write_csv(destination, rows, columns)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,28 @@
"""Trace les graphiques liés aux têtes dual-face."""
from pathlib import Path
from lib.plots.minifig_head_faces import (
plot_dual_faces_characters,
plot_dual_faces_timeline,
plot_dual_faces_top_sets,
)
# Aggregate CSV files read by the plotting functions.
BY_YEAR_PATH = Path("data/intermediate/minifig_head_faces_by_year.csv")
BY_SET_PATH = Path("data/intermediate/minifig_head_faces_by_set.csv")
BY_CHARACTER_PATH = Path("data/intermediate/minifig_head_faces_by_character.csv")
# Image files the figures are saved to.
TIMELINE_DESTINATION = Path("figures/step30/minifig_head_faces_timeline.png")
TOP_SETS_DESTINATION = Path("figures/step30/minifig_head_faces_top_sets.png")
CHARACTERS_DESTINATION = Path("figures/step30/minifig_head_faces_characters.png")
def main() -> None:
    """Render the three dual-face figures from their aggregate CSV files."""
    jobs = (
        (plot_dual_faces_timeline, BY_YEAR_PATH, TIMELINE_DESTINATION),
        (plot_dual_faces_top_sets, BY_SET_PATH, TOP_SETS_DESTINATION),
        (plot_dual_faces_characters, BY_CHARACTER_PATH, CHARACTERS_DESTINATION),
    )
    for render, source_path, figure_path in jobs:
        render(source_path, figure_path)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,148 @@
"""Tests des têtes dual-face."""
import csv
from pathlib import Path
from lib.rebrickable.minifig_head_faces import (
aggregate_by_character,
aggregate_by_set,
aggregate_by_year,
build_head_faces,
detect_dual_face,
)
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write ``headers`` followed by ``rows`` to ``path`` as CSV (test helper)."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
def test_detect_dual_face_matches_keywords() -> None:
    """Check that keyword mentions in a part name are recognised."""
    expectations = {
        "Minifig Head Dual Sided Smile": "true",
        "Minifig Head w/ Alt Face": "true",
        "Minifig Head Angry": "false",
    }
    for part_name, expected in expectations.items():
        assert detect_dual_face(part_name) == expected
def test_build_head_faces_and_aggregates(tmp_path: Path) -> None:
    """Build the dual-face annotation, then aggregate by set, year and character."""
    # Fixture: three heads across two sets; part p1 appears in both sets.
    minifigs_by_set = tmp_path / "minifigs_by_set.csv"
    write_csv(
        minifigs_by_set,
        ["set_num", "part_num", "known_character", "fig_num", "gender"],
        [
            ["1000-1", "p1", "Alice", "fig-1", "female"],
            ["1000-1", "p2", "Bob", "fig-2", "male"],
            ["2000-1", "p1", "Alice", "fig-1", "female"],
        ],
    )
    # Fixture: only p1 has a name containing a dual-face keyword.
    parts_catalog = tmp_path / "parts.csv"
    write_csv(
        parts_catalog,
        ["part_num", "name", "part_cat_id"],
        [
            ["p1", "Minifig Head Dual Sided Grin", "59"],
            ["p2", "Minifig Head Serious", "59"],
        ],
    )
    # Fixture: one owned set (2020) and one missing set (2021).
    sets_enriched = tmp_path / "sets_enriched.csv"
    write_csv(
        sets_enriched,
        ["set_num", "set_id", "name", "year", "in_collection"],
        [
            ["1000-1", "1000", "Set A", "2020", "true"],
            ["2000-1", "2000", "Set B", "2021", "false"],
        ],
    )
    heads = build_head_faces(minifigs_by_set, parts_catalog, sets_enriched)
    by_year = aggregate_by_year(heads)
    by_set = aggregate_by_set(heads)
    by_character = aggregate_by_character(heads)
    # Heads are expected sorted by set_num, then part_num.
    assert heads == [
        {
            "set_num": "1000-1",
            "set_id": "1000",
            "year": "2020",
            "name": "Set A",
            "in_collection": "true",
            "part_num": "p1",
            "part_name": "Minifig Head Dual Sided Grin",
            "fig_num": "fig-1",
            "known_character": "Alice",
            "gender": "female",
            "is_dual_face": "true",
        },
        {
            "set_num": "1000-1",
            "set_id": "1000",
            "year": "2020",
            "name": "Set A",
            "in_collection": "true",
            "part_num": "p2",
            "part_name": "Minifig Head Serious",
            "fig_num": "fig-2",
            "known_character": "Bob",
            "gender": "male",
            "is_dual_face": "false",
        },
        {
            "set_num": "2000-1",
            "set_id": "2000",
            "year": "2021",
            "name": "Set B",
            "in_collection": "false",
            "part_num": "p1",
            "part_name": "Minifig Head Dual Sided Grin",
            "fig_num": "fig-1",
            "known_character": "Alice",
            "gender": "female",
            "is_dual_face": "true",
        },
    ]
    # Yearly aggregate is expected in chronological order.
    assert by_year == [
        {"year": "2020", "total_heads": "2", "dual_heads": "1", "share_dual": "0.5000"},
        {"year": "2021", "total_heads": "1", "dual_heads": "1", "share_dual": "1.0000"},
    ]
    # Both sets have one dual-face head; the higher share (Set B) comes first.
    assert by_set == [
        {
            "set_num": "2000-1",
            "set_id": "2000",
            "name": "Set B",
            "year": "2021",
            "in_collection": "false",
            "total_heads": "1",
            "dual_heads": "1",
            "share_dual": "1.0000",
        },
        {
            "set_num": "1000-1",
            "set_id": "1000",
            "name": "Set A",
            "year": "2020",
            "in_collection": "true",
            "total_heads": "2",
            "dual_heads": "1",
            "share_dual": "0.5000",
        },
    ]
    # Characters are ranked by dual-face head count, highest first.
    assert by_character == [
        {
            "known_character": "Alice",
            "gender": "female",
            "total_heads": "2",
            "dual_heads": "2",
            "share_dual": "1.0000",
        },
        {
            "known_character": "Bob",
            "gender": "male",
            "total_heads": "1",
            "dual_heads": "0",
            "share_dual": "0.0000",
        },
    ]

View File

@ -0,0 +1,45 @@
"""Tests des graphiques des têtes dual-face."""
import matplotlib
from pathlib import Path
from lib.plots.minifig_head_faces import (
plot_dual_faces_characters,
plot_dual_faces_timeline,
plot_dual_faces_top_sets,
)
# Select the non-interactive Agg backend so the figures can be rendered to
# files without a display.
matplotlib.use("Agg")
def test_plot_minifig_head_faces_outputs_images(tmp_path: Path) -> None:
    """Check that each plotting function writes a non-empty image file."""
    year_csv = tmp_path / "minifig_head_faces_by_year.csv"
    set_csv = tmp_path / "minifig_head_faces_by_set.csv"
    character_csv = tmp_path / "minifig_head_faces_by_character.csv"
    year_csv.write_text("year,total_heads,dual_heads,share_dual\n2020,2,1,0.5\n2021,3,2,0.6667\n")
    set_csv.write_text(
        "set_num,set_id,name,year,in_collection,total_heads,dual_heads,share_dual\n"
        "1000-1,1000,Set A,2020,true,2,1,0.5\n"
        "2000-1,2000,Set B,2021,false,3,2,0.6667\n"
    )
    character_csv.write_text(
        "known_character,gender,total_heads,dual_heads,share_dual\n"
        "Alice,female,2,2,1.0\n"
        "Bob,male,1,0,0.0\n"
    )

    figures_dir = tmp_path / "figures" / "step30"
    jobs = [
        (plot_dual_faces_timeline, year_csv, figures_dir / "minifig_head_faces_timeline.png"),
        (plot_dual_faces_top_sets, set_csv, figures_dir / "minifig_head_faces_top_sets.png"),
        (plot_dual_faces_characters, character_csv, figures_dir / "minifig_head_faces_characters.png"),
    ]
    for render, source, image in jobs:
        render(source, image)

    images = [image for _, _, image in jobs]
    for image in images:
        assert image.exists()
    for image in images:
        assert image.stat().st_size > 0