"""Extraction des têtes de minifigs présentes dans chaque set filtré."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set, Tuple
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.minifig_heads import HEAD_CATEGORIES
from lib.rebrickable.parts_inventory import (
index_inventory_minifigs_by_inventory,
index_inventory_parts_by_inventory,
normalize_boolean,
select_latest_inventories,
)
from lib.rebrickable.stats import read_rows
# Canonical Jurassic Park / Jurassic World character names. Used by
# select_known_character / normalize_known_character to match names
# extracted from head-part labels (comparison is case-insensitive).
KNOWN_CHARACTERS: List[str] = [
    "Owen Grady",
    "Claire Dearing",
    "Alan Grant",
    "Ellie Sattler",
    "Ian Malcolm",
    "John Hammond",
    "Dennis Nedry",
    "Ray Arnold",
    "Robert Muldoon",
    "Lex Murphy",
    "Tim Murphy",
    "Donald Gennaro",
    "Dr Wu",
    "Henry Wu",
    "Vic Hoskins",
    "Simon Masrani",
    "Zia Rodriguez",
    "Franklin Webb",
    "Rainn DeLaCourt",
    "Gunnar Eversol",
    "Soyona Santos",
    "Kayla Watts",
    "Maisie Lockwood",
    "Zach Mitchell",
    "Gray Mitchell",
    "Zach",
    "Gray",
    "Kenji",
    "Darius",
    "Yaz",
    "Sammy",
    "Brooklynn",
    "Sinjin Prescott",
    "Danny Nedermeyer",
    "ACU Trooper",
    "Hudson Harper",
    "Isabella Delgado",
    "Reuben Delgado",
    "Allison Miles",
    "Henry Loomis",
    "Ben",
    "Barry"
]
def load_parts_filtered(path: Path) -> List[dict]:
    """Read parts_filtered.csv entirely into memory and return its rows."""
    rows = read_rows(path)
    return rows
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the parts catalog by part number (row keeps category and name)."""
    with path.open() as catalog_file:
        return {entry["part_num"]: entry for entry in csv.DictReader(catalog_file)}
def select_head_parts(catalog: Dict[str, dict]) -> Set[str]:
    """Select the part numbers whose category id marks them as minifig heads."""
    heads: Set[str] = set()
    for part_num, entry in catalog.items():
        if entry["part_cat_id"] in HEAD_CATEGORIES:
            heads.add(part_num)
    return heads
def load_minifig_catalog(path: Path) -> Dict[str, dict]:
    """Index the minifigs catalog by minifig identifier (fig_num)."""
    with path.open() as minifigs_file:
        return {entry["fig_num"]: entry for entry in csv.DictReader(minifigs_file)}
def extract_character_name(part_name: str) -> str:
    """Guess a likely character name from a head part's display name.

    Strips the leading "Minifig Head" prefix, truncates at the first comma,
    then at the first slash, and trims whitespace. Returns "Inconnu" when
    nothing usable remains.
    """
    name = part_name
    prefix = "Minifig Head"
    if name.startswith(prefix):
        name = name[len(prefix):]
    # partition keeps everything before the first separator (or the whole
    # string when the separator is absent), matching the find()-based cuts.
    name = name.partition(",")[0]
    name = name.partition("/")[0]
    name = name.strip()
    return name if name else "Inconnu"
def select_known_character(extracted_name: str) -> str:
    """Return the known character matching the extracted name, or "" if none.

    Comparison against KNOWN_CHARACTERS is case-insensitive.
    """
    target = extracted_name.lower()
    matches = (name for name in KNOWN_CHARACTERS if name.lower() == target)
    return next(matches, "")
def load_aliases(path: Path) -> Dict[str, str]:
    """Load alias -> canonical-name mappings; alias keys are lowercased."""
    with path.open() as alias_file:
        return {
            entry["alias"].lower(): entry["canonical"]
            for entry in csv.DictReader(alias_file)
        }
def load_gender_overrides(path: Path) -> Dict[str, str]:
    """Load character -> gender mappings; character keys are lowercased."""
    with path.open() as gender_file:
        return {
            entry["known_character"].lower(): entry["gender"]
            for entry in csv.DictReader(gender_file)
        }
def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str:
    """Clean a character name and map it to its canonical form.

    Falls back to the extracted name when raw_known is empty, drops the
    "Inconnu" placeholder, truncates at comma/slash, then resolves through
    the alias table and the KNOWN_CHARACTERS list (case-insensitive,
    prefix-aware). Returns "" when no usable name remains, or the cleaned
    name when nothing matched.
    """
    candidate = raw_known if raw_known else extracted_name
    if candidate == "Inconnu":
        candidate = ""
    candidate = candidate.strip()
    if not candidate:
        return ""
    cleaned = candidate.partition(",")[0].partition("/")[0].strip()
    lowered = cleaned.lower()
    # Alias table wins over the hard-coded character list.
    for alias, canonical in aliases.items():
        if lowered.startswith(alias):
            return canonical
    for character in KNOWN_CHARACTERS:
        known_lower = character.lower()
        if lowered == known_lower:
            return character
        # Also accept names extended with a qualifier ("Owen Grady Smile")
        # or a possessive ("Owen Grady's ...").
        if lowered.startswith((f"{known_lower} ", f"{known_lower}'")):
            return character
    return cleaned
def build_set_minifigs_lookup(
    inventories: Dict[str, dict],
    inventory_minifigs_path: Path,
) -> Dict[str, List[str]]:
    """Map each set number to the minifig numbers of its inventory."""
    figs_by_inventory = index_inventory_minifigs_by_inventory(inventory_minifigs_path)
    return {
        set_num: [entry["fig_num"] for entry in figs_by_inventory.get(inventory["id"], [])]
        for set_num, inventory in inventories.items()
    }
def build_minifig_heads_lookup(
    minifig_catalog: Dict[str, dict],
    inventories: Dict[str, dict],
    inventory_parts_path: Path,
    head_parts: Set[str],
) -> Dict[str, Set[str]]:
    """Index the non-spare head parts present in each minifig's inventory.

    Minifigs without an inventory, or whose inventory has no head part,
    are omitted from the result.
    """
    parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
    heads_by_minifig: Dict[str, Set[str]] = {}
    for fig_num in minifig_catalog:
        inventory = inventories.get(fig_num)
        if inventory is None:
            continue
        found = {
            entry["part_num"]
            for entry in parts_by_inventory.get(inventory["id"], [])
            if entry["part_num"] in head_parts
            and normalize_boolean(entry["is_spare"]) != "true"
        }
        if found:
            heads_by_minifig[fig_num] = found
    return heads_by_minifig
def aggregate_heads_by_set(
    parts_rows: Iterable[dict],
    catalog: Dict[str, dict],
    head_parts: Set[str],
    set_minifigs: Dict[str, List[str]],
    minifig_heads: Dict[str, Set[str]],
    minifig_catalog: Dict[str, dict],
    aliases: Dict[str, str],
    gender_overrides: Dict[str, str],
) -> List[dict]:
    """Aggregate minifig heads per set, dropping spares and duplicates.

    A head is kept only when it can be attributed to exactly one minifig of
    the set. The character name comes from the matched minifig's catalog
    name, falling back to the name extracted from the part label, then is
    normalized through the alias table.

    Returns rows sorted by (set_num, part_num) with keys: set_num,
    part_num, known_character, fig_num, gender.
    """
    seen: Set[Tuple[str, str]] = set()
    heads: List[dict] = []
    for row in parts_rows:
        if row["part_num"] not in head_parts:
            continue
        # parts_filtered.csv stores booleans as "true"/"false" strings.
        if row["is_spare"] == "true":
            continue
        key = (row["set_num"], row["part_num"])
        if key in seen:
            continue
        # Attribute the head to the set's minifigs; ambiguous or unmatched
        # heads are discarded early, before any name extraction work.
        possible_figs = [
            fig_num
            for fig_num in set_minifigs.get(row["set_num"], [])
            if row["part_num"] in minifig_heads.get(fig_num, set())
        ]
        if len(possible_figs) != 1:
            continue
        matched_fig = possible_figs[0]
        extracted = extract_character_name(catalog[row["part_num"]]["name"])
        known_character = minifig_catalog.get(matched_fig, {}).get("name", "")
        if known_character == "":
            known_character = select_known_character(extracted)
        normalized = normalize_known_character(known_character, extracted, aliases)
        gender = gender_overrides.get(normalized.lower(), "")
        heads.append(
            {
                "set_num": row["set_num"],
                "part_num": row["part_num"],
                "known_character": normalized,
                "fig_num": matched_fig,
                "gender": gender,
            }
        )
        seen.add(key)
    # Named `head` to avoid shadowing the loop variable `row` above.
    heads.sort(key=lambda head: (head["set_num"], head["part_num"]))
    return heads
def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the intermediate CSV listing minifig heads per set."""
    ensure_parent_dir(destination_path)
    columns = ["set_num", "part_num", "known_character", "fig_num", "gender"]
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
def build_minifigs_by_set(
    parts_filtered_path: Path,
    parts_catalog_path: Path,
    inventories_path: Path,
    inventory_parts_path: Path,
    inventory_minifigs_path: Path,
    minifigs_path: Path,
    aliases_path: Path,
    gender_overrides_path: Path,
    destination_path: Path,
) -> None:
    """Build the CSV listing the minifig heads present in each set."""
    # Reference catalogs and name-normalization tables.
    catalog = load_parts_catalog(parts_catalog_path)
    head_parts = select_head_parts(catalog)
    figs = load_minifig_catalog(minifigs_path)
    aliases = load_aliases(aliases_path)
    genders = load_gender_overrides(gender_overrides_path)
    # Inventory-derived lookups (latest inventory per set / minifig).
    inventories = select_latest_inventories(inventories_path)
    fig_heads = build_minifig_heads_lookup(figs, inventories, inventory_parts_path, head_parts)
    figs_per_set = build_set_minifigs_lookup(inventories, inventory_minifigs_path)
    # Aggregate the filtered parts and persist the result.
    filtered_rows = load_parts_filtered(parts_filtered_path)
    result = aggregate_heads_by_set(
        filtered_rows,
        catalog,
        head_parts,
        figs_per_set,
        fig_heads,
        figs,
        aliases,
        genders,
    )
    write_heads_by_set(destination_path, result)