1
etude_lego_jurassic_world/lib/rebrickable/minifig_characters.py

98 lines
3.4 KiB
Python

"""Agrégation des minifigs par personnage représenté."""
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
from lib.rebrickable.stats import read_rows
from lib.filesystem import ensure_parent_dir
import csv
def load_minifigs_by_set(path: Path) -> List[dict]:
    """Load the rows of the minifigs_by_set CSV file.

    This is a thin wrapper that delegates entirely to ``read_rows``; the
    exact row format is whatever that helper returns (presumably one dict
    per CSV line, keyed by column name -- confirm in lib.rebrickable.stats).

    Args:
        path: Location of the minifigs_by_set CSV file.

    Returns:
        The rows produced by ``read_rows`` for ``path``.
    """
    minifig_rows = read_rows(path)
    return minifig_rows
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count the distinct minifigs (unique fig_num) for each character.

    Args:
        rows: Dicts exposing string values under the ``known_character``
            and ``fig_num`` keys. Both values are stripped of surrounding
            whitespace; a row where either one ends up empty is ignored.

    Returns:
        One dict per character with the keys ``known_character`` and
        ``minifig_count``, ordered by descending count and then by
        character name.
    """
    figs_per_name: Dict[str, set] = defaultdict(set)
    for record in rows:
        name = record["known_character"].strip()
        fig = record["fig_num"].strip()
        if name and fig:
            # A set keeps each fig_num once, so repeated rows do not inflate
            # the count.
            figs_per_name[name].add(fig)

    counts = [
        {"known_character": name, "minifig_count": len(figs)}
        for name, figs in figs_per_name.items()
    ]
    # Negating the count sorts the biggest first while names stay ascending.
    return sorted(
        counts,
        key=lambda entry: (-entry["minifig_count"], entry["known_character"]),
    )
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character minifig counts to a CSV file.

    The output has a header line followed by one line per entry of
    ``rows``, with the columns ``known_character`` and ``minifig_count``.

    Args:
        path: Destination file; it is opened in write mode, so existing
            content is replaced.
        rows: Dicts providing the two columns listed above.
    """
    # Project helper; presumably creates the missing parent directories
    # (see lib.filesystem).
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        out = csv.DictWriter(
            handle, fieldnames=["known_character", "minifig_count"]
        )
        out.writeheader()
        out.writerows(rows)
def load_sets_enriched(path: Path) -> Dict[str, str]:
    """Index the release year of each set by its set number.

    Args:
        path: CSV file with a header line that includes the ``set_num``
            and ``year`` columns; any other column is ignored.

    Returns:
        A mapping from ``set_num`` to the ``year`` value exactly as it is
        stored in the file (no numeric conversion). When a ``set_num``
        appears on several lines, the last line wins.

    Raises:
        KeyError: If the ``set_num`` or ``year`` column is absent.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are altered by universal-newline translation.
    with path.open(newline="") as sets_file:
        return {
            row["set_num"]: row["year"] for row in csv.DictReader(sets_file)
        }
def aggregate_presence_by_year(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Construit la présence binaire des personnages par année (hors figurants)."""
excluded = set(excluded_characters or [])
presence: set[tuple[str, int]] = set()
years_all = {int(year) for year in sets_years.values()}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year = sets_years.get(row["set_num"])
if year is None:
continue
presence.add((character, int(year)))
years = sorted(years_all)
characters = sorted({character for character, _ in presence})
results: List[dict] = []
for character in characters:
for year in years:
results.append(
{
"known_character": character,
"year": str(year),
"present": "1" if (character, year) in presence else "0",
}
)
return results
def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the binary year/character presence matrix to a CSV file.

    The output has a header line followed by one line per entry of
    ``rows``, with the columns ``known_character``, ``year`` and
    ``present``.

    Args:
        path: Destination file; it is opened in write mode, so existing
            content is replaced.
        rows: Dicts providing the three columns listed above.
    """
    # Project helper; presumably creates the missing parent directories
    # (see lib.filesystem).
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        out = csv.DictWriter(
            handle, fieldnames=["known_character", "year", "present"]
        )
        out.writeheader()
        out.writerows(rows)