"""Agrégation des minifigs par personnage représenté.""" from collections import defaultdict from pathlib import Path from typing import Dict, Iterable, List, Sequence from lib.rebrickable.stats import read_rows from lib.filesystem import ensure_parent_dir import csv def load_minifigs_by_set(path: Path) -> List[dict]: """Charge le CSV minifigs_by_set.""" return read_rows(path) def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par personnage (fig_num unique).""" fig_nums_by_character: Dict[str, set] = defaultdict(set) for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue fig_nums_by_character[character].add(fig_num) aggregates: List[dict] = [] for character, fig_nums in fig_nums_by_character.items(): aggregates.append({"known_character": character, "minifig_count": len(fig_nums)}) aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"])) return aggregates def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def load_sets_enriched(path: Path) -> Dict[str, str]: """Indexe les années par set_num.""" lookup: Dict[str, str] = {} with path.open() as sets_file: reader = csv.DictReader(sets_file) for row in reader: lookup[row["set_num"]] = row["year"] return lookup def aggregate_presence_by_year( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Construit la présence binaire des personnages par année (hors figurants).""" excluded = set(excluded_characters or []) presence: set[tuple[str, int]] = set() years_all = {int(year) for year in sets_years.values()} for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue presence.add((character, int(year))) years = sorted(years_all) characters = sorted({character for character, _ in presence}) results: List[dict] = [] for character in characters: for year in years: results.append( { "known_character": character, "year": str(year), "present": "1" if (character, year) in presence else "0", } ) return results def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit la matrice présence binaire année/personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "year", "present"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row)