"""Agrégation des minifigs par personnage représenté.""" from collections import defaultdict from pathlib import Path from typing import Dict, Iterable, List, Sequence from lib.rebrickable.stats import read_rows from lib.filesystem import ensure_parent_dir import csv def load_minifigs_by_set(path: Path) -> List[dict]: """Charge le CSV minifigs_by_set.""" return read_rows(path) def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par personnage (fig_num unique).""" fig_nums_by_character: Dict[str, set] = defaultdict(set) for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue fig_nums_by_character[character].add(fig_num) aggregates: List[dict] = [] for character, fig_nums in fig_nums_by_character.items(): aggregates.append({"known_character": character, "minifig_count": len(fig_nums)}) aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"])) return aggregates def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def load_sets_enriched(path: Path) -> Dict[str, str]: """Indexe les années par set_num.""" lookup: Dict[str, str] = {} with path.open() as sets_file: reader = csv.DictReader(sets_file) for row in reader: lookup[row["set_num"]] = row["year"] return lookup def aggregate_presence_by_year( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Compte le nombre total de minifigs par personnage et par année (hors figurants).""" excluded = set(excluded_characters or []) counts: Dict[tuple[str, int], int] = defaultdict(int) years_all = {int(year) for year in sets_years.values()} characters_all: Set[str] = set() for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue year_int = int(year) counts[(character, year_int)] += 1 characters_all.add(character) years = sorted(years_all) characters = sorted(characters_all) results: List[dict] = [] for character in characters: for year in years: count = counts.get((character, year), 0) results.append( { "known_character": character, "year": str(year), "minifig_count": str(count), } ) return results def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit la matrice présence binaire année/personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "year", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row)