"""Agrégation des minifigs par personnage représenté.""" from collections import defaultdict from pathlib import Path from typing import Dict, Iterable, List, Sequence, Set from lib.rebrickable.stats import read_rows from lib.filesystem import ensure_parent_dir import csv def load_minifigs_by_set(path: Path) -> List[dict]: """Charge le CSV minifigs_by_set.""" return read_rows(path) def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par personnage (fig_num unique).""" fig_nums_by_character: Dict[str, set] = defaultdict(set) for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue fig_nums_by_character[character].add(fig_num) aggregates: List[dict] = [] for character, fig_nums in fig_nums_by_character.items(): aggregates.append({"known_character": character, "minifig_count": len(fig_nums)}) aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"])) return aggregates def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def load_sets_enriched(path: Path) -> Dict[str, str]: """Indexe les années par set_num.""" lookup: Dict[str, str] = {} with path.open() as sets_file: reader = csv.DictReader(sets_file) for row in reader: lookup[row["set_num"]] = row["year"] return lookup def aggregate_presence_by_year( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Compte le nombre total de minifigs par personnage et par année (hors figurants).""" excluded = set(excluded_characters or []) counts: Dict[tuple[str, int], int] = defaultdict(int) years_all = {int(year) for year in sets_years.values()} characters_all: Set[str] = set() for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue year_int = int(year) counts[(character, year_int)] += 1 characters_all.add(character) years = sorted(years_all) characters = sorted(characters_all) results: List[dict] = [] for character in characters: for year in years: count = counts.get((character, year), 0) results.append( { "known_character": character, "year": str(year), "minifig_count": str(count), } ) return results def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit la matrice présence binaire année/personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "year", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def aggregate_character_spans( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Calcule la période d'apparition de chaque personnage (bornes min/max des années observées).""" excluded = set(excluded_characters or []) spans: Dict[str, Dict[str, int]] = {} total_counts: Dict[str, int] = defaultdict(int) for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue year_int = int(year) total_counts[character] += 1 current = spans.get(character) if current is None: spans[character] = {"start": year_int, "end": year_int} else: spans[character]["start"] = min(current["start"], year_int) spans[character]["end"] = max(current["end"], year_int) results: List[dict] = [] for character, bounds in spans.items(): results.append( { "known_character": character, "start_year": str(bounds["start"]), "end_year": str(bounds["end"]), "total_minifigs": str(total_counts[character]), } ) results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"])) return results def write_character_spans(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des bornes min/max par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "start_year", "end_year", "total_minifigs"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row)