"""Agrégation des minifigs par personnage représenté.""" import csv from collections import defaultdict from pathlib import Path from typing import Dict, Iterable, List, Sequence, Set from lib.filesystem import ensure_parent_dir from lib.rebrickable.stats import read_rows def load_minifigs_by_set(path: Path) -> List[dict]: """Charge le CSV minifigs_by_set.""" return read_rows(path) def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par personnage (fig_num unique) avec genre.""" fig_nums_by_character: Dict[str, set] = defaultdict(set) genders: Dict[str, str] = {} for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() gender = row.get("gender", "").strip() if character == "" or fig_num == "": continue fig_nums_by_character[character].add(fig_num) if character not in genders: genders[character] = gender aggregates: List[dict] = [] for character, fig_nums in fig_nums_by_character.items(): aggregates.append({"known_character": character, "gender": genders.get(character, ""), "minifig_count": len(fig_nums)}) aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"])) return aggregates def aggregate_variations_and_totals( rows: Iterable[dict], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Compte les variations uniques et le total de minifigs par personnage.""" excluded = set(excluded_characters or []) variations: Dict[str, set] = defaultdict(set) totals: Dict[str, int] = defaultdict(int) genders: Dict[str, str] = {} for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() gender = row.get("gender", "").strip() if character == "" or fig_num == "": continue if character in excluded: continue variations[character].add(fig_num) totals[character] += 1 if character not in genders: genders[character] = gender aggregates: List[dict] = [] for character, fig_nums in variations.items(): aggregates.append( { "known_character": character, "gender": genders.get(character, ""), "variation_count": len(fig_nums), "total_minifigs": totals.get(character, 0), } ) aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"])) return aggregates def aggregate_new_characters_by_year( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, start_year: int | None = None, end_year: int | None = None, ) -> List[dict]: """Compte le nombre de personnages introduits par année sur une plage donnée.""" excluded = set(excluded_characters or []) first_year: Dict[str, int] = {} for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year_str = sets_years.get(row["set_num"]) if year_str is None: continue year_int = int(year_str) current = first_year.get(character) if current is None or year_int < current: first_year[character] = year_int counts: Dict[int, int] = {} if start_year is not None and end_year is not None: for year in range(start_year, end_year + 1): counts[year] = 0 for character, year_int in first_year.items(): if start_year is not None and year_int < start_year: continue if end_year is not None and year_int > end_year: continue counts[year_int] = counts.get(year_int, 0) + 1 years = sorted(counts.keys()) results: List[dict] = [] for year in years: results.append({"year": str(year), "new_characters": str(counts[year])}) return results def aggregate_new_character_sets( minifigs_rows: Iterable[dict], sets_lookup: Dict[str, dict], excluded_characters: Sequence[str] | None = None, start_year: int | None = None, end_year: int | None = None, ) -> List[dict]: """Liste les personnages introduits par année avec les sets correspondants.""" excluded = set(excluded_characters or []) first_year: Dict[str, int] = {} for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue set_row = sets_lookup.get(row["set_num"]) if set_row is None: continue year_int = int(set_row["year"]) current = first_year.get(character) if current is None or year_int < current: first_year[character] = year_int rows: List[dict] = [] seen: set[tuple[str, str]] = set() for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue set_row = sets_lookup.get(row["set_num"]) if set_row is None: continue intro_year = first_year.get(character) if intro_year is None: continue if start_year is not None and intro_year < start_year: continue if end_year is not None and intro_year > end_year: continue if int(set_row["year"]) != intro_year: continue key = (character, set_row["set_num"]) if key in seen: continue rows.append( { "year": str(int(set_row["year"])), "known_character": character, "set_num": set_row["set_num"], "set_id": set_row.get("set_id", ""), "set_name": set_row.get("name", ""), "rebrickable_url": set_row.get("rebrickable_url", ""), } ) seen.add(key) rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"])) return rows def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par genre (fig_num unique).""" genders_by_fig: Dict[str, str] = {} counts: Dict[str, int] = defaultdict(int) for row in rows: fig_num = row["fig_num"].strip() gender = row.get("gender", "").strip().lower() normalized = gender if gender in ("male", "female") else "unknown" if fig_num == "": continue if fig_num in genders_by_fig: continue genders_by_fig[fig_num] = normalized counts[normalized] += 1 aggregates: List[dict] = [] ordered = ["female", "male", "unknown"] for gender in ordered: if gender in counts: aggregates.append({"gender": gender, "minifig_count": str(counts[gender])}) return aggregates def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "gender", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des personnages introduits chaque année.""" ensure_parent_dir(path) fieldnames = ["year", "new_characters"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV listant les personnages introduits et leurs sets.""" ensure_parent_dir(path) fieldnames = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None: """Écrit un Markdown listant les personnages introduits par année et leurs sets.""" ensure_parent_dir(path) grouped: Dict[str, Dict[str, List[dict]]] = {} for row in rows: year_group = grouped.setdefault(row["year"], {}) characters = year_group.setdefault(row["known_character"], []) characters.append(row) with path.open("w") as md_file: for year in sorted(grouped.keys(), key=int): md_file.write(f"##### {year}\n\n") for character in sorted(grouped[year].keys()): md_file.write(f"- {character}\n") for entry in sorted(grouped[year][character], key=lambda r: r["set_id"]): link = entry["rebrickable_url"] or "" set_id = entry["set_id"] name = entry["set_name"] md_file.write(f" - [{set_id}]({link}) - {name}\n") md_file.write("\n") def write_gender_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par genre.""" ensure_parent_dir(path) fieldnames = ["gender", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV comparant variations et total par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "gender", "variation_count", "total_minifigs"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def load_sets_enriched(path: Path) -> Dict[str, str]: """Indexe les années par set_num.""" lookup: Dict[str, str] = {} with path.open() as sets_file: reader = csv.DictReader(sets_file) for row in reader: lookup[row["set_num"]] = row["year"] return lookup def aggregate_presence_by_year( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Compte le nombre total de minifigs par personnage et par année (hors figurants).""" excluded = set(excluded_characters or []) counts: Dict[tuple[str, int], int] = defaultdict(int) years_all = {int(year) for year in sets_years.values()} characters_all: Set[str] = set() for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue year_int = int(year) counts[(character, year_int)] += 1 characters_all.add(character) years = sorted(years_all) characters = sorted(characters_all) results: List[dict] = [] for character in characters: for year in years: count = counts.get((character, year), 0) results.append( { "known_character": character, "year": str(year), "minifig_count": str(count), } ) return results def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit la matrice présence binaire année/personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "year", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row) def aggregate_character_spans( minifigs_rows: Iterable[dict], sets_years: Dict[str, str], excluded_characters: Sequence[str] | None = None, ) -> List[dict]: """Calcule la période d'apparition de chaque personnage (bornes min/max des années observées).""" excluded = set(excluded_characters or []) spans: Dict[str, Dict[str, int]] = {} total_counts: Dict[str, int] = defaultdict(int) genders: Dict[str, str] = {} for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() gender = row.get("gender", "").strip() if character == "" or fig_num == "": continue if character in excluded: continue year = sets_years.get(row["set_num"]) if year is None: continue year_int = int(year) total_counts[character] += 1 if character not in genders: genders[character] = gender current = spans.get(character) if current is None: spans[character] = {"start": year_int, "end": year_int} else: spans[character]["start"] = min(current["start"], year_int) spans[character]["end"] = max(current["end"], year_int) results: List[dict] = [] for character, bounds in spans.items(): results.append( { "known_character": character, "start_year": str(bounds["start"]), "end_year": str(bounds["end"]), "total_minifigs": str(total_counts[character]), "gender": genders.get(character, ""), } ) results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"])) return results def write_character_spans(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des bornes min/max par personnage.""" ensure_parent_dir(path) fieldnames = ["known_character", "start_year", "end_year", "total_minifigs", "gender"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for row in rows: writer.writerow(row)