102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
"""Agrégation des minifigs par personnage représenté."""
|
|
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Sequence
|
|
|
|
from lib.rebrickable.stats import read_rows
|
|
from lib.filesystem import ensure_parent_dir
|
|
import csv
|
|
|
|
|
|
def load_minifigs_by_set(path: Path) -> List[dict]:
    """Load the rows of the minifigs_by_set CSV.

    Thin wrapper that delegates to ``read_rows``; the row format is
    whatever that helper returns for ``path``.
    """
    rows = read_rows(path)
    return rows
|
|
|
|
|
|
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count the distinct minifigs (unique ``fig_num``) for each character.

    Each row must provide ``known_character`` and ``fig_num`` strings; both
    are stripped, and rows where either ends up empty are ignored.

    Returns one dict per character with the keys ``known_character`` and
    ``minifig_count``, ordered by descending count and then by character
    name.
    """
    distinct_figs: Dict[str, set] = defaultdict(set)
    for entry in rows:
        name = entry["known_character"].strip()
        fig = entry["fig_num"].strip()
        if name and fig:
            # A set per character de-duplicates repeated fig_num values.
            distinct_figs[name].add(fig)

    totals: List[dict] = [
        {"known_character": name, "minifig_count": len(figs)}
        for name, figs in distinct_figs.items()
    ]
    return sorted(
        totals,
        key=lambda item: (-item["minifig_count"], item["known_character"]),
    )
|
|
|
|
|
|
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character counts to a CSV file.

    The file gets a header line followed by one line per row, with the
    columns ``known_character`` and ``minifig_count``.
    ``ensure_parent_dir`` is called on ``path`` before the file is opened.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "minifig_count"]
    # newline="" leaves line-ending handling to the csv writer.
    with path.open("w", newline="") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(rows)
|
|
|
|
|
|
def load_sets_enriched(path: Path) -> Dict[str, str]:
    """Index set years by set number.

    Reads the CSV at ``path`` (header row required) and maps each row's
    ``set_num`` value to its ``year`` value, both kept as strings. When a
    ``set_num`` appears several times, the last row wins.

    Raises:
        KeyError: if the CSV has no ``set_num`` or ``year`` column.
    """
    # The csv module expects files opened with newline="" so that it can
    # handle line endings itself (including those inside quoted fields).
    with path.open(newline="") as sets_file:
        return {row["set_num"]: row["year"] for row in csv.DictReader(sets_file)}
|
|
|
|
|
|
def aggregate_presence_by_year(
|
|
minifigs_rows: Iterable[dict],
|
|
sets_years: Dict[str, str],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
) -> List[dict]:
|
|
"""Compte le nombre total de minifigs par personnage et par année (hors figurants)."""
|
|
excluded = set(excluded_characters or [])
|
|
counts: Dict[tuple[str, int], int] = defaultdict(int)
|
|
years_all = {int(year) for year in sets_years.values()}
|
|
characters_all: Set[str] = set()
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
year = sets_years.get(row["set_num"])
|
|
if year is None:
|
|
continue
|
|
year_int = int(year)
|
|
counts[(character, year_int)] += 1
|
|
characters_all.add(character)
|
|
years = sorted(years_all)
|
|
characters = sorted(characters_all)
|
|
results: List[dict] = []
|
|
for character in characters:
|
|
for year in years:
|
|
count = counts.get((character, year), 0)
|
|
results.append(
|
|
{
|
|
"known_character": character,
|
|
"year": str(year),
|
|
"minifig_count": str(count),
|
|
}
|
|
)
|
|
return results
|
|
|
|
|
|
def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character, per-year counts to a CSV file.

    The file gets a header line followed by one line per row, with the
    columns ``known_character``, ``year`` and ``minifig_count``.
    ``ensure_parent_dir`` is called on ``path`` before the file is opened.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "year", "minifig_count"]
    # newline="" leaves line-ending handling to the csv writer.
    with path.open("w", newline="") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(rows)
|