1
etude_lego_jurassic_world/lib/rebrickable/minifig_characters.py

389 lines
14 KiB
Python

"""Agrégation des minifigs par personnage représenté."""
import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_minifigs_by_set(path: Path) -> List[dict]:
    """Load the minifigs_by_set CSV located at ``path``.

    Reading is delegated entirely to ``read_rows``; whatever it returns is
    handed back to the caller unchanged.
    """
    loaded_rows = read_rows(path)
    return loaded_rows
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs (unique ``fig_num``) per character, with gender.

    Rows whose stripped ``known_character`` or ``fig_num`` is empty are
    ignored. The gender reported for a character is the one carried by the
    first usable row seen for it (an empty string when the column is absent).

    Returns a list of dicts with keys ``known_character``, ``gender`` and
    ``minifig_count`` (int), ordered by decreasing count, then by name.
    """
    figs_per_character: Dict[str, set] = defaultdict(set)
    gender_of: Dict[str, str] = {}
    for record in rows:
        name = record["known_character"].strip()
        fig = record["fig_num"].strip()
        gender_value = record.get("gender", "").strip()
        if not name or not fig:
            continue
        figs_per_character[name].add(fig)
        # Only the first gender seen for a character is remembered.
        gender_of.setdefault(name, gender_value)

    summary: List[dict] = [
        {
            "known_character": name,
            "gender": gender_of.get(name, ""),
            "minifig_count": len(figs),
        }
        for name, figs in figs_per_character.items()
    ]
    return sorted(summary, key=lambda item: (-item["minifig_count"], item["known_character"]))
def aggregate_variations_and_totals(
rows: Iterable[dict],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Compte les variations uniques et le total de minifigs par personnage."""
excluded = set(excluded_characters or [])
variations: Dict[str, set] = defaultdict(set)
totals: Dict[str, int] = defaultdict(int)
genders: Dict[str, str] = {}
for row in rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
gender = row.get("gender", "").strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
variations[character].add(fig_num)
totals[character] += 1
if character not in genders:
genders[character] = gender
aggregates: List[dict] = []
for character, fig_nums in variations.items():
aggregates.append(
{
"known_character": character,
"gender": genders.get(character, ""),
"variation_count": len(fig_nums),
"total_minifigs": totals.get(character, 0),
}
)
aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"]))
return aggregates
def aggregate_new_characters_by_year(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
start_year: int | None = None,
end_year: int | None = None,
) -> List[dict]:
"""Compte le nombre de personnages introduits par année sur une plage donnée."""
excluded = set(excluded_characters or [])
first_year: Dict[str, int] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year_str = sets_years.get(row["set_num"])
if year_str is None:
continue
year_int = int(year_str)
current = first_year.get(character)
if current is None or year_int < current:
first_year[character] = year_int
counts: Dict[int, int] = {}
if start_year is not None and end_year is not None:
for year in range(start_year, end_year + 1):
counts[year] = 0
for character, year_int in first_year.items():
if start_year is not None and year_int < start_year:
continue
if end_year is not None and year_int > end_year:
continue
counts[year_int] = counts.get(year_int, 0) + 1
years = sorted(counts.keys())
results: List[dict] = []
for year in years:
results.append({"year": str(year), "new_characters": str(counts[year])})
return results
def aggregate_new_character_sets(
minifigs_rows: Iterable[dict],
sets_lookup: Dict[str, dict],
excluded_characters: Sequence[str] | None = None,
start_year: int | None = None,
end_year: int | None = None,
) -> List[dict]:
"""Liste les personnages introduits par année avec les sets correspondants."""
excluded = set(excluded_characters or [])
first_year: Dict[str, int] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
year_int = int(set_row["year"])
current = first_year.get(character)
if current is None or year_int < current:
first_year[character] = year_int
rows: List[dict] = []
seen: set[tuple[str, str]] = set()
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
intro_year = first_year.get(character)
if intro_year is None:
continue
if start_year is not None and intro_year < start_year:
continue
if end_year is not None and intro_year > end_year:
continue
if int(set_row["year"]) != intro_year:
continue
key = (character, set_row["set_num"])
if key in seen:
continue
rows.append(
{
"year": str(int(set_row["year"])),
"known_character": character,
"set_num": set_row["set_num"],
"set_id": set_row.get("set_id", ""),
"set_name": set_row.get("name", ""),
"rebrickable_url": set_row.get("rebrickable_url", ""),
}
)
seen.add(key)
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
return rows
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs (unique ``fig_num``) per gender.

    The gender is lower-cased and stripped; anything other than ``male`` or
    ``female`` is counted as ``unknown``. A ``fig_num`` is counted once, with
    the gender of its first occurrence; rows with an empty ``fig_num`` are
    skipped.

    Returns dicts with keys ``gender`` and ``minifig_count`` (as a string), in
    the fixed order female, male, unknown, omitting genders never counted.
    """
    label_by_fig: Dict[str, str] = {}
    tally: Dict[str, int] = defaultdict(int)
    for record in rows:
        fig = record["fig_num"].strip()
        raw_gender = record.get("gender", "").strip().lower()
        label = raw_gender if raw_gender in ("male", "female") else "unknown"
        if not fig or fig in label_by_fig:
            continue
        label_by_fig[fig] = label
        tally[label] += 1

    return [
        {"gender": label, "minifig_count": str(tally[label])}
        for label in ("female", "male", "unknown")
        if label in tally
    ]
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character counts as a CSV file at ``path``.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The file gets a
    header line followed by one line per row, with the columns
    ``known_character``, ``gender`` and ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the number of characters introduced each year as a CSV file.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``year`` and ``new_characters``.
    """
    ensure_parent_dir(path)
    columns = ["year", "new_characters"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the introduced characters and their sets as a CSV file.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``year``, ``known_character``, ``set_num``,
    ``set_id``, ``set_name`` and ``rebrickable_url``.
    """
    ensure_parent_dir(path)
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown list of the characters introduced per year and their sets.

    Rows are grouped by ``year`` and then by ``known_character``. Each year
    produces a ``#####`` heading followed by one bullet per character (sorted
    by name); under each character comes one linked entry per set, sorted by
    ``set_id``. Years are emitted in ascending numeric order.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories).
    """
    ensure_parent_dir(path)

    by_year: Dict[str, Dict[str, List[dict]]] = {}
    for record in rows:
        per_character = by_year.setdefault(record["year"], {})
        per_character.setdefault(record["known_character"], []).append(record)

    with path.open("w") as output:
        for year in sorted(by_year, key=int):
            output.write(f"##### {year}\n\n")
            per_character = by_year[year]
            for name in sorted(per_character):
                output.write(f"- {name}\n")
                for item in sorted(per_character[name], key=lambda r: r["set_id"]):
                    link = item["rebrickable_url"] or ""
                    set_id = item["set_id"]
                    set_name = item["set_name"]
                    output.write(f" - [{set_id}]({link}) - {set_name}\n")
            # Blank line closing the year's list before the next heading.
            output.write("\n")
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender counts as a CSV file at ``path``.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``gender`` and ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["gender", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV comparing variations and totals per character.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``known_character``, ``gender``,
    ``variation_count`` and ``total_minifigs``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "variation_count", "total_minifigs"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def load_sets_enriched(path: Path) -> Dict[str, str]:
    """Index set years by ``set_num``.

    Reads the CSV at ``path`` (which must provide ``set_num`` and ``year``
    columns) and returns a mapping from each ``set_num`` to its ``year``, both
    kept as the strings found in the file. When a ``set_num`` appears several
    times, the last row wins.

    Raises:
        KeyError: If a row lacks the ``set_num`` or ``year`` column.
    """
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires; otherwise line endings embedded in quoted
    # fields are rewritten by universal-newline translation.
    with path.open(newline="") as sets_file:
        return {row["set_num"]: row["year"] for row in csv.DictReader(sets_file)}
def aggregate_presence_by_year(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Compte le nombre total de minifigs par personnage et par année (hors figurants)."""
excluded = set(excluded_characters or [])
counts: Dict[tuple[str, int], int] = defaultdict(int)
years_all = {int(year) for year in sets_years.values()}
characters_all: Set[str] = set()
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year = sets_years.get(row["set_num"])
if year is None:
continue
year_int = int(year)
counts[(character, year_int)] += 1
characters_all.add(character)
years = sorted(years_all)
characters = sorted(characters_all)
results: List[dict] = []
for character in characters:
for year in years:
count = counts.get((character, year), 0)
results.append(
{
"known_character": character,
"year": str(year),
"minifig_count": str(count),
}
)
return results
def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the character/year presence grid as a CSV file.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``known_character``, ``year`` and
    ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "year", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def aggregate_character_spans(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Calcule la période d'apparition de chaque personnage (bornes min/max des années observées)."""
excluded = set(excluded_characters or [])
spans: Dict[str, Dict[str, int]] = {}
total_counts: Dict[str, int] = defaultdict(int)
genders: Dict[str, str] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
gender = row.get("gender", "").strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year = sets_years.get(row["set_num"])
if year is None:
continue
year_int = int(year)
total_counts[character] += 1
if character not in genders:
genders[character] = gender
current = spans.get(character)
if current is None:
spans[character] = {"start": year_int, "end": year_int}
else:
spans[character]["start"] = min(current["start"], year_int)
spans[character]["end"] = max(current["end"], year_int)
results: List[dict] = []
for character, bounds in spans.items():
results.append(
{
"known_character": character,
"start_year": str(bounds["start"]),
"end_year": str(bounds["end"]),
"total_minifigs": str(total_counts[character]),
"gender": genders.get(character, ""),
}
)
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
return results
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
    """Write the min/max year bounds per character as a CSV file.

    ``ensure_parent_dir(path)`` is called before the file is opened
    (presumably to create the missing parent directories). The output has a
    header line and the columns ``known_character``, ``start_year``,
    ``end_year``, ``total_minifigs`` and ``gender``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "start_year", "end_year", "total_minifigs", "gender"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)