422 lines
16 KiB
Python
422 lines
16 KiB
Python
"""Agrégation des minifigs par personnage représenté."""
|
|
|
|
import csv
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Sequence, Set
|
|
|
|
from lib.filesystem import ensure_parent_dir
|
|
from lib.rebrickable.stats import read_rows
|
|
|
|
|
|
def load_minifigs_by_set(path: Path) -> List[dict]:
    """Load the minifigs_by_set CSV located at ``path``.

    Reading is delegated entirely to ``read_rows``; whatever it returns is
    handed back to the caller unchanged.
    """
    loaded_rows = read_rows(path)
    return loaded_rows
|
|
|
|
|
|
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count the distinct minifigs (unique ``fig_num``) of each character.

    Rows whose stripped ``known_character`` or ``fig_num`` is empty are
    ignored. The gender reported for a character is the stripped ``gender``
    of the first kept row for that character (empty string when the column
    is absent).

    Returns a list of dicts with keys ``known_character``, ``gender`` and
    ``minifig_count`` (an int), ordered by decreasing count, then by
    character name.
    """
    distinct_figs: Dict[str, set] = {}
    first_gender: Dict[str, str] = {}
    for record in rows:
        name = record["known_character"].strip()
        fig_id = record["fig_num"].strip()
        gender_value = record.get("gender", "").strip()
        if not name or not fig_id:
            continue
        distinct_figs.setdefault(name, set()).add(fig_id)
        # setdefault keeps the first gender seen and ignores later ones.
        first_gender.setdefault(name, gender_value)

    summary = [
        {
            "known_character": name,
            "gender": first_gender.get(name, ""),
            "minifig_count": len(figs),
        }
        for name, figs in distinct_figs.items()
    ]
    return sorted(summary, key=lambda item: (-item["minifig_count"], item["known_character"]))
|
|
|
|
|
|
def aggregate_variations_and_totals(
|
|
rows: Iterable[dict],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
) -> List[dict]:
|
|
"""Compte les variations uniques et le total de minifigs par personnage."""
|
|
excluded = set(excluded_characters or [])
|
|
variations: Dict[str, set] = defaultdict(set)
|
|
totals: Dict[str, int] = defaultdict(int)
|
|
genders: Dict[str, str] = {}
|
|
for row in rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
gender = row.get("gender", "").strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
variations[character].add(fig_num)
|
|
totals[character] += 1
|
|
if character not in genders:
|
|
genders[character] = gender
|
|
aggregates: List[dict] = []
|
|
for character, fig_nums in variations.items():
|
|
aggregates.append(
|
|
{
|
|
"known_character": character,
|
|
"gender": genders.get(character, ""),
|
|
"variation_count": len(fig_nums),
|
|
"total_minifigs": totals.get(character, 0),
|
|
}
|
|
)
|
|
aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"]))
|
|
return aggregates
|
|
|
|
|
|
def aggregate_new_characters_by_year(
|
|
minifigs_rows: Iterable[dict],
|
|
sets_years: Dict[str, str],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
start_year: int | None = None,
|
|
end_year: int | None = None,
|
|
) -> List[dict]:
|
|
"""Compte le nombre de personnages introduits par année sur une plage donnée."""
|
|
excluded = set(excluded_characters or [])
|
|
first_year: Dict[str, int] = {}
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
year_str = sets_years.get(row["set_num"])
|
|
if year_str is None:
|
|
continue
|
|
year_int = int(year_str)
|
|
current = first_year.get(character)
|
|
if current is None or year_int < current:
|
|
first_year[character] = year_int
|
|
counts: Dict[int, int] = {}
|
|
if start_year is not None and end_year is not None:
|
|
for year in range(start_year, end_year + 1):
|
|
counts[year] = 0
|
|
for character, year_int in first_year.items():
|
|
if start_year is not None and year_int < start_year:
|
|
continue
|
|
if end_year is not None and year_int > end_year:
|
|
continue
|
|
counts[year_int] = counts.get(year_int, 0) + 1
|
|
years = sorted(counts.keys())
|
|
results: List[dict] = []
|
|
for year in years:
|
|
results.append({"year": str(year), "new_characters": str(counts[year])})
|
|
return results
|
|
|
|
|
|
def aggregate_new_character_sets(
|
|
minifigs_rows: Iterable[dict],
|
|
sets_lookup: Dict[str, dict],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
start_year: int | None = None,
|
|
end_year: int | None = None,
|
|
) -> List[dict]:
|
|
"""Liste les personnages introduits par année avec les sets correspondants."""
|
|
excluded = set(excluded_characters or [])
|
|
first_year: Dict[str, int] = {}
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
set_row = sets_lookup.get(row["set_num"])
|
|
if set_row is None:
|
|
continue
|
|
year_int = int(set_row["year"])
|
|
current = first_year.get(character)
|
|
if current is None or year_int < current:
|
|
first_year[character] = year_int
|
|
rows: List[dict] = []
|
|
seen: set[tuple[str, str]] = set()
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
set_row = sets_lookup.get(row["set_num"])
|
|
if set_row is None:
|
|
continue
|
|
intro_year = first_year.get(character)
|
|
if intro_year is None:
|
|
continue
|
|
if start_year is not None and intro_year < start_year:
|
|
continue
|
|
if end_year is not None and intro_year > end_year:
|
|
continue
|
|
if int(set_row["year"]) != intro_year:
|
|
continue
|
|
key = (character, set_row["set_num"])
|
|
if key in seen:
|
|
continue
|
|
rows.append(
|
|
{
|
|
"year": str(int(set_row["year"])),
|
|
"known_character": character,
|
|
"set_num": set_row["set_num"],
|
|
"set_id": set_row.get("set_id", ""),
|
|
"set_name": set_row.get("name", ""),
|
|
"rebrickable_url": set_row.get("rebrickable_url", ""),
|
|
}
|
|
)
|
|
seen.add(key)
|
|
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
|
|
return rows
|
|
|
|
|
|
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs (unique ``fig_num``) per gender.

    The gender is stripped and lower-cased; anything other than ``male`` or
    ``female`` is counted as ``unknown``. Only the first row seen for a
    given ``fig_num`` contributes; rows with an empty ``fig_num`` are
    skipped.

    Returns dicts with ``gender`` and ``minifig_count`` (as a string), in
    the fixed order female, male, unknown; genders with no minifig are
    omitted.
    """
    seen_figs: set = set()
    tally: Dict[str, int] = {}
    for record in rows:
        fig_id = record["fig_num"].strip()
        label = record.get("gender", "").strip().lower()
        if label != "male" and label != "female":
            label = "unknown"
        if not fig_id or fig_id in seen_figs:
            continue
        seen_figs.add(fig_id)
        tally[label] = tally.get(label, 0) + 1

    return [
        {"gender": label, "minifig_count": str(tally[label])}
        for label in ("female", "male", "unknown")
        if label in tally
    ]
|
|
|
|
|
|
def aggregate_characters_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct characters per gender, leaving unknown genders out.

    The gender is stripped and lower-cased. A character is counted once,
    under the gender of its first row whose gender is ``male`` or
    ``female``; rows with an empty character name or any other gender are
    skipped.

    Returns dicts with ``gender`` and ``character_count`` (as a string),
    female first then male; a gender with no character is omitted.
    """
    assigned: Dict[str, str] = {}
    for record in rows:
        name = record["known_character"].strip()
        label = record.get("gender", "").strip().lower()
        if not name or label not in ("male", "female") or name in assigned:
            continue
        assigned[name] = label

    labels = list(assigned.values())
    return [
        {"gender": label, "character_count": str(labels.count(label))}
        for label in ("female", "male")
        if label in labels
    ]
|
|
|
|
|
|
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character counts to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``known_character``, ``gender``
    and ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def write_character_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender character counts to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``gender`` and ``character_count``.
    """
    ensure_parent_dir(path)
    columns = ["gender", "character_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the number of characters introduced each year to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``year`` and ``new_characters``.
    """
    ensure_parent_dir(path)
    columns = ["year", "new_characters"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the introduced characters and their sets to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``year``, ``known_character``,
    ``set_num``, ``set_id``, ``set_name`` and ``rebrickable_url``.
    """
    ensure_parent_dir(path)
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown listing of introduced characters, grouped by year.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories). Each year becomes a ``#####`` heading
    (years in ascending numeric order), each character of that year a list
    item (alphabetical order), and each of its sets a sub-item linking the
    ``set_id`` to ``rebrickable_url`` followed by the set name (sets
    ordered by ``set_id``). A blank line closes every year section.
    """
    ensure_parent_dir(path)

    by_year: Dict[str, Dict[str, List[dict]]] = {}
    for record in rows:
        by_year.setdefault(record["year"], {}).setdefault(record["known_character"], []).append(record)

    with path.open("w") as out:
        for year_label in sorted(by_year, key=int):
            out.write(f"##### {year_label}\n\n")
            per_character = by_year[year_label]
            for name in sorted(per_character):
                out.write(f"- {name}\n")
                ordered_sets = sorted(per_character[name], key=lambda item: item["set_id"])
                for item in ordered_sets:
                    # A missing/None URL is rendered as an empty link target.
                    url = item["rebrickable_url"] or ""
                    out.write(f" - [{item['set_id']}]({url}) - {item['set_name']}\n")
            out.write("\n")
|
|
|
|
|
|
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender minifig counts to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``gender`` and ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["gender", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None:
    """Write the variations-versus-total comparison per character to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``known_character``, ``gender``,
    ``variation_count`` and ``total_minifigs``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "variation_count", "total_minifigs"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def load_sets_enriched(path: Path) -> Dict[str, str]:
    """Index set years by ``set_num`` from the CSV file ``path``.

    The file must have a header providing at least the ``set_num`` and
    ``year`` columns. Years are returned as the raw strings read from the
    file. When a ``set_num`` occurs several times, the last row wins.

    Raises:
        KeyError: if a row lacks the ``set_num`` or ``year`` column.
    """
    # newline="" hands line endings to the csv module untouched, as its
    # documentation requires; otherwise "\r" / "\r\n" inside quoted fields
    # are rewritten to "\n" before the parser sees them.
    with path.open(newline="") as sets_file:
        return {row["set_num"]: row["year"] for row in csv.DictReader(sets_file)}
|
|
|
|
|
|
def aggregate_presence_by_year(
|
|
minifigs_rows: Iterable[dict],
|
|
sets_years: Dict[str, str],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
) -> List[dict]:
|
|
"""Compte le nombre total de minifigs par personnage et par année (hors figurants)."""
|
|
excluded = set(excluded_characters or [])
|
|
counts: Dict[tuple[str, int], int] = defaultdict(int)
|
|
years_all = {int(year) for year in sets_years.values()}
|
|
characters_all: Set[str] = set()
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
year = sets_years.get(row["set_num"])
|
|
if year is None:
|
|
continue
|
|
year_int = int(year)
|
|
counts[(character, year_int)] += 1
|
|
characters_all.add(character)
|
|
years = sorted(years_all)
|
|
characters = sorted(characters_all)
|
|
results: List[dict] = []
|
|
for character in characters:
|
|
for year in years:
|
|
count = counts.get((character, year), 0)
|
|
results.append(
|
|
{
|
|
"known_character": character,
|
|
"year": str(year),
|
|
"minifig_count": str(count),
|
|
}
|
|
)
|
|
return results
|
|
|
|
|
|
def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character, per-year minifig counts to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``known_character``, ``year`` and
    ``minifig_count``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "year", "minifig_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
|
|
|
|
|
def aggregate_character_spans(
|
|
minifigs_rows: Iterable[dict],
|
|
sets_years: Dict[str, str],
|
|
excluded_characters: Sequence[str] | None = None,
|
|
) -> List[dict]:
|
|
"""Calcule la période d'apparition de chaque personnage (bornes min/max des années observées)."""
|
|
excluded = set(excluded_characters or [])
|
|
spans: Dict[str, Dict[str, int]] = {}
|
|
total_counts: Dict[str, int] = defaultdict(int)
|
|
genders: Dict[str, str] = {}
|
|
for row in minifigs_rows:
|
|
character = row["known_character"].strip()
|
|
fig_num = row["fig_num"].strip()
|
|
gender = row.get("gender", "").strip()
|
|
if character == "" or fig_num == "":
|
|
continue
|
|
if character in excluded:
|
|
continue
|
|
year = sets_years.get(row["set_num"])
|
|
if year is None:
|
|
continue
|
|
year_int = int(year)
|
|
total_counts[character] += 1
|
|
if character not in genders:
|
|
genders[character] = gender
|
|
current = spans.get(character)
|
|
if current is None:
|
|
spans[character] = {"start": year_int, "end": year_int}
|
|
else:
|
|
spans[character]["start"] = min(current["start"], year_int)
|
|
spans[character]["end"] = max(current["end"], year_int)
|
|
results: List[dict] = []
|
|
for character, bounds in spans.items():
|
|
results.append(
|
|
{
|
|
"known_character": character,
|
|
"start_year": str(bounds["start"]),
|
|
"end_year": str(bounds["end"]),
|
|
"total_minifigs": str(total_counts[character]),
|
|
"gender": genders.get(character, ""),
|
|
}
|
|
)
|
|
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
|
|
return results
|
|
|
|
|
|
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
    """Write each character's first/last year bounds to the CSV file ``path``.

    ``ensure_parent_dir(path)`` is called first (presumably to create the
    missing parent directories), then a header line followed by one line
    per row is written with the columns ``known_character``,
    ``start_year``, ``end_year``, ``total_minifigs`` and ``gender``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "start_year", "end_year", "total_minifigs", "gender"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|