
Add character gender and color the charts

2025-12-02 11:37:13 +01:00
parent 230b9db239
commit f5c1fa6333
12 changed files with 300 additions and 43 deletions


@@ -1,12 +1,12 @@
"""Agrégation des minifigs par personnage représenté."""
import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set
from lib.rebrickable.stats import read_rows
from lib.filesystem import ensure_parent_dir
import csv
from lib.rebrickable.stats import read_rows
def load_minifigs_by_set(path: Path) -> List[dict]:
@@ -15,17 +15,21 @@ def load_minifigs_by_set(path: Path) -> List[dict]:
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Counts distinct minifigs per character (unique fig_num)."""
    """Counts distinct minifigs per character (unique fig_num), with gender."""
    fig_nums_by_character: Dict[str, set] = defaultdict(set)
    genders: Dict[str, str] = {}
    for row in rows:
        character = row["known_character"].strip()
        fig_num = row["fig_num"].strip()
        gender = row.get("gender", "").strip()
        if character == "" or fig_num == "":
            continue
        fig_nums_by_character[character].add(fig_num)
        if character not in genders:
            genders[character] = gender
    aggregates: List[dict] = []
    for character, fig_nums in fig_nums_by_character.items():
        aggregates.append({"known_character": character, "minifig_count": len(fig_nums)})
        aggregates.append({"known_character": character, "gender": genders.get(character, ""), "minifig_count": len(fig_nums)})
    aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"]))
    return aggregates
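For reference, a minimal usage sketch of the updated aggregation, assuming the function above is importable; the character rows and fig numbers are made-up examples, not project data:

rows = [
    {"known_character": "Indiana Jones", "fig_num": "fig-001234", "gender": "male"},
    {"known_character": "Indiana Jones", "fig_num": "fig-005678", "gender": "male"},
    {"known_character": "Marion Ravenwood", "fig_num": "fig-009876", "gender": "female"},
]
print(aggregate_by_character(rows))
# [{'known_character': 'Indiana Jones', 'gender': 'male', 'minifig_count': 2},
#  {'known_character': 'Marion Ravenwood', 'gender': 'female', 'minifig_count': 1}]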
@@ -33,7 +37,7 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Writes the CSV of per-character counts."""
    ensure_parent_dir(path)
    fieldnames = ["known_character", "minifig_count"]
    fieldnames = ["known_character", "gender", "minifig_count"]
    with path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
@@ -110,9 +114,11 @@ def aggregate_character_spans(
    excluded = set(excluded_characters or [])
    spans: Dict[str, Dict[str, int]] = {}
    total_counts: Dict[str, int] = defaultdict(int)
    genders: Dict[str, str] = {}
    for row in minifigs_rows:
        character = row["known_character"].strip()
        fig_num = row["fig_num"].strip()
        gender = row.get("gender", "").strip()
        if character == "" or fig_num == "":
            continue
        if character in excluded:
@@ -122,6 +128,8 @@ def aggregate_character_spans(
            continue
        year_int = int(year)
        total_counts[character] += 1
        if character not in genders:
            genders[character] = gender
        current = spans.get(character)
        if current is None:
            spans[character] = {"start": year_int, "end": year_int}
@@ -136,6 +144,7 @@ def aggregate_character_spans(
"start_year": str(bounds["start"]),
"end_year": str(bounds["end"]),
"total_minifigs": str(total_counts[character]),
"gender": genders.get(character, ""),
}
)
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
@@ -145,7 +154,7 @@ def aggregate_character_spans(
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
    """Writes the CSV of per-character min/max bounds."""
    ensure_parent_dir(path)
    fieldnames = ["known_character", "start_year", "end_year", "total_minifigs"]
    fieldnames = ["known_character", "start_year", "end_year", "total_minifigs", "gender"]
    with path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
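With the extra column, a spans CSV written by write_character_spans would look roughly as follows; the header comes from the fieldnames above, while the data row is purely illustrative:

known_character,start_year,end_year,total_minifigs,gender
Indiana Jones,2008,2023,14,male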


@@ -128,6 +128,16 @@ def load_aliases(path: Path) -> Dict[str, str]:
    return aliases

def load_gender_overrides(path: Path) -> Dict[str, str]:
    """Loads the character -> gender mappings."""
    overrides: Dict[str, str] = {}
    with path.open() as gender_file:
        reader = csv.DictReader(gender_file)
        for row in reader:
            overrides[row["known_character"].lower()] = row["gender"]
    return overrides

def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str:
    """Cleans a name and maps it to a canonical version."""
    base = raw_known or extracted_name
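The new load_gender_overrides above reads only the known_character and gender columns and lowercases the character name for lookup, so the override file is expected to be a small CSV along these lines (the file name and rows are assumptions; only the column names come from the code):

known_character,gender
Indiana Jones,male
Marion Ravenwood,female

overrides = load_gender_overrides(Path("data/gender_overrides.csv"))  # hypothetical path
overrides.get("indiana jones", "")  # -> "male"; keys are lowercased by the loader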
@@ -201,6 +211,7 @@ def aggregate_heads_by_set(
    minifig_heads: Dict[str, Set[str]],
    minifig_catalog: Dict[str, dict],
    aliases: Dict[str, str],
    gender_overrides: Dict[str, str],
) -> List[dict]:
    """Aggregates minifig heads per set, removing spares and duplicates."""
    seen: Set[Tuple[str, str]] = set()
@@ -228,12 +239,14 @@ def aggregate_heads_by_set(
        normalized = normalize_known_character(known_character, extracted, aliases)
        if matched_fig == "":
            continue
        gender = gender_overrides.get(normalized.lower(), "")
        heads.append(
            {
                "set_num": row["set_num"],
                "part_num": row["part_num"],
                "known_character": normalized,
                "fig_num": matched_fig,
                "gender": gender,
            }
        )
        seen.add(key)
@@ -244,7 +257,7 @@ def aggregate_heads_by_set(
def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
    """Writes the intermediate CSV listing minifig heads per set."""
    ensure_parent_dir(destination_path)
    fieldnames = ["set_num", "part_num", "known_character", "fig_num"]
    fieldnames = ["set_num", "part_num", "known_character", "fig_num", "gender"]
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
@@ -260,6 +273,7 @@ def build_minifigs_by_set(
    inventory_minifigs_path: Path,
    minifigs_path: Path,
    aliases_path: Path,
    gender_overrides_path: Path,
    destination_path: Path,
) -> None:
    """Builds the CSV listing the minifig heads present in each set."""
@@ -271,6 +285,7 @@ def build_minifigs_by_set(
    minifig_heads = build_minifig_heads_lookup(minifig_catalog, latest_inventories, inventory_parts_path, head_parts)
    set_minifigs = build_set_minifigs_lookup(latest_inventories, inventory_minifigs_path)
    aliases = load_aliases(aliases_path)
    gender_overrides = load_gender_overrides(gender_overrides_path)
    heads = aggregate_heads_by_set(
        parts_rows,
        parts_catalog,
@@ -279,5 +294,6 @@ def build_minifigs_by_set(
        minifig_heads,
        minifig_catalog,
        aliases,
        gender_overrides,
    )
    write_heads_by_set(destination_path, heads)
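At the call site, the override file becomes one more path threaded through build_minifigs_by_set. A hedged sketch of an invocation; parameters not visible in this diff are elided, and every concrete path is an assumption:

build_minifigs_by_set(
    ...,  # earlier source paths, not shown in this diff
    inventory_minifigs_path=Path("data/inventory_minifigs.csv"),
    minifigs_path=Path("data/minifigs.csv"),
    aliases_path=Path("data/character_aliases.csv"),
    gender_overrides_path=Path("data/gender_overrides.csv"),
    destination_path=Path("output/minifigs_by_set.csv"),
)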