Ajoute l’étape 25 de répartition des minifigs par genre

2025-12-02 11:42:50 +01:00
parent 2732dd95dc
commit 9f6300bc9f
10 changed files with 190 additions and 26 deletions
--- a/lib/rebrickable/minifig_characters.py
+++ b/lib/rebrickable/minifig_characters.py
@@ -34,6 +34,28 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    return aggregates


+def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
+    """Count distinct minifigs per gender, one count per unique fig_num (first row seen wins)."""
+    genders_by_fig: Dict[str, str] = {}  # fig_num -> normalized gender; doubles as the "already counted" set
+    counts: Dict[str, int] = defaultdict(int)
+    for row in rows:
+        fig_num = row["fig_num"].strip()
+        gender = row.get("gender", "").strip().lower()
+        normalized = gender if gender in ("male", "female") else "unknown"  # any other or missing value -> "unknown"
+        if fig_num == "":  # rows with a blank fig_num are ignored
+            continue
+        if fig_num in genders_by_fig:  # duplicate fig_num: keep the gender recorded from its first row
+            continue
+        genders_by_fig[fig_num] = normalized
+        counts[normalized] += 1
+    aggregates: List[dict] = []
+    ordered = ["female", "male", "unknown"]  # fixed output order; genders never counted are omitted below
+    for gender in ordered:
+        if gender in counts:
+            aggregates.append({"gender": gender, "minifig_count": str(counts[gender])})  # count emitted as str
+    return aggregates
+
+
 def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
    """Écrit le CSV des comptes par personnage."""
    ensure_parent_dir(path)
@@ -45,6 +67,17 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
            writer.writerow(row)


+def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-gender count rows to a CSV file at `path`, replacing any existing content."""
+    ensure_parent_dir(path)  # helper defined outside this view; presumably creates the parent directory - confirm
+    fieldnames = ["gender", "minifig_count"]  # header row and column order of the output
+    with path.open("w", newline="") as csv_file:  # newline="" is what the csv module expects for writer files
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
+
+
 def load_sets_enriched(path: Path) -> Dict[str, str]:
    """Indexe les années par set_num."""
    lookup: Dict[str, str] = {}