diff --git a/README.md b/README.md index 38b156c..34ed736 100644 --- a/README.md +++ b/README.md @@ -273,8 +273,11 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets 1. `source .venv/bin/activate` 2. `python -m scripts.plot_minifig_gender_share` +3. `python -m scripts.plot_minifig_character_genders` -Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique). +Le premier script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique, étiquettes en valeurs absolues). + +Le second script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de personnages distincts par genre (femmes/hommes uniquement), sérialise `data/intermediate/minifig_character_gender_counts.csv`, puis trace `figures/step25/minifig_character_gender_share.png` pour comparer la répartition des personnages identifiés. ### Étape 26 : corrélation pièces / minifigs diff --git a/lib/plots/minifig_gender_share.py b/lib/plots/minifig_gender_share.py index f3c537b..de4204c 100644 --- a/lib/plots/minifig_gender_share.py +++ b/lib/plots/minifig_gender_share.py @@ -25,12 +25,7 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None genders = [row["gender"] for row in rows] counts = [int(row["minifig_count"]) for row in rows] colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders] - total = sum(counts) - labels = [] - for gender, count in zip(genders, counts): - percent = (count / total) * 100 if total else 0 - label = f"{GENDER_LABELS.get(gender.strip().lower(), 'Inconnu')} ({percent:.1f} %)" - labels.append(label) + labels = [f"{GENDER_LABELS.get(g.strip().lower(), 'Inconnu')} ({count})" for g, count in zip(genders, counts)] fig, ax = plt.subplots(figsize=(6, 6)) ax.pie( @@ -48,3 +43,31 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None fig.tight_layout() fig.savefig(destination_path, dpi=160) plt.close(fig) + + +def plot_character_gender_share(counts_path: Path, destination_path: Path) -> None: + """Trace un diagramme circulaire de la répartition des personnages par genre.""" + rows = [row for row in load_gender_counts(counts_path) if row["gender"].strip().lower() in ("male", "female")] + if not rows: + return + genders = [row["gender"] for row in rows] + counts = [int(row["character_count"]) for row in rows] + colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders] + labels = [f"{GENDER_LABELS.get(g.strip().lower(), 'Inconnu')} ({count})" for g, count in zip(genders, counts)] + + fig, ax = plt.subplots(figsize=(6, 6)) + ax.pie( + counts, + labels=labels, + colors=colors, + startangle=90, + wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"}, + ) + centre_circle = plt.Circle((0, 0), 0.5, fc="white") + ax.add_artist(centre_circle) + ax.set_title("Répartition des personnages par genre (hors inconnus)") + + ensure_parent_dir(destination_path) + fig.tight_layout() + fig.savefig(destination_path, dpi=160) + plt.close(fig) diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py index 1d698f6..7b31b9f 100644 --- a/lib/rebrickable/minifig_characters.py +++ b/lib/rebrickable/minifig_characters.py @@ -195,6 +195,28 @@ def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: return aggregates +def aggregate_characters_by_gender(rows: Iterable[dict]) -> List[dict]: + """Compte les personnages distincts par genre (hors genres inconnus).""" + gender_by_character: Dict[str, str] = {} + counts: Dict[str, int] = defaultdict(int) + for row in rows: + character = row["known_character"].strip() + gender = row.get("gender", "").strip().lower() + if character == "": + continue + if gender not in ("male", "female"): + continue + if character in gender_by_character: + continue + gender_by_character[character] = gender + counts[gender] += 1 + aggregates: List[dict] = [] + for gender in ("female", "male"): + if gender in counts: + aggregates.append({"gender": gender, "character_count": str(counts[gender])}) + return aggregates + + def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) @@ -206,6 +228,17 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None: writer.writerow(row) +def write_character_gender_counts(path: Path, rows: Sequence[dict]) -> None: + """Écrit le CSV des comptes de personnages par genre.""" + ensure_parent_dir(path) + fieldnames = ["gender", "character_count"] + with path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + + def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des personnages introduits chaque année.""" ensure_parent_dir(path) diff --git a/scripts/plot_minifig_character_genders.py b/scripts/plot_minifig_character_genders.py new file mode 100644 index 0000000..0599a4e --- /dev/null +++ b/scripts/plot_minifig_character_genders.py @@ -0,0 +1,27 @@ +"""Trace la répartition des personnages identifiés par genre.""" + +from pathlib import Path + +from lib.plots.minifig_gender_share import plot_character_gender_share +from lib.rebrickable.minifig_characters import ( + aggregate_characters_by_gender, + load_minifigs_by_set, + write_character_gender_counts, +) + + +MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") +COUNTS_PATH = Path("data/intermediate/minifig_character_gender_counts.csv") +DESTINATION_PATH = Path("figures/step25/minifig_character_gender_share.png") + + +def main() -> None: + """Construit le CSV de répartition des personnages par genre et trace le graphique.""" + rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) + aggregates = aggregate_characters_by_gender(rows) + write_character_gender_counts(COUNTS_PATH, aggregates) + plot_character_gender_share(COUNTS_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py index 951115d..07e9064 100644 --- a/tests/test_minifig_characters.py +++ b/tests/test_minifig_characters.py @@ -5,6 +5,7 @@ from pathlib import Path from lib.rebrickable.minifig_characters import ( aggregate_by_character, aggregate_by_gender, + aggregate_characters_by_gender, aggregate_new_character_sets, aggregate_new_characters_by_year, aggregate_variations_and_totals, @@ -12,6 +13,7 @@ from lib.rebrickable.minifig_characters import ( aggregate_presence_by_year, load_sets_enriched, write_character_counts, + write_character_gender_counts, write_new_character_sets_csv, write_new_character_sets_markdown, write_new_characters_by_year, @@ -143,6 +145,23 @@ def test_aggregate_by_gender_counts_unique_figs() -> None: ] +def test_aggregate_characters_by_gender_unique_characters() -> None: + """Compter les personnages distincts par genre (ignorer unknown).""" + aggregates = aggregate_characters_by_gender( + [ + {"known_character": "A", "gender": "male"}, + {"known_character": "A", "gender": "male"}, + {"known_character": "B", "gender": "female"}, + {"known_character": "C", "gender": "unknown"}, + ] + ) + + assert aggregates == [ + {"gender": "female", "character_count": "1"}, + {"gender": "male", "character_count": "1"}, + ] + + def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None: """Compter les nouveaux personnages par année en respectant la plage.""" sets_path = tmp_path / "sets_enriched.csv" @@ -268,6 +287,19 @@ def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None: assert destination.read_text() == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n" +def test_write_character_gender_counts_outputs_csv(tmp_path: Path) -> None: + """Écrit le CSV des comptes de personnages par genre.""" + destination = tmp_path / "character_gender.csv" + rows = [ + {"gender": "female", "character_count": "2"}, + {"gender": "male", "character_count": "3"}, + ] + + write_character_gender_counts(destination, rows) + + assert destination.read_text() == "gender,character_count\nfemale,2\nmale,3\n" + + def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None: """Écrit le CSV des nouveaux personnages par année.""" destination = tmp_path / "new_characters.csv" diff --git a/tests/test_minifig_gender_share_plot.py b/tests/test_minifig_gender_share_plot.py index 8b28123..2c5bc68 100644 --- a/tests/test_minifig_gender_share_plot.py +++ b/tests/test_minifig_gender_share_plot.py @@ -4,6 +4,7 @@ import matplotlib from pathlib import Path from lib.plots.minifig_gender_share import plot_minifig_gender_share +from lib.plots.minifig_gender_share import plot_character_gender_share matplotlib.use("Agg") @@ -24,3 +25,19 @@ def test_plot_minifig_gender_share(tmp_path: Path) -> None: assert destination.exists() assert destination.stat().st_size > 0 + + +def test_plot_character_gender_share(tmp_path: Path) -> None: + """Génère le graphique de répartition par genre au niveau personnages.""" + counts_path = tmp_path / "character_gender.csv" + destination = tmp_path / "figures" / "step25" / "minifig_character_gender_share.png" + counts_path.write_text( + "gender,character_count\n" + "male,3\n" + "female,2\n" + ) + + plot_character_gender_share(counts_path, destination) + + assert destination.exists() + assert destination.stat().st_size > 0