diff --git a/README.md b/README.md
index 67c61e2..6f73cba 100644
--- a/README.md
+++ b/README.md
@@ -254,3 +254,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
 2. `python -m scripts.plot_minifig_character_spans`
 
 Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). Les barres sont colorées selon le genre issu de `config/known_character_genders.csv`.
+
+### Étape 25 : répartition des genres des personnages
+
+1. `source .venv/bin/activate`
+2. `python -m scripts.plot_minifig_gender_share`
+
+Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins, masculins ou inconnus).
diff --git a/lib/plots/gender_palette.py b/lib/plots/gender_palette.py
new file mode 100644
index 0000000..1003b52
--- /dev/null
+++ b/lib/plots/gender_palette.py
@@ -0,0 +1,14 @@
+"""Colour palette and display labels for character genders."""
+
+GENDER_COLORS = {
+    "male": "#4c72b0",
+    "female": "#c44e52",
+    "unknown": "#7f7f7f",
+}
+
+GENDER_LABELS = {
+    "male": "Homme",
+    "female": "Femme",
+    "unknown": "Inconnu",
+    "": "Inconnu",
+}
diff --git a/lib/plots/minifig_character_spans.py b/lib/plots/minifig_character_spans.py
index fdf9608..4f72df7 100644
--- a/lib/plots/minifig_character_spans.py
+++ b/lib/plots/minifig_character_spans.py
@@ -8,22 +8,10 @@ from matplotlib.patches import Patch
 
 from lib.filesystem import ensure_parent_dir
 from lib.milestones import load_milestones
+from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
 from lib.rebrickable.stats import read_rows
 
 
-GENDER_COLORS = {
-    "male": "#4c72b0",
-    "female": "#c44e52",
-    "unknown": "#7f7f7f",
-}
-GENDER_LABELS = {
-    "male": "Homme",
-    "female": "Femme",
-    "unknown": "Inconnu",
-    "": "Inconnu",
-}
-
-
 def load_spans(path: Path) -> List[dict]:
     """Charge le CSV des bornes min/max par personnage."""
     return read_rows(path)
diff --git a/lib/plots/minifig_characters.py b/lib/plots/minifig_characters.py
index 3cd0f9f..c98872d 100644
--- a/lib/plots/minifig_characters.py
+++ b/lib/plots/minifig_characters.py
@@ -7,22 +7,10 @@ import matplotlib.pyplot as plt
 from matplotlib.patches import Patch
 
 from lib.filesystem import ensure_parent_dir
+from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
 from lib.rebrickable.stats import read_rows
 
 
-GENDER_COLORS = {
-    "male": "#4c72b0",
-    "female": "#c44e52",
-    "unknown": "#7f7f7f",
-}
-GENDER_LABELS = {
-    "male": "Homme",
-    "female": "Femme",
-    "unknown": "Inconnu",
-    "": "Inconnu",
-}
-
-
 def load_counts(path: Path) -> List[dict]:
     """Charge le CSV des comptes par personnage."""
     return read_rows(path)
diff --git a/lib/plots/minifig_gender_share.py b/lib/plots/minifig_gender_share.py
new file mode 100644
index 0000000..20c2d92
--- /dev/null
+++ b/lib/plots/minifig_gender_share.py
@@ -0,0 +1,54 @@
+"""Donut chart of the minifig distribution by gender."""
+
+from pathlib import Path
+from typing import List
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
+from lib.rebrickable.stats import read_rows
+
+
+def load_gender_counts(path: Path) -> List[dict]:
+    """Load the per-gender counts CSV."""
+    return read_rows(path)
+
+
+def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None:
+    """Draw a donut chart of the minifig share per gender.
+
+    Reads the `gender` and `minifig_count` columns of `counts_path` and saves
+    the figure to `destination_path`. Nothing is written when the CSV has no
+    rows or when the counts add up to zero, since a pie cannot be normalised
+    without a positive total.
+    """
+    rows = load_gender_counts(counts_path)
+    genders = [(row["gender"] or "").strip().lower() for row in rows]
+    counts = [int(row["minifig_count"]) for row in rows]
+    total = sum(counts)
+    if total <= 0:
+        return
+    colors = [GENDER_COLORS.get(gender, GENDER_COLORS["unknown"]) for gender in genders]
+    labels = [
+        f"{GENDER_LABELS.get(gender, 'Inconnu')} ({count / total * 100:.1f} %)"
+        for gender, count in zip(genders, counts)
+    ]
+
+    fig, ax = plt.subplots(figsize=(6, 6))
+    ax.pie(
+        counts,
+        labels=labels,
+        colors=colors,
+        startangle=90,
+        wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"},
+    )
+    # A white disc over the centre turns the pie into a donut.
+    centre_circle = plt.Circle((0, 0), 0.5, fc="white")
+    ax.add_artist(centre_circle)
+    ax.set_title("Répartition des minifigs par genre (thèmes filtrés)")
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=160)
+    plt.close(fig)
diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py
index 4fae13e..c03f9b0 100644
--- a/lib/rebrickable/minifig_characters.py
+++ b/lib/rebrickable/minifig_characters.py
@@ -34,6 +34,30 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
     return aggregates
 
 
+def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
+    """Count distinct minifigs per gender, keyed on `fig_num`.
+
+    Each `fig_num` is counted once, with the gender of its first row. Any
+    gender other than "male" or "female" (blank and missing included) counts
+    as "unknown". Rows with a blank `fig_num` are ignored. The result lists
+    only the genders seen, in the order female, male, unknown.
+    """
+    seen_figs = set()
+    counts: Dict[str, int] = defaultdict(int)
+    for row in rows:
+        fig_num = (row["fig_num"] or "").strip()
+        if fig_num == "" or fig_num in seen_figs:
+            continue
+        seen_figs.add(fig_num)
+        gender = (row.get("gender") or "").strip().lower()
+        counts[gender if gender in ("male", "female") else "unknown"] += 1
+    return [
+        {"gender": gender, "minifig_count": str(counts[gender])}
+        for gender in ("female", "male", "unknown")
+        if gender in counts
+    ]
+
+
 def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
     """Écrit le CSV des comptes par personnage."""
     ensure_parent_dir(path)
@@ -45,6 +69,16 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
             writer.writerow(row)
 
 
+def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-gender counts CSV (`gender`, `minifig_count` columns)."""
+    ensure_parent_dir(path)
+    fieldnames = ["gender", "minifig_count"]
+    with path.open("w", newline="", encoding="utf-8") as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
 def load_sets_enriched(path: Path) -> Dict[str, str]:
     """Indexe les années par set_num."""
     lookup: Dict[str, str] = {}
diff --git a/scripts/plot_minifig_gender_share.py b/scripts/plot_minifig_gender_share.py
new file mode 100644
index 0000000..c2f5e28
--- /dev/null
+++ b/scripts/plot_minifig_gender_share.py
@@ -0,0 +1,27 @@
+"""Plot the minifig distribution by gender."""
+
+from pathlib import Path
+
+from lib.plots.minifig_gender_share import plot_minifig_gender_share
+from lib.rebrickable.minifig_characters import (
+    aggregate_by_gender,
+    load_minifigs_by_set,
+    write_gender_counts,
+)
+
+
+MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
+GENDER_COUNTS_PATH = Path("data/intermediate/minifig_gender_counts.csv")
+DESTINATION_PATH = Path("figures/step25/minifig_gender_share.png")
+
+
+def main() -> None:
+    """Build the per-gender counts CSV, then draw the chart from it."""
+    rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
+    aggregates = aggregate_by_gender(rows)
+    write_gender_counts(GENDER_COUNTS_PATH, aggregates)
+    plot_minifig_gender_share(GENDER_COUNTS_PATH, DESTINATION_PATH)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py
index 1f80b26..817030a 100644
--- a/tests/test_minifig_characters.py
+++ b/tests/test_minifig_characters.py
@@ -4,10 +4,12 @@ from pathlib import Path
 
 from lib.rebrickable.minifig_characters import (
     aggregate_by_character,
+    aggregate_by_gender,
     aggregate_character_spans,
     aggregate_presence_by_year,
     load_sets_enriched,
     write_character_counts,
+    write_gender_counts,
 )
 
 
@@ -66,6 +68,26 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
     ]
 
 
+def test_aggregate_by_gender_counts_unique_figs() -> None:
+    """Count distinct minifigs per gender."""
+    aggregates = aggregate_by_gender(
+        [
+            {"fig_num": "fig-a", "gender": "male"},
+            {"fig_num": "fig-a", "gender": "male"},
+            {"fig_num": "fig-b", "gender": "female"},
+            {"fig_num": "fig-c", "gender": ""},
+            {"fig_num": "fig-d", "gender": None},
+            {"fig_num": " ", "gender": "male"},
+        ]
+    )
+
+    assert aggregates == [
+        {"gender": "female", "minifig_count": "1"},
+        {"gender": "male", "minifig_count": "1"},
+        {"gender": "unknown", "minifig_count": "2"},
+    ]
+
+
 def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
     """Écrit le CSV des comptes par personnage."""
     destination = tmp_path / "counts.csv"
@@ -79,6 +101,19 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
     assert destination.read_text() == "known_character,gender,minifig_count\nA,male,2\nB,female,1\n"
 
 
+def test_write_gender_counts_outputs_csv(tmp_path: Path) -> None:
+    """Write the per-gender counts CSV."""
+    destination = tmp_path / "gender_counts.csv"
+    rows = [
+        {"gender": "male", "minifig_count": "2"},
+        {"gender": "female", "minifig_count": "1"},
+    ]
+
+    write_gender_counts(destination, rows)
+
+    assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
+
+
 def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
     """Calcule le total annuel en excluant les figurants."""
     sets_path = tmp_path / "sets_enriched.csv"
diff --git a/tests/test_minifig_gender_share_plot.py b/tests/test_minifig_gender_share_plot.py
new file mode 100644
index 0000000..2f37f23
--- /dev/null
+++ b/tests/test_minifig_gender_share_plot.py
@@ -0,0 +1,38 @@
+"""Tests for the gender distribution chart."""
+
+from pathlib import Path
+
+import matplotlib
+
+from lib.plots.minifig_gender_share import plot_minifig_gender_share
+
+
+matplotlib.use("Agg")
+
+
+def test_plot_minifig_gender_share(tmp_path: Path) -> None:
+    """Render the gender distribution chart."""
+    counts_path = tmp_path / "gender_counts.csv"
+    destination = tmp_path / "figures" / "step25" / "minifig_gender_share.png"
+    counts_path.write_text(
+        "gender,minifig_count\n"
+        "male,2\n"
+        "female,1\n"
+        "unknown,1\n"
+    )
+
+    plot_minifig_gender_share(counts_path, destination)
+
+    assert destination.exists()
+    assert destination.stat().st_size > 0
+
+
+def test_plot_minifig_gender_share_skips_zero_total(tmp_path: Path) -> None:
+    """Write no figure when every count is zero."""
+    counts_path = tmp_path / "gender_counts.csv"
+    destination = tmp_path / "minifig_gender_share.png"
+    counts_path.write_text("gender,minifig_count\nmale,0\nfemale,0\n")
+
+    plot_minifig_gender_share(counts_path, destination)
+
+    assert not destination.exists()