1

Ajoute l’étape 25 de répartition des minifigs par genre

This commit is contained in:
Richard Dern 2025-12-02 11:42:50 +01:00
parent f5c1fa6333
commit 1f18195df2
9 changed files with 190 additions and 26 deletions

View File

@ -254,3 +254,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
2. `python -m scripts.plot_minifig_character_spans` 2. `python -m scripts.plot_minifig_character_spans`
Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). Les barres sont colorées selon le genre issu de `config/known_character_genders.csv`. Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). Les barres sont colorées selon le genre issu de `config/known_character_genders.csv`.
### Étape 25 : répartition des genres des personnages
1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_gender_share`
Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins, masculins ou inconnus).

View File

@ -0,0 +1,14 @@
"""Palette de couleurs et libellés pour les genres des personnages."""
# Hex colour associated with each normalised gender key.
GENDER_COLORS = {
    "male": "#4c72b0",
    "female": "#c44e52",
    "unknown": "#7f7f7f",
}
# Display label (French, shown on the figures) for each gender key.
# The empty string is mapped to the same label as "unknown".
GENDER_LABELS = {
    "male": "Homme",
    "female": "Femme",
    "unknown": "Inconnu",
    "": "Inconnu",
}

View File

@ -8,22 +8,10 @@ from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones from lib.milestones import load_milestones
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
from lib.rebrickable.stats import read_rows from lib.rebrickable.stats import read_rows
GENDER_COLORS = {
"male": "#4c72b0",
"female": "#c44e52",
"unknown": "#7f7f7f",
}
GENDER_LABELS = {
"male": "Homme",
"female": "Femme",
"unknown": "Inconnu",
"": "Inconnu",
}
def load_spans(path: Path) -> List[dict]: def load_spans(path: Path) -> List[dict]:
"""Charge le CSV des bornes min/max par personnage.""" """Charge le CSV des bornes min/max par personnage."""
return read_rows(path) return read_rows(path)

View File

@ -7,22 +7,10 @@ import matplotlib.pyplot as plt
from matplotlib.patches import Patch from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
from lib.rebrickable.stats import read_rows from lib.rebrickable.stats import read_rows
GENDER_COLORS = {
"male": "#4c72b0",
"female": "#c44e52",
"unknown": "#7f7f7f",
}
GENDER_LABELS = {
"male": "Homme",
"female": "Femme",
"unknown": "Inconnu",
"": "Inconnu",
}
def load_counts(path: Path) -> List[dict]: def load_counts(path: Path) -> List[dict]:
"""Charge le CSV des comptes par personnage.""" """Charge le CSV des comptes par personnage."""
return read_rows(path) return read_rows(path)

View File

@ -0,0 +1,48 @@
"""Diagramme de répartition des minifigs par genre."""
from pathlib import Path
from typing import List
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
from lib.rebrickable.stats import read_rows
def load_gender_counts(path: Path) -> List[dict]:
    """Load the per-gender counts CSV found at ``path``.

    The file is parsed by ``read_rows``; its result is returned unchanged.
    """
    gender_rows = read_rows(path)
    return gender_rows
def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None:
    """Draw a donut chart of the minifig share per gender.

    Args:
        counts_path: CSV with ``gender`` and ``minifig_count`` columns.
        destination_path: Image file to write; its parent directory is
            created through ``ensure_parent_dir``.

    Nothing is written when the CSV has no rows or when every count is zero,
    since there is no share to display in either case.
    """
    rows = load_gender_counts(counts_path)
    if not rows:
        return
    # Normalise each gender key once; it drives both the colour and the label.
    gender_keys = [row["gender"].strip().lower() for row in rows]
    counts = [int(row["minifig_count"]) for row in rows]
    if not any(counts):
        # An all-zero series cannot be normalised into wedges by the pie chart.
        return
    total = sum(counts)
    colors = [GENDER_COLORS.get(key, GENDER_COLORS["unknown"]) for key in gender_keys]
    labels = []
    for key, count in zip(gender_keys, counts):
        percent = (count / total) * 100 if total else 0
        labels.append(f"{GENDER_LABELS.get(key, 'Inconnu')} ({percent:.1f} %)")
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        counts,
        labels=labels,
        colors=colors,
        startangle=90,
        wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"},
    )
    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.5, fc="white")
    ax.add_artist(centre_circle)
    ax.set_title("Répartition des minifigs par genre (thèmes filtrés)")
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)

View File

@ -34,6 +34,28 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
return aggregates return aggregates
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs per gender.

    Each ``fig_num`` is counted once, using the gender of the first row in
    which it appears. Any gender other than ``male`` or ``female`` (after
    trimming and lower-casing), including a missing or ``None`` value, is
    counted as ``unknown``. Rows whose ``fig_num`` is empty or ``None`` are
    ignored.

    Args:
        rows: Mappings holding a ``fig_num`` key and optionally ``gender``.

    Returns:
        One ``{"gender", "minifig_count"}`` dict per gender that occurs, in
        the order female, male, unknown; counts are serialised as strings.

    Raises:
        KeyError: If a row has no ``fig_num`` key.
    """
    seen_figs = set()
    counts: Dict[str, int] = defaultdict(int)
    for row in rows:
        # CSV readers may yield None for absent cells; treat it as empty.
        fig_num = (row["fig_num"] or "").strip()
        if not fig_num or fig_num in seen_figs:
            continue
        seen_figs.add(fig_num)
        gender = (row.get("gender") or "").strip().lower()
        counts[gender if gender in ("male", "female") else "unknown"] += 1
    return [
        {"gender": gender, "minifig_count": str(counts[gender])}
        for gender in ("female", "male", "unknown")
        if gender in counts
    ]
def write_character_counts(path: Path, rows: Sequence[dict]) -> None: def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par personnage.""" """Écrit le CSV des comptes par personnage."""
ensure_parent_dir(path) ensure_parent_dir(path)
@ -45,6 +67,17 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
writer.writerow(row) writer.writerow(row)
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender counts as a CSV file at ``path``.

    The parent directory is prepared with ``ensure_parent_dir``; the file
    gets a ``gender,minifig_count`` header followed by one line per row.
    """
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=["gender", "minifig_count"])
        writer.writeheader()
        writer.writerows(rows)
def load_sets_enriched(path: Path) -> Dict[str, str]: def load_sets_enriched(path: Path) -> Dict[str, str]:
"""Indexe les années par set_num.""" """Indexe les années par set_num."""
lookup: Dict[str, str] = {} lookup: Dict[str, str] = {}

View File

@ -0,0 +1,27 @@
"""Trace la répartition des minifigs par genre."""
from pathlib import Path
from lib.plots.minifig_gender_share import plot_minifig_gender_share
from lib.rebrickable.minifig_characters import (
aggregate_by_gender,
load_minifigs_by_set,
write_gender_counts,
)
# Input CSV handed to load_minifigs_by_set.
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
# Intermediate CSV: written by write_gender_counts, then read back for plotting.
GENDER_COUNTS_PATH = Path("data/intermediate/minifig_gender_counts.csv")
# Output image produced by plot_minifig_gender_share.
DESTINATION_PATH = Path("figures/step25/minifig_gender_share.png")
def main() -> None:
    """Build the per-gender counts CSV, then render the chart from that file."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    write_gender_counts(GENDER_COUNTS_PATH, aggregate_by_gender(minifig_rows))
    plot_minifig_gender_share(GENDER_COUNTS_PATH, DESTINATION_PATH)
if __name__ == "__main__":
main()

View File

@ -4,10 +4,12 @@ from pathlib import Path
from lib.rebrickable.minifig_characters import ( from lib.rebrickable.minifig_characters import (
aggregate_by_character, aggregate_by_character,
aggregate_by_gender,
aggregate_character_spans, aggregate_character_spans,
aggregate_presence_by_year, aggregate_presence_by_year,
load_sets_enriched, load_sets_enriched,
write_character_counts, write_character_counts,
write_gender_counts,
) )
@ -66,6 +68,24 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
] ]
def test_aggregate_by_gender_counts_unique_figs() -> None:
    """Distinct minifigs are counted once per gender; blank gender is unknown."""
    minifig_rows = [
        {"fig_num": "fig-a", "gender": "male"},
        {"fig_num": "fig-a", "gender": "male"},
        {"fig_num": "fig-b", "gender": "female"},
        {"fig_num": "fig-c", "gender": ""},
    ]
    expected = [
        {"gender": "female", "minifig_count": "1"},
        {"gender": "male", "minifig_count": "1"},
        {"gender": "unknown", "minifig_count": "1"},
    ]

    assert aggregate_by_gender(minifig_rows) == expected
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None: def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
"""Écrit le CSV des comptes par personnage.""" """Écrit le CSV des comptes par personnage."""
destination = tmp_path / "counts.csv" destination = tmp_path / "counts.csv"
@ -79,6 +99,19 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
assert destination.read_text() == "known_character,gender,minifig_count\nA,male,2\nB,female,1\n" assert destination.read_text() == "known_character,gender,minifig_count\nA,male,2\nB,female,1\n"
def test_write_gender_counts_outputs_csv(tmp_path: Path) -> None:
    """The per-gender CSV has a header and one line per row, in input order."""
    destination = tmp_path / "gender_counts.csv"

    write_gender_counts(
        destination,
        [
            {"gender": "male", "minifig_count": "2"},
            {"gender": "female", "minifig_count": "1"},
        ],
    )

    written = destination.read_text()
    assert written == "gender,minifig_count\nmale,2\nfemale,1\n"
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
"""Calcule le total annuel en excluant les figurants.""" """Calcule le total annuel en excluant les figurants."""
sets_path = tmp_path / "sets_enriched.csv" sets_path = tmp_path / "sets_enriched.csv"

View File

@ -0,0 +1,26 @@
"""Tests du diagramme de répartition des genres."""
import matplotlib
from pathlib import Path
from lib.plots.minifig_gender_share import plot_minifig_gender_share
matplotlib.use("Agg")
def test_plot_minifig_gender_share(tmp_path: Path) -> None:
    """Plotting from a counts CSV produces a non-empty image file."""
    csv_content = (
        "gender,minifig_count\n"
        "male,2\n"
        "female,1\n"
        "unknown,1\n"
    )
    counts_path = tmp_path / "gender_counts.csv"
    counts_path.write_text(csv_content)
    # Nested target path: the plot function is expected to create the folders.
    destination = tmp_path / "figures" / "step25" / "minifig_gender_share.png"

    plot_minifig_gender_share(counts_path, destination)

    assert destination.exists()
    assert destination.stat().st_size > 0