Compare la répartition des genres minifigs vs personnages
This commit is contained in:
parent
cc613a88af
commit
a976b57afe
@ -273,8 +273,11 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
|
|||||||
|
|
||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_gender_share`
|
2. `python -m scripts.plot_minifig_gender_share`
|
||||||
|
3. `python -m scripts.plot_minifig_character_genders`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique).
|
Le premier script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique, étiquettes en valeurs absolues).
|
||||||
|
|
||||||
|
Le second script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de personnages distincts par genre (femmes/hommes uniquement), sérialise `data/intermediate/minifig_character_gender_counts.csv`, puis trace `figures/step25/minifig_character_gender_share.png` pour comparer la répartition des personnages identifiés.
|
||||||
|
|
||||||
### Étape 26 : corrélation pièces / minifigs
|
### Étape 26 : corrélation pièces / minifigs
|
||||||
|
|
||||||
|
|||||||
@ -25,12 +25,7 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None
|
|||||||
genders = [row["gender"] for row in rows]
|
genders = [row["gender"] for row in rows]
|
||||||
counts = [int(row["minifig_count"]) for row in rows]
|
counts = [int(row["minifig_count"]) for row in rows]
|
||||||
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
||||||
total = sum(counts)
|
labels = [f"{GENDER_LABELS.get(g.strip().lower(), 'Inconnu')} ({count})" for g, count in zip(genders, counts)]
|
||||||
labels = []
|
|
||||||
for gender, count in zip(genders, counts):
|
|
||||||
percent = (count / total) * 100 if total else 0
|
|
||||||
label = f"{GENDER_LABELS.get(gender.strip().lower(), 'Inconnu')} ({percent:.1f} %)"
|
|
||||||
labels.append(label)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(6, 6))
|
fig, ax = plt.subplots(figsize=(6, 6))
|
||||||
ax.pie(
|
ax.pie(
|
||||||
@ -48,3 +43,31 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None
|
|||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.savefig(destination_path, dpi=160)
|
fig.savefig(destination_path, dpi=160)
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_character_gender_share(counts_path: Path, destination_path: Path) -> None:
    """Draw a donut chart of the identified characters split by gender.

    Rows are read with ``load_gender_counts(counts_path)``; only rows whose
    ``gender`` is "male" or "female" (after stripping and lower-casing) are
    kept. Each wedge is labelled with the gender label and its absolute
    ``character_count``. Nothing is drawn or written when no row remains.

    Args:
        counts_path: CSV holding the ``gender`` and ``character_count`` columns.
        destination_path: Image file to write; its parent directory is created
            through ``ensure_parent_dir``.
    """
    kept_rows = []
    for entry in load_gender_counts(counts_path):
        if entry["gender"].strip().lower() in ("male", "female"):
            kept_rows.append(entry)
    if not kept_rows:
        return

    counts = []
    colors = []
    labels = []
    for entry in kept_rows:
        gender_key = entry["gender"].strip().lower()
        amount = int(entry["character_count"])
        counts.append(amount)
        colors.append(GENDER_COLORS.get(gender_key, GENDER_COLORS["unknown"]))
        labels.append(f"{GENDER_LABELS.get(gender_key, 'Inconnu')} ({amount})")

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        counts,
        labels=labels,
        colors=colors,
        startangle=90,
        wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"},
    )
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.5, fc="white"))
    ax.set_title("Répartition des personnages par genre (hors inconnus)")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
|
||||||
|
|||||||
@ -195,6 +195,28 @@ def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
|||||||
return aggregates
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_characters_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct characters per gender, ignoring unknown genders.

    Character names are compared after stripping whitespace; genders after
    stripping and lower-casing. A character is counted once, under the gender
    of the first row where it appears as "male" or "female". Rows with an
    empty name or any other gender are skipped.

    Args:
        rows: Dict rows carrying a ``known_character`` key and optionally a
            ``gender`` key. A ``None`` value (what ``csv.DictReader`` puts in
            rows shorter than the header) is treated as an empty string.

    Returns:
        One ``{"gender": ..., "character_count": ...}`` dict per gender that
        has at least one character, "female" first and then "male". Counts
        are returned as strings.

    Raises:
        KeyError: If a row has no ``known_character`` key.
    """
    # Only membership matters here, so a set replaces the former name->gender map.
    seen_characters = set()
    counts: Dict[str, int] = defaultdict(int)
    for row in rows:
        # ``or ""`` guards against None values, which have no ``.strip()``.
        character = (row["known_character"] or "").strip()
        gender = (row.get("gender") or "").strip().lower()
        if not character or gender not in ("male", "female"):
            continue
        if character in seen_characters:
            continue
        seen_characters.add(character)
        counts[gender] += 1

    # Fixed output order; a gender with no character is omitted entirely.
    return [
        {"gender": gender, "character_count": str(counts[gender])}
        for gender in ("female", "male")
        if gender in counts
    ]
|
||||||
|
|
||||||
|
|
||||||
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
@ -206,6 +228,17 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_character_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender character counts to a CSV file.

    The parent directory is prepared through ``ensure_parent_dir`` and the
    file is overwritten. The output has a header line followed by one line
    per row, with the columns ``gender`` and ``character_count``.

    Args:
        path: Destination CSV file.
        rows: Dicts keyed by ``gender`` and ``character_count``.
    """
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=["gender", "character_count"])
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
|
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des personnages introduits chaque année."""
|
"""Écrit le CSV des personnages introduits chaque année."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
|
|||||||
27
scripts/plot_minifig_character_genders.py
Normal file
27
scripts/plot_minifig_character_genders.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
"""Trace la répartition des personnages identifiés par genre."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_gender_share import plot_character_gender_share
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_characters_by_gender,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_character_gender_counts,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
COUNTS_PATH = Path("data/intermediate/minifig_character_gender_counts.csv")
|
||||||
|
DESTINATION_PATH = Path("figures/step25/minifig_character_gender_share.png")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the per-gender character count CSV, then plot it."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    write_character_gender_counts(COUNTS_PATH, aggregate_characters_by_gender(minifig_rows))
    # The plot re-reads the CSV that was just written.
    plot_character_gender_share(COUNTS_PATH, DESTINATION_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
from lib.rebrickable.minifig_characters import (
|
from lib.rebrickable.minifig_characters import (
|
||||||
aggregate_by_character,
|
aggregate_by_character,
|
||||||
aggregate_by_gender,
|
aggregate_by_gender,
|
||||||
|
aggregate_characters_by_gender,
|
||||||
aggregate_new_character_sets,
|
aggregate_new_character_sets,
|
||||||
aggregate_new_characters_by_year,
|
aggregate_new_characters_by_year,
|
||||||
aggregate_variations_and_totals,
|
aggregate_variations_and_totals,
|
||||||
@ -12,6 +13,7 @@ from lib.rebrickable.minifig_characters import (
|
|||||||
aggregate_presence_by_year,
|
aggregate_presence_by_year,
|
||||||
load_sets_enriched,
|
load_sets_enriched,
|
||||||
write_character_counts,
|
write_character_counts,
|
||||||
|
write_character_gender_counts,
|
||||||
write_new_character_sets_csv,
|
write_new_character_sets_csv,
|
||||||
write_new_character_sets_markdown,
|
write_new_character_sets_markdown,
|
||||||
write_new_characters_by_year,
|
write_new_characters_by_year,
|
||||||
@ -143,6 +145,23 @@ def test_aggregate_by_gender_counts_unique_figs() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_characters_by_gender_unique_characters() -> None:
    """Count distinct characters per gender, leaving out unknown genders."""
    sample_rows = [
        {"known_character": "A", "gender": "male"},
        {"known_character": "A", "gender": "male"},
        {"known_character": "B", "gender": "female"},
        {"known_character": "C", "gender": "unknown"},
    ]
    expected = [
        {"gender": "female", "character_count": "1"},
        {"gender": "male", "character_count": "1"},
    ]

    assert aggregate_characters_by_gender(sample_rows) == expected
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
|
def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
|
||||||
"""Compter les nouveaux personnages par année en respectant la plage."""
|
"""Compter les nouveaux personnages par année en respectant la plage."""
|
||||||
sets_path = tmp_path / "sets_enriched.csv"
|
sets_path = tmp_path / "sets_enriched.csv"
|
||||||
@ -268,6 +287,19 @@ def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
|
|||||||
assert destination.read_text() == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"
|
assert destination.read_text() == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_character_gender_counts_outputs_csv(tmp_path: Path) -> None:
    """Write the per-gender character counts CSV and check its exact content."""
    destination = tmp_path / "character_gender.csv"

    write_character_gender_counts(
        destination,
        [
            {"gender": "female", "character_count": "2"},
            {"gender": "male", "character_count": "3"},
        ],
    )

    written = destination.read_text()
    assert written == "gender,character_count\nfemale,2\nmale,3\n"
|
||||||
|
|
||||||
|
|
||||||
def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
|
def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
|
||||||
"""Écrit le CSV des nouveaux personnages par année."""
|
"""Écrit le CSV des nouveaux personnages par année."""
|
||||||
destination = tmp_path / "new_characters.csv"
|
destination = tmp_path / "new_characters.csv"
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import matplotlib
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.plots.minifig_gender_share import plot_minifig_gender_share
|
from lib.plots.minifig_gender_share import plot_minifig_gender_share
|
||||||
|
from lib.plots.minifig_gender_share import plot_character_gender_share
|
||||||
|
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
@ -24,3 +25,19 @@ def test_plot_minifig_gender_share(tmp_path: Path) -> None:
|
|||||||
|
|
||||||
assert destination.exists()
|
assert destination.exists()
|
||||||
assert destination.stat().st_size > 0
|
assert destination.stat().st_size > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_character_gender_share(tmp_path: Path) -> None:
    """Render the character-level gender share chart into a non-empty file."""
    source_csv = tmp_path / "character_gender.csv"
    csv_content = (
        "gender,character_count\n"
        "male,3\n"
        "female,2\n"
    )
    source_csv.write_text(csv_content)
    # The nested target path does not exist beforehand.
    output_png = tmp_path / "figures" / "step25" / "minifig_character_gender_share.png"

    plot_character_gender_share(source_csv, output_png)

    assert output_png.exists()
    assert output_png.stat().st_size > 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user