diff --git a/README.md b/README.md index a65206d..67c61e2 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ Cette étape se lance après le téléchargement des données d'inventaire (éta 1. `source .venv/bin/activate` 2. `python -m scripts.compute_minifigs_by_set` -Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv`, les inventaires `data/raw/inventories.csv`, `data/raw/inventory_parts.csv`, `data/raw/inventory_minifigs.csv`, le catalogue des pièces (`data/raw/parts.csv`) et celui des minifigs (`data/raw/minifigs.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Si une tête est associée à une minifig précise dans l'inventaire du set, `known_character` est renseigné avec le nom de la minifig et `fig_num` est indiqué ; sinon, `known_character` reste vide après tentative de correspondance automatique. Le CSV `data/intermediate/minifigs_by_set.csv` contient : `set_num`, `part_num`, `known_character`, `fig_num`. +Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv`, les inventaires `data/raw/inventories.csv`, `data/raw/inventory_parts.csv`, `data/raw/inventory_minifigs.csv`, le catalogue des pièces (`data/raw/parts.csv`) et celui des minifigs (`data/raw/minifigs.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Si une tête est associée à une minifig précise dans l'inventaire du set, `known_character` est renseigné avec le nom de la minifig et `fig_num` est indiqué ; sinon, `known_character` reste vide après tentative de correspondance automatique. Les correspondances d'alias sont décrites dans `config/known_character_aliases.csv` et les genres des personnages nommés dans `config/known_character_genders.csv`. Le CSV `data/intermediate/minifigs_by_set.csv` contient : `set_num`, `part_num`, `known_character`, `fig_num`, `gender`. ### Étape 21 : visualiser le nombre de minifigs par set @@ -239,7 +239,7 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_ 1. `source .venv/bin/activate` 2. `python -m scripts.plot_minifig_characters` -Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). +Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. ### Étape 23 : présence annuelle des personnages @@ -253,4 +253,4 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets 1. `source .venv/bin/activate` 2. `python -m scripts.plot_minifig_character_spans` -Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). +Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). Les barres sont colorées selon le genre issu de `config/known_character_genders.csv`. diff --git a/config/known_character_genders.csv b/config/known_character_genders.csv new file mode 100644 index 0000000..19dc81c --- /dev/null +++ b/config/known_character_genders.csv @@ -0,0 +1,54 @@ +known_character,gender +ACU Trooper,unknown +Allison Miles,female +Alan Grant,male +Atwater,male +Barry,male +Ben,male +Brooklynn,female +Claire Dearing,female +Danny Nedermeyer,male +Darius,male +Dennis Nedry,male +Donald Gennaro,male +Dr Wu,male +Duncan Kincaid,male +Eli Mills,male +Ellie Sattler,female +Figurant,unknown +Franklin Web,male +Franklin Webb,male +Gray,male +Gray Mitchell,male +Gunnar Eversol,male +Henry Loomis,male +Henry Wu,male +Hudson Harper,male +Ian Malcolm,male +Isabella Delgado,female +John Hammond,male +Kayla Watts,female +Ken Wheatley,male +Kenji,male +Lex Murphy,female +LeClerc,male +Maisie Lockwood,female +Martin Krebs,male +Owen Grady,male +Rainn DeLaCourt,male +Ray Arnold,male +Reuben Delgado,male +Robert Muldoon,male +Sammy,female +Simon Masrani,male +Sinjin Prescott,male +Soyona Santos,female +Teresa Delgado,female +Tim Murphy,male +Vic Hoskins,male +Xavier Dobbs,male +Yaz,female +Zach,male +Zach Mitchell,male +Zia Rodriguez,female +Zora Bennett,female diff --git a/lib/plots/minifig_character_spans.py b/lib/plots/minifig_character_spans.py index 89822fa..fdf9608 100644 --- a/lib/plots/minifig_character_spans.py +++ b/lib/plots/minifig_character_spans.py @@ -1,20 +1,35 @@ """Diagramme de longévité des personnages (bornes d'apparition).""" from pathlib import Path -from typing import List +from typing import Dict, List import matplotlib.pyplot as plt +from matplotlib.patches import Patch from lib.filesystem import ensure_parent_dir +from lib.milestones import load_milestones from lib.rebrickable.stats import read_rows +GENDER_COLORS = { + "male": "#4c72b0", + "female": "#c44e52", + "unknown": "#7f7f7f", +} +GENDER_LABELS = { + "male": "Homme", + "female": "Femme", + "unknown": "Inconnu", + "": "Inconnu", +} + + def load_spans(path: Path) -> List[dict]: """Charge le CSV des bornes min/max par personnage.""" return read_rows(path) -def plot_character_spans(spans_path: Path, destination_path: Path) -> None: +def plot_character_spans(spans_path: Path, destination_path: Path, milestones_path: Path | None = None) -> None: """Trace un diagramme en barres représentant la longévité des personnages.""" rows = load_spans(spans_path) if not rows: @@ -23,18 +38,22 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None: starts = [int(row["start_year"]) for row in rows] ends = [int(row["end_year"]) for row in rows] counts = [int(row["total_minifigs"]) for row in rows] + genders = [row.get("gender", "") for row in rows] + colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders] positions = list(range(len(rows))) widths = [end - start + 1 for start, end in zip(starts, ends)] min_year = min(starts) max_year = max(ends) height = max(5, len(rows) * 0.3) + milestones = load_milestones(milestones_path) if milestones_path else [] + fig, ax = plt.subplots(figsize=(12, height)) bars = ax.barh( positions, widths, left=starts, - color="#1f77b4", + color=colors, edgecolor="#0d0d0d", linewidth=0.6, ) @@ -57,6 +76,55 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None: color="#0d0d0d", ) + legend_entries = [] + seen = set() + for gender in genders: + normalized = gender.strip().lower() + if normalized in seen: + continue + seen.add(normalized) + legend_entries.append( + Patch( + facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]), + edgecolor="#0d0d0d", + linewidth=0.6, + label=GENDER_LABELS.get(normalized, "Inconnu"), + ) + ) + if legend_entries: + ax.legend(handles=legend_entries, title="Genre", loc="lower right") + + if milestones: + milestones_in_range = sorted( + [m for m in milestones if min_year <= m["year"] <= max_year], + key=lambda m: (m["year"], m["description"]), + ) + milestone_offsets: Dict[int, int] = {} + offset_step = 0.2 + max_offset = 0 + y_bottom, y_top = ax.get_ylim() + text_y = y_top - (y_top - y_bottom) * 0.01 + for milestone in milestones_in_range: + year = milestone["year"] + count_for_year = milestone_offsets.get(year, 0) + milestone_offsets[year] = count_for_year + 1 + horizontal_offset = offset_step * (count_for_year // 2 + 1) + max_offset = max(max_offset, count_for_year) + if count_for_year % 2 == 1: + horizontal_offset *= -1 + text_x = year + horizontal_offset + ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65) + ax.text( + text_x, + text_y, + milestone["description"], + rotation=90, + verticalalignment="top", + horizontalalignment="center", + fontsize=8, + color="#d62728", + ) + ensure_parent_dir(destination_path) fig.tight_layout() fig.savefig(destination_path, dpi=160) diff --git a/lib/plots/minifig_characters.py b/lib/plots/minifig_characters.py index a3e0146..3cd0f9f 100644 --- a/lib/plots/minifig_characters.py +++ b/lib/plots/minifig_characters.py @@ -4,11 +4,25 @@ from pathlib import Path from typing import List import matplotlib.pyplot as plt +from matplotlib.patches import Patch from lib.filesystem import ensure_parent_dir from lib.rebrickable.stats import read_rows +GENDER_COLORS = { + "male": "#4c72b0", + "female": "#c44e52", + "unknown": "#7f7f7f", +} +GENDER_LABELS = { + "male": "Homme", + "female": "Femme", + "unknown": "Inconnu", + "": "Inconnu", +} + + def load_counts(path: Path) -> List[dict]: """Charge le CSV des comptes par personnage.""" return read_rows(path) @@ -24,11 +38,13 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No rows = load_counts(counts_path) characters = [row["known_character"] for row in rows] counts = [int(row["minifig_count"]) for row in rows] + genders = [row.get("gender", "") for row in rows] + colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders] positions = list(range(len(rows))) height = max(6, len(rows) * 0.22) fig, ax = plt.subplots(figsize=(12, height)) - bars = ax.barh(positions, counts, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.6) + bars = ax.barh(positions, counts, color=colors, edgecolor="#0d0d0d", linewidth=0.6) ax.set_yticks(positions) ax.set_yticklabels(characters) ax.invert_yaxis() @@ -40,6 +56,23 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No for index, bar in enumerate(bars): value = counts[index] ax.text(value + 0.1, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8) + legend_entries = [] + seen = set() + for gender in genders: + normalized = gender.strip().lower() + if normalized in seen: + continue + seen.add(normalized) + legend_entries.append( + Patch( + facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]), + edgecolor="#0d0d0d", + linewidth=0.6, + label=GENDER_LABELS.get(normalized, "Inconnu"), + ) + ) + if legend_entries: + ax.legend(handles=legend_entries, title="Genre", loc="lower right") ensure_parent_dir(destination_path) fig.tight_layout() diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py index 6c3fcf0..4fae13e 100644 --- a/lib/rebrickable/minifig_characters.py +++ b/lib/rebrickable/minifig_characters.py @@ -1,12 +1,12 @@ """Agrégation des minifigs par personnage représenté.""" +import csv from collections import defaultdict from pathlib import Path from typing import Dict, Iterable, List, Sequence, Set -from lib.rebrickable.stats import read_rows from lib.filesystem import ensure_parent_dir -import csv +from lib.rebrickable.stats import read_rows def load_minifigs_by_set(path: Path) -> List[dict]: @@ -15,17 +15,21 @@ def load_minifigs_by_set(path: Path) -> List[dict]: def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: - """Compte les minifigs distinctes par personnage (fig_num unique).""" + """Compte les minifigs distinctes par personnage (fig_num unique) avec genre.""" fig_nums_by_character: Dict[str, set] = defaultdict(set) + genders: Dict[str, str] = {} for row in rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() + gender = row.get("gender", "").strip() if character == "" or fig_num == "": continue fig_nums_by_character[character].add(fig_num) + if character not in genders: + genders[character] = gender aggregates: List[dict] = [] for character, fig_nums in fig_nums_by_character.items(): - aggregates.append({"known_character": character, "minifig_count": len(fig_nums)}) + aggregates.append({"known_character": character, "gender": genders.get(character, ""), "minifig_count": len(fig_nums)}) aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"])) return aggregates @@ -33,7 +37,7 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: def write_character_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par personnage.""" ensure_parent_dir(path) - fieldnames = ["known_character", "minifig_count"] + fieldnames = ["known_character", "gender", "minifig_count"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() @@ -110,9 +114,11 @@ def aggregate_character_spans( excluded = set(excluded_characters or []) spans: Dict[str, Dict[str, int]] = {} total_counts: Dict[str, int] = defaultdict(int) + genders: Dict[str, str] = {} for row in minifigs_rows: character = row["known_character"].strip() fig_num = row["fig_num"].strip() + gender = row.get("gender", "").strip() if character == "" or fig_num == "": continue if character in excluded: @@ -122,6 +128,8 @@ def aggregate_character_spans( continue year_int = int(year) total_counts[character] += 1 + if character not in genders: + genders[character] = gender current = spans.get(character) if current is None: spans[character] = {"start": year_int, "end": year_int} @@ -136,6 +144,7 @@ def aggregate_character_spans( "start_year": str(bounds["start"]), "end_year": str(bounds["end"]), "total_minifigs": str(total_counts[character]), + "gender": genders.get(character, ""), } ) results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"])) @@ -145,7 +154,7 @@ def aggregate_character_spans( def write_character_spans(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des bornes min/max par personnage.""" ensure_parent_dir(path) - fieldnames = ["known_character", "start_year", "end_year", "total_minifigs"] + fieldnames = ["known_character", "start_year", "end_year", "total_minifigs", "gender"] with path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() diff --git a/lib/rebrickable/minifigs_by_set.py b/lib/rebrickable/minifigs_by_set.py index 40feca3..fd508dd 100644 --- a/lib/rebrickable/minifigs_by_set.py +++ b/lib/rebrickable/minifigs_by_set.py @@ -128,6 +128,16 @@ def load_aliases(path: Path) -> Dict[str, str]: return aliases +def load_gender_overrides(path: Path) -> Dict[str, str]: + """Charge les correspondances personnage -> genre.""" + overrides: Dict[str, str] = {} + with path.open() as gender_file: + reader = csv.DictReader(gender_file) + for row in reader: + overrides[row["known_character"].lower()] = row["gender"] + return overrides + + def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str: """Nettoie et mappe un nom vers une version canonique.""" base = raw_known or extracted_name @@ -201,6 +211,7 @@ def aggregate_heads_by_set( minifig_heads: Dict[str, Set[str]], minifig_catalog: Dict[str, dict], aliases: Dict[str, str], + gender_overrides: Dict[str, str], ) -> List[dict]: """Agrège les têtes de minifigs par set en éliminant les rechanges et doublons.""" seen: Set[Tuple[str, str]] = set() @@ -228,12 +239,14 @@ def aggregate_heads_by_set( normalized = normalize_known_character(known_character, extracted, aliases) if matched_fig == "": continue + gender = gender_overrides.get(normalized.lower(), "") heads.append( { "set_num": row["set_num"], "part_num": row["part_num"], "known_character": normalized, "fig_num": matched_fig, + "gender": gender, } ) seen.add(key) @@ -244,7 +257,7 @@ def aggregate_heads_by_set( def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV intermédiaire listant les têtes de minifigs par set.""" ensure_parent_dir(destination_path) - fieldnames = ["set_num", "part_num", "known_character", "fig_num"] + fieldnames = ["set_num", "part_num", "known_character", "fig_num", "gender"] with destination_path.open("w", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() @@ -260,6 +273,7 @@ def build_minifigs_by_set( inventory_minifigs_path: Path, minifigs_path: Path, aliases_path: Path, + gender_overrides_path: Path, destination_path: Path, ) -> None: """Construit le CSV listant les têtes de minifigs présentes par set.""" @@ -271,6 +285,7 @@ def build_minifigs_by_set( minifig_heads = build_minifig_heads_lookup(minifig_catalog, latest_inventories, inventory_parts_path, head_parts) set_minifigs = build_set_minifigs_lookup(latest_inventories, inventory_minifigs_path) aliases = load_aliases(aliases_path) + gender_overrides = load_gender_overrides(gender_overrides_path) heads = aggregate_heads_by_set( parts_rows, parts_catalog, @@ -279,5 +294,6 @@ def build_minifigs_by_set( minifig_heads, minifig_catalog, aliases, + gender_overrides, ) write_heads_by_set(destination_path, heads) diff --git a/scripts/compute_minifigs_by_set.py b/scripts/compute_minifigs_by_set.py index c454247..acdbeed 100644 --- a/scripts/compute_minifigs_by_set.py +++ b/scripts/compute_minifigs_by_set.py @@ -12,6 +12,7 @@ INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv") INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv") MINIFIGS_PATH = Path("data/raw/minifigs.csv") ALIASES_PATH = Path("config/known_character_aliases.csv") +GENDERS_PATH = Path("config/known_character_genders.csv") DESTINATION_PATH = Path("data/intermediate/minifigs_by_set.csv") @@ -25,6 +26,7 @@ def main() -> None: INVENTORY_MINIFIGS_PATH, MINIFIGS_PATH, ALIASES_PATH, + GENDERS_PATH, DESTINATION_PATH, ) diff --git a/scripts/plot_minifig_character_spans.py b/scripts/plot_minifig_character_spans.py index 14ab0cd..423303b 100644 --- a/scripts/plot_minifig_character_spans.py +++ b/scripts/plot_minifig_character_spans.py @@ -15,6 +15,7 @@ MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv") SPANS_PATH = Path("data/intermediate/minifig_character_spans.csv") DESTINATION_PATH = Path("figures/step24/minifig_character_spans.png") +MILESTONES_PATH = Path("config/milestones.csv") EXCLUDED_CHARACTERS = ["Figurant"] @@ -24,7 +25,7 @@ def main() -> None: sets_years = load_sets_enriched(SETS_ENRICHED_PATH) spans = aggregate_character_spans(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS) write_character_spans(SPANS_PATH, spans) - plot_character_spans(SPANS_PATH, DESTINATION_PATH) + plot_character_spans(SPANS_PATH, DESTINATION_PATH, milestones_path=MILESTONES_PATH) if __name__ == "__main__": diff --git a/tests/test_minifig_character_spans_plot.py b/tests/test_minifig_character_spans_plot.py index f571fcc..edb3a2f 100644 --- a/tests/test_minifig_character_spans_plot.py +++ b/tests/test_minifig_character_spans_plot.py @@ -12,11 +12,11 @@ matplotlib.use("Agg") def test_plot_character_spans(tmp_path: Path) -> None: """Génère le graphique de span des personnages.""" spans_path = tmp_path / "minifig_character_spans.csv" - destination = tmp_path / "figures" / "step23" / "minifig_character_spans.png" + destination = tmp_path / "figures" / "step24" / "minifig_character_spans.png" spans_path.write_text( - "known_character,start_year,end_year,total_minifigs\n" - "Owen Grady,2020,2022,3\n" - "Figurant,2019,2020,2\n" + "known_character,start_year,end_year,total_minifigs,gender\n" + "Owen Grady,2020,2022,3,male\n" + "Figurant,2019,2020,2,unknown\n" ) plot_character_spans(spans_path, destination) diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py index 9460312..1f80b26 100644 --- a/tests/test_minifig_characters.py +++ b/tests/test_minifig_characters.py @@ -15,18 +15,54 @@ def test_aggregate_by_character_counts_unique_figs() -> None: """Compter les minifigs distinctes par personnage en excluant les noms vides.""" aggregates = aggregate_by_character( [ - {"set_num": "123-1", "part_num": "head-a", "known_character": "Owen Grady", "fig_num": "fig-owen-1"}, - {"set_num": "124-1", "part_num": "head-b", "known_character": "Owen Grady", "fig_num": "fig-owen-1"}, - {"set_num": "125-1", "part_num": "head-c", "known_character": "Owen Grady", "fig_num": "fig-owen-2"}, - {"set_num": "126-1", "part_num": "head-d", "known_character": "Figurant", "fig_num": "fig-guard-1"}, - {"set_num": "128-1", "part_num": "head-f", "known_character": "Figurant", "fig_num": "fig-guard-1"}, - {"set_num": "129-1", "part_num": "head-g", "known_character": "", "fig_num": "fig-guard-2"}, + { + "set_num": "123-1", + "part_num": "head-a", + "known_character": "Owen Grady", + "fig_num": "fig-owen-1", + "gender": "male", + }, + { + "set_num": "124-1", + "part_num": "head-b", + "known_character": "Owen Grady", + "fig_num": "fig-owen-1", + "gender": "male", + }, + { + "set_num": "125-1", + "part_num": "head-c", + "known_character": "Owen Grady", + "fig_num": "fig-owen-2", + "gender": "male", + }, + { + "set_num": "126-1", + "part_num": "head-d", + "known_character": "Figurant", + "fig_num": "fig-guard-1", + "gender": "unknown", + }, + { + "set_num": "128-1", + "part_num": "head-f", + "known_character": "Figurant", + "fig_num": "fig-guard-1", + "gender": "unknown", + }, + { + "set_num": "129-1", + "part_num": "head-g", + "known_character": "", + "fig_num": "fig-guard-2", + "gender": "unknown", + }, ] ) assert aggregates == [ - {"known_character": "Owen Grady", "minifig_count": 2}, - {"known_character": "Figurant", "minifig_count": 1}, + {"known_character": "Owen Grady", "gender": "male", "minifig_count": 2}, + {"known_character": "Figurant", "gender": "unknown", "minifig_count": 1}, ] @@ -34,13 +70,13 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None: """Écrit le CSV des comptes par personnage.""" destination = tmp_path / "counts.csv" rows = [ - {"known_character": "A", "minifig_count": 2}, - {"known_character": "B", "minifig_count": 1}, + {"known_character": "A", "gender": "male", "minifig_count": 2}, + {"known_character": "B", "gender": "female", "minifig_count": 1}, ] write_character_counts(destination, rows) - assert destination.read_text() == "known_character,minifig_count\nA,2\nB,1\n" + assert destination.read_text() == "known_character,gender,minifig_count\nA,male,2\nB,female,1\n" def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: @@ -52,8 +88,20 @@ def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: "124-1,2021\n" ) minifigs_rows = [ - {"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"}, - {"set_num": "124-1", "known_character": "Figurant", "fig_num": "fig-guard", "part_num": "head-b"}, + { + "set_num": "123-1", + "known_character": "Owen Grady", + "fig_num": "fig-owen", + "part_num": "head-a", + "gender": "male", + }, + { + "set_num": "124-1", + "known_character": "Figurant", + "fig_num": "fig-guard", + "part_num": "head-b", + "gender": "unknown", + }, ] sets_years = load_sets_enriched(sets_path) @@ -76,13 +124,31 @@ def test_aggregate_character_spans_excludes_figurants(tmp_path: Path) -> None: ) sets_years = load_sets_enriched(sets_path) minifigs_rows = [ - {"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"}, - {"set_num": "124-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"}, - {"set_num": "125-1", "known_character": "Figurant", "fig_num": "fig-guard", "part_num": "head-b"}, + { + "set_num": "123-1", + "known_character": "Owen Grady", + "fig_num": "fig-owen", + "part_num": "head-a", + "gender": "male", + }, + { + "set_num": "124-1", + "known_character": "Owen Grady", + "fig_num": "fig-owen", + "part_num": "head-a", + "gender": "male", + }, + { + "set_num": "125-1", + "known_character": "Figurant", + "fig_num": "fig-guard", + "part_num": "head-b", + "gender": "unknown", + }, ] spans = aggregate_character_spans(minifigs_rows, sets_years, excluded_characters=["Figurant"]) assert spans == [ - {"known_character": "Owen Grady", "start_year": "2020", "end_year": "2021", "total_minifigs": "2"}, + {"known_character": "Owen Grady", "start_year": "2020", "end_year": "2021", "total_minifigs": "2", "gender": "male"}, ] diff --git a/tests/test_minifig_characters_plot.py b/tests/test_minifig_characters_plot.py index c73ae6b..f6c5e6a 100644 --- a/tests/test_minifig_characters_plot.py +++ b/tests/test_minifig_characters_plot.py @@ -14,9 +14,9 @@ def test_plot_minifigs_per_character(tmp_path: Path) -> None: counts_path = tmp_path / "counts.csv" destination = tmp_path / "figures" / "step22" / "minifig_characters.png" counts_path.write_text( - "known_character,minifig_count\n" - "Owen Grady,2\n" - "Figurant,1\n" + "known_character,gender,minifig_count\n" + "Owen Grady,male,2\n" + "Figurant,unknown,1\n" ) plot_minifigs_per_character(counts_path, destination) diff --git a/tests/test_minifigs_by_set.py b/tests/test_minifigs_by_set.py index 88970cd..d7b8e0b 100644 --- a/tests/test_minifigs_by_set.py +++ b/tests/test_minifigs_by_set.py @@ -71,6 +71,13 @@ def test_build_minifigs_by_set_filters_spares_and_deduplicates(tmp_path) -> None "alias,canonical\n" "Guard in Helmet with Trans-Brown Visor,Figurant\n", ) + genders_path = tmp_path / "known_character_genders.csv" + write_csv( + genders_path, + "known_character,gender\n" + "Owen Grady,male\n" + "Figurant,unknown\n", + ) destination_path = tmp_path / "minifigs_by_set.csv" build_minifigs_by_set( @@ -81,12 +88,13 @@ def test_build_minifigs_by_set_filters_spares_and_deduplicates(tmp_path) -> None inventory_minifigs_path, minifigs_path, aliases_path, + genders_path, destination_path, ) assert destination_path.read_text() == ( - "set_num,part_num,known_character,fig_num\n" - "123-1,head-a,Owen Grady,fig-owen\n" - "123-1,head-b,Figurant,fig-guard\n" - "124-1,head-b,Figurant,fig-guard\n" + "set_num,part_num,known_character,fig_num,gender\n" + "123-1,head-a,Owen Grady,fig-owen,male\n" + "123-1,head-b,Figurant,fig-guard,unknown\n" + "124-1,head-b,Figurant,fig-guard,unknown\n" )