From ad4479675933cb977e06b9fafffe50e7fd25a5a0 Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Wed, 3 Dec 2025 21:50:28 +0100 Subject: [PATCH] Ajoute le graphique variations vs total par personnage --- README.md | 5 +- lib/plots/minifig_characters.py | 85 ++++++++++++++++++++ lib/rebrickable/minifig_characters.py | 46 +++++++++++ scripts/plot_minifig_character_variations.py | 28 +++++++ tests/test_minifig_characters.py | 64 +++++++++++++++ tests/test_minifig_characters_plot.py | 17 ++++ 6 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 scripts/plot_minifig_character_variations.py diff --git a/README.md b/README.md index c150a83..894ed76 100644 --- a/README.md +++ b/README.md @@ -243,8 +243,11 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_ 1. `source .venv/bin/activate` 2. `python -m scripts.plot_minifig_characters` +3. `python -m scripts.plot_minifig_character_variations` -Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. +Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. 
# --- lib/plots/minifig_characters.py ---


def load_variations_and_totals(path: Path) -> List[dict]:
    """Load the variations/total comparison CSV as a list of row dicts."""
    return read_rows(path)


def plot_character_variations_vs_total(counts_path: Path, destination_path: Path) -> None:
    """Overlay distinct variations on the total minifig count of each character.

    Every character gets two horizontal bars sharing the same baseline: a wide
    neutral bar for ``total_minifigs`` and a narrower bar for
    ``variation_count`` whose colour is looked up in ``GENDER_COLORS``.

    Args:
        counts_path: CSV providing the ``known_character``,
            ``variation_count`` and ``total_minifigs`` columns, plus an
            optional ``gender`` column.
        destination_path: Image file to save; ``ensure_parent_dir`` is called
            on it before saving.

    Raises:
        KeyError: If a required column is missing from a row.
        ValueError: If a count column does not hold an integer.

    Nothing is drawn or written when the CSV contains no rows.
    """
    rows = load_variations_and_totals(counts_path)
    if not rows:
        return
    characters = [row["known_character"] for row in rows]
    variation_counts = [int(row["variation_count"]) for row in rows]
    total_counts = [int(row["total_minifigs"]) for row in rows]
    # Normalise once; `or ""` also covers a None cell from a short CSV row.
    genders = [(row.get("gender") or "").strip().lower() for row in rows]
    gender_colors = [GENDER_COLORS.get(gender, GENDER_COLORS["unknown"]) for gender in genders]
    positions = list(range(len(rows)))
    height = max(6, len(rows) * 0.24)
    background_color = "#d7d7e0"

    fig, ax = plt.subplots(figsize=(12.4, height))
    bars_total = ax.barh(
        positions,
        total_counts,
        color=background_color,
        edgecolor="#0d0d0d",
        linewidth=0.6,
        height=0.6,
        label="Total de minifigs",
    )
    bars_variations = ax.barh(
        positions,
        variation_counts,
        color=gender_colors,
        edgecolor="#0d0d0d",
        linewidth=0.8,
        height=0.36,
        label="Variations distinctes",
    )
    ax.set_yticks(positions)
    ax.set_yticklabels(characters)
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de minifigs")
    ax.set_title("Variations et total de minifigs par personnage (hors figurants)")
    ax.grid(True, axis="x", linestyle="--", alpha=0.25)
    # Cover both series so a hand-edited CSV cannot push a bar off the axis.
    ax.set_xlim(0, max(total_counts + variation_counts) + 1)

    for bar, value in zip(bars_total, total_counts):
        ax.text(
            value + 0.12,
            bar.get_y() + bar.get_height() / 2,
            str(value),
            va="center",
            fontsize=8,
            color="#1a1a1a",
        )
    for bar, value, total in zip(bars_variations, variation_counts, total_counts):
        if value == total:
            # The total label already sits at this exact position with the
            # same text; drawing it again only thickens the glyphs.
            continue
        ax.text(
            value + 0.12,
            bar.get_y() + bar.get_height() / 2,
            str(value),
            va="center",
            fontsize=8,
            color="#0d0d0d",
        )

    legend_entries = [
        Patch(facecolor=background_color, edgecolor="#0d0d0d", linewidth=0.6, label="Total de minifigs"),
        Patch(
            facecolor=GENDER_COLORS["unknown"],
            edgecolor="#0d0d0d",
            linewidth=0.8,
            label="Variations distinctes (couleur = genre)",
        ),
    ]
    # De-duplicate on the displayed label: several raw values ("", "unknown",
    # unrecognised genders) share the same fallback label and would otherwise
    # each add an identical legend entry.
    seen_labels = set()
    for gender, color in zip(genders, gender_colors):
        label = GENDER_LABELS.get(gender, "Inconnu")
        if label in seen_labels:
            continue
        seen_labels.add(label)
        legend_entries.append(
            Patch(
                facecolor=color,
                edgecolor="#0d0d0d",
                linewidth=0.6,
                label=label,
            )
        )
    ax.legend(handles=legend_entries, loc="lower right")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
année.""" rows = load_presence(presence_path) diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py index c03f9b0..843246b 100644 --- a/lib/rebrickable/minifig_characters.py +++ b/lib/rebrickable/minifig_characters.py @@ -34,6 +34,41 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]: return aggregates +def aggregate_variations_and_totals( + rows: Iterable[dict], + excluded_characters: Sequence[str] | None = None, +) -> List[dict]: + """Compte les variations uniques et le total de minifigs par personnage.""" + excluded = set(excluded_characters or []) + variations: Dict[str, set] = defaultdict(set) + totals: Dict[str, int] = defaultdict(int) + genders: Dict[str, str] = {} + for row in rows: + character = row["known_character"].strip() + fig_num = row["fig_num"].strip() + gender = row.get("gender", "").strip() + if character == "" or fig_num == "": + continue + if character in excluded: + continue + variations[character].add(fig_num) + totals[character] += 1 + if character not in genders: + genders[character] = gender + aggregates: List[dict] = [] + for character, fig_nums in variations.items(): + aggregates.append( + { + "known_character": character, + "gender": genders.get(character, ""), + "variation_count": len(fig_nums), + "total_minifigs": totals.get(character, 0), + } + ) + aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"])) + return aggregates + + def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par genre (fig_num unique).""" genders_by_fig: Dict[str, str] = {} @@ -78,6 +113,17 @@ def write_gender_counts(path: Path, rows: Sequence[dict]) -> None: writer.writerow(row) +def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None: + """Écrit le CSV comparant variations et total par personnage.""" + ensure_parent_dir(path) + fieldnames = ["known_character", "gender", "variation_count", 
"total_minifigs"] + with path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + + def load_sets_enriched(path: Path) -> Dict[str, str]: """Indexe les années par set_num.""" lookup: Dict[str, str] = {} diff --git a/scripts/plot_minifig_character_variations.py b/scripts/plot_minifig_character_variations.py new file mode 100644 index 0000000..33ad157 --- /dev/null +++ b/scripts/plot_minifig_character_variations.py @@ -0,0 +1,28 @@ +"""Trace le total de minifigs et leurs variations distinctes par personnage.""" + +from pathlib import Path + +from lib.plots.minifig_characters import plot_character_variations_vs_total +from lib.rebrickable.minifig_characters import ( + aggregate_variations_and_totals, + load_minifigs_by_set, + write_character_variations_totals, +) + + +MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") +COUNTS_PATH = Path("data/intermediate/minifig_character_variations_totals.csv") +DESTINATION_PATH = Path("figures/step22/minifig_character_variations_totals.png") +EXCLUDED_CHARACTERS = ["Figurant"] + + +def main() -> None: + """Construit le comparatif variations/total et trace le graphique associé.""" + rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) + aggregates = aggregate_variations_and_totals(rows, excluded_characters=EXCLUDED_CHARACTERS) + write_character_variations_totals(COUNTS_PATH, aggregates) + plot_character_variations_vs_total(COUNTS_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py index 817030a..4fc9140 100644 --- a/tests/test_minifig_characters.py +++ b/tests/test_minifig_characters.py @@ -5,10 +5,12 @@ from pathlib import Path from lib.rebrickable.minifig_characters import ( aggregate_by_character, aggregate_by_gender, + aggregate_variations_and_totals, aggregate_character_spans, aggregate_presence_by_year, 
# --- tests/test_minifig_characters.py ---


def test_aggregate_variations_and_totals_excludes_figurants() -> None:
    """Totals and variations are counted per character, extras being left out."""
    columns = ("set_num", "part_num", "known_character", "fig_num", "gender")
    records = (
        ("123-1", "head-a", "Owen Grady", "fig-owen-1", "male"),
        ("124-1", "head-b", "Owen Grady", "fig-owen-1", "male"),
        ("125-1", "head-c", "Owen Grady", "fig-owen-2", "male"),
        ("126-1", "head-d", "Ellie Sattler", "fig-ellie-1", "female"),
        ("127-1", "head-e", "Figurant", "fig-guard-1", "unknown"),
    )
    rows = [dict(zip(columns, record)) for record in records]
    expected = [
        {"known_character": "Owen Grady", "gender": "male", "variation_count": 2, "total_minifigs": 3},
        {"known_character": "Ellie Sattler", "gender": "female", "variation_count": 1, "total_minifigs": 1},
    ]

    result = aggregate_variations_and_totals(rows, excluded_characters=["Figurant"])

    assert result == expected


def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
    """The comparison CSV is written with its header and one line per row."""
    target = tmp_path / "variations.csv"
    expected = "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"

    write_character_variations_totals(
        target,
        [
            {"known_character": "A", "gender": "male", "variation_count": 2, "total_minifigs": 3},
            {"known_character": "B", "gender": "female", "variation_count": 1, "total_minifigs": 1},
        ],
    )

    assert target.read_text() == expected


# --- tests/test_minifig_characters_plot.py ---


def test_plot_character_variations_vs_total(tmp_path: Path) -> None:
    """A non-empty image is produced for the total-versus-variations chart."""
    source_csv = tmp_path / "variations.csv"
    image_path = tmp_path / "figures" / "step22" / "minifig_character_variations_totals.png"
    csv_lines = (
        "known_character,gender,variation_count,total_minifigs\n",
        "Owen Grady,male,2,3\n",
        "Ellie Sattler,female,1,2\n",
    )
    source_csv.write_text("".join(csv_lines))

    plot_character_variations_vs_total(source_csv, image_path)

    assert image_path.exists()
    assert image_path.stat().st_size > 0