diff --git a/README.md b/README.md
index 2623e19..af90b27 100644
--- a/README.md
+++ b/README.md
@@ -240,3 +240,10 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
 2. `python -m scripts.plot_minifig_characters`
 
 Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées).
+
+### Étape 23 : présence annuelle des personnages
+
+1. `source .venv/bin/activate`
+2. `python -m scripts.plot_minifig_characters_timeline`
+
+Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, construit une matrice binaire personnage × année (hors figurants) enregistrée dans `data/intermediate/minifig_characters_year_presence.csv`, puis trace `figures/step22/minifig_characters_timeline.png` (heatmap binaire).
diff --git a/lib/plots/minifig_characters.py b/lib/plots/minifig_characters.py
index 2c43fde..4eccf89 100644
--- a/lib/plots/minifig_characters.py
+++ b/lib/plots/minifig_characters.py
@@ -14,6 +14,11 @@ def load_counts(path: Path) -> List[dict]:
     return read_rows(path)
 
 
+def load_presence(path: Path) -> List[dict]:
+    """Load the per-year/per-character presence CSV."""
+    return read_rows(path)
+
+
 def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
     """Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
     rows = load_counts(counts_path)
@@ -40,3 +45,51 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
     fig.tight_layout()
     fig.savefig(destination_path, dpi=160)
     plt.close(fig)
+
+
+def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
+    """Draw a binary heatmap of character presence per year.
+
+    Reads rows with `known_character`, `year` and `present` columns from
+    `presence_path` and saves the figure to `destination_path`. Nothing is
+    drawn when the CSV has no rows. Characters are ordered by decreasing
+    number of rows with `present == "1"`, then by name.
+    """
+    rows = load_presence(presence_path)
+    if not rows:
+        return
+    # Index the rows once instead of rescanning them for every character and cell.
+    cells = {}
+    present_years = {}
+    for row in rows:
+        character = row["known_character"]
+        key = (character, int(row["year"]))
+        present_years.setdefault(character, 0)
+        if row["present"] == "1":
+            present_years[character] += 1
+        # Keep the first row seen for a cell, as the previous linear search did.
+        if key not in cells:
+            cells[key] = int(row["present"])
+    years = sorted({year for _, year in cells})
+    characters = sorted(present_years, key=lambda name: (-present_years[name], name))
+    matrix = [[cells.get((character, year), 0) for year in years] for character in characters]
+
+    height = max(5, len(characters) * 0.35)
+    fig, ax = plt.subplots(figsize=(12, height))
+    cax = ax.imshow(matrix, aspect="auto", cmap="Greens", interpolation="nearest")
+    ax.set_xticks(range(len(years)))
+    ax.set_xticklabels(years, rotation=45, ha="right")
+    ax.set_yticks(range(len(characters)))
+    ax.set_yticklabels(characters)
+    ax.set_xlabel("Année")
+    ax.set_ylabel("Personnage")
+    ax.set_title("Présence des personnages par année (hors figurants)")
+    for i, row_values in enumerate(matrix):
+        for j, value in enumerate(row_values):
+            if value == 1:
+                ax.text(j, i, "●", ha="center", va="center", color="#0d0d0d", fontsize=7)
+    fig.colorbar(cax, ax=ax, fraction=0.046, pad=0.04, label="Présence (1 si minifig)")
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=160)
+    plt.close(fig)
diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py
index 0d24b13..f3a7a69 100644
--- a/lib/rebrickable/minifig_characters.py
+++ b/lib/rebrickable/minifig_characters.py
@@ -39,3 +39,66 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
         writer.writeheader()
         for row in rows:
             writer.writerow(row)
+
+
+def load_sets_enriched(path: Path) -> Dict[str, str]:
+    """Index set years by `set_num`; later rows override earlier duplicates."""
+    lookup: Dict[str, str] = {}
+    # newline="" lets the csv module handle line endings itself.
+    with path.open(newline="") as sets_file:
+        reader = csv.DictReader(sets_file)
+        for row in reader:
+            lookup[row["set_num"]] = row["year"]
+    return lookup
+
+
+def aggregate_presence_by_year(
+    minifigs_rows: Iterable[dict],
+    sets_years: Dict[str, str],
+    excluded_characters: Sequence[str] | None = None,
+) -> List[dict]:
+    """Build the binary character-by-year presence matrix.
+
+    Rows with a blank `known_character` or `fig_num`, an excluded character,
+    or a `set_num` missing from `sets_years` are ignored. Every remaining
+    character gets one output row per year found in `sets_years`, with
+    `present` set to "1" or "0".
+    """
+    excluded = set(excluded_characters or [])
+    presence: set[tuple[str, int]] = set()
+    years_all = {int(year) for year in sets_years.values()}
+    for row in minifigs_rows:
+        character = row["known_character"].strip()
+        fig_num = row["fig_num"].strip()
+        if character == "" or fig_num == "":
+            continue
+        if character in excluded:
+            continue
+        year = sets_years.get(row["set_num"])
+        if year is None:
+            continue
+        presence.add((character, int(year)))
+    years = sorted(years_all)
+    characters = sorted({character for character, _ in presence})
+    results: List[dict] = []
+    for character in characters:
+        for year in years:
+            results.append(
+                {
+                    "known_character": character,
+                    "year": str(year),
+                    "present": "1" if (character, year) in presence else "0",
+                }
+            )
+    return results
+
+
+def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
+    """Write the binary year/character presence matrix as CSV."""
+    ensure_parent_dir(path)
+    fieldnames = ["known_character", "year", "present"]
+    with path.open("w", newline="") as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
diff --git a/scripts/plot_minifig_characters_timeline.py b/scripts/plot_minifig_characters_timeline.py
new file mode 100644
index 0000000..98d9708
--- /dev/null
+++ b/scripts/plot_minifig_characters_timeline.py
@@ -0,0 +1,31 @@
+"""Plot the yearly presence of characters represented by minifigs (extras excluded)."""
+
+from pathlib import Path
+
+from lib.plots.minifig_characters import plot_character_year_presence
+from lib.rebrickable.minifig_characters import (
+    aggregate_presence_by_year,
+    load_minifigs_by_set,
+    load_sets_enriched,
+    write_presence_by_year,
+)
+
+
+MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
+SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
+PRESENCE_PATH = Path("data/intermediate/minifig_characters_year_presence.csv")
+DESTINATION_PATH = Path("figures/step22/minifig_characters_timeline.png")
+EXCLUDED_CHARACTERS = ["Figurant"]
+
+
+def main() -> None:
+    """Build the per-year presence table and draw the binary heatmap."""
+    minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
+    sets_years = load_sets_enriched(SETS_ENRICHED_PATH)
+    presence = aggregate_presence_by_year(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS)
+    write_presence_by_year(PRESENCE_PATH, presence)
+    plot_character_year_presence(PRESENCE_PATH, DESTINATION_PATH)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py
index 9dd5944..f35e5c1 100644
--- a/tests/test_minifig_characters.py
+++ b/tests/test_minifig_characters.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 from lib.rebrickable.minifig_characters import aggregate_by_character, write_character_counts
+from lib.rebrickable.minifig_characters import aggregate_presence_by_year, write_presence_by_year, load_sets_enriched
 
 
 def test_aggregate_by_character_counts_unique_figs() -> None:
@@ -35,3 +36,25 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
     write_character_counts(destination, rows)
 
     assert destination.read_text() == "known_character,minifig_count\nA,2\nB,1\n"
+
+
+def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
+    """Compute yearly presence while excluding the "Figurant" character."""
+    sets_path = tmp_path / "sets_enriched.csv"
+    sets_path.write_text(
+        "set_num,year\n"
+        "123-1,2020\n"
+        "124-1,2021\n"
+    )
+    minifigs_rows = [
+        {"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"},
+        {"set_num": "124-1", "known_character": "Figurant", "fig_num": "fig-guard", "part_num": "head-b"},
+    ]
+    sets_years = load_sets_enriched(sets_path)
+
+    presence = aggregate_presence_by_year(minifigs_rows, sets_years, excluded_characters=["Figurant"])
+
+    assert presence == [
+        {"known_character": "Owen Grady", "year": "2020", "present": "1"},
+        {"known_character": "Owen Grady", "year": "2021", "present": "0"},
+    ]
diff --git a/tests/test_minifig_characters_timeline_plot.py b/tests/test_minifig_characters_timeline_plot.py
new file mode 100644
index 0000000..74c96a1
--- /dev/null
+++ b/tests/test_minifig_characters_timeline_plot.py
@@ -0,0 +1,26 @@
+"""Tests for the yearly character presence chart."""
+
+import matplotlib
+from pathlib import Path
+
+from lib.plots.minifig_characters import plot_character_year_presence
+
+
+matplotlib.use("Agg")
+
+
+def test_plot_character_year_presence(tmp_path: Path) -> None:
+    """Generate the binary character × year heatmap."""
+    presence_path = tmp_path / "minifig_characters_year_presence.csv"
+    destination = tmp_path / "figures" / "step22" / "minifig_characters_timeline.png"
+    presence_path.write_text(
+        "known_character,year,present\n"
+        "Owen Grady,2020,1\n"
+        "Owen Grady,2021,0\n"
+        "Figurant,2020,1\n"
+    )
+
+    plot_character_year_presence(presence_path, destination)
+
+    assert destination.exists()
+    assert destination.stat().st_size > 0