1

Ajoute le diagramme de longévité des personnages

This commit is contained in:
Richard Dern 2025-12-02 11:01:01 +01:00
parent 4d37a654f2
commit f019deeef5
6 changed files with 210 additions and 4 deletions

View File

@ -246,4 +246,11 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minif
1. `source .venv/bin/activate` 1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_characters_timeline` 2. `python -m scripts.plot_minifig_characters_timeline`
Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, construit une matrice binaire personnage × année (hors figurants) enregistrée dans `data/intermediate/minifig_characters_year_presence.csv`, puis trace `figures/step22/minifig_characters_timeline.png` (heatmap binaire). Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule le nombre de minifigs par personnage et par année (hors figurants) dans `data/intermediate/minifig_characters_year_presence.csv`, puis trace `figures/step22/minifig_characters_timeline.png` (heatmap avec volumes).
### Étape 24 : longévité des personnages
1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_character_spans`
Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step23/minifig_character_spans.png` (barres horizontales des spans).

View File

@ -0,0 +1,63 @@
"""Diagramme de longévité des personnages (bornes d'apparition)."""
from pathlib import Path
from typing import List
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_spans(path: Path) -> List[dict]:
    """Load the per-character span rows from the CSV at *path*.

    The file is read through ``read_rows``; whatever that helper returns is
    handed back unchanged.
    """
    span_rows = read_rows(path)
    return span_rows
def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
    """Draw a horizontal bar chart showing how long each character appears.

    Rows are loaded with ``load_spans`` and must provide the columns
    ``known_character``, ``start_year``, ``end_year`` and ``total_minifigs``.
    One bar is drawn per row, from its start year to its end year (both
    inclusive), and annotated with the year range and the minifig count.

    Args:
        spans_path: CSV file holding the per-character spans.
        destination_path: Image file to write; its parent directory is
            prepared through ``ensure_parent_dir``.

    Returns:
        None. When the CSV yields no rows, nothing is drawn and no file is
        written.
    """
    rows = load_spans(spans_path)
    if not rows:
        return

    characters = [row["known_character"] for row in rows]
    starts = [int(row["start_year"]) for row in rows]
    ends = [int(row["end_year"]) for row in rows]
    counts = [int(row["total_minifigs"]) for row in rows]
    positions = list(range(len(rows)))
    # Both bounds are inclusive, so a single-year span is still one year wide.
    widths = [end - start + 1 for start, end in zip(starts, ends)]
    min_year = min(starts)
    max_year = max(ends)

    # Grow the figure with the number of characters so tick labels stay legible.
    height = max(5, len(rows) * 0.3)
    fig, ax = plt.subplots(figsize=(12, height))
    bars = ax.barh(
        positions,
        widths,
        left=starts,
        color="#1f77b4",
        edgecolor="#0d0d0d",
        linewidth=0.6,
    )
    ax.set_yticks(positions)
    ax.set_yticklabels(characters)
    ax.set_xlabel("Années d'apparition")
    ax.set_ylabel("Personnage")
    ax.set_title("Longévité des personnages (première à dernière apparition)")
    ax.set_xlim(min_year - 1, max_year + 1)
    ax.grid(True, axis="x", linestyle="--", alpha=0.25)

    for bar, start, end, count in zip(bars, starts, ends, counts):
        # Separate the two years with an en dash; without a separator the
        # range 2020 to 2022 would read as the single number "20202022".
        if start != end:
            label = f"{start}\u2013{end} ({count})"
        else:
            label = f"{start} ({count})"
        # Centre the label on the bar itself: the bar covers [start, end + 1],
        # so the midpoint of the years alone would sit half a year too far left.
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_y() + bar.get_height() / 2,
            label,
            ha="center",
            va="center",
            fontsize=8,
            color="#0d0d0d",
        )

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)

View File

@ -2,7 +2,7 @@
from collections import defaultdict from collections import defaultdict
from pathlib import Path from pathlib import Path
from typing import Dict, Iterable, List, Sequence from typing import Dict, Iterable, List, Sequence, Set
from lib.rebrickable.stats import read_rows from lib.rebrickable.stats import read_rows
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
@ -99,3 +99,55 @@ def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
writer.writeheader() writer.writeheader()
for row in rows: for row in rows:
writer.writerow(row) writer.writerow(row)
def aggregate_character_spans(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Calcule la période d'apparition de chaque personnage (bornes min/max des années observées)."""
excluded = set(excluded_characters or [])
spans: Dict[str, Dict[str, int]] = {}
total_counts: Dict[str, int] = defaultdict(int)
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year = sets_years.get(row["set_num"])
if year is None:
continue
year_int = int(year)
total_counts[character] += 1
current = spans.get(character)
if current is None:
spans[character] = {"start": year_int, "end": year_int}
else:
spans[character]["start"] = min(current["start"], year_int)
spans[character]["end"] = max(current["end"], year_int)
results: List[dict] = []
for character, bounds in spans.items():
results.append(
{
"known_character": character,
"start_year": str(bounds["start"]),
"end_year": str(bounds["end"]),
"total_minifigs": str(total_counts[character]),
}
)
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
return results
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character span rows to a CSV file at *path*.

    The parent directory is prepared through ``ensure_parent_dir``; the file
    then receives a header line followed by one line per row, with the
    columns ``known_character``, ``start_year``, ``end_year`` and
    ``total_minifigs``.
    """
    columns = ["known_character", "start_year", "end_year", "total_minifigs"]
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        span_writer = csv.DictWriter(handle, fieldnames=columns)
        span_writer.writeheader()
        span_writer.writerows(rows)

View File

@ -0,0 +1,31 @@
"""Trace la longévité des personnages représentés par les minifigs."""
from pathlib import Path
from lib.plots.minifig_character_spans import plot_character_spans
from lib.rebrickable.minifig_characters import (
aggregate_character_spans,
load_minifigs_by_set,
load_sets_enriched,
write_character_spans,
)
# Input CSV passed to load_minifigs_by_set.
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
# Input CSV passed to load_sets_enriched to obtain the year of each set.
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Intermediate CSV: written by write_character_spans, then read back by the plot.
SPANS_PATH = Path("data/intermediate/minifig_character_spans.csv")
# Output image produced by plot_character_spans.
DESTINATION_PATH = Path("figures/step23/minifig_character_spans.png")
# Character names left out of the aggregation (forwarded as excluded_characters).
EXCLUDED_CHARACTERS = ["Figurant"]
def main() -> None:
    """Build the character spans CSV, then draw the longevity chart from it."""
    character_spans = aggregate_character_spans(
        load_minifigs_by_set(MINIFIGS_BY_SET_PATH),
        load_sets_enriched(SETS_ENRICHED_PATH),
        excluded_characters=EXCLUDED_CHARACTERS,
    )
    # The chart is rendered from the file just written, not from memory.
    write_character_spans(SPANS_PATH, character_spans)
    plot_character_spans(SPANS_PATH, DESTINATION_PATH)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,25 @@
"""Tests du diagramme de longévité des personnages."""
import matplotlib
from pathlib import Path
from lib.plots.minifig_character_spans import plot_character_spans
matplotlib.use("Agg")
def test_plot_character_spans(tmp_path: Path) -> None:
    """Render the span chart from a small CSV and check a non-empty image exists."""
    csv_lines = [
        "known_character,start_year,end_year,total_minifigs",
        "Owen Grady,2020,2022,3",
        "Figurant,2019,2020,2",
    ]
    source_csv = tmp_path / "minifig_character_spans.csv"
    source_csv.write_text("\n".join(csv_lines) + "\n")
    # The nested output directory does not exist before the call.
    image_path = tmp_path / "figures" / "step23" / "minifig_character_spans.png"

    plot_character_spans(source_csv, image_path)

    assert image_path.exists()
    assert image_path.stat().st_size > 0

View File

@ -2,8 +2,13 @@
from pathlib import Path from pathlib import Path
from lib.rebrickable.minifig_characters import aggregate_by_character, write_character_counts from lib.rebrickable.minifig_characters import (
from lib.rebrickable.minifig_characters import aggregate_presence_by_year, write_presence_by_year, load_sets_enriched aggregate_by_character,
aggregate_character_spans,
aggregate_presence_by_year,
load_sets_enriched,
write_character_counts,
)
def test_aggregate_by_character_counts_unique_figs() -> None: def test_aggregate_by_character_counts_unique_figs() -> None:
@ -58,3 +63,26 @@ def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
{"known_character": "Owen Grady", "year": "2020", "minifig_count": "1"}, {"known_character": "Owen Grady", "year": "2020", "minifig_count": "1"},
{"known_character": "Owen Grady", "year": "2021", "minifig_count": "0"}, {"known_character": "Owen Grady", "year": "2021", "minifig_count": "0"},
] ]
def test_aggregate_character_spans_excludes_figurants(tmp_path: Path) -> None:
    """Check the min/max year bounds per character, with excluded names dropped."""
    sets_csv = tmp_path / "sets_enriched.csv"
    sets_csv.write_text("set_num,year\n123-1,2020\n124-1,2021\n125-1,2022\n")
    years_by_set = load_sets_enriched(sets_csv)

    appearances = [
        ("123-1", "Owen Grady", "fig-owen", "head-a"),
        ("124-1", "Owen Grady", "fig-owen", "head-a"),
        ("125-1", "Figurant", "fig-guard", "head-b"),
    ]
    minifigs_rows = [
        {"set_num": set_num, "known_character": character, "fig_num": fig_num, "part_num": part_num}
        for set_num, character, fig_num, part_num in appearances
    ]

    expected = [
        {"known_character": "Owen Grady", "start_year": "2020", "end_year": "2021", "total_minifigs": "2"},
    ]
    result = aggregate_character_spans(minifigs_rows, years_by_set, excluded_characters=["Figurant"])

    assert result == expected