1

Aère le graphique des nouveaux personnages

This commit is contained in:
Richard Dern 2025-12-03 22:29:19 +01:00
parent ad44796759
commit 46cef55a75
6 changed files with 240 additions and 1 deletions

View File

@ -244,11 +244,14 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
1. `source .venv/bin/activate` 1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_characters` 2. `python -m scripts.plot_minifig_characters`
3. `python -m scripts.plot_minifig_character_variations` 3. `python -m scripts.plot_minifig_character_variations`
4. `python -m scripts.plot_minifig_new_characters`
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figurants, calcule par personnage le nombre de variations (fig_num distincts) et le total réel de minifigs présentes dans les sets filtrés, sérialise `data/intermediate/minifig_character_variations_totals.csv`, puis trace `figures/step22/minifig_character_variations_totals.png` en superposant un fond neutre (total) et une jauge colorée (variations, couleur = genre). Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figurants, calcule par personnage le nombre de variations (fig_num distincts) et le total réel de minifigs présentes dans les sets filtrés, sérialise `data/intermediate/minifig_character_variations_totals.csv`, puis trace `figures/step22/minifig_character_variations_totals.png` en superposant un fond neutre (total) et une jauge colorée (variations, couleur = genre).
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
### Étape 23 : présence annuelle des personnages ### Étape 23 : présence annuelle des personnages
1. `source .venv/bin/activate` 1. `source .venv/bin/activate`

View File

@ -1,12 +1,13 @@
"""Graphique du nombre de minifigs par personnage.""" """Graphique du nombre de minifigs par personnage."""
from pathlib import Path from pathlib import Path
from typing import List from typing import Dict, List
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.patches import Patch from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
from lib.rebrickable.stats import read_rows from lib.rebrickable.stats import read_rows
@ -21,6 +22,11 @@ def load_presence(path: Path) -> List[dict]:
return read_rows(path) return read_rows(path)
def load_new_characters(path: Path) -> List[dict]:
    """Load the CSV of characters introduced per year.

    Delegates to ``read_rows`` and returns its result unchanged.
    """
    rows = read_rows(path)
    return rows
def load_variations_and_totals(path: Path) -> List[dict]: def load_variations_and_totals(path: Path) -> List[dict]:
"""Charge le CSV comparatif variations/total par personnage.""" """Charge le CSV comparatif variations/total par personnage."""
return read_rows(path) return read_rows(path)
@ -199,3 +205,73 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
fig.tight_layout() fig.tight_layout()
fig.savefig(destination_path, dpi=160) fig.savefig(destination_path, dpi=160)
plt.close(fig) plt.close(fig)
def _milestone_label_offset(rank: int, step: float) -> float:
    """Return the horizontal shift for the ``rank``-th milestone label of a year.

    Labels alternate right/left of the year line and move one ``step`` further
    out every two labels: +step, -step, +2*step, -2*step, ...
    """
    magnitude = step * (rank // 2 + 1)
    return -magnitude if rank % 2 == 1 else magnitude


def plot_new_characters_per_year(
    counts_path: Path,
    milestones_path: Path,
    destination_path: Path,
    start_year: int,
    end_year: int,
) -> None:
    """Draw a bar chart of the number of new characters introduced per year.

    Args:
        counts_path: CSV with ``year`` and ``new_characters`` columns.
        milestones_path: Milestones file passed to ``load_milestones``; each
            milestone is read through its ``year`` and ``description`` keys.
        destination_path: Image file to write (parent directory is created).
        start_year: First year shown on the x axis (inclusive).
        end_year: Last year shown on the x axis (inclusive).

    Nothing is drawn or written when the counts CSV yields no rows.
    """
    rows = load_new_characters(counts_path)
    if not rows:
        return
    counts = {int(row["year"]): int(row["new_characters"]) for row in rows}
    years = list(range(start_year, end_year + 1))
    # Years missing from the CSV are displayed as zero.
    values = [counts.get(year, 0) for year in years]

    fig_width = max(8.5, len(years) * 0.45 + 2.5)
    fig, ax = plt.subplots(figsize=(fig_width, 5.4))
    bars = ax.bar(years, values, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.7)
    ax.set_xlabel("Année")
    ax.set_ylabel("Nouveaux personnages")
    ax.set_title("Personnages introduits par an (hors figurants)")
    ax.grid(axis="y", linestyle="--", alpha=0.3)
    ax.set_xticks(years)
    ax.set_xticklabels(years, rotation=45, ha="right")
    ax.set_xlim(start_year - 0.6, end_year + 0.6)

    # Keep a roomy axis (at least 20) so milestone labels have space, but grow
    # it when the data exceeds that floor so tall bars and their value labels
    # are never clipped.
    y_max = max(values, default=0)
    upper_limit = max(20, y_max + 1)
    ax.set_ylim(0, upper_limit)

    for bar, value in zip(bars, values):
        if value == 0:
            continue
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center", va="bottom", fontsize=8)

    milestones = load_milestones(milestones_path)
    if milestones:
        milestones_in_range = sorted(
            [m for m in milestones if start_year <= m["year"] <= end_year],
            key=lambda m: (m["year"], m["description"]),
        )
        offset_step = 0.25
        # Number of labels already placed for each year, used to spread them.
        offset_map: Dict[int, int] = {}
        top_limit = ax.get_ylim()[1]
        label_y = top_limit * 0.96
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = offset_map.get(year, 0)
            offset_map[year] = count_for_year + 1
            text_x = year + _milestone_label_offset(count_for_year, offset_step)
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65, zorder=1)
            ax.text(
                text_x,
                label_y,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)

View File

@ -69,6 +69,47 @@ def aggregate_variations_and_totals(
return aggregates return aggregates
def aggregate_new_characters_by_year(
    minifigs_rows: Iterable[dict],
    sets_years: Dict[str, str],
    excluded_characters: Sequence[str] | None = None,
    start_year: int | None = None,
    end_year: int | None = None,
) -> List[dict]:
    """Count how many characters are introduced each year.

    A character's introduction year is the earliest year, among the sets it
    appears in, found in ``sets_years``. Rows with a blank character or
    ``fig_num``, an excluded character, or a set absent from ``sets_years``
    are ignored. Introductions outside ``start_year``/``end_year`` (each bound
    optional, inclusive) are dropped. When both bounds are given, every year of
    the range is present in the result, with zero where needed.

    Returns:
        Rows sorted by year, each ``{"year": str, "new_characters": str}``.
    """
    ignored = set(excluded_characters or [])
    debut_by_character: Dict[str, int] = {}
    for entry in minifigs_rows:
        name = entry["known_character"].strip()
        figure_id = entry["fig_num"].strip()
        if not name or not figure_id:
            continue
        if name in ignored:
            continue
        raw_year = sets_years.get(entry["set_num"])
        if raw_year is None:
            continue
        year = int(raw_year)
        # Keep the earliest year seen so far for this character.
        debut_by_character[name] = min(year, debut_by_character.get(name, year))

    tally: Dict[int, int] = {}
    if start_year is not None and end_year is not None:
        # Pre-fill the whole range so empty years are reported as zero.
        tally = dict.fromkeys(range(start_year, end_year + 1), 0)
    for year in debut_by_character.values():
        too_early = start_year is not None and year < start_year
        too_late = end_year is not None and year > end_year
        if too_early or too_late:
            continue
        tally[year] = tally.get(year, 0) + 1

    return [
        {"year": str(year), "new_characters": str(tally[year])}
        for year in sorted(tally)
    ]
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
"""Compte les minifigs distinctes par genre (fig_num unique).""" """Compte les minifigs distinctes par genre (fig_num unique)."""
genders_by_fig: Dict[str, str] = {} genders_by_fig: Dict[str, str] = {}
@ -102,6 +143,17 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
writer.writerow(row) writer.writerow(row)
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-year new-character counts to a CSV file.

    The parent directory is created first; the file has a header line followed
    by one ``year,new_characters`` line per row.
    """
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=["year", "new_characters"])
        writer.writeheader()
        writer.writerows(rows)
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None: def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par genre.""" """Écrit le CSV des comptes par genre."""
ensure_parent_dir(path) ensure_parent_dir(path)

View File

@ -0,0 +1,40 @@
"""Trace le nombre de nouveaux personnages introduits par an (hors figurants)."""
from pathlib import Path
from lib.plots.minifig_characters import plot_new_characters_per_year
from lib.rebrickable.minifig_characters import (
aggregate_new_characters_by_year,
load_minifigs_by_set,
load_sets_enriched,
write_new_characters_by_year,
)
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
COUNTS_PATH = Path("data/intermediate/minifig_new_characters_by_year.csv")
DESTINATION_PATH = Path("figures/step23/minifig_new_characters_per_year.png")
MILESTONES_PATH = Path("config/milestones.csv")
EXCLUDED_CHARACTERS = ["Figurant"]
START_YEAR = 2015
END_YEAR = 2025
def main() -> None:
    """Compute the yearly new-character counts, save them, then plot them."""
    sets_years = load_sets_enriched(SETS_ENRICHED_PATH)
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    yearly_counts = aggregate_new_characters_by_year(
        minifig_rows,
        sets_years,
        excluded_characters=EXCLUDED_CHARACTERS,
        start_year=START_YEAR,
        end_year=END_YEAR,
    )
    write_new_characters_by_year(COUNTS_PATH, yearly_counts)
    # The plot reads back the CSV that was just written.
    plot_new_characters_per_year(
        COUNTS_PATH,
        MILESTONES_PATH,
        DESTINATION_PATH,
        START_YEAR,
        END_YEAR,
    )
if __name__ == "__main__":
main()

View File

@ -5,11 +5,13 @@ from pathlib import Path
from lib.rebrickable.minifig_characters import ( from lib.rebrickable.minifig_characters import (
aggregate_by_character, aggregate_by_character,
aggregate_by_gender, aggregate_by_gender,
aggregate_new_characters_by_year,
aggregate_variations_and_totals, aggregate_variations_and_totals,
aggregate_character_spans, aggregate_character_spans,
aggregate_presence_by_year, aggregate_presence_by_year,
load_sets_enriched, load_sets_enriched,
write_character_counts, write_character_counts,
write_new_characters_by_year,
write_character_variations_totals, write_character_variations_totals,
write_gender_counts, write_gender_counts,
) )
@ -137,6 +139,39 @@ def test_aggregate_by_gender_counts_unique_figs() -> None:
] ]
def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
    """Count new characters per year while honouring the requested range."""
    sets_path = tmp_path / "sets_enriched.csv"
    sets_path.write_text(
        "set_num,year\n"
        "123-1,2015\n"
        "124-1,2016\n"
        "125-1,2017\n"
        "126-1,2014\n"
    )
    sets_years = load_sets_enriched(sets_path)
    # (set, character, figure, head part); Alan Grant debuts before the range.
    appearances = [
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-c"),
        ("126-1", "Alan Grant", "fig-grant-1", "head-d"),
    ]
    minifigs_rows = [
        {"set_num": set_num, "known_character": character, "fig_num": fig_num, "part_num": part_num}
        for set_num, character, fig_num, part_num in appearances
    ]

    counts = aggregate_new_characters_by_year(
        minifigs_rows,
        sets_years,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2017,
    )

    expected = [
        {"year": "2015", "new_characters": "1"},
        {"year": "2016", "new_characters": "0"},
        {"year": "2017", "new_characters": "1"},
    ]
    assert counts == expected
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None: def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
"""Écrit le CSV des comptes par personnage.""" """Écrit le CSV des comptes par personnage."""
destination = tmp_path / "counts.csv" destination = tmp_path / "counts.csv"
@ -176,6 +211,19 @@ def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
assert destination.read_text() == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n" assert destination.read_text() == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"
def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
    """Write the new-characters-per-year CSV and check its exact content."""
    destination = tmp_path / "new_characters.csv"
    rows = [
        {"year": year, "new_characters": count}
        for year, count in (("2015", "3"), ("2016", "1"))
    ]

    write_new_characters_by_year(destination, rows)

    written = destination.read_text()
    assert written == "year,new_characters\n2015,3\n2016,1\n"
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
"""Calcule le total annuel en excluant les figurants.""" """Calcule le total annuel en excluant les figurants."""
sets_path = tmp_path / "sets_enriched.csv" sets_path = tmp_path / "sets_enriched.csv"

View File

@ -4,6 +4,7 @@ import matplotlib
from pathlib import Path from pathlib import Path
from lib.plots.minifig_characters import plot_minifigs_per_character from lib.plots.minifig_characters import plot_minifigs_per_character
from lib.plots.minifig_characters import plot_new_characters_per_year
from lib.plots.minifig_characters import plot_character_variations_vs_total from lib.plots.minifig_characters import plot_character_variations_vs_total
@ -40,3 +41,22 @@ def test_plot_character_variations_vs_total(tmp_path: Path) -> None:
assert destination.exists() assert destination.exists()
assert destination.stat().st_size > 0 assert destination.stat().st_size > 0
def test_plot_new_characters_per_year(tmp_path: Path) -> None:
    """Render the new-characters-per-year figure and check a file is produced."""
    milestones_path = tmp_path / "milestones.csv"
    milestones_path.write_text("year,description\n2016,Spin-off\n")
    counts_path = tmp_path / "new_characters.csv"
    counts_path.write_text(
        "year,new_characters\n"
        "2015,2\n"
        "2016,0\n"
        "2017,1\n"
    )
    # Nested destination: the plot function must create the parent directories.
    destination = tmp_path / "figures" / "step23" / "minifig_new_characters_per_year.png"

    plot_new_characters_per_year(
        counts_path,
        milestones_path,
        destination,
        start_year=2015,
        end_year=2017,
    )

    assert destination.exists()
    assert destination.stat().st_size > 0