1

Chiffre les personnages par année avec heatmap

This commit is contained in:
Richard Dern 2025-12-02 02:02:47 +01:00
parent 21f8b5532c
commit 4d37a654f2
6 changed files with 27 additions and 20 deletions

View File

@@ -29,3 +29,4 @@ Guard with Scarf,Figurant
Park Worker,Figurant Park Worker,Figurant
Park Guest in Dark Pink Vest Jacket,Figurant Park Guest in Dark Pink Vest Jacket,Figurant
Wildlife Guard,Figurant Wildlife Guard,Figurant
Kid,Figurant
1 alias canonical
29 Park Worker Figurant
30 Park Guest in Dark Pink Vest Jacket Figurant
31 Wildlife Guard Figurant
32 Kid Figurant

View File

@@ -48,7 +48,7 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None: def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
"""Trace une heatmap binaire indiquant la présence d'un personnage par année.""" """Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
rows = load_presence(presence_path) rows = load_presence(presence_path)
if not rows: if not rows:
return return
@@ -56,7 +56,7 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
characters = sorted( characters = sorted(
{row["known_character"] for row in rows}, {row["known_character"] for row in rows},
key=lambda name: ( key=lambda name: (
-sum(1 for r in rows if r["known_character"] == name and r["present"] == "1"), -sum(int(r["minifig_count"]) for r in rows if r["known_character"] == name),
name, name,
), ),
) )
@@ -64,11 +64,11 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
for character in characters: for character in characters:
row_values = [] row_values = []
for year in years: for year in years:
present = next( count = next(
(r["present"] for r in rows if r["known_character"] == character and int(r["year"]) == year), (r["minifig_count"] for r in rows if r["known_character"] == character and int(r["year"]) == year),
"0", "0",
) )
row_values.append(int(present)) row_values.append(int(count))
matrix.append(row_values) matrix.append(row_values)
height = max(5, len(characters) * 0.35) height = max(5, len(characters) * 0.35)
@@ -80,13 +80,15 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
ax.set_yticklabels(characters) ax.set_yticklabels(characters)
ax.set_xlabel("Année") ax.set_xlabel("Année")
ax.set_ylabel("Personnage") ax.set_ylabel("Personnage")
ax.set_title("Présence des personnages par année (hors figurants)") ax.set_title("Nombre de minifigs par personnage et par année (hors figurants)")
for i, character in enumerate(characters): for i, character in enumerate(characters):
for j, year in enumerate(years): for j, year in enumerate(years):
value = matrix[i][j] value = matrix[i][j]
if value == 1: if value == 1:
ax.text(j, i, "", ha="center", va="center", color="#0d0d0d", fontsize=7) ax.text(j, i, "", ha="center", va="center", color="#0d0d0d", fontsize=7)
fig.colorbar(cax, ax=ax, fraction=0.046, pad=0.04, label="Présence (1 si minifig)") elif value > 1:
ax.text(j, i, str(value), ha="center", va="center", color="#0d0d0d", fontsize=7)
fig.colorbar(cax, ax=ax, fraction=0.046, pad=0.04, label="Nombre de minifigs")
ensure_parent_dir(destination_path) ensure_parent_dir(destination_path)
fig.tight_layout() fig.tight_layout()
fig.savefig(destination_path, dpi=160) fig.savefig(destination_path, dpi=160)

View File

@@ -56,10 +56,11 @@ def aggregate_presence_by_year(
sets_years: Dict[str, str], sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None, excluded_characters: Sequence[str] | None = None,
) -> List[dict]: ) -> List[dict]:
"""Construit la présence binaire des personnages par année (hors figurants).""" """Compte le nombre total de minifigs par personnage et par année (hors figurants)."""
excluded = set(excluded_characters or []) excluded = set(excluded_characters or [])
presence: set[tuple[str, int]] = set() counts: Dict[tuple[str, int], int] = defaultdict(int)
years_all = {int(year) for year in sets_years.values()} years_all = {int(year) for year in sets_years.values()}
characters_all: Set[str] = set()
for row in minifigs_rows: for row in minifigs_rows:
character = row["known_character"].strip() character = row["known_character"].strip()
fig_num = row["fig_num"].strip() fig_num = row["fig_num"].strip()
@@ -70,17 +71,20 @@ def aggregate_presence_by_year(
year = sets_years.get(row["set_num"]) year = sets_years.get(row["set_num"])
if year is None: if year is None:
continue continue
presence.add((character, int(year))) year_int = int(year)
counts[(character, year_int)] += 1
characters_all.add(character)
years = sorted(years_all) years = sorted(years_all)
characters = sorted({character for character, _ in presence}) characters = sorted(characters_all)
results: List[dict] = [] results: List[dict] = []
for character in characters: for character in characters:
for year in years: for year in years:
count = counts.get((character, year), 0)
results.append( results.append(
{ {
"known_character": character, "known_character": character,
"year": str(year), "year": str(year),
"present": "1" if (character, year) in presence else "0", "minifig_count": str(count),
} }
) )
return results return results
@@ -89,7 +93,7 @@ def aggregate_presence_by_year(
def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None: def write_presence_by_year(path: Path, rows: Sequence[dict]) -> None:
"""Écrit la matrice présence binaire année/personnage.""" """Écrit la matrice présence binaire année/personnage."""
ensure_parent_dir(path) ensure_parent_dir(path)
fieldnames = ["known_character", "year", "present"] fieldnames = ["known_character", "year", "minifig_count"]
with path.open("w", newline="") as csv_file: with path.open("w", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader() writer.writeheader()

View File

@@ -19,7 +19,7 @@ EXCLUDED_CHARACTERS = ["Figurant"]
def main() -> None: def main() -> None:
"""Construit la présence par année et trace la heatmap binaire.""" """Construit le total par année et trace la heatmap."""
minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
sets_years = load_sets_enriched(SETS_ENRICHED_PATH) sets_years = load_sets_enriched(SETS_ENRICHED_PATH)
presence = aggregate_presence_by_year(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS) presence = aggregate_presence_by_year(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS)

View File

@@ -39,7 +39,7 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
"""Calcule la présence annuelle en excluant les figurants.""" """Calcule le total annuel en excluant les figurants."""
sets_path = tmp_path / "sets_enriched.csv" sets_path = tmp_path / "sets_enriched.csv"
sets_path.write_text( sets_path.write_text(
"set_num,year\n" "set_num,year\n"
@@ -55,6 +55,6 @@ def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
presence = aggregate_presence_by_year(minifigs_rows, sets_years, excluded_characters=["Figurant"]) presence = aggregate_presence_by_year(minifigs_rows, sets_years, excluded_characters=["Figurant"])
assert presence == [ assert presence == [
{"known_character": "Owen Grady", "year": "2020", "present": "1"}, {"known_character": "Owen Grady", "year": "2020", "minifig_count": "1"},
{"known_character": "Owen Grady", "year": "2021", "present": "0"}, {"known_character": "Owen Grady", "year": "2021", "minifig_count": "0"},
] ]

View File

@@ -14,10 +14,10 @@ def test_plot_character_year_presence(tmp_path: Path) -> None:
presence_path = tmp_path / "minifig_characters_year_presence.csv" presence_path = tmp_path / "minifig_characters_year_presence.csv"
destination = tmp_path / "figures" / "step22" / "minifig_characters_timeline.png" destination = tmp_path / "figures" / "step22" / "minifig_characters_timeline.png"
presence_path.write_text( presence_path.write_text(
"known_character,year,present\n" "known_character,year,minifig_count\n"
"Owen Grady,2020,1\n" "Owen Grady,2020,2\n"
"Owen Grady,2021,0\n" "Owen Grady,2021,0\n"
"Figurant,2020,1\n" "Figurant,2020,3\n"
) )
plot_character_year_presence(presence_path, destination) plot_character_year_presence(presence_path, destination)