Ajoute le graphique variations vs total par personnage
This commit is contained in:
parent
e8cd0d346d
commit
ad44796759
@ -243,8 +243,11 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
|
|||||||
|
|
||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_characters`
|
2. `python -m scripts.plot_minifig_characters`
|
||||||
|
3. `python -m scripts.plot_minifig_character_variations`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
||||||
|
|
||||||
|
Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figurants, calcule par personnage le nombre de variations (`fig_num` distincts) et le total réel de minifigs présentes dans les sets filtrés, sérialise `data/intermediate/minifig_character_variations_totals.csv`, puis trace `figures/step22/minifig_character_variations_totals.png` en superposant un fond neutre (total) et une jauge colorée (variations, couleur = genre).
|
||||||
|
|
||||||
### Étape 23 : présence annuelle des personnages
|
### Étape 23 : présence annuelle des personnages
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,11 @@ def load_presence(path: Path) -> List[dict]:
|
|||||||
return read_rows(path)
|
return read_rows(path)
|
||||||
|
|
||||||
|
|
||||||
|
def load_variations_and_totals(path: Path) -> List[dict]:
    """Load the per-character variations/total comparison CSV.

    Delegates to ``read_rows`` and hands back whatever it returns, unchanged.
    """
    loaded_rows = read_rows(path)
    return loaded_rows
|
||||||
|
|
||||||
|
|
||||||
def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
|
def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
|
"""Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
|
||||||
rows = load_counts(counts_path)
|
rows = load_counts(counts_path)
|
||||||
@ -68,6 +73,86 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
|
|||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_character_variations_vs_total(counts_path: Path, destination_path: Path) -> None:
    """Overlay the total minifig count and the distinct variation count per character.

    The rows are read with ``load_variations_and_totals``. For every row a wide
    neutral bar shows ``total_minifigs`` and a narrower bar, coloured from
    ``GENDER_COLORS``, shows ``variation_count``. The figure is saved to
    ``destination_path`` (its parent directory is created first).

    Nothing is written when the CSV yields no rows.

    Raises:
        KeyError: if a row lacks ``known_character``, ``variation_count`` or
            ``total_minifigs``.
        ValueError: if a count cannot be parsed as an integer.
    """
    rows = load_variations_and_totals(counts_path)
    if not rows:
        return

    characters = [row["known_character"] for row in rows]
    variation_counts = [int(row["variation_count"]) for row in rows]
    total_counts = [int(row["total_minifigs"]) for row in rows]
    # Normalise the gender once; a missing or None value is treated as blank
    # instead of crashing on ``.strip()``.
    gender_keys = [(row.get("gender") or "").strip().lower() for row in rows]
    gender_colors = [GENDER_COLORS.get(key, GENDER_COLORS["unknown"]) for key in gender_keys]
    positions = list(range(len(rows)))
    height = max(6, len(rows) * 0.24)
    background_color = "#d7d7e0"

    fig, ax = plt.subplots(figsize=(12.4, height))
    try:
        bars_total = ax.barh(
            positions,
            total_counts,
            color=background_color,
            edgecolor="#0d0d0d",
            linewidth=0.6,
            height=0.6,
            label="Total de minifigs",
        )
        bars_variations = ax.barh(
            positions,
            variation_counts,
            color=gender_colors,
            edgecolor="#0d0d0d",
            linewidth=0.8,
            height=0.36,
            label="Variations distinctes",
        )
        ax.set_yticks(positions)
        ax.set_yticklabels(characters)
        # First row of the CSV ends up at the top of the chart.
        ax.invert_yaxis()
        ax.set_xlabel("Nombre de minifigs")
        ax.set_title("Variations et total de minifigs par personnage (hors figurants)")
        ax.grid(True, axis="x", linestyle="--", alpha=0.25)
        # Consider both series so that no bar is clipped even if the CSV holds
        # a variation count larger than the total.
        max_value = max(total_counts + variation_counts)
        ax.set_xlim(0, max_value + 1)

        # Print each value just to the right of its bar.
        for bar, value in zip(bars_total, total_counts):
            ax.text(
                value + 0.12,
                bar.get_y() + bar.get_height() / 2,
                str(value),
                va="center",
                fontsize=8,
                color="#1a1a1a",
            )
        for bar, value in zip(bars_variations, variation_counts):
            ax.text(
                value + 0.12,
                bar.get_y() + bar.get_height() / 2,
                str(value),
                va="center",
                fontsize=8,
                color="#0d0d0d",
            )

        legend_entries = [
            Patch(facecolor=background_color, edgecolor="#0d0d0d", linewidth=0.6, label="Total de minifigs"),
            Patch(
                facecolor=GENDER_COLORS["unknown"],
                edgecolor="#0d0d0d",
                linewidth=0.8,
                label="Variations distinctes (couleur = genre)",
            ),
        ]
        # Deduplicate on the displayed label rather than on the raw gender key:
        # several keys (e.g. a blank value and an unrecognised one) can fall
        # back to the same "Inconnu" label and would otherwise be listed twice.
        seen_labels = set()
        for key, color in zip(gender_keys, gender_colors):
            label = GENDER_LABELS.get(key, "Inconnu")
            if label in seen_labels:
                continue
            seen_labels.add(label)
            legend_entries.append(
                Patch(
                    facecolor=color,
                    edgecolor="#0d0d0d",
                    linewidth=0.6,
                    label=label,
                )
            )
        ax.legend(handles=legend_entries, loc="lower right")

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=160)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
|
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
|
"""Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
|
||||||
rows = load_presence(presence_path)
|
rows = load_presence(presence_path)
|
||||||
|
|||||||
@ -34,6 +34,41 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
|
|||||||
return aggregates
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_variations_and_totals(
|
||||||
|
rows: Iterable[dict],
|
||||||
|
excluded_characters: Sequence[str] | None = None,
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Compte les variations uniques et le total de minifigs par personnage."""
|
||||||
|
excluded = set(excluded_characters or [])
|
||||||
|
variations: Dict[str, set] = defaultdict(set)
|
||||||
|
totals: Dict[str, int] = defaultdict(int)
|
||||||
|
genders: Dict[str, str] = {}
|
||||||
|
for row in rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
gender = row.get("gender", "").strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
variations[character].add(fig_num)
|
||||||
|
totals[character] += 1
|
||||||
|
if character not in genders:
|
||||||
|
genders[character] = gender
|
||||||
|
aggregates: List[dict] = []
|
||||||
|
for character, fig_nums in variations.items():
|
||||||
|
aggregates.append(
|
||||||
|
{
|
||||||
|
"known_character": character,
|
||||||
|
"gender": genders.get(character, ""),
|
||||||
|
"variation_count": len(fig_nums),
|
||||||
|
"total_minifigs": totals.get(character, 0),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"]))
|
||||||
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
||||||
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
||||||
genders_by_fig: Dict[str, str] = {}
|
genders_by_fig: Dict[str, str] = {}
|
||||||
@ -78,6 +113,17 @@ def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-character variations/total comparison CSV.

    The parent directory of ``path`` is created through ``ensure_parent_dir``;
    the file gets a header line followed by one line per entry of ``rows``.
    """
    columns = ["known_character", "gender", "variation_count", "total_minifigs"]
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
def load_sets_enriched(path: Path) -> Dict[str, str]:
|
def load_sets_enriched(path: Path) -> Dict[str, str]:
|
||||||
"""Indexe les années par set_num."""
|
"""Indexe les années par set_num."""
|
||||||
lookup: Dict[str, str] = {}
|
lookup: Dict[str, str] = {}
|
||||||
|
|||||||
28
scripts/plot_minifig_character_variations.py
Normal file
28
scripts/plot_minifig_character_variations.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
"""Trace le total de minifigs et leurs variations distinctes par personnage."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_characters import plot_character_variations_vs_total
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_variations_and_totals,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_character_variations_totals,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Input: one row per minifig occurrence in a set.
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
# Intermediate CSV written by this script and read back by the plot function.
COUNTS_PATH = Path("data/intermediate/minifig_character_variations_totals.csv")
# Output image.
DESTINATION_PATH = Path("figures/step22/minifig_character_variations_totals.png")
# Characters left out of the comparison.
EXCLUDED_CHARACTERS = ["Figurant"]


def main() -> None:
    """Build the variations/total comparison CSV, then draw the matching chart."""
    aggregates = aggregate_variations_and_totals(
        load_minifigs_by_set(MINIFIGS_BY_SET_PATH),
        excluded_characters=EXCLUDED_CHARACTERS,
    )
    write_character_variations_totals(COUNTS_PATH, aggregates)
    plot_character_variations_vs_total(COUNTS_PATH, DESTINATION_PATH)


if __name__ == "__main__":
    main()
|
||||||
@ -5,10 +5,12 @@ from pathlib import Path
|
|||||||
from lib.rebrickable.minifig_characters import (
|
from lib.rebrickable.minifig_characters import (
|
||||||
aggregate_by_character,
|
aggregate_by_character,
|
||||||
aggregate_by_gender,
|
aggregate_by_gender,
|
||||||
|
aggregate_variations_and_totals,
|
||||||
aggregate_character_spans,
|
aggregate_character_spans,
|
||||||
aggregate_presence_by_year,
|
aggregate_presence_by_year,
|
||||||
load_sets_enriched,
|
load_sets_enriched,
|
||||||
write_character_counts,
|
write_character_counts,
|
||||||
|
write_character_variations_totals,
|
||||||
write_gender_counts,
|
write_gender_counts,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -68,6 +70,55 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_variations_and_totals_excludes_figurants() -> None:
    """Totals and variations are counted per character, extras being excluded."""
    columns = ("set_num", "part_num", "known_character", "fig_num", "gender")
    records = [
        ("123-1", "head-a", "Owen Grady", "fig-owen-1", "male"),
        ("124-1", "head-b", "Owen Grady", "fig-owen-1", "male"),
        ("125-1", "head-c", "Owen Grady", "fig-owen-2", "male"),
        ("126-1", "head-d", "Ellie Sattler", "fig-ellie-1", "female"),
        ("127-1", "head-e", "Figurant", "fig-guard-1", "unknown"),
    ]
    source_rows = [dict(zip(columns, record)) for record in records]

    aggregates = aggregate_variations_and_totals(source_rows, excluded_characters=["Figurant"])

    expected = [
        {"known_character": "Owen Grady", "gender": "male", "variation_count": 2, "total_minifigs": 3},
        {"known_character": "Ellie Sattler", "gender": "female", "variation_count": 1, "total_minifigs": 1},
    ]
    assert aggregates == expected
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_by_gender_counts_unique_figs() -> None:
|
def test_aggregate_by_gender_counts_unique_figs() -> None:
|
||||||
"""Compter les minifigs distinctes par genre."""
|
"""Compter les minifigs distinctes par genre."""
|
||||||
aggregates = aggregate_by_gender(
|
aggregates = aggregate_by_gender(
|
||||||
@ -112,6 +163,19 @@ def test_write_gender_counts_outputs_csv(tmp_path: Path) -> None:
|
|||||||
assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
|
assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
    """The comparison CSV is written with its header and one line per row."""
    destination = tmp_path / "variations.csv"
    first = {"known_character": "A", "gender": "male", "variation_count": 2, "total_minifigs": 3}
    second = {"known_character": "B", "gender": "female", "variation_count": 1, "total_minifigs": 1}

    write_character_variations_totals(destination, [first, second])

    written = destination.read_text()
    assert written == "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
||||||
"""Calcule le total annuel en excluant les figurants."""
|
"""Calcule le total annuel en excluant les figurants."""
|
||||||
sets_path = tmp_path / "sets_enriched.csv"
|
sets_path = tmp_path / "sets_enriched.csv"
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import matplotlib
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.plots.minifig_characters import plot_minifigs_per_character
|
from lib.plots.minifig_characters import plot_minifigs_per_character
|
||||||
|
from lib.plots.minifig_characters import plot_character_variations_vs_total
|
||||||
|
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
@ -23,3 +24,19 @@ def test_plot_minifigs_per_character(tmp_path: Path) -> None:
|
|||||||
|
|
||||||
assert destination.exists()
|
assert destination.exists()
|
||||||
assert destination.stat().st_size > 0
|
assert destination.stat().st_size > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_character_variations_vs_total(tmp_path: Path) -> None:
    """The total/variations comparison image is produced and is not empty."""
    csv_content = (
        "known_character,gender,variation_count,total_minifigs\n"
        "Owen Grady,male,2,3\n"
        "Ellie Sattler,female,1,2\n"
    )
    counts_path = tmp_path / "variations.csv"
    counts_path.write_text(csv_content)
    destination = tmp_path / "figures" / "step22" / "minifig_character_variations_totals.png"

    plot_character_variations_vs_total(counts_path, destination)

    assert destination.exists()
    image_size = destination.stat().st_size
    assert image_size > 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user