1

Ajoute le genre des personnages et colore les graphiques

This commit is contained in:
2025-12-02 11:37:13 +01:00
parent 230b9db239
commit f5c1fa6333
12 changed files with 300 additions and 43 deletions

View File

@@ -1,20 +1,35 @@
"""Diagramme de longévité des personnages (bornes d'apparition)."""
from pathlib import Path
from typing import List
from typing import Dict, List
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows
# Bar fill colour for each normalized (stripped, lower-cased) gender value.
GENDER_COLORS = {
    "male": "#4c72b0",
    "female": "#c44e52",
    "unknown": "#7f7f7f",
}

# Legend label shown for each normalized gender value; the empty string
# is mapped to the same label as "unknown".
GENDER_LABELS = {
    "male": "Homme",
    "female": "Femme",
    "unknown": "Inconnu",
    "": "Inconnu",
}
def load_spans(path: Path) -> List[dict]:
    """Load the CSV of per-character min/max appearance bounds.

    Delegates the actual reading to ``read_rows`` and returns its result
    unchanged.
    """
    span_rows = read_rows(path)
    return span_rows
def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
def plot_character_spans(spans_path: Path, destination_path: Path, milestones_path: Path | None = None) -> None:
"""Trace un diagramme en barres représentant la longévité des personnages."""
rows = load_spans(spans_path)
if not rows:
@@ -23,18 +38,22 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
starts = [int(row["start_year"]) for row in rows]
ends = [int(row["end_year"]) for row in rows]
counts = [int(row["total_minifigs"]) for row in rows]
genders = [row.get("gender", "") for row in rows]
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
positions = list(range(len(rows)))
widths = [end - start + 1 for start, end in zip(starts, ends)]
min_year = min(starts)
max_year = max(ends)
height = max(5, len(rows) * 0.3)
milestones = load_milestones(milestones_path) if milestones_path else []
fig, ax = plt.subplots(figsize=(12, height))
bars = ax.barh(
positions,
widths,
left=starts,
color="#1f77b4",
color=colors,
edgecolor="#0d0d0d",
linewidth=0.6,
)
@@ -57,6 +76,55 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
color="#0d0d0d",
)
legend_entries = []
seen = set()
for gender in genders:
normalized = gender.strip().lower()
if normalized in seen:
continue
seen.add(normalized)
legend_entries.append(
Patch(
facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]),
edgecolor="#0d0d0d",
linewidth=0.6,
label=GENDER_LABELS.get(normalized, "Inconnu"),
)
)
if legend_entries:
ax.legend(handles=legend_entries, title="Genre", loc="lower right")
if milestones:
milestones_in_range = sorted(
[m for m in milestones if min_year <= m["year"] <= max_year],
key=lambda m: (m["year"], m["description"]),
)
milestone_offsets: Dict[int, int] = {}
offset_step = 0.2
max_offset = 0
y_bottom, y_top = ax.get_ylim()
text_y = y_top - (y_top - y_bottom) * 0.01
for milestone in milestones_in_range:
year = milestone["year"]
count_for_year = milestone_offsets.get(year, 0)
milestone_offsets[year] = count_for_year + 1
horizontal_offset = offset_step * (count_for_year // 2 + 1)
max_offset = max(max_offset, count_for_year)
if count_for_year % 2 == 1:
horizontal_offset *= -1
text_x = year + horizontal_offset
ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
ax.text(
text_x,
text_y,
milestone["description"],
rotation=90,
verticalalignment="top",
horizontalalignment="center",
fontsize=8,
color="#d62728",
)
ensure_parent_dir(destination_path)
fig.tight_layout()
fig.savefig(destination_path, dpi=160)

View File

@@ -4,11 +4,25 @@ from pathlib import Path
from typing import List
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
# Bar fill colour keyed by the normalized (stripped, lower-cased) gender.
GENDER_COLORS = {
    "male": "#4c72b0",
    "female": "#c44e52",
    "unknown": "#7f7f7f",
}

# Legend label keyed by the normalized gender; an empty gender shares the
# label used for "unknown".
GENDER_LABELS = {
    "male": "Homme",
    "female": "Femme",
    "unknown": "Inconnu",
    "": "Inconnu",
}
def load_counts(path: Path) -> List[dict]:
    """Load the CSV of minifig counts per character.

    Delegates the actual reading to ``read_rows`` and returns its result
    unchanged.
    """
    count_rows = read_rows(path)
    return count_rows
@@ -24,11 +38,13 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
rows = load_counts(counts_path)
characters = [row["known_character"] for row in rows]
counts = [int(row["minifig_count"]) for row in rows]
genders = [row.get("gender", "") for row in rows]
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
positions = list(range(len(rows)))
height = max(6, len(rows) * 0.22)
fig, ax = plt.subplots(figsize=(12, height))
bars = ax.barh(positions, counts, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.6)
bars = ax.barh(positions, counts, color=colors, edgecolor="#0d0d0d", linewidth=0.6)
ax.set_yticks(positions)
ax.set_yticklabels(characters)
ax.invert_yaxis()
@@ -40,6 +56,23 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
for index, bar in enumerate(bars):
value = counts[index]
ax.text(value + 0.1, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8)
legend_entries = []
seen = set()
for gender in genders:
normalized = gender.strip().lower()
if normalized in seen:
continue
seen.add(normalized)
legend_entries.append(
Patch(
facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]),
edgecolor="#0d0d0d",
linewidth=0.6,
label=GENDER_LABELS.get(normalized, "Inconnu"),
)
)
if legend_entries:
ax.legend(handles=legend_entries, title="Genre", loc="lower right")
ensure_parent_dir(destination_path)
fig.tight_layout()

View File

@@ -1,12 +1,12 @@
"""Agrégation des minifigs par personnage représenté."""
import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set
from lib.rebrickable.stats import read_rows
from lib.filesystem import ensure_parent_dir
import csv
from lib.rebrickable.stats import read_rows
def load_minifigs_by_set(path: Path) -> List[dict]:
@@ -15,17 +15,21 @@ def load_minifigs_by_set(path: Path) -> List[dict]:
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs per character (unique fig_num), with gender.

    Rows whose ``known_character`` or ``fig_num`` is blank after stripping
    are ignored. The gender recorded for a character is the stripped
    ``gender`` value of the first usable row seen for that character; a
    missing or ``None`` gender is recorded as an empty string.

    Returns one dict per character with the keys ``known_character``,
    ``gender`` and ``minifig_count``, sorted by decreasing count and then
    by character name.
    """
    fig_nums_by_character: Dict[str, set] = defaultdict(set)
    genders: Dict[str, str] = {}
    for row in rows:
        character = row["known_character"].strip()
        fig_num = row["fig_num"].strip()
        if character == "" or fig_num == "":
            continue
        fig_nums_by_character[character].add(fig_num)
        # First usable row wins; later rows never overwrite the gender.
        genders.setdefault(character, (row.get("gender") or "").strip())

    # Exactly one entry per character, always carrying the gender column.
    aggregates: List[dict] = [
        {
            "known_character": character,
            "gender": genders.get(character, ""),
            "minifig_count": len(fig_nums),
        }
        for character, fig_nums in fig_nums_by_character.items()
    ]
    aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"]))
    return aggregates
@@ -33,7 +37,7 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par personnage."""
ensure_parent_dir(path)
fieldnames = ["known_character", "minifig_count"]
fieldnames = ["known_character", "gender", "minifig_count"]
with path.open("w", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
@@ -110,9 +114,11 @@ def aggregate_character_spans(
excluded = set(excluded_characters or [])
spans: Dict[str, Dict[str, int]] = {}
total_counts: Dict[str, int] = defaultdict(int)
genders: Dict[str, str] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
gender = row.get("gender", "").strip()
if character == "" or fig_num == "":
continue
if character in excluded:
@@ -122,6 +128,8 @@ def aggregate_character_spans(
continue
year_int = int(year)
total_counts[character] += 1
if character not in genders:
genders[character] = gender
current = spans.get(character)
if current is None:
spans[character] = {"start": year_int, "end": year_int}
@@ -136,6 +144,7 @@ def aggregate_character_spans(
"start_year": str(bounds["start"]),
"end_year": str(bounds["end"]),
"total_minifigs": str(total_counts[character]),
"gender": genders.get(character, ""),
}
)
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
@@ -145,7 +154,7 @@ def aggregate_character_spans(
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des bornes min/max par personnage."""
ensure_parent_dir(path)
fieldnames = ["known_character", "start_year", "end_year", "total_minifigs"]
fieldnames = ["known_character", "start_year", "end_year", "total_minifigs", "gender"]
with path.open("w", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()

View File

@@ -128,6 +128,16 @@ def load_aliases(path: Path) -> Dict[str, str]:
return aliases
def load_gender_overrides(path: Path) -> Dict[str, str]:
    """Load the character -> gender mapping from a CSV file.

    The CSV is read through ``csv.DictReader`` and must provide the
    ``known_character`` and ``gender`` columns (a missing column raises
    ``KeyError``). Character names are stripped and lower-cased to form the
    keys; gender values are stripped. Rows with a blank character name are
    skipped, and a row with no gender field maps to an empty string.
    """
    overrides: Dict[str, str] = {}
    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for file objects passed to a reader.
    with path.open(newline="") as gender_file:
        for row in csv.DictReader(gender_file):
            character = (row["known_character"] or "").strip().lower()
            if character == "":
                continue
            # DictReader fills absent trailing fields with None.
            overrides[character] = (row["gender"] or "").strip()
    return overrides
def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str:
"""Nettoie et mappe un nom vers une version canonique."""
base = raw_known or extracted_name
@@ -201,6 +211,7 @@ def aggregate_heads_by_set(
minifig_heads: Dict[str, Set[str]],
minifig_catalog: Dict[str, dict],
aliases: Dict[str, str],
gender_overrides: Dict[str, str],
) -> List[dict]:
"""Agrège les têtes de minifigs par set en éliminant les rechanges et doublons."""
seen: Set[Tuple[str, str]] = set()
@@ -228,12 +239,14 @@ def aggregate_heads_by_set(
normalized = normalize_known_character(known_character, extracted, aliases)
if matched_fig == "":
continue
gender = gender_overrides.get(normalized.lower(), "")
heads.append(
{
"set_num": row["set_num"],
"part_num": row["part_num"],
"known_character": normalized,
"fig_num": matched_fig,
"gender": gender,
}
)
seen.add(key)
@@ -244,7 +257,7 @@ def aggregate_heads_by_set(
def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV intermédiaire listant les têtes de minifigs par set."""
ensure_parent_dir(destination_path)
fieldnames = ["set_num", "part_num", "known_character", "fig_num"]
fieldnames = ["set_num", "part_num", "known_character", "fig_num", "gender"]
with destination_path.open("w", newline="") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
@@ -260,6 +273,7 @@ def build_minifigs_by_set(
inventory_minifigs_path: Path,
minifigs_path: Path,
aliases_path: Path,
gender_overrides_path: Path,
destination_path: Path,
) -> None:
"""Construit le CSV listant les têtes de minifigs présentes par set."""
@@ -271,6 +285,7 @@ def build_minifigs_by_set(
minifig_heads = build_minifig_heads_lookup(minifig_catalog, latest_inventories, inventory_parts_path, head_parts)
set_minifigs = build_set_minifigs_lookup(latest_inventories, inventory_minifigs_path)
aliases = load_aliases(aliases_path)
gender_overrides = load_gender_overrides(gender_overrides_path)
heads = aggregate_heads_by_set(
parts_rows,
parts_catalog,
@@ -279,5 +294,6 @@ def build_minifigs_by_set(
minifig_heads,
minifig_catalog,
aliases,
gender_overrides,
)
write_heads_by_set(destination_path, heads)