Ajoute le genre des personnages et colore les graphiques
This commit is contained in:
parent
230b9db239
commit
f5c1fa6333
@ -222,7 +222,7 @@ Cette étape se lance après le téléchargement des données d'inventaire (éta
|
|||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.compute_minifigs_by_set`
|
2. `python -m scripts.compute_minifigs_by_set`
|
||||||
|
|
||||||
Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv`, les inventaires `data/raw/inventories.csv`, `data/raw/inventory_parts.csv`, `data/raw/inventory_minifigs.csv`, le catalogue des pièces (`data/raw/parts.csv`) et celui des minifigs (`data/raw/minifigs.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Si une tête est associée à une minifig précise dans l'inventaire du set, `known_character` est renseigné avec le nom de la minifig et `fig_num` est indiqué ; sinon, `known_character` reste vide après tentative de correspondance automatique. Le CSV `data/intermediate/minifigs_by_set.csv` contient : `set_num`, `part_num`, `known_character`, `fig_num`.
|
Le script lit l'inventaire agrégé `data/intermediate/parts_filtered.csv`, les inventaires `data/raw/inventories.csv`, `data/raw/inventory_parts.csv`, `data/raw/inventory_minifigs.csv`, le catalogue des pièces (`data/raw/parts.csv`) et celui des minifigs (`data/raw/minifigs.csv`). Il sélectionne les têtes de minifigs (catégorie 59), ignore les rechanges et dédoublonne par set et référence. Si une tête est associée à une minifig précise dans l'inventaire du set, `known_character` est renseigné avec le nom de la minifig et `fig_num` est indiqué ; sinon, `known_character` reste vide après tentative de correspondance automatique. Les correspondances d'alias sont décrites dans `config/known_character_aliases.csv` et les genres des personnages nommés dans `config/known_character_genders.csv`. Le CSV `data/intermediate/minifigs_by_set.csv` contient : `set_num`, `part_num`, `known_character`, `fig_num`, `gender`.
|
||||||
|
|
||||||
### Étape 21 : visualiser le nombre de minifigs par set
|
### Étape 21 : visualiser le nombre de minifigs par set
|
||||||
|
|
||||||
@ -239,7 +239,7 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
|
|||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_characters`
|
2. `python -m scripts.plot_minifig_characters`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées).
|
Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
||||||
|
|
||||||
### Étape 23 : présence annuelle des personnages
|
### Étape 23 : présence annuelle des personnages
|
||||||
|
|
||||||
@ -253,4 +253,4 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
|
|||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_character_spans`
|
2. `python -m scripts.plot_minifig_character_spans`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans).
|
Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, calcule la première et la dernière année d'apparition pour chaque personnage (hors figurants), sérialise `data/intermediate/minifig_character_spans.csv`, puis trace `figures/step24/minifig_character_spans.png` (barres horizontales des spans). Les barres sont colorées selon le genre issu de `config/known_character_genders.csv`.
|
||||||
|
|||||||
54
config/known_character_genders.csv
Normal file
54
config/known_character_genders.csv
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
known_character,gender
|
||||||
|
ACU Trooper,unknown
|
||||||
|
Allison Miles,female
|
||||||
|
Alan Grant,male
|
||||||
|
Atwater,male
|
||||||
|
Barry,male
|
||||||
|
Ben,male
|
||||||
|
Brooklynn,female
|
||||||
|
Claire Dearing,female
|
||||||
|
Danny Nedermeyer,male
|
||||||
|
Darius,male
|
||||||
|
Dennis Nedry,male
|
||||||
|
Donald Gennaro,male
|
||||||
|
Dr Wu,male
|
||||||
|
Duncan Kincaid,male
|
||||||
|
Eli Mills,male
|
||||||
|
Ellie Sattler,female
|
||||||
|
Figurant,unknown
|
||||||
|
Franklin Web,male
|
||||||
|
Franklin Webb,male
|
||||||
|
Gray,male
|
||||||
|
Gray Mitchell,male
|
||||||
|
Gunnar Eversol,male
|
||||||
|
Henry Loomis,male
|
||||||
|
Henry Wu,male
|
||||||
|
Hudson Harper,male
|
||||||
|
Ian Malcolm,male
|
||||||
|
Isabella Delgado,female
|
||||||
|
John Hammond,male
|
||||||
|
Kayla Watts,female
|
||||||
|
Ken Wheatley,male
|
||||||
|
Kenji,male
|
||||||
|
Lex Murphy,female
|
||||||
|
LeClerc,male
|
||||||
|
Maisie Lockwood,female
|
||||||
|
Martin Krebs,male
|
||||||
|
Owen Grady,male
|
||||||
|
Rainn DeLaCourt,male
|
||||||
|
Ray Arnold,male
|
||||||
|
Reuben Delgado,male
|
||||||
|
Robert Muldoon,male
|
||||||
|
Sammy,female
|
||||||
|
Simon Masrani,male
|
||||||
|
Sinjin Prescott,male
|
||||||
|
Soyona Santos,female
|
||||||
|
Teresa Delgado,female
|
||||||
|
Tim Murphy,male
|
||||||
|
Vic Hoskins,male
|
||||||
|
Xavier Dobbs,male
|
||||||
|
Yaz,female
|
||||||
|
Zach,male
|
||||||
|
Zach Mitchell,male
|
||||||
|
Zia Rodriguez,female
|
||||||
|
Zora Bennett,female
|
||||||
|
@ -1,20 +1,35 @@
|
|||||||
"""Diagramme de longévité des personnages (bornes d'apparition)."""
|
"""Diagramme de longévité des personnages (bornes d'apparition)."""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import Dict, List
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.milestones import load_milestones
|
||||||
from lib.rebrickable.stats import read_rows
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
GENDER_COLORS = {
|
||||||
|
"male": "#4c72b0",
|
||||||
|
"female": "#c44e52",
|
||||||
|
"unknown": "#7f7f7f",
|
||||||
|
}
|
||||||
|
GENDER_LABELS = {
|
||||||
|
"male": "Homme",
|
||||||
|
"female": "Femme",
|
||||||
|
"unknown": "Inconnu",
|
||||||
|
"": "Inconnu",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_spans(path: Path) -> List[dict]:
    """Load the per-character span rows (first/last year bounds) from a CSV.

    Args:
        path: Location of the spans CSV file.

    Returns:
        The rows returned by ``read_rows`` for that file, unchanged.
    """
    span_rows = read_rows(path)
    return span_rows
|
||||||
|
|
||||||
|
|
||||||
def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
|
def plot_character_spans(spans_path: Path, destination_path: Path, milestones_path: Path | None = None) -> None:
|
||||||
"""Trace un diagramme en barres représentant la longévité des personnages."""
|
"""Trace un diagramme en barres représentant la longévité des personnages."""
|
||||||
rows = load_spans(spans_path)
|
rows = load_spans(spans_path)
|
||||||
if not rows:
|
if not rows:
|
||||||
@ -23,18 +38,22 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
|
|||||||
starts = [int(row["start_year"]) for row in rows]
|
starts = [int(row["start_year"]) for row in rows]
|
||||||
ends = [int(row["end_year"]) for row in rows]
|
ends = [int(row["end_year"]) for row in rows]
|
||||||
counts = [int(row["total_minifigs"]) for row in rows]
|
counts = [int(row["total_minifigs"]) for row in rows]
|
||||||
|
genders = [row.get("gender", "") for row in rows]
|
||||||
|
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
||||||
positions = list(range(len(rows)))
|
positions = list(range(len(rows)))
|
||||||
widths = [end - start + 1 for start, end in zip(starts, ends)]
|
widths = [end - start + 1 for start, end in zip(starts, ends)]
|
||||||
min_year = min(starts)
|
min_year = min(starts)
|
||||||
max_year = max(ends)
|
max_year = max(ends)
|
||||||
height = max(5, len(rows) * 0.3)
|
height = max(5, len(rows) * 0.3)
|
||||||
|
|
||||||
|
milestones = load_milestones(milestones_path) if milestones_path else []
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(12, height))
|
fig, ax = plt.subplots(figsize=(12, height))
|
||||||
bars = ax.barh(
|
bars = ax.barh(
|
||||||
positions,
|
positions,
|
||||||
widths,
|
widths,
|
||||||
left=starts,
|
left=starts,
|
||||||
color="#1f77b4",
|
color=colors,
|
||||||
edgecolor="#0d0d0d",
|
edgecolor="#0d0d0d",
|
||||||
linewidth=0.6,
|
linewidth=0.6,
|
||||||
)
|
)
|
||||||
@ -57,6 +76,55 @@ def plot_character_spans(spans_path: Path, destination_path: Path) -> None:
|
|||||||
color="#0d0d0d",
|
color="#0d0d0d",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
legend_entries = []
|
||||||
|
seen = set()
|
||||||
|
for gender in genders:
|
||||||
|
normalized = gender.strip().lower()
|
||||||
|
if normalized in seen:
|
||||||
|
continue
|
||||||
|
seen.add(normalized)
|
||||||
|
legend_entries.append(
|
||||||
|
Patch(
|
||||||
|
facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]),
|
||||||
|
edgecolor="#0d0d0d",
|
||||||
|
linewidth=0.6,
|
||||||
|
label=GENDER_LABELS.get(normalized, "Inconnu"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if legend_entries:
|
||||||
|
ax.legend(handles=legend_entries, title="Genre", loc="lower right")
|
||||||
|
|
||||||
|
if milestones:
|
||||||
|
milestones_in_range = sorted(
|
||||||
|
[m for m in milestones if min_year <= m["year"] <= max_year],
|
||||||
|
key=lambda m: (m["year"], m["description"]),
|
||||||
|
)
|
||||||
|
milestone_offsets: Dict[int, int] = {}
|
||||||
|
offset_step = 0.2
|
||||||
|
max_offset = 0
|
||||||
|
y_bottom, y_top = ax.get_ylim()
|
||||||
|
text_y = y_top - (y_top - y_bottom) * 0.01
|
||||||
|
for milestone in milestones_in_range:
|
||||||
|
year = milestone["year"]
|
||||||
|
count_for_year = milestone_offsets.get(year, 0)
|
||||||
|
milestone_offsets[year] = count_for_year + 1
|
||||||
|
horizontal_offset = offset_step * (count_for_year // 2 + 1)
|
||||||
|
max_offset = max(max_offset, count_for_year)
|
||||||
|
if count_for_year % 2 == 1:
|
||||||
|
horizontal_offset *= -1
|
||||||
|
text_x = year + horizontal_offset
|
||||||
|
ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
|
||||||
|
ax.text(
|
||||||
|
text_x,
|
||||||
|
text_y,
|
||||||
|
milestone["description"],
|
||||||
|
rotation=90,
|
||||||
|
verticalalignment="top",
|
||||||
|
horizontalalignment="center",
|
||||||
|
fontsize=8,
|
||||||
|
color="#d62728",
|
||||||
|
)
|
||||||
|
|
||||||
ensure_parent_dir(destination_path)
|
ensure_parent_dir(destination_path)
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.savefig(destination_path, dpi=160)
|
fig.savefig(destination_path, dpi=160)
|
||||||
|
|||||||
@ -4,11 +4,25 @@ from pathlib import Path
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
from lib.rebrickable.stats import read_rows
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
GENDER_COLORS = {
|
||||||
|
"male": "#4c72b0",
|
||||||
|
"female": "#c44e52",
|
||||||
|
"unknown": "#7f7f7f",
|
||||||
|
}
|
||||||
|
GENDER_LABELS = {
|
||||||
|
"male": "Homme",
|
||||||
|
"female": "Femme",
|
||||||
|
"unknown": "Inconnu",
|
||||||
|
"": "Inconnu",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_counts(path: Path) -> List[dict]:
    """Load the per-character minifig count rows from a CSV.

    Args:
        path: Location of the counts CSV file.

    Returns:
        The rows returned by ``read_rows`` for that file, unchanged.
    """
    count_rows = read_rows(path)
    return count_rows
|
||||||
@ -24,11 +38,13 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
|
|||||||
rows = load_counts(counts_path)
|
rows = load_counts(counts_path)
|
||||||
characters = [row["known_character"] for row in rows]
|
characters = [row["known_character"] for row in rows]
|
||||||
counts = [int(row["minifig_count"]) for row in rows]
|
counts = [int(row["minifig_count"]) for row in rows]
|
||||||
|
genders = [row.get("gender", "") for row in rows]
|
||||||
|
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
||||||
positions = list(range(len(rows)))
|
positions = list(range(len(rows)))
|
||||||
height = max(6, len(rows) * 0.22)
|
height = max(6, len(rows) * 0.22)
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(12, height))
|
fig, ax = plt.subplots(figsize=(12, height))
|
||||||
bars = ax.barh(positions, counts, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.6)
|
bars = ax.barh(positions, counts, color=colors, edgecolor="#0d0d0d", linewidth=0.6)
|
||||||
ax.set_yticks(positions)
|
ax.set_yticks(positions)
|
||||||
ax.set_yticklabels(characters)
|
ax.set_yticklabels(characters)
|
||||||
ax.invert_yaxis()
|
ax.invert_yaxis()
|
||||||
@ -40,6 +56,23 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
|
|||||||
for index, bar in enumerate(bars):
|
for index, bar in enumerate(bars):
|
||||||
value = counts[index]
|
value = counts[index]
|
||||||
ax.text(value + 0.1, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8)
|
ax.text(value + 0.1, bar.get_y() + bar.get_height() / 2, str(value), va="center", fontsize=8)
|
||||||
|
legend_entries = []
|
||||||
|
seen = set()
|
||||||
|
for gender in genders:
|
||||||
|
normalized = gender.strip().lower()
|
||||||
|
if normalized in seen:
|
||||||
|
continue
|
||||||
|
seen.add(normalized)
|
||||||
|
legend_entries.append(
|
||||||
|
Patch(
|
||||||
|
facecolor=GENDER_COLORS.get(normalized, GENDER_COLORS["unknown"]),
|
||||||
|
edgecolor="#0d0d0d",
|
||||||
|
linewidth=0.6,
|
||||||
|
label=GENDER_LABELS.get(normalized, "Inconnu"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if legend_entries:
|
||||||
|
ax.legend(handles=legend_entries, title="Genre", loc="lower right")
|
||||||
|
|
||||||
ensure_parent_dir(destination_path)
|
ensure_parent_dir(destination_path)
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
"""Agrégation des minifigs par personnage représenté."""
|
"""Agrégation des minifigs par personnage représenté."""
|
||||||
|
|
||||||
|
import csv
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Iterable, List, Sequence, Set
|
from typing import Dict, Iterable, List, Sequence, Set
|
||||||
|
|
||||||
from lib.rebrickable.stats import read_rows
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
import csv
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
def load_minifigs_by_set(path: Path) -> List[dict]:
|
def load_minifigs_by_set(path: Path) -> List[dict]:
|
||||||
@ -15,17 +15,21 @@ def load_minifigs_by_set(path: Path) -> List[dict]:
|
|||||||
|
|
||||||
|
|
||||||
def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
    """Count distinct minifigs per character and attach the character's gender.

    Rows whose ``known_character`` or ``fig_num`` is empty (after stripping)
    are ignored. A minifig is counted once per character, however many rows
    mention the same ``fig_num``.

    Args:
        rows: Mappings with at least ``known_character`` and ``fig_num``
            string values, and optionally ``gender``.

    Returns:
        One dict per character with the keys ``known_character``, ``gender``
        and ``minifig_count``, sorted by descending count and then by
        character name. ``gender`` is the first non-empty value seen for the
        character, or ``""`` when none was provided.
    """
    fig_nums_by_character: Dict[str, set] = defaultdict(set)
    genders: Dict[str, str] = {}
    for row in rows:
        character = row["known_character"].strip()
        fig_num = row["fig_num"].strip()
        if not character or not fig_num:
            continue
        fig_nums_by_character[character].add(fig_num)
        # The gender column may be absent (older CSV) or None (short CSV row
        # read through csv.DictReader); treat both as "not provided".
        gender = (row.get("gender") or "").strip()
        # Keep the first *non-empty* gender so that a blank value on an early
        # row does not hide a value supplied by a later row.
        if gender and not genders.get(character):
            genders[character] = gender
    aggregates: List[dict] = [
        {
            "known_character": character,
            "gender": genders.get(character, ""),
            "minifig_count": len(fig_nums),
        }
        for character, fig_nums in fig_nums_by_character.items()
    ]
    aggregates.sort(key=lambda r: (-r["minifig_count"], r["known_character"]))
    return aggregates
|
||||||
|
|
||||||
@ -33,7 +37,7 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
|
|||||||
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
fieldnames = ["known_character", "minifig_count"]
|
fieldnames = ["known_character", "gender", "minifig_count"]
|
||||||
with path.open("w", newline="") as csv_file:
|
with path.open("w", newline="") as csv_file:
|
||||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
@ -110,9 +114,11 @@ def aggregate_character_spans(
|
|||||||
excluded = set(excluded_characters or [])
|
excluded = set(excluded_characters or [])
|
||||||
spans: Dict[str, Dict[str, int]] = {}
|
spans: Dict[str, Dict[str, int]] = {}
|
||||||
total_counts: Dict[str, int] = defaultdict(int)
|
total_counts: Dict[str, int] = defaultdict(int)
|
||||||
|
genders: Dict[str, str] = {}
|
||||||
for row in minifigs_rows:
|
for row in minifigs_rows:
|
||||||
character = row["known_character"].strip()
|
character = row["known_character"].strip()
|
||||||
fig_num = row["fig_num"].strip()
|
fig_num = row["fig_num"].strip()
|
||||||
|
gender = row.get("gender", "").strip()
|
||||||
if character == "" or fig_num == "":
|
if character == "" or fig_num == "":
|
||||||
continue
|
continue
|
||||||
if character in excluded:
|
if character in excluded:
|
||||||
@ -122,6 +128,8 @@ def aggregate_character_spans(
|
|||||||
continue
|
continue
|
||||||
year_int = int(year)
|
year_int = int(year)
|
||||||
total_counts[character] += 1
|
total_counts[character] += 1
|
||||||
|
if character not in genders:
|
||||||
|
genders[character] = gender
|
||||||
current = spans.get(character)
|
current = spans.get(character)
|
||||||
if current is None:
|
if current is None:
|
||||||
spans[character] = {"start": year_int, "end": year_int}
|
spans[character] = {"start": year_int, "end": year_int}
|
||||||
@ -136,6 +144,7 @@ def aggregate_character_spans(
|
|||||||
"start_year": str(bounds["start"]),
|
"start_year": str(bounds["start"]),
|
||||||
"end_year": str(bounds["end"]),
|
"end_year": str(bounds["end"]),
|
||||||
"total_minifigs": str(total_counts[character]),
|
"total_minifigs": str(total_counts[character]),
|
||||||
|
"gender": genders.get(character, ""),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
|
results.sort(key=lambda r: (int(r["start_year"]), int(r["end_year"]), r["known_character"]))
|
||||||
@ -145,7 +154,7 @@ def aggregate_character_spans(
|
|||||||
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
|
def write_character_spans(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des bornes min/max par personnage."""
|
"""Écrit le CSV des bornes min/max par personnage."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
fieldnames = ["known_character", "start_year", "end_year", "total_minifigs"]
|
fieldnames = ["known_character", "start_year", "end_year", "total_minifigs", "gender"]
|
||||||
with path.open("w", newline="") as csv_file:
|
with path.open("w", newline="") as csv_file:
|
||||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|||||||
@ -128,6 +128,16 @@ def load_aliases(path: Path) -> Dict[str, str]:
|
|||||||
return aliases
|
return aliases
|
||||||
|
|
||||||
|
|
||||||
|
def load_gender_overrides(path: Path) -> Dict[str, str]:
    """Load the character -> gender mapping from a CSV file.

    The file must start with a header row containing the ``known_character``
    and ``gender`` columns.

    Args:
        path: Location of the CSV file.

    Returns:
        A dict keyed by the whitespace-stripped, lower-cased character name.
        Values are the whitespace-stripped gender labels. Rows with an empty
        character name are skipped.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If an expected column is missing from the header.
    """
    overrides: Dict[str, str] = {}
    # newline="" is what the csv module expects from its input file, and an
    # explicit UTF-8 encoding keeps accented names independent of the locale.
    with path.open(newline="", encoding="utf-8") as gender_file:
        for row in csv.DictReader(gender_file):
            # Callers look names up with ``name.lower()``, so stray spaces in
            # the config would otherwise make an entry silently unreachable.
            character = (row["known_character"] or "").strip().lower()
            if not character:
                continue
            overrides[character] = (row["gender"] or "").strip()
    return overrides
|
||||||
|
|
||||||
|
|
||||||
def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str:
|
def normalize_known_character(raw_known: str, extracted_name: str, aliases: Dict[str, str]) -> str:
|
||||||
"""Nettoie et mappe un nom vers une version canonique."""
|
"""Nettoie et mappe un nom vers une version canonique."""
|
||||||
base = raw_known or extracted_name
|
base = raw_known or extracted_name
|
||||||
@ -201,6 +211,7 @@ def aggregate_heads_by_set(
|
|||||||
minifig_heads: Dict[str, Set[str]],
|
minifig_heads: Dict[str, Set[str]],
|
||||||
minifig_catalog: Dict[str, dict],
|
minifig_catalog: Dict[str, dict],
|
||||||
aliases: Dict[str, str],
|
aliases: Dict[str, str],
|
||||||
|
gender_overrides: Dict[str, str],
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""Agrège les têtes de minifigs par set en éliminant les rechanges et doublons."""
|
"""Agrège les têtes de minifigs par set en éliminant les rechanges et doublons."""
|
||||||
seen: Set[Tuple[str, str]] = set()
|
seen: Set[Tuple[str, str]] = set()
|
||||||
@ -228,12 +239,14 @@ def aggregate_heads_by_set(
|
|||||||
normalized = normalize_known_character(known_character, extracted, aliases)
|
normalized = normalize_known_character(known_character, extracted, aliases)
|
||||||
if matched_fig == "":
|
if matched_fig == "":
|
||||||
continue
|
continue
|
||||||
|
gender = gender_overrides.get(normalized.lower(), "")
|
||||||
heads.append(
|
heads.append(
|
||||||
{
|
{
|
||||||
"set_num": row["set_num"],
|
"set_num": row["set_num"],
|
||||||
"part_num": row["part_num"],
|
"part_num": row["part_num"],
|
||||||
"known_character": normalized,
|
"known_character": normalized,
|
||||||
"fig_num": matched_fig,
|
"fig_num": matched_fig,
|
||||||
|
"gender": gender,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
seen.add(key)
|
seen.add(key)
|
||||||
@ -244,7 +257,7 @@ def aggregate_heads_by_set(
|
|||||||
def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
|
def write_heads_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV intermédiaire listant les têtes de minifigs par set."""
|
"""Écrit le CSV intermédiaire listant les têtes de minifigs par set."""
|
||||||
ensure_parent_dir(destination_path)
|
ensure_parent_dir(destination_path)
|
||||||
fieldnames = ["set_num", "part_num", "known_character", "fig_num"]
|
fieldnames = ["set_num", "part_num", "known_character", "fig_num", "gender"]
|
||||||
with destination_path.open("w", newline="") as csv_file:
|
with destination_path.open("w", newline="") as csv_file:
|
||||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
@ -260,6 +273,7 @@ def build_minifigs_by_set(
|
|||||||
inventory_minifigs_path: Path,
|
inventory_minifigs_path: Path,
|
||||||
minifigs_path: Path,
|
minifigs_path: Path,
|
||||||
aliases_path: Path,
|
aliases_path: Path,
|
||||||
|
gender_overrides_path: Path,
|
||||||
destination_path: Path,
|
destination_path: Path,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Construit le CSV listant les têtes de minifigs présentes par set."""
|
"""Construit le CSV listant les têtes de minifigs présentes par set."""
|
||||||
@ -271,6 +285,7 @@ def build_minifigs_by_set(
|
|||||||
minifig_heads = build_minifig_heads_lookup(minifig_catalog, latest_inventories, inventory_parts_path, head_parts)
|
minifig_heads = build_minifig_heads_lookup(minifig_catalog, latest_inventories, inventory_parts_path, head_parts)
|
||||||
set_minifigs = build_set_minifigs_lookup(latest_inventories, inventory_minifigs_path)
|
set_minifigs = build_set_minifigs_lookup(latest_inventories, inventory_minifigs_path)
|
||||||
aliases = load_aliases(aliases_path)
|
aliases = load_aliases(aliases_path)
|
||||||
|
gender_overrides = load_gender_overrides(gender_overrides_path)
|
||||||
heads = aggregate_heads_by_set(
|
heads = aggregate_heads_by_set(
|
||||||
parts_rows,
|
parts_rows,
|
||||||
parts_catalog,
|
parts_catalog,
|
||||||
@ -279,5 +294,6 @@ def build_minifigs_by_set(
|
|||||||
minifig_heads,
|
minifig_heads,
|
||||||
minifig_catalog,
|
minifig_catalog,
|
||||||
aliases,
|
aliases,
|
||||||
|
gender_overrides,
|
||||||
)
|
)
|
||||||
write_heads_by_set(destination_path, heads)
|
write_heads_by_set(destination_path, heads)
|
||||||
|
|||||||
@ -12,6 +12,7 @@ INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
|
|||||||
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
|
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
|
||||||
MINIFIGS_PATH = Path("data/raw/minifigs.csv")
|
MINIFIGS_PATH = Path("data/raw/minifigs.csv")
|
||||||
ALIASES_PATH = Path("config/known_character_aliases.csv")
|
ALIASES_PATH = Path("config/known_character_aliases.csv")
|
||||||
|
GENDERS_PATH = Path("config/known_character_genders.csv")
|
||||||
DESTINATION_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
DESTINATION_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
|
||||||
|
|
||||||
@ -25,6 +26,7 @@ def main() -> None:
|
|||||||
INVENTORY_MINIFIGS_PATH,
|
INVENTORY_MINIFIGS_PATH,
|
||||||
MINIFIGS_PATH,
|
MINIFIGS_PATH,
|
||||||
ALIASES_PATH,
|
ALIASES_PATH,
|
||||||
|
GENDERS_PATH,
|
||||||
DESTINATION_PATH,
|
DESTINATION_PATH,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -15,6 +15,7 @@ MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
|||||||
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
|
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
|
||||||
SPANS_PATH = Path("data/intermediate/minifig_character_spans.csv")
|
SPANS_PATH = Path("data/intermediate/minifig_character_spans.csv")
|
||||||
DESTINATION_PATH = Path("figures/step24/minifig_character_spans.png")
|
DESTINATION_PATH = Path("figures/step24/minifig_character_spans.png")
|
||||||
|
MILESTONES_PATH = Path("config/milestones.csv")
|
||||||
EXCLUDED_CHARACTERS = ["Figurant"]
|
EXCLUDED_CHARACTERS = ["Figurant"]
|
||||||
|
|
||||||
|
|
||||||
@ -24,7 +25,7 @@ def main() -> None:
|
|||||||
sets_years = load_sets_enriched(SETS_ENRICHED_PATH)
|
sets_years = load_sets_enriched(SETS_ENRICHED_PATH)
|
||||||
spans = aggregate_character_spans(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS)
|
spans = aggregate_character_spans(minifigs, sets_years, excluded_characters=EXCLUDED_CHARACTERS)
|
||||||
write_character_spans(SPANS_PATH, spans)
|
write_character_spans(SPANS_PATH, spans)
|
||||||
plot_character_spans(SPANS_PATH, DESTINATION_PATH)
|
plot_character_spans(SPANS_PATH, DESTINATION_PATH, milestones_path=MILESTONES_PATH)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -12,11 +12,11 @@ matplotlib.use("Agg")
|
|||||||
def test_plot_character_spans(tmp_path: Path) -> None:
|
def test_plot_character_spans(tmp_path: Path) -> None:
|
||||||
"""Génère le graphique de span des personnages."""
|
"""Génère le graphique de span des personnages."""
|
||||||
spans_path = tmp_path / "minifig_character_spans.csv"
|
spans_path = tmp_path / "minifig_character_spans.csv"
|
||||||
destination = tmp_path / "figures" / "step23" / "minifig_character_spans.png"
|
destination = tmp_path / "figures" / "step24" / "minifig_character_spans.png"
|
||||||
spans_path.write_text(
|
spans_path.write_text(
|
||||||
"known_character,start_year,end_year,total_minifigs\n"
|
"known_character,start_year,end_year,total_minifigs,gender\n"
|
||||||
"Owen Grady,2020,2022,3\n"
|
"Owen Grady,2020,2022,3,male\n"
|
||||||
"Figurant,2019,2020,2\n"
|
"Figurant,2019,2020,2,unknown\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
plot_character_spans(spans_path, destination)
|
plot_character_spans(spans_path, destination)
|
||||||
|
|||||||
@ -15,18 +15,54 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
|
|||||||
"""Compter les minifigs distinctes par personnage en excluant les noms vides."""
|
"""Compter les minifigs distinctes par personnage en excluant les noms vides."""
|
||||||
aggregates = aggregate_by_character(
|
aggregates = aggregate_by_character(
|
||||||
[
|
[
|
||||||
{"set_num": "123-1", "part_num": "head-a", "known_character": "Owen Grady", "fig_num": "fig-owen-1"},
|
{
|
||||||
{"set_num": "124-1", "part_num": "head-b", "known_character": "Owen Grady", "fig_num": "fig-owen-1"},
|
"set_num": "123-1",
|
||||||
{"set_num": "125-1", "part_num": "head-c", "known_character": "Owen Grady", "fig_num": "fig-owen-2"},
|
"part_num": "head-a",
|
||||||
{"set_num": "126-1", "part_num": "head-d", "known_character": "Figurant", "fig_num": "fig-guard-1"},
|
"known_character": "Owen Grady",
|
||||||
{"set_num": "128-1", "part_num": "head-f", "known_character": "Figurant", "fig_num": "fig-guard-1"},
|
"fig_num": "fig-owen-1",
|
||||||
{"set_num": "129-1", "part_num": "head-g", "known_character": "", "fig_num": "fig-guard-2"},
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "124-1",
|
||||||
|
"part_num": "head-b",
|
||||||
|
"known_character": "Owen Grady",
|
||||||
|
"fig_num": "fig-owen-1",
|
||||||
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "125-1",
|
||||||
|
"part_num": "head-c",
|
||||||
|
"known_character": "Owen Grady",
|
||||||
|
"fig_num": "fig-owen-2",
|
||||||
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "126-1",
|
||||||
|
"part_num": "head-d",
|
||||||
|
"known_character": "Figurant",
|
||||||
|
"fig_num": "fig-guard-1",
|
||||||
|
"gender": "unknown",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "128-1",
|
||||||
|
"part_num": "head-f",
|
||||||
|
"known_character": "Figurant",
|
||||||
|
"fig_num": "fig-guard-1",
|
||||||
|
"gender": "unknown",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "129-1",
|
||||||
|
"part_num": "head-g",
|
||||||
|
"known_character": "",
|
||||||
|
"fig_num": "fig-guard-2",
|
||||||
|
"gender": "unknown",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
assert aggregates == [
|
assert aggregates == [
|
||||||
{"known_character": "Owen Grady", "minifig_count": 2},
|
{"known_character": "Owen Grady", "gender": "male", "minifig_count": 2},
|
||||||
{"known_character": "Figurant", "minifig_count": 1},
|
{"known_character": "Figurant", "gender": "unknown", "minifig_count": 1},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -34,13 +70,13 @@ def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
|
|||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
destination = tmp_path / "counts.csv"
|
destination = tmp_path / "counts.csv"
|
||||||
rows = [
|
rows = [
|
||||||
{"known_character": "A", "minifig_count": 2},
|
{"known_character": "A", "gender": "male", "minifig_count": 2},
|
||||||
{"known_character": "B", "minifig_count": 1},
|
{"known_character": "B", "gender": "female", "minifig_count": 1},
|
||||||
]
|
]
|
||||||
|
|
||||||
write_character_counts(destination, rows)
|
write_character_counts(destination, rows)
|
||||||
|
|
||||||
assert destination.read_text() == "known_character,minifig_count\nA,2\nB,1\n"
|
assert destination.read_text() == "known_character,gender,minifig_count\nA,male,2\nB,female,1\n"
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
||||||
@ -52,8 +88,20 @@ def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
|||||||
"124-1,2021\n"
|
"124-1,2021\n"
|
||||||
)
|
)
|
||||||
minifigs_rows = [
|
minifigs_rows = [
|
||||||
{"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"},
|
{
|
||||||
{"set_num": "124-1", "known_character": "Figurant", "fig_num": "fig-guard", "part_num": "head-b"},
|
"set_num": "123-1",
|
||||||
|
"known_character": "Owen Grady",
|
||||||
|
"fig_num": "fig-owen",
|
||||||
|
"part_num": "head-a",
|
||||||
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "124-1",
|
||||||
|
"known_character": "Figurant",
|
||||||
|
"fig_num": "fig-guard",
|
||||||
|
"part_num": "head-b",
|
||||||
|
"gender": "unknown",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
sets_years = load_sets_enriched(sets_path)
|
sets_years = load_sets_enriched(sets_path)
|
||||||
|
|
||||||
@ -76,13 +124,31 @@ def test_aggregate_character_spans_excludes_figurants(tmp_path: Path) -> None:
|
|||||||
)
|
)
|
||||||
sets_years = load_sets_enriched(sets_path)
|
sets_years = load_sets_enriched(sets_path)
|
||||||
minifigs_rows = [
|
minifigs_rows = [
|
||||||
{"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"},
|
{
|
||||||
{"set_num": "124-1", "known_character": "Owen Grady", "fig_num": "fig-owen", "part_num": "head-a"},
|
"set_num": "123-1",
|
||||||
{"set_num": "125-1", "known_character": "Figurant", "fig_num": "fig-guard", "part_num": "head-b"},
|
"known_character": "Owen Grady",
|
||||||
|
"fig_num": "fig-owen",
|
||||||
|
"part_num": "head-a",
|
||||||
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "124-1",
|
||||||
|
"known_character": "Owen Grady",
|
||||||
|
"fig_num": "fig-owen",
|
||||||
|
"part_num": "head-a",
|
||||||
|
"gender": "male",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"set_num": "125-1",
|
||||||
|
"known_character": "Figurant",
|
||||||
|
"fig_num": "fig-guard",
|
||||||
|
"part_num": "head-b",
|
||||||
|
"gender": "unknown",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
spans = aggregate_character_spans(minifigs_rows, sets_years, excluded_characters=["Figurant"])
|
spans = aggregate_character_spans(minifigs_rows, sets_years, excluded_characters=["Figurant"])
|
||||||
|
|
||||||
assert spans == [
|
assert spans == [
|
||||||
{"known_character": "Owen Grady", "start_year": "2020", "end_year": "2021", "total_minifigs": "2"},
|
{"known_character": "Owen Grady", "start_year": "2020", "end_year": "2021", "total_minifigs": "2", "gender": "male"},
|
||||||
]
|
]
|
||||||
|
|||||||
@ -14,9 +14,9 @@ def test_plot_minifigs_per_character(tmp_path: Path) -> None:
|
|||||||
counts_path = tmp_path / "counts.csv"
|
counts_path = tmp_path / "counts.csv"
|
||||||
destination = tmp_path / "figures" / "step22" / "minifig_characters.png"
|
destination = tmp_path / "figures" / "step22" / "minifig_characters.png"
|
||||||
counts_path.write_text(
|
counts_path.write_text(
|
||||||
"known_character,minifig_count\n"
|
"known_character,gender,minifig_count\n"
|
||||||
"Owen Grady,2\n"
|
"Owen Grady,male,2\n"
|
||||||
"Figurant,1\n"
|
"Figurant,unknown,1\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
plot_minifigs_per_character(counts_path, destination)
|
plot_minifigs_per_character(counts_path, destination)
|
||||||
|
|||||||
@ -71,6 +71,13 @@ def test_build_minifigs_by_set_filters_spares_and_deduplicates(tmp_path) -> None
|
|||||||
"alias,canonical\n"
|
"alias,canonical\n"
|
||||||
"Guard in Helmet with Trans-Brown Visor,Figurant\n",
|
"Guard in Helmet with Trans-Brown Visor,Figurant\n",
|
||||||
)
|
)
|
||||||
|
genders_path = tmp_path / "known_character_genders.csv"
|
||||||
|
write_csv(
|
||||||
|
genders_path,
|
||||||
|
"known_character,gender\n"
|
||||||
|
"Owen Grady,male\n"
|
||||||
|
"Figurant,unknown\n",
|
||||||
|
)
|
||||||
destination_path = tmp_path / "minifigs_by_set.csv"
|
destination_path = tmp_path / "minifigs_by_set.csv"
|
||||||
|
|
||||||
build_minifigs_by_set(
|
build_minifigs_by_set(
|
||||||
@ -81,12 +88,13 @@ def test_build_minifigs_by_set_filters_spares_and_deduplicates(tmp_path) -> None
|
|||||||
inventory_minifigs_path,
|
inventory_minifigs_path,
|
||||||
minifigs_path,
|
minifigs_path,
|
||||||
aliases_path,
|
aliases_path,
|
||||||
|
genders_path,
|
||||||
destination_path,
|
destination_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert destination_path.read_text() == (
|
assert destination_path.read_text() == (
|
||||||
"set_num,part_num,known_character,fig_num\n"
|
"set_num,part_num,known_character,fig_num,gender\n"
|
||||||
"123-1,head-a,Owen Grady,fig-owen\n"
|
"123-1,head-a,Owen Grady,fig-owen,male\n"
|
||||||
"123-1,head-b,Figurant,fig-guard\n"
|
"123-1,head-b,Figurant,fig-guard,unknown\n"
|
||||||
"124-1,head-b,Figurant,fig-guard\n"
|
"124-1,head-b,Figurant,fig-guard,unknown\n"
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user