Compare commits
7 Commits
18d5895a05
...
89eb0a92d7
| Author | SHA1 | Date | |
|---|---|---|---|
| 89eb0a92d7 | |||
| 996e4cb9ff | |||
| e5dbf7bbaa | |||
| 871539c4f7 | |||
| f9e1555ecb | |||
| f9854a6949 | |||
| 3c20b74b1c |
16
README.md
@@ -243,8 +243,17 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
|
|||||||
|
|
||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_characters`
|
2. `python -m scripts.plot_minifig_characters`
|
||||||
|
3. `python -m scripts.plot_minifig_character_variations`
|
||||||
|
4. `python -m scripts.plot_minifig_new_characters`
|
||||||
|
5. `python -m scripts.list_new_minifig_characters`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
||||||
|
|
||||||
|
Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figurants, calcule par personnage le nombre de variations (fig_num distincts) et le total réel de minifigs présentes dans les sets filtrés, sérialise `data/intermediate/minifig_character_variations_totals.csv`, puis trace `figures/step22/minifig_character_variations_totals.png` en superposant un fond neutre (total) et une jauge colorée (variations, couleur = genre).
|
||||||
|
|
||||||
|
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
|
||||||
|
|
||||||
|
Le quatrième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, liste pour chaque année (2015-2025) les personnages introduits et les sets de cette année contenant ces minifigs, puis produit `data/final/minifig_new_characters_by_year.csv` et `data/final/minifig_new_characters_by_year.md` (format markdown minimal pour le blog).
|
||||||
|
|
||||||
### Étape 23 : présence annuelle des personnages
|
### Étape 23 : présence annuelle des personnages
|
||||||
|
|
||||||
@@ -264,8 +273,11 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
|
|||||||
|
|
||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
2. `python -m scripts.plot_minifig_gender_share`
|
2. `python -m scripts.plot_minifig_gender_share`
|
||||||
|
3. `python -m scripts.plot_minifig_character_genders`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins, masculins ou inconnus).
|
Le premier script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique, étiquettes en valeurs absolues).
|
||||||
|
|
||||||
|
Le second script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de personnages distincts par genre (femmes/hommes uniquement), sérialise `data/intermediate/minifig_character_gender_counts.csv`, puis trace `figures/step25/minifig_character_gender_share.png` pour comparer la répartition des personnages identifiés.
|
||||||
|
|
||||||
### Étape 26 : corrélation pièces / minifigs
|
### Étape 26 : corrélation pièces / minifigs
|
||||||
|
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 781 KiB After Width: | Height: | Size: 540 KiB |
BIN
figures/step22/minifig_character_variations_totals.png
Normal file
|
After Width: | Height: | Size: 193 KiB |
BIN
figures/step23/minifig_new_characters_per_year.png
Normal file
|
After Width: | Height: | Size: 113 KiB |
BIN
figures/step25/minifig_character_gender_share.png
Normal file
|
After Width: | Height: | Size: 54 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 53 KiB |
|
Before Width: | Height: | Size: 795 KiB After Width: | Height: | Size: 828 KiB |
@@ -1,12 +1,13 @@
|
|||||||
"""Graphique du nombre de minifigs par personnage."""
|
"""Graphique du nombre de minifigs par personnage."""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import Dict, List
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from matplotlib.patches import Patch
|
from matplotlib.patches import Patch
|
||||||
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.milestones import load_milestones
|
||||||
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
|
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
|
||||||
from lib.rebrickable.stats import read_rows
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
@@ -21,6 +22,16 @@ def load_presence(path: Path) -> List[dict]:
|
|||||||
return read_rows(path)
|
return read_rows(path)
|
||||||
|
|
||||||
|
|
||||||
|
def load_new_characters(path: Path) -> List[dict]:
    """Load the CSV of characters introduced per year.

    Thin wrapper: the file at ``path`` is handed to ``read_rows`` and its
    result is returned unchanged.
    """
    loaded_rows = read_rows(path)
    return loaded_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_variations_and_totals(path: Path) -> List[dict]:
    """Load the CSV comparing variations and totals per character.

    Thin wrapper: the file at ``path`` is handed to ``read_rows`` and its
    result is returned unchanged.
    """
    loaded_rows = read_rows(path)
    return loaded_rows
|
||||||
|
|
||||||
|
|
||||||
def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
|
def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
|
"""Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
|
||||||
rows = load_counts(counts_path)
|
rows = load_counts(counts_path)
|
||||||
@@ -68,6 +79,86 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
|
|||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_character_variations_vs_total(counts_path: Path, destination_path: Path) -> None:
    """Overlay the total minifig count and the distinct variations per character.

    Reads the rows from ``counts_path`` (columns ``known_character``,
    ``variation_count``, ``total_minifigs`` and optionally ``gender``), draws
    a wide neutral bar for the total and a thinner gender-coloured bar for the
    variations, then saves the figure to ``destination_path``. Nothing is
    drawn or written when the CSV yields no rows.
    """
    rows = load_variations_and_totals(counts_path)
    if not rows:
        return

    outline = "#0d0d0d"
    neutral_fill = "#d7d7e0"
    names = [row["known_character"] for row in rows]
    variations = [int(row["variation_count"]) for row in rows]
    totals = [int(row["total_minifigs"]) for row in rows]
    genders = [row.get("gender", "") for row in rows]
    fallback_color = GENDER_COLORS["unknown"]
    bar_colors = [GENDER_COLORS.get(gender.strip().lower(), fallback_color) for gender in genders]
    slots = list(range(len(rows)))

    # Figure height grows with the number of characters, never below 6 inches.
    fig, ax = plt.subplots(figsize=(12.4, max(6, len(rows) * 0.24)))
    total_bars = ax.barh(
        slots,
        totals,
        color=neutral_fill,
        edgecolor=outline,
        linewidth=0.6,
        height=0.6,
        label="Total de minifigs",
    )
    variation_bars = ax.barh(
        slots,
        variations,
        color=bar_colors,
        edgecolor=outline,
        linewidth=0.8,
        height=0.36,
        label="Variations distinctes",
    )
    ax.set_yticks(slots)
    ax.set_yticklabels(names)
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de minifigs")
    ax.set_title("Variations et total de minifigs par personnage (hors figurants)")
    ax.grid(True, axis="x", linestyle="--", alpha=0.25)
    ax.set_xlim(0, max(totals) + 1)

    # Numeric labels: totals first, then variations, each next to its bar end.
    labelled_series = (
        (total_bars, totals, "#1a1a1a"),
        (variation_bars, variations, outline),
    )
    for bars, values, text_color in labelled_series:
        for bar, value in zip(bars, values):
            ax.text(
                value + 0.12,
                bar.get_y() + bar.get_height() / 2,
                str(value),
                va="center",
                fontsize=8,
                color=text_color,
            )

    legend_entries = [
        Patch(facecolor=neutral_fill, edgecolor=outline, linewidth=0.6, label="Total de minifigs"),
        Patch(
            facecolor=fallback_color,
            edgecolor=outline,
            linewidth=0.8,
            label="Variations distinctes (couleur = genre)",
        ),
    ]
    # One legend patch per normalised gender, in order of first appearance.
    color_by_gender = {}
    for gender, color in zip(genders, bar_colors):
        color_by_gender.setdefault(gender.strip().lower(), color)
    legend_entries.extend(
        Patch(
            facecolor=color,
            edgecolor=outline,
            linewidth=0.6,
            label=GENDER_LABELS.get(normalized, "Inconnu"),
        )
        for normalized, color in color_by_gender.items()
    )
    ax.legend(handles=legend_entries, loc="lower right")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
|
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
|
"""Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
|
||||||
rows = load_presence(presence_path)
|
rows = load_presence(presence_path)
|
||||||
@@ -114,3 +205,73 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
|
|||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.savefig(destination_path, dpi=160)
|
fig.savefig(destination_path, dpi=160)
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_new_characters_per_year(
    counts_path: Path,
    milestones_path: Path,
    destination_path: Path,
    start_year: int,
    end_year: int,
) -> None:
    """Draw a bar chart of the number of new characters introduced per year.

    Args:
        counts_path: CSV with ``year`` and ``new_characters`` columns.
        milestones_path: file passed to ``load_milestones``; each milestone is
            read through its ``year`` and ``description`` keys.
        destination_path: where the figure is saved.
        start_year: first year shown on the x axis (inclusive).
        end_year: last year shown on the x axis (inclusive).

    Years of the range that are missing from the CSV are drawn as zero.
    Nothing is drawn or written when the CSV yields no rows.
    """
    rows = load_new_characters(counts_path)
    if not rows:
        return
    counts = {int(row["year"]): int(row["new_characters"]) for row in rows}
    years = list(range(start_year, end_year + 1))
    values = [counts.get(year, 0) for year in years]

    fig_width = max(8.5, len(years) * 0.45 + 2.5)
    fig, ax = plt.subplots(figsize=(fig_width, 5.4))
    bars = ax.bar(years, values, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.7)
    ax.set_xlabel("Année")
    ax.set_ylabel("Nouveaux personnages")
    ax.set_title("Personnages introduits par an (hors figurants)")
    ax.grid(axis="y", linestyle="--", alpha=0.3)
    ax.set_xticks(years)
    ax.set_xticklabels(years, rotation=45, ha="right")
    ax.set_xlim(start_year - 0.6, end_year + 0.6)
    # Keep the historical 20-unit axis as a floor, but grow it when a year
    # exceeds it so the tallest bar and its value label are never clipped.
    y_max = max(values, default=0)
    upper_limit = max(20, y_max + 1)
    ax.set_ylim(0, upper_limit)

    for bar, value in zip(bars, values):
        if value == 0:
            # Empty years get no value label.
            continue
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center", va="bottom", fontsize=8)

    milestones = load_milestones(milestones_path)
    if milestones:
        milestones_in_range = sorted(
            [m for m in milestones if start_year <= m["year"] <= end_year],
            key=lambda m: (m["year"], m["description"]),
        )
        offset_step = 0.25
        # Number of milestones already placed for each year.
        offset_map: Dict[int, int] = {}
        top_limit = ax.get_ylim()[1]
        label_y = top_limit * 0.96
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = offset_map.get(year, 0)
            offset_map[year] = count_for_year + 1
            # Alternate labels right/left of the line, moving further out for
            # each extra pair of milestones sharing the same year.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65, zorder=1)
            ax.text(
                text_x,
                label_y,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
|
||||||
|
|||||||
@@ -16,7 +16,9 @@ def load_counts(path: Path) -> List[dict]:
|
|||||||
|
|
||||||
def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None:
|
def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace un diagramme en barres du nombre de minifigs par set (thèmes filtrés)."""
|
"""Trace un diagramme en barres du nombre de minifigs par set (thèmes filtrés)."""
|
||||||
rows = load_counts(counts_path)
|
rows = [row for row in load_counts(counts_path) if int(row["minifig_count"]) > 0]
|
||||||
|
if not rows:
|
||||||
|
return
|
||||||
labels = [f"{row['set_num']} - {row['name']}" for row in rows]
|
labels = [f"{row['set_num']} - {row['name']}" for row in rows]
|
||||||
values = [int(row["minifig_count"]) for row in rows]
|
values = [int(row["minifig_count"]) for row in rows]
|
||||||
positions = list(range(len(rows)))
|
positions = list(range(len(rows)))
|
||||||
|
|||||||
@@ -17,18 +17,15 @@ def load_gender_counts(path: Path) -> List[dict]:
|
|||||||
|
|
||||||
def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None:
|
def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None:
|
||||||
"""Trace un diagramme circulaire de la répartition des minifigs par genre."""
|
"""Trace un diagramme circulaire de la répartition des minifigs par genre."""
|
||||||
rows = load_gender_counts(counts_path)
|
rows = [
|
||||||
|
row for row in load_gender_counts(counts_path) if row["gender"].strip().lower() in ("male", "female")
|
||||||
|
]
|
||||||
if not rows:
|
if not rows:
|
||||||
return
|
return
|
||||||
genders = [row["gender"] for row in rows]
|
genders = [row["gender"] for row in rows]
|
||||||
counts = [int(row["minifig_count"]) for row in rows]
|
counts = [int(row["minifig_count"]) for row in rows]
|
||||||
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
|
||||||
total = sum(counts)
|
labels = [f"{GENDER_LABELS.get(g.strip().lower(), 'Inconnu')} ({count})" for g, count in zip(genders, counts)]
|
||||||
labels = []
|
|
||||||
for gender, count in zip(genders, counts):
|
|
||||||
percent = (count / total) * 100 if total else 0
|
|
||||||
label = f"{GENDER_LABELS.get(gender.strip().lower(), 'Inconnu')} ({percent:.1f} %)"
|
|
||||||
labels.append(label)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(6, 6))
|
fig, ax = plt.subplots(figsize=(6, 6))
|
||||||
ax.pie(
|
ax.pie(
|
||||||
@@ -46,3 +43,31 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None
|
|||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.savefig(destination_path, dpi=160)
|
fig.savefig(destination_path, dpi=160)
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_character_gender_share(counts_path: Path, destination_path: Path) -> None:
    """Draw a donut chart of the character split by gender.

    Only rows of ``counts_path`` whose ``gender`` normalises to ``male`` or
    ``female`` are kept; each wedge is labelled with the gender label and the
    absolute ``character_count``. Nothing is drawn or written when no such
    row exists.
    """
    kept_genders = ("male", "female")
    rows = []
    for row in load_gender_counts(counts_path):
        if row["gender"].strip().lower() in kept_genders:
            rows.append(row)
    if not rows:
        return

    genders = [row["gender"] for row in rows]
    counts = [int(row["character_count"]) for row in rows]
    normalized = [gender.strip().lower() for gender in genders]
    colors = [GENDER_COLORS.get(key, GENDER_COLORS["unknown"]) for key in normalized]
    labels = [f"{GENDER_LABELS.get(key, 'Inconnu')} ({count})" for key, count in zip(normalized, counts)]

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        counts,
        labels=labels,
        colors=colors,
        startangle=90,
        wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"},
    )
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.5, fc="white"))
    ax.set_title("Répartition des personnages par genre (hors inconnus)")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ def build_head_presence(
|
|||||||
parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
|
parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
|
||||||
presence: Dict[str, Set[str]] = {}
|
presence: Dict[str, Set[str]] = {}
|
||||||
for set_num, inventory in inventories.items():
|
for set_num, inventory in inventories.items():
|
||||||
|
if set_num.startswith("fig-"):
|
||||||
|
continue
|
||||||
parts = parts_by_inventory.get(inventory["id"], [])
|
parts = parts_by_inventory.get(inventory["id"], [])
|
||||||
for part_row in parts:
|
for part_row in parts:
|
||||||
if part_row["part_num"] not in head_parts:
|
if part_row["part_num"] not in head_parts:
|
||||||
|
|||||||
@@ -34,6 +34,145 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
|
|||||||
return aggregates
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_variations_and_totals(
|
||||||
|
rows: Iterable[dict],
|
||||||
|
excluded_characters: Sequence[str] | None = None,
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Compte les variations uniques et le total de minifigs par personnage."""
|
||||||
|
excluded = set(excluded_characters or [])
|
||||||
|
variations: Dict[str, set] = defaultdict(set)
|
||||||
|
totals: Dict[str, int] = defaultdict(int)
|
||||||
|
genders: Dict[str, str] = {}
|
||||||
|
for row in rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
gender = row.get("gender", "").strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
variations[character].add(fig_num)
|
||||||
|
totals[character] += 1
|
||||||
|
if character not in genders:
|
||||||
|
genders[character] = gender
|
||||||
|
aggregates: List[dict] = []
|
||||||
|
for character, fig_nums in variations.items():
|
||||||
|
aggregates.append(
|
||||||
|
{
|
||||||
|
"known_character": character,
|
||||||
|
"gender": genders.get(character, ""),
|
||||||
|
"variation_count": len(fig_nums),
|
||||||
|
"total_minifigs": totals.get(character, 0),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"]))
|
||||||
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_new_characters_by_year(
|
||||||
|
minifigs_rows: Iterable[dict],
|
||||||
|
sets_years: Dict[str, str],
|
||||||
|
excluded_characters: Sequence[str] | None = None,
|
||||||
|
start_year: int | None = None,
|
||||||
|
end_year: int | None = None,
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Compte le nombre de personnages introduits par année sur une plage donnée."""
|
||||||
|
excluded = set(excluded_characters or [])
|
||||||
|
first_year: Dict[str, int] = {}
|
||||||
|
for row in minifigs_rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
year_str = sets_years.get(row["set_num"])
|
||||||
|
if year_str is None:
|
||||||
|
continue
|
||||||
|
year_int = int(year_str)
|
||||||
|
current = first_year.get(character)
|
||||||
|
if current is None or year_int < current:
|
||||||
|
first_year[character] = year_int
|
||||||
|
counts: Dict[int, int] = {}
|
||||||
|
if start_year is not None and end_year is not None:
|
||||||
|
for year in range(start_year, end_year + 1):
|
||||||
|
counts[year] = 0
|
||||||
|
for character, year_int in first_year.items():
|
||||||
|
if start_year is not None and year_int < start_year:
|
||||||
|
continue
|
||||||
|
if end_year is not None and year_int > end_year:
|
||||||
|
continue
|
||||||
|
counts[year_int] = counts.get(year_int, 0) + 1
|
||||||
|
years = sorted(counts.keys())
|
||||||
|
results: List[dict] = []
|
||||||
|
for year in years:
|
||||||
|
results.append({"year": str(year), "new_characters": str(counts[year])})
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_new_character_sets(
|
||||||
|
minifigs_rows: Iterable[dict],
|
||||||
|
sets_lookup: Dict[str, dict],
|
||||||
|
excluded_characters: Sequence[str] | None = None,
|
||||||
|
start_year: int | None = None,
|
||||||
|
end_year: int | None = None,
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Liste les personnages introduits par année avec les sets correspondants."""
|
||||||
|
excluded = set(excluded_characters or [])
|
||||||
|
first_year: Dict[str, int] = {}
|
||||||
|
for row in minifigs_rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
set_row = sets_lookup.get(row["set_num"])
|
||||||
|
if set_row is None:
|
||||||
|
continue
|
||||||
|
year_int = int(set_row["year"])
|
||||||
|
current = first_year.get(character)
|
||||||
|
if current is None or year_int < current:
|
||||||
|
first_year[character] = year_int
|
||||||
|
rows: List[dict] = []
|
||||||
|
seen: set[tuple[str, str]] = set()
|
||||||
|
for row in minifigs_rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
set_row = sets_lookup.get(row["set_num"])
|
||||||
|
if set_row is None:
|
||||||
|
continue
|
||||||
|
intro_year = first_year.get(character)
|
||||||
|
if intro_year is None:
|
||||||
|
continue
|
||||||
|
if start_year is not None and intro_year < start_year:
|
||||||
|
continue
|
||||||
|
if end_year is not None and intro_year > end_year:
|
||||||
|
continue
|
||||||
|
if int(set_row["year"]) != intro_year:
|
||||||
|
continue
|
||||||
|
key = (character, set_row["set_num"])
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
rows.append(
|
||||||
|
{
|
||||||
|
"year": str(int(set_row["year"])),
|
||||||
|
"known_character": character,
|
||||||
|
"set_num": set_row["set_num"],
|
||||||
|
"set_id": set_row.get("set_id", ""),
|
||||||
|
"set_name": set_row.get("name", ""),
|
||||||
|
"rebrickable_url": set_row.get("rebrickable_url", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
seen.add(key)
|
||||||
|
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
||||||
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
||||||
genders_by_fig: Dict[str, str] = {}
|
genders_by_fig: Dict[str, str] = {}
|
||||||
@@ -56,6 +195,28 @@ def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
|||||||
return aggregates
|
return aggregates
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_characters_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct characters per gender, ignoring unknown genders.

    Each character is counted once, under the first ``male``/``female`` gender
    (case-insensitive, stripped) found on its rows. The result lists
    ``female`` before ``male`` and omits a gender with no character;
    ``character_count`` is a string.
    """
    reported_order = ("female", "male")
    gender_of: Dict[str, str] = {}
    for entry in rows:
        name = entry["known_character"].strip()
        label = entry.get("gender", "").strip().lower()
        if not name or label not in reported_order or name in gender_of:
            continue
        gender_of[name] = label

    summary: List[dict] = []
    for label in reported_order:
        total = sum(1 for assigned in gender_of.values() if assigned == label)
        if total:
            summary.append({"gender": label, "character_count": str(total)})
    return summary
|
||||||
|
|
||||||
|
|
||||||
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
@@ -67,6 +228,60 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_character_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender character counts to a CSV file.

    Calls ``ensure_parent_dir(path)`` first, then writes a header followed by
    one line per row with the columns ``gender`` and ``character_count``.
    """
    ensure_parent_dir(path)
    columns = ["gender", "character_count"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the number of characters introduced each year to a CSV file.

    Calls ``ensure_parent_dir(path)`` first, then writes a header followed by
    one line per row with the columns ``year`` and ``new_characters``.
    """
    ensure_parent_dir(path)
    columns = ["year", "new_characters"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV listing introduced characters and their sets.

    Calls ``ensure_parent_dir(path)`` first, then writes a header followed by
    one line per row with the columns ``year``, ``known_character``,
    ``set_num``, ``set_id``, ``set_name`` and ``rebrickable_url``.
    """
    ensure_parent_dir(path)
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown file listing the characters introduced per year and their sets.

    Rows are grouped by ``year`` and then by ``known_character``. Each year
    becomes a level-5 heading, each character a bullet, and each of its sets a
    ``[set_id](rebrickable_url) - set_name`` bullet ordered by ``set_id``.
    """
    ensure_parent_dir(path)
    # year -> character -> rows of the sets introducing that character.
    grouped: Dict[str, Dict[str, List[dict]]] = {}
    for row in rows:
        year_group = grouped.setdefault(row["year"], {})
        characters = year_group.setdefault(row["known_character"], [])
        characters.append(row)
    with path.open("w") as md_file:
        # Years are stored as strings; key=int sorts them numerically.
        for year in sorted(grouped.keys(), key=int):
            md_file.write(f"##### {year}\n\n")
            for character in sorted(grouped[year].keys()):
                md_file.write(f"- {character}\n")
                for entry in sorted(grouped[year][character], key=lambda r: r["set_id"]):
                    # A missing or empty URL yields an empty link target.
                    link = entry["rebrickable_url"] or ""
                    set_id = entry["set_id"]
                    name = entry["set_name"]
                    md_file.write(f" - [{set_id}]({link}) - {name}\n")
            md_file.write("\n")
|
||||||
|
|
||||||
|
|
||||||
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des comptes par genre."""
|
"""Écrit le CSV des comptes par genre."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
@@ -78,6 +293,17 @@ def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV comparing variations and totals per character.

    Calls ``ensure_parent_dir(path)`` first, then writes a header followed by
    one line per row with the columns ``known_character``, ``gender``,
    ``variation_count`` and ``total_minifigs``.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "variation_count", "total_minifigs"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
def load_sets_enriched(path: Path) -> Dict[str, str]:
|
def load_sets_enriched(path: Path) -> Dict[str, str]:
|
||||||
"""Indexe les années par set_num."""
|
"""Indexe les années par set_num."""
|
||||||
lookup: Dict[str, str] = {}
|
lookup: Dict[str, str] = {}
|
||||||
|
|||||||
39
scripts/list_new_minifig_characters.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""Liste les nouveaux personnages introduits chaque année et leurs sets associés."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_new_character_sets,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_new_character_sets_csv,
|
||||||
|
write_new_character_sets_markdown,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
|
||||||
|
CSV_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.csv")
|
||||||
|
MARKDOWN_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.md")
|
||||||
|
EXCLUDED_CHARACTERS = ["Figurant"]
|
||||||
|
START_YEAR = 2015
|
||||||
|
END_YEAR = 2025
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the CSV and Markdown listings of the characters introduced each year."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    sets_index = load_sets(SETS_ENRICHED_PATH)
    selection = {
        "excluded_characters": EXCLUDED_CHARACTERS,
        "start_year": START_YEAR,
        "end_year": END_YEAR,
    }
    introductions = aggregate_new_character_sets(minifig_rows, sets_index, **selection)
    # Both outputs are produced from the same aggregated rows, CSV first.
    outputs = (
        (write_new_character_sets_csv, CSV_DESTINATION_PATH),
        (write_new_character_sets_markdown, MARKDOWN_DESTINATION_PATH),
    )
    for write_output, destination in outputs:
        write_output(destination, introductions)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
27
scripts/plot_minifig_character_genders.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
"""Trace la répartition des personnages identifiés par genre."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_gender_share import plot_character_gender_share
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_characters_by_gender,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_character_gender_counts,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
COUNTS_PATH = Path("data/intermediate/minifig_character_gender_counts.csv")
|
||||||
|
DESTINATION_PATH = Path("figures/step25/minifig_character_gender_share.png")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the per-gender character counts CSV, then draw the matching chart."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    gender_counts = aggregate_characters_by_gender(minifig_rows)
    # The plot reads the CSV written just before, so the write must come first.
    write_character_gender_counts(COUNTS_PATH, gender_counts)
    plot_character_gender_share(COUNTS_PATH, DESTINATION_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
28
scripts/plot_minifig_character_variations.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""Trace le total de minifigs et leurs variations distinctes par personnage."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_characters import plot_character_variations_vs_total
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_variations_and_totals,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_character_variations_totals,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
COUNTS_PATH = Path("data/intermediate/minifig_character_variations_totals.csv")
|
||||||
|
DESTINATION_PATH = Path("figures/step22/minifig_character_variations_totals.png")
|
||||||
|
EXCLUDED_CHARACTERS = ["Figurant"]
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the variations-versus-total comparison CSV and draw its chart."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    comparison = aggregate_variations_and_totals(
        minifig_rows,
        excluded_characters=EXCLUDED_CHARACTERS,
    )
    # The plot reads the CSV written just before, so the write must come first.
    write_character_variations_totals(COUNTS_PATH, comparison)
    plot_character_variations_vs_total(COUNTS_PATH, DESTINATION_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
40
scripts/plot_minifig_new_characters.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
"""Trace le nombre de nouveaux personnages introduits par an (hors figurants)."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_characters import plot_new_characters_per_year
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_new_characters_by_year,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
load_sets_enriched,
|
||||||
|
write_new_characters_by_year,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
|
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
|
||||||
|
COUNTS_PATH = Path("data/intermediate/minifig_new_characters_by_year.csv")
|
||||||
|
DESTINATION_PATH = Path("figures/step23/minifig_new_characters_per_year.png")
|
||||||
|
MILESTONES_PATH = Path("config/milestones.csv")
|
||||||
|
EXCLUDED_CHARACTERS = ["Figurant"]
|
||||||
|
START_YEAR = 2015
|
||||||
|
END_YEAR = 2025
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the yearly count of newly introduced characters and draw the chart."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    years_by_set = load_sets_enriched(SETS_ENRICHED_PATH)
    selection = {
        "excluded_characters": EXCLUDED_CHARACTERS,
        "start_year": START_YEAR,
        "end_year": END_YEAR,
    }
    yearly_counts = aggregate_new_characters_by_year(minifig_rows, years_by_set, **selection)
    # The plot reads the CSV written just before, so the write must come first.
    write_new_characters_by_year(COUNTS_PATH, yearly_counts)
    plot_new_characters_per_year(
        COUNTS_PATH,
        MILESTONES_PATH,
        DESTINATION_PATH,
        START_YEAR,
        END_YEAR,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -5,12 +5,22 @@ from pathlib import Path
|
|||||||
from lib.rebrickable.minifig_characters import (
|
from lib.rebrickable.minifig_characters import (
|
||||||
aggregate_by_character,
|
aggregate_by_character,
|
||||||
aggregate_by_gender,
|
aggregate_by_gender,
|
||||||
|
aggregate_characters_by_gender,
|
||||||
|
aggregate_new_character_sets,
|
||||||
|
aggregate_new_characters_by_year,
|
||||||
|
aggregate_variations_and_totals,
|
||||||
aggregate_character_spans,
|
aggregate_character_spans,
|
||||||
aggregate_presence_by_year,
|
aggregate_presence_by_year,
|
||||||
load_sets_enriched,
|
load_sets_enriched,
|
||||||
write_character_counts,
|
write_character_counts,
|
||||||
|
write_character_gender_counts,
|
||||||
|
write_new_character_sets_csv,
|
||||||
|
write_new_character_sets_markdown,
|
||||||
|
write_new_characters_by_year,
|
||||||
|
write_character_variations_totals,
|
||||||
write_gender_counts,
|
write_gender_counts,
|
||||||
)
|
)
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_by_character_counts_unique_figs() -> None:
|
def test_aggregate_by_character_counts_unique_figs() -> None:
|
||||||
@@ -68,6 +78,55 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_variations_and_totals_excludes_figurants() -> None:
    """Count totals and distinct variations per character, leaving out "Figurant" rows."""
    columns = ("set_num", "part_num", "known_character", "fig_num", "gender")
    records = (
        ("123-1", "head-a", "Owen Grady", "fig-owen-1", "male"),
        ("124-1", "head-b", "Owen Grady", "fig-owen-1", "male"),
        ("125-1", "head-c", "Owen Grady", "fig-owen-2", "male"),
        ("126-1", "head-d", "Ellie Sattler", "fig-ellie-1", "female"),
        ("127-1", "head-e", "Figurant", "fig-guard-1", "unknown"),
    )
    minifig_rows = [dict(zip(columns, record)) for record in records]

    result = aggregate_variations_and_totals(minifig_rows, excluded_characters=["Figurant"])

    expected = [
        {"known_character": "Owen Grady", "gender": "male", "variation_count": 2, "total_minifigs": 3},
        {"known_character": "Ellie Sattler", "gender": "female", "variation_count": 1, "total_minifigs": 1},
    ]
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_by_gender_counts_unique_figs() -> None:
|
def test_aggregate_by_gender_counts_unique_figs() -> None:
|
||||||
"""Compter les minifigs distinctes par genre."""
|
"""Compter les minifigs distinctes par genre."""
|
||||||
aggregates = aggregate_by_gender(
|
aggregates = aggregate_by_gender(
|
||||||
@@ -86,6 +145,109 @@ def test_aggregate_by_gender_counts_unique_figs() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_characters_by_gender_unique_characters() -> None:
    """Count distinct characters per gender; the "unknown" gender is expected to be dropped."""
    pairs = (("A", "male"), ("A", "male"), ("B", "female"), ("C", "unknown"))
    minifig_rows = [{"known_character": name, "gender": gender} for name, gender in pairs]

    result = aggregate_characters_by_gender(minifig_rows)

    expected = [
        {"gender": "female", "character_count": "1"},
        {"gender": "male", "character_count": "1"},
    ]
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
    """Count newly introduced characters per year within the requested year range."""
    sets_csv = "".join(
        [
            "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n",
            "123-1,Set A,2015,0,0,,-,http://r/123-1,true\n",
            "124-1,Set B,2016,0,0,,-,http://r/124-1,true\n",
            "125-1,Set C,2017,0,0,,-,http://r/125-1,true\n",
            "126-1,Set D,2014,0,0,,-,http://r/126-1,true\n",
        ]
    )
    sets_file = tmp_path / "sets_enriched.csv"
    sets_file.write_text(sets_csv)
    years_by_set = load_sets_enriched(sets_file)

    columns = ("set_num", "known_character", "fig_num", "part_num")
    records = (
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-c"),
        # Set 126-1 is dated 2014, i.e. before the start of the requested range.
        ("126-1", "Alan Grant", "fig-grant-1", "head-d"),
    )
    minifig_rows = [dict(zip(columns, record)) for record in records]

    result = aggregate_new_characters_by_year(
        minifig_rows,
        years_by_set,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2017,
    )

    expected = [
        {"year": year, "new_characters": count}
        for year, count in (("2015", "1"), ("2016", "0"), ("2017", "1"))
    ]
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_new_character_sets_returns_intro_sets(tmp_path: Path) -> None:
    """List introduced characters together with the sets of their introduction year."""
    sets_csv = "".join(
        [
            "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n",
            "123-1,Set A,2015,0,0,,123,http://r/123-1,true\n",
            "124-1,Set B,2015,0,0,,124,http://r/124-1,true\n",
            "125-1,Set C,2016,0,0,,125,http://r/125-1,true\n",
        ]
    )
    sets_file = tmp_path / "sets_enriched.csv"
    sets_file.write_text(sets_csv)
    sets_index = load_sets(sets_file)

    input_columns = ("set_num", "known_character", "fig_num", "part_num")
    input_records = (
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Owen Grady", "fig-owen-3", "head-c"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-d"),
    )
    minifig_rows = [dict(zip(input_columns, record)) for record in input_records]

    result = aggregate_new_character_sets(
        minifig_rows,
        sets_index,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2016,
    )

    output_columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    expected_records = (
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2015", "Owen Grady", "124-1", "124", "Set B", "http://r/124-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    )
    expected = [dict(zip(output_columns, record)) for record in expected_records]
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
|
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
|
||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
destination = tmp_path / "counts.csv"
|
destination = tmp_path / "counts.csv"
|
||||||
@@ -112,6 +274,81 @@ def test_write_gender_counts_outputs_csv(tmp_path: Path) -> None:
|
|||||||
assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
|
assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
    """Write the variations/total comparison CSV and check its exact content."""
    columns = ("known_character", "gender", "variation_count", "total_minifigs")
    records = (("A", "male", 2, 3), ("B", "female", 1, 1))
    comparison = [dict(zip(columns, record)) for record in records]
    target = tmp_path / "variations.csv"

    write_character_variations_totals(target, comparison)

    expected = "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"
    assert target.read_text() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_character_gender_counts_outputs_csv(tmp_path: Path) -> None:
    """Write the per-gender character counts CSV and check its exact content."""
    gender_counts = [
        {"gender": gender, "character_count": count}
        for gender, count in (("female", "2"), ("male", "3"))
    ]
    target = tmp_path / "character_gender.csv"

    write_character_gender_counts(target, gender_counts)

    expected = "gender,character_count\nfemale,2\nmale,3\n"
    assert target.read_text() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
    """Write the new-characters-per-year CSV and check its exact content."""
    yearly_counts = [
        {"year": year, "new_characters": count}
        for year, count in (("2015", "3"), ("2016", "1"))
    ]
    target = tmp_path / "new_characters.csv"

    write_new_characters_by_year(target, yearly_counts)

    expected = "year,new_characters\n2015,3\n2016,1\n"
    assert target.read_text() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_new_character_sets_markdown_outputs_md(tmp_path: Path) -> None:
    """Write the Markdown listing of new characters with their sets and check its content."""
    columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    records = (
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    )
    introductions = [dict(zip(columns, record)) for record in records]
    target = tmp_path / "new_characters.md"

    write_new_character_sets_markdown(target, introductions)

    # One heading per year, then the character and its set link, then a blank line.
    expected_fragments = [
        "##### 2015\n\n",
        "- Owen Grady\n",
        " - [123](http://r/123-1) - Set A\n",
        "\n",
        "##### 2016\n\n",
        "- Ellie Sattler\n",
        " - [125](http://r/125-1) - Set C\n",
        "\n",
    ]
    assert target.read_text() == "".join(expected_fragments)
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
||||||
"""Calcule le total annuel en excluant les figurants."""
|
"""Calcule le total annuel en excluant les figurants."""
|
||||||
sets_path = tmp_path / "sets_enriched.csv"
|
sets_path = tmp_path / "sets_enriched.csv"
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import matplotlib
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.plots.minifig_characters import plot_minifigs_per_character
|
from lib.plots.minifig_characters import plot_minifigs_per_character
|
||||||
|
from lib.plots.minifig_characters import plot_new_characters_per_year
|
||||||
|
from lib.plots.minifig_characters import plot_character_variations_vs_total
|
||||||
|
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
@@ -23,3 +25,38 @@ def test_plot_minifigs_per_character(tmp_path: Path) -> None:
|
|||||||
|
|
||||||
assert destination.exists()
|
assert destination.exists()
|
||||||
assert destination.stat().st_size > 0
|
assert destination.stat().st_size > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_character_variations_vs_total(tmp_path: Path) -> None:
    """Render the total-versus-variations chart and check that a non-empty file is produced."""
    csv_content = "".join(
        [
            "known_character,gender,variation_count,total_minifigs\n",
            "Owen Grady,male,2,3\n",
            "Ellie Sattler,female,1,2\n",
        ]
    )
    source_csv = tmp_path / "variations.csv"
    source_csv.write_text(csv_content)
    image_path = tmp_path.joinpath("figures", "step22", "minifig_character_variations_totals.png")

    plot_character_variations_vs_total(source_csv, image_path)

    assert image_path.exists()
    assert image_path.stat().st_size > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_new_characters_per_year(tmp_path: Path) -> None:
    """Render the new-characters-per-year chart and check that a non-empty file is produced."""
    counts_content = "".join(
        [
            "year,new_characters\n",
            "2015,2\n",
            "2016,0\n",
            "2017,1\n",
        ]
    )
    source_csv = tmp_path / "new_characters.csv"
    image_path = tmp_path.joinpath("figures", "step23", "minifig_new_characters_per_year.png")
    milestones_csv = tmp_path / "milestones.csv"
    source_csv.write_text(counts_content)
    milestones_csv.write_text("year,description\n2016,Spin-off\n")

    plot_new_characters_per_year(
        source_csv,
        milestones_csv,
        image_path,
        start_year=2015,
        end_year=2017,
    )

    assert image_path.exists()
    assert image_path.stat().st_size > 0
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ def test_plot_minifigs_per_set_outputs_image(tmp_path: Path) -> None:
|
|||||||
"set_num,set_id,name,year,minifig_count\n"
|
"set_num,set_id,name,year,minifig_count\n"
|
||||||
"123-1,123,Set A,2020,2\n"
|
"123-1,123,Set A,2020,2\n"
|
||||||
"124-1,124,Set B,2021,1\n"
|
"124-1,124,Set B,2021,1\n"
|
||||||
|
"125-1,125,Set C,2021,0\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
plot_minifigs_per_set(counts_path, destination_path)
|
plot_minifigs_per_set(counts_path, destination_path)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import matplotlib
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.plots.minifig_gender_share import plot_minifig_gender_share
|
from lib.plots.minifig_gender_share import plot_minifig_gender_share
|
||||||
|
from lib.plots.minifig_gender_share import plot_character_gender_share
|
||||||
|
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
@@ -17,10 +18,26 @@ def test_plot_minifig_gender_share(tmp_path: Path) -> None:
|
|||||||
"gender,minifig_count\n"
|
"gender,minifig_count\n"
|
||||||
"male,2\n"
|
"male,2\n"
|
||||||
"female,1\n"
|
"female,1\n"
|
||||||
"unknown,1\n"
|
"unknown,5\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
plot_minifig_gender_share(counts_path, destination)
|
plot_minifig_gender_share(counts_path, destination)
|
||||||
|
|
||||||
assert destination.exists()
|
assert destination.exists()
|
||||||
assert destination.stat().st_size > 0
|
assert destination.stat().st_size > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_character_gender_share(tmp_path: Path) -> None:
    """Render the character-level gender share chart and check that a non-empty file is produced."""
    csv_content = "".join(
        [
            "gender,character_count\n",
            "male,3\n",
            "female,2\n",
        ]
    )
    source_csv = tmp_path / "character_gender.csv"
    source_csv.write_text(csv_content)
    image_path = tmp_path.joinpath("figures", "step25", "minifig_character_gender_share.png")

    plot_character_gender_share(source_csv, image_path)

    assert image_path.exists()
    assert image_path.stat().st_size > 0
|
||||||
|
|||||||