1

Compare commits

..

7 Commits

20 changed files with 866 additions and 12 deletions

View File

@@ -243,8 +243,17 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
1. `source .venv/bin/activate` 1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_characters` 2. `python -m scripts.plot_minifig_characters`
3. `python -m scripts.plot_minifig_character_variations`
4. `python -m scripts.plot_minifig_new_characters`
5. `python -m scripts.list_new_minifig_characters`
Le script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figurants, calcule par personnage le nombre de variations (fig_num distincts) et le total réel de minifigs présentes dans les sets filtrés, sérialise `data/intermediate/minifig_character_variations_totals.csv`, puis trace `figures/step22/minifig_character_variations_totals.png` en superposant un fond neutre (total) et une jauge colorée (variations, couleur = genre).
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
Le quatrième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, liste pour chaque année (2015-2025) les personnages introduits et les sets de cette année contenant ces minifigs, puis produit `data/final/minifig_new_characters_by_year.csv` et `data/final/minifig_new_characters_by_year.md` (format markdown minimal pour le blog).
### Étape 23 : présence annuelle des personnages ### Étape 23 : présence annuelle des personnages
@@ -264,8 +273,11 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
1. `source .venv/bin/activate` 1. `source .venv/bin/activate`
2. `python -m scripts.plot_minifig_gender_share` 2. `python -m scripts.plot_minifig_gender_share`
3. `python -m scripts.plot_minifig_character_genders`
Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins, masculins ou inconnus). Le premier script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins et masculins, les genres inconnus étant ignorés pour ce graphique, étiquettes en valeurs absolues).
Le second script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de personnages distincts par genre (femmes/hommes uniquement), sérialise `data/intermediate/minifig_character_gender_counts.csv`, puis trace `figures/step25/minifig_character_gender_share.png` pour comparer la répartition des personnages identifiés.
### Étape 26 : corrélation pièces / minifigs ### Étape 26 : corrélation pièces / minifigs

Binary file not shown.

Before

Width:  |  Height:  |  Size: 781 KiB

After

Width:  |  Height:  |  Size: 540 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 56 KiB

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 795 KiB

After

Width:  |  Height:  |  Size: 828 KiB

View File

@@ -1,12 +1,13 @@
"""Graphique du nombre de minifigs par personnage.""" """Graphique du nombre de minifigs par personnage."""
from pathlib import Path from pathlib import Path
from typing import List from typing import Dict, List
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.patches import Patch from matplotlib.patches import Patch
from lib.filesystem import ensure_parent_dir from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS from lib.plots.gender_palette import GENDER_COLORS, GENDER_LABELS
from lib.rebrickable.stats import read_rows from lib.rebrickable.stats import read_rows
@@ -21,6 +22,16 @@ def load_presence(path: Path) -> List[dict]:
return read_rows(path) return read_rows(path)
def load_new_characters(path: Path) -> List[dict]:
    """Load the CSV of characters introduced per year.

    The file is read through ``read_rows`` and its result is returned as is.
    """
    rows = read_rows(path)
    return rows
def load_variations_and_totals(path: Path) -> List[dict]:
    """Load the CSV comparing variations and totals per character.

    The file is read through ``read_rows`` and its result is returned as is.
    """
    rows = read_rows(path)
    return rows
def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None: def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> None:
"""Trace un diagramme en barres horizontales du nombre de minifigs par personnage.""" """Trace un diagramme en barres horizontales du nombre de minifigs par personnage."""
rows = load_counts(counts_path) rows = load_counts(counts_path)
@@ -68,6 +79,86 @@ def plot_minifigs_per_character(counts_path: Path, destination_path: Path) -> No
plt.close(fig) plt.close(fig)
def plot_character_variations_vs_total(counts_path: Path, destination_path: Path) -> None:
    """Overlay each character's distinct variations on its total minifig count.

    Reads ``known_character``, ``variation_count``, ``total_minifigs`` and the
    optional ``gender`` column from the CSV at ``counts_path``. A wide neutral
    bar shows the total and a narrower bar, coloured by gender, shows the
    number of variations. Nothing is written when the CSV has no rows.
    """
    records = load_variations_and_totals(counts_path)
    if not records:
        return

    names = [record["known_character"] for record in records]
    variations = [int(record["variation_count"]) for record in records]
    totals = [int(record["total_minifigs"]) for record in records]
    raw_genders = [record.get("gender", "") for record in records]
    gender_keys = [raw.strip().lower() for raw in raw_genders]
    unknown_color = GENDER_COLORS["unknown"]
    bar_colors = [GENDER_COLORS.get(key, unknown_color) for key in gender_keys]

    y_positions = list(range(len(records)))
    # Figure height grows with the number of characters, with a floor of 6.
    fig_height = max(6, len(records) * 0.24)
    neutral_fill = "#d7d7e0"
    outline = "#0d0d0d"

    fig, ax = plt.subplots(figsize=(12.4, fig_height))
    total_bars = ax.barh(
        y_positions,
        totals,
        color=neutral_fill,
        edgecolor=outline,
        linewidth=0.6,
        height=0.6,
        label="Total de minifigs",
    )
    # Drawn second so the thinner gauge sits on top of the total bar.
    variation_bars = ax.barh(
        y_positions,
        variations,
        color=bar_colors,
        edgecolor=outline,
        linewidth=0.8,
        height=0.36,
        label="Variations distinctes",
    )

    ax.set_yticks(y_positions)
    ax.set_yticklabels(names)
    ax.invert_yaxis()
    ax.set_xlabel("Nombre de minifigs")
    ax.set_title("Variations et total de minifigs par personnage (hors figurants)")
    ax.grid(True, axis="x", linestyle="--", alpha=0.25)
    ax.set_xlim(0, max(totals, default=0) + 1)

    # Numeric labels just right of each bar end.
    for bar, amount in zip(total_bars, totals):
        ax.text(
            amount + 0.12,
            bar.get_y() + bar.get_height() / 2,
            str(amount),
            va="center",
            fontsize=8,
            color="#1a1a1a",
        )
    for bar, amount in zip(variation_bars, variations):
        ax.text(
            amount + 0.12,
            bar.get_y() + bar.get_height() / 2,
            str(amount),
            va="center",
            fontsize=8,
            color=outline,
        )

    legend_handles = [
        Patch(facecolor=neutral_fill, edgecolor=outline, linewidth=0.6, label="Total de minifigs"),
        Patch(
            facecolor=unknown_color,
            edgecolor=outline,
            linewidth=0.8,
            label="Variations distinctes (couleur = genre)",
        ),
    ]
    # One legend entry per gender, in order of first appearance.
    color_by_gender: dict = {}
    for key, color in zip(gender_keys, bar_colors):
        color_by_gender.setdefault(key, color)
    for key, color in color_by_gender.items():
        legend_handles.append(
            Patch(
                facecolor=color,
                edgecolor=outline,
                linewidth=0.6,
                label=GENDER_LABELS.get(key, "Inconnu"),
            )
        )
    ax.legend(handles=legend_handles, loc="lower right")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None: def plot_character_year_presence(presence_path: Path, destination_path: Path) -> None:
"""Trace une heatmap indiquant le nombre de minifigs par personnage et par année.""" """Trace une heatmap indiquant le nombre de minifigs par personnage et par année."""
rows = load_presence(presence_path) rows = load_presence(presence_path)
@@ -114,3 +205,73 @@ def plot_character_year_presence(presence_path: Path, destination_path: Path) ->
fig.tight_layout() fig.tight_layout()
fig.savefig(destination_path, dpi=160) fig.savefig(destination_path, dpi=160)
plt.close(fig) plt.close(fig)
def plot_new_characters_per_year(
    counts_path: Path,
    milestones_path: Path,
    destination_path: Path,
    start_year: int,
    end_year: int,
) -> None:
    """Draw a bar chart of the number of new characters introduced per year.

    Args:
        counts_path: CSV with ``year`` and ``new_characters`` columns.
        milestones_path: Path handed to ``load_milestones``. Entries whose
            ``year`` lies in the plotted range are drawn as dashed vertical
            lines labelled with their ``description``.
        destination_path: Image file to write.
        start_year: First year on the x axis (inclusive).
        end_year: Last year on the x axis (inclusive).

    Years missing from the CSV are plotted as zero. Nothing is written when
    the CSV has no rows.
    """
    rows = load_new_characters(counts_path)
    if not rows:
        return

    counts = {int(row["year"]): int(row["new_characters"]) for row in rows}
    years = list(range(start_year, end_year + 1))
    values = [counts.get(year, 0) for year in years]

    fig_width = max(8.5, len(years) * 0.45 + 2.5)
    fig, ax = plt.subplots(figsize=(fig_width, 5.4))
    bars = ax.bar(years, values, color="#1f77b4", edgecolor="#0d0d0d", linewidth=0.7)
    ax.set_xlabel("Année")
    ax.set_ylabel("Nouveaux personnages")
    ax.set_title("Personnages introduits par an (hors figurants)")
    ax.grid(axis="y", linestyle="--", alpha=0.3)
    ax.set_xticks(years)
    ax.set_xticklabels(years, rotation=45, ha="right")
    ax.set_xlim(start_year - 0.6, end_year + 0.6)

    # Keep the historical 0-20 scale whenever the data fits, but grow the axis
    # when a year exceeds it so tall bars and their labels are not clipped.
    y_max = max(values, default=0)
    upper_limit = max(20, y_max + 1)
    ax.set_ylim(0, upper_limit)

    for bar, value in zip(bars, values):
        if value == 0:
            continue
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center", va="bottom", fontsize=8)

    milestones = load_milestones(milestones_path)
    if milestones:
        # NOTE(review): assumes each milestone's "year" compares with ints — confirm in load_milestones.
        milestones_in_range = sorted(
            [m for m in milestones if start_year <= m["year"] <= end_year],
            key=lambda m: (m["year"], m["description"]),
        )
        offset_step = 0.25
        offset_map: Dict[int, int] = {}
        top_limit = ax.get_ylim()[1]
        label_y = top_limit * 0.96
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = offset_map.get(year, 0)
            offset_map[year] = count_for_year + 1
            # Labels sharing a year alternate right/left of the line, moving
            # one step further out for each pair, so they do not overlap.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65, zorder=1)
            ax.text(
                text_x,
                label_y,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)

View File

@@ -16,7 +16,9 @@ def load_counts(path: Path) -> List[dict]:
def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None: def plot_minifigs_per_set(counts_path: Path, destination_path: Path) -> None:
"""Trace un diagramme en barres du nombre de minifigs par set (thèmes filtrés).""" """Trace un diagramme en barres du nombre de minifigs par set (thèmes filtrés)."""
rows = load_counts(counts_path) rows = [row for row in load_counts(counts_path) if int(row["minifig_count"]) > 0]
if not rows:
return
labels = [f"{row['set_num']} - {row['name']}" for row in rows] labels = [f"{row['set_num']} - {row['name']}" for row in rows]
values = [int(row["minifig_count"]) for row in rows] values = [int(row["minifig_count"]) for row in rows]
positions = list(range(len(rows))) positions = list(range(len(rows)))

View File

@@ -17,18 +17,15 @@ def load_gender_counts(path: Path) -> List[dict]:
def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None: def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None:
"""Trace un diagramme circulaire de la répartition des minifigs par genre.""" """Trace un diagramme circulaire de la répartition des minifigs par genre."""
rows = load_gender_counts(counts_path) rows = [
row for row in load_gender_counts(counts_path) if row["gender"].strip().lower() in ("male", "female")
]
if not rows: if not rows:
return return
genders = [row["gender"] for row in rows] genders = [row["gender"] for row in rows]
counts = [int(row["minifig_count"]) for row in rows] counts = [int(row["minifig_count"]) for row in rows]
colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders] colors = [GENDER_COLORS.get(gender.strip().lower(), GENDER_COLORS["unknown"]) for gender in genders]
total = sum(counts) labels = [f"{GENDER_LABELS.get(g.strip().lower(), 'Inconnu')} ({count})" for g, count in zip(genders, counts)]
labels = []
for gender, count in zip(genders, counts):
percent = (count / total) * 100 if total else 0
label = f"{GENDER_LABELS.get(gender.strip().lower(), 'Inconnu')} ({percent:.1f} %)"
labels.append(label)
fig, ax = plt.subplots(figsize=(6, 6)) fig, ax = plt.subplots(figsize=(6, 6))
ax.pie( ax.pie(
@@ -46,3 +43,31 @@ def plot_minifig_gender_share(counts_path: Path, destination_path: Path) -> None
fig.tight_layout() fig.tight_layout()
fig.savefig(destination_path, dpi=160) fig.savefig(destination_path, dpi=160)
plt.close(fig) plt.close(fig)
def plot_character_gender_share(counts_path: Path, destination_path: Path) -> None:
    """Draw a donut chart of the character split by gender.

    Only rows whose ``gender`` is ``male`` or ``female`` (case-insensitive,
    surrounding whitespace ignored) are kept. Wedge labels show the absolute
    ``character_count``. Nothing is written when no row qualifies.
    """
    kept_genders = ("male", "female")
    selected = []
    for row in load_gender_counts(counts_path):
        if row["gender"].strip().lower() in kept_genders:
            selected.append(row)
    if not selected:
        return

    raw_genders = [row["gender"] for row in selected]
    sizes = [int(row["character_count"]) for row in selected]
    wedge_colors = [GENDER_COLORS.get(raw.strip().lower(), GENDER_COLORS["unknown"]) for raw in raw_genders]
    wedge_labels = []
    for raw, size in zip(raw_genders, sizes):
        caption = GENDER_LABELS.get(raw.strip().lower(), 'Inconnu')
        wedge_labels.append(f"{caption} ({size})")

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        sizes,
        labels=wedge_labels,
        colors=wedge_colors,
        startangle=90,
        wedgeprops={"linewidth": 0.6, "edgecolor": "#0d0d0d"},
    )
    # A white disc over the centre turns the pie into a donut.
    hole = plt.Circle((0, 0), 0.5, fc="white")
    ax.add_artist(hole)
    ax.set_title("Répartition des personnages par genre (hors inconnus)")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)

View File

@@ -31,6 +31,8 @@ def build_head_presence(
parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path) parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
presence: Dict[str, Set[str]] = {} presence: Dict[str, Set[str]] = {}
for set_num, inventory in inventories.items(): for set_num, inventory in inventories.items():
if set_num.startswith("fig-"):
continue
parts = parts_by_inventory.get(inventory["id"], []) parts = parts_by_inventory.get(inventory["id"], [])
for part_row in parts: for part_row in parts:
if part_row["part_num"] not in head_parts: if part_row["part_num"] not in head_parts:

View File

@@ -34,6 +34,145 @@ def aggregate_by_character(rows: Iterable[dict]) -> List[dict]:
return aggregates return aggregates
def aggregate_variations_and_totals(
rows: Iterable[dict],
excluded_characters: Sequence[str] | None = None,
) -> List[dict]:
"""Compte les variations uniques et le total de minifigs par personnage."""
excluded = set(excluded_characters or [])
variations: Dict[str, set] = defaultdict(set)
totals: Dict[str, int] = defaultdict(int)
genders: Dict[str, str] = {}
for row in rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
gender = row.get("gender", "").strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
variations[character].add(fig_num)
totals[character] += 1
if character not in genders:
genders[character] = gender
aggregates: List[dict] = []
for character, fig_nums in variations.items():
aggregates.append(
{
"known_character": character,
"gender": genders.get(character, ""),
"variation_count": len(fig_nums),
"total_minifigs": totals.get(character, 0),
}
)
aggregates.sort(key=lambda r: (-r["total_minifigs"], -r["variation_count"], r["known_character"]))
return aggregates
def aggregate_new_characters_by_year(
minifigs_rows: Iterable[dict],
sets_years: Dict[str, str],
excluded_characters: Sequence[str] | None = None,
start_year: int | None = None,
end_year: int | None = None,
) -> List[dict]:
"""Compte le nombre de personnages introduits par année sur une plage donnée."""
excluded = set(excluded_characters or [])
first_year: Dict[str, int] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
year_str = sets_years.get(row["set_num"])
if year_str is None:
continue
year_int = int(year_str)
current = first_year.get(character)
if current is None or year_int < current:
first_year[character] = year_int
counts: Dict[int, int] = {}
if start_year is not None and end_year is not None:
for year in range(start_year, end_year + 1):
counts[year] = 0
for character, year_int in first_year.items():
if start_year is not None and year_int < start_year:
continue
if end_year is not None and year_int > end_year:
continue
counts[year_int] = counts.get(year_int, 0) + 1
years = sorted(counts.keys())
results: List[dict] = []
for year in years:
results.append({"year": str(year), "new_characters": str(counts[year])})
return results
def aggregate_new_character_sets(
minifigs_rows: Iterable[dict],
sets_lookup: Dict[str, dict],
excluded_characters: Sequence[str] | None = None,
start_year: int | None = None,
end_year: int | None = None,
) -> List[dict]:
"""Liste les personnages introduits par année avec les sets correspondants."""
excluded = set(excluded_characters or [])
first_year: Dict[str, int] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
year_int = int(set_row["year"])
current = first_year.get(character)
if current is None or year_int < current:
first_year[character] = year_int
rows: List[dict] = []
seen: set[tuple[str, str]] = set()
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
intro_year = first_year.get(character)
if intro_year is None:
continue
if start_year is not None and intro_year < start_year:
continue
if end_year is not None and intro_year > end_year:
continue
if int(set_row["year"]) != intro_year:
continue
key = (character, set_row["set_num"])
if key in seen:
continue
rows.append(
{
"year": str(int(set_row["year"])),
"known_character": character,
"set_num": set_row["set_num"],
"set_id": set_row.get("set_id", ""),
"set_name": set_row.get("name", ""),
"rebrickable_url": set_row.get("rebrickable_url", ""),
}
)
seen.add(key)
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
return rows
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
"""Compte les minifigs distinctes par genre (fig_num unique).""" """Compte les minifigs distinctes par genre (fig_num unique)."""
genders_by_fig: Dict[str, str] = {} genders_by_fig: Dict[str, str] = {}
@@ -56,6 +195,28 @@ def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
return aggregates return aggregates
def aggregate_characters_by_gender(rows: Iterable[dict]) -> List[dict]:
    """Count distinct characters per gender, ignoring unknown genders.

    A character is counted once, under the gender of its first row whose
    ``gender`` is ``male`` or ``female`` (case-insensitive). Rows with an
    empty ``known_character`` are skipped.

    Returns:
        Up to two dicts (``female`` first, then ``male``) with ``gender`` and
        a string ``character_count``; a gender with no character is omitted.
    """
    counted_characters = set()
    tally: Dict[str, int] = {}
    for row in rows:
        name = row["known_character"].strip()
        gender = row.get("gender", "").strip().lower()
        if not name or gender not in ("male", "female") or name in counted_characters:
            continue
        counted_characters.add(name)
        tally[gender] = tally.get(gender, 0) + 1

    return [
        {"gender": gender, "character_count": str(tally[gender])}
        for gender in ("female", "male")
        if gender in tally
    ]
def write_character_counts(path: Path, rows: Sequence[dict]) -> None: def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par personnage.""" """Écrit le CSV des comptes par personnage."""
ensure_parent_dir(path) ensure_parent_dir(path)
@@ -67,6 +228,60 @@ def write_character_counts(path: Path, rows: Sequence[dict]) -> None:
writer.writerow(row) writer.writerow(row)
def write_character_gender_counts(path: Path, rows: Sequence[dict]) -> None:
    """Write the per-gender character counts to a CSV file.

    The parent directory is created through ``ensure_parent_dir``; the file
    holds a header followed by one line per row (``gender``, ``character_count``).
    """
    ensure_parent_dir(path)
    columns = ["gender", "character_count"]
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the number of characters introduced each year to a CSV file.

    The parent directory is created through ``ensure_parent_dir``; the file
    holds a header followed by one line per row (``year``, ``new_characters``).
    """
    ensure_parent_dir(path)
    columns = ["year", "new_characters"]
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the introduced characters and their sets to a CSV file.

    The parent directory is created through ``ensure_parent_dir``; the file
    holds a header followed by one line per row.
    """
    ensure_parent_dir(path)
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown listing of introduced characters per year with their sets.

    Output layout: one ``#####`` heading per year (ascending numeric order),
    one bullet per character (sorted by name), and under it one linked
    bullet per set (sorted by ``set_id``). A blank line closes each year.
    """
    ensure_parent_dir(path)

    by_year: Dict[str, Dict[str, List[dict]]] = {}
    for row in rows:
        by_year.setdefault(row["year"], {}).setdefault(row["known_character"], []).append(row)

    with path.open("w") as handle:
        for year in sorted(by_year, key=int):
            handle.write(f"##### {year}\n\n")
            per_character = by_year[year]
            for character in sorted(per_character):
                handle.write(f"- {character}\n")
                entries = sorted(per_character[character], key=lambda item: item["set_id"])
                for entry in entries:
                    # A falsy URL is rendered as an empty link target.
                    url = entry["rebrickable_url"] or ""
                    handle.write(f" - [{entry['set_id']}]({url}) - {entry['set_name']}\n")
            handle.write("\n")
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None: def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par genre.""" """Écrit le CSV des comptes par genre."""
ensure_parent_dir(path) ensure_parent_dir(path)
@@ -78,6 +293,17 @@ def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
writer.writerow(row) writer.writerow(row)
def write_character_variations_totals(path: Path, rows: Sequence[dict]) -> None:
    """Write the variations-versus-total comparison per character to a CSV file.

    The parent directory is created through ``ensure_parent_dir``; the file
    holds a header followed by one line per row.
    """
    ensure_parent_dir(path)
    columns = ["known_character", "gender", "variation_count", "total_minifigs"]
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def load_sets_enriched(path: Path) -> Dict[str, str]: def load_sets_enriched(path: Path) -> Dict[str, str]:
"""Indexe les années par set_num.""" """Indexe les années par set_num."""
lookup: Dict[str, str] = {} lookup: Dict[str, str] = {}

View File

@@ -0,0 +1,39 @@
"""Liste les nouveaux personnages introduits chaque année et leurs sets associés."""
from pathlib import Path
from lib.rebrickable.minifig_character_sets import load_sets
from lib.rebrickable.minifig_characters import (
aggregate_new_character_sets,
load_minifigs_by_set,
write_new_character_sets_csv,
write_new_character_sets_markdown,
)
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
CSV_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.csv")
MARKDOWN_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.md")
EXCLUDED_CHARACTERS = ["Figurant"]
START_YEAR = 2015
END_YEAR = 2025
def main() -> None:
    """Build the CSV and Markdown listings of characters introduced each year."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    sets_by_num = load_sets(SETS_ENRICHED_PATH)
    options = {
        "excluded_characters": EXCLUDED_CHARACTERS,
        "start_year": START_YEAR,
        "end_year": END_YEAR,
    }
    introduced = aggregate_new_character_sets(minifig_rows, sets_by_num, **options)
    # The same rows feed both output formats.
    write_new_character_sets_csv(CSV_DESTINATION_PATH, introduced)
    write_new_character_sets_markdown(MARKDOWN_DESTINATION_PATH, introduced)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,27 @@
"""Trace la répartition des personnages identifiés par genre."""
from pathlib import Path
from lib.plots.minifig_gender_share import plot_character_gender_share
from lib.rebrickable.minifig_characters import (
aggregate_characters_by_gender,
load_minifigs_by_set,
write_character_gender_counts,
)
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
COUNTS_PATH = Path("data/intermediate/minifig_character_gender_counts.csv")
DESTINATION_PATH = Path("figures/step25/minifig_character_gender_share.png")
def main() -> None:
    """Build the per-gender character CSV, then draw the chart from it."""
    gender_counts = aggregate_characters_by_gender(load_minifigs_by_set(MINIFIGS_BY_SET_PATH))
    write_character_gender_counts(COUNTS_PATH, gender_counts)
    # The plot re-reads the CSV that was just written.
    plot_character_gender_share(COUNTS_PATH, DESTINATION_PATH)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,28 @@
"""Trace le total de minifigs et leurs variations distinctes par personnage."""
from pathlib import Path
from lib.plots.minifig_characters import plot_character_variations_vs_total
from lib.rebrickable.minifig_characters import (
aggregate_variations_and_totals,
load_minifigs_by_set,
write_character_variations_totals,
)
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
COUNTS_PATH = Path("data/intermediate/minifig_character_variations_totals.csv")
DESTINATION_PATH = Path("figures/step22/minifig_character_variations_totals.png")
EXCLUDED_CHARACTERS = ["Figurant"]
def main() -> None:
    """Build the variations/total comparison CSV, then draw the chart from it."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    comparison = aggregate_variations_and_totals(
        minifig_rows,
        excluded_characters=EXCLUDED_CHARACTERS,
    )
    write_character_variations_totals(COUNTS_PATH, comparison)
    # The plot re-reads the CSV that was just written.
    plot_character_variations_vs_total(COUNTS_PATH, DESTINATION_PATH)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,40 @@
"""Trace le nombre de nouveaux personnages introduits par an (hors figurants)."""
from pathlib import Path
from lib.plots.minifig_characters import plot_new_characters_per_year
from lib.rebrickable.minifig_characters import (
aggregate_new_characters_by_year,
load_minifigs_by_set,
load_sets_enriched,
write_new_characters_by_year,
)
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
COUNTS_PATH = Path("data/intermediate/minifig_new_characters_by_year.csv")
DESTINATION_PATH = Path("figures/step23/minifig_new_characters_per_year.png")
MILESTONES_PATH = Path("config/milestones.csv")
EXCLUDED_CHARACTERS = ["Figurant"]
START_YEAR = 2015
END_YEAR = 2025
def main() -> None:
    """Build the yearly count of new characters, then draw the chart from it."""
    minifig_rows = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
    year_by_set = load_sets_enriched(SETS_ENRICHED_PATH)
    yearly_counts = aggregate_new_characters_by_year(
        minifig_rows,
        year_by_set,
        excluded_characters=EXCLUDED_CHARACTERS,
        start_year=START_YEAR,
        end_year=END_YEAR,
    )
    write_new_characters_by_year(COUNTS_PATH, yearly_counts)
    # The plot re-reads the CSV that was just written.
    plot_new_characters_per_year(
        COUNTS_PATH,
        MILESTONES_PATH,
        DESTINATION_PATH,
        START_YEAR,
        END_YEAR,
    )
if __name__ == "__main__":
main()

View File

@@ -5,12 +5,22 @@ from pathlib import Path
from lib.rebrickable.minifig_characters import ( from lib.rebrickable.minifig_characters import (
aggregate_by_character, aggregate_by_character,
aggregate_by_gender, aggregate_by_gender,
aggregate_characters_by_gender,
aggregate_new_character_sets,
aggregate_new_characters_by_year,
aggregate_variations_and_totals,
aggregate_character_spans, aggregate_character_spans,
aggregate_presence_by_year, aggregate_presence_by_year,
load_sets_enriched, load_sets_enriched,
write_character_counts, write_character_counts,
write_character_gender_counts,
write_new_character_sets_csv,
write_new_character_sets_markdown,
write_new_characters_by_year,
write_character_variations_totals,
write_gender_counts, write_gender_counts,
) )
from lib.rebrickable.minifig_character_sets import load_sets
def test_aggregate_by_character_counts_unique_figs() -> None: def test_aggregate_by_character_counts_unique_figs() -> None:
@@ -68,6 +78,55 @@ def test_aggregate_by_character_counts_unique_figs() -> None:
] ]
def test_aggregate_variations_and_totals_excludes_figurants() -> None:
    """Totals and variations are counted while the excluded character is dropped."""
    columns = ("set_num", "part_num", "known_character", "fig_num", "gender")
    records = [
        ("123-1", "head-a", "Owen Grady", "fig-owen-1", "male"),
        ("124-1", "head-b", "Owen Grady", "fig-owen-1", "male"),
        ("125-1", "head-c", "Owen Grady", "fig-owen-2", "male"),
        ("126-1", "head-d", "Ellie Sattler", "fig-ellie-1", "female"),
        ("127-1", "head-e", "Figurant", "fig-guard-1", "unknown"),
    ]
    rows = [dict(zip(columns, record)) for record in records]

    aggregates = aggregate_variations_and_totals(rows, excluded_characters=["Figurant"])

    expected = [
        {"known_character": "Owen Grady", "gender": "male", "variation_count": 2, "total_minifigs": 3},
        {"known_character": "Ellie Sattler", "gender": "female", "variation_count": 1, "total_minifigs": 1},
    ]
    assert aggregates == expected
def test_aggregate_by_gender_counts_unique_figs() -> None: def test_aggregate_by_gender_counts_unique_figs() -> None:
"""Compter les minifigs distinctes par genre.""" """Compter les minifigs distinctes par genre."""
aggregates = aggregate_by_gender( aggregates = aggregate_by_gender(
@@ -86,6 +145,109 @@ def test_aggregate_by_gender_counts_unique_figs() -> None:
] ]
def test_aggregate_characters_by_gender_unique_characters() -> None:
    """Distinct characters are counted per gender and unknown genders are ignored."""
    pairs = [("A", "male"), ("A", "male"), ("B", "female"), ("C", "unknown")]
    rows = [{"known_character": name, "gender": gender} for name, gender in pairs]

    aggregates = aggregate_characters_by_gender(rows)

    expected = [
        {"gender": "female", "character_count": "1"},
        {"gender": "male", "character_count": "1"},
    ]
    assert aggregates == expected
def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
    """Count new characters per year, restricted to the requested year range.

    The fixture places Alan Grant in a 2014 set (outside 2015-2017) and gives
    Owen Grady a second figure in 2016; the expected result therefore reports
    one new character in 2015, none in 2016 and one in 2017.
    """
    sets_csv = tmp_path / "sets_enriched.csv"
    sets_content = (
        "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
        "123-1,Set A,2015,0,0,,-,http://r/123-1,true\n"
        "124-1,Set B,2016,0,0,,-,http://r/124-1,true\n"
        "125-1,Set C,2017,0,0,,-,http://r/125-1,true\n"
        "126-1,Set D,2014,0,0,,-,http://r/126-1,true\n"
    )
    sets_csv.write_text(sets_content)
    years_by_set = load_sets_enriched(sets_csv)

    columns = ("set_num", "known_character", "fig_num", "part_num")
    fixture = (
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-c"),
        ("126-1", "Alan Grant", "fig-grant-1", "head-d"),
    )
    minifig_rows = [dict(zip(columns, values)) for values in fixture]
    expected = [
        {"year": "2015", "new_characters": "1"},
        {"year": "2016", "new_characters": "0"},
        {"year": "2017", "new_characters": "1"},
    ]

    observed = aggregate_new_characters_by_year(
        minifig_rows,
        years_by_set,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2017,
    )

    assert observed == expected
def test_aggregate_new_character_sets_returns_intro_sets(tmp_path: Path) -> None:
    """List introduced characters together with the sets of their first year.

    Owen Grady appears in two 2015 sets and one 2016 set; only the two 2015
    sets are expected for him. Ellie Sattler first appears in the 2016 set.
    """
    sets_csv = tmp_path / "sets_enriched.csv"
    sets_content = (
        "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
        "123-1,Set A,2015,0,0,,123,http://r/123-1,true\n"
        "124-1,Set B,2015,0,0,,124,http://r/124-1,true\n"
        "125-1,Set C,2016,0,0,,125,http://r/125-1,true\n"
    )
    sets_csv.write_text(sets_content)
    sets_by_num = load_sets(sets_csv)

    input_columns = ("set_num", "known_character", "fig_num", "part_num")
    input_fixture = (
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Owen Grady", "fig-owen-3", "head-c"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-d"),
    )
    minifig_rows = [dict(zip(input_columns, values)) for values in input_fixture]

    output_columns = (
        "year",
        "known_character",
        "set_num",
        "set_id",
        "set_name",
        "rebrickable_url",
    )
    output_fixture = (
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2015", "Owen Grady", "124-1", "124", "Set B", "http://r/124-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    )
    expected = [dict(zip(output_columns, values)) for values in output_fixture]

    observed = aggregate_new_character_sets(
        minifig_rows,
        sets_by_num,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2016,
    )

    assert observed == expected
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None: def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
"""Écrit le CSV des comptes par personnage.""" """Écrit le CSV des comptes par personnage."""
destination = tmp_path / "counts.csv" destination = tmp_path / "counts.csv"
@@ -112,6 +274,81 @@ def test_write_gender_counts_outputs_csv(tmp_path: Path) -> None:
assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n" assert destination.read_text() == "gender,minifig_count\nmale,2\nfemale,1\n"
def test_write_character_variations_totals_outputs_csv(tmp_path: Path) -> None:
    """Write the variations-versus-total comparison CSV and check its content."""
    output_csv = tmp_path / "variations.csv"
    columns = ("known_character", "gender", "variation_count", "total_minifigs")
    fixture = (
        ("A", "male", 2, 3),
        ("B", "female", 1, 1),
    )
    records = [dict(zip(columns, values)) for values in fixture]
    expected_text = "known_character,gender,variation_count,total_minifigs\nA,male,2,3\nB,female,1,1\n"

    write_character_variations_totals(output_csv, records)

    assert output_csv.read_text() == expected_text
def test_write_character_gender_counts_outputs_csv(tmp_path: Path) -> None:
    """Write the per-gender character counts CSV and check its content."""
    output_csv = tmp_path / "character_gender.csv"
    records = [
        {"gender": gender, "character_count": count}
        for gender, count in (("female", "2"), ("male", "3"))
    ]
    expected_text = "gender,character_count\nfemale,2\nmale,3\n"

    write_character_gender_counts(output_csv, records)

    assert output_csv.read_text() == expected_text
def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
    """Write the new-characters-per-year CSV and check its content."""
    output_csv = tmp_path / "new_characters.csv"
    records = [
        {"year": year, "new_characters": count}
        for year, count in (("2015", "3"), ("2016", "1"))
    ]
    expected_text = "year,new_characters\n2015,3\n2016,1\n"

    write_new_characters_by_year(output_csv, records)

    assert output_csv.read_text() == expected_text
def test_write_new_character_sets_markdown_outputs_md(tmp_path: Path) -> None:
    """Write the Markdown listing of new characters and their sets.

    The expected document has one ``#####`` heading per year, a bullet per
    character and a nested bullet linking each set id to its URL.
    """
    output_md = tmp_path / "new_characters.md"
    columns = (
        "year",
        "known_character",
        "set_num",
        "set_id",
        "set_name",
        "rebrickable_url",
    )
    fixture = (
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    )
    records = [dict(zip(columns, values)) for values in fixture]
    expected_text = (
        "##### 2015\n\n"
        "- Owen Grady\n"
        "  - [123](http://r/123-1) - Set A\n"
        "\n"
        "##### 2016\n\n"
        "- Ellie Sattler\n"
        "  - [125](http://r/125-1) - Set C\n"
        "\n"
    )

    write_new_character_sets_markdown(output_md, records)

    assert output_md.read_text() == expected_text
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
"""Calcule le total annuel en excluant les figurants.""" """Calcule le total annuel en excluant les figurants."""
sets_path = tmp_path / "sets_enriched.csv" sets_path = tmp_path / "sets_enriched.csv"

View File

@@ -4,6 +4,8 @@ import matplotlib
from pathlib import Path from pathlib import Path
from lib.plots.minifig_characters import plot_minifigs_per_character from lib.plots.minifig_characters import plot_minifigs_per_character
from lib.plots.minifig_characters import plot_new_characters_per_year
from lib.plots.minifig_characters import plot_character_variations_vs_total
matplotlib.use("Agg") matplotlib.use("Agg")
@@ -23,3 +25,38 @@ def test_plot_minifigs_per_character(tmp_path: Path) -> None:
assert destination.exists() assert destination.exists()
assert destination.stat().st_size > 0 assert destination.stat().st_size > 0
def test_plot_character_variations_vs_total(tmp_path: Path) -> None:
    """Render the total-versus-variations chart and check a file is produced."""
    source_csv = tmp_path / "variations.csv"
    image_path = tmp_path / "figures" / "step22" / "minifig_character_variations_totals.png"
    csv_content = (
        "known_character,gender,variation_count,total_minifigs\n"
        "Owen Grady,male,2,3\n"
        "Ellie Sattler,female,1,2\n"
    )
    source_csv.write_text(csv_content)

    plot_character_variations_vs_total(source_csv, image_path)

    # The image must have been created and must not be empty.
    assert image_path.exists()
    assert image_path.stat().st_size > 0
def test_plot_new_characters_per_year(tmp_path: Path) -> None:
    """Render the new-characters-per-year chart and check a file is produced."""
    source_csv = tmp_path / "new_characters.csv"
    image_path = tmp_path / "figures" / "step23" / "minifig_new_characters_per_year.png"
    milestones_csv = tmp_path / "milestones.csv"
    csv_content = (
        "year,new_characters\n"
        "2015,2\n"
        "2016,0\n"
        "2017,1\n"
    )
    source_csv.write_text(csv_content)
    milestones_csv.write_text("year,description\n2016,Spin-off\n")

    plot_new_characters_per_year(
        source_csv,
        milestones_csv,
        image_path,
        start_year=2015,
        end_year=2017,
    )

    # The image must have been created and must not be empty.
    assert image_path.exists()
    assert image_path.stat().st_size > 0

View File

@@ -17,6 +17,7 @@ def test_plot_minifigs_per_set_outputs_image(tmp_path: Path) -> None:
"set_num,set_id,name,year,minifig_count\n" "set_num,set_id,name,year,minifig_count\n"
"123-1,123,Set A,2020,2\n" "123-1,123,Set A,2020,2\n"
"124-1,124,Set B,2021,1\n" "124-1,124,Set B,2021,1\n"
"125-1,125,Set C,2021,0\n"
) )
plot_minifigs_per_set(counts_path, destination_path) plot_minifigs_per_set(counts_path, destination_path)

View File

@@ -4,6 +4,7 @@ import matplotlib
from pathlib import Path from pathlib import Path
from lib.plots.minifig_gender_share import plot_minifig_gender_share from lib.plots.minifig_gender_share import plot_minifig_gender_share
from lib.plots.minifig_gender_share import plot_character_gender_share
matplotlib.use("Agg") matplotlib.use("Agg")
@@ -17,10 +18,26 @@ def test_plot_minifig_gender_share(tmp_path: Path) -> None:
"gender,minifig_count\n" "gender,minifig_count\n"
"male,2\n" "male,2\n"
"female,1\n" "female,1\n"
"unknown,1\n" "unknown,5\n"
) )
plot_minifig_gender_share(counts_path, destination) plot_minifig_gender_share(counts_path, destination)
assert destination.exists() assert destination.exists()
assert destination.stat().st_size > 0 assert destination.stat().st_size > 0
def test_plot_character_gender_share(tmp_path: Path) -> None:
    """Render the character-level gender share chart and check a file is produced."""
    source_csv = tmp_path / "character_gender.csv"
    image_path = tmp_path / "figures" / "step25" / "minifig_character_gender_share.png"
    csv_content = (
        "gender,character_count\n"
        "male,3\n"
        "female,2\n"
    )
    source_csv.write_text(csv_content)

    plot_character_gender_share(source_csv, image_path)

    # The image must have been created and must not be empty.
    assert image_path.exists()
    assert image_path.stat().st_size > 0