From 47ee76cacf53444abe1d421a4d9ecc097620de4a Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Mon, 1 Dec 2025 23:56:03 +0100 Subject: [PATCH] =?UTF-8?q?Ajoute=20les=20agr=C3=A9gats=20et=20visualisati?= =?UTF-8?q?ons=20globales=20des=20couleurs=20de=20t=C3=AAtes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 ++ lib/plots/global_minifig_heads.py | 90 ++++++++++++++++ lib/plots/minifig_skin_tones.py | 86 ++++++++++++++++ lib/rebrickable/global_minifig_heads.py | 103 +++++++++++++++++++ scripts/compute_global_minifig_heads.py | 29 ++++++ scripts/plot_global_minifig_heads.py | 18 ++++ scripts/plot_global_minifig_skin_tones.py | 19 ++++ tests/test_global_minifig_heads.py | 92 +++++++++++++++++ tests/test_global_minifig_heads_plot.py | 28 +++++ tests/test_global_minifig_skin_tones_plot.py | 29 ++++++ 10 files changed, 502 insertions(+) create mode 100644 lib/plots/global_minifig_heads.py create mode 100644 lib/plots/minifig_skin_tones.py create mode 100644 lib/rebrickable/global_minifig_heads.py create mode 100644 scripts/compute_global_minifig_heads.py create mode 100644 scripts/plot_global_minifig_heads.py create mode 100644 scripts/plot_global_minifig_skin_tones.py create mode 100644 tests/test_global_minifig_heads.py create mode 100644 tests/test_global_minifig_heads_plot.py create mode 100644 tests/test_global_minifig_skin_tones_plot.py diff --git a/README.md b/README.md index 0f15936..7e78601 100644 --- a/README.md +++ b/README.md @@ -200,3 +200,11 @@ Le script identifie les têtes de minifigs via la catégorie Rebrickable dédié 2. `python -m scripts.plot_minifig_heads` Le script lit `data/intermediate/minifig_heads_by_year.csv` et produit `figures/step16/minifig_heads_shares.png` (répartition annuelle des couleurs de têtes, en parts empilées) et `figures/step16/minifig_heads_global.png` (donut global des parts cumulées). Les couleurs sont limitées aux plus fréquentes (avec regroupement des autres). + +### Étape 18 : usage global de la couleur Yellow pour les têtes + +1. `source .venv/bin/activate` +2. `python -m scripts.compute_global_minifig_heads` +3. `python -m scripts.plot_global_minifig_skin_tones` + +Ces scripts lisent les CSV bruts du catalogue complet (`data/raw/inventories.csv`, `inventory_parts.csv`, `parts.csv`, `colors.csv`, `sets.csv`), extraient les têtes de minifigs via `part_cat_id=59`, agrègent les couleurs par année dans `data/intermediate/global_minifig_heads_by_year.csv`, puis tracent `figures/step17/global_minifig_heads_yellow_share.png` montrant la part annuelle de la couleur Yellow comparée au reste, jalons inclus. diff --git a/lib/plots/global_minifig_heads.py b/lib/plots/global_minifig_heads.py new file mode 100644 index 0000000..2ba4b7b --- /dev/null +++ b/lib/plots/global_minifig_heads.py @@ -0,0 +1,90 @@ +"""Visualisation des couleurs de têtes de minifigs sur le catalogue complet.""" + +from pathlib import Path +from typing import Dict, Iterable, List, Tuple + +import matplotlib.pyplot as plt + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +def load_global_heads(heads_path: Path) -> List[dict]: + """Charge l'agrégat global des têtes par année.""" + return read_rows(heads_path) + + +def select_top_colors(rows: Iterable[dict], limit: int = 12) -> List[Tuple[str, str, str]]: + """Retourne les couleurs les plus fréquentes globalement (nom, rgb, is_translucent).""" + totals: Dict[Tuple[str, str, str], int] = {} + for row in rows: + key = (row["color_name"], row["color_rgb"], row["is_translucent"]) + totals[key] = totals.get(key, 0) + int(row["quantity"]) + sorted_colors = sorted(totals.items(), key=lambda item: (-item[1], item[0][0], item[0][1])) + return [color for color, _ in sorted_colors[:limit]] + + +def build_share_matrix( + rows: Iterable[dict], top_colors: List[Tuple[str, str, str]] +) -> Tuple[List[int], List[Tuple[str, str, str]], List[Dict[str, float]]]: + """Construit les parts par année en regroupant les couleurs hors top dans 'Autres'.""" + years = sorted({int(row["year"]) for row in rows}) + colors = top_colors + [("Autres", "444444", "false")] + shares_by_year: List[Dict[str, float]] = [] + rows_by_year: Dict[int, List[dict]] = {year: [] for year in years} + for row in rows: + rows_by_year[int(row["year"])].append(row) + for year in years: + year_rows = rows_by_year[year] + total = sum(int(r["quantity"]) for r in year_rows) + shares: Dict[str, float] = {color[0]: 0.0 for color in colors} + for r in year_rows: + key = (r["color_name"], r["color_rgb"], r["is_translucent"]) + quantity = int(r["quantity"]) + target = "Autres" if key not in top_colors else r["color_name"] + shares[target] = shares.get(target, 0.0) + quantity / total if total > 0 else 0.0 + shares_by_year.append(shares) + return years, colors, shares_by_year + + +def plot_global_head_shares( + heads_path: Path, + destination_path: Path, + top_limit: int = 12, +) -> None: + """Trace les parts des couleurs de têtes de minifigs par année (catalogue complet).""" + rows = load_global_heads(heads_path) + top_colors = select_top_colors(rows, limit=top_limit) + years, colors, shares_by_year = build_share_matrix(rows, top_colors) + + fig, ax = plt.subplots(figsize=(14, 6)) + bottoms = [0.0] * len(years) + y_positions = list(range(len(years))) + for name, color_rgb, is_trans in colors: + values = [shares[name] for shares in shares_by_year] + edge = "#f2f2f2" if is_trans == "true" else "#0d0d0d" + ax.bar( + years, + values, + bottom=bottoms, + color=f"#{color_rgb}", + edgecolor=edge, + label=name, + linewidth=0.7, + ) + bottoms = [b + v for b, v in zip(bottoms, values)] + ax.set_ylim(0, 1.05) + ax.set_ylabel("Part des couleurs (têtes de minifigs, catalogue complet)") + ax.set_xlabel("Année") + if len(years) > 15: + step = max(1, len(years) // 10) + ax.set_xticks(years[::step]) + else: + ax.set_xticks(years) + ax.set_title("Répartition des couleurs de têtes de minifigs par année (catalogue complet)") + ax.legend(loc="upper left", bbox_to_anchor=(1.02, 1), frameon=False) + ax.grid(True, axis="y", linestyle="--", alpha=0.25) + ensure_parent_dir(destination_path) + fig.tight_layout() + fig.savefig(destination_path, dpi=170) + plt.close(fig) diff --git a/lib/plots/minifig_skin_tones.py b/lib/plots/minifig_skin_tones.py new file mode 100644 index 0000000..8d34ebb --- /dev/null +++ b/lib/plots/minifig_skin_tones.py @@ -0,0 +1,86 @@ +"""Visualisation de la part des têtes jaunes sur le catalogue global.""" + +from pathlib import Path +from typing import Dict, List + +import matplotlib.pyplot as plt + +from lib.filesystem import ensure_parent_dir +from lib.milestones import load_milestones +from lib.rebrickable.stats import read_rows + + +def compute_yellow_share(rows: List[dict]) -> List[dict]: + """Calcule la part de la couleur Yellow par année.""" + aggregated: Dict[str, Dict[str, int]] = {} + for row in rows: + year = row["year"] + if year not in aggregated: + aggregated[year] = {"yellow": 0, "total": 0} + aggregated[year]["total"] += int(row["quantity"]) + if row["color_name"].lower() == "yellow" or row["color_rgb"].upper() == "FFFF00": + aggregated[year]["yellow"] += int(row["quantity"]) + results = [] + for year in sorted(aggregated.keys(), key=int): + total = aggregated[year]["total"] + yellow = aggregated[year]["yellow"] + share = yellow / total if total > 0 else 0 + results.append({"year": int(year), "yellow_share": share, "total": total}) + return results + + +def plot_yellow_share(heads_path: Path, milestones_path: Path, destination_path: Path) -> None: + """Trace l'évolution de la part de têtes jaunes dans le catalogue complet.""" + rows = read_rows(heads_path) + milestones = load_milestones(milestones_path) + series = compute_yellow_share(rows) + years = [item["year"] for item in series] + shares = [item["yellow_share"] for item in series] + + fig, ax = plt.subplots(figsize=(13, 5.5)) + ax.plot(years, shares, color="#f2c300", marker="o", linewidth=2.4, label="Part Yellow") + ax.fill_between(years, shares, color="#f2c300", alpha=0.18) + ax.set_ylim(0, min(1.0, max(shares + [0.01]) * 1.1)) + ax.set_ylabel("Part de têtes Yellow") + ax.set_xlabel("Année") + if len(years) > 15: + step = max(1, len(years) // 10) + ax.set_xticks(years[::step]) + else: + ax.set_xticks(years) + ax.set_title("Evolution de l'usage des têtes Yellow (catalogue complet)") + ax.grid(True, linestyle="--", alpha=0.3) + if milestones: + min_year = min(years) + max_year = max(years) + milestones_in_range = sorted( + [m for m in milestones if min_year <= m["year"] <= max_year], + key=lambda m: (m["year"], m["description"]), + ) + offset_map: Dict[int, int] = {} + offset_step = 0.35 + top_limit = ax.get_ylim()[1] * 1.05 + for milestone in milestones_in_range: + year = milestone["year"] + count_for_year = offset_map.get(year, 0) + offset_map[year] = count_for_year + 1 + horizontal_offset = offset_step * (count_for_year // 2 + 1) + if count_for_year % 2 == 1: + horizontal_offset *= -1 + text_x = year + horizontal_offset + ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65, zorder=1) + ax.text( + text_x, + top_limit, + milestone["description"], + rotation=90, + verticalalignment="top", + horizontalalignment="center", + fontsize=8, + color="#d62728", + ) + ax.set_ylim(ax.get_ylim()[0], top_limit * (1 + max(offset_map.values(), default=0) * 0.02)) + ensure_parent_dir(destination_path) + fig.tight_layout() + fig.savefig(destination_path, dpi=170) + plt.close(fig) diff --git a/lib/rebrickable/global_minifig_heads.py b/lib/rebrickable/global_minifig_heads.py new file mode 100644 index 0000000..2dc1cdb --- /dev/null +++ b/lib/rebrickable/global_minifig_heads.py @@ -0,0 +1,103 @@ +"""Extraction des couleurs de têtes de minifigs sur le catalogue complet.""" + +import csv +from pathlib import Path +from typing import Dict, Iterable, List, Set, Tuple + +from lib.rebrickable.parts_inventory import normalize_boolean, select_latest_inventories + + +HEAD_CATEGORIES = {"59"} + + +def load_head_parts(parts_path: Path, head_categories: Set[str] | None = None) -> Set[str]: + """Construit l'ensemble des références de têtes via leur catégorie.""" + categories = head_categories or HEAD_CATEGORIES + head_parts: Set[str] = set() + with parts_path.open() as parts_file: + reader = csv.DictReader(parts_file) + for row in reader: + if row["part_cat_id"] in categories: + head_parts.add(row["part_num"]) + return head_parts + + +def build_sets_year_lookup(sets_path: Path) -> Dict[str, str]: + """Indexe les années par set_num.""" + lookup: Dict[str, str] = {} + with sets_path.open() as sets_file: + reader = csv.DictReader(sets_file) + for row in reader: + lookup[row["set_num"]] = row["year"] + return lookup + + +def build_color_lookup(colors_path: Path) -> Dict[str, dict]: + """Construit un index des couleurs par identifiant.""" + lookup: Dict[str, dict] = {} + with colors_path.open() as colors_file: + reader = csv.DictReader(colors_file) + for row in reader: + lookup[row["id"]] = { + "rgb": row["rgb"], + "is_translucent": row["is_trans"].lower(), + "name": row["name"], + } + return lookup + + +def aggregate_global_heads_by_year( + inventories_path: Path, + inventory_parts_path: Path, + parts_path: Path, + colors_path: Path, + sets_path: Path, + head_categories: Set[str] | None = None, +) -> List[dict]: + """Agrège les couleurs de têtes par année sur le catalogue complet.""" + head_parts = load_head_parts(parts_path, head_categories) + latest_inventories = select_latest_inventories(inventories_path) + latest_inventory_ids = {data["id"]: set_num for set_num, data in latest_inventories.items()} + colors_lookup = build_color_lookup(colors_path) + sets_year = build_sets_year_lookup(sets_path) + aggregates: Dict[Tuple[str, str, str], dict] = {} + with inventory_parts_path.open() as parts_file: + reader = csv.DictReader(parts_file) + for row in reader: + inventory_id = row["inventory_id"] + if inventory_id not in latest_inventory_ids: + continue + if row["part_num"] not in head_parts: + continue + if normalize_boolean(row["is_spare"]) == "true": + continue + set_num = latest_inventory_ids[inventory_id] + year = sets_year.get(set_num) + if year is None: + continue + color = colors_lookup[row["color_id"]] + key = (year, color["rgb"], color["is_translucent"]) + existing = aggregates.get(key) + if existing is None: + aggregates[key] = { + "year": year, + "color_rgb": color["rgb"], + "is_translucent": color["is_translucent"], + "color_name": color["name"], + "quantity": 0, + } + existing = aggregates[key] + existing["quantity"] += int(row["quantity"]) + results = list(aggregates.values()) + results.sort(key=lambda r: (int(r["year"]), r["color_name"], r["is_translucent"])) + return results + + +def write_global_heads_by_year(destination_path: Path, rows: Iterable[dict]) -> None: + """Sérialise l'agrégat global par année.""" + fieldnames = ["year", "color_rgb", "is_translucent", "color_name", "quantity"] + with destination_path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) diff --git a/scripts/compute_global_minifig_heads.py b/scripts/compute_global_minifig_heads.py new file mode 100644 index 0000000..bf58122 --- /dev/null +++ b/scripts/compute_global_minifig_heads.py @@ -0,0 +1,29 @@ +"""Agrégation globale des couleurs de têtes de minifigs (catalogue complet).""" + +from pathlib import Path + +from lib.rebrickable.global_minifig_heads import aggregate_global_heads_by_year, write_global_heads_by_year + + +INVENTORIES_PATH = Path("data/raw/inventories.csv") +INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv") +PARTS_PATH = Path("data/raw/parts.csv") +COLORS_PATH = Path("data/raw/colors.csv") +SETS_PATH = Path("data/raw/sets.csv") +DESTINATION_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv") + + +def main() -> None: + """Construit l'agrégat mondial des têtes de minifigs par couleur et année.""" + heads_by_year = aggregate_global_heads_by_year( + INVENTORIES_PATH, + INVENTORY_PARTS_PATH, + PARTS_PATH, + COLORS_PATH, + SETS_PATH, + ) + write_global_heads_by_year(DESTINATION_PATH, heads_by_year) + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_global_minifig_heads.py b/scripts/plot_global_minifig_heads.py new file mode 100644 index 0000000..49b49cc --- /dev/null +++ b/scripts/plot_global_minifig_heads.py @@ -0,0 +1,18 @@ +"""Répartition annuelle des couleurs de têtes (catalogue complet).""" + +from pathlib import Path + +from lib.plots.global_minifig_heads import plot_global_head_shares + + +HEADS_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv") +DESTINATION_PATH = Path("figures/step17/global_minifig_heads_shares.png") + + +def main() -> None: + """Construit la heatmap stackée des parts de couleurs de têtes.""" + plot_global_head_shares(HEADS_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_global_minifig_skin_tones.py b/scripts/plot_global_minifig_skin_tones.py new file mode 100644 index 0000000..a32dc06 --- /dev/null +++ b/scripts/plot_global_minifig_skin_tones.py @@ -0,0 +1,19 @@ +"""Evolution de l'usage du Yellow pour les têtes minifigs (catalogue complet).""" + +from pathlib import Path + +from lib.plots.minifig_skin_tones import plot_yellow_share + + +HEADS_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv") +MILESTONES_PATH = Path("config/milestones.csv") +DESTINATION_PATH = Path("figures/step17/global_minifig_heads_yellow_share.png") + + +def main() -> None: + """Trace la part de têtes Yellow par année.""" + plot_yellow_share(HEADS_PATH, MILESTONES_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/tests/test_global_minifig_heads.py b/tests/test_global_minifig_heads.py new file mode 100644 index 0000000..ada9e94 --- /dev/null +++ b/tests/test_global_minifig_heads.py @@ -0,0 +1,92 @@ +"""Tests de l'agrégation globale des têtes de minifigs.""" + +import csv +from pathlib import Path + +from lib.rebrickable.global_minifig_heads import ( + aggregate_global_heads_by_year, + write_global_heads_by_year, +) + + +def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None: + """Écrit un CSV simple pour les besoins des tests.""" + with path.open("w", newline="") as csv_file: + writer = csv.writer(csv_file) + writer.writerow(headers) + writer.writerows(rows) + + +def test_aggregate_global_heads_by_year(tmp_path: Path) -> None: + """Construit un agrégat global par année.""" + inventories = tmp_path / "inventories.csv" + inventory_parts = tmp_path / "inventory_parts.csv" + parts = tmp_path / "parts.csv" + colors = tmp_path / "colors.csv" + sets = tmp_path / "sets.csv" + destination = tmp_path / "global_heads.csv" + write_csv( + inventories, + ["id", "version", "set_num"], + [ + ["1", "1", "1000-1"], + ["2", "2", "1000-1"], + ["3", "1", "2000-1"], + ], + ) + write_csv( + inventory_parts, + ["inventory_id", "part_num", "color_id", "quantity", "is_spare", "img_url"], + [ + ["2", "3626b", "1", "2", "False", ""], + ["3", "3626b", "2", "1", "False", ""], + ["3", "3001", "1", "10", "False", ""], + ], + ) + write_csv( + parts, + ["part_num", "name", "part_cat_id", "part_material"], + [ + ["3626b", "Minifig Head", "59", "Plastic"], + ["3001", "Brick 2 x 4", "11", "Plastic"], + ], + ) + write_csv( + colors, + ["id", "name", "rgb", "is_trans", "num_parts", "num_sets", "y1", "y2"], + [ + ["1", "Yellow", "FFFF00", "False", "0", "0", "0", "0"], + ["2", "Light Flesh", "FFE1BD", "False", "0", "0", "0", "0"], + ], + ) + write_csv( + sets, + ["set_num", "name", "year", "theme_id", "num_parts", "img_url"], + [ + ["1000-1", "Set A", "2020", "1", "0", ""], + ["2000-1", "Set B", "2021", "1", "0", ""], + ], + ) + + rows = aggregate_global_heads_by_year(inventories, inventory_parts, parts, colors, sets) + write_global_heads_by_year(destination, rows) + + with destination.open() as csv_file: + written = list(csv.DictReader(csv_file)) + + assert written == [ + { + "year": "2020", + "color_rgb": "FFFF00", + "is_translucent": "false", + "color_name": "Yellow", + "quantity": "2", + }, + { + "year": "2021", + "color_rgb": "FFE1BD", + "is_translucent": "false", + "color_name": "Light Flesh", + "quantity": "1", + }, + ] diff --git a/tests/test_global_minifig_heads_plot.py b/tests/test_global_minifig_heads_plot.py new file mode 100644 index 0000000..2ef0637 --- /dev/null +++ b/tests/test_global_minifig_heads_plot.py @@ -0,0 +1,28 @@ +"""Tests des visualisations globales des têtes de minifigs.""" + +import matplotlib +from pathlib import Path + +from lib.plots.global_minifig_heads import plot_global_head_shares + + +matplotlib.use("Agg") + + +def test_plot_global_head_shares(tmp_path: Path) -> None: + """Génère un graphique de parts de couleur sur le catalogue complet.""" + heads_path = tmp_path / "global_minifig_heads_by_year.csv" + destination = tmp_path / "figures" / "step17" / "global_minifig_heads_shares.png" + heads_path.write_text( + "year,color_rgb,is_translucent,color_name,quantity\n" + "2020,FFFF00,false,Yellow,2\n" + "2020,FFE1BD,false,Light Flesh,1\n" + "2021,FFE1BD,false,Light Flesh,3\n" + "2021,E7B68F,false,Medium Dark Flesh,1\n" + "2021,FFFF00,false,Yellow,2\n" + ) + + plot_global_head_shares(heads_path, destination) + + assert destination.exists() + assert destination.stat().st_size > 0 diff --git a/tests/test_global_minifig_skin_tones_plot.py b/tests/test_global_minifig_skin_tones_plot.py new file mode 100644 index 0000000..fa50f47 --- /dev/null +++ b/tests/test_global_minifig_skin_tones_plot.py @@ -0,0 +1,29 @@ +"""Tests du graphique global sur la part de têtes Yellow.""" + +import matplotlib +from pathlib import Path + +from lib.plots.minifig_skin_tones import plot_yellow_share + + +matplotlib.use("Agg") + + +def test_plot_yellow_share(tmp_path: Path) -> None: + """Génère un graphe de part Yellow sur le catalogue complet.""" + heads_path = tmp_path / "global_minifig_heads_by_year.csv" + milestones_path = tmp_path / "milestones.csv" + destination = tmp_path / "figures" / "step17" / "global_minifig_heads_yellow_share.png" + heads_path.write_text( + "year,color_rgb,is_translucent,color_name,quantity\n" + "2020,FFFF00,false,Yellow,2\n" + "2020,FFE1BD,false,Light Flesh,1\n" + "2021,FFE1BD,false,Light Flesh,3\n" + "2021,FFFF00,false,Yellow,1\n" + ) + milestones_path.write_text("year,description\n2020,Lancement\n") + + plot_yellow_share(heads_path, milestones_path, destination) + + assert destination.exists() + assert destination.stat().st_size > 0