From 7b6045941f5b05d5260b98b9fd4b669b28ccfb3e Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Tue, 2 Dec 2025 14:36:24 +0100 Subject: [PATCH] =?UTF-8?q?Ajoute=20l=E2=80=99=C3=A9tape=2027=20de=20palet?= =?UTF-8?q?tes=20dominantes=20par=20set?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 7 ++ lib/plots/set_color_swatches.py | 89 +++++++++++++++ lib/rebrickable/set_color_swatches.py | 86 ++++++++++++++ scripts/plot_set_color_swatches.py | 25 +++++ tests/test_set_color_swatches.py | 154 ++++++++++++++++++++++++++ tests/test_set_color_swatches_plot.py | 29 +++++ 6 files changed, 390 insertions(+) create mode 100644 lib/plots/set_color_swatches.py create mode 100644 lib/rebrickable/set_color_swatches.py create mode 100644 scripts/plot_set_color_swatches.py create mode 100644 tests/test_set_color_swatches.py create mode 100644 tests/test_set_color_swatches_plot.py diff --git a/README.md b/README.md index 603202f..9be3988 100644 --- a/README.md +++ b/README.md @@ -269,3 +269,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires. Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés). + +### Étape 27 : palettes dominantes par set (hors minifigs) + +1. `source .venv/bin/activate` +2. `python -m scripts.plot_set_color_swatches` + +Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes. diff --git a/lib/plots/set_color_swatches.py b/lib/plots/set_color_swatches.py new file mode 100644 index 0000000..8a5c864 --- /dev/null +++ b/lib/plots/set_color_swatches.py @@ -0,0 +1,89 @@ +"""Palette dominante par set (hors minifigs).""" + +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Sequence + +import matplotlib.pyplot as plt + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +PLACEHOLDER_COLOR = "#e0e0e0" + + +def load_swatches(path: Path) -> List[dict]: + """Charge le CSV des couleurs dominantes par set.""" + return read_rows(path) + + +def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]: + """Groupe les couleurs par set et complète avec des placeholders si besoin.""" + grouped: Dict[str, List[dict]] = defaultdict(list) + meta: Dict[str, dict] = {} + for row in rows: + grouped[row["set_num"]].append(row) + meta[row["set_num"]] = {"name": row["name"], "year": int(row["year"])} + result: List[dict] = [] + for set_num, colors in grouped.items(): + sorted_colors = sorted(colors, key=lambda r: int(r["rank"])) + while len(sorted_colors) < top_n: + sorted_colors.append( + { + "set_num": set_num, + "name": meta[set_num]["name"], + "year": str(meta[set_num]["year"]), + "rank": str(len(sorted_colors) + 1), + "color_rgb": "", + "color_name": "N/A", + "quantity_non_minifig": "0", + } + ) + result.append( + { + "set_num": set_num, + "name": meta[set_num]["name"], + "year": meta[set_num]["year"], + "colors": sorted_colors[:top_n], + } + ) + result.sort(key=lambda r: (r["year"], r["name"], r["set_num"])) + return result + + +def plot_set_color_swatches(swatches_path: Path, destination_path: Path) -> None: + """Trace la palette de 5 couleurs dominantes par set (hors minifigs).""" + rows = load_swatches(swatches_path) + if not rows: + return + grouped = group_swatches(rows, top_n=5) + set_labels = [f"{item['year']} – {item['name']}" for item in grouped] + y_positions = list(range(len(grouped))) + height = max(4, len(grouped) * 0.4) + + fig, ax = plt.subplots(figsize=(12, height)) + for y, item in zip(y_positions, grouped): + for idx, color in enumerate(item["colors"]): + rgb = color["color_rgb"].strip() + face_color = f"#{rgb}" if rgb else PLACEHOLDER_COLOR + ax.scatter( + idx, + y, + s=500, + color=face_color, + edgecolor="#0d0d0d", + linewidth=0.6, + ) + ax.set_yticks(y_positions) + ax.set_yticklabels(set_labels) + ax.set_xticks([]) + ax.invert_yaxis() + ax.set_xlim(-0.6, 4.6) + ax.set_title("Top 5 couleurs principales par set (hors minifigs)") + ax.grid(False) + + ensure_parent_dir(destination_path) + fig.tight_layout() + fig.savefig(destination_path, dpi=160) + plt.close(fig) diff --git a/lib/rebrickable/set_color_swatches.py b/lib/rebrickable/set_color_swatches.py new file mode 100644 index 0000000..6baf982 --- /dev/null +++ b/lib/rebrickable/set_color_swatches.py @@ -0,0 +1,86 @@ +"""Préparation des palettes dominantes par set (hors minifigs).""" + +import csv +from collections import defaultdict +from pathlib import Path +from typing import Dict, Iterable, List, Sequence + +from lib.filesystem import ensure_parent_dir +from lib.rebrickable.stats import read_rows + + +def load_colors_by_set(path: Path) -> List[dict]: + """Charge colors_by_set.csv.""" + return read_rows(path) + + +def load_sets_enriched(path: Path) -> Dict[str, dict]: + """Indexe nom et année par set_num.""" + lookup: Dict[str, dict] = {} + with path.open() as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + lookup[row["set_num"]] = {"name": row["name"], "year": int(row["year"]), "set_id": row["set_id"]} + return lookup + + +def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]: + """Sélectionne les top couleurs hors minifigs pour chaque set.""" + colors_by_set: Dict[str, List[dict]] = defaultdict(list) + for row in rows: + quantity = int(row["quantity_non_minifig"]) + if quantity <= 0: + continue + set_num = row["set_num"] + set_meta = sets_lookup.get(set_num) + if set_meta is None: + continue + colors_by_set[set_num].append( + { + "set_num": set_num, + "set_id": row["set_id"], + "year": set_meta["year"], + "name": set_meta["name"], + "color_rgb": row["color_rgb"], + "color_name": row["color_name"], + "quantity": quantity, + } + ) + results: List[dict] = [] + for set_num, color_rows in colors_by_set.items(): + sorted_rows = sorted(color_rows, key=lambda r: (-r["quantity"], r["color_name"])) + for rank, color_row in enumerate(sorted_rows[:top_n], start=1): + results.append( + { + "set_num": color_row["set_num"], + "set_id": color_row["set_id"], + "name": color_row["name"], + "year": str(color_row["year"]), + "rank": str(rank), + "color_rgb": color_row["color_rgb"], + "color_name": color_row["color_name"], + "quantity_non_minifig": str(color_row["quantity"]), + } + ) + results.sort(key=lambda r: (int(r["year"]), r["name"], r["set_num"], int(r["rank"]))) + return results + + +def write_top_colors(path: Path, rows: Sequence[dict]) -> None: + """Écrit le CSV des couleurs dominantes par set.""" + ensure_parent_dir(path) + fieldnames = [ + "set_num", + "set_id", + "name", + "year", + "rank", + "color_rgb", + "color_name", + "quantity_non_minifig", + ] + with path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) diff --git a/scripts/plot_set_color_swatches.py b/scripts/plot_set_color_swatches.py new file mode 100644 index 0000000..e0cbeb2 --- /dev/null +++ b/scripts/plot_set_color_swatches.py @@ -0,0 +1,25 @@ +"""Trace la palette dominante de chaque set (hors minifigs).""" + +from pathlib import Path + +from lib.plots.set_color_swatches import plot_set_color_swatches +from lib.rebrickable.set_color_swatches import build_top_colors_by_set, load_colors_by_set, load_sets_enriched, write_top_colors + + +COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv") +SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv") +SWATCHES_PATH = Path("data/intermediate/set_color_swatches.csv") +DESTINATION_PATH = Path("figures/step27/set_color_swatches.png") + + +def main() -> None: + """Construit le CSV de top couleurs par set et trace le nuancier.""" + colors_rows = load_colors_by_set(COLORS_BY_SET_PATH) + sets_lookup = load_sets_enriched(SETS_ENRICHED_PATH) + swatches = build_top_colors_by_set(colors_rows, sets_lookup, top_n=5) + write_top_colors(SWATCHES_PATH, swatches) + plot_set_color_swatches(SWATCHES_PATH, DESTINATION_PATH) + + +if __name__ == "__main__": + main() diff --git a/tests/test_set_color_swatches.py b/tests/test_set_color_swatches.py new file mode 100644 index 0000000..2ae1ce9 --- /dev/null +++ b/tests/test_set_color_swatches.py @@ -0,0 +1,154 @@ +"""Tests de la préparation des palettes par set.""" + +from pathlib import Path + +from lib.rebrickable.set_color_swatches import build_top_colors_by_set + + +def write_csv(path: Path, content: str) -> None: + """Écrit un CSV brut.""" + path.write_text(content) + + +def test_build_top_colors_by_set_selects_top5_non_minifig(tmp_path: Path) -> None: + """Sélectionne les 5 couleurs dominantes en excluant les minifigs.""" + colors_path = tmp_path / "colors_by_set.csv" + write_csv( + colors_path, + "set_num,set_id,year,color_rgb,is_translucent,color_name,quantity_total,quantity_non_spare,quantity_minifig,quantity_non_minifig\n" + "123-1,123,2020,111111,false,Black,10,10,0,10\n" + "123-1,123,2020,222222,false,Red,5,5,0,5\n" + "123-1,123,2020,333333,false,Blue,3,3,0,3\n" + "123-1,123,2020,444444,false,Green,2,2,0,2\n" + "123-1,123,2020,555555,false,Yellow,1,1,0,1\n" + "123-1,123,2020,666666,false,Pink,1,1,0,1\n" + "124-1,124,2021,aaaaaa,false,Gray,4,4,4,0\n", + ) + sets_path = tmp_path / "sets_enriched.csv" + write_csv( + sets_path, + "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n" + "123-1,Set A,2020,1,100,,123,,false\n" + "124-1,Set B,2021,1,50,,124,,false\n", + ) + rows = build_top_colors_by_set( + [ + row + for row in [ + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "111111", + "color_name": "Black", + "quantity_non_minifig": "10", + }, + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "222222", + "color_name": "Red", + "quantity_non_minifig": "5", + }, + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "333333", + "color_name": "Blue", + "quantity_non_minifig": "3", + }, + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "444444", + "color_name": "Green", + "quantity_non_minifig": "2", + }, + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "555555", + "color_name": "Yellow", + "quantity_non_minifig": "1", + }, + { + "set_num": "123-1", + "set_id": "123", + "year": "2020", + "color_rgb": "666666", + "color_name": "Pink", + "quantity_non_minifig": "1", + }, + { + "set_num": "124-1", + "set_id": "124", + "year": "2021", + "color_rgb": "aaaaaa", + "color_name": "Gray", + "quantity_non_minifig": "0", + }, + ] + ], + { + "123-1": {"name": "Set A", "year": 2020, "set_id": "123"}, + "124-1": {"name": "Set B", "year": 2021, "set_id": "124"}, + }, + top_n=5, + ) + + assert rows == [ + { + "set_num": "123-1", + "set_id": "123", + "name": "Set A", + "year": "2020", + "rank": "1", + "color_rgb": "111111", + "color_name": "Black", + "quantity_non_minifig": "10", + }, + { + "set_num": "123-1", + "set_id": "123", + "name": "Set A", + "year": "2020", + "rank": "2", + "color_rgb": "222222", + "color_name": "Red", + "quantity_non_minifig": "5", + }, + { + "set_num": "123-1", + "set_id": "123", + "name": "Set A", + "year": "2020", + "rank": "3", + "color_rgb": "333333", + "color_name": "Blue", + "quantity_non_minifig": "3", + }, + { + "set_num": "123-1", + "set_id": "123", + "name": "Set A", + "year": "2020", + "rank": "4", + "color_rgb": "444444", + "color_name": "Green", + "quantity_non_minifig": "2", + }, + { + "set_num": "123-1", + "set_id": "123", + "name": "Set A", + "year": "2020", + "rank": "5", + "color_rgb": "666666", + "color_name": "Pink", + "quantity_non_minifig": "1", + }, + ] diff --git a/tests/test_set_color_swatches_plot.py b/tests/test_set_color_swatches_plot.py new file mode 100644 index 0000000..b5da81a --- /dev/null +++ b/tests/test_set_color_swatches_plot.py @@ -0,0 +1,29 @@ +"""Tests du graphique de palettes dominantes par set.""" + +import matplotlib +from pathlib import Path + +from lib.plots.set_color_swatches import plot_set_color_swatches + + +matplotlib.use("Agg") + + +def test_plot_set_color_swatches(tmp_path: Path) -> None: + """Génère le nuancier top 5 par set.""" + swatches_path = tmp_path / "set_color_swatches.csv" + destination = tmp_path / "figures" / "step27" / "set_color_swatches.png" + swatches_path.write_text( + "set_num,set_id,name,year,rank,color_rgb,color_name,quantity_non_minifig\n" + "123-1,123,Set A,2020,1,111111,Black,10\n" + "123-1,123,Set A,2020,2,222222,Red,5\n" + "123-1,123,Set A,2020,3,333333,Blue,3\n" + "123-1,123,Set A,2020,4,444444,Green,2\n" + "123-1,123,Set A,2020,5,555555,Yellow,1\n" + "124-1,124,Set B,2021,1,aaaaaa,Gray,4\n" + ) + + plot_set_color_swatches(swatches_path, destination) + + assert destination.exists() + assert destination.stat().st_size > 0