Ajoute l’étape 27 de palettes dominantes par set

2025-12-02 14:36:24 +01:00 · 2025-12-02 14:36:24 +01:00 · 7b6045941f
commit 7b6045941f
parent 1dd713db4a
6 changed files with 390 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -269,3 +269,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini

 Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
 Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés).
+
+### Étape 27 : palettes dominantes par set (hors minifigs)
+
+1. `source .venv/bin/activate`
+2. `python -m scripts.plot_set_color_swatches`
+
+Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes.
--- a/lib/plots/set_color_swatches.py
+++ b/lib/plots/set_color_swatches.py
@@ -0,0 +1,89 @@
+"""Dominant color palette per set (excluding minifigs)."""
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Sequence
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+PLACEHOLDER_COLOR = "#e0e0e0"  # fill used when a swatch entry has an empty color_rgb
+
+
+def load_swatches(path: Path) -> List[dict]:
+    """Load the per-set dominant colors CSV via read_rows."""
+    return read_rows(path)
+
+
+def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]:
+    """Group color rows by set_num, pad each set to top_n entries with placeholders, and return the sets sorted by (year, name, set_num)."""
+    grouped: Dict[str, List[dict]] = defaultdict(list)
+    meta: Dict[str, dict] = {}
+    for row in rows:
+        grouped[row["set_num"]].append(row)
+        meta[row["set_num"]] = {"name": row["name"], "year": int(row["year"])}  # last row seen for a set wins
+    result: List[dict] = []
+    for set_num, colors in grouped.items():
+        sorted_colors = sorted(colors, key=lambda r: int(r["rank"]))  # rank is compared as an integer
+        while len(sorted_colors) < top_n:  # pad short palettes so every set ends up with top_n entries
+            sorted_colors.append(
+                {
+                    "set_num": set_num,
+                    "name": meta[set_num]["name"],
+                    "year": str(meta[set_num]["year"]),
+                    "rank": str(len(sorted_colors) + 1),
+                    "color_rgb": "",  # left empty for placeholder entries
+                    "color_name": "N/A",
+                    "quantity_non_minifig": "0",
+                }
+            )
+        result.append(
+            {
+                "set_num": set_num,
+                "name": meta[set_num]["name"],
+                "year": meta[set_num]["year"],
+                "colors": sorted_colors[:top_n],  # truncates when more than top_n rows were supplied
+            }
+        )
+    result.sort(key=lambda r: (r["year"], r["name"], r["set_num"]))
+    return result
+
+
+def plot_set_color_swatches(swatches_path: Path, destination_path: Path) -> None:
+    """Read the swatches CSV and save a figure with one row of 5 dominant-color dots per set (excluding minifigs); return without writing anything when the CSV has no rows."""
+    rows = load_swatches(swatches_path)
+    if not rows:
+        return
+    grouped = group_swatches(rows, top_n=5)
+    set_labels = [f"{item['year']} – {item['name']}" for item in grouped]
+    y_positions = list(range(len(grouped)))
+    height = max(4, len(grouped) * 0.4)  # figure height grows with the number of sets, never below 4
+
+    fig, ax = plt.subplots(figsize=(12, height))
+    for y, item in zip(y_positions, grouped):
+        for idx, color in enumerate(item["colors"]):
+            rgb = color["color_rgb"].strip()
+            face_color = f"#{rgb}" if rgb else PLACEHOLDER_COLOR  # prepend '#' to the stored hex value; empty means placeholder
+            ax.scatter(
+                idx,
+                y,
+                s=500,
+                color=face_color,
+                edgecolor="#0d0d0d",
+                linewidth=0.6,
+            )
+    ax.set_yticks(y_positions)
+    ax.set_yticklabels(set_labels)
+    ax.set_xticks([])
+    ax.invert_yaxis()  # put the first set at the top of the chart
+    ax.set_xlim(-0.6, 4.6)  # fits x positions 0..4, i.e. the 5 dots drawn per set
+    ax.set_title("Top 5 couleurs principales par set (hors minifigs)")
+    ax.grid(False)
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=160)
+    plt.close(fig)
--- a/lib/rebrickable/set_color_swatches.py
+++ b/lib/rebrickable/set_color_swatches.py
@@ -0,0 +1,86 @@
+"""Preparation of the dominant color palettes per set (excluding minifigs)."""
+
+import csv
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_colors_by_set(path: Path) -> List[dict]:
+    """Load colors_by_set.csv via read_rows."""
+    return read_rows(path)
+
+
+def load_sets_enriched(path: Path) -> Dict[str, dict]:
+    """Index name, year (converted to int) and set_id by set_num."""
+    lookup: Dict[str, dict] = {}
+    with path.open() as csv_file:
+        reader = csv.DictReader(csv_file)
+        for row in reader:
+            lookup[row["set_num"]] = {"name": row["name"], "year": int(row["year"]), "set_id": row["set_id"]}
+    return lookup
+
+
+def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
+    """Select, for each set found in sets_lookup, its top_n colors by quantity_non_minifig and return ranked rows whose values are all strings."""
+    colors_by_set: Dict[str, List[dict]] = defaultdict(list)
+    for row in rows:
+        quantity = int(row["quantity_non_minifig"])
+        if quantity <= 0:  # color has no non-minifig quantity in this set
+            continue
+        set_num = row["set_num"]
+        set_meta = sets_lookup.get(set_num)
+        if set_meta is None:  # set absent from sets_lookup: skip its colors
+            continue
+        colors_by_set[set_num].append(
+            {
+                "set_num": set_num,
+                "set_id": row["set_id"],
+                "year": set_meta["year"],
+                "name": set_meta["name"],
+                "color_rgb": row["color_rgb"],
+                "color_name": row["color_name"],
+                "quantity": quantity,
+            }
+        )
+    results: List[dict] = []
+    for set_num, color_rows in colors_by_set.items():
+        sorted_rows = sorted(color_rows, key=lambda r: (-r["quantity"], r["color_name"]))  # highest quantity first, ties broken by color_name
+        for rank, color_row in enumerate(sorted_rows[:top_n], start=1):
+            results.append(
+                {
+                    "set_num": color_row["set_num"],
+                    "set_id": color_row["set_id"],
+                    "name": color_row["name"],
+                    "year": str(color_row["year"]),
+                    "rank": str(rank),
+                    "color_rgb": color_row["color_rgb"],
+                    "color_name": color_row["color_name"],
+                    "quantity_non_minifig": str(color_row["quantity"]),
+                }
+            )
+    results.sort(key=lambda r: (int(r["year"]), r["name"], r["set_num"], int(r["rank"])))
+    return results
+
+
+def write_top_colors(path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-set dominant colors CSV: a header line, then one line per row."""
+    ensure_parent_dir(path)
+    fieldnames = [
+        "set_num",
+        "set_id",
+        "name",
+        "year",
+        "rank",
+        "color_rgb",
+        "color_name",
+        "quantity_non_minifig",
+    ]
+    with path.open("w", newline="") as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
--- a/scripts/plot_set_color_swatches.py
+++ b/scripts/plot_set_color_swatches.py
@@ -0,0 +1,25 @@
+"""Plot the dominant color palette of each set (excluding minifigs)."""
+
+from pathlib import Path
+
+from lib.plots.set_color_swatches import plot_set_color_swatches
+from lib.rebrickable.set_color_swatches import build_top_colors_by_set, load_colors_by_set, load_sets_enriched, write_top_colors
+
+
+COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")  # input, read by load_colors_by_set
+SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")  # input, read by load_sets_enriched
+SWATCHES_PATH = Path("data/intermediate/set_color_swatches.csv")  # written by main, then read back for plotting
+DESTINATION_PATH = Path("figures/step27/set_color_swatches.png")  # output figure
+
+
+def main() -> None:
+    """Build the per-set top colors CSV, then plot the swatch chart from that CSV."""
+    colors_rows = load_colors_by_set(COLORS_BY_SET_PATH)
+    sets_lookup = load_sets_enriched(SETS_ENRICHED_PATH)
+    swatches = build_top_colors_by_set(colors_rows, sets_lookup, top_n=5)
+    write_top_colors(SWATCHES_PATH, swatches)
+    plot_set_color_swatches(SWATCHES_PATH, DESTINATION_PATH)
+
+
+if __name__ == "__main__":  # run only when executed as a script or with python -m
+    main()
--- a/tests/test_set_color_swatches.py
+++ b/tests/test_set_color_swatches.py
@@ -0,0 +1,154 @@
+"""Tests for the per-set palette preparation."""
+
+from pathlib import Path
+
+from lib.rebrickable.set_color_swatches import build_top_colors_by_set
+
+
+def write_csv(path: Path, content: str) -> None:
+    """Write raw CSV content to path."""
+    path.write_text(content)
+
+
+def test_build_top_colors_by_set_selects_top5_non_minifig(tmp_path: Path) -> None:
+    """Select the 5 dominant colors of a set while excluding minifig-only colors."""
+    colors_path = tmp_path / "colors_by_set.csv"  # NOTE(review): written below but never read back in this test
+    write_csv(
+        colors_path,
+        "set_num,set_id,year,color_rgb,is_translucent,color_name,quantity_total,quantity_non_spare,quantity_minifig,quantity_non_minifig\n"
+        "123-1,123,2020,111111,false,Black,10,10,0,10\n"
+        "123-1,123,2020,222222,false,Red,5,5,0,5\n"
+        "123-1,123,2020,333333,false,Blue,3,3,0,3\n"
+        "123-1,123,2020,444444,false,Green,2,2,0,2\n"
+        "123-1,123,2020,555555,false,Yellow,1,1,0,1\n"
+        "123-1,123,2020,666666,false,Pink,1,1,0,1\n"
+        "124-1,124,2021,aaaaaa,false,Gray,4,4,4,0\n",
+    )
+    sets_path = tmp_path / "sets_enriched.csv"  # NOTE(review): written below but never read back in this test
+    write_csv(
+        sets_path,
+        "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
+        "123-1,Set A,2020,1,100,,123,,false\n"
+        "124-1,Set B,2021,1,50,,124,,false\n",
+    )
+    rows = build_top_colors_by_set(
+        [
+            row  # NOTE(review): identity comprehension, it just copies the literal list below
+            for row in [
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "111111",
+                    "color_name": "Black",
+                    "quantity_non_minifig": "10",
+                },
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "222222",
+                    "color_name": "Red",
+                    "quantity_non_minifig": "5",
+                },
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "333333",
+                    "color_name": "Blue",
+                    "quantity_non_minifig": "3",
+                },
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "444444",
+                    "color_name": "Green",
+                    "quantity_non_minifig": "2",
+                },
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "555555",
+                    "color_name": "Yellow",
+                    "quantity_non_minifig": "1",
+                },
+                {
+                    "set_num": "123-1",
+                    "set_id": "123",
+                    "year": "2020",
+                    "color_rgb": "666666",
+                    "color_name": "Pink",
+                    "quantity_non_minifig": "1",
+                },
+                {
+                    "set_num": "124-1",
+                    "set_id": "124",
+                    "year": "2021",
+                    "color_rgb": "aaaaaa",
+                    "color_name": "Gray",
+                    "quantity_non_minifig": "0",  # zero quantity: this set must not appear in the result
+                },
+            ]
+        ],
+        {
+            "123-1": {"name": "Set A", "year": 2020, "set_id": "123"},
+            "124-1": {"name": "Set B", "year": 2021, "set_id": "124"},
+        },
+        top_n=5,
+    )
+
+    assert rows == [
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "name": "Set A",
+            "year": "2020",
+            "rank": "1",
+            "color_rgb": "111111",
+            "color_name": "Black",
+            "quantity_non_minifig": "10",
+        },
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "name": "Set A",
+            "year": "2020",
+            "rank": "2",
+            "color_rgb": "222222",
+            "color_name": "Red",
+            "quantity_non_minifig": "5",
+        },
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "name": "Set A",
+            "year": "2020",
+            "rank": "3",
+            "color_rgb": "333333",
+            "color_name": "Blue",
+            "quantity_non_minifig": "3",
+        },
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "name": "Set A",
+            "year": "2020",
+            "rank": "4",
+            "color_rgb": "444444",
+            "color_name": "Green",
+            "quantity_non_minifig": "2",
+        },
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "name": "Set A",
+            "year": "2020",
+            "rank": "5",
+            "color_rgb": "666666",
+            "color_name": "Pink",  # ties with Yellow on quantity 1; Pink sorts first by color_name
+            "quantity_non_minifig": "1",
+        },
+    ]
--- a/tests/test_set_color_swatches_plot.py
+++ b/tests/test_set_color_swatches_plot.py
@@ -0,0 +1,29 @@
+"""Tests for the per-set dominant palette chart."""
+
+import matplotlib
+from pathlib import Path
+
+from lib.plots.set_color_swatches import plot_set_color_swatches
+
+
+matplotlib.use("Agg")  # non-interactive backend so the figure can be rendered without a display
+
+
+def test_plot_set_color_swatches(tmp_path: Path) -> None:
+    """Generate the top-5 swatch chart and check that a non-empty file is written."""
+    swatches_path = tmp_path / "set_color_swatches.csv"
+    destination = tmp_path / "figures" / "step27" / "set_color_swatches.png"
+    swatches_path.write_text(
+        "set_num,set_id,name,year,rank,color_rgb,color_name,quantity_non_minifig\n"
+        "123-1,123,Set A,2020,1,111111,Black,10\n"
+        "123-1,123,Set A,2020,2,222222,Red,5\n"
+        "123-1,123,Set A,2020,3,333333,Blue,3\n"
+        "123-1,123,Set A,2020,4,444444,Green,2\n"
+        "123-1,123,Set A,2020,5,555555,Yellow,1\n"
+        "124-1,124,Set B,2021,1,aaaaaa,Gray,4\n"  # only one color, so this set exercises placeholder padding
+    )
+
+    plot_set_color_swatches(swatches_path, destination)
+
+    assert destination.exists()
+    assert destination.stat().st_size > 0