From 909a1eae717353e03cb29d1dd328f57a8b109555 Mon Sep 17 00:00:00 2001
From: Richard Dern <gitea.local.richard@dern.ovh>
Date: Tue, 2 Dec 2025 15:50:41 +0100
Subject: [PATCH] =?UTF-8?q?Ajoute=20l=E2=80=99=C3=A9tape=2028=20des=20pale?=
 =?UTF-8?q?ttes=20perceptuelles?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                     |   7 +-
 lib/plots/set_color_swatches_perceptual.py    |  93 ++++++++++++
 .../set_color_swatches_perceptual.py          | 143 ++++++++++++++++++
 scripts/plot_set_color_swatches_perceptual.py |  30 ++++
 tests/test_set_color_swatches_perceptual.py   |  84 ++++++++++
 ...test_set_color_swatches_perceptual_plot.py |  28 ++++
 6 files changed, 383 insertions(+), 2 deletions(-)
 create mode 100644 lib/plots/set_color_swatches_perceptual.py
 create mode 100644 lib/rebrickable/set_color_swatches_perceptual.py
 create mode 100644 scripts/plot_set_color_swatches_perceptual.py
 create mode 100644 tests/test_set_color_swatches_perceptual.py
 create mode 100644 tests/test_set_color_swatches_perceptual_plot.py

diff --git a/README.md b/README.md
index e1ff2c1..49ca89d 100644
--- a/README.md
+++ b/README.md
@@ -280,6 +280,9 @@ Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré
 
 Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes.
 
-### Étape 28 : palettes perceptuelles par set (en préparation)
+### Étape 28 : palettes perceptuelles par set (hors minifigs, pièces techniques exclues)
 
-Objectif : produire une palette de 5 couleurs « perceptuelles » par set, moins biaisée par le volume de pièces. L’étape s’appuiera sur les mêmes filtres (couleurs 0033B2/05131D exclues, pièces techniques/structurelles ignorées), pondérera les couleurs par parts relatives hors minifigs, appliquera un tri perceptuel et une sélection diversifiée pour refléter l’esthétique plutôt que le poids en pièces. La version volumique (`figures/step27/set_color_swatches.png`) reste disponible en attendant la finalisation de cette étape.
+1. `source .venv/bin/activate`
+2. `python -m scripts.plot_set_color_swatches_perceptual`
+
+Le script lit `data/intermediate/colors_by_set.csv` (filtres appliqués : couleurs ignorées et pièces techniques/structurelles exclues) et `data/intermediate/sets_enriched.csv`, calcule pour chaque set les parts relatives de couleurs hors minifigs, sélectionne une palette diversifiée de 5 couleurs (priorité à la variété de teinte avant la luminosité), écrit `data/intermediate/set_color_swatches_perceptual.csv`, puis trace `figures/step28/set_color_swatches_perceptual.png` (pastilles dont la taille reflète la part relative).
diff --git a/lib/plots/set_color_swatches_perceptual.py b/lib/plots/set_color_swatches_perceptual.py
new file mode 100644
index 0000000..26f0959
--- /dev/null
+++ b/lib/plots/set_color_swatches_perceptual.py
@@ -0,0 +1,93 @@
+"""Visualisation des palettes perceptuelles (top 5) par set."""
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Sequence
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+PLACEHOLDER_COLOR = "#e0e0e0"  # face color drawn when a swatch row has an empty color_rgb
+
+
+def load_swatches(path: Path) -> List[dict]:
+    """Load the perceptual-palette CSV at path by delegating to read_rows."""
+    return read_rows(path)
+
+
+def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]:
+    """Group swatch rows by set, pad each set to top_n colors with placeholders, sort the sets."""
+    grouped: Dict[str, List[dict]] = defaultdict(list)
+    meta: Dict[str, dict] = {}
+    for row in rows:
+        grouped[row["set_num"]].append(row)
+        meta[row["set_num"]] = {"name": row["name"], "year": int(row["year"])}  # last row of a set wins
+    result: List[dict] = []
+    for set_num, colors in grouped.items():
+        sorted_colors = sorted(colors, key=lambda r: int(r["rank"]))  # rank is text; order numerically
+        while len(sorted_colors) < top_n:  # pad short palettes up to top_n entries
+            sorted_colors.append(
+                {
+                    "set_num": set_num,
+                    "name": meta[set_num]["name"],
+                    "year": str(meta[set_num]["year"]),
+                    "rank": str(len(sorted_colors) + 1),
+                    "color_rgb": "",  # empty value marks a placeholder swatch
+                    "color_name": "N/A",
+                    "share_non_minifig": "0",
+                    "quantity_non_minifig": "0",
+                }
+            )
+        result.append(
+            {
+                "set_num": set_num,
+                "name": meta[set_num]["name"],
+                "year": meta[set_num]["year"],
+                "colors": sorted_colors[:top_n],  # keep at most top_n colors
+            }
+        )
+    result.sort(key=lambda r: (r["year"], r["set_num"], r["name"]))
+    return result
+
+
+def plot_set_color_swatches_perceptual(swatches_path: Path, destination_path: Path) -> None:
+    """Draw one row of 5 swatches per set, sized by share, and save it to destination_path."""
+    rows = load_swatches(swatches_path)
+    if not rows:
+        return  # nothing to draw: no figure is written
+    grouped = group_swatches(rows, top_n=5)
+    set_labels = [f"{item['year']} – {item['name']}" for item in grouped]
+    y_positions = list(range(len(grouped)))
+    height = max(4, len(grouped) * 0.4)  # grows with the number of sets, never below 4
+
+    fig, ax = plt.subplots(figsize=(12, height))
+    for y, item in zip(y_positions, grouped):
+        for idx, color in enumerate(item["colors"]):
+            rgb = color["color_rgb"].strip()
+            face_color = f"#{rgb}" if rgb else PLACEHOLDER_COLOR  # empty rgb -> placeholder color
+            share = float(color.get("share_non_minifig", "0"))
+            size = 450 + 900 * share  # base size plus a term proportional to the share
+            ax.scatter(
+                idx,
+                y,
+                s=size,
+                color=face_color,
+                edgecolor="#0d0d0d",
+                linewidth=0.6,
+                alpha=0.95,
+            )
+    ax.set_yticks(y_positions)
+    ax.set_yticklabels(set_labels)
+    ax.set_xticks([])
+    ax.invert_yaxis()
+    ax.set_xlim(-0.6, 4.6)  # fixed range for swatch columns 0..4 (top_n=5 above)
+    ax.set_title("Top 5 couleurs perceptuelles par set (hors minifigs, pièces techniques exclues)")
+    ax.grid(False)
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=160)
+    plt.close(fig)
diff --git a/lib/rebrickable/set_color_swatches_perceptual.py b/lib/rebrickable/set_color_swatches_perceptual.py
new file mode 100644
index 0000000..9b9fe85
--- /dev/null
+++ b/lib/rebrickable/set_color_swatches_perceptual.py
@@ -0,0 +1,143 @@
+"""Construction de palettes perceptuelles (top 5) par set hors minifigs."""
+
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence, Set
+
+from lib.rebrickable.set_color_swatches import color_display_key, load_sets_enriched, parse_rgb_hex, hue_bucket
+from lib.rebrickable.stats import read_rows
+
+
+def load_colors_by_set(path: Path) -> List[dict]:
+    """Load colors_by_set.csv from path by delegating to read_rows."""
+    return read_rows(path)
+
+
+def compute_shares(rows: Iterable[dict]) -> Dict[str, List[dict]]:
+    """Compute, for each set, every color's share of the set's non-minifig quantity."""
+    by_set: Dict[str, List[dict]] = {}
+    totals: Dict[str, int] = {}
+    for row in rows:
+        quantity = int(row["quantity_non_minifig"])
+        if quantity <= 0:
+            continue  # colors with no non-minifig pieces are left out entirely
+        set_num = row["set_num"]
+        totals[set_num] = totals.get(set_num, 0) + quantity
+        current = by_set.get(set_num)
+        if current is None:
+            by_set[set_num] = [row]
+        else:
+            current.append(row)
+    shares: Dict[str, List[dict]] = {}
+    for set_num, color_rows in by_set.items():
+        total = totals.get(set_num, 0)
+        if total == 0:
+            continue  # defensive: totals only ever accumulate positive quantities
+        shares[set_num] = []
+        for row in color_rows:
+            share = int(row["quantity_non_minifig"]) / total
+            shares[set_num].append(
+                {
+                    "set_num": row["set_num"],
+                    "set_id": row["set_id"],
+                    "name": row.get("name", ""),  # name may be absent from the input rows
+                    "year": row["year"],
+                    "color_rgb": row["color_rgb"],
+                    "color_name": row["color_name"],
+                    "quantity_non_minifig": row["quantity_non_minifig"],
+                    "share_non_minifig": f"{share:.5f}",  # kept as text with 5 decimals
+                }
+            )
+    return shares
+
+
+def select_diverse_palette(rows: List[dict], top_n: int) -> List[dict]:
+    """Pick top_n colors by share, one per hue bucket first, then fill, pad and rank them."""
+    import colorsys  # stdlib; bound once here instead of calling __import__() on every iteration
+    sorted_by_share = sorted(rows, key=lambda r: (-float(r["share_non_minifig"]), r["color_name"]))
+    selected: List[dict] = []
+    buckets_used: Set[int] = set()
+    for row in sorted_by_share:  # first pass: at most one color per hue bucket
+        hue, _s, _v = colorsys.rgb_to_hsv(*parse_rgb_hex(row["color_rgb"]))
+        bucket = hue_bucket(hue * 360.0)  # colorsys hue is in [0, 1]; scaled by 360 for hue_bucket
+        if bucket in buckets_used:
+            continue
+        selected.append(row)
+        buckets_used.add(bucket)
+        if len(selected) == top_n:
+            break
+    if len(selected) < top_n:  # second pass: fill with the largest remaining shares
+        for row in sorted_by_share:
+            if row in selected:
+                continue
+            selected.append(row)
+            if len(selected) == top_n:
+                break
+    while len(selected) < top_n:  # still short: pad with "N/A" placeholder rows
+        selected.append(
+            {
+                "set_num": rows[0]["set_num"] if rows else "",
+                "set_id": rows[0]["set_id"] if rows else "",
+                "name": rows[0]["name"] if rows else "",
+                "year": rows[0]["year"] if rows else "",
+                "color_rgb": "",
+                "color_name": "N/A",
+                "quantity_non_minifig": "0",
+                "share_non_minifig": "0",
+            }
+        )
+    ordered = sorted(selected, key=color_display_key)  # final order is decided by color_display_key
+    for rank, row in enumerate(ordered, start=1):
+        row["rank"] = str(rank)  # written onto the row dicts themselves
+    return ordered[:top_n]
+
+
+def build_perceptual_swatches(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
+    """Build the palette rows (relative shares + hue diversity) for every set in sets_lookup."""
+    shares = compute_shares(rows)
+    swatches: List[dict] = []
+    for set_num, color_rows in shares.items():
+        set_meta = sets_lookup.get(set_num)
+        if set_meta is None:
+            continue  # sets missing from the lookup produce no output rows
+        selected = select_diverse_palette(color_rows, top_n)
+        for row in selected:
+            swatches.append(
+                {
+                    "set_num": set_num,
+                    "set_id": set_meta["set_id"],  # set_id, name and year come from the lookup
+                    "name": set_meta["name"],
+                    "year": str(set_meta["year"]),
+                    "rank": row["rank"],
+                    "color_rgb": row["color_rgb"],
+                    "color_name": row["color_name"],
+                    "share_non_minifig": row["share_non_minifig"],
+                    "quantity_non_minifig": row["quantity_non_minifig"],
+                }
+            )
+    swatches.sort(key=lambda r: (int(r["year"]), r["set_num"], int(r["rank"])))
+    return swatches
+
+
+def write_perceptual_swatches(path: Path, rows: Sequence[dict]) -> None:
+    """Write the perceptual-palette rows to a CSV file at path, creating its parent directory."""
+    from lib.filesystem import ensure_parent_dir
+
+    ensure_parent_dir(path)
+    fieldnames = [
+        "set_num",
+        "set_id",
+        "name",
+        "year",
+        "rank",
+        "color_rgb",
+        "color_name",
+        "share_non_minifig",
+        "quantity_non_minifig",
+    ]
+    with path.open("w", newline="") as csv_file:  # NOTE(review): no explicit encoding is passed
+        import csv
+
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow(row)
diff --git a/scripts/plot_set_color_swatches_perceptual.py b/scripts/plot_set_color_swatches_perceptual.py
new file mode 100644
index 0000000..c1107c7
--- /dev/null
+++ b/scripts/plot_set_color_swatches_perceptual.py
@@ -0,0 +1,30 @@
+"""Trace les palettes perceptuelles (top 5) par set hors minifigs."""
+
+from pathlib import Path
+
+from lib.plots.set_color_swatches_perceptual import plot_set_color_swatches_perceptual
+from lib.rebrickable.set_color_swatches_perceptual import (
+    build_perceptual_swatches,
+    load_colors_by_set,
+    load_sets_enriched,
+    write_perceptual_swatches,
+)
+
+
+COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")  # input read by load_colors_by_set
+SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")  # input read by load_sets_enriched
+SWATCHES_PATH = Path("data/intermediate/set_color_swatches_perceptual.csv")  # CSV written, then plotted
+DESTINATION_PATH = Path("figures/step28/set_color_swatches_perceptual.png")  # output figure
+
+
+def main() -> None:
+    """Build the per-set perceptual palettes, write them to CSV, then plot that CSV."""
+    colors_rows = load_colors_by_set(COLORS_BY_SET_PATH)
+    sets_lookup = load_sets_enriched(SETS_ENRICHED_PATH)
+    swatches = build_perceptual_swatches(colors_rows, sets_lookup, top_n=5)
+    write_perceptual_swatches(SWATCHES_PATH, swatches)
+    plot_set_color_swatches_perceptual(SWATCHES_PATH, DESTINATION_PATH)  # the plot re-reads the CSV
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_set_color_swatches_perceptual.py b/tests/test_set_color_swatches_perceptual.py
new file mode 100644
index 0000000..e661494
--- /dev/null
+++ b/tests/test_set_color_swatches_perceptual.py
@@ -0,0 +1,84 @@
+"""Tests des palettes perceptuelles par set."""
+
+from pathlib import Path
+
+from lib.rebrickable.set_color_swatches_perceptual import build_perceptual_swatches
+
+
+def write_csv(path: Path, content: str) -> None:
+    """Write raw CSV text to path."""
+    path.write_text(content)
+
+
+def test_build_perceptual_swatches_diversifies_buckets(tmp_path: Path) -> None:
+    """Check that six distinct colors yield five ranked swatches with five distinct names."""
+    colors_path = tmp_path / "colors_by_set.csv"  # NOTE(review): written below but never read
+    write_csv(
+        colors_path,
+        "set_num,set_id,year,color_rgb,is_translucent,color_name,quantity_total,quantity_non_spare,quantity_minifig,quantity_non_minifig\n"
+        "123-1,123,2020,FF0000,false,Red,10,10,0,10\n"
+        "123-1,123,2020,00FF00,false,Green,8,8,0,8\n"
+        "123-1,123,2020,0000FF,false,Blue,6,6,0,6\n"
+        "123-1,123,2020,FFFF00,false,Yellow,5,5,0,5\n"
+        "123-1,123,2020,FF00FF,false,Magenta,4,4,0,4\n"
+        "123-1,123,2020,00FFFF,false,Cyan,3,3,0,3\n",
+    )
+    sets_lookup = {"123-1": {"name": "Set A", "year": 2020, "set_id": "123"}}
+    rows = build_perceptual_swatches(
+        [
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "FF0000",
+                "color_name": "Red",
+                "quantity_non_minifig": "10",
+            },
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "00FF00",
+                "color_name": "Green",
+                "quantity_non_minifig": "8",
+            },
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "0000FF",
+                "color_name": "Blue",
+                "quantity_non_minifig": "6",
+            },
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "FFFF00",
+                "color_name": "Yellow",
+                "quantity_non_minifig": "5",
+            },
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "FF00FF",
+                "color_name": "Magenta",
+                "quantity_non_minifig": "4",
+            },
+            {
+                "set_num": "123-1",
+                "set_id": "123",
+                "year": "2020",
+                "color_rgb": "00FFFF",
+                "color_name": "Cyan",
+                "quantity_non_minifig": "3",
+            },
+        ],
+        sets_lookup,
+        top_n=5,
+    )
+
+    ranks = [row["rank"] for row in rows if row["set_num"] == "123-1"]
+    assert ranks == ["1", "2", "3", "4", "5"]  # output is sorted by rank within the set
+    assert len({row["color_name"] for row in rows}) == 5  # no color is selected twice
diff --git a/tests/test_set_color_swatches_perceptual_plot.py b/tests/test_set_color_swatches_perceptual_plot.py
new file mode 100644
index 0000000..a3dc90d
--- /dev/null
+++ b/tests/test_set_color_swatches_perceptual_plot.py
@@ -0,0 +1,28 @@
+"""Tests du graphique de palettes perceptuelles par set."""
+
+import matplotlib
+from pathlib import Path
+
+from lib.plots.set_color_swatches_perceptual import plot_set_color_swatches_perceptual
+
+
+matplotlib.use("Agg")  # select the non-interactive Agg backend for the test run
+
+
+def test_plot_set_color_swatches_perceptual(tmp_path: Path) -> None:
+    """Check that plotting a five-color CSV creates a non-empty image file."""
+    swatches_path = tmp_path / "set_color_swatches_perceptual.csv"
+    destination = tmp_path / "figures" / "step28" / "set_color_swatches_perceptual.png"
+    swatches_path.write_text(
+        "set_num,set_id,name,year,rank,color_rgb,color_name,share_non_minifig,quantity_non_minifig\n"
+        "123-1,123,Set A,2020,1,FF0000,Red,0.40000,10\n"
+        "123-1,123,Set A,2020,2,00FF00,Green,0.30000,8\n"
+        "123-1,123,Set A,2020,3,0000FF,Blue,0.20000,6\n"
+        "123-1,123,Set A,2020,4,FFFF00,Yellow,0.10000,5\n"
+        "123-1,123,Set A,2020,5,00FFFF,Cyan,0.05000,3\n"
+    )
+
+    plot_set_color_swatches_perceptual(swatches_path, destination)
+
+    assert destination.exists()
+    assert destination.stat().st_size > 0