Ajoute l’étape 28 des palettes perceptuelles

2025-12-02 15:50:41 +01:00
parent 74f8fa57e1
commit 909a1eae71
6 changed files with 383 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -280,6 +280,9 @@ Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré
 Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes.
-### Étape 28 : palettes perceptuelles par set (en préparation)
+### Étape 28 : palettes perceptuelles par set (hors minifigs, pièces techniques exclues)
-Objectif : produire une palette de 5 couleurs « perceptuelles » par set, moins biaisée par le volume de pièces. L’étape s’appuiera sur les mêmes filtres (couleurs 0033B2/05131D exclues, pièces techniques/structurelles ignorées), pondérera les couleurs par parts relatives hors minifigs, appliquera un tri perceptuel et une sélection diversifiée pour refléter l’esthétique plutôt que le poids en pièces. La version volumique (`figures/step27/set_color_swatches.png`) reste disponible en attendant la finalisation de cette étape.
+1. `source .venv/bin/activate`
 2. `python -m scripts.plot_set_color_swatches_perceptual`
 Le script lit `data/intermediate/colors_by_set.csv` (filtres appliqués : couleurs ignorées et pièces techniques/structurelles exclues), calcule pour chaque set les parts relatives de couleurs hors minifigs, sélectionne une palette diversifiée de 5 couleurs (priorité à la variété de teinte avant la luminosité), écrit `data/intermediate/set_color_swatches_perceptual.csv`, puis trace `figures/step28/set_color_swatches_perceptual.png` (pastilles dont la taille reflète la part relative).
--- a/lib/plots/set_color_swatches_perceptual.py
+++ b/lib/plots/set_color_swatches_perceptual.py
@@ -0,0 +1,93 @@
 """Visualisation des palettes perceptuelles (top 5) par set."""
 from collections import defaultdict
 from pathlib import Path
 from typing import Dict, List, Sequence
 import matplotlib.pyplot as plt
 from lib.filesystem import ensure_parent_dir
 from lib.rebrickable.stats import read_rows
 # Fill colour used by the plot for swatches whose `color_rgb` field is empty
 # (the padding rows added when a set has fewer colours than requested).
 PLACEHOLDER_COLOR = "#e0e0e0"
 def load_swatches(path: Path) -> List[dict]:
    """Load the perceptual palette CSV located at `path` through `read_rows`."""
    swatch_rows = read_rows(path)
    return swatch_rows
 def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]:
    """Bundle swatch rows per set and pad each palette to `top_n` entries.

    Args:
        rows: Swatch rows; each must provide `set_num`, `name`, `year`
            (int-convertible) and `rank` (int-convertible).
        top_n: Number of colour entries kept per set.

    Returns:
        One dict per set (`set_num`, `name`, `year` as int, `colors`),
        ordered by year, then set number, then name. `colors` is sorted by
        numeric rank, cut to `top_n`, and completed with "N/A" placeholder
        rows (empty `color_rgb`) when the set has fewer colours.
    """
    colors_by_set: Dict[str, List[dict]] = {}
    info_by_set: Dict[str, dict] = {}
    for entry in rows:
        key = entry["set_num"]
        colors_by_set.setdefault(key, []).append(entry)
        # The last row seen for a set decides its displayed name and year.
        info_by_set[key] = {"name": entry["name"], "year": int(entry["year"])}

    palettes: List[dict] = []
    for key, entries in colors_by_set.items():
        info = info_by_set[key]
        ranked = sorted(entries, key=lambda item: int(item["rank"]))
        # Placeholders continue the numbering right after the real colours.
        first_missing_rank = len(ranked) + 1
        ranked += [
            {
                "set_num": key,
                "name": info["name"],
                "year": str(info["year"]),
                "rank": str(missing_rank),
                "color_rgb": "",
                "color_name": "N/A",
                "share_non_minifig": "0",
                "quantity_non_minifig": "0",
            }
            for missing_rank in range(first_missing_rank, top_n + 1)
        ]
        palettes.append(
            {
                "set_num": key,
                "name": info["name"],
                "year": info["year"],
                "colors": ranked[:top_n],
            }
        )
    return sorted(palettes, key=lambda item: (item["year"], item["set_num"], item["name"]))
 def plot_set_color_swatches_perceptual(swatches_path: Path, destination_path: Path) -> None:
    """Draw one row of five colour discs per set and save the figure.

    Args:
        swatches_path: CSV of perceptual swatches, loaded with `load_swatches`.
        destination_path: Image file to write; its parent directory is
            prepared with `ensure_parent_dir`.

    Nothing is drawn or written when the CSV yields no rows. Each disc's
    marker area is `450 + 900 * share_non_minifig`, and entries with an empty
    `color_rgb` are filled with `PLACEHOLDER_COLOR`.
    """
    swatch_rows = load_swatches(swatches_path)
    if not swatch_rows:
        return
    palettes = group_swatches(swatch_rows, top_n=5)
    row_count = len(palettes)
    # Figure height grows with the number of sets, with a floor of 4 inches.
    figure, axes = plt.subplots(figsize=(12, max(4, row_count * 0.4)))
    for row_index, palette in enumerate(palettes):
        for column, swatch in enumerate(palette["colors"]):
            hex_code = swatch["color_rgb"].strip()
            share = float(swatch.get("share_non_minifig", "0"))
            axes.scatter(
                column,
                row_index,
                s=450 + 900 * share,
                color=PLACEHOLDER_COLOR if not hex_code else f"#{hex_code}",
                edgecolor="#0d0d0d",
                linewidth=0.6,
                alpha=0.95,
            )
    axes.set_yticks(list(range(row_count)))
    axes.set_yticklabels([f"{palette['year']} – {palette['name']}" for palette in palettes])
    axes.set_xticks([])
    # Flip the y axis so the first palette in sort order appears at the top.
    axes.invert_yaxis()
    axes.set_xlim(-0.6, 4.6)
    axes.set_title("Top 5 couleurs perceptuelles par set (hors minifigs, pièces techniques exclues)")
    axes.grid(False)
    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=160)
    plt.close(figure)
--- a/lib/rebrickable/set_color_swatches_perceptual.py
+++ b/lib/rebrickable/set_color_swatches_perceptual.py
@@ -0,0 +1,143 @@
 """Construction de palettes perceptuelles (top 5) par set hors minifigs."""
 from pathlib import Path
 from typing import Dict, Iterable, List, Sequence, Set
 from lib.rebrickable.set_color_swatches import color_display_key, load_sets_enriched, parse_rgb_hex, hue_bucket
 from lib.rebrickable.stats import read_rows
 def load_colors_by_set(path: Path) -> List[dict]:
    """Load the `colors_by_set.csv` file located at `path` through `read_rows`."""
    color_rows = read_rows(path)
    return color_rows
 def compute_shares(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Compute, for every set, each colour's share of the non-minifig parts.

    Args:
        rows: Colour rows providing `set_num`, `set_id`, `year`, `color_rgb`,
            `color_name` and an int-convertible `quantity_non_minifig`;
            `name` is optional and defaults to an empty string.

    Returns:
        A mapping from set number to its colour rows, in input order. Rows
        whose quantity is zero or negative are ignored, so a set made only of
        such rows is absent. Each returned row repeats the identifying fields
        and adds `share_non_minifig`, formatted with five decimals.
    """
    kept: Dict[str, List[tuple]] = {}
    for record in rows:
        count = int(record["quantity_non_minifig"])
        if count > 0:
            kept.setdefault(record["set_num"], []).append((count, record))

    result: Dict[str, List[dict]] = {}
    for set_key, pairs in kept.items():
        # Every kept count is strictly positive, so the total is never zero.
        set_total = sum(count for count, _record in pairs)
        result[set_key] = [
            {
                "set_num": record["set_num"],
                "set_id": record["set_id"],
                "name": record.get("name", ""),
                "year": record["year"],
                "color_rgb": record["color_rgb"],
                "color_name": record["color_name"],
                "quantity_non_minifig": record["quantity_non_minifig"],
                "share_non_minifig": f"{count / set_total:.5f}",
            }
            for count, record in pairs
        ]
    return result
 def select_diverse_palette(rows: List[dict], top_n: int) -> List[dict]:
    """Pick `top_n` colours for one set, favouring distinct hue buckets.

    Selection happens in three passes over the rows sorted by decreasing
    `share_non_minifig` (ties broken by `color_name`):

    1. keep the first row of every hue bucket (as reported by `hue_bucket`
       for the HSV hue expressed in degrees);
    2. if fewer than `top_n` rows were kept, fill up with the remaining rows
       in share order (rows equal to an already selected one are skipped);
    3. if still short, pad with "N/A" placeholder rows whose `color_rgb` is
       empty.

    The result is then ordered with `color_display_key` and ranked from 1.

    Args:
        rows: Colour rows of a single set, each with `color_rgb`,
            `color_name` and `share_non_minifig`. The rows are not modified.
        top_n: Palette size. A value of zero or less yields an empty list.

    Returns:
        `top_n` new dicts: copies of the chosen rows with a string `rank`.
    """
    # Function-scope import, like the other local imports in this module.
    import colorsys

    if top_n <= 0:
        return []

    sorted_by_share = sorted(rows, key=lambda r: (-float(r["share_non_minifig"]), r["color_name"]))
    selected: List[dict] = []
    buckets_used: Set[int] = set()

    # Pass 1: at most one colour per hue bucket, largest share first.
    for row in sorted_by_share:
        r, g, b = parse_rgb_hex(row["color_rgb"])
        h, _s, _v = colorsys.rgb_to_hsv(r, g, b)
        # rgb_to_hsv reports the hue in [0, 1]; hue_bucket receives degrees.
        bucket = hue_bucket(h * 360.0)
        if bucket in buckets_used:
            continue
        selected.append(row)
        buckets_used.add(bucket)
        if len(selected) == top_n:
            break

    # Pass 2: not enough distinct buckets, so fall back to plain share order.
    if len(selected) < top_n:
        for row in sorted_by_share:
            if row in selected:
                continue
            selected.append(row)
            if len(selected) == top_n:
                break

    # Pass 3: the set has fewer than `top_n` colours, pad with placeholders.
    missing = top_n - len(selected)
    if missing > 0:
        template = {
            "set_num": rows[0]["set_num"] if rows else "",
            "set_id": rows[0]["set_id"] if rows else "",
            "name": rows[0]["name"] if rows else "",
            "year": rows[0]["year"] if rows else "",
            "color_rgb": "",
            "color_name": "N/A",
            "quantity_non_minifig": "0",
            "share_non_minifig": "0",
        }
        selected.extend(dict(template) for _ in range(missing))

    ordered = sorted(selected, key=color_display_key)
    # Rank copies, not the originals, so the caller's rows are left untouched.
    return [{**row, "rank": str(rank)} for rank, row in enumerate(ordered, start=1)]
 def build_perceptual_swatches(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
    """Assemble the flat list of perceptual swatch rows for every known set.

    Args:
        rows: Colour rows, passed to `compute_shares`.
        sets_lookup: Set metadata keyed by set number; each entry supplies
            `set_id`, `name` and `year`. Sets missing from it are skipped.
        top_n: Palette size handed to `select_diverse_palette`.

    Returns:
        One row per selected colour, sorted by year, set number and rank,
        with the identifying fields taken from `sets_lookup`.
    """
    output: List[dict] = []
    for set_num, candidates in compute_shares(rows).items():
        meta = sets_lookup.get(set_num)
        if meta is not None:
            output.extend(
                [
                    {
                        "set_num": set_num,
                        "set_id": meta["set_id"],
                        "name": meta["name"],
                        "year": str(meta["year"]),
                        "rank": picked["rank"],
                        "color_rgb": picked["color_rgb"],
                        "color_name": picked["color_name"],
                        "share_non_minifig": picked["share_non_minifig"],
                        "quantity_non_minifig": picked["quantity_non_minifig"],
                    }
                    for picked in select_diverse_palette(candidates, top_n)
                ]
            )
    return sorted(output, key=lambda item: (int(item["year"]), item["set_num"], int(item["rank"])))
 def write_perceptual_swatches(path: Path, rows: Sequence[dict]) -> None:
    """Write the perceptual swatch rows to `path` as a CSV file with a header.

    The parent directory is prepared with `ensure_parent_dir` before the file
    is opened. Columns are written in a fixed order.
    """
    import csv

    from lib.filesystem import ensure_parent_dir

    columns = [
        "set_num",
        "set_id",
        "name",
        "year",
        "rank",
        "color_rgb",
        "color_name",
        "share_non_minifig",
        "quantity_non_minifig",
    ]
    ensure_parent_dir(path)
    # newline="" leaves line-ending handling to the csv module.
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
--- a/scripts/plot_set_color_swatches_perceptual.py
+++ b/scripts/plot_set_color_swatches_perceptual.py
@@ -0,0 +1,30 @@
 """Trace les palettes perceptuelles (top 5) par set hors minifigs."""
 from pathlib import Path
 from lib.plots.set_color_swatches_perceptual import plot_set_color_swatches_perceptual
 from lib.rebrickable.set_color_swatches_perceptual import (
    build_perceptual_swatches,
    load_colors_by_set,
    load_sets_enriched,
    write_perceptual_swatches,
 )
 # Input: per-set colour quantities, read by `load_colors_by_set`.
 COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
 # Input: set metadata, read by `load_sets_enriched`.
 SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
 # Output CSV written by `write_perceptual_swatches`, then read back for plotting.
 SWATCHES_PATH = Path("data/intermediate/set_color_swatches_perceptual.csv")
 # Output figure produced by `plot_set_color_swatches_perceptual`.
 DESTINATION_PATH = Path("figures/step28/set_color_swatches_perceptual.png")
 def main() -> None:
    """Build the perceptual palettes, write them to CSV, then render the figure."""
    palette_rows = build_perceptual_swatches(
        load_colors_by_set(COLORS_BY_SET_PATH),
        load_sets_enriched(SETS_ENRICHED_PATH),
        top_n=5,
    )
    write_perceptual_swatches(SWATCHES_PATH, palette_rows)
    # The plot reads the CSV that was just written rather than the in-memory rows.
    plot_set_color_swatches_perceptual(SWATCHES_PATH, DESTINATION_PATH)
 if __name__ == "__main__":
    main()
--- a/tests/test_set_color_swatches_perceptual.py
+++ b/tests/test_set_color_swatches_perceptual.py
@@ -0,0 +1,84 @@
 """Tests des palettes perceptuelles par set."""
 from pathlib import Path
 from lib.rebrickable.set_color_swatches_perceptual import build_perceptual_swatches
 def write_csv(path: Path, content: str) -> None:
    """Write `content` verbatim to `path` as a text file, replacing any previous content."""
    with path.open("w") as handle:
        handle.write(content)
 def test_build_perceptual_swatches_diversifies_buckets(tmp_path: Path) -> None:
    """Check that six candidate colours yield a five-colour ranked palette.

    The rows are passed to `build_perceptual_swatches` in memory, so no CSV
    fixture is needed; `tmp_path` is only kept so the signature is unchanged.
    """
    # (color_rgb, color_name, quantity_non_minifig), largest quantity first.
    candidates = [
        ("FF0000", "Red", "10"),
        ("00FF00", "Green", "8"),
        ("0000FF", "Blue", "6"),
        ("FFFF00", "Yellow", "5"),
        ("FF00FF", "Magenta", "4"),
        ("00FFFF", "Cyan", "3"),
    ]
    color_rows = [
        {
            "set_num": "123-1",
            "set_id": "123",
            "year": "2020",
            "color_rgb": rgb,
            "color_name": color_name,
            "quantity_non_minifig": quantity,
        }
        for rgb, color_name, quantity in candidates
    ]
    sets_lookup = {"123-1": {"name": "Set A", "year": 2020, "set_id": "123"}}
    rows = build_perceptual_swatches(color_rows, sets_lookup, top_n=5)
    ranks = [row["rank"] for row in rows if row["set_num"] == "123-1"]
    assert ranks == ["1", "2", "3", "4", "5"]
    selected_names = {row["color_name"] for row in rows}
    assert len(selected_names) == 5
    # With six real candidates, no "N/A" placeholder may appear in the palette.
    assert selected_names <= {color_name for _rgb, color_name, _quantity in candidates}
--- a/tests/test_set_color_swatches_perceptual_plot.py
+++ b/tests/test_set_color_swatches_perceptual_plot.py
@@ -0,0 +1,28 @@
 """Tests du graphique de palettes perceptuelles par set."""
 import matplotlib
 from pathlib import Path
 from lib.plots.set_color_swatches_perceptual import plot_set_color_swatches_perceptual
 # Select the Agg backend so the figure is rendered to a file without a display.
 matplotlib.use("Agg")
 def test_plot_set_color_swatches_perceptual(tmp_path: Path) -> None:
    """Render a single five-colour palette and check that an image file is produced."""
    csv_chunks = (
        "set_num,set_id,name,year,rank,color_rgb,color_name,share_non_minifig,quantity_non_minifig\n",
        "123-1,123,Set A,2020,1,FF0000,Red,0.40000,10\n",
        "123-1,123,Set A,2020,2,00FF00,Green,0.30000,8\n",
        "123-1,123,Set A,2020,3,0000FF,Blue,0.20000,6\n",
        "123-1,123,Set A,2020,4,FFFF00,Yellow,0.10000,5\n",
        "123-1,123,Set A,2020,5,00FFFF,Cyan,0.05000,3\n",
    )
    source_csv = tmp_path / "set_color_swatches_perceptual.csv"
    source_csv.write_text("".join(csv_chunks))
    # The nested output folders do not exist yet; the plot call must still produce the file.
    output_image = tmp_path / "figures" / "step28" / "set_color_swatches_perceptual.png"
    plot_set_color_swatches_perceptual(source_csv, output_image)
    assert output_image.exists()
    assert output_image.stat().st_size > 0