From d067e2075f2dd22bb2e3a89f9bca3e1abdb6498d Mon Sep 17 00:00:00 2001
From: Richard Dern <gitea.local.richard@dern.ovh>
Date: Tue, 2 Dec 2025 16:59:59 +0100
Subject: [PATCH] Ajoute la richesse chromatique par set

---
 README.md                         |  13 ++
 lib/plots/color_richness.py       | 130 ++++++++++++++++++++
 lib/rebrickable/color_richness.py | 150 +++++++++++++++++++++++
 scripts/compute_color_richness.py |  28 +++++
 scripts/plot_color_richness.py    |  26 ++++
 tests/test_color_richness.py      | 196 ++++++++++++++++++++++++++++++
 tests/test_color_richness_plot.py |  38 ++++++
 tests/test_rare_parts.py          |  22 ++--
 8 files changed, 592 insertions(+), 11 deletions(-)
 create mode 100644 lib/plots/color_richness.py
 create mode 100644 lib/rebrickable/color_richness.py
 create mode 100644 scripts/compute_color_richness.py
 create mode 100644 scripts/plot_color_richness.py
 create mode 100644 tests/test_color_richness.py
 create mode 100644 tests/test_color_richness_plot.py

diff --git a/README.md b/README.md
index c4182fa..2b1fe07 100644
--- a/README.md
+++ b/README.md
@@ -285,3 +285,16 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/intermediate/sets_en
 - `data/intermediate/rare_parts_by_set.csv` : agrégat par set (comptes distincts, quantités, focus minifigs).
 
 Le tracé `figures/step27/rare_parts_per_set.png` met en scène le top des sets contenant le plus de variantes exclusives, en distinguant les pièces de minifigs et l’état de possession.
+
+### Étape 28 : richesse chromatique par set
+
+1. `source .venv/bin/activate`
+2. `python -m scripts.compute_color_richness`
+3. `python -m scripts.plot_color_richness`
+
+Le calcul lit `data/intermediate/colors_by_set.csv` et `data/intermediate/sets_enriched.csv` pour mesurer la diversité des palettes (nombre de couleurs distinctes hors rechanges, part de la couleur dominante et des 3 couleurs principales, nombre de couleurs portées par les minifigs). Il produit :
+
+- `data/intermediate/color_richness_by_set.csv` : métriques détaillées par set (comptes et parts principales, possession).
+- `data/intermediate/color_richness_by_year.csv` : agrégat annuel (moyenne, médiane, bornes de diversité et concentration).
+
+Les graphiques `figures/step28/color_richness_boxplot.png`, `figures/step28/color_richness_top_sets.png` et `figures/step28/color_concentration_scatter.png` montrent respectivement la répartition annuelle, le top des sets les plus colorés et la concentration des palettes (part des 3 couleurs dominantes vs nombre de couleurs).
diff --git a/lib/plots/color_richness.py b/lib/plots/color_richness.py
new file mode 100644
index 0000000..6af58c4
--- /dev/null
+++ b/lib/plots/color_richness.py
@@ -0,0 +1,130 @@
+"""Visualisations de la richesse chromatique par set."""
+
+from pathlib import Path
+from typing import Iterable, List, Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.patches import Patch
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_richness_rows(path: Path) -> List[dict]:
+    """Load the per-set colour richness rows by delegating to read_rows."""
+    return read_rows(path)
+
+
+def build_boxplot_data(rows: Iterable[dict]) -> Tuple[List[List[int]], List[str]]:
+    """Collect the colors_distinct values per year, with the years in numeric order."""
+    counts_by_year: dict[str, List[int]] = {}
+    for entry in rows:
+        # The first row of a year opens its bucket; later rows reuse it.
+        bucket = counts_by_year.setdefault(entry["year"], [])
+        bucket.append(int(entry["colors_distinct"]))
+
+    # Year keys are strings; order them by their integer value.
+    ordered_years = sorted(counts_by_year, key=int)
+    series = [counts_by_year[year] for year in ordered_years]
+    return series, ordered_years
+
+
+def plot_richness_boxplot(richness_path: Path, destination_path: Path) -> None:
+    """Draw one box per year of the per-set distinct colour counts and save the figure."""
+    rows = load_richness_rows(richness_path)
+    if not rows:  # nothing to draw: return without creating the file
+        return
+    data, years = build_boxplot_data(rows)
+    fig, ax = plt.subplots(figsize=(12, 7))
+    box = ax.boxplot(
+        data,
+        orientation="vertical",  # NOTE(review): presumably needs a recent Matplotlib - confirm the pinned version
+        patch_artist=True,
+        tick_labels=years,  # NOTE(review): same remark as for `orientation`
+        boxprops=dict(facecolor="#1f77b4", alpha=0.3),
+        medianprops=dict(color="#0d0d0d", linewidth=1.5),
+        whiskerprops=dict(color="#555555", linestyle="--"),
+        capprops=dict(color="#555555"),
+    )
+    for patch in box["boxes"]:
+        patch.set_edgecolor("#1f77b4")  # outline in the same blue as the fill
+    ax.set_xlabel("Année")
+    ax.set_ylabel("Nombre de couleurs distinctes (hors rechanges)")
+    ax.set_title("Richesse chromatique par set (répartition annuelle)")
+    ax.grid(axis="y", linestyle="--", alpha=0.3)
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=170)
+    plt.close(fig)  # close the figure once it has been saved
+
+
+def select_top_sets(rows: Iterable[dict], limit: int = 15) -> List[dict]:
+    """Return the first `limit` rows ordered by most distinct colours, then lowest top3_share, then set_num."""
+    sorted_rows = sorted(
+        rows,
+        key=lambda row: (-int(row["colors_distinct"]), float(row["top3_share"]), row["set_num"]),
+    )
+    return sorted_rows[:limit]
+
+
+def plot_richness_top_sets(richness_path: Path, destination_path: Path) -> None:
+    """Draw a horizontal bar chart of the top sets by distinct colour count and save it."""
+    rows = load_richness_rows(richness_path)
+    if not rows:  # nothing to draw: return without creating the file
+        return
+    top_rows = select_top_sets(rows)
+    y_positions = np.arange(len(top_rows))
+    counts = [int(row["colors_distinct"]) for row in top_rows]
+    labels = [f"{row['set_num']} · {row['name']} ({row['year']})" for row in top_rows]
+    owned_mask = [row["in_collection"] == "true" for row in top_rows]  # owned only when the field is the text "true"
+
+    fig, ax = plt.subplots(figsize=(11, 8))
+    for y, value, owned in zip(y_positions, counts, owned_mask):
+        alpha = 0.92 if owned else 0.45  # owned sets are drawn more opaque
+        ax.barh(y, value, color="#2ca02c", alpha=alpha)
+    ax.set_yticks(y_positions)
+    ax.set_yticklabels(labels)
+    ax.invert_yaxis()  # puts the first-ranked set at the top of the chart
+    ax.set_xlabel("Couleurs distinctes (hors rechanges)")
+    ax.set_title("Top des sets les plus colorés")
+    ax.grid(axis="x", linestyle="--", alpha=0.3)
+    legend = [
+        Patch(facecolor="#2ca02c", edgecolor="none", alpha=0.92, label="Set possédé"),
+        Patch(facecolor="#2ca02c", edgecolor="none", alpha=0.45, label="Set manquant"),
+    ]
+    ax.legend(handles=legend, loc="lower right", frameon=False)
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=170)
+    plt.close(fig)  # close the figure once it has been saved
+
+
+def plot_concentration_scatter(richness_path: Path, destination_path: Path) -> None:
+    """Scatter top3_share against the distinct colour count of each set and save the figure."""
+    rows = load_richness_rows(richness_path)
+    if not rows:  # nothing to draw: return without creating the file
+        return
+    x_values = [int(row["colors_distinct"]) for row in rows]
+    y_values = [float(row["top3_share"]) for row in rows]
+    owned_mask = [row["in_collection"] == "true" for row in rows]  # owned only when the field is the text "true"
+    colors = ["#1f77b4" if owned else "#bbbbbb" for owned in owned_mask]
+
+    fig, ax = plt.subplots(figsize=(10, 7))
+    ax.scatter(x_values, y_values, c=colors, alpha=0.7, s=32)
+    ax.set_xlabel("Nombre de couleurs distinctes (hors rechanges)")
+    ax.set_ylabel("Part des 3 couleurs principales")
+    ax.set_title("Concentration des palettes")
+    ax.grid(True, linestyle="--", alpha=0.3)
+    legend = [  # hand-built handles mirroring the two point colours used above
+        Patch(facecolor="#1f77b4", edgecolor="none", alpha=0.7, label="Set possédé"),
+        Patch(facecolor="#bbbbbb", edgecolor="none", alpha=0.7, label="Set manquant"),
+    ]
+    ax.legend(handles=legend, loc="upper right", frameon=False)
+
+    ensure_parent_dir(destination_path)
+    fig.tight_layout()
+    fig.savefig(destination_path, dpi=170)
+    plt.close(fig)  # close the figure once it has been saved
diff --git a/lib/rebrickable/color_richness.py b/lib/rebrickable/color_richness.py
new file mode 100644
index 0000000..89d94b9
--- /dev/null
+++ b/lib/rebrickable/color_richness.py
@@ -0,0 +1,150 @@
+"""Métriques de richesse chromatique par set."""
+
+import csv
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import compute_median, read_rows
+
+
+def load_colors_by_set(path: Path) -> List[dict]:
+    """Load the rows of colors_by_set.csv into memory by delegating to read_rows."""
+    return read_rows(path)
+
+
+def load_sets(path: Path) -> Dict[str, dict]:
+    """Index the enriched set rows by set_num (a later duplicate replaces an earlier one)."""
+    # newline="" is what the csv module asks for: line breaks embedded in quoted
+    # fields then reach the parser untouched instead of being translated.
+    with path.open(newline="") as csv_file:
+        reader = csv.DictReader(csv_file)
+        sets: Dict[str, dict] = {row["set_num"]: row for row in reader}
+    return sets
+
+
+def group_by_set(rows: Iterable[dict]) -> Dict[str, List[dict]]:
+    """Bucket the colour rows by set_num, keeping input order inside each bucket."""
+    buckets: Dict[str, List[dict]] = {}
+    for entry in rows:
+        set_num = entry["set_num"]
+        # The first row of a set opens its bucket; later rows join it.
+        if set_num not in buckets:
+            buckets[set_num] = []
+        buckets[set_num].append(entry)
+    return buckets
+
+
+def build_richness_by_set(colors_by_set_path: Path, sets_enriched_path: Path) -> List[dict]:
+    """Build one richness row per set, most distinct colours first (ties broken by set_num)."""
+    grouped = group_by_set(load_colors_by_set(colors_by_set_path))
+    sets_lookup = load_sets(sets_enriched_path)
+    richness: List[dict] = []
+    for set_num, set_rows in grouped.items():
+        total_non_spare = sum(int(row["quantity_non_spare"]) for row in set_rows)
+        # NOTE(review): every colour row is counted, including one whose non-spare
+        # quantity is 0 - confirm colors_by_set.csv never lists spare-only colours.
+        colors_distinct = len(set_rows)
+        colors_minifig = sum(1 for row in set_rows if int(row["quantity_minifig"]) > 0)
+        colors_non_minifig = sum(1 for row in set_rows if int(row["quantity_non_minifig"]) > 0)
+        sorted_by_quantity = sorted(set_rows, key=lambda row: int(row["quantity_non_spare"]), reverse=True)
+        top_color = sorted_by_quantity[0]
+        top3_total = sum(int(row["quantity_non_spare"]) for row in sorted_by_quantity[:3])
+        # A set whose non-spare total is 0 would raise ZeroDivisionError below;
+        # report both shares as 0 for it instead.
+        top_share = int(top_color["quantity_non_spare"]) / total_non_spare if total_non_spare else 0.0
+        top3_share = top3_total / total_non_spare if total_non_spare else 0.0
+        set_row = sets_lookup[set_num]  # KeyError if the set is missing from sets_enriched.csv
+        richness.append(
+            {
+                "set_num": set_num,
+                "set_id": set_row["set_id"],
+                "name": set_row["name"],
+                "year": set_row["year"],
+                "in_collection": set_row["in_collection"],
+                "colors_distinct": str(colors_distinct),
+                "colors_minifig": str(colors_minifig),
+                "colors_non_minifig": str(colors_non_minifig),
+                "total_parts_non_spare": str(total_non_spare),
+                "top_color_name": top_color["color_name"],
+                "top_color_share": f"{top_share:.4f}",
+                "top3_share": f"{top3_share:.4f}",
+            }
+        )
+    richness.sort(key=lambda row: (-int(row["colors_distinct"]), row["set_num"]))
+    return richness
+
+
+def build_richness_by_year(richness_rows: Iterable[dict]) -> List[dict]:
+    """Aggregate the per-set richness rows into one summary row per year, sorted by year."""
+    grouped: Dict[str, List[dict]] = {}
+    for row in richness_rows:
+        grouped.setdefault(row["year"], []).append(row)
+    yearly: List[dict] = []
+    for year, rows in grouped.items():
+        distinct_counts = [int(row["colors_distinct"]) for row in rows]
+        top3_shares = [float(row["top3_share"]) for row in rows]
+        average_distinct = sum(distinct_counts) / len(distinct_counts)
+        median_distinct = compute_median(distinct_counts)
+        average_top3 = sum(top3_shares) / len(top3_shares)
+        # Shares carry four decimals; they are scaled to integer ten-thousandths for
+        # the median. round() is required: int() truncates, and binary float noise
+        # (of the kind 0.29 * 100 == 28.999999999999996) would otherwise push a
+        # share down by one unit before the median is taken.
+        median_top3 = compute_median([round(share * 10000) for share in top3_shares]) / 10000
+        yearly.append(
+            {
+                "year": year,
+                "average_colors_distinct": f"{average_distinct:.2f}",
+                "median_colors_distinct": f"{median_distinct:.2f}",
+                "max_colors_distinct": str(max(distinct_counts)),
+                "min_colors_distinct": str(min(distinct_counts)),
+                "average_top3_share": f"{average_top3:.4f}",
+                "median_top3_share": f"{median_top3:.4f}",
+            }
+        )
+    yearly.sort(key=lambda row: int(row["year"]))
+    return yearly
+
+
+def write_richness_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-set metrics to a CSV file with a header row."""
+    # Column order of the output file.
+    columns = [
+        "set_num",
+        "set_id",
+        "name",
+        "year",
+        "in_collection",
+        "colors_distinct",
+        "colors_minifig",
+        "colors_non_minifig",
+        "total_parts_non_spare",
+        "top_color_name",
+        "top_color_share",
+        "top3_share",
+    ]
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as handle:
+        output = csv.DictWriter(handle, fieldnames=columns)
+        output.writeheader()
+        output.writerows(rows)
+
+
+def write_richness_by_year(destination_path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-year aggregate to a CSV file with a header row."""
+    # Column order of the output file.
+    columns = [
+        "year",
+        "average_colors_distinct",
+        "median_colors_distinct",
+        "max_colors_distinct",
+        "min_colors_distinct",
+        "average_top3_share",
+        "median_top3_share",
+    ]
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as handle:
+        output = csv.DictWriter(handle, fieldnames=columns)
+        output.writeheader()
+        output.writerows(rows)
diff --git a/scripts/compute_color_richness.py b/scripts/compute_color_richness.py
new file mode 100644
index 0000000..11c085c
--- /dev/null
+++ b/scripts/compute_color_richness.py
@@ -0,0 +1,28 @@
+"""Calcule la richesse chromatique par set et par année."""
+
+from pathlib import Path
+
+from lib.rebrickable.color_richness import (
+    build_richness_by_set,
+    build_richness_by_year,
+    write_richness_by_set,
+    write_richness_by_year,
+)
+
+
+COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
+SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
+RICHNESS_BY_SET_PATH = Path("data/intermediate/color_richness_by_set.csv")
+RICHNESS_BY_YEAR_PATH = Path("data/intermediate/color_richness_by_year.csv")
+
+
+def main() -> None:
+    """Build the per-set and per-year colour richness CSV files."""
+    richness_by_set = build_richness_by_set(COLORS_BY_SET_PATH, SETS_ENRICHED_PATH)
+    richness_by_year = build_richness_by_year(richness_by_set)  # derived from the per-set rows
+    write_richness_by_set(RICHNESS_BY_SET_PATH, richness_by_set)
+    write_richness_by_year(RICHNESS_BY_YEAR_PATH, richness_by_year)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/plot_color_richness.py b/scripts/plot_color_richness.py
new file mode 100644
index 0000000..2c56e6f
--- /dev/null
+++ b/scripts/plot_color_richness.py
@@ -0,0 +1,26 @@
+"""Trace les graphiques de richesse chromatique par set."""
+
+from pathlib import Path
+
+from lib.plots.color_richness import (
+    plot_concentration_scatter,
+    plot_richness_boxplot,
+    plot_richness_top_sets,
+)
+
+
+RICHNESS_PATH = Path("data/intermediate/color_richness_by_set.csv")
+BOXPLOT_DESTINATION = Path("figures/step28/color_richness_boxplot.png")
+TOP_DESTINATION = Path("figures/step28/color_richness_top_sets.png")
+CONCENTRATION_DESTINATION = Path("figures/step28/color_concentration_scatter.png")
+
+
+def main() -> None:
+    """Render the three colour richness figures from the per-set CSV."""
+    plot_richness_boxplot(RICHNESS_PATH, BOXPLOT_DESTINATION)
+    plot_richness_top_sets(RICHNESS_PATH, TOP_DESTINATION)
+    plot_concentration_scatter(RICHNESS_PATH, CONCENTRATION_DESTINATION)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_color_richness.py b/tests/test_color_richness.py
new file mode 100644
index 0000000..8daeb09
--- /dev/null
+++ b/tests/test_color_richness.py
@@ -0,0 +1,196 @@
+"""Tests des métriques de richesse chromatique."""
+
+import csv
+from pathlib import Path
+
+from lib.rebrickable.color_richness import (
+    build_richness_by_set,
+    build_richness_by_year,
+    write_richness_by_set,
+    write_richness_by_year,
+)
+
+
+def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
+    """Write a small CSV (header row followed by the data rows) for test fixtures."""
+    with path.open("w", newline="") as csv_file:
+        writer = csv.writer(csv_file)
+        writer.writerow(headers)
+        writer.writerows(rows)
+
+
+def test_build_richness_by_set_computes_shares_and_counts(tmp_path: Path) -> None:
+    """Check the dominant-colour shares and the colour counts computed for each set."""
+    colors_by_set = tmp_path / "colors_by_set.csv"
+    write_csv(
+        colors_by_set,
+        [
+            "set_num",
+            "set_id",
+            "year",
+            "color_rgb",
+            "is_translucent",
+            "color_name",
+            "quantity_total",
+            "quantity_non_spare",
+            "quantity_minifig",
+            "quantity_non_minifig",
+        ],
+        [
+            ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "10", "10", "0", "10"],
+            ["1000-1", "1000", "2020", "BBBBBB", "false", "Blue", "5", "5", "5", "0"],
+            ["2000-1", "2000", "2021", "CCCCCC", "true", "Trans", "3", "3", "0", "3"],
+        ],
+    )
+    sets_enriched = tmp_path / "sets_enriched.csv"
+    write_csv(
+        sets_enriched,
+        ["set_num", "set_id", "name", "year", "in_collection"],
+        [
+            ["1000-1", "1000", "Set A", "2020", "true"],
+            ["2000-1", "2000", "Set B", "2021", "false"],
+        ],
+    )
+
+    richness = build_richness_by_set(colors_by_set, sets_enriched)
+
+    assert richness == [
+        {
+            "set_num": "1000-1",
+            "set_id": "1000",
+            "name": "Set A",
+            "year": "2020",
+            "in_collection": "true",
+            "colors_distinct": "2",
+            "colors_minifig": "1",
+            "colors_non_minifig": "1",
+            "total_parts_non_spare": "15",
+            "top_color_name": "Gray",
+            "top_color_share": "0.6667",
+            "top3_share": "1.0000",
+        },
+        {
+            "set_num": "2000-1",
+            "set_id": "2000",
+            "name": "Set B",
+            "year": "2021",
+            "in_collection": "false",
+            "colors_distinct": "1",
+            "colors_minifig": "0",
+            "colors_non_minifig": "1",
+            "total_parts_non_spare": "3",
+            "top_color_name": "Trans",
+            "top_color_share": "1.0000",
+            "top3_share": "1.0000",
+        },
+    ]
+
+
+def test_build_richness_by_year_aggregates_metrics(tmp_path: Path) -> None:
+    """Check the per-year aggregation of the richness metrics."""
+    richness_rows = [
+        {
+            "set_num": "s1",
+            "set_id": "1",
+            "name": "A",
+            "year": "2020",
+            "in_collection": "true",
+            "colors_distinct": "4",
+            "colors_minifig": "1",
+            "colors_non_minifig": "3",
+            "total_parts_non_spare": "10",
+            "top_color_name": "Red",
+            "top_color_share": "0.5000",
+            "top3_share": "0.9000",
+        },
+        {
+            "set_num": "s2",
+            "set_id": "2",
+            "name": "B",
+            "year": "2020",
+            "in_collection": "false",
+            "colors_distinct": "2",
+            "colors_minifig": "0",
+            "colors_non_minifig": "2",
+            "total_parts_non_spare": "5",
+            "top_color_name": "Blue",
+            "top_color_share": "0.6000",
+            "top3_share": "1.0000",
+        },
+        {
+            "set_num": "s3",
+            "set_id": "3",
+            "name": "C",
+            "year": "2021",
+            "in_collection": "true",
+            "colors_distinct": "3",
+            "colors_minifig": "1",
+            "colors_non_minifig": "3",
+            "total_parts_non_spare": "7",
+            "top_color_name": "Green",
+            "top_color_share": "0.5714",
+            "top3_share": "1.0000",
+        },
+    ]
+
+    yearly = build_richness_by_year(richness_rows)
+
+    assert yearly == [
+        {
+            "year": "2020",
+            "average_colors_distinct": "3.00",
+            "median_colors_distinct": "3.00",
+            "max_colors_distinct": "4",
+            "min_colors_distinct": "2",
+            "average_top3_share": "0.9500",
+            "median_top3_share": "0.9500",
+        },
+        {
+            "year": "2021",
+            "average_colors_distinct": "3.00",
+            "median_colors_distinct": "3.00",
+            "max_colors_distinct": "3",
+            "min_colors_distinct": "3",
+            "average_top3_share": "1.0000",
+            "median_top3_share": "1.0000",
+        },
+    ]
+
+
+def test_write_richness_outputs_csv(tmp_path: Path) -> None:
+    """Check that the per-set and per-year writers each create their CSV file."""
+    by_set_path = tmp_path / "color_richness_by_set.csv"
+    by_year_path = tmp_path / "color_richness_by_year.csv"
+    sample_set_rows = [
+        {
+            "set_num": "s1",
+            "set_id": "1",
+            "name": "A",
+            "year": "2020",
+            "in_collection": "true",
+            "colors_distinct": "1",
+            "colors_minifig": "1",
+            "colors_non_minifig": "1",
+            "total_parts_non_spare": "5",
+            "top_color_name": "Red",
+            "top_color_share": "1.0000",
+            "top3_share": "1.0000",
+        }
+    ]
+    sample_year_rows = [
+        {
+            "year": "2020",
+            "average_colors_distinct": "1.00",
+            "median_colors_distinct": "1.00",
+            "max_colors_distinct": "1",
+            "min_colors_distinct": "1",
+            "average_top3_share": "1.0000",
+            "median_top3_share": "1.0000",
+        }
+    ]
+
+    write_richness_by_set(by_set_path, sample_set_rows)
+    write_richness_by_year(by_year_path, sample_year_rows)
+
+    assert by_set_path.exists()  # only file creation is checked, not the content
+    assert by_year_path.exists()
diff --git a/tests/test_color_richness_plot.py b/tests/test_color_richness_plot.py
new file mode 100644
index 0000000..eccabfb
--- /dev/null
+++ b/tests/test_color_richness_plot.py
@@ -0,0 +1,38 @@
+"""Tests des visuels de richesse chromatique."""
+
+import matplotlib
+from pathlib import Path
+
+from lib.plots.color_richness import (
+    plot_concentration_scatter,
+    plot_richness_boxplot,
+    plot_richness_top_sets,
+)
+
+
+matplotlib.use("Agg")
+
+
+def test_plot_richness_outputs_images(tmp_path: Path) -> None:
+    """Check that the three plots are written as non-empty image files."""
+    richness_path = tmp_path / "color_richness_by_set.csv"
+    richness_path.write_text(
+        "set_num,set_id,name,year,in_collection,colors_distinct,colors_minifig,colors_non_minifig,total_parts_non_spare,top_color_name,top_color_share,top3_share\n"
+        "1000-1,1000,Set A,2020,true,6,2,5,50,Red,0.4000,0.6500\n"
+        "2000-1,2000,Set B,2021,false,4,1,3,30,Blue,0.5000,0.7500\n"
+        "3000-1,3000,Set C,2021,true,5,1,4,40,Green,0.3000,0.5500\n"
+    )
+    boxplot_dest = tmp_path / "figures" / "step28" / "color_richness_boxplot.png"
+    top_dest = tmp_path / "figures" / "step28" / "color_richness_top_sets.png"
+    scatter_dest = tmp_path / "figures" / "step28" / "color_concentration_scatter.png"
+
+    plot_richness_boxplot(richness_path, boxplot_dest)
+    plot_richness_top_sets(richness_path, top_dest)
+    plot_concentration_scatter(richness_path, scatter_dest)
+
+    assert boxplot_dest.exists()
+    assert top_dest.exists()
+    assert scatter_dest.exists()
+    assert boxplot_dest.stat().st_size > 0
+    assert top_dest.stat().st_size > 0
+    assert scatter_dest.stat().st_size > 0
diff --git a/tests/test_rare_parts.py b/tests/test_rare_parts.py
index 9a038be..67ed813 100644
--- a/tests/test_rare_parts.py
+++ b/tests/test_rare_parts.py
@@ -102,17 +102,6 @@ def test_build_rare_parts_detects_exclusive_variations(tmp_path: Path) -> None:
         },
     ]
     assert rare_by_set == [
-        {
-            "set_num": "1000-1",
-            "set_id": "1000",
-            "name": "Set A",
-            "year": "2020",
-            "in_collection": "true",
-            "rare_parts_distinct": "1",
-            "rare_parts_quantity": "1",
-            "rare_minifig_parts_distinct": "1",
-            "rare_minifig_quantity": "1",
-        },
         {
             "set_num": "2000-1",
             "set_id": "2000",
@@ -124,6 +113,17 @@ def test_build_rare_parts_detects_exclusive_variations(tmp_path: Path) -> None:
             "rare_minifig_parts_distinct": "0",
             "rare_minifig_quantity": "0",
         },
+        {
+            "set_num": "1000-1",
+            "set_id": "1000",
+            "name": "Set A",
+            "year": "2020",
+            "in_collection": "true",
+            "rare_parts_distinct": "1",
+            "rare_parts_quantity": "1",
+            "rare_minifig_parts_distinct": "1",
+            "rare_minifig_quantity": "1",
+        },
     ]