1

Ajoute l’étape 27 de palettes dominantes par set

This commit is contained in:
Richard Dern 2025-12-02 14:36:24 +01:00
parent 1dd713db4a
commit 7b6045941f
6 changed files with 390 additions and 0 deletions

View File

@ -269,3 +269,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés).
### Étape 27 : palettes dominantes par set (hors minifigs)
1. `source .venv/bin/activate`
2. `python -m scripts.plot_set_color_swatches`
Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes.

View File

@ -0,0 +1,89 @@
"""Palette dominante par set (hors minifigs)."""
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Sequence
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
# Fill colour used for a swatch whose `color_rgb` value is empty (padding entries).
PLACEHOLDER_COLOR = "#e0e0e0"
def load_swatches(path: Path) -> List[dict]:
    """Load the per-set dominant-colour CSV.

    Reading is delegated entirely to ``read_rows``; its result is returned
    unchanged.
    """
    swatch_rows = read_rows(path)
    return swatch_rows
def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]:
    """Bundle swatch rows per set and pad every set to ``top_n`` colours.

    Args:
        rows: Swatch rows; each must provide ``set_num``, ``name``, ``year``
            and ``rank`` (``year`` and ``rank`` must be int-convertible).
        top_n: Number of colour entries each set ends up with.

    Returns:
        One dict per set with ``set_num``, ``name``, ``year`` (as int) and
        ``colors`` (rows ordered by rank, truncated or padded with "N/A"
        placeholder rows to ``top_n``), sorted by year, name, then set_num.
    """
    colors_by_set: Dict[str, List[dict]] = defaultdict(list)
    info_by_set: Dict[str, dict] = {}
    for entry in rows:
        key = entry["set_num"]
        colors_by_set[key].append(entry)
        # The last row seen for a set decides its name and year.
        info_by_set[key] = {"name": entry["name"], "year": int(entry["year"])}

    bundles: List[dict] = []
    for key, entries in colors_by_set.items():
        info = info_by_set[key]
        palette = sorted(entries, key=lambda entry: int(entry["rank"]))
        # Fill the missing ranks with empty-colour placeholders.
        for filler_rank in range(len(palette) + 1, top_n + 1):
            palette.append(
                {
                    "set_num": key,
                    "name": info["name"],
                    "year": str(info["year"]),
                    "rank": str(filler_rank),
                    "color_rgb": "",
                    "color_name": "N/A",
                    "quantity_non_minifig": "0",
                }
            )
        bundles.append(
            {
                "set_num": key,
                "name": info["name"],
                "year": info["year"],
                "colors": palette[:top_n],
            }
        )

    bundles.sort(key=lambda bundle: (bundle["year"], bundle["name"], bundle["set_num"]))
    return bundles
def plot_set_color_swatches(swatches_path: Path, destination_path: Path) -> None:
    """Draw one row of five dominant-colour dots per set (minifigs excluded).

    Args:
        swatches_path: CSV of ranked colours per set, loaded via
            ``load_swatches``.
        destination_path: Image file to write; its parent directory is
            prepared with ``ensure_parent_dir``.

    Nothing is drawn or written when the CSV yields no rows.
    """
    swatch_rows = load_swatches(swatches_path)
    if not swatch_rows:
        return

    palettes = group_swatches(swatch_rows, top_n=5)
    row_count = len(palettes)
    rows_y = list(range(row_count))

    # Figure height grows with the number of sets, with a floor of 4.
    figure, axes = plt.subplots(figsize=(12, max(4, row_count * 0.4)))

    for row_y, palette in zip(rows_y, palettes):
        for column, swatch in enumerate(palette["colors"]):
            hex_code = swatch["color_rgb"].strip()
            if hex_code:
                fill = f"#{hex_code}"
            else:
                # Padding entries carry no RGB value.
                fill = PLACEHOLDER_COLOR
            axes.scatter(
                column,
                row_y,
                s=500,
                color=fill,
                edgecolor="#0d0d0d",
                linewidth=0.6,
            )

    axes.set_yticks(rows_y)
    axes.set_yticklabels([f"{palette['year']} {palette['name']}" for palette in palettes])
    axes.set_xticks([])
    axes.invert_yaxis()
    axes.set_xlim(-0.6, 4.6)
    axes.set_title("Top 5 couleurs principales par set (hors minifigs)")
    axes.grid(False)

    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=160)
    plt.close(figure)

View File

@ -0,0 +1,86 @@
"""Préparation des palettes dominantes par set (hors minifigs)."""
import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_colors_by_set(path: Path) -> List[dict]:
    """Load colors_by_set.csv.

    Reading is delegated entirely to ``read_rows``; its result is returned
    unchanged.
    """
    color_rows = read_rows(path)
    return color_rows
def load_sets_enriched(path: Path) -> Dict[str, dict]:
    """Index name, year and set_id by set_num from the enriched sets CSV.

    Args:
        path: CSV file with at least the columns ``set_num``, ``name``,
            ``year`` and ``set_id``.

    Returns:
        Mapping ``set_num -> {"name": str, "year": int, "set_id": str}``.
        When a set_num appears more than once, the last row wins.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a ``year`` value is not an integer.
    """
    # The csv module requires newline="" so that line breaks embedded in
    # quoted fields reach the parser untranslated.
    with path.open(newline="") as csv_file:
        return {
            row["set_num"]: {
                "name": row["name"],
                "year": int(row["year"]),
                "set_id": row["set_id"],
            }
            for row in csv.DictReader(csv_file)
        }
def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
    """Pick the ``top_n`` most used non-minifig colours of every set.

    Args:
        rows: Colour rows providing ``set_num``, ``set_id``, ``color_rgb``,
            ``color_name`` and an int-convertible ``quantity_non_minifig``.
        sets_lookup: Mapping ``set_num -> {"name": ..., "year": ...}``; rows
            whose set is absent from it are dropped.
        top_n: Maximum number of colours kept per set.

    Returns:
        Flat list of string-valued rows (``set_num``, ``set_id``, ``name``,
        ``year``, ``rank``, ``color_rgb``, ``color_name``,
        ``quantity_non_minifig``) ordered by year, name, set_num and rank.
        Rows with a non-positive quantity are ignored.
    """
    candidates_by_set: Dict[str, List[dict]] = defaultdict(list)
    for entry in rows:
        count = int(entry["quantity_non_minifig"])
        if count <= 0:
            continue
        set_num = entry["set_num"]
        info = sets_lookup.get(set_num)
        if info is None:
            continue
        candidate = {
            "set_num": set_num,
            "set_id": entry["set_id"],
            "year": info["year"],
            "name": info["name"],
            "color_rgb": entry["color_rgb"],
            "color_name": entry["color_name"],
            "quantity": count,
        }
        candidates_by_set[set_num].append(candidate)

    ranked: List[dict] = []
    for candidates in candidates_by_set.values():
        # Largest quantity first; ties are broken alphabetically by colour name.
        ordered = sorted(candidates, key=lambda item: (-item["quantity"], item["color_name"]))
        ranked.extend(
            {
                "set_num": item["set_num"],
                "set_id": item["set_id"],
                "name": item["name"],
                "year": str(item["year"]),
                "rank": str(position),
                "color_rgb": item["color_rgb"],
                "color_name": item["color_name"],
                "quantity_non_minifig": str(item["quantity"]),
            }
            for position, item in enumerate(ordered[:top_n], start=1)
        )

    ranked.sort(key=lambda out: (int(out["year"]), out["name"], out["set_num"], int(out["rank"])))
    return ranked
def write_top_colors(path: Path, rows: Sequence[dict]) -> None:
    """Write the dominant-colour rows to ``path`` as CSV with a header line.

    The parent directory is prepared with ``ensure_parent_dir`` before the
    file is opened for writing. Columns are emitted in a fixed order.
    """
    columns = [
        "set_num",
        "set_id",
        "name",
        "year",
        "rank",
        "color_rgb",
        "color_name",
        "quantity_non_minifig",
    ]
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        swatch_writer = csv.DictWriter(handle, fieldnames=columns)
        swatch_writer.writeheader()
        swatch_writer.writerows(rows)

View File

@ -0,0 +1,25 @@
"""Trace la palette dominante de chaque set (hors minifigs)."""
from pathlib import Path
from lib.plots.set_color_swatches import plot_set_color_swatches
from lib.rebrickable.set_color_swatches import build_top_colors_by_set, load_colors_by_set, load_sets_enriched, write_top_colors
# Input: per-set colour rows, read by load_colors_by_set in main().
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
# Input: enriched set metadata, read by load_sets_enriched in main().
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Intermediate output: top colours per set, written then re-read for plotting.
SWATCHES_PATH = Path("data/intermediate/set_color_swatches.csv")
# Final output: rendered swatch chart.
DESTINATION_PATH = Path("figures/step27/set_color_swatches.png")
def main() -> None:
    """Build the per-set top-colour CSV, then render the swatch chart from it."""
    color_rows = load_colors_by_set(COLORS_BY_SET_PATH)
    set_index = load_sets_enriched(SETS_ENRICHED_PATH)
    write_top_colors(
        SWATCHES_PATH,
        build_top_colors_by_set(color_rows, set_index, top_n=5),
    )
    # The plot step reads the CSV written just above, not the in-memory rows.
    plot_set_color_swatches(SWATCHES_PATH, DESTINATION_PATH)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,154 @@
"""Tests de la préparation des palettes par set."""
from pathlib import Path
from lib.rebrickable.set_color_swatches import build_top_colors_by_set
def write_csv(path: Path, content: str) -> None:
    """Write raw CSV text to ``path``, replacing any existing content."""
    with path.open("w") as handle:
        handle.write(content)
def test_build_top_colors_by_set_selects_top5_non_minifig(tmp_path: Path) -> None:
    """Keep the five dominant non-minifig colours of a set, in rank order.

    Set 123-1 has six colours; Yellow and Pink tie at quantity 1, so the
    alphabetical tie-break keeps Pink at rank 5 and drops Yellow. Set 124-1
    only has a zero non-minifig quantity and must not appear at all.

    The rows are passed in memory, so no CSV fixture is written; ``tmp_path``
    is kept only so the test signature stays unchanged.
    """
    input_keys = ("set_num", "set_id", "year", "color_rgb", "color_name", "quantity_non_minifig")
    inventory = [
        ("123-1", "123", "2020", "111111", "Black", "10"),
        ("123-1", "123", "2020", "222222", "Red", "5"),
        ("123-1", "123", "2020", "333333", "Blue", "3"),
        ("123-1", "123", "2020", "444444", "Green", "2"),
        ("123-1", "123", "2020", "555555", "Yellow", "1"),
        ("123-1", "123", "2020", "666666", "Pink", "1"),
        ("124-1", "124", "2021", "aaaaaa", "Gray", "0"),
    ]

    rows = build_top_colors_by_set(
        [dict(zip(input_keys, values)) for values in inventory],
        {
            "123-1": {"name": "Set A", "year": 2020, "set_id": "123"},
            "124-1": {"name": "Set B", "year": 2021, "set_id": "124"},
        },
        top_n=5,
    )

    # (rank, color_rgb, color_name, quantity_non_minifig) expected for set 123-1.
    expected_ranking = [
        ("1", "111111", "Black", "10"),
        ("2", "222222", "Red", "5"),
        ("3", "333333", "Blue", "3"),
        ("4", "444444", "Green", "2"),
        ("5", "666666", "Pink", "1"),
    ]
    assert rows == [
        {
            "set_num": "123-1",
            "set_id": "123",
            "name": "Set A",
            "year": "2020",
            "rank": rank,
            "color_rgb": color_rgb,
            "color_name": color_name,
            "quantity_non_minifig": quantity,
        }
        for rank, color_rgb, color_name, quantity in expected_ranking
    ]

View File

@ -0,0 +1,29 @@
"""Tests du graphique de palettes dominantes par set."""
import matplotlib
from pathlib import Path
from lib.plots.set_color_swatches import plot_set_color_swatches
# Select the "Agg" backend before the test runs — presumably so the figure can
# be rendered to a file without a display; confirm against the CI environment.
matplotlib.use("Agg")
def test_plot_set_color_swatches(tmp_path: Path) -> None:
    """Render the top-5 swatch chart and check a non-empty image is produced.

    Set B has a single colour, so the plot has to pad its row with
    placeholders. The destination lives in a not-yet-existing sub-directory.
    """
    csv_file = tmp_path / "set_color_swatches.csv"
    image_file = tmp_path / "figures" / "step27" / "set_color_swatches.png"
    csv_text = (
        "set_num,set_id,name,year,rank,color_rgb,color_name,quantity_non_minifig\n"
        "123-1,123,Set A,2020,1,111111,Black,10\n"
        "123-1,123,Set A,2020,2,222222,Red,5\n"
        "123-1,123,Set A,2020,3,333333,Blue,3\n"
        "123-1,123,Set A,2020,4,444444,Green,2\n"
        "123-1,123,Set A,2020,5,555555,Yellow,1\n"
        "124-1,124,Set B,2021,1,aaaaaa,Gray,4\n"
    )
    csv_file.write_text(csv_text)

    plot_set_color_swatches(csv_file, image_file)

    assert image_file.exists()
    assert image_file.stat().st_size > 0