Ajoute l’étape 27 de palettes dominantes par set
This commit is contained in:
parent
1dd713db4a
commit
7b6045941f
@ -269,3 +269,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini
|
|||||||
|
|
||||||
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
|
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
|
||||||
Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés).
|
Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés).
|
||||||
|
|
||||||
|
### Étape 27 : palettes dominantes par set (hors minifigs)
|
||||||
|
|
||||||
|
1. `source .venv/bin/activate`
|
||||||
|
2. `python -m scripts.plot_set_color_swatches`
|
||||||
|
|
||||||
|
Le script lit `data/intermediate/colors_by_set.csv` (hors rechanges) et `data/intermediate/sets_enriched.csv`, sélectionne pour chaque set les 5 couleurs les plus présentes en excluant les pièces de minifigs (`quantity_non_minifig`), écrit `data/intermediate/set_color_swatches.csv`, puis trace `figures/step27/set_color_swatches.png` affichant chaque set avec ses 5 pastilles de couleurs dominantes.
|
||||||
|
|||||||
89
lib/plots/set_color_swatches.py
Normal file
89
lib/plots/set_color_swatches.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
"""Palette dominante par set (hors minifigs)."""
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Sequence
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
# Fill color used for swatches whose ``color_rgb`` is empty (padding entries).
PLACEHOLDER_COLOR = "#e0e0e0"
|
||||||
|
|
||||||
|
|
||||||
|
def load_swatches(path: Path) -> List[dict]:
    """Return what ``read_rows`` yields for the dominant-color CSV at ``path``."""
    swatch_rows = read_rows(path)
    return swatch_rows
|
||||||
|
|
||||||
|
|
||||||
|
def group_swatches(rows: Sequence[dict], top_n: int = 5) -> List[dict]:
    """Bundle color rows per set, padded with placeholder entries up to ``top_n``.

    Each returned item has ``set_num``, ``name``, ``year`` (as ``int``) and
    ``colors``: the set's rows ordered by numeric ``rank`` and cut or padded to
    exactly ``top_n`` entries. Items are ordered by year, name, then set number.
    """
    per_set: Dict[str, List[dict]] = {}
    info: Dict[str, dict] = {}
    for entry in rows:
        key = entry["set_num"]
        per_set.setdefault(key, []).append(entry)
        # The most recent row seen for a set supplies its name and year.
        info[key] = {"name": entry["name"], "year": int(entry["year"])}

    palettes: List[dict] = []
    for key, members in per_set.items():
        set_name = info[key]["name"]
        set_year = info[key]["year"]
        ranked = list(members)
        ranked.sort(key=lambda item: int(item["rank"]))
        # Fill the missing ranks with blank swatches so every set has top_n slots.
        for position in range(len(ranked) + 1, top_n + 1):
            ranked.append(
                {
                    "set_num": key,
                    "name": set_name,
                    "year": str(set_year),
                    "rank": str(position),
                    "color_rgb": "",
                    "color_name": "N/A",
                    "quantity_non_minifig": "0",
                }
            )
        palettes.append(
            {
                "set_num": key,
                "name": set_name,
                "year": set_year,
                "colors": ranked[:top_n],
            }
        )

    return sorted(palettes, key=lambda item: (item["year"], item["name"], item["set_num"]))
|
||||||
|
|
||||||
|
|
||||||
|
def plot_set_color_swatches(swatches_path: Path, destination_path: Path) -> None:
    """Plot each set's five dominant (non-minifig) colors as a row of discs.

    Args:
        swatches_path: CSV of ranked colors per set, loaded via ``load_swatches``.
        destination_path: Image file to write; its parent directory is prepared
            with ``ensure_parent_dir`` before saving.

    Returns:
        None. Nothing is drawn or written when the CSV yields no rows.
    """
    rows = load_swatches(swatches_path)
    if not rows:
        return
    grouped = group_swatches(rows, top_n=5)
    set_labels = [f"{item['year']} – {item['name']}" for item in grouped]
    y_positions = list(range(len(grouped)))
    # One set per row: 0.4 inch each, with a 4 inch minimum.
    height = max(4, len(grouped) * 0.4)

    fig, ax = plt.subplots(figsize=(12, height))
    # pyplot keeps a figure registered until it is closed, so release it even
    # when drawing or saving raises.
    try:
        for y, item in zip(y_positions, grouped):
            for idx, color in enumerate(item["colors"]):
                rgb = color["color_rgb"].strip()
                # Padding entries carry an empty color_rgb and get the placeholder.
                face_color = f"#{rgb}" if rgb else PLACEHOLDER_COLOR
                ax.scatter(
                    idx,
                    y,
                    s=500,
                    color=face_color,
                    edgecolor="#0d0d0d",
                    linewidth=0.6,
                )
        ax.set_yticks(y_positions)
        ax.set_yticklabels(set_labels)
        ax.set_xticks([])
        # First set at the top of the chart.
        ax.invert_yaxis()
        ax.set_xlim(-0.6, 4.6)
        ax.set_title("Top 5 couleurs principales par set (hors minifigs)")
        ax.grid(False)

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=160)
    finally:
        plt.close(fig)
|
||||||
86
lib/rebrickable/set_color_swatches.py
Normal file
86
lib/rebrickable/set_color_swatches.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
"""Préparation des palettes dominantes par set (hors minifigs)."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Sequence
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_colors_by_set(path: Path) -> List[dict]:
    """Return what ``read_rows`` yields for the ``colors_by_set.csv`` file at ``path``."""
    color_rows = read_rows(path)
    return color_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_sets_enriched(path: Path) -> Dict[str, dict]:
    """Index set metadata (name, year, set_id) by ``set_num``.

    Args:
        path: CSV file with at least the columns ``set_num``, ``name``,
            ``year`` and ``set_id``.

    Returns:
        Mapping from ``set_num`` to ``{"name": str, "year": int, "set_id": str}``.
        When a ``set_num`` appears on several rows, the last row wins.

    Raises:
        KeyError: If one of the required columns is missing from the header.
        ValueError: If a ``year`` cell cannot be parsed as an integer.
    """
    # newline="" hands line-ending handling to the csv module, so quoted fields
    # that contain line breaks are read back unchanged.
    with path.open(newline="") as csv_file:
        return {
            row["set_num"]: {
                "name": row["name"],
                "year": int(row["year"]),
                "set_id": row["set_id"],
            }
            for row in csv.DictReader(csv_file)
        }
|
||||||
|
|
||||||
|
|
||||||
|
def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
    """Pick, for every known set, its ``top_n`` most used non-minifig colors.

    Rows with a non-positive ``quantity_non_minifig`` and rows whose ``set_num``
    is absent from ``sets_lookup`` are ignored. Within a set, colors are ranked
    by descending quantity, then by color name. The output rows hold string
    values only and are ordered by year, name, set number and rank.
    """
    candidates: Dict[str, List[dict]] = {}
    for source in rows:
        amount = int(source["quantity_non_minifig"])
        if amount <= 0:
            continue
        key = source["set_num"]
        meta = sets_lookup.get(key)
        if meta is None:
            continue
        candidates.setdefault(key, []).append(
            {
                "set_num": key,
                "set_id": source["set_id"],
                "year": meta["year"],
                "name": meta["name"],
                "color_rgb": source["color_rgb"],
                "color_name": source["color_name"],
                "quantity": amount,
            }
        )

    ranked: List[dict] = []
    for palette in candidates.values():
        # Largest quantity first; equal quantities fall back to the color name.
        palette.sort(key=lambda entry: (-entry["quantity"], entry["color_name"]))
        for position, entry in enumerate(palette[:top_n], start=1):
            ranked.append(
                {
                    "set_num": entry["set_num"],
                    "set_id": entry["set_id"],
                    "name": entry["name"],
                    "year": str(entry["year"]),
                    "rank": str(position),
                    "color_rgb": entry["color_rgb"],
                    "color_name": entry["color_name"],
                    "quantity_non_minifig": str(entry["quantity"]),
                }
            )

    return sorted(
        ranked,
        key=lambda entry: (int(entry["year"]), entry["name"], entry["set_num"], int(entry["rank"])),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_top_colors(path: Path, rows: Sequence[dict]) -> None:
    """Write the dominant-color rows to ``path`` as CSV, header line first."""
    ensure_parent_dir(path)
    # Column order of the output file.
    columns = ["set_num", "set_id", "name", "year"]
    columns += ["rank", "color_rgb", "color_name", "quantity_non_minifig"]
    with path.open("w", newline="") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
|
||||||
25
scripts/plot_set_color_swatches.py
Normal file
25
scripts/plot_set_color_swatches.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
"""Trace la palette dominante de chaque set (hors minifigs)."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.set_color_swatches import plot_set_color_swatches
|
||||||
|
from lib.rebrickable.set_color_swatches import build_top_colors_by_set, load_colors_by_set, load_sets_enriched, write_top_colors
|
||||||
|
|
||||||
|
|
||||||
|
# Inputs read by the script.
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Outputs: the intermediate CSV, then the figure rendered from it.
SWATCHES_PATH = Path("data/intermediate/set_color_swatches.csv")
DESTINATION_PATH = Path("figures/step27/set_color_swatches.png")


def main() -> None:
    """Write the per-set top-color CSV, then render the swatch chart from it."""
    raw_colors = load_colors_by_set(COLORS_BY_SET_PATH)
    set_index = load_sets_enriched(SETS_ENRICHED_PATH)
    write_top_colors(
        SWATCHES_PATH,
        build_top_colors_by_set(raw_colors, set_index, top_n=5),
    )
    # The plot reads back the CSV that was just written.
    plot_set_color_swatches(SWATCHES_PATH, DESTINATION_PATH)


if __name__ == "__main__":
    main()
|
||||||
154
tests/test_set_color_swatches.py
Normal file
154
tests/test_set_color_swatches.py
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
"""Tests de la préparation des palettes par set."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.set_color_swatches import build_top_colors_by_set
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv(path: Path, content: str) -> None:
    """Create or overwrite ``path`` with the raw CSV text ``content``."""
    with path.open("w") as handle:
        handle.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_top_colors_by_set_selects_top5_non_minifig() -> None:
    """Keep the five largest non-minifig colors of a set and rank them.

    The rows are passed to ``build_top_colors_by_set`` in memory, so no CSV
    fixture is needed.
    """
    # (color_rgb, color_name, quantity_non_minifig) for set 123-1: six colors,
    # one more than the requested top 5.
    set_a_colors = [
        ("111111", "Black", "10"),
        ("222222", "Red", "5"),
        ("333333", "Blue", "3"),
        ("444444", "Green", "2"),
        ("555555", "Yellow", "1"),
        ("666666", "Pink", "1"),
    ]
    input_rows = [
        {
            "set_num": "123-1",
            "set_id": "123",
            "year": "2020",
            "color_rgb": color_rgb,
            "color_name": color_name,
            "quantity_non_minifig": quantity,
        }
        for color_rgb, color_name, quantity in set_a_colors
    ]
    # Set 124-1 has no non-minifig quantity, so it must not appear in the result.
    input_rows.append(
        {
            "set_num": "124-1",
            "set_id": "124",
            "year": "2021",
            "color_rgb": "aaaaaa",
            "color_name": "Gray",
            "quantity_non_minifig": "0",
        }
    )
    sets_lookup = {
        "123-1": {"name": "Set A", "year": 2020, "set_id": "123"},
        "124-1": {"name": "Set B", "year": 2021, "set_id": "124"},
    }

    rows = build_top_colors_by_set(input_rows, sets_lookup, top_n=5)

    # Yellow and Pink tie at quantity 1; Pink sorts first by name and takes rank 5.
    expected_colors = [
        ("111111", "Black", "10"),
        ("222222", "Red", "5"),
        ("333333", "Blue", "3"),
        ("444444", "Green", "2"),
        ("666666", "Pink", "1"),
    ]
    assert rows == [
        {
            "set_num": "123-1",
            "set_id": "123",
            "name": "Set A",
            "year": "2020",
            "rank": str(rank),
            "color_rgb": color_rgb,
            "color_name": color_name,
            "quantity_non_minifig": quantity,
        }
        for rank, (color_rgb, color_name, quantity) in enumerate(expected_colors, start=1)
    ]
|
||||||
29
tests/test_set_color_swatches_plot.py
Normal file
29
tests/test_set_color_swatches_plot.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
"""Tests du graphique de palettes dominantes par set."""
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.set_color_swatches import plot_set_color_swatches
|
||||||
|
|
||||||
|
|
||||||
|
# Select the non-interactive Agg backend for the tests in this module.
matplotlib.use("Agg")
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_set_color_swatches(tmp_path: Path) -> None:
    """Render the swatch chart from a small CSV and check that an image file appears."""
    csv_lines = [
        "set_num,set_id,name,year,rank,color_rgb,color_name,quantity_non_minifig\n",
        "123-1,123,Set A,2020,1,111111,Black,10\n",
        "123-1,123,Set A,2020,2,222222,Red,5\n",
        "123-1,123,Set A,2020,3,333333,Blue,3\n",
        "123-1,123,Set A,2020,4,444444,Green,2\n",
        "123-1,123,Set A,2020,5,555555,Yellow,1\n",
        # Set B has a single color.
        "124-1,124,Set B,2021,1,aaaaaa,Gray,4\n",
    ]
    swatches_path = tmp_path / "set_color_swatches.csv"
    swatches_path.write_text("".join(csv_lines))
    destination = tmp_path.joinpath("figures", "step27", "set_color_swatches.png")

    plot_set_color_swatches(swatches_path, destination)

    assert destination.exists()
    assert destination.stat().st_size > 0
|
||||||
Loading…
x
Reference in New Issue
Block a user