Ajoute l’étape 27 de palettes dominantes par set

2025-12-02 14:36:24 +01:00
parent fbf20e2592
commit a649283cf2
7 changed files with 390 additions and 0 deletions
--- a/lib/rebrickable/set_color_swatches.py
+++ b/lib/rebrickable/set_color_swatches.py
@@ -0,0 +1,86 @@
+"""Préparation des palettes dominantes par set (hors minifigs)."""
+
+import csv
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_colors_by_set(path: Path) -> List[dict]:
+    """Load the rows of colors_by_set.csv.
+
+    Delegates to ``read_rows`` and hands its result back unchanged.
+    """
+    color_rows = read_rows(path)
+    return color_rows
+
+
+def load_sets_enriched(path: Path) -> Dict[str, dict]:
+    """Index name, year and set_id by set_num.
+
+    Args:
+        path: CSV file providing the columns ``set_num``, ``name``, ``year``
+            and ``set_id``.
+
+    Returns:
+        Mapping from ``set_num`` to a dict with the keys ``name``, ``year``
+        (converted to ``int``) and ``set_id``. When a ``set_num`` appears
+        several times, the last row wins.
+
+    Raises:
+        KeyError: if one of the required columns is missing.
+        ValueError: if a ``year`` cell is not an integer literal.
+    """
+    # newline="" lets the csv module handle line endings itself, so quoted
+    # fields containing line breaks are read back exactly as they were written
+    # (and it matches how write_top_colors opens its file).
+    with path.open(newline="") as csv_file:
+        return {
+            row["set_num"]: {
+                "name": row["name"],
+                "year": int(row["year"]),
+                "set_id": row["set_id"],
+            }
+            for row in csv.DictReader(csv_file)
+        }
+
+
+def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
+    """Pick the most used non-minifig colors of every known set.
+
+    Rows whose ``quantity_non_minifig`` is not positive, or whose ``set_num``
+    is absent from ``sets_lookup``, are ignored. Within a set, colors are
+    ranked by decreasing quantity with ties broken by ``color_name``, and the
+    ranked list is cut with the slice ``[:top_n]``. The result is ordered by
+    year, name, set_num and rank; ``year``, ``rank`` and
+    ``quantity_non_minifig`` are emitted as strings.
+    """
+
+    def output_order(entry: dict) -> tuple:
+        # Year and rank are stored as text, so compare them numerically.
+        return (int(entry["year"]), entry["name"], entry["set_num"], int(entry["rank"]))
+
+    # Group the usable rows per set, keeping the quantity as an int for ranking.
+    grouped: Dict[str, List[dict]] = defaultdict(list)
+    for source in rows:
+        count = int(source["quantity_non_minifig"])
+        if count <= 0:
+            continue
+        set_num = source["set_num"]
+        meta = sets_lookup.get(set_num)
+        if meta is None:
+            continue
+        candidate = {
+            "set_num": set_num,
+            "set_id": source["set_id"],
+            "year": meta["year"],
+            "name": meta["name"],
+            "color_rgb": source["color_rgb"],
+            "color_name": source["color_name"],
+            "quantity": count,
+        }
+        grouped[set_num].append(candidate)
+
+    # Rank inside each set and emit the leading entries in output format.
+    ranked: List[dict] = []
+    for candidates in grouped.values():
+        candidates.sort(key=lambda c: (-c["quantity"], c["color_name"]))
+        ranked.extend(
+            {
+                "set_num": chosen["set_num"],
+                "set_id": chosen["set_id"],
+                "name": chosen["name"],
+                "year": str(chosen["year"]),
+                "rank": str(position),
+                "color_rgb": chosen["color_rgb"],
+                "color_name": chosen["color_name"],
+                "quantity_non_minifig": str(chosen["quantity"]),
+            }
+            for position, chosen in enumerate(candidates[:top_n], start=1)
+        )
+    return sorted(ranked, key=output_order)
+
+
+def write_top_colors(path: Path, rows: Sequence[dict]) -> None:
+    """Write the per-set dominant colors to a CSV file.
+
+    Calls ``ensure_parent_dir`` on ``path``, then writes a header line
+    followed by one line per entry of ``rows`` using a fixed column order.
+    """
+    columns = [
+        "set_num",
+        "set_id",
+        "name",
+        "year",
+        "rank",
+        "color_rgb",
+        "color_name",
+        "quantity_non_minifig",
+    ]
+    ensure_parent_dir(path)
+    with path.open("w", newline="") as handle:
+        writer = csv.DictWriter(handle, fieldnames=columns)
+        writer.writeheader()
+        writer.writerows(rows)