1

Ajoute l’étape 27 de palettes dominantes par set

This commit is contained in:
2025-12-02 14:36:24 +01:00
parent fbf20e2592
commit a649283cf2
7 changed files with 390 additions and 0 deletions

View File

@@ -0,0 +1,86 @@
"""Préparation des palettes dominantes par set (hors minifigs)."""
import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_colors_by_set(path: Path) -> List[dict]:
    """Load the rows of colors_by_set.csv.

    The file at ``path`` is handed to ``read_rows`` and whatever that helper
    returns is passed back to the caller unchanged.
    """
    color_rows = read_rows(path)
    return color_rows
def load_sets_enriched(path: Path) -> Dict[str, dict]:
    """Index set name, year and set_id by set_num.

    Reads the CSV file at ``path`` (it must have a header row containing the
    columns ``set_num``, ``name``, ``year`` and ``set_id``) and builds a
    lookup keyed by ``set_num``.

    Returns:
        A mapping ``set_num -> {"name": str, "year": int, "set_id": str}``.
        If the same ``set_num`` appears several times, the last row wins.

    Raises:
        KeyError: if one of the expected columns is missing.
        ValueError: if a ``year`` cell cannot be parsed as an integer.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back exactly as stored.
    with path.open(newline="") as csv_file:
        return {
            row["set_num"]: {
                "name": row["name"],
                "year": int(row["year"]),
                "set_id": row["set_id"],
            }
            for row in csv.DictReader(csv_file)
        }
def build_top_colors_by_set(rows: Iterable[dict], sets_lookup: Dict[str, dict], top_n: int = 5) -> List[dict]:
    """Select the top non-minifig colors for each set.

    Args:
        rows: per-set color rows; each must provide ``quantity_non_minifig``
            and, when that quantity is positive, ``set_num``, ``set_id``,
            ``color_rgb`` and ``color_name``.
        sets_lookup: mapping ``set_num -> {"name": ..., "year": ...}``; rows
            whose set is absent from this mapping are ignored.
        top_n: number of colors kept per set (slice length).

    Returns:
        One dict per kept color with string values for ``year``, ``rank`` and
        ``quantity_non_minifig``. Within a set, colors are ranked by
        descending quantity, then by color name. The whole list is ordered by
        year, set name, set number and rank.
    """
    candidates: Dict[str, List[dict]] = defaultdict(list)
    for source_row in rows:
        pieces = int(source_row["quantity_non_minifig"])
        # Colors that only appear on minifigs (or not at all) are irrelevant.
        if pieces <= 0:
            continue
        set_num = source_row["set_num"]
        meta = sets_lookup.get(set_num)
        if meta is None:
            continue
        candidate = {
            "set_num": set_num,
            "set_id": source_row["set_id"],
            "year": meta["year"],
            "name": meta["name"],
            "color_rgb": source_row["color_rgb"],
            "color_name": source_row["color_name"],
            "quantity": pieces,
        }
        candidates[set_num].append(candidate)

    ranked: List[dict] = []
    for per_set in candidates.values():
        # Largest quantity first; the color name breaks ties alphabetically.
        per_set.sort(key=lambda c: (-c["quantity"], c["color_name"]))
        ranked.extend(
            {
                "set_num": kept["set_num"],
                "set_id": kept["set_id"],
                "name": kept["name"],
                "year": str(kept["year"]),
                "rank": str(position),
                "color_rgb": kept["color_rgb"],
                "color_name": kept["color_name"],
                "quantity_non_minifig": str(kept["quantity"]),
            }
            for position, kept in enumerate(per_set[:top_n], start=1)
        )

    def output_order(entry: dict) -> tuple:
        # year and rank are stored as strings; compare them numerically.
        return (int(entry["year"]), entry["name"], entry["set_num"], int(entry["rank"]))

    return sorted(ranked, key=output_order)
def write_top_colors(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV of dominant colors per set.

    The file at ``path`` is (re)created with a header row followed by one
    line per entry of ``rows``. ``ensure_parent_dir`` is called on ``path``
    before the file is opened (presumably it creates the missing parent
    directory; confirm against the helper).
    """
    columns = [
        "set_num",
        "set_id",
        "name",
        "year",
        "rank",
        "color_rgb",
        "color_name",
        "quantity_non_minifig",
    ]
    ensure_parent_dir(path)
    # newline="" lets the csv writer control the line terminators itself.
    with path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)