Ajouter l’étape 35 : extraction et collage des autocollants

2025-12-03 18:03:43 +01:00
parent f757bfa6bf
commit fc8feec5d9
7 changed files with 340 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -383,3 +383,16 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `dat
 Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et place les visuels des pièces dans `figures/rebrickable/{set_id}/rare_parts/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/part_rarity_download_log.csv`.

 Le tracé `figures/step34/part_rarity.png` juxtapose, pour chaque pièce de `part_rarity_exclusive.csv`, les occurrences dans les sets filtrés vs le reste du catalogue avec les images incrustées.
+
+### Étape 35 : planches d'autocollants (collage)
+
+1. `source .venv/bin/activate`
+2. `python -m scripts.compute_sticker_parts`
+3. `python -m scripts.download_sticker_resources`
+4. `python -m scripts.plot_sticker_sheets`
+
+Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv` et `data/intermediate/sets_enriched.csv`, conserve les pièces de catégorie 58 (stickers) hors rechanges et produit `data/intermediate/sticker_parts.csv` avec set, année, nom, référence et quantité.
+
+Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et enregistre les visuels dans `figures/rebrickable/{set_id}/stickers/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/sticker_download_log.csv` (cache partagé `data/intermediate/part_img_cache.csv`).
+
+Le collage `figures/step35/sticker_sheets.png` assemble toutes les planches trouvées (triées par année puis set) avec, sous chaque image, l’année, l’identifiant de set et la référence de la planche.
--- a/lib/plots/sticker_sheets.py
+++ b/lib/plots/sticker_sheets.py
@@ -0,0 +1,70 @@
+"""Assemblage visuel des planches d'autocollants des sets filtrés."""
+
+from pathlib import Path
+from typing import List
+
+from PIL import Image, ImageDraw, ImageFont
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_sticker_parts(path: Path) -> List[dict]:
+    """Load the per-set sticker rows from the CSV at ``path`` via ``read_rows``."""
+    return read_rows(path)
+
+
+def plot_sticker_sheets(
+    stickers_path: Path,
+    destination_path: Path,
+    resources_dir: Path = Path("figures/rebrickable"),
+    columns: int = 6,
+) -> None:
+    """Paste every available sticker image into a labelled grid saved as PNG.
+
+    Rows are sorted by year, set and part; rows without an image file under
+    ``resources_dir`` are skipped and nothing is written when none is found.
+    """
+    rows = load_sticker_parts(stickers_path)
+    rows.sort(key=lambda r: (int(r["year"]), r["set_num"], r["part_num"]))
+    max_side, cell_width, top_pad, label_gap = 260, 280, 8, 6
+    labels: List[str] = []
+    images: List[Image.Image] = []
+    for row in rows:
+        image_path = resources_dir / row["set_id"] / "stickers" / f"{row['part_num']}.jpg"
+        if not image_path.exists():
+            continue
+        with Image.open(image_path) as source:  # release the file handle right away
+            img = source.convert("RGBA")
+        ratio = min(max_side / img.width, max_side / img.height, 1.0)
+        if ratio < 1.0:  # only ever shrink; keep at least 1 px per side
+            img = img.resize((max(1, int(img.width * ratio)), max(1, int(img.height * ratio))))
+        images.append(img)
+        labels.append(f"{row['year']} • {row['set_id']} • {row['part_num']}")
+    if not images:
+        return
+
+    font = ImageFont.load_default()
+    probe = ImageDraw.Draw(Image.new("RGB", (10, 10)))
+    def measure(text: str) -> tuple[int, int]:
+        bbox = probe.textbbox((0, 0), text, font=font)
+        return bbox[2] - bbox[0], bbox[3] - bbox[1]
+
+    text_height = max(measure(label)[1] for label in labels)
+    tallest = max(img.height for img in images)
+    columns = max(1, columns)
+    rows_count = (len(images) + columns - 1) // columns
+    # Thumbnails may be up to ``max_side`` tall: grow the cell when the default
+    # 220 px image band would push a label into the next row or off the canvas.
+    cell_height = max(220 + text_height + 12, top_pad + tallest + 2 * label_gap + text_height)
+    canvas = Image.new("RGBA", (columns * cell_width, rows_count * cell_height), (255, 255, 255, 255))
+    draw = ImageDraw.Draw(canvas)
+    for index, (img, label) in enumerate(zip(images, labels)):
+        row_idx, col = divmod(index, columns)
+        y = row_idx * cell_height + top_pad
+        canvas.paste(img, (col * cell_width + (cell_width - img.width) // 2, y), img)
+        text_x = col * cell_width + (cell_width - measure(label)[0]) // 2
+        draw.text((text_x, y + img.height + label_gap), label, fill="#111111", font=font)
+
+    ensure_parent_dir(destination_path)
+    canvas.convert("RGB").save(destination_path, "PNG")
--- a/lib/rebrickable/sticker_parts.py
+++ b/lib/rebrickable/sticker_parts.py
@@ -0,0 +1,86 @@
+"""Sélection des planches d'autocollants pour les sets filtrés."""
+
+import csv
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+STICKER_CATEGORY_ID = "58"  # sticker part category; kept as text to match CSV ``part_cat_id`` values
+
+
+def load_parts_catalog(path: Path) -> Dict[str, dict]:
+    """Index the rows of the parts CSV at ``path`` by their ``part_num`` column.
+
+    When a ``part_num`` appears several times, the last row wins.
+    """
+    # newline="" lets the csv module handle line breaks inside quoted fields itself.
+    with path.open(newline="") as csv_file:
+        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
+
+
+def load_sets(path: Path) -> Dict[str, dict]:
+    """Map each ``set_num`` to its row from the enriched sets CSV at ``path``.
+
+    Rows are read with ``read_rows``; a repeated ``set_num`` keeps its last row.
+    """
+    return {entry["set_num"]: entry for entry in read_rows(path)}
+
+
+def aggregate_stickers(
+    rows: Iterable[dict],
+    parts_catalog: Dict[str, dict],
+) -> Dict[Tuple[str, str], int]:
+    """Sum sticker quantities per ``(set_num, part_num)``, ignoring spare rows."""
+    totals: Dict[Tuple[str, str], int] = {}
+    for entry in rows:
+        if entry["is_spare"] != "true":
+            ref = entry["part_num"]
+            # Direct indexing: a part missing from the catalog raises KeyError.
+            if parts_catalog[ref]["part_cat_id"] == STICKER_CATEGORY_ID:
+                pair = (entry["set_num"], ref)
+                # int() converts the per-row quantity before it is accumulated.
+                totals[pair] = totals.get(pair, 0) + int(entry["quantity_in_set"])
+    return totals
+
+
+def build_sticker_parts(
+    parts_filtered_path: Path,
+    parts_catalog_path: Path,
+    sets_path: Path,
+) -> List[dict]:
+    """Return one record per (set, sticker part), sorted by year, set and part.
+
+    Each record carries ``set_num``, ``set_id``, ``year``, ``name``, ``part_num``,
+    ``part_name`` and ``quantity`` (stringified total from ``aggregate_stickers``).
+    """
+    filtered_rows = read_rows(parts_filtered_path)
+    catalog = load_parts_catalog(parts_catalog_path)
+    sets_by_num = load_sets(sets_path)
+    totals = aggregate_stickers(filtered_rows, catalog)
+    records = [
+        {
+            "set_num": set_num,
+            "set_id": sets_by_num[set_num]["set_id"],
+            "year": sets_by_num[set_num]["year"],
+            "name": sets_by_num[set_num]["name"],
+            "part_num": part_num,
+            "part_name": catalog[part_num]["name"],
+            "quantity": str(total),
+        }
+        for (set_num, part_num), total in totals.items()
+    ]
+    return sorted(records, key=lambda rec: (int(rec["year"]), rec["set_num"], rec["part_num"]))
+
+
+def write_sticker_parts(destination_path: Path, rows: Iterable[dict]) -> None:
+    """Write ``rows`` to ``destination_path`` as a CSV with a fixed column order."""
+    columns = ["set_num", "set_id", "year", "name", "part_num", "part_name", "quantity"]
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as handle:
+        out = csv.DictWriter(handle, fieldnames=columns)
+        out.writeheader()
+        # writerows() emits one CSV line per dict, in iteration order.
+        out.writerows(rows)
--- a/scripts/compute_sticker_parts.py
+++ b/scripts/compute_sticker_parts.py
@@ -0,0 +1,21 @@
+"""Extrait les planches d'autocollants des sets filtrés."""
+
+from pathlib import Path
+
+from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
+
+
+PARTS_FILTERED_PATH = Path("data/intermediate/parts_filtered.csv")  # input: filtered parts rows
+PARTS_CATALOG_PATH = Path("data/raw/parts.csv")  # input: parts catalog
+SETS_PATH = Path("data/intermediate/sets_enriched.csv")  # input: enriched sets
+DESTINATION_PATH = Path("data/intermediate/sticker_parts.csv")  # output: sticker rows per set
+
+
+def main() -> None:
+    """Build the sticker rows for the filtered sets and write them to the destination CSV."""
+    stickers = build_sticker_parts(PARTS_FILTERED_PATH, PARTS_CATALOG_PATH, SETS_PATH)
+    write_sticker_parts(DESTINATION_PATH, stickers)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/plot_sticker_sheets.py
+++ b/scripts/plot_sticker_sheets.py
@@ -0,0 +1,19 @@
+"""Assemble les visuels des planches d'autocollants des sets filtrés."""
+
+from pathlib import Path
+
+from lib.plots.sticker_sheets import plot_sticker_sheets
+
+
+STICKER_PARTS_PATH = Path("data/intermediate/sticker_parts.csv")  # input: sticker rows per set
+DESTINATION_PATH = Path("figures/step35/sticker_sheets.png")  # output: collage image
+RESOURCES_DIR = Path("figures/rebrickable")  # root folder searched for sticker images
+
+
+def main() -> None:
+    """Build the sticker sheet collage from the sticker CSV and the images under RESOURCES_DIR."""
+    plot_sticker_sheets(STICKER_PARTS_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR)
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/test_sticker_parts.py
+++ b/tests/test_sticker_parts.py
@@ -0,0 +1,104 @@
+"""Tests de l'extraction des planches d'autocollants."""
+
+import csv
+from pathlib import Path
+
+from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
+
+
+def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
+    """Write ``headers`` then ``rows`` to ``path`` as a plain CSV file."""
+    # A single writerows() call emits the header line followed by the data lines.
+    lines = [headers, *rows]
+    with path.open("w", newline="") as handle:
+        csv.writer(handle).writerows(lines)
+
+
+def test_build_sticker_parts_filters_category_and_spares(tmp_path: Path) -> None:
+    """Keep only sticker parts (category 58) that are not spares."""
+    parts_filtered = tmp_path / "parts_filtered.csv"
+    write_csv(
+        parts_filtered,
+        [
+            "part_num",
+            "color_rgb",
+            "is_translucent",
+            "set_num",
+            "set_id",
+            "year",
+            "quantity_in_set",
+            "is_spare",
+            "is_minifig_part",
+        ],
+        [
+            ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "1", "false", "false"],  # kept
+            ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "2", "true", "false"],  # spare: ignored
+            ["br1", "BBBBBB", "false", "1000-1", "1000", "2020", "5", "false", "false"],  # category 1: ignored
+            ["st2", "CCCCCC", "false", "2000-1", "2000", "2021", "3", "false", "false"],  # kept
+        ],
+    )
+    parts_catalog = tmp_path / "parts.csv"
+    write_csv(
+        parts_catalog,
+        ["part_num", "name", "part_cat_id", "part_material"],
+        [
+            ["st1", "Sticker Sheet 1", "58", "Plastic"],
+            ["st2", "Sticker Sheet 2", "58", "Plastic"],
+            ["br1", "Brick", "1", "Plastic"],
+        ],
+    )
+    sets_enriched = tmp_path / "sets_enriched.csv"
+    write_csv(
+        sets_enriched,
+        ["set_num", "set_id", "name", "year", "in_collection"],
+        [
+            ["1000-1", "1000", "Set A", "2020", "true"],
+            ["2000-1", "2000", "Set B", "2021", "false"],
+        ],
+    )
+
+    stickers = build_sticker_parts(parts_filtered, parts_catalog, sets_enriched)
+
+    assert stickers == [  # sorted by year, then set, then part; quantities are strings
+        {
+            "set_num": "1000-1",
+            "set_id": "1000",
+            "year": "2020",
+            "name": "Set A",
+            "part_num": "st1",
+            "part_name": "Sticker Sheet 1",
+            "quantity": "1",
+        },
+        {
+            "set_num": "2000-1",
+            "set_id": "2000",
+            "year": "2021",
+            "name": "Set B",
+            "part_num": "st2",
+            "part_name": "Sticker Sheet 2",
+            "quantity": "3",
+        },
+    ]
+
+
+def test_write_sticker_parts_outputs_csv(tmp_path: Path) -> None:
+    """Serialize the per-set sticker rows to CSV with the expected header."""
+    destination = tmp_path / "sticker_parts.csv"
+    rows = [
+        {
+            "set_num": "123-1",
+            "set_id": "123",
+            "year": "2020",
+            "name": "Set",
+            "part_num": "st1",
+            "part_name": "Sticker",
+            "quantity": "1",
+        }
+    ]
+
+    write_sticker_parts(destination, rows)
+
+    assert destination.exists()
+    content = destination.read_text().strip().splitlines()
+    assert content[0] == "set_num,set_id,year,name,part_num,part_name,quantity"  # header row
+    assert content[1] == "123-1,123,2020,Set,st1,Sticker,1"  # single data row
--- a/tests/test_sticker_sheets_plot.py
+++ b/tests/test_sticker_sheets_plot.py
@@ -0,0 +1,27 @@
+"""Tests du collage des planches d'autocollants."""
+
+from pathlib import Path
+from PIL import Image
+
+from lib.plots.sticker_sheets import plot_sticker_sheets
+
+
+def test_plot_sticker_sheets(tmp_path: Path) -> None:
+    """Generate a labelled grid of sticker sheets and check the file is written."""
+    stickers_path = tmp_path / "sticker_parts.csv"
+    resources_dir = tmp_path / "figures" / "rebrickable"
+    (resources_dir / "1000" / "stickers").mkdir(parents=True)
+    (resources_dir / "2000" / "stickers").mkdir(parents=True)
+    Image.new("RGB", (120, 80), color=(255, 0, 0)).save(resources_dir / "1000" / "stickers" / "st1.jpg")
+    Image.new("RGB", (100, 60), color=(0, 255, 0)).save(resources_dir / "2000" / "stickers" / "st2.jpg")
+    stickers_path.write_text(
+        "set_num,set_id,year,name,part_num,part_name,quantity\n"
+        "1000-1,1000,2020,Set A,st1,Sticker 1,1\n"
+        "2000-1,2000,2021,Set B,st2,Sticker 2,1\n"
+    )
+    destination = tmp_path / "figures" / "step35" / "sticker_sheets.png"
+
+    plot_sticker_sheets(stickers_path, destination, resources_dir=resources_dir, columns=2)
+
+    assert destination.exists()  # only checks that a file was produced, not its pixels
+    assert destination.stat().st_size > 0