1

Ajouter l’étape 35 : extraction et collage des autocollants

This commit is contained in:
Richard Dern 2025-12-03 18:03:43 +01:00
parent f757bfa6bf
commit fc8feec5d9
7 changed files with 340 additions and 0 deletions

View File

@ -383,3 +383,16 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `dat
Le téléchargement s'appuie sur `REBRICKABLE_TOKEN` et place les visuels des pièces dans `figures/rebrickable/{set_id}/rare_parts/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/part_rarity_download_log.csv`.
Le tracé `figures/step34/part_rarity.png` juxtapose, pour chaque pièce de `part_rarity_exclusive.csv`, les occurrences dans les sets filtrés vs le reste du catalogue avec les images incrustées.
### Étape 35 : planches d'autocollants (collage)
1. `source .venv/bin/activate`
2. `python -m scripts.compute_sticker_parts`
3. `python -m scripts.download_sticker_resources`
4. `python -m scripts.plot_sticker_sheets`
Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv` et `data/intermediate/sets_enriched.csv`, conserve les pièces de catégorie 58 (stickers) hors rechanges et produit `data/intermediate/sticker_parts.csv` avec set, année, nom, référence et quantité.
Le téléchargement s'appuie sur `REBRICKABLE_TOKEN` et enregistre les visuels dans `figures/rebrickable/{set_id}/stickers/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/sticker_download_log.csv` (cache partagé `data/intermediate/part_img_cache.csv`).
Le collage `figures/step35/sticker_sheets.png` assemble toutes les planches trouvées (triées par année puis set) avec, sous chaque image, l'année, l'identifiant de set et la référence de la planche.

View File

@ -0,0 +1,70 @@
"""Assemblage visuel des planches d'autocollants des sets filtrés."""
from pathlib import Path
from typing import List
from PIL import Image, ImageDraw, ImageFont
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
def load_sticker_parts(path: Path) -> List[dict]:
    """Return the rows of the per-set sticker CSV located at ``path``.

    Thin wrapper that delegates the actual reading to ``read_rows``.
    """
    sticker_rows = read_rows(path)
    return sticker_rows
def plot_sticker_sheets(
    stickers_path: Path,
    destination_path: Path,
    resources_dir: Path = Path("figures/rebrickable"),
    columns: int = 6,
) -> None:
    """Assemble the available sticker-sheet images into one captioned grid.

    Rows are loaded from ``stickers_path`` and ordered by year, set number and
    part number. A row is kept only when its image
    ``resources_dir/<set_id>/stickers/<part_num>.jpg`` exists. Each kept image
    is shrunk (never enlarged) to fit a 260-pixel square, pasted into a grid
    cell and captioned with the year, set id and part number.

    Args:
        stickers_path: CSV of sticker sheets; each row is read for its
            ``year``, ``set_num``, ``set_id`` and ``part_num`` columns.
        destination_path: Path of the PNG collage to write.
        resources_dir: Root directory that holds the downloaded images.
        columns: Number of grid columns; values below 1 are treated as 1.

    Returns:
        None. When no image is found, nothing is written.
    """
    max_side = 260  # thumbnails are shrunk to fit a max_side x max_side square
    cell_width = 280
    top_padding = 8
    label_gap = 6
    bottom_padding = 6

    rows = load_sticker_parts(stickers_path)
    rows.sort(key=lambda r: (int(r["year"]), r["set_num"], r["part_num"]))

    selected: List[dict] = []
    images: List[Image.Image] = []
    for row in rows:
        image_path = resources_dir / row["set_id"] / "stickers" / f"{row['part_num']}.jpg"
        if not image_path.exists():
            continue
        # The context manager closes the underlying file; convert() returns
        # an independent in-memory copy that stays usable afterwards.
        with Image.open(image_path) as source:
            img = source.convert("RGBA")
        ratio = min(max_side / img.width, max_side / img.height, 1.0)
        if ratio < 1.0:
            # Clamp to 1 px: a very elongated image would otherwise round a
            # side down to 0, which resize() rejects.
            new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
            img = img.resize(new_size)
        images.append(img)
        selected.append(row)
    if not images:
        return

    font = ImageFont.load_default()
    # A single scratch canvas is enough to measure every caption.
    probe = ImageDraw.Draw(Image.new("RGB", (10, 10)))

    def measure(text: str) -> tuple[int, int]:
        """Return the (width, height) of ``text`` rendered with ``font``."""
        bbox = probe.textbbox((0, 0), text, font=font)
        return bbox[2] - bbox[0], bbox[3] - bbox[1]

    # Separate the three fields so the caption stays readable
    # (the values were previously glued together, e.g. "20201000st1").
    labels = [f"{row['year']} - {row['set_id']} - {row['part_num']}" for row in selected]
    label_sizes = [measure(label) for label in labels]
    text_height = max(label_height for _, label_height in label_sizes)

    columns = max(1, columns)
    rows_count = (len(images) + columns - 1) // columns

    # Keep the historical cell height as a minimum, but grow it when a
    # thumbnail is taller than the 220 px band so that neither the image nor
    # its caption spills into the row below.
    tallest = max(img.height for img in images)
    cell_height = max(
        220 + text_height + 12,
        top_padding + tallest + label_gap + text_height + bottom_padding,
    )

    width = columns * cell_width
    height = rows_count * cell_height
    canvas = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(canvas)
    for index, (img, label, (text_width, _)) in enumerate(zip(images, labels, label_sizes)):
        col = index % columns
        row_idx = index // columns
        # Centre the thumbnail horizontally inside its cell.
        x = col * cell_width + (cell_width - img.width) // 2
        y = row_idx * cell_height + top_padding
        # The image is its own mask so its alpha channel is honoured.
        canvas.paste(img, (x, y), img)
        text_x = col * cell_width + (cell_width - text_width) // 2
        text_y = y + img.height + label_gap
        draw.text((text_x, text_y), label, fill="#111111", font=font)

    ensure_parent_dir(destination_path)
    canvas.convert("RGB").save(destination_path, "PNG")

View File

@ -0,0 +1,86 @@
"""Sélection des planches d'autocollants pour les sets filtrés."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows
# Value of the catalogue's `part_cat_id` column that identifies sticker parts.
STICKER_CATEGORY_ID = "58"
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the rows of the parts catalogue by their ``part_num`` column.

    Args:
        path: CSV file whose header row contains a ``part_num`` column.

    Returns:
        Mapping from part number to the full CSV row (as a dict). When a part
        number appears on several rows, the last one wins.

    Raises:
        KeyError: If a row has no ``part_num`` column.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded newlines are read back unaltered.
    with path.open(newline="") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
def load_sets(path: Path) -> Dict[str, dict]:
    """Build a ``set_num`` -> row index from the enriched sets file at ``path``.

    Rows are obtained through ``read_rows``; a later row replaces an earlier
    one that shares the same ``set_num``.
    """
    return {set_row["set_num"]: set_row for set_row in read_rows(path)}
def aggregate_stickers(
    rows: Iterable[dict],
    parts_catalog: Dict[str, dict],
) -> Dict[Tuple[str, str], int]:
    """Sum sticker quantities per ``(set_num, part_num)`` pair.

    Rows whose ``is_spare`` column equals ``"true"`` are ignored, as are rows
    whose catalogue entry has a ``part_cat_id`` different from
    ``STICKER_CATEGORY_ID``. The ``quantity_in_set`` column of the remaining
    rows is converted to ``int`` and accumulated.
    """
    totals: Dict[Tuple[str, str], int] = {}
    for entry in rows:
        is_spare = entry["is_spare"] == "true"
        if is_spare:
            continue
        part_num = entry["part_num"]
        is_sticker = parts_catalog[part_num]["part_cat_id"] == STICKER_CATEGORY_ID
        if not is_sticker:
            continue
        pair = (entry["set_num"], part_num)
        running_total = totals.get(pair, 0)
        totals[pair] = running_total + int(entry["quantity_in_set"])
    return totals
def build_sticker_parts(
    parts_filtered_path: Path,
    parts_catalog_path: Path,
    sets_path: Path,
) -> List[dict]:
    """Build one record per (set, sticker part) found in the filtered parts.

    The filtered parts rows are aggregated with ``aggregate_stickers``; each
    aggregated pair is then joined with its set row and its catalogue row.
    The records are returned sorted by year (numerically), set number and
    part number, with the quantity rendered as a string.
    """
    filtered_rows = read_rows(parts_filtered_path)
    catalog = load_parts_catalog(parts_catalog_path)
    sets_by_num = load_sets(sets_path)
    totals = aggregate_stickers(filtered_rows, catalog)

    def describe(set_num: str, part_num: str, quantity: int) -> dict:
        """Join one aggregated pair with its set and catalogue rows."""
        set_row = sets_by_num[set_num]
        part_row = catalog[part_num]
        return {
            "set_num": set_num,
            "set_id": set_row["set_id"],
            "year": set_row["year"],
            "name": set_row["name"],
            "part_num": part_num,
            "part_name": part_row["name"],
            "quantity": str(quantity),
        }

    records = [
        describe(set_num, part_num, quantity)
        for (set_num, part_num), quantity in totals.items()
    ]
    return sorted(
        records,
        key=lambda record: (int(record["year"]), record["set_num"], record["part_num"]),
    )
def write_sticker_parts(destination_path: Path, rows: Iterable[dict]) -> None:
    """Write the sticker records to ``destination_path`` as a CSV file.

    The parent directory is prepared with ``ensure_parent_dir`` first; the
    file then receives a header line followed by one line per record.
    """
    column_names = ["set_num", "set_id", "year", "name", "part_num", "part_name", "quantity"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as handle:
        sticker_writer = csv.DictWriter(handle, fieldnames=column_names)
        sticker_writer.writeheader()
        sticker_writer.writerows(rows)

View File

@ -0,0 +1,21 @@
"""Extrait les planches d'autocollants des sets filtrés."""
from pathlib import Path
from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
# CSV files read by the extraction.
PARTS_FILTERED_PATH = Path("data/intermediate/parts_filtered.csv")
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
# CSV file produced by the extraction.
DESTINATION_PATH = Path("data/intermediate/sticker_parts.csv")


def main() -> None:
    """Build the sticker records from the input CSVs and write them out."""
    sticker_records = build_sticker_parts(
        parts_filtered_path=PARTS_FILTERED_PATH,
        parts_catalog_path=PARTS_CATALOG_PATH,
        sets_path=SETS_PATH,
    )
    write_sticker_parts(DESTINATION_PATH, sticker_records)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,19 @@
"""Assemble les visuels des planches d'autocollants des sets filtrés."""
from pathlib import Path
from lib.plots.sticker_sheets import plot_sticker_sheets
# CSV listing the sticker sheets to assemble.
STICKER_PARTS_PATH = Path("data/intermediate/sticker_parts.csv")
# PNG collage produced by this script.
DESTINATION_PATH = Path("figures/step35/sticker_sheets.png")
# Root directory passed to the plot as its image resources directory.
RESOURCES_DIR = Path("figures/rebrickable")


def main() -> None:
    """Render the sticker-sheet collage from the sticker CSV."""
    plot_sticker_sheets(
        stickers_path=STICKER_PARTS_PATH,
        destination_path=DESTINATION_PATH,
        resources_dir=RESOURCES_DIR,
    )


if __name__ == "__main__":
    main()

104
tests/test_sticker_parts.py Normal file
View File

@ -0,0 +1,104 @@
"""Tests de l'extraction des planches d'autocollants."""
import csv
from pathlib import Path
from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write ``headers`` followed by ``rows`` to ``path`` as a CSV file."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
def test_build_sticker_parts_filters_category_and_spares(tmp_path: Path) -> None:
    """Only sticker parts (category 58) that are not spares are kept."""
    parts_filtered = tmp_path / "parts_filtered.csv"
    inventory_headers = [
        "part_num",
        "color_rgb",
        "is_translucent",
        "set_num",
        "set_id",
        "year",
        "quantity_in_set",
        "is_spare",
        "is_minifig_part",
    ]
    inventory_rows = [
        ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "1", "false", "false"],
        # Spare copy of st1: must not be counted.
        ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "2", "true", "false"],
        # Regular brick: not in the sticker category.
        ["br1", "BBBBBB", "false", "1000-1", "1000", "2020", "5", "false", "false"],
        ["st2", "CCCCCC", "false", "2000-1", "2000", "2021", "3", "false", "false"],
    ]
    write_csv(parts_filtered, inventory_headers, inventory_rows)

    parts_catalog = tmp_path / "parts.csv"
    catalog_headers = ["part_num", "name", "part_cat_id", "part_material"]
    catalog_rows = [
        ["st1", "Sticker Sheet 1", "58", "Plastic"],
        ["st2", "Sticker Sheet 2", "58", "Plastic"],
        ["br1", "Brick", "1", "Plastic"],
    ]
    write_csv(parts_catalog, catalog_headers, catalog_rows)

    sets_enriched = tmp_path / "sets_enriched.csv"
    sets_headers = ["set_num", "set_id", "name", "year", "in_collection"]
    sets_rows = [
        ["1000-1", "1000", "Set A", "2020", "true"],
        ["2000-1", "2000", "Set B", "2021", "false"],
    ]
    write_csv(sets_enriched, sets_headers, sets_rows)

    expected = [
        {
            "set_num": "1000-1",
            "set_id": "1000",
            "year": "2020",
            "name": "Set A",
            "part_num": "st1",
            "part_name": "Sticker Sheet 1",
            "quantity": "1",
        },
        {
            "set_num": "2000-1",
            "set_id": "2000",
            "year": "2021",
            "name": "Set B",
            "part_num": "st2",
            "part_name": "Sticker Sheet 2",
            "quantity": "3",
        },
    ]
    stickers = build_sticker_parts(parts_filtered, parts_catalog, sets_enriched)
    assert stickers == expected
def test_write_sticker_parts_outputs_csv(tmp_path: Path) -> None:
    """The sticker records are serialised as a header line plus one line each."""
    destination = tmp_path / "sticker_parts.csv"
    sticker = {
        "set_num": "123-1",
        "set_id": "123",
        "year": "2020",
        "name": "Set",
        "part_num": "st1",
        "part_name": "Sticker",
        "quantity": "1",
    }

    write_sticker_parts(destination, [sticker])

    assert destination.exists()
    lines = destination.read_text().strip().splitlines()
    assert lines[0] == "set_num,set_id,year,name,part_num,part_name,quantity"
    assert lines[1] == "123-1,123,2020,Set,st1,Sticker,1"

View File

@ -0,0 +1,27 @@
"""Tests du collage des planches d'autocollants."""
from pathlib import Path
from PIL import Image
from lib.plots.sticker_sheets import plot_sticker_sheets
def test_plot_sticker_sheets(tmp_path: Path) -> None:
    """A non-empty collage file is produced from two sticker images."""
    stickers_path = tmp_path / "sticker_parts.csv"
    resources_dir = tmp_path / "figures" / "rebrickable"

    # One solid-colour JPEG per set, stored where the plot looks for it.
    fixtures = [
        ("1000", "st1.jpg", (120, 80), (255, 0, 0)),
        ("2000", "st2.jpg", (100, 60), (0, 255, 0)),
    ]
    for set_id, file_name, size, colour in fixtures:
        stickers_dir = resources_dir / set_id / "stickers"
        stickers_dir.mkdir(parents=True)
        Image.new("RGB", size, color=colour).save(stickers_dir / file_name)

    stickers_path.write_text(
        "set_num,set_id,year,name,part_num,part_name,quantity\n"
        "1000-1,1000,2020,Set A,st1,Sticker 1,1\n"
        "2000-1,2000,2021,Set B,st2,Sticker 2,1\n"
    )
    destination = tmp_path / "figures" / "step35" / "sticker_sheets.png"

    plot_sticker_sheets(stickers_path, destination, resources_dir=resources_dir, columns=2)

    assert destination.exists()
    assert destination.stat().st_size > 0