Ajouter l’étape 35 : extraction et collage des autocollants
This commit is contained in:
parent
f757bfa6bf
commit
fc8feec5d9
13
README.md
13
README.md
@ -383,3 +383,16 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `dat
|
|||||||
Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et place les visuels des pièces dans `figures/rebrickable/{set_id}/rare_parts/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/part_rarity_download_log.csv`.
|
Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et place les visuels des pièces dans `figures/rebrickable/{set_id}/rare_parts/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/part_rarity_download_log.csv`.
|
||||||
|
|
||||||
Le tracé `figures/step34/part_rarity.png` juxtapose, pour chaque pièce de `part_rarity_exclusive.csv`, les occurrences dans les sets filtrés vs le reste du catalogue avec les images incrustées.
|
Le tracé `figures/step34/part_rarity.png` juxtapose, pour chaque pièce de `part_rarity_exclusive.csv`, les occurrences dans les sets filtrés vs le reste du catalogue avec les images incrustées.
|
||||||
|
|
||||||
|
### Étape 35 : planches d'autocollants (collage)
|
||||||
|
|
||||||
|
1. `source .venv/bin/activate`
|
||||||
|
2. `python -m scripts.compute_sticker_parts`
|
||||||
|
3. `python -m scripts.download_sticker_resources`
|
||||||
|
4. `python -m scripts.plot_sticker_sheets`
|
||||||
|
|
||||||
|
Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv` et `data/intermediate/sets_enriched.csv`, conserve les pièces de catégorie 58 (stickers) hors rechanges et produit `data/intermediate/sticker_parts.csv` avec set, année, nom, référence et quantité.
|
||||||
|
|
||||||
|
Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et enregistre les visuels dans `figures/rebrickable/{set_id}/stickers/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/sticker_download_log.csv` (cache partagé `data/intermediate/part_img_cache.csv`).
|
||||||
|
|
||||||
|
Le collage `figures/step35/sticker_sheets.png` assemble toutes les planches dont l’image a été téléchargée (triées par année, puis par set, puis par référence) avec, sous chaque image, l’année, l’identifiant de set et la référence de la planche.
|
||||||
|
|||||||
70
lib/plots/sticker_sheets.py
Normal file
70
lib/plots/sticker_sheets.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
"""Assemblage visuel des planches d'autocollants des sets filtrés."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_sticker_parts(path: Path) -> List[dict]:
    """Return the sticker rows found at ``path``.

    The reading itself is delegated entirely to ``read_rows``; this wrapper
    only gives the call a domain-specific name.
    """
    sticker_rows = read_rows(path)
    return sticker_rows
|
||||||
|
|
||||||
|
|
||||||
|
def _load_thumbnail(image_path: Path, max_width: int, max_height: int) -> Image.Image:
    """Open an image as RGBA and shrink it (never enlarge) to fit the given box."""
    # convert() returns an independent in-memory copy, so the file handle held
    # by Image.open can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        thumbnail = source.convert("RGBA")
    ratio = min(max_width / thumbnail.width, max_height / thumbnail.height, 1.0)
    if ratio < 1.0:
        # Clamp each side to at least 1 px: a very elongated image would
        # otherwise be rounded down to a zero-sized side.
        new_size = (
            max(1, int(thumbnail.width * ratio)),
            max(1, int(thumbnail.height * ratio)),
        )
        thumbnail = thumbnail.resize(new_size)
    return thumbnail


def plot_sticker_sheets(
    stickers_path: Path,
    destination_path: Path,
    resources_dir: Path = Path("figures/rebrickable"),
    columns: int = 6,
) -> None:
    """Assemble the available sticker-sheet images into one labelled grid.

    Rows are loaded with ``load_sticker_parts`` and ordered by year, set number
    and part number. For each row the image is looked up at
    ``resources_dir/{set_id}/stickers/{part_num}.jpg``; rows whose image file
    does not exist are skipped. Each remaining image is scaled down to fit its
    cell and drawn with a "year • set_id • part_num" label underneath.

    Args:
        stickers_path: File describing the stickers (one row per set/part).
        destination_path: Where the PNG collage is written; its parent
            directory is prepared with ``ensure_parent_dir``.
        resources_dir: Root directory of the downloaded images.
        columns: Number of cells per grid row; values below 1 are treated as 1.

    Returns:
        None. When no image is found at all, nothing is written.
    """
    # Cell geometry in pixels. The thumbnail box is derived from the cell so
    # that image + label always stay inside their own cell (a thumbnail taller
    # than the cell would overlap the next row or fall off the canvas).
    cell_width = 280
    image_area_height = 220
    top_margin = 8
    label_gap = 6
    bottom_margin = 12
    max_thumb_width = cell_width - 20
    max_thumb_height = image_area_height - top_margin

    rows = load_sticker_parts(stickers_path)
    rows.sort(key=lambda r: (int(r["year"]), r["set_num"], r["part_num"]))
    selected: List[dict] = []
    images: List[Image.Image] = []
    for row in rows:
        image_path = resources_dir / row["set_id"] / "stickers" / f"{row['part_num']}.jpg"
        if not image_path.exists():
            continue
        images.append(_load_thumbnail(image_path, max_thumb_width, max_thumb_height))
        selected.append(row)
    if not images:
        return

    font = ImageFont.load_default()
    # Measuring text needs a drawing context; one scratch context serves all labels.
    scratch = ImageDraw.Draw(Image.new("RGB", (10, 10)))

    def measure(text: str) -> tuple[int, int]:
        bbox = scratch.textbbox((0, 0), text, font=font)
        return bbox[2] - bbox[0], bbox[3] - bbox[1]

    labels = [f"{row['year']} • {row['set_id']} • {row['part_num']}" for row in selected]
    text_height = max(measure(label)[1] for label in labels)

    columns = max(1, columns)
    rows_count = (len(images) + columns - 1) // columns  # ceiling division
    cell_height = image_area_height + text_height + bottom_margin
    width = columns * cell_width
    height = rows_count * cell_height
    canvas = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(canvas)
    for index, (img, label) in enumerate(zip(images, labels)):
        col = index % columns
        row_idx = index // columns
        # Centre the thumbnail horizontally inside its cell.
        x = col * cell_width + (cell_width - img.width) // 2
        y = row_idx * cell_height + top_margin
        # The image doubles as its own mask so transparent pixels keep the white background.
        canvas.paste(img, (x, y), img)
        text_width, _ = measure(label)
        text_x = col * cell_width + (cell_width - text_width) // 2
        text_y = y + img.height + label_gap
        draw.text((text_x, text_y), label, fill="#111111", font=font)

    ensure_parent_dir(destination_path)
    canvas.convert("RGB").save(destination_path, "PNG")
|
||||||
86
lib/rebrickable/sticker_parts.py
Normal file
86
lib/rebrickable/sticker_parts.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
"""Sélection des planches d'autocollants pour les sets filtrés."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Tuple
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
# Value of the catalogue's "part_cat_id" column that identifies sticker parts.
# Kept as a string because it is compared with raw CSV field values.
STICKER_CATEGORY_ID = "58"
|
||||||
|
|
||||||
|
|
||||||
|
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the rows of the parts catalogue CSV by their ``part_num``.

    Args:
        path: CSV file with a header row that includes a ``part_num`` column.

    Returns:
        A mapping from part number to the full row (column name -> string
        value). If a part number appears more than once, the last row wins.

    Raises:
        KeyError: If a row has no ``part_num`` column.
    """
    # newline="" is what the csv module expects: it lets the reader handle line
    # endings itself, so quoted fields containing line breaks survive intact.
    # The explicit encoding avoids depending on the platform's locale default.
    with path.open(newline="", encoding="utf-8") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
|
||||||
|
|
||||||
|
|
||||||
|
def load_sets(path: Path) -> Dict[str, dict]:
    """Map each ``set_num`` to its row, as returned by ``read_rows(path)``.

    When a set number occurs more than once, the last row read wins.
    """
    return {entry["set_num"]: entry for entry in read_rows(path)}
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_stickers(
    rows: Iterable[dict],
    parts_catalog: Dict[str, dict],
) -> Dict[Tuple[str, str], int]:
    """Sum sticker quantities per ``(set_num, part_num)`` pair.

    Rows flagged ``is_spare == "true"`` are ignored, as are parts whose
    catalogue ``part_cat_id`` differs from ``STICKER_CATEGORY_ID``.

    Raises:
        KeyError: If a non-spare row references a part missing from
            ``parts_catalog``.
    """
    totals: Dict[Tuple[str, str], int] = {}
    for entry in rows:
        counted = entry["is_spare"] != "true"
        # Short-circuit: the catalogue is only consulted for non-spare rows.
        if counted and parts_catalog[entry["part_num"]]["part_cat_id"] == STICKER_CATEGORY_ID:
            pair = (entry["set_num"], entry["part_num"])
            running = totals.get(pair, 0)
            totals[pair] = running + int(entry["quantity_in_set"])
    return totals
|
||||||
|
|
||||||
|
|
||||||
|
def build_sticker_parts(
    parts_filtered_path: Path,
    parts_catalog_path: Path,
    sets_path: Path,
) -> List[dict]:
    """Build one record per (set, sticker part) found in the filtered inventory.

    Quantities come from ``aggregate_stickers``; set details (id, year, name)
    and the part name are joined in from the sets file and the parts
    catalogue. The result is ordered by year, set number and part number.

    Raises:
        KeyError: If an aggregated set number is absent from the sets file.
    """
    inventory_rows = read_rows(parts_filtered_path)
    catalog = load_parts_catalog(parts_catalog_path)
    sets_by_num = load_sets(sets_path)
    totals = aggregate_stickers(inventory_rows, catalog)

    def describe(set_num: str, part_num: str, total: int) -> dict:
        set_info = sets_by_num[set_num]
        part_info = catalog[part_num]
        return {
            "set_num": set_num,
            "set_id": set_info["set_id"],
            "year": set_info["year"],
            "name": set_info["name"],
            "part_num": part_num,
            "part_name": part_info["name"],
            # Stored as text, like every other field of the record.
            "quantity": str(total),
        }

    records = [
        describe(set_num, part_num, total)
        for (set_num, part_num), total in totals.items()
    ]
    return sorted(
        records,
        key=lambda record: (int(record["year"]), record["set_num"], record["part_num"]),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_sticker_parts(destination_path: Path, rows: Iterable[dict]) -> None:
    """Write the sticker records to ``destination_path`` as a CSV with a header.

    The parent directory is prepared with ``ensure_parent_dir`` first. Columns
    are written in a fixed order; each row must be a dict keyed by those
    column names.
    """
    column_order = ["set_num", "set_id", "year", "name", "part_num", "part_name", "quantity"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as handle:
        sheet = csv.DictWriter(handle, fieldnames=column_order)
        sheet.writeheader()
        sheet.writerows(rows)
|
||||||
21
scripts/compute_sticker_parts.py
Normal file
21
scripts/compute_sticker_parts.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
"""Extrait les planches d'autocollants des sets filtrés."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
|
||||||
|
|
||||||
|
|
||||||
|
# Input files and output file of this step. The paths are relative, so they
# resolve against the current working directory.
PARTS_FILTERED_PATH = Path("data/intermediate/parts_filtered.csv")
|
||||||
|
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
|
||||||
|
SETS_PATH = Path("data/intermediate/sets_enriched.csv")
|
||||||
|
DESTINATION_PATH = Path("data/intermediate/sticker_parts.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the sticker records for the filtered sets and write them to ``DESTINATION_PATH``."""
    write_sticker_parts(
        DESTINATION_PATH,
        build_sticker_parts(PARTS_FILTERED_PATH, PARTS_CATALOG_PATH, SETS_PATH),
    )


if __name__ == "__main__":
    main()
|
||||||
19
scripts/plot_sticker_sheets.py
Normal file
19
scripts/plot_sticker_sheets.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
"""Assemble les visuels des planches d'autocollants des sets filtrés."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.sticker_sheets import plot_sticker_sheets
|
||||||
|
|
||||||
|
|
||||||
|
# Sticker list, output collage and image root used by this step. The paths are
# relative, so they resolve against the current working directory.
STICKER_PARTS_PATH = Path("data/intermediate/sticker_parts.csv")
|
||||||
|
DESTINATION_PATH = Path("figures/step35/sticker_sheets.png")
|
||||||
|
RESOURCES_DIR = Path("figures/rebrickable")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Render the sticker-sheet collage from the module-level paths."""
    plot_sticker_sheets(
        stickers_path=STICKER_PARTS_PATH,
        destination_path=DESTINATION_PATH,
        resources_dir=RESOURCES_DIR,
    )


if __name__ == "__main__":
    main()
|
||||||
104
tests/test_sticker_parts.py
Normal file
104
tests/test_sticker_parts.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
"""Tests de l'extraction des planches d'autocollants."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.sticker_parts import build_sticker_parts, write_sticker_parts
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write ``headers`` followed by ``rows`` to ``path`` as a plain CSV file."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_sticker_parts_filters_category_and_spares(tmp_path: Path) -> None:
    """Keep only sticker parts (category 58) and leave spare rows out."""
    inventory_csv = tmp_path / "parts_filtered.csv"
    inventory_headers = [
        "part_num",
        "color_rgb",
        "is_translucent",
        "set_num",
        "set_id",
        "year",
        "quantity_in_set",
        "is_spare",
        "is_minifig_part",
    ]
    inventory_rows = [
        ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "1", "false", "false"],
        # Spare copy of the same sheet: must not be counted.
        ["st1", "AAAAAA", "false", "1000-1", "1000", "2020", "2", "true", "false"],
        # Part from another category: must be filtered out.
        ["br1", "BBBBBB", "false", "1000-1", "1000", "2020", "5", "false", "false"],
        ["st2", "CCCCCC", "false", "2000-1", "2000", "2021", "3", "false", "false"],
    ]
    write_csv(inventory_csv, inventory_headers, inventory_rows)

    catalog_csv = tmp_path / "parts.csv"
    catalog_headers = ["part_num", "name", "part_cat_id", "part_material"]
    catalog_rows = [
        ["st1", "Sticker Sheet 1", "58", "Plastic"],
        ["st2", "Sticker Sheet 2", "58", "Plastic"],
        ["br1", "Brick", "1", "Plastic"],
    ]
    write_csv(catalog_csv, catalog_headers, catalog_rows)

    sets_csv = tmp_path / "sets_enriched.csv"
    sets_headers = ["set_num", "set_id", "name", "year", "in_collection"]
    sets_rows = [
        ["1000-1", "1000", "Set A", "2020", "true"],
        ["2000-1", "2000", "Set B", "2021", "false"],
    ]
    write_csv(sets_csv, sets_headers, sets_rows)

    expected = [
        {
            "set_num": "1000-1",
            "set_id": "1000",
            "year": "2020",
            "name": "Set A",
            "part_num": "st1",
            "part_name": "Sticker Sheet 1",
            "quantity": "1",
        },
        {
            "set_num": "2000-1",
            "set_id": "2000",
            "year": "2021",
            "name": "Set B",
            "part_num": "st2",
            "part_name": "Sticker Sheet 2",
            "quantity": "3",
        },
    ]

    assert build_sticker_parts(inventory_csv, catalog_csv, sets_csv) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_sticker_parts_outputs_csv(tmp_path: Path) -> None:
    """Serialise one sticker record and check the header and data lines."""
    target = tmp_path / "sticker_parts.csv"
    sticker = {
        "set_num": "123-1",
        "set_id": "123",
        "year": "2020",
        "name": "Set",
        "part_num": "st1",
        "part_name": "Sticker",
        "quantity": "1",
    }

    write_sticker_parts(target, [sticker])

    assert target.exists()
    written_lines = target.read_text().strip().splitlines()
    header_line = written_lines[0]
    assert header_line == "set_num,set_id,year,name,part_num,part_name,quantity"
    data_line = written_lines[1]
    assert data_line == "123-1,123,2020,Set,st1,Sticker,1"
|
||||||
27
tests/test_sticker_sheets_plot.py
Normal file
27
tests/test_sticker_sheets_plot.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
"""Tests du collage des planches d'autocollants."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from lib.plots.sticker_sheets import plot_sticker_sheets
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_sticker_sheets(tmp_path: Path) -> None:
    """Render a two-image collage and check that a non-empty file is produced."""
    resources_dir = tmp_path / "figures" / "rebrickable"
    # (set id, file name, pixel size, fill colour) of each fake sticker image.
    fixtures = [
        ("1000", "st1.jpg", (120, 80), (255, 0, 0)),
        ("2000", "st2.jpg", (100, 60), (0, 255, 0)),
    ]
    for set_id, file_name, size, colour in fixtures:
        folder = resources_dir / set_id / "stickers"
        folder.mkdir(parents=True)
        Image.new("RGB", size, color=colour).save(folder / file_name)

    stickers_path = tmp_path / "sticker_parts.csv"
    stickers_path.write_text(
        "set_num,set_id,year,name,part_num,part_name,quantity\n"
        "1000-1,1000,2020,Set A,st1,Sticker 1,1\n"
        "2000-1,2000,2021,Set B,st2,Sticker 2,1\n"
    )
    output_file = tmp_path / "figures" / "step35" / "sticker_sheets.png"

    plot_sticker_sheets(stickers_path, output_file, resources_dir=resources_dir, columns=2)

    assert output_file.exists()
    assert output_file.stat().st_size > 0
|
||||||
Loading…
x
Reference in New Issue
Block a user