Exclure les pièces de minifigs et intégrer les visuels de rareté
This commit is contained in:
parent
6dc1f1cac5
commit
39af0d3a8b
16
README.md
16
README.md
@ -365,3 +365,19 @@ Le calcul lit `data/intermediate/minifigs_by_set.csv`, `data/raw/parts.csv`, `da
|
|||||||
- `data/intermediate/head_reuse.csv` : pour chaque tête observée dans les sets filtrés, le nombre de sets filtrés qui la contiennent, le nombre de sets du reste du catalogue et le total.
|
- `data/intermediate/head_reuse.csv` : pour chaque tête observée dans les sets filtrés, le nombre de sets filtrés qui la contiennent, le nombre de sets du reste du catalogue et le total.
|
||||||
- `figures/step33/head_reuse.png` : bar chart horizontal montrant, par tête, la part filtrée vs le reste du catalogue (têtes exclusives en haut).
|
- `figures/step33/head_reuse.png` : bar chart horizontal montrant, par tête, la part filtrée vs le reste du catalogue (têtes exclusives en haut).
|
||||||
- Les étiquettes affichent aussi l’identifiant de la minifig (`fig-*`) et un astérisque à côté du set (`set_num*`) lorsqu’il est présent dans la collection.
|
- Les étiquettes affichent aussi l’identifiant de la minifig (`fig-*`) et un astérisque à côté du set (`set_num*`) lorsqu’il est présent dans la collection.
|
||||||
|
|
||||||
|
### Étape 34 : rareté des pièces (occurrences catalogue)
|
||||||
|
|
||||||
|
1. `source .venv/bin/activate`
|
||||||
|
2. `python -m scripts.compute_part_rarity`
|
||||||
|
3. `python -m scripts.download_part_rarity_resources`
|
||||||
|
4. `python -m scripts.plot_part_rarity`
|
||||||
|
|
||||||
|
Le calcul lit `data/intermediate/parts_filtered.csv`, `data/raw/parts.csv`, `data/raw/part_categories.csv`, `data/raw/inventories.csv`, `data/raw/inventory_parts.csv` et `data/intermediate/sets_enriched.csv`. Il additionne `quantity_in_set` pour chaque `part_num` des sets filtrés (rechanges incluses), ignore les catégories animales, stickers et pièces de minifigs (`28`, `58`, `74`, `75`, `13`, `27`, `59`, `60`, `61`, `65`, `70`, `71`, `72`, `73` de `part_categories.csv`, ainsi que `is_minifig_part=true`) pour écarter les moules de dinosaures, les planches d’autocollants et les pièces de figurines, puis compte les occurrences restantes dans le reste du catalogue. Les sorties sont :
|
||||||
|
|
||||||
|
- `data/intermediate/part_rarity.csv` : classement complet des pièces avec leurs occurrences filtrées, catalogue et part filtrée.
|
||||||
|
- `data/intermediate/part_rarity_exclusive.csv` : toutes les pièces exclusives aux sets filtrés, suivies de la première pièce réutilisée ailleurs (pour visualiser la bascule entre exclusivité et réemploi).
|
||||||
|
|
||||||
|
Le téléchargement s’appuie sur `REBRICKABLE_TOKEN` et place les visuels des pièces dans `figures/rebrickable/{set_id}/rare_parts/{part_num}.jpg`, en journalisant les manques dans `data/intermediate/part_rarity_download_log.csv`.
|
||||||
|
|
||||||
|
Le tracé `figures/step34/part_rarity.png` juxtapose, pour chaque pièce de `part_rarity_exclusive.csv`, les occurrences dans les sets filtrés vs le reste du catalogue avec les images incrustées.
|
||||||
|
|||||||
86
lib/plots/part_rarity.py
Normal file
86
lib/plots/part_rarity.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
"""Visualisation des pièces les plus rares observées dans les sets filtrés."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
|
||||||
|
|
||||||
|
def load_part_rarity(path: Path) -> List[dict]:
    """Load the rare-parts CSV and return one dict per data row."""
    with path.open() as csv_file:
        return list(csv.DictReader(csv_file))
|
||||||
|
|
||||||
|
|
||||||
|
def format_label(row: dict) -> str:
    """Build the y-axis label for one part: reference, em dash, name."""
    part_num = row["part_num"]
    part_name = row["part_name"]
    return f"{part_num} — {part_name}"
|
||||||
|
|
||||||
|
|
||||||
|
def load_part_image(row: dict, resources_dir: Path) -> Image.Image | None:
    """Return the part's downloaded visual, or ``None`` when it is missing."""
    image_path = (
        resources_dir / row["sample_set_id"] / "rare_parts" / f"{row['part_num']}.jpg"
    )
    return Image.open(image_path) if image_path.exists() else None
|
||||||
|
|
||||||
|
|
||||||
|
def plot_part_rarity(
    path: Path,
    destination_path: Path,
    resources_dir: Path = Path("figures/rebrickable"),
    show_images: bool = True,
) -> None:
    """Draw a horizontal stacked bar chart of the rarest parts with their visuals.

    Parameters:
        path: CSV produced by the rarity computation (one row per part).
        destination_path: PNG file to write; parent folders are created.
        resources_dir: root folder holding the downloaded part images.
        show_images: when True, part visuals are inlaid left of the bars.
    """
    rows = load_part_rarity(path)
    selected = rows
    labels = [format_label(row) for row in selected]
    filtered_counts = [int(row["filtered_quantity"]) for row in selected]
    other_counts = [int(row["other_sets_quantity"]) for row in selected]
    positions = list(range(len(selected)))

    # Figure height scales with the number of bars so labels stay readable.
    fig, ax = plt.subplots(figsize=(13, 0.55 * len(selected) + 1.4))
    ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
    # Stacked segment: catalog usage outside the filtered sets.
    ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)
    ax.set_xlabel("Occurrences de la pièce (rechanges incluses)")
    ax.grid(axis="x", linestyle="--", alpha=0.4)
    ax.legend()

    if show_images:
        # Extend the x-axis into negative territory to make room for the
        # inlaid images placed left of x=0.
        max_count = max((f + o) for f, o in zip(filtered_counts, other_counts)) if selected else 0
        pad = max_count * 0.15 if max_count > 0 else 1.0
        ax.set_xlim(left=-pad, right=max_count + pad * 0.3)
        for row, pos in zip(selected, positions):
            image = load_part_image(row, resources_dir)
            if image is None:
                continue
            # NOTE(review): the PIL handle opened here is never closed —
            # consider closing `image` after `resize`.
            target_height = 28
            ratio = target_height / image.height
            resized = image.resize((int(image.width * ratio), target_height))
            imagebox = OffsetImage(resized)
            ab = AnnotationBbox(
                imagebox,
                (-pad * 0.45, pos),
                xycoords=("data", "data"),
                box_alignment=(0.5, 0.5),
                frameon=False,
            )
            ax.add_artist(ab)

    # NOTE(review): tight_layout() recomputes subplot parameters and discards
    # the left=0.42 set just above — confirm which of the two is intended.
    fig.subplots_adjust(left=0.42)
    fig.tight_layout()

    ensure_parent_dir(destination_path)
    fig.savefig(destination_path, dpi=150)
    plt.close(fig)
|
||||||
176
lib/rebrickable/part_rarity.py
Normal file
176
lib/rebrickable/part_rarity.py
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
"""Mesure la rareté des pièces présentes dans les sets filtrés."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Sequence, Set
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.parts_inventory import index_inventory_parts_by_inventory, select_latest_inventories
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
# Category ids (from part_categories.csv) excluded from the rarity ranking:
# animal/creature moulds and sticker sheets.
IGNORED_PART_CATEGORY_IDS = {"28", "58", "74", "75"}
# Category ids holding minifig components, excluded as well.
MINIFIG_PART_CATEGORY_IDS = {"13", "27", "59", "60", "61", "65", "70", "71", "72", "73"}
|
||||||
|
|
||||||
|
|
||||||
|
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the parts catalog by part reference, keeping name and category."""
    with path.open() as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
|
||||||
|
|
||||||
|
|
||||||
|
def load_part_categories(path: Path) -> Dict[str, str]:
    """Map part-category identifiers to their display name."""
    with path.open() as csv_file:
        return {row["id"]: row["name"] for row in csv.DictReader(csv_file)}
|
||||||
|
|
||||||
|
|
||||||
|
def load_filtered_sets(path: Path) -> Dict[str, dict]:
    """Index the filtered sets by set number, keeping their metadata rows."""
    return {row["set_num"]: row for row in read_rows(path)}
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_filtered_parts(
    rows: Iterable[dict],
    parts_catalog: Dict[str, dict],
    ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
    ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
) -> Dict[str, dict]:
    """Sum per-part quantities across the filtered sets (spares included).

    Rows flagged ``is_minifig_part == "true"`` and rows whose catalog category
    is in either exclusion set are skipped. Each aggregate entry records the
    total quantity and the set numbers the part appears in.
    """
    aggregated: Dict[str, dict] = {}
    for row in rows:
        if row["is_minifig_part"] == "true":
            continue
        part_num = row["part_num"]
        category_id = parts_catalog[part_num]["part_cat_id"]
        if category_id in ignored_categories or category_id in ignored_minifig_categories:
            continue
        entry = aggregated.setdefault(part_num, {"quantity": 0, "set_numbers": set()})
        entry["quantity"] += int(row["quantity_in_set"])
        entry["set_numbers"].add(row["set_num"])
    return aggregated
|
||||||
|
|
||||||
|
|
||||||
|
def compute_other_set_usage(
    inventories_path: Path,
    inventory_parts_path: Path,
    parts_catalog: Dict[str, dict],
    filtered_set_numbers: Set[str],
    ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
    ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
) -> Dict[str, int]:
    """Count part occurrences in the rest of the catalog (spares included).

    Sets belonging to ``filtered_set_numbers`` are skipped; parts whose
    category is in either exclusion set are ignored.
    """
    inventories = select_latest_inventories(inventories_path)
    parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
    excluded_categories = ignored_categories | ignored_minifig_categories
    totals: Dict[str, int] = {}
    for set_num, inventory in inventories.items():
        if set_num in filtered_set_numbers:
            continue
        for row in parts_by_inventory.get(inventory["id"], []):
            part_num = row["part_num"]
            if parts_catalog[part_num]["part_cat_id"] in excluded_categories:
                continue
            totals[part_num] = totals.get(part_num, 0) + int(row["quantity"])
    return totals
|
||||||
|
|
||||||
|
|
||||||
|
def build_part_rarity(
    parts_filtered_path: Path,
    inventories_path: Path,
    inventory_parts_path: Path,
    parts_catalog_path: Path,
    part_categories_path: Path,
    filtered_sets_path: Path,
) -> List[dict]:
    """Build the rarity ranking of the parts found in the filtered sets.

    Combines the per-part aggregation over the filtered sets with the usage
    counts from the rest of the catalog. Returns one dict per part (all values
    serialized as strings, ready for CSV output), sorted so that parts
    exclusive to the filtered sets come first: ascending catalog occurrences,
    then ascending total occurrences, then part reference.
    """
    parts_catalog = load_parts_catalog(parts_catalog_path)
    categories = load_part_categories(part_categories_path)
    filtered_sets = load_filtered_sets(filtered_sets_path)
    filtered_set_numbers = set(filtered_sets.keys())
    filtered_rows = read_rows(parts_filtered_path)
    filtered_usage = aggregate_filtered_parts(filtered_rows, parts_catalog)
    other_usage = compute_other_set_usage(
        inventories_path,
        inventory_parts_path,
        parts_catalog,
        filtered_set_numbers,
    )
    rows: List[dict] = []
    for part_num, entry in filtered_usage.items():
        part = parts_catalog[part_num]
        other_quantity = other_usage.get(part_num, 0)
        total_quantity = entry["quantity"] + other_quantity
        # Deterministic sample: first set number in lexicographic order.
        sample_set_num = sorted(entry["set_numbers"])[0]
        sample_set_id = filtered_sets[sample_set_num]["set_id"]
        # Guard against a zero total (possible when every quantity_in_set is
        # "0"), which would otherwise raise ZeroDivisionError.
        share = entry["quantity"] / total_quantity if total_quantity else 0.0
        rows.append(
            {
                "part_num": part_num,
                "part_name": part["name"],
                "part_cat_id": part["part_cat_id"],
                "part_category": categories[part["part_cat_id"]],
                "sample_set_num": sample_set_num,
                "sample_set_id": sample_set_id,
                "filtered_quantity": str(entry["quantity"]),
                "filtered_set_count": str(len(entry["set_numbers"])),
                "other_sets_quantity": str(other_quantity),
                "catalog_total_quantity": str(total_quantity),
                "filtered_share": f"{share:.4f}",
            }
        )
    rows.sort(key=lambda row: (int(row["other_sets_quantity"]), int(row["catalog_total_quantity"]), row["part_num"]))
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def write_part_rarity(destination_path: Path, rows: Sequence[dict]) -> None:
    """Serialize the rarity ranking to CSV, creating parent folders if needed."""
    ensure_parent_dir(destination_path)
    columns = [
        "part_num",
        "part_name",
        "part_cat_id",
        "part_category",
        "sample_set_num",
        "sample_set_id",
        "filtered_quantity",
        "filtered_set_count",
        "other_sets_quantity",
        "catalog_total_quantity",
        "filtered_share",
    ]
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def select_until_reused(rows: Sequence[dict]) -> List[dict]:
    """Keep every exclusive part, plus the first part reused in other sets.

    Rows are scanned in order; the result stops right after the first row
    whose ``other_sets_quantity`` is positive. When no row is reused, every
    row is returned.
    """
    cutoff = len(rows)
    for index, row in enumerate(rows):
        if int(row["other_sets_quantity"]) > 0:
            cutoff = index + 1
            break
    return list(rows[:cutoff])
|
||||||
|
|
||||||
|
|
||||||
|
def load_part_rarity(path: Path) -> List[dict]:
    """Load the part-rarity CSV (thin wrapper around ``read_rows``)."""
    return read_rows(path)
|
||||||
34
scripts/compute_part_rarity.py
Normal file
34
scripts/compute_part_rarity.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
"""Calcule les pièces rares en comparant les sets filtrés au reste du catalogue."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.part_rarity import build_part_rarity, select_until_reused, write_part_rarity
|
||||||
|
|
||||||
|
|
||||||
|
# Inputs produced by earlier pipeline steps.
PARTS_FILTERED_PATH = Path("data/intermediate/parts_filtered.csv")
# Raw Rebrickable catalog dumps.
INVENTORIES_PATH = Path("data/raw/inventories.csv")
INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
PART_CATEGORIES_PATH = Path("data/raw/part_categories.csv")
FILTERED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
# Outputs: the full ranking and the exclusive-parts subset.
DESTINATION_PATH = Path("data/intermediate/part_rarity.csv")
TOP_DESTINATION_PATH = Path("data/intermediate/part_rarity_exclusive.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the full rarity CSV and the exclusive-parts subset CSV."""
    ranking = build_part_rarity(
        PARTS_FILTERED_PATH,
        INVENTORIES_PATH,
        INVENTORY_PARTS_PATH,
        PARTS_CATALOG_PATH,
        PART_CATEGORIES_PATH,
        FILTERED_SETS_PATH,
    )
    write_part_rarity(DESTINATION_PATH, ranking)
    # Subset: exclusive parts followed by the first reused one.
    write_part_rarity(TOP_DESTINATION_PATH, select_until_reused(ranking))


if __name__ == "__main__":
    main()
|
||||||
63
scripts/download_part_rarity_resources.py
Normal file
63
scripts/download_part_rarity_resources.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
"""Télécharge les visuels des pièces les plus rares identifiées à l'étape 34."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from lib.rebrickable.part_rarity import load_part_rarity
|
||||||
|
from lib.rebrickable.resources import (
|
||||||
|
build_part_img_lookup,
|
||||||
|
download_binary,
|
||||||
|
download_resources,
|
||||||
|
fetch_part_img_url,
|
||||||
|
load_part_img_cache,
|
||||||
|
persist_part_img_cache,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Input: the exclusive-parts ranking produced by compute_part_rarity.
PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv")
# Root folder for downloaded visuals ({set_id}/rare_parts/{part_num}.jpg).
RESOURCES_DIR = Path("figures/rebrickable")
# Cache of part -> image-URL lookups, to avoid repeated API calls.
PART_IMG_CACHE_PATH = Path("data/intermediate/part_img_cache.csv")
# Log of missing/failed downloads.
DOWNLOAD_LOG_PATH = Path("data/intermediate/part_rarity_download_log.csv")
# Throttling delays between Rebrickable requests, in seconds.
REQUEST_DELAY_SECONDS_IMAGES = 0.35
REQUEST_DELAY_SECONDS_LOOKUP = 0.6
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Resolve the rare parts' image URLs and download the visuals."""
    load_dotenv()
    # Fails fast with KeyError when REBRICKABLE_TOKEN is not configured.
    token = os.environ["REBRICKABLE_TOKEN"]
    session = requests.Session()

    rows = load_part_rarity(PART_RARITY_TOP_PATH)
    cache = load_part_img_cache(PART_IMG_CACHE_PATH)
    part_img_lookup = build_part_img_lookup(
        {row["part_num"] for row in rows},
        fetcher=lambda part_num: fetch_part_img_url(part_num, token, session),
        cache_path=PART_IMG_CACHE_PATH,
        existing_cache=cache,
        delay_seconds=REQUEST_DELAY_SECONDS_LOOKUP,
    )
    if cache:
        # NOTE(review): cached entries overwrite freshly fetched ones here —
        # confirm build_part_img_lookup only returns parts absent from the
        # cache, otherwise stale URLs win.
        part_img_lookup.update(cache)
    persist_part_img_cache(PART_IMG_CACHE_PATH, part_img_lookup)

    # One download target per rare part, stored under its sample set's folder.
    plan = [
        {
            "url": part_img_lookup[row["part_num"]],
            "path": RESOURCES_DIR / row["sample_set_id"] / "rare_parts" / f"{row['part_num']}.jpg",
        }
        for row in rows
    ]
    download_resources(
        plan,
        downloader=lambda url, path: download_binary(url, path, session),
        delay_seconds=REQUEST_DELAY_SECONDS_IMAGES,
        log_path=DOWNLOAD_LOG_PATH,
    )


if __name__ == "__main__":
    main()
|
||||||
19
scripts/plot_part_rarity.py
Normal file
19
scripts/plot_part_rarity.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
"""Trace le diagramme des pièces rares pour l'étape 34."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.part_rarity import plot_part_rarity
|
||||||
|
|
||||||
|
|
||||||
|
# Input ranking, output figure, and downloaded-visuals directory.
PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv")
DESTINATION_PATH = Path("figures/step34/part_rarity.png")
RESOURCES_DIR = Path("figures/rebrickable")


def main() -> None:
    """Render the illustrated rare-parts chart from the exclusive ranking."""
    plot_part_rarity(
        PART_RARITY_TOP_PATH,
        DESTINATION_PATH,
        resources_dir=RESOURCES_DIR,
    )


if __name__ == "__main__":
    main()
|
||||||
181
tests/test_part_rarity.py
Normal file
181
tests/test_part_rarity.py
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
"""Tests du calcul de rareté des pièces."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.part_rarity import build_part_rarity, select_until_reused, write_part_rarity
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
|
||||||
|
"""Écrit un CSV utilitaire pour les cas de test."""
|
||||||
|
with path.open("w", newline="") as csv_file:
|
||||||
|
writer = csv.writer(csv_file)
|
||||||
|
writer.writerow(headers)
|
||||||
|
writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_part_rarity_counts_spares_and_ignores_categories(tmp_path: Path) -> None:
    """Count parts (spares included) and ignore excluded categories.

    Fixture design: p2 (cat 28, animals) and the p4 spare rows exercise the
    exclusions; p6 appears only in a filtered set (exclusive); p1 appears in
    both filtered sets and the wider catalog.
    """
    # Filtered parts: per-set part rows, including one spare (p1 in 2000-1).
    parts_filtered = tmp_path / "parts_filtered.csv"
    write_csv(
        parts_filtered,
        [
            "part_num",
            "color_rgb",
            "is_translucent",
            "set_num",
            "set_id",
            "year",
            "quantity_in_set",
            "is_spare",
            "is_minifig_part",
        ],
        [
            ["p1", "AAAAAA", "false", "1000-1", "1000", "2020", "2", "false", "false"],
            ["p1", "AAAAAA", "false", "2000-1", "2000", "2021", "1", "true", "false"],
            ["p2", "BBBBBB", "false", "1000-1", "1000", "2020", "5", "false", "false"],
            ["p4", "CCCCCC", "false", "2000-1", "2000", "2021", "2", "false", "false"],
            ["p6", "DDDDDD", "false", "2000-1", "2000", "2021", "1", "false", "false"],
        ],
    )
    # The two filtered sets with their metadata.
    sets_enriched = tmp_path / "sets_enriched.csv"
    write_csv(
        sets_enriched,
        ["set_num", "name", "year", "theme_id", "num_parts", "img_url", "set_id", "rebrickable_url", "in_collection"],
        [
            ["1000-1", "Set A", "2020", "1", "10", "http://example.com", "1000", "http://example.com", "false"],
            ["2000-1", "Set B", "2021", "1", "10", "http://example.com", "2000", "http://example.com", "false"],
        ],
    )
    # Parts catalog: p2/p3/p5 belong to categories that must be ignored.
    parts_catalog = tmp_path / "parts.csv"
    write_csv(
        parts_catalog,
        ["part_num", "name", "part_cat_id", "part_material"],
        [
            ["p1", "Brick 1x1", "1", "Plastic"],
            ["p2", "Baby Dino", "28", "Plastic"],
            ["p3", "Raptor Body", "75", "Plastic"],
            ["p4", "Figure Limb", "41", "Plastic"],
            ["p5", "Sticker Sheet", "58", "Plastic"],
            ["p6", "Exclusive Tile", "1", "Plastic"],
        ],
    )
    part_categories = tmp_path / "part_categories.csv"
    write_csv(
        part_categories,
        ["id", "name"],
        [
            ["1", "Bricks"],
            ["28", "Animals / Creatures"],
            ["41", "Large Buildable Figures"],
            ["75", "Animal / Creature Body Parts"],
            ["58", "Stickers"],
        ],
    )
    # Catalog inventories: 3000-1 has two versions (only v2 must count);
    # inventory 4 belongs to filtered set 1000-1 and must be skipped.
    inventories = tmp_path / "inventories.csv"
    write_csv(
        inventories,
        ["id", "version", "set_num"],
        [
            ["1", "1", "3000-1"],
            ["2", "2", "3000-1"],
            ["3", "1", "4000-1"],
            ["4", "1", "1000-1"],
            ["5", "1", "5000-1"],
        ],
    )
    inventory_parts = tmp_path / "inventory_parts.csv"
    write_csv(
        inventory_parts,
        ["inventory_id", "part_num", "color_id", "quantity", "is_spare", "img_url"],
        [
            ["1", "p1", "1", "1", "False", ""],
            ["2", "p1", "1", "3", "False", ""],
            ["2", "p2", "1", "2", "False", ""],
            ["3", "p4", "1", "4", "True", ""],
            ["4", "p1", "1", "8", "False", ""],
            ["5", "p5", "1", "9", "False", ""],
        ],
    )

    rows = build_part_rarity(
        parts_filtered,
        inventories,
        inventory_parts,
        parts_catalog,
        part_categories,
        sets_enriched,
    )

    # Expected order: exclusive part first, then ascending catalog usage.
    assert rows == [
        {
            "part_num": "p6",
            "part_name": "Exclusive Tile",
            "part_cat_id": "1",
            "part_category": "Bricks",
            "sample_set_num": "2000-1",
            "sample_set_id": "2000",
            "filtered_quantity": "1",
            "filtered_set_count": "1",
            "other_sets_quantity": "0",
            "catalog_total_quantity": "1",
            "filtered_share": "1.0000",
        },
        {
            "part_num": "p1",
            "part_name": "Brick 1x1",
            "part_cat_id": "1",
            "part_category": "Bricks",
            "sample_set_num": "1000-1",
            "sample_set_id": "1000",
            "filtered_quantity": "3",
            "filtered_set_count": "2",
            "other_sets_quantity": "3",
            "catalog_total_quantity": "6",
            "filtered_share": "0.5000",
        },
        {
            "part_num": "p4",
            "part_name": "Figure Limb",
            "part_cat_id": "41",
            "part_category": "Large Buildable Figures",
            "sample_set_num": "2000-1",
            "sample_set_id": "2000",
            "filtered_quantity": "2",
            "filtered_set_count": "1",
            "other_sets_quantity": "4",
            "catalog_total_quantity": "6",
            "filtered_share": "0.3333",
        },
    ]
    # Exclusive part plus the first reused one.
    assert select_until_reused(rows) == [rows[0], rows[1]]
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_part_rarity_outputs_csv(tmp_path: Path) -> None:
    """Serialize the rarity ranking and check header plus data row."""
    destination = tmp_path / "part_rarity.csv"
    rows = [
        {
            "part_num": "p1",
            "part_name": "Brick 1x1",
            "part_cat_id": "1",
            "part_category": "Bricks",
            "sample_set_num": "123-1",
            "sample_set_id": "123",
            "filtered_quantity": "3",
            "filtered_set_count": "2",
            "other_sets_quantity": "3",
            "catalog_total_quantity": "6",
            "filtered_share": "0.5000",
        }
    ]

    write_part_rarity(destination, rows)

    assert destination.exists()
    content = destination.read_text().strip().splitlines()
    # Header must list every column in the documented order.
    assert content[0] == (
        "part_num,part_name,part_cat_id,part_category,sample_set_num,sample_set_id,filtered_quantity,filtered_set_count,"
        "other_sets_quantity,catalog_total_quantity,filtered_share"
    )
    assert content[1] == "p1,Brick 1x1,1,Bricks,123-1,123,3,2,3,6,0.5000"
|
||||||
31
tests/test_part_rarity_plot.py
Normal file
31
tests/test_part_rarity_plot.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
"""Tests des visualisations de rareté des pièces."""
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
from pathlib import Path
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from lib.plots.part_rarity import plot_part_rarity
|
||||||
|
|
||||||
|
|
||||||
|
# Headless backend so the test renders without a display server.
matplotlib.use("Agg")


def test_plot_part_rarity_with_images(tmp_path: Path) -> None:
    """Generate the rare-parts chart with the part visuals inlaid."""
    data_path = tmp_path / "part_rarity_exclusive.csv"
    destination = tmp_path / "figures" / "step34" / "part_rarity.png"
    resources_dir = tmp_path / "figures" / "rebrickable"
    # One visual per sample set, matching the layout used by the downloader.
    (resources_dir / "1000" / "rare_parts").mkdir(parents=True)
    (resources_dir / "2000" / "rare_parts").mkdir(parents=True)
    Image.new("RGB", (50, 50), color=(255, 0, 0)).save(resources_dir / "1000" / "rare_parts" / "p1.jpg")
    Image.new("RGB", (50, 50), color=(0, 255, 0)).save(resources_dir / "2000" / "rare_parts" / "p2.jpg")
    data_path.write_text(
        "part_num,part_name,part_cat_id,part_category,sample_set_num,sample_set_id,filtered_quantity,filtered_set_count,other_sets_quantity,catalog_total_quantity,filtered_share\n"
        "p1,Brick 1x1,1,Bricks,1000-1,1000,3,2,0,3,1.0000\n"
        "p2,Plate 1x2,1,Bricks,2000-1,2000,2,1,1,3,0.6667\n"
    )

    plot_part_rarity(data_path, destination, resources_dir=resources_dir, show_images=True)

    # Smoke check: the PNG was written and is non-empty.
    assert destination.exists()
    assert destination.stat().st_size > 0
|
||||||
Loading…
x
Reference in New Issue
Block a user