1

Exclure les pièces de minifigs et intégrer les visuels de rareté

This commit is contained in:
2025-12-03 11:34:05 +01:00
parent 2f1344a4c5
commit 8abb8a0304
8 changed files with 606 additions and 0 deletions

86
lib/plots/part_rarity.py Normal file
View File

@@ -0,0 +1,86 @@
"""Visualisation des pièces les plus rares observées dans les sets filtrés."""
import csv
from pathlib import Path
from typing import List
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from PIL import Image
from lib.filesystem import ensure_parent_dir
def load_part_rarity(path: Path) -> List[dict]:
    """Load the rare-parts CSV as a list of row dictionaries.

    Args:
        path: Location of the CSV file; its first line is used as the header.

    Returns:
        One dict per data row, keyed by column name, in file order.
    """
    # newline="" leaves line-ending handling to the csv module (as its
    # documentation asks), so line breaks inside quoted fields are read back
    # unchanged instead of being rewritten by universal-newline translation.
    with path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
def format_label(row: dict) -> str:
    """Build the vertical-axis tick label for one part.

    Args:
        row: Mapping that provides the ``part_num`` and ``part_name`` keys.

    Returns:
        The part number and the part name, separated by a spaced em dash.
    """
    # Without a separator the number and the name run together
    # (e.g. "3001Brick 2 x 4"), which makes the tick labels hard to read.
    return f"{row['part_num']} — {row['part_name']}"
def load_part_image(row: dict, resources_dir: Path) -> Image.Image | None:
    """Open the picture stored for a part, or return None when there is none.

    The file is looked up at
    ``<resources_dir>/<sample_set_id>/rare_parts/<part_num>.jpg``, where the
    set id and part number come from ``row``.
    """
    set_dir = resources_dir / row["sample_set_id"]
    image_path = set_dir / "rare_parts" / f"{row['part_num']}.jpg"
    if image_path.exists():
        return Image.open(image_path)
    return None
def plot_part_rarity(
    path: Path,
    destination_path: Path,
    resources_dir: Path = Path("figures/rebrickable"),
    show_images: bool = True,
) -> None:
    """Draw a horizontal stacked bar chart of the rarest parts, with pictures.

    Each row of the CSV becomes one bar made of two segments: the quantity
    found in the filtered sets, then the quantity found in the other sets.

    Args:
        path: CSV of parts, read with ``load_part_rarity``; every row is
            plotted and must provide ``filtered_quantity`` and
            ``other_sets_quantity`` as integer strings.
        destination_path: File the figure is saved to (its parent directory
            is created through ``ensure_parent_dir``).
        resources_dir: Root directory searched for part pictures.
        show_images: When true, extend the x axis to the left and draw each
            part's picture there whenever one is found.
    """
    rows = load_part_rarity(path)
    labels = [format_label(row) for row in rows]
    filtered_counts = [int(row["filtered_quantity"]) for row in rows]
    other_counts = [int(row["other_sets_quantity"]) for row in rows]
    positions = list(range(len(rows)))
    fig, ax = plt.subplots(figsize=(13, 0.55 * len(rows) + 1.4))
    # try/finally guarantees the figure is released from pyplot's registry
    # even when drawing or saving raises.
    try:
        ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
        ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
        ax.set_yticks(positions)
        ax.set_yticklabels(labels)
        ax.set_xlabel("Occurrences de la pièce (rechanges incluses)")
        ax.grid(axis="x", linestyle="--", alpha=0.4)
        ax.legend()
        if show_images:
            max_count = max(
                (f + o for f, o in zip(filtered_counts, other_counts)),
                default=0,
            )
            # Reserve a strip of negative x values to host the pictures.
            pad = max_count * 0.15 if max_count > 0 else 1.0
            ax.set_xlim(left=-pad, right=max_count + pad * 0.3)
            target_height = 28  # thumbnail height in pixels
            for row, pos in zip(rows, positions):
                image = load_part_image(row, resources_dir)
                if image is None:
                    continue
                with image:
                    ratio = target_height / image.height
                    # Never ask for a zero-width thumbnail (very tall images).
                    target_width = max(1, int(image.width * ratio))
                    # Force RGB so grayscale or CMYK JPEGs are not run through
                    # a colormap or misread as RGBA when handed to matplotlib.
                    thumbnail = image.convert("RGB").resize((target_width, target_height))
                annotation = AnnotationBbox(
                    OffsetImage(thumbnail),
                    (-pad * 0.45, pos),
                    xycoords=("data", "data"),
                    box_alignment=(0.5, 0.5),
                    frameon=False,
                )
                ax.add_artist(annotation)
            fig.subplots_adjust(left=0.42)
        # NOTE(review): tight_layout() recomputes the subplot margins, so the
        # left=0.42 set just above is most likely overridden - confirm which
        # of the two layouts is intended.
        fig.tight_layout()
        ensure_parent_dir(destination_path)
        fig.savefig(destination_path, dpi=150)
    finally:
        plt.close(fig)

View File

@@ -0,0 +1,176 @@
"""Mesure la rareté des pièces présentes dans les sets filtrés."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set
from lib.filesystem import ensure_parent_dir
from lib.rebrickable.parts_inventory import index_inventory_parts_by_inventory, select_latest_inventories
from lib.rebrickable.stats import read_rows
# Part category ids skipped by default when aggregating part usage below.
# NOTE(review): the meaning of each id is not visible here - confirm against
# the part categories CSV.
IGNORED_PART_CATEGORY_IDS = {"28", "58", "74", "75"}
# Part category ids treated as minifig parts and skipped by default as well.
# NOTE(review): ids presumably match minifig-related categories - verify.
MINIFIG_PART_CATEGORY_IDS = {"13", "27", "59", "60", "61", "65", "70", "71", "72", "73"}
def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Index the parts CSV by part number.

    Args:
        path: CSV file whose header includes a ``part_num`` column.

    Returns:
        Mapping of ``part_num`` to the full CSV row. When a part number
        appears several times, the last row wins.
    """
    # newline="" leaves line-ending handling to the csv module (as its
    # documentation asks), so quoted fields with line breaks survive intact.
    with path.open(newline="") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}
def load_part_categories(path: Path) -> Dict[str, str]:
    """Map part category ids to their display names.

    Args:
        path: CSV file whose header includes ``id`` and ``name`` columns.

    Returns:
        Mapping of category ``id`` to category ``name``. When an id appears
        several times, the last row wins.
    """
    # newline="" leaves line-ending handling to the csv module (as its
    # documentation asks), so quoted fields with line breaks survive intact.
    with path.open(newline="") as csv_file:
        return {row["id"]: row["name"] for row in csv.DictReader(csv_file)}
def load_filtered_sets(path: Path) -> Dict[str, dict]:
    """Index the filtered sets by set number.

    Rows come from ``read_rows(path)``; each one is stored whole under its
    ``set_num`` value, a later row replacing an earlier one with the same key.
    """
    return {record["set_num"]: record for record in read_rows(path)}
def aggregate_filtered_parts(
    rows: Iterable[dict],
    parts_catalog: Dict[str, dict],
    ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
    ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
) -> Dict[str, dict]:
    """Sum quantities per part over the filtered sets' rows.

    Rows flagged ``is_minifig_part == "true"`` are skipped, as are parts whose
    catalog category is in either ignore set. No spare-part filtering is done.

    Returns:
        Mapping of part number to ``{"quantity": int, "set_numbers": set}``,
        where ``set_numbers`` holds every ``set_num`` the part was seen in.

    Raises:
        KeyError: If a row's part number is missing from ``parts_catalog``.
    """
    usage: Dict[str, dict] = {}
    for record in rows:
        if record["is_minifig_part"] == "true":
            continue
        part_num = record["part_num"]
        category_id = parts_catalog[part_num]["part_cat_id"]
        if category_id in ignored_categories or category_id in ignored_minifig_categories:
            continue
        bucket = usage.setdefault(part_num, {"quantity": 0, "set_numbers": set()})
        bucket["quantity"] += int(record["quantity_in_set"])
        bucket["set_numbers"].add(record["set_num"])
    return usage
def compute_other_set_usage(
    inventories_path: Path,
    inventory_parts_path: Path,
    parts_catalog: Dict[str, dict],
    filtered_set_numbers: Set[str],
    ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
    ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
) -> Dict[str, int]:
    """Count part quantities across every set that is not a filtered set.

    Inventories come from ``select_latest_inventories`` and their lines from
    ``index_inventory_parts_by_inventory``. Parts whose catalog category is in
    either ignore set are left out; no spare-part filtering is done.

    Returns:
        Mapping of part number to the summed ``quantity`` of its lines.

    Raises:
        KeyError: If a line's part number is missing from ``parts_catalog``.
    """
    latest_inventories = select_latest_inventories(inventories_path)
    lines_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
    counts: Dict[str, int] = {}
    for set_num, inventory in latest_inventories.items():
        if set_num not in filtered_set_numbers:
            for line in lines_by_inventory.get(inventory["id"], []):
                part_num = line["part_num"]
                category_id = parts_catalog[part_num]["part_cat_id"]
                skipped = category_id in ignored_categories or category_id in ignored_minifig_categories
                if not skipped:
                    counts[part_num] = counts.get(part_num, 0) + int(line["quantity"])
    return counts
def build_part_rarity(
    parts_filtered_path: Path,
    inventories_path: Path,
    inventory_parts_path: Path,
    parts_catalog_path: Path,
    part_categories_path: Path,
    filtered_sets_path: Path,
) -> List[dict]:
    """Build the rarity ranking of the parts used by the filtered sets.

    Args:
        parts_filtered_path: CSV of part rows for the filtered sets.
        inventories_path: CSV of inventories for the whole catalog.
        inventory_parts_path: CSV of inventory lines for the whole catalog.
        parts_catalog_path: CSV of parts (number, name, category id).
        part_categories_path: CSV of part categories (id, name).
        filtered_sets_path: CSV of the filtered sets; rows must provide
            ``set_num`` and ``set_id``.

    Returns:
        One dict per part with string values, sorted by quantity in other
        sets, then by total quantity, then by part number.

    Raises:
        KeyError: If a part, category or set referenced by the data is
            missing from the corresponding lookup.
    """
    parts_catalog = load_parts_catalog(parts_catalog_path)
    categories = load_part_categories(part_categories_path)
    filtered_sets = load_filtered_sets(filtered_sets_path)
    filtered_usage = aggregate_filtered_parts(read_rows(parts_filtered_path), parts_catalog)
    other_usage = compute_other_set_usage(
        inventories_path,
        inventory_parts_path,
        parts_catalog,
        set(filtered_sets),
    )
    rows: List[dict] = []
    for part_num, entry in filtered_usage.items():
        part = parts_catalog[part_num]
        filtered_quantity = entry["quantity"]
        other_quantity = other_usage.get(part_num, 0)
        total_quantity = filtered_quantity + other_quantity
        # Rows whose quantities are all zero must not crash the ranking;
        # report a zero share instead of dividing by zero.
        filtered_share = filtered_quantity / total_quantity if total_quantity else 0.0
        # The smallest set number is the sample set, whose pictures are used.
        sample_set_num = min(entry["set_numbers"])
        rows.append(
            {
                "part_num": part_num,
                "part_name": part["name"],
                "part_cat_id": part["part_cat_id"],
                "part_category": categories[part["part_cat_id"]],
                "sample_set_num": sample_set_num,
                "sample_set_id": filtered_sets[sample_set_num]["set_id"],
                "filtered_quantity": str(filtered_quantity),
                "filtered_set_count": str(len(entry["set_numbers"])),
                "other_sets_quantity": str(other_quantity),
                "catalog_total_quantity": str(total_quantity),
                "filtered_share": f"{filtered_share:.4f}",
            }
        )
    rows.sort(
        key=lambda row: (
            int(row["other_sets_quantity"]),
            int(row["catalog_total_quantity"]),
            row["part_num"],
        )
    )
    return rows
def write_part_rarity(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the full rarity ranking to a CSV file.

    The parent directory is prepared with ``ensure_parent_dir``; a header
    line is written first, then one line per row in the order given.
    """
    columns = [
        "part_num",
        "part_name",
        "part_cat_id",
        "part_category",
        "sample_set_num",
        "sample_set_id",
        "filtered_quantity",
        "filtered_set_count",
        "other_sets_quantity",
        "catalog_total_quantity",
        "filtered_share",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
def select_until_reused(rows: Sequence[dict]) -> List[dict]:
    """Keep the leading exclusive parts plus the first part reused elsewhere.

    Rows are taken in order up to and including the first one whose
    ``other_sets_quantity`` is greater than zero; if none is, all are kept.
    """
    kept: List[dict] = []
    for candidate in rows:
        kept.append(candidate)
        reused_elsewhere = int(candidate["other_sets_quantity"]) > 0
        if reused_elsewhere:
            return kept
    return kept
def load_part_rarity(path: Path) -> List[dict]:
    """Read the part rarity CSV back through ``read_rows``."""
    rarity_rows = read_rows(path)
    return rarity_rows