etude_lego_jurassic_world/lib/rebrickable/rare_parts.py

"""Identification des pièces rares (variantes exclusives à un set)."""

import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set, Tuple

from lib.filesystem import ensure_parent_dir
from lib.rebrickable.stats import read_rows


def load_parts_catalog(path: Path) -> Dict[str, dict]:
    """Load the parts catalog CSV and index its rows by part number.

    Args:
        path: CSV file whose header includes a ``part_num`` column.

    Returns:
        A mapping from each ``part_num`` value to its full row, as produced
        by ``csv.DictReader``. When a part number appears more than once,
        the last row wins.

    Raises:
        KeyError: If the rows have no ``part_num`` column.
    """
    # newline="" hands line-ending handling to the csv module, so quoted
    # fields containing embedded newlines are read back unchanged. It also
    # mirrors how the CSV writers in this module open their files.
    with path.open(newline="") as csv_file:
        return {row["part_num"]: row for row in csv.DictReader(csv_file)}


def load_colors_lookup(path: Path) -> Dict[Tuple[str, str], str]:
    """Map each (rgb, is_trans) pair to its color name.

    Rows are obtained from ``read_rows(path)``. The ``is_trans`` value is
    lowercased before it becomes part of the key; ``rgb`` is used as-is.
    When several rows share a key, the last one wins.
    """
    return {
        (color["rgb"], color["is_trans"].lower()): color["name"]
        for color in read_rows(path)
    }


def load_sets_enriched(path: Path) -> Dict[str, dict]:
    """Index the enriched set rows by their ``set_num`` value.

    Rows are obtained from ``read_rows(path)``. When several rows share a
    ``set_num``, the last one wins.
    """
    return {entry["set_num"]: entry for entry in read_rows(path)}


def aggregate_non_spare_parts(rows: Iterable[dict]) -> List[dict]:
    """Sum non-spare part quantities per set and color variant.

    Rows whose ``is_spare`` value equals ``"true"`` are ignored. Remaining
    rows are grouped on (set_num, part_num, color_rgb, is_translucent,
    is_minifig_part, set_id, year) and their ``quantity_in_set`` values are
    added up as integers.

    Returns:
        One dict per group carrying the grouping fields plus the summed
        ``quantity_in_set`` (as a string), sorted by set_num, part_num and
        color_rgb.
    """
    group_fields = (
        "set_num",
        "part_num",
        "color_rgb",
        "is_translucent",
        "is_minifig_part",
        "set_id",
        "year",
    )
    totals: Dict[Tuple[str, ...], int] = {}
    for part in rows:
        if part["is_spare"] != "true":
            group = tuple(part[field] for field in group_fields)
            totals[group] = totals.get(group, 0) + int(part["quantity_in_set"])
    merged = [
        {**dict(zip(group_fields, group)), "quantity_in_set": str(total)}
        for group, total in totals.items()
    ]
    # sorted() is stable, so groups with equal sort keys keep first-seen order.
    return sorted(
        merged,
        key=lambda item: (item["set_num"], item["part_num"], item["color_rgb"]),
    )


def compute_combo_set_counts(rows: Iterable[dict]) -> Dict[Tuple[str, str, str], Set[str]]:
    """Collect the distinct sets in which each part/color combination occurs.

    Returns:
        A mapping from (part_num, color_rgb, is_translucent) to the set of
        ``set_num`` values seen for that combination.
    """
    sets_by_combo: Dict[Tuple[str, str, str], Set[str]] = {}
    for entry in rows:
        combo = (entry["part_num"], entry["color_rgb"], entry["is_translucent"])
        sets_by_combo.setdefault(combo, set()).add(entry["set_num"])
    return sets_by_combo


def _summarize_rare_parts_by_set(rare_parts: Sequence[dict]) -> List[dict]:
    """Collapse rare-part rows into one summary row per set."""
    counter_fields = (
        "rare_parts_distinct",
        "rare_parts_quantity",
        "rare_minifig_parts_distinct",
        "rare_minifig_quantity",
    )
    summaries: Dict[str, dict] = {}
    for row in rare_parts:
        summary = summaries.get(row["set_num"])
        if summary is None:
            # Set-level fields are taken from the first rare part seen for the set.
            summary = {
                "set_num": row["set_num"],
                "set_id": row["set_id"],
                "name": row["set_name"],
                "year": row["year"],
                "in_collection": row["in_collection"],
                **dict.fromkeys(counter_fields, 0),
            }
            summaries[row["set_num"]] = summary
        quantity = int(row["quantity_in_set"])
        summary["rare_parts_distinct"] += 1
        summary["rare_parts_quantity"] += quantity
        if row["is_minifig_part"] == "true":
            summary["rare_minifig_parts_distinct"] += 1
            summary["rare_minifig_quantity"] += quantity
    # Rank while the counters are still integers: most distinct rare parts
    # first, then highest total quantity, then set number as a tie-breaker.
    ordered = sorted(
        summaries.values(),
        key=lambda summary: (
            -summary["rare_parts_distinct"],
            -summary["rare_parts_quantity"],
            summary["set_num"],
        ),
    )
    # Counters are emitted as strings, like every other field in the rows.
    for summary in ordered:
        for field in counter_fields:
            summary[field] = str(summary[field])
    return ordered


def build_rare_parts(
    parts_filtered_path: Path,
    sets_enriched_path: Path,
    parts_catalog_path: Path,
    colors_path: Path,
) -> Tuple[List[dict], List[dict]]:
    """Build the rare-parts list and its per-set breakdown.

    A part is "rare" here when its (part_num, color_rgb, is_translucent)
    combination occurs in exactly one set among the aggregated non-spare
    rows read from ``parts_filtered_path``.

    Args:
        parts_filtered_path: Part rows to analyse (read via ``read_rows``).
        sets_enriched_path: Set rows providing ``name``, ``year`` and
            ``in_collection`` for each ``set_num``.
        parts_catalog_path: Parts catalog providing ``name`` and
            ``part_cat_id`` for each ``part_num``.
        colors_path: Color rows used to resolve the color name.

    Returns:
        A pair ``(rare_parts, rare_by_set)``. ``rare_parts`` holds one row
        per rare part, sorted by set_num, part_num and color_rgb.
        ``rare_by_set`` holds one summary row per set that has at least one
        rare part, sorted by descending distinct count, then descending
        quantity, then set_num; its counters are strings.

    Raises:
        KeyError: If a rare part references a set, part or color that is
            missing from the corresponding lookup.
    """
    aggregated = aggregate_non_spare_parts(read_rows(parts_filtered_path))
    combo_sets = compute_combo_set_counts(aggregated)
    parts_catalog = load_parts_catalog(parts_catalog_path)
    color_names = load_colors_lookup(colors_path)
    sets_lookup = load_sets_enriched(sets_enriched_path)

    rare_parts: List[dict] = []
    for row in aggregated:
        combo_key = (row["part_num"], row["color_rgb"], row["is_translucent"])
        # Keep only combinations that are exclusive to a single set.
        if len(combo_sets[combo_key]) != 1:
            continue
        set_row = sets_lookup[row["set_num"]]
        part = parts_catalog[row["part_num"]]
        # NOTE(review): load_colors_lookup lowercases is_trans in its keys;
        # this lookup presumably relies on is_translucent already being
        # lowercase in the filtered parts file — confirm.
        color_name = color_names[(row["color_rgb"], row["is_translucent"])]
        rare_parts.append(
            {
                "set_num": row["set_num"],
                "set_id": row["set_id"],
                "set_name": set_row["name"],
                "year": set_row["year"],
                "part_num": row["part_num"],
                "part_name": part["name"],
                "part_cat_id": part["part_cat_id"],
                "color_rgb": row["color_rgb"],
                "color_name": color_name,
                "is_translucent": row["is_translucent"],
                "is_minifig_part": row["is_minifig_part"],
                "quantity_in_set": row["quantity_in_set"],
                "in_collection": set_row["in_collection"],
            }
        )
    rare_parts.sort(key=lambda row: (row["set_num"], row["part_num"], row["color_rgb"]))

    return rare_parts, _summarize_rare_parts_by_set(rare_parts)


def write_rare_parts_list(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the detailed rare-parts rows (with set and color info) as CSV.

    ``ensure_parent_dir`` is called on ``destination_path`` before the file
    is opened for writing. A header line is always written, followed by one
    line per row.
    """
    columns = [
        "set_num",
        "set_id",
        "set_name",
        "year",
        "part_num",
        "part_name",
        "part_cat_id",
        "color_rgb",
        "color_name",
        "is_translucent",
        "is_minifig_part",
        "quantity_in_set",
        "in_collection",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)


def write_rare_parts_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the per-set rare-parts summary rows as CSV.

    ``ensure_parent_dir`` is called on ``destination_path`` before the file
    is opened for writing. A header line is always written, followed by one
    line per row.
    """
    columns = [
        "set_num",
        "set_id",
        "name",
        "year",
        "in_collection",
        "rare_parts_distinct",
        "rare_parts_quantity",
        "rare_minifig_parts_distinct",
        "rare_minifig_quantity",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)