etude_lego_jurassic_world/lib/rebrickable/enrich_sets.py

"""Enrichissement des sets LEGO avec des métadonnées Rebrickable et personnelles."""

import csv
from pathlib import Path
from typing import Iterable, Set

from lib.filesystem import ensure_parent_dir

# Base URL of the public Rebrickable set pages. build_rebrickable_set_url appends
# the set number directly to this string, so the trailing slash must be kept.
REBRICKABLE_SET_BASE_URL = "https://rebrickable.com/sets/"


def extract_set_id(set_num: str) -> str:
    """Return the LEGO identifier of a set: the part of set_num before the revision.

    The identifier is everything that precedes the first hyphen. When set_num
    contains no hyphen, it is returned unchanged.
    """
    identifier, _separator, _revision = set_num.partition("-")
    return identifier


def build_rebrickable_set_url(set_num: str) -> str:
    """Build the public Rebrickable URL of a set.

    The URL is REBRICKABLE_SET_BASE_URL immediately followed by set_num.
    """
    url_template = "{}{}"
    return url_template.format(REBRICKABLE_SET_BASE_URL, set_num)


def parse_set_collection_root(raw_value: str) -> Path | None:
    """Prépare le chemin de collection, ou None si aucune collection n'est fournie."""
    cleaned = raw_value.strip()
    if not cleaned:
        print("La variable MY_SETS est vide, aucun set en collection.")
        return None
    return Path(cleaned)


def load_owned_set_ids(collection_root: Path) -> Set[str]:
    """Collect the set identifiers found in a collection folder.

    Every immediate sub-directory of collection_root counts as one owned set,
    and its name is the identifier. An empty set is returned, with a printed
    notice, when the path does not exist, is not a directory, or contains no
    sub-directory.
    """
    owned_ids: Set[str] = set()
    if not collection_root.exists():
        print(f"Le dossier {collection_root} n'existe pas, aucun set en collection.")
    elif not collection_root.is_dir():
        print(f"Le chemin {collection_root} n'est pas un dossier, aucun set en collection.")
    else:
        # Plain files are ignored: only sub-directories represent sets.
        for child in collection_root.iterdir():
            if child.is_dir():
                owned_ids.add(child.name)
        if not owned_ids:
            print(f"Le dossier {collection_root} est vide, aucun set en collection.")
    return owned_ids


def enrich_sets(
    source_path: Path,
    destination_path: Path,
    owned_set_ids: Iterable[str],
) -> None:
    """Add the set_id, rebrickable_url and in_collection columns to the filtered catalog.

    Every row of the source CSV is copied to the destination CSV with three
    extra columns appended after the original ones:

    - ``set_id``: the value of extract_set_id for the row's ``set_num``;
    - ``rebrickable_url``: the value of build_rebrickable_set_url for ``set_num``;
    - ``in_collection``: ``"true"`` when set_id is among owned_set_ids,
      ``"false"`` otherwise.

    An empty source file (no header line) produces a destination file that
    contains only the three added column names.

    Args:
        source_path: CSV file to read; its rows must have a ``set_num`` column.
        destination_path: CSV file to write. ensure_parent_dir is called on it
            first (presumably to create the parent directory; confirm in
            lib.filesystem).
        owned_set_ids: Identifiers of the owned sets.

    Raises:
        KeyError: If a row has no ``set_num`` column.
    """
    ensure_parent_dir(destination_path)
    owned_lookup = set(owned_set_ids)
    # newline="" on both files lets the csv module handle line endings itself,
    # as its documentation recommends.
    with (
        source_path.open(newline="") as source_file,
        destination_path.open("w", newline="") as target_file,
    ):
        reader = csv.DictReader(source_file)
        # reader.fieldnames is None when the source has no header line.
        fieldnames = [
            *(reader.fieldnames or ()),
            "set_id",
            "rebrickable_url",
            "in_collection",
        ]
        writer = csv.DictWriter(target_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            set_num = row["set_num"]
            set_id = extract_set_id(set_num)
            writer.writerow(
                {
                    **row,
                    "set_id": set_id,
                    "rebrickable_url": build_rebrickable_set_url(set_num),
                    # Stored as lowercase text ("true"/"false").
                    "in_collection": str(set_id in owned_lookup).lower(),
                }
            )


def write_missing_sets_markdown(enriched_path: Path, destination_path: Path) -> None:
    """Write a Markdown table listing the sets that are not owned.

    Rows of the enriched CSV whose ``in_collection`` column equals ``"false"``
    are kept. Each becomes one table line holding a link (set_id pointing to
    rebrickable_url), the year and the name of the set.

    Args:
        enriched_path: CSV file to read; its rows must have the
            ``in_collection``, ``set_id``, ``rebrickable_url``, ``year`` and
            ``name`` columns.
        destination_path: Markdown file to write. ensure_parent_dir is called
            on it first (presumably to create the parent directory; confirm in
            lib.filesystem).

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    # newline="" lets the csv module handle line endings itself.
    with enriched_path.open(newline="") as source_file:
        missing_rows = [
            row
            for row in csv.DictReader(source_file)
            if row["in_collection"] == "false"
        ]

    # Format every line before opening the destination, so a missing column
    # does not leave a half-written file behind.
    lines = ["| set_id | year | name |\n", "| --- | --- | --- |\n"]
    for row in missing_rows:
        link = f"[{row['set_id']}]({row['rebrickable_url']})"
        # A literal "|" would end the table cell early, so it is escaped.
        name = row["name"].replace("|", "\\|")
        lines.append(f"| {link} | {row['year']} | {name} |\n")

    ensure_parent_dir(destination_path)
    with destination_path.open("w") as target_file:
        target_file.writelines(lines)