"""Télécharge les ressources (sets, minifigs, têtes) pour les sets filtrés.""" import os from pathlib import Path import requests from dotenv import load_dotenv from lib.rebrickable.resources import ( add_part_img_urls, build_download_plan, build_part_img_lookup, download_binary, download_resources, fetch_part_img_url, load_minifigs_by_set, load_minifigs_catalog, load_sets_enriched, load_part_img_cache, persist_part_img_cache, write_minifigs_by_set_with_images, ) SETS_PATH = Path("data/intermediate/sets_enriched.csv") MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") MINIFIGS_CATALOG_PATH = Path("data/raw/minifigs.csv") RESOURCES_DIR = Path("figures/rebrickable") REQUEST_DELAY_SECONDS_IMAGES = 0.35 PART_IMG_CACHE_PATH = Path("data/intermediate/part_img_cache.csv") DOWNLOAD_LOG_PATH = Path("data/intermediate/resources_download_log.csv") def main() -> None: """Construit les URLs manquantes et télécharge les images associées.""" load_dotenv() token = os.environ["REBRICKABLE_TOKEN"] session = requests.Session() sets = load_sets_enriched(SETS_PATH) minifigs_by_set = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) minifigs_catalog = load_minifigs_catalog(MINIFIGS_CATALOG_PATH) cache = load_part_img_cache(PART_IMG_CACHE_PATH) missing_part_numbers = { row["part_num"] for row in minifigs_by_set if row.get("part_img_url", "").strip() == "" and row["part_num"] not in cache } part_img_lookup = build_part_img_lookup( missing_part_numbers, fetcher=lambda part_num: fetch_part_img_url(part_num, token, session), cache_path=PART_IMG_CACHE_PATH, existing_cache=cache, ) if cache: part_img_lookup.update(cache) persist_part_img_cache(PART_IMG_CACHE_PATH, part_img_lookup) minifigs_with_imgs = add_part_img_urls(minifigs_by_set, part_img_lookup) write_minifigs_by_set_with_images(MINIFIGS_BY_SET_PATH, minifigs_with_imgs) plan = build_download_plan(sets, minifigs_with_imgs, minifigs_catalog, RESOURCES_DIR) download_resources( plan, downloader=lambda url, path: download_binary(url, path, session), delay_seconds=REQUEST_DELAY_SECONDS_IMAGES, log_path=DOWNLOAD_LOG_PATH, ) if __name__ == "__main__": main()