"""Télécharge les visuels des pièces les plus rares identifiées à l'étape 34.""" import os from pathlib import Path import requests from dotenv import load_dotenv from lib.rebrickable.part_rarity import load_part_rarity from lib.rebrickable.resources import ( build_part_img_lookup, download_binary, download_resources, fetch_part_img_url, load_part_img_cache, persist_part_img_cache, ) PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv") RESOURCES_DIR = Path("figures/rebrickable") PART_IMG_CACHE_PATH = Path("data/intermediate/part_img_cache.csv") DOWNLOAD_LOG_PATH = Path("data/intermediate/part_rarity_download_log.csv") REQUEST_DELAY_SECONDS_IMAGES = 0.35 REQUEST_DELAY_SECONDS_LOOKUP = 0.6 def main() -> None: """Construit les URLs d'images des pièces rares et les télécharge.""" load_dotenv() token = os.environ["REBRICKABLE_TOKEN"] session = requests.Session() rows = load_part_rarity(PART_RARITY_TOP_PATH) cache = load_part_img_cache(PART_IMG_CACHE_PATH) part_img_lookup = build_part_img_lookup( {row["part_num"] for row in rows}, fetcher=lambda part_num: fetch_part_img_url(part_num, token, session), cache_path=PART_IMG_CACHE_PATH, existing_cache=cache, delay_seconds=REQUEST_DELAY_SECONDS_LOOKUP, ) if cache: part_img_lookup.update(cache) persist_part_img_cache(PART_IMG_CACHE_PATH, part_img_lookup) plan = [ { "url": part_img_lookup[row["part_num"]], "path": RESOURCES_DIR / row["sample_set_id"] / "rare_parts" / f"{row['part_num']}.jpg", } for row in rows ] download_resources( plan, downloader=lambda url, path: download_binary(url, path, session), delay_seconds=REQUEST_DELAY_SECONDS_IMAGES, log_path=DOWNLOAD_LOG_PATH, ) if __name__ == "__main__": main()