1
etude_lego_jurassic_world/scripts/download_part_rarity_resources.py

64 lines
1.9 KiB
Python

"""Télécharge les visuels des pièces les plus rares identifiées à l'étape 34."""
import os
from pathlib import Path
import requests
from dotenv import load_dotenv
from lib.rebrickable.part_rarity import load_part_rarity
from lib.rebrickable.resources import (
build_part_img_lookup,
download_binary,
download_resources,
fetch_part_img_url,
load_part_img_cache,
persist_part_img_cache,
)
# Pauses (in seconds) forwarded as ``delay_seconds`` to the helpers used in
# ``main``: the lookup delay goes to ``build_part_img_lookup`` and the image
# delay goes to ``download_resources``.
REQUEST_DELAY_SECONDS_LOOKUP = 0.6
REQUEST_DELAY_SECONDS_IMAGES = 0.35

# Input CSV read by ``load_part_rarity`` to obtain the rows to process.
PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv")

# CSV passed to ``load_part_img_cache`` / ``persist_part_img_cache`` to hold
# the part-number -> image-URL lookup.
PART_IMG_CACHE_PATH = Path("data/intermediate/part_img_cache.csv")

# Log file handed to ``download_resources`` as ``log_path``.
DOWNLOAD_LOG_PATH = Path("data/intermediate/part_rarity_download_log.csv")

# Root directory under which the downloaded part images are stored.
RESOURCES_DIR = Path("figures/rebrickable")
def main() -> None:
    """Build the image URLs of the rare parts and download the images.

    Steps:
      1. Load environment variables with ``load_dotenv`` and read the
         ``REBRICKABLE_TOKEN`` variable.
      2. Load the rows from ``PART_RARITY_TOP_PATH`` and the existing image
         cache from ``PART_IMG_CACHE_PATH``.
      3. Build a part-number -> image-URL lookup for every ``part_num`` found
         in the rows, then persist it to ``PART_IMG_CACHE_PATH``.
      4. Build a download plan (one entry per row) targeting
         ``RESOURCES_DIR/<sample_set_id>/rare_parts/<part_num>.jpg`` and hand
         it to ``download_resources``.

    Raises:
        KeyError: If ``REBRICKABLE_TOKEN`` is missing from the environment, or
            if a row's ``part_num`` has no entry in the image lookup.
    """
    load_dotenv()
    token = os.environ["REBRICKABLE_TOKEN"]

    # One HTTP session is shared by the URL lookups and the image downloads.
    # The context manager guarantees it is closed (releasing its pooled
    # connections) even when one of the steps below raises.
    with requests.Session() as session:
        rows = load_part_rarity(PART_RARITY_TOP_PATH)
        cache = load_part_img_cache(PART_IMG_CACHE_PATH)

        part_img_lookup = build_part_img_lookup(
            {row["part_num"] for row in rows},
            fetcher=lambda part_num: fetch_part_img_url(part_num, token, session),
            cache_path=PART_IMG_CACHE_PATH,
            existing_cache=cache,
            delay_seconds=REQUEST_DELAY_SECONDS_LOOKUP,
        )
        # Entries already present in the loaded cache overwrite the values
        # returned by the lookup builder before the merged result is saved.
        if cache:
            part_img_lookup.update(cache)
        persist_part_img_cache(PART_IMG_CACHE_PATH, part_img_lookup)

        # One download per row: the image is filed under the row's sample set.
        plan = [
            {
                "url": part_img_lookup[row["part_num"]],
                "path": RESOURCES_DIR / row["sample_set_id"] / "rare_parts" / f"{row['part_num']}.jpg",
            }
            for row in rows
        ]
        download_resources(
            plan,
            downloader=lambda url, path: download_binary(url, path, session),
            delay_seconds=REQUEST_DELAY_SECONDS_IMAGES,
            log_path=DOWNLOAD_LOG_PATH,
        )


if __name__ == "__main__":
    main()