etude_lego_jurassic_world/lib/rebrickable/downloader.py

"""Outils de téléchargement pour les fichiers fournis par Rebrickable."""

from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, List
import gzip
import shutil

import requests


# Base URL to which a Rebrickable file name is appended to build its download URL.
REBRICKABLE_BASE_URL = "https://cdn.rebrickable.com/media/downloads/"
# Chunk size, in bytes, requested when streaming a download to disk.
CHUNK_SIZE = 8192
# Maximum age, in days, of a decompressed file before it is downloaded again.
CACHE_TTL = 7


def build_rebrickable_url(file_name: str) -> str:
    """Return the full download URL for a Rebrickable file.

    The URL is ``REBRICKABLE_BASE_URL`` immediately followed by ``file_name``;
    no separator is inserted and no escaping is applied.
    """
    full_url = "{}{}".format(REBRICKABLE_BASE_URL, file_name)
    return full_url


def download_rebrickable_file(
    file_name: str, destination_dir: Path, timeout: float = 30.0
) -> Path:
    """Download a gzip-compressed Rebrickable file and return the decompressed path.

    The archive is streamed into ``destination_dir``, decompressed next to it
    (same name with the last suffix removed), and then deleted. When a
    decompressed file no older than ``CACHE_TTL`` days already exists, it is
    returned as-is and nothing is downloaded.

    Args:
        file_name: Name of the compressed file, e.g. ``"sets.csv.gz"``. It must
            have a suffix so the decompressed path differs from the archive.
        destination_dir: Directory receiving the file; created if missing.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            server cannot block the call forever.

    Returns:
        Path of the decompressed file.

    Raises:
        ValueError: If ``file_name`` has no suffix to strip.
        requests.HTTPError: If the server answers with an error status.
    """
    target_path = destination_dir / file_name
    decompressed_path = target_path.with_suffix("")
    if decompressed_path == target_path:
        # Without a suffix, the archive and its output would be the same file:
        # decompressing would truncate the archive while it is being read.
        raise ValueError(
            f"file_name must carry a compression suffix such as '.gz': {file_name!r}"
        )
    destination_dir.mkdir(parents=True, exist_ok=True)

    if decompressed_path.exists():
        cache_age = datetime.now() - datetime.fromtimestamp(decompressed_path.stat().st_mtime)
        if cache_age <= timedelta(days=CACHE_TTL):
            # Still fresh: only clear out a stale archive left by an earlier run.
            if target_path.exists():
                target_path.unlink()
            return decompressed_path

    # Decompress into a sibling ".part" file and rename it only on success, so
    # an interrupted run never leaves a truncated file that the cache check
    # above would later accept as valid.
    partial_path = decompressed_path.with_name(decompressed_path.name + ".part")
    response = requests.get(build_rebrickable_url(file_name), stream=True, timeout=timeout)
    try:
        response.raise_for_status()
        with target_path.open("wb") as target_file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                target_file.write(chunk)
        with gzip.open(target_path, "rb") as compressed_file:
            with partial_path.open("wb") as decompressed_file:
                shutil.copyfileobj(compressed_file, decompressed_file)
        partial_path.replace(decompressed_path)
    finally:
        # Release the connection and remove temporary files on every path.
        response.close()
        for leftover in (target_path, partial_path):
            if leftover.exists():
                leftover.unlink()
    return decompressed_path


def download_rebrickable_files(file_names: Iterable[str], destination_dir: Path) -> List[Path]:
    """Download several compressed Rebrickable files one after another.

    Each name is passed, in iteration order, to ``download_rebrickable_file``
    together with ``destination_dir``. The returned paths are collected in
    that same order.
    """
    downloaded_paths: List[Path] = []
    for name in file_names:
        downloaded_paths.append(download_rebrickable_file(name, destination_dir))
    return downloaded_paths