First elements of the study
1  lib/rebrickable/__init__.py  Normal file
@@ -0,0 +1 @@
"""Features related to Rebrickable data."""
47  lib/rebrickable/downloader.py  Normal file
@@ -0,0 +1,47 @@
"""Download helpers for the files published by Rebrickable."""

from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, List
import gzip
import shutil

import requests


REBRICKABLE_BASE_URL = "https://cdn.rebrickable.com/media/downloads/"
CHUNK_SIZE = 8192
CACHE_TTL = 7  # days


def build_rebrickable_url(file_name: str) -> str:
    """Build the full URL of a Rebrickable file from its name."""
    return f"{REBRICKABLE_BASE_URL}{file_name}"


def download_rebrickable_file(file_name: str, destination_dir: Path) -> Path:
    """Download a Rebrickable file, decompress it, and delete the archive."""
    target_path = destination_dir / file_name
    destination_dir.mkdir(parents=True, exist_ok=True)
    decompressed_path = target_path.with_suffix("")
    if decompressed_path.exists():
        # Reuse the decompressed file while it is younger than CACHE_TTL days.
        cache_age = datetime.now() - datetime.fromtimestamp(decompressed_path.stat().st_mtime)
        if cache_age <= timedelta(days=CACHE_TTL):
            if target_path.exists():
                target_path.unlink()
            return decompressed_path
    response = requests.get(build_rebrickable_url(file_name), stream=True)
    response.raise_for_status()
    with target_path.open("wb") as target_file:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            target_file.write(chunk)
    with gzip.open(target_path, "rb") as compressed_file:
        with decompressed_path.open("wb") as decompressed_file:
            shutil.copyfileobj(compressed_file, decompressed_file)
    target_path.unlink()
    return decompressed_path


def download_rebrickable_files(file_names: Iterable[str], destination_dir: Path) -> List[Path]:
    """Download several compressed Rebrickable files, one after another."""
    return [download_rebrickable_file(file_name, destination_dir) for file_name in file_names]
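A minimal usage sketch for this downloader; the ".csv.gz" names below match Rebrickable's published dump files, and data/rebrickable is a hypothetical destination directory:

from pathlib import Path

from lib.rebrickable.downloader import download_rebrickable_files

# Hypothetical example: fetch a few of the dumps the study relies on.
paths = download_rebrickable_files(
    ["sets.csv.gz", "themes.csv.gz", "colors.csv.gz"],
    Path("data/rebrickable"),
)
print(paths)  # e.g. [PosixPath('data/rebrickable/sets.csv'), ...]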
86  lib/rebrickable/enrich_sets.py  Normal file
@@ -0,0 +1,86 @@
"""Enrich LEGO sets with Rebrickable and personal metadata."""

import csv
from pathlib import Path
from typing import Iterable, Set

from lib.filesystem import ensure_parent_dir

REBRICKABLE_SET_BASE_URL = "https://rebrickable.com/sets/"


def extract_set_id(set_num: str) -> str:
    """Extract the LEGO identifier (the part before the revision suffix) from set_num."""
    return set_num.split("-", 1)[0]


def build_rebrickable_set_url(set_num: str) -> str:
    """Build the public Rebrickable URL of a set."""
    return f"{REBRICKABLE_SET_BASE_URL}{set_num}"


def parse_set_collection_root(raw_value: str) -> Path | None:
    """Prepare the collection path, or None when no collection is provided."""
    cleaned = raw_value.strip()
    if not cleaned:
        print("The MY_SETS variable is empty; no sets in the collection.")
        return None
    return Path(cleaned)


def load_owned_set_ids(collection_root: Path) -> Set[str]:
    """Return the set of set identifiers present in a collection directory."""
    if not collection_root.exists():
        print(f"The directory {collection_root} does not exist; no sets in the collection.")
        return set()
    if not collection_root.is_dir():
        print(f"The path {collection_root} is not a directory; no sets in the collection.")
        return set()
    entries = [path for path in collection_root.iterdir() if path.is_dir()]
    if not entries:
        print(f"The directory {collection_root} is empty; no sets in the collection.")
        return set()
    return {entry.name for entry in entries}


def enrich_sets(
    source_path: Path,
    destination_path: Path,
    owned_set_ids: Iterable[str],
) -> None:
    """Add the set_id, rebrickable_url and in_collection columns to the filtered catalog."""
    ensure_parent_dir(destination_path)
    owned_lookup = set(owned_set_ids)
    with source_path.open() as source_file, destination_path.open("w", newline="") as target_file:
        reader = csv.DictReader(source_file)
        fieldnames = reader.fieldnames + ["set_id", "rebrickable_url", "in_collection"]
        writer = csv.DictWriter(target_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            set_id = extract_set_id(row["set_num"])
            writer.writerow(
                {
                    **row,
                    "set_id": set_id,
                    "rebrickable_url": build_rebrickable_set_url(row["set_num"]),
                    # "true"/"false", matching Rebrickable's lowercase booleans.
                    "in_collection": str(set_id in owned_lookup).lower(),
                }
            )


def write_missing_sets_markdown(enriched_path: Path, destination_path: Path) -> None:
    """Generate a Markdown table listing the sets that are not owned."""
    with enriched_path.open() as source_file:
        reader = csv.DictReader(source_file)
        rows = [row for row in reader if row["in_collection"] == "false"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w") as target_file:
        target_file.write("| set_id | year | name |\n")
        target_file.write("| --- | --- | --- |\n")
        for row in rows:
            link = f"[{row['set_id']}]({row['rebrickable_url']})"
            target_file.write(f"| {link} | {row['year']} | {row['name']} |\n")
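A sketch of how these helpers chain together; the paths are hypothetical, and MY_SETS is the environment variable mentioned in parse_set_collection_root:

import os
from pathlib import Path

from lib.rebrickable.enrich_sets import (
    enrich_sets,
    load_owned_set_ids,
    parse_set_collection_root,
)

# Hypothetical paths; adjust to the project's layout.
collection_root = parse_set_collection_root(os.environ.get("MY_SETS", ""))
owned = load_owned_set_ids(collection_root) if collection_root else set()
enrich_sets(Path("output/sets_filtered.csv"), Path("output/sets_enriched.csv"), owned)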
41  lib/rebrickable/filter_sets.py  Normal file
@@ -0,0 +1,41 @@
"""Filter LEGO sets by Rebrickable theme identifiers."""

import csv
from pathlib import Path
from typing import Dict, Iterable, List

from lib.filesystem import ensure_parent_dir


def parse_theme_ids(raw_value: str) -> List[str]:
    """Extract theme identifiers from a comma-separated string."""
    values = [value.strip() for value in raw_value.split(",") if value.strip()]
    if not values:
        raise ValueError("At least one theme identifier is required.")
    return values


def filter_sets_by_theme(
    source_path: Path,
    destination_path: Path,
    theme_ids: Iterable[str],
    overrides_path: Path,
) -> None:
    """Filter the set catalog, keeping only the targeted themes with at least one part."""
    ensure_parent_dir(destination_path)
    allowed_ids = set(theme_ids)
    overrides = load_num_parts_overrides(overrides_path)
    with source_path.open() as source_file, destination_path.open("w", newline="") as target_file:
        reader = csv.DictReader(source_file)
        writer = csv.DictWriter(target_file, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            if row["theme_id"] in allowed_ids and int(row["num_parts"]) > 0:
                # Apply the manual part-count correction when one exists for this set.
                override = overrides.get(row["set_num"])
                writer.writerow({**row, "num_parts": override if override is not None else row["num_parts"]})


def load_num_parts_overrides(overrides_path: Path) -> Dict[str, str]:
    """Load the per-set part-count corrections."""
    with overrides_path.open() as overrides_file:
        reader = csv.DictReader(overrides_file)
        return {row["set_num"]: row["num_parts"] for row in reader}
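As a quick illustration, parse_theme_ids with made-up theme identifiers:

from lib.rebrickable.filter_sets import parse_theme_ids

print(parse_theme_ids("602, 603"))  # ['602', '603'] (hypothetical ids)
# parse_theme_ids("") raises ValueError: at least one identifier is required.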
107  lib/rebrickable/inventory_reconciliation.py  Normal file
@@ -0,0 +1,107 @@
"""Report the gaps between the catalog and the aggregated inventory."""

import csv
from pathlib import Path
from typing import Dict, Iterable, List

from lib.filesystem import ensure_parent_dir


def load_sets(sets_path: Path) -> List[dict]:
    """Load the filtered sets for analysis."""
    with sets_path.open() as sets_file:
        reader = csv.DictReader(sets_file)
        return list(reader)


def index_sets_by_num(sets: Iterable[dict]) -> Dict[str, dict]:
    """Index the sets by their full set number."""
    return {row["set_num"]: row for row in sets}


def compute_inventory_totals(parts_path: Path, include_spares: bool) -> Dict[str, int]:
    """Compute the total number of parts per set, with or without spares."""
    totals: Dict[str, int] = {}
    with parts_path.open() as parts_file:
        reader = csv.DictReader(parts_file)
        for row in reader:
            if not include_spares and row["is_spare"] == "true":
                continue
            set_num = row["set_num"]
            totals[set_num] = totals.get(set_num, 0) + int(row["quantity_in_set"])
    return totals


def compute_inventory_gaps(sets_path: Path, parts_path: Path) -> List[dict]:
    """List the sets whose part total differs from the catalog."""
    sets = load_sets(sets_path)
    totals_with_spares = compute_inventory_totals(parts_path, include_spares=True)
    totals_without_spares = compute_inventory_totals(parts_path, include_spares=False)
    gaps: List[dict] = []
    for set_row in sets:
        expected_parts = int(set_row["num_parts"])
        inventory_parts_with_spares = totals_with_spares[set_row["set_num"]]
        inventory_parts_non_spare = totals_without_spares[set_row["set_num"]]
        if expected_parts != inventory_parts_with_spares:
            gaps.append(
                {
                    "set_num": set_row["set_num"],
                    "set_id": set_row["set_id"],
                    "expected_parts": expected_parts,
                    "inventory_parts": inventory_parts_with_spares,
                    "inventory_parts_non_spare": inventory_parts_non_spare,
                    "delta": abs(expected_parts - inventory_parts_with_spares),
                    "delta_non_spare": abs(expected_parts - inventory_parts_non_spare),
                    "in_collection": set_row["in_collection"],
                }
            )
    return gaps


def write_inventory_gaps_csv(destination_path: Path, gaps: Iterable[dict]) -> None:
    """Write a CSV listing the sets with inventory gaps."""
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as csv_file:
        fieldnames = [
            "set_num",
            "set_id",
            "expected_parts",
            "inventory_parts",
            "inventory_parts_non_spare",
            "delta",
            "delta_non_spare",
            "in_collection",
        ]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in gaps:
            writer.writerow(row)


def build_instructions_url(set_id: str) -> str:
    """Build a direct link to the LEGO building instructions page for the set."""
    return f"https://www.lego.com/service/buildinginstructions/{set_id}"


def write_inventory_gaps_markdown(
    destination_path: Path,
    gaps: Iterable[dict],
    sets_by_num: Dict[str, dict],
) -> None:
    """Generate a Markdown table listing the sets with inventory gaps."""
    ensure_parent_dir(destination_path)
    with destination_path.open("w") as markdown_file:
        markdown_file.write(
            "| set_id | name | year | delta (spares included) | delta (spares excluded) | expected_parts | inventory_parts | inventory_parts_non_spare | in_collection | instructions |\n"
        )
        markdown_file.write("| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n")
        for row in gaps:
            if row["delta_non_spare"] == 0:
                # Only report sets that still differ once spares are excluded.
                continue
            set_row = sets_by_num[row["set_num"]]
            set_link = f"[{row['set_id']}]({set_row['rebrickable_url']})"
            instructions_link = f"[PDF]({build_instructions_url(row['set_id'])})"
            markdown_file.write(
                f"| {set_link} | {set_row['name']} | {set_row['year']} | {row['delta']} | {row['delta_non_spare']} | "
                f"{row['expected_parts']} | {row['inventory_parts']} | {row['inventory_parts_non_spare']} | "
                f"{row['in_collection']} | {instructions_link} |\n"
            )
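A sketch of a reconciliation run under hypothetical output paths; the sets file should be the enriched catalog, since the Markdown writer reads rebrickable_url, name and year from it:

from pathlib import Path

from lib.rebrickable.inventory_reconciliation import (
    compute_inventory_gaps,
    index_sets_by_num,
    load_sets,
    write_inventory_gaps_csv,
    write_inventory_gaps_markdown,
)

# Hypothetical paths into the study's output directory.
sets_path = Path("output/sets_enriched.csv")
parts_path = Path("output/parts_filtered.csv")
gaps = compute_inventory_gaps(sets_path, parts_path)
write_inventory_gaps_csv(Path("output/inventory_gaps.csv"), gaps)
write_inventory_gaps_markdown(
    Path("output/inventory_gaps.md"),
    gaps,
    index_sets_by_num(load_sets(sets_path)),
)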
143  lib/rebrickable/parts_inventory.py  Normal file
@@ -0,0 +1,143 @@
"""Build a detailed inventory of the parts in each set."""

import csv
from pathlib import Path
from typing import Dict, List

from lib.filesystem import ensure_parent_dir


def normalize_boolean(raw_value: str) -> str:
    """Normalize a boolean value to a lowercase string."""
    return raw_value.lower()


def select_latest_inventories(inventories_path: Path) -> Dict[str, dict]:
    """Keep, for each set, the inventory with the highest version number."""
    latest_inventories: Dict[str, dict] = {}
    with inventories_path.open() as inventories_file:
        reader = csv.DictReader(inventories_file)
        for row in reader:
            current = latest_inventories.get(row["set_num"])
            if current is None or int(row["version"]) > int(current["version"]):
                latest_inventories[row["set_num"]] = {"id": row["id"], "version": row["version"]}
    return latest_inventories


def build_color_lookup(colors_path: Path) -> Dict[str, dict]:
    """Build a color index keyed by identifier."""
    colors: Dict[str, dict] = {}
    with colors_path.open() as colors_file:
        reader = csv.DictReader(colors_file)
        for row in reader:
            colors[row["id"]] = {
                "rgb": row["rgb"],
                "is_translucent": normalize_boolean(row["is_trans"]),
            }
    return colors


def index_inventory_parts_by_inventory(inventory_parts_path: Path) -> Dict[str, List[dict]]:
    """Index the inventory rows by inventory identifier."""
    parts_by_inventory: Dict[str, List[dict]] = {}
    with inventory_parts_path.open() as parts_file:
        reader = csv.DictReader(parts_file)
        for row in reader:
            inventory_id = row["inventory_id"]
            if inventory_id not in parts_by_inventory:
                parts_by_inventory[inventory_id] = []
            parts_by_inventory[inventory_id].append(row)
    return parts_by_inventory


def index_inventory_minifigs_by_inventory(inventory_minifigs_path: Path) -> Dict[str, List[dict]]:
    """Index the minifigs by inventory."""
    minifigs_by_inventory: Dict[str, List[dict]] = {}
    with inventory_minifigs_path.open() as minifigs_file:
        reader = csv.DictReader(minifigs_file)
        for row in reader:
            inventory_id = row["inventory_id"]
            if inventory_id not in minifigs_by_inventory:
                minifigs_by_inventory[inventory_id] = []
            minifigs_by_inventory[inventory_id].append(row)
    return minifigs_by_inventory


def build_minifig_lookup(minifigs_path: Path) -> Dict[str, dict]:
    """Build an index of minifigs, including their part counts."""
    minifigs: Dict[str, dict] = {}
    with minifigs_path.open() as minifigs_file:
        reader = csv.DictReader(minifigs_file)
        for row in reader:
            minifigs[row["fig_num"]] = row
    return minifigs


def write_parts_filtered(
    sets_path: Path,
    inventories_path: Path,
    inventory_parts_path: Path,
    colors_path: Path,
    inventory_minifigs_path: Path,
    minifigs_path: Path,
    destination_path: Path,
) -> None:
    """Assemble an aggregated CSV listing the parts per set and per color."""
    latest_inventories = select_latest_inventories(inventories_path)
    parts_by_inventory = index_inventory_parts_by_inventory(inventory_parts_path)
    minifigs_by_inventory = index_inventory_minifigs_by_inventory(inventory_minifigs_path)
    minifigs = build_minifig_lookup(minifigs_path)
    colors = build_color_lookup(colors_path)
    ensure_parent_dir(destination_path)
    with sets_path.open() as sets_file, destination_path.open("w", newline="") as target_file:
        sets_reader = csv.DictReader(sets_file)
        fieldnames = [
            "part_num",
            "color_rgb",
            "is_translucent",
            "set_num",
            "set_id",
            "quantity_in_set",
            "is_spare",
        ]
        writer = csv.DictWriter(target_file, fieldnames=fieldnames)
        writer.writeheader()
        for set_row in sets_reader:
            inventory = latest_inventories[set_row["set_num"]]
            inventory_parts = parts_by_inventory[inventory["id"]]
            inventory_total_non_spare = sum(
                int(part_row["quantity"])
                for part_row in inventory_parts
                if normalize_boolean(part_row["is_spare"]) == "false"
            )
            expected_parts = int(set_row["num_parts"])
            for part_row in inventory_parts:
                color = colors[part_row["color_id"]]
                writer.writerow(
                    {
                        "part_num": part_row["part_num"],
                        "color_rgb": color["rgb"],
                        "is_translucent": color["is_translucent"],
                        "set_num": set_row["set_num"],
                        "set_id": set_row["set_id"],
                        "quantity_in_set": part_row["quantity"],
                        "is_spare": normalize_boolean(part_row["is_spare"]),
                    }
                )
            if inventory_total_non_spare < expected_parts:
                # The set inventory alone falls short of the catalog count:
                # add the parts of the minifigs attached to this inventory.
                for minifig_row in minifigs_by_inventory.get(inventory["id"], []):
                    minifig_inventory = latest_inventories[minifig_row["fig_num"]]
                    minifig_parts = parts_by_inventory[minifig_inventory["id"]]
                    for part_row in minifig_parts:
                        color = colors[part_row["color_id"]]
                        writer.writerow(
                            {
                                "part_num": part_row["part_num"],
                                "color_rgb": color["rgb"],
                                "is_translucent": color["is_translucent"],
                                "set_num": set_row["set_num"],
                                "set_id": set_row["set_id"],
                                "quantity_in_set": str(int(part_row["quantity"]) * int(minifig_row["quantity"])),
                                "is_spare": normalize_boolean(part_row["is_spare"]),
                            }
                        )
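A sketch of a full inventory build, assuming the dumps were decompressed by the downloader into a hypothetical data/rebrickable directory:

from pathlib import Path

from lib.rebrickable.parts_inventory import write_parts_filtered

data = Path("data/rebrickable")  # hypothetical download directory
write_parts_filtered(
    sets_path=Path("output/sets_enriched.csv"),
    inventories_path=data / "inventories.csv",
    inventory_parts_path=data / "inventory_parts.csv",
    colors_path=data / "colors.csv",
    inventory_minifigs_path=data / "inventory_minifigs.csv",
    minifigs_path=data / "minifigs.csv",
    destination_path=Path("output/parts_filtered.csv"),
)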
101  lib/rebrickable/parts_stats.py  Normal file
@@ -0,0 +1,101 @@
"""Simple statistics on the filtered parts."""

import csv
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Tuple

from lib.filesystem import ensure_parent_dir
from lib.rebrickable.inventory_reconciliation import compute_inventory_gaps
from lib.rebrickable.stats import read_rows as read_stats_rows


def read_rows(path: Path) -> List[dict]:
    """Load a CSV file into memory as a list of dictionaries."""
    with path.open() as csv_file:
        reader = csv.DictReader(csv_file)
        return list(reader)


def select_non_spare_parts(rows: Iterable[dict]) -> List[dict]:
    """Keep only the parts that are not spares."""
    return [row for row in rows if row["is_spare"] == "false"]


def variation_key(row: dict) -> Tuple[str, str, str]:
    """Uniqueness key for a part variation (part reference + color)."""
    return (row["part_num"], row["color_rgb"], row["is_translucent"])


def color_key(row: dict) -> Tuple[str, str]:
    """Uniqueness key for a color."""
    return (row["color_rgb"], row["is_translucent"])


def aggregate_quantities_by_variation(rows: Iterable[dict]) -> Dict[Tuple[str, str, str], int]:
    """Compute the total quantity per part variation (spares excluded)."""
    quantities: Dict[Tuple[str, str, str], int] = defaultdict(int)
    for row in rows:
        quantities[variation_key(row)] += int(row["quantity_in_set"])
    return quantities


def read_total_filtered_parts(stats_path: Path) -> int:
    """Read the expected part total for the filtered themes from stats.csv."""
    rows = read_stats_rows(stats_path)
    # This label must match the row emitted by compute_basic_stats in lib.rebrickable.stats.
    return int(
        next(row["valeur"] for row in rows if row["libelle"] == "Total de pièces pour les thèmes filtrés")
    )


def build_stats(
    rows: Iterable[dict],
    sets_path: Path,
    parts_path: Path,
    stats_path: Path,
) -> List[Tuple[str, str]]:
    """Build the main statistics on the filtered parts and the inventory gaps."""
    non_spares = select_non_spare_parts(rows)
    quantities = aggregate_quantities_by_variation(non_spares)
    total_variations = len(quantities)
    color_set = {color_key(row) for row in non_spares}
    least_used_key = min(quantities, key=quantities.get)
    most_used_key = max(quantities, key=quantities.get)
    least_used = quantities[least_used_key]
    most_used = quantities[most_used_key]
    total_non_spare = sum(quantities.values())
    gaps = compute_inventory_gaps(sets_path, parts_path)
    gap_count = len(gaps)
    worst_gap = max(gaps, key=lambda gap: gap["delta"]) if gap_count > 0 else {"set_id": "none", "delta": 0}
    catalog_total_parts = read_total_filtered_parts(stats_path)
    catalog_inventory_delta = catalog_total_parts - total_non_spare

    return [
        ("Total de variations de pièces (hors rechanges)", str(total_variations)),
        (
            "Pièce la moins utilisée (référence + couleur)",
            f"{least_used_key[0]} / {least_used_key[1]} / {least_used_key[2]} ({least_used})",
        ),
        (
            "Pièce la plus commune (référence + couleur)",
            f"{most_used_key[0]} / {most_used_key[1]} / {most_used_key[2]} ({most_used})",
        ),
        ("Total de couleurs utilisées (hors rechanges)", str(len(color_set))),
        ("Total de pièces hors rechanges", str(total_non_spare)),
        (
            "Ecart total catalogue (stats) - inventaire (hors rechanges)",
            str(catalog_inventory_delta),
        ),
        ("Nombre de sets en écart inventaire/catalogue", str(gap_count)),
        ("Ecart maximal inventaire/catalogue", f"{worst_gap['set_id']} ({worst_gap['delta']})"),
    ]


def write_parts_stats(destination_path: Path, stats: Sequence[Tuple[str, str]]) -> None:
    """Write the statistics to a two-column CSV."""
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["libelle", "valeur"])
        for label, value in stats:
            writer.writerow([label, value])
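A sketch of the parts-stats step with hypothetical paths; stats.csv must already contain the "Total de pièces pour les thèmes filtrés" row written by lib.rebrickable.stats:

from pathlib import Path

from lib.rebrickable.parts_stats import build_stats, read_rows, write_parts_stats

rows = read_rows(Path("output/parts_filtered.csv"))
stats = build_stats(
    rows,
    sets_path=Path("output/sets_enriched.csv"),
    parts_path=Path("output/parts_filtered.csv"),
    stats_path=Path("output/stats.csv"),
)
write_parts_stats(Path("output/parts_stats.csv"), stats)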
122  lib/rebrickable/stats.py  Normal file
@@ -0,0 +1,122 @@
"""Compute basic statistics on the filtered LEGO sets."""

import csv
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple

from lib.filesystem import ensure_parent_dir


def read_rows(path: Path) -> List[dict]:
    """Load a CSV file into memory as a list of dictionaries."""
    with path.open() as csv_file:
        reader = csv.DictReader(csv_file)
        return list(reader)


def write_stats_csv(destination_path: Path, stats: Sequence[Tuple[str, str]]) -> None:
    """Write the statistics to a two-column CSV."""
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["libelle", "valeur"])
        for label, value in stats:
            writer.writerow([label, value])


def compute_median(values: List[int]) -> float:
    """Compute the median of a list of integers."""
    sorted_values = sorted(values)
    middle = len(sorted_values) // 2
    if len(sorted_values) % 2 == 1:
        return float(sorted_values[middle])
    return (sorted_values[middle - 1] + sorted_values[middle]) / 2


def compute_basic_stats(
    themes: Iterable[dict],
    all_sets: Iterable[dict],
    filtered_sets: Iterable[dict],
    enriched_sets: Iterable[dict],
) -> List[Tuple[str, str]]:
    """Compute the main statistics from the loaded sets."""
    themes_list = list(themes)
    all_sets_list = list(all_sets)
    filtered_sets_list = list(filtered_sets)
    enriched_sets_list = list(enriched_sets)

    theme_count_total = len(themes_list)
    total_sets = len(all_sets_list)
    filtered_sets_count = len(filtered_sets_list)
    avg_sets_per_theme = total_sets / theme_count_total
    percent_filtered = (filtered_sets_count / total_sets) * 100
    owned_sets_count = sum(1 for row in enriched_sets_list if row["in_collection"] == "true")
    missing_sets_count = sum(1 for row in enriched_sets_list if row["in_collection"] == "false")
    percent_owned = (owned_sets_count / filtered_sets_count) * 100
    parts_per_set = [int(row["num_parts"]) for row in filtered_sets_list]
    avg_parts_per_set = sum(parts_per_set) / filtered_sets_count
    median_parts_per_set = compute_median(parts_per_set)
    years = [int(row["year"]) for row in filtered_sets_list]
    avg_sets_per_year = filtered_sets_count / len(set(years))
    total_parts = sum(parts_per_set)
    theme_ids_filtered = {row["theme_id"] for row in filtered_sets_list}
    min_year = str(min(years))
    max_year = str(max(years))
    year_counts = {}
    for year in years:
        year_counts[year] = year_counts.get(year, 0) + 1
    # Highest count wins; ties go to the earliest year.
    prolific_year, prolific_count = max(year_counts.items(), key=lambda item: (item[1], -item[0]))
    richest_set = max(filtered_sets_list, key=lambda row: int(row["num_parts"]))
    lightest_set = min(filtered_sets_list, key=lambda row: int(row["num_parts"]))
    oldest_set = min(filtered_sets_list, key=lambda row: (int(row["year"]), row["set_num"]))
    latest_set = max(filtered_sets_list, key=lambda row: (int(row["year"]), row["set_num"]))
    owned_parts = [int(row["num_parts"]) for row in enriched_sets_list if row["in_collection"] == "true"]
    missing_parts = [int(row["num_parts"]) for row in enriched_sets_list if row["in_collection"] == "false"]
    avg_parts_owned = sum(owned_parts) / len(owned_parts)
    avg_parts_missing = sum(missing_parts) / len(missing_parts)
    total_parts_owned = sum(owned_parts)
    percent_parts_owned = (total_parts_owned / total_parts) * 100

    return [
        ("Nombre total de sets (catalogue complet)", str(total_sets)),
        ("Nombre total de thèmes (catalogue complet)", str(theme_count_total)),
        ("Nombre de sets après filtrage (thèmes ciblés)", str(filtered_sets_count)),
        ("Nombre moyen de sets par thème (catalogue complet)", f"{avg_sets_per_theme:.2f}"),
        ("Pourcentage des sets filtrés vs total", f"{percent_filtered:.2f}%"),
        ("Taux de possession (thèmes filtrés)", f"{percent_owned:.2f}%"),
        ("Sets dans la collection", str(owned_sets_count)),
        ("Sets manquants pour la collection", str(missing_sets_count)),
        ("Nombre moyen de pièces par set (thèmes filtrés)", f"{avg_parts_per_set:.2f}"),
        ("Médiane de pièces par set (thèmes filtrés)", f"{median_parts_per_set:.2f}"),
        ("Nombre moyen de sets commercialisés par an (thèmes filtrés)", f"{avg_sets_per_year:.2f}"),
        ("Total de pièces pour les thèmes filtrés", str(total_parts)),
        ("Total de pièces des sets possédés", str(total_parts_owned)),
        ("Pourcentage de pièces possédées (thèmes filtrés)", f"{percent_parts_owned:.2f}%"),
        ("Nombre de thèmes filtrés", str(len(theme_ids_filtered))),
        ("Première année de sortie (thèmes filtrés)", min_year),
        ("Dernière année de sortie (thèmes filtrés)", max_year),
        ("Année la plus prolifique (thèmes filtrés)", f"{prolific_year} ({prolific_count} sets)"),
        (
            "Set avec le plus de pièces (thèmes filtrés)",
            f"{richest_set['set_num']} - {richest_set['name']} ({richest_set['num_parts']} pièces)",
        ),
        (
            "Set avec le moins de pièces (thèmes filtrés)",
            f"{lightest_set['set_num']} - {lightest_set['name']} ({lightest_set['num_parts']} pièces)",
        ),
        (
            "Set le plus ancien (thèmes filtrés)",
            f"{oldest_set['set_num']} - {oldest_set['name']} ({oldest_set['year']})",
        ),
        (
            "Set le plus récent (thèmes filtrés)",
            f"{latest_set['set_num']} - {latest_set['name']} ({latest_set['year']})",
        ),
        (
            "Nombre moyen de pièces des sets possédés",
            f"{avg_parts_owned:.2f}",
        ),
        (
            "Nombre moyen de pièces des sets manquants",
            f"{avg_parts_missing:.2f}",
        ),
    ]
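compute_median returns the middle value for odd-length input and the mean of the two middle values otherwise; a quick check with made-up values:

from lib.rebrickable.stats import compute_median

print(compute_median([3, 1, 2]))     # 2.0  (odd length: middle value)
print(compute_median([4, 1, 3, 2]))  # 2.5  (even length: mean of 2 and 3)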