Ajoute l’étape 26 pièces/minifigs

2025-12-02 14:24:16 +01:00
parent 4001205af1
commit 4ee9b38fbb
7 changed files with 318 additions and 0 deletions
--- a/lib/plots/minifig_parts_correlation.py
+++ b/lib/plots/minifig_parts_correlation.py
@@ -0,0 +1,85 @@
+"""Diagramme de corrélation entre pièces et minifigs par set."""
+
+from pathlib import Path
+from typing import Iterable, Tuple
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.stats import read_rows
+
+
+def load_points(path: Path, scope: str) -> Tuple[list[int], list[int]]:
+    """Load the scatter coordinates recorded for one scope.
+
+    Args:
+        path: File handed to ``read_rows``; each row it yields must expose the
+            ``scope``, ``num_parts`` and ``minifig_count`` keys.
+        scope: Value the ``scope`` field must equal for a row to be kept.
+
+    Returns:
+        Two parallel lists, in the order ``read_rows`` yields the kept rows:
+        the ``num_parts`` values (x axis) and the ``minifig_count`` values
+        (y axis), both converted to ``int``.
+    """
+    pairs = [
+        (int(record["num_parts"]), int(record["minifig_count"]))
+        for record in read_rows(path)
+        if record["scope"] == scope
+    ]
+    part_counts = [parts for parts, _ in pairs]
+    minifig_counts = [minifigs for _, minifigs in pairs]
+    return part_counts, minifig_counts
+
+
+def compute_regression(points: Iterable[Tuple[int, int]]) -> Tuple[float, float]:
+    """Fit a simple least-squares line through ``points``.
+
+    Args:
+        points: ``(x, y)`` pairs. Any iterable is accepted, including one-shot
+            iterators such as generators or ``zip`` objects.
+
+    Returns:
+        ``(slope, intercept)``. When every x is identical the slope is ``0.0``
+        and the intercept is the mean of y.
+
+    Raises:
+        ZeroDivisionError: If ``points`` is empty (no mean can be computed).
+    """
+    # Materialise once: the data is traversed several times below, and a
+    # one-shot iterator would be exhausted after the first pass, silently
+    # producing a flat line through the origin.
+    pairs = list(points)
+    n = len(pairs)
+    mean_x = sum(x for x, _ in pairs) / n
+    mean_y = sum(y for _, y in pairs) / n
+    numerator = 0.0
+    denominator = 0.0
+    for x, y in pairs:
+        dx = x - mean_x
+        dy = y - mean_y
+        numerator += dx * dy
+        denominator += dx * dx
+    # Zero variance in x means the slope is undefined; fall back to a flat line.
+    slope = numerator / denominator if denominator != 0 else 0.0
+    intercept = mean_y - slope * mean_x
+    return slope, intercept
+
+
+def plot_minifig_parts_correlation(correlation_path: Path, destination_path: Path) -> None:
+    """Draw the parts/minifigs scatter plot with one trend line per scope.
+
+    Points of the ``"filtered"`` and ``"catalog"`` scopes are read from
+    ``correlation_path`` with ``load_points``, a regression line is computed
+    for each scope with ``compute_regression``, and the figure is saved to
+    ``destination_path`` at 160 dpi.
+
+    Args:
+        correlation_path: Correlation file passed to ``load_points``.
+        destination_path: Where the figure is saved; its parent directory is
+            prepared with ``ensure_parent_dir`` first.
+
+    Returns:
+        None. Nothing is drawn or written when either scope has no points.
+    """
+    filtered_x, filtered_y = load_points(correlation_path, "filtered")
+    catalog_x, catalog_y = load_points(correlation_path, "catalog")
+    filtered_points = list(zip(filtered_x, filtered_y))
+    catalog_points = list(zip(catalog_x, catalog_y))
+    # A trend line needs at least one point in each scope.
+    if not filtered_points or not catalog_points:
+        return
+    filtered_slope, filtered_intercept = compute_regression(filtered_points)
+    catalog_slope, catalog_intercept = compute_regression(catalog_points)
+    # Both trend lines span the full x range covered by the two scopes.
+    x_min = min(min(filtered_x), min(catalog_x))
+    x_max = max(max(filtered_x), max(catalog_x))
+
+    # (slope, intercept, color, linestyle, linewidth, legend label), in draw order.
+    trends = (
+        (
+            catalog_slope,
+            catalog_intercept,
+            "#555555",
+            "--",
+            1.4,
+            f"Tendance globale (pente {catalog_slope:.3f})",
+        ),
+        (
+            filtered_slope,
+            filtered_intercept,
+            "#1f77b4",
+            "-",
+            1.6,
+            f"Tendance thèmes filtrés (pente {filtered_slope:.3f})",
+        ),
+    )
+
+    fig, ax = plt.subplots(figsize=(10, 7))
+    # try/finally: the figure must be released even if drawing or saving fails,
+    # otherwise it stays registered in pyplot.
+    try:
+        ax.scatter(catalog_x, catalog_y, color="#bbbbbb", alpha=0.25, s=18, label="Catalogue global")
+        ax.scatter(filtered_x, filtered_y, color="#1f77b4", alpha=0.8, s=28, label="Thèmes filtrés")
+        for slope, intercept, color, linestyle, linewidth, label in trends:
+            ax.plot(
+                [x_min, x_max],
+                [slope * x_min + intercept, slope * x_max + intercept],
+                color=color,
+                linestyle=linestyle,
+                linewidth=linewidth,
+                label=label,
+            )
+        ax.set_xlabel("Nombre de pièces du set")
+        ax.set_ylabel("Nombre de minifigs")
+        ax.set_title("Corrélation pièces / minifigs")
+        ax.grid(True, linestyle="--", alpha=0.3)
+        ax.legend(loc="upper left")
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=160)
+    finally:
+        plt.close(fig)
--- a/lib/rebrickable/minifig_parts_correlation.py
+++ b/lib/rebrickable/minifig_parts_correlation.py
@@ -0,0 +1,96 @@
+"""Prépare les données de corrélation pièces/minifigs par set."""
+
+import csv
+from pathlib import Path
+from typing import Dict, List, Sequence
+
+from lib.filesystem import ensure_parent_dir
+from lib.rebrickable.parts_inventory import index_inventory_minifigs_by_inventory, select_latest_inventories
+from lib.rebrickable.stats import read_rows
+
+
+def load_minifig_counts_by_set(path: Path) -> Dict[str, int]:
+    """Map each set number to the minifig count listed in a CSV file.
+
+    Args:
+        path: CSV file whose header row contains at least the ``set_num`` and
+            ``minifig_count`` columns.
+
+    Returns:
+        ``{set_num: minifig_count}``. When a set number appears on several
+        rows, the last row wins.
+
+    Raises:
+        KeyError: If one of the two columns is missing from the header.
+        ValueError: If a ``minifig_count`` value is not an integer literal.
+    """
+    # newline="" leaves line-ending handling to the csv module, as its
+    # documentation requires; otherwise line breaks inside quoted fields
+    # are altered before the parser sees them.
+    with path.open(newline="") as csv_file:
+        return {row["set_num"]: int(row["minifig_count"]) for row in csv.DictReader(csv_file)}
+
+
+def load_num_parts(path: Path) -> Dict[str, int]:
+    """Map each set number to the part count listed in a CSV file.
+
+    Args:
+        path: CSV file whose header row contains at least the ``set_num`` and
+            ``num_parts`` columns.
+
+    Returns:
+        ``{set_num: num_parts}``. When a set number appears on several rows,
+        the last row wins.
+
+    Raises:
+        KeyError: If one of the two columns is missing from the header.
+        ValueError: If a ``num_parts`` value is not an integer literal.
+    """
+    # newline="" leaves line-ending handling to the csv module, as its
+    # documentation requires; otherwise line breaks inside quoted fields
+    # are altered before the parser sees them.
+    with path.open(newline="") as csv_file:
+        return {row["set_num"]: int(row["num_parts"]) for row in csv.DictReader(csv_file)}
+
+
+def build_global_minifig_counts(inventories_path: Path, inventory_minifigs_path: Path) -> Dict[str, int]:
+    """Total the minifig quantities attached to each set's selected inventory.
+
+    Args:
+        inventories_path: File handed to ``select_latest_inventories``, whose
+            result maps a set number to an inventory record with an ``id`` key
+            (presumably the most recent inventory of the set, judging by the
+            helper's name; confirm in ``parts_inventory``).
+        inventory_minifigs_path: File handed to
+            ``index_inventory_minifigs_by_inventory``, whose result maps an
+            inventory id to rows carrying a ``quantity`` field.
+
+    Returns:
+        ``{set_num: total_quantity}`` for every set returned by
+        ``select_latest_inventories``; a set whose inventory has no minifig
+        rows gets ``0``.
+    """
+    latest_by_set = select_latest_inventories(inventories_path)
+    entries_by_inventory = index_inventory_minifigs_by_inventory(inventory_minifigs_path)
+    return {
+        set_num: sum(int(entry["quantity"]) for entry in entries_by_inventory.get(latest["id"], []))
+        for set_num, latest in latest_by_set.items()
+    }
+
+
+def build_correlation_rows(
+    filtered_counts_path: Path,
+    filtered_sets_path: Path,
+    all_sets_path: Path,
+    inventories_path: Path,
+    inventory_minifigs_path: Path,
+) -> List[dict]:
+    """Assemble the parts/minifigs rows for the filtered sets and the catalog.
+
+    Args:
+        filtered_counts_path: CSV read by ``load_minifig_counts_by_set``; it
+            drives which sets get a ``"filtered"`` row.
+        filtered_sets_path: CSV read by ``load_num_parts`` for those sets.
+        all_sets_path: CSV read by ``load_num_parts`` for the whole catalog.
+        inventories_path: Passed to ``build_global_minifig_counts``.
+        inventory_minifigs_path: Passed to ``build_global_minifig_counts``.
+
+    Returns:
+        A list of dicts with the keys ``scope``, ``set_num``, ``num_parts``
+        and ``minifig_count`` (the last two as strings). All ``"filtered"``
+        rows come first, followed by the ``"catalog"`` rows.
+
+    Raises:
+        KeyError: If a set listed in ``filtered_counts_path`` has no entry in
+            ``filtered_sets_path``.
+    """
+    minifigs_by_filtered_set = load_minifig_counts_by_set(filtered_counts_path)
+    parts_by_filtered_set = load_num_parts(filtered_sets_path)
+    rows: List[dict] = [
+        {
+            "scope": "filtered",
+            "set_num": set_num,
+            "num_parts": str(parts_by_filtered_set[set_num]),
+            "minifig_count": str(minifig_count),
+        }
+        for set_num, minifig_count in minifigs_by_filtered_set.items()
+    ]
+
+    parts_by_catalog_set = load_num_parts(all_sets_path)
+    minifigs_by_catalog_set = build_global_minifig_counts(inventories_path, inventory_minifigs_path)
+    # Catalog sets without a positive part count are left out; sets unknown to
+    # the minifig index count as having zero minifigs.
+    rows.extend(
+        {
+            "scope": "catalog",
+            "set_num": set_num,
+            "num_parts": str(num_parts),
+            "minifig_count": str(minifigs_by_catalog_set.get(set_num, 0)),
+        }
+        for set_num, num_parts in parts_by_catalog_set.items()
+        if num_parts > 0
+    )
+    return rows
+
+
+def write_correlation_rows(path: Path, rows: Sequence[dict]) -> None:
+    """Write the correlation rows to ``path`` as a CSV file with a header.
+
+    Args:
+        path: Output file; its parent directory is prepared with
+            ``ensure_parent_dir`` before the file is opened for writing.
+        rows: Dicts keyed by ``scope``, ``set_num``, ``num_parts`` and
+            ``minifig_count``; they are written in the given order.
+    """
+    ensure_parent_dir(path)
+    columns = ["scope", "set_num", "num_parts", "minifig_count"]
+    with path.open("w", newline="") as output:
+        csv_writer = csv.DictWriter(output, fieldnames=columns)
+        csv_writer.writeheader()
+        csv_writer.writerows(rows)
+
+
+def load_correlation_rows(path: Path) -> List[dict]:
+    """Read the parts/minifigs correlation CSV.
+
+    Thin wrapper that delegates to ``read_rows`` and returns its result
+    unchanged.
+    """
+    correlation_rows = read_rows(path)
+    return correlation_rows