Premiers éléments de l'étude

2025-12-01 21:57:05 +01:00
commit 22b4dae0ba
46 changed files with 2595 additions and 0 deletions
--- a/lib/plots/__init__.py
+++ b/lib/plots/__init__.py
@@ -0,0 +1 @@
+"""Utilitaires de visualisation des données LEGO."""
--- a/lib/plots/colors_grid.py
+++ b/lib/plots/colors_grid.py
@@ -0,0 +1,174 @@
+"""Visualisation des couleurs utilisées dans l'inventaire filtré."""
+
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.lines import Line2D
+
+from lib.filesystem import ensure_parent_dir
+from lib.color_sort import lab_sort_key, sort_hex_colors_lab
+from lib.rebrickable.parts_inventory import normalize_boolean
+from lib.rebrickable.stats import read_rows
+
+
+def sort_colors_perceptually(colors: Iterable[dict]) -> List[dict]:
+    """Return the colors reordered according to ``sort_hex_colors_lab``.
+
+    Every item must expose a ``"color_rgb"`` key. The ordering itself is
+    delegated to ``sort_hex_colors_lab`` (presumably a Lab-space sort on hue,
+    chroma and lightness -- confirm in ``lib.color_sort``).
+
+    The input is materialized first because it is traversed twice: a one-shot
+    iterator such as a generator would otherwise be exhausted by the first
+    pass, and the function would silently return an empty list.
+    """
+    color_list = list(colors)
+    ordered_hex = sort_hex_colors_lab(color["color_rgb"] for color in color_list)
+    # Rank of each hex value in the sorted sequence, used as the sort key.
+    index_map = {hex_value: index for index, hex_value in enumerate(ordered_hex)}
+    return sorted(color_list, key=lambda color: index_map[color["color_rgb"]])
+
+
+def load_used_colors(parts_path: Path, colors_path: Path, minifig_only: bool = False) -> List[dict]:
+    """Load the colors found in the parts file with their summed quantities.
+
+    When ``minifig_only`` is true, only rows whose ``is_minifig_part`` equals
+    ``"true"`` are kept; otherwise those rows are the ones left out. No other
+    row filtering happens in this function.
+
+    Each result is a dict with ``color_rgb``, ``is_translucent``, ``name``
+    (falling back to the RGB value when the color table has no match) and
+    ``quantity``. The list is ordered by ``sort_colors_perceptually``.
+    """
+    part_rows = read_rows(parts_path)
+    names_by_key = {
+        (entry["rgb"], normalize_boolean(entry["is_trans"])): entry["name"]
+        for entry in read_rows(colors_path)
+    }
+    wants_minifig = bool(minifig_only)
+    quantity_by_key: Dict[Tuple[str, str], int] = {}
+    for part in part_rows:
+        # Keep the row only when its minifig flag matches the requested mode.
+        if (part.get("is_minifig_part") == "true") != wants_minifig:
+            continue
+        color_key = (part["color_rgb"], part["is_translucent"])
+        previous = quantity_by_key.get(color_key, 0)
+        quantity_by_key[color_key] = previous + int(part["quantity_in_set"])
+    palette = [
+        {
+            "color_rgb": rgb,
+            "is_translucent": translucent,
+            "name": names_by_key.get((rgb, translucent), rgb),
+            "quantity": total,
+        }
+        for (rgb, translucent), total in quantity_by_key.items()
+    ]
+    return sort_colors_perceptually(palette)
+
+
+def build_hex_positions(count: int, columns: int = 9, spacing: float = 1.1) -> List[Tuple[float, float]]:
+    """Return ``count`` (x, y) positions laid out on a staggered grid.
+
+    Cells are filled row by row, ``columns`` per row and ``spacing`` apart.
+    Odd rows are shifted right by half a spacing, and each row sits
+    ``0.85 * spacing`` below the previous one (y decreases downwards).
+    """
+    row_total = (count + columns - 1) // columns
+    row_height = spacing * 0.85
+    cells: List[Tuple[float, float]] = [
+        (col * spacing + (0.0 if row % 2 == 0 else spacing / 2), -row * row_height)
+        for row in range(row_total)
+        for col in range(columns)
+    ]
+    # The last row may be only partially used: drop the surplus cells.
+    return cells[:count]
+
+
+def build_background(width: float, height: float, resolution: int = 600) -> np.ndarray:
+    """Build the gradient image drawn behind the color discs.
+
+    The result is a ``resolution`` x ``resolution`` x 3 array sampled on the
+    square [-1, 1] x [-1, 1]. Intensity fades with distance from the centre,
+    is tilted along the diagonal, and is clipped to [0.05, 0.95].
+
+    ``width`` and ``height`` are part of the signature but are not used by
+    the computation.
+    """
+    axis = np.linspace(-1.0, 1.0, resolution)
+    grid_x, grid_y = np.meshgrid(axis, axis)
+    distance = np.sqrt(grid_x**2 + grid_y**2)
+    tilt = (grid_x + grid_y) / 2
+    intensity = np.clip(0.35 + 0.35 * (1 - distance) + 0.2 * tilt, 0.05, 0.95)
+    # The first two channels are slightly attenuated relative to the third.
+    return np.stack((intensity * 0.9, intensity * 0.92, intensity), axis=-1)
+
+
+def plot_colors_grid(
+    parts_path: Path,
+    colors_path: Path,
+    destination_path: Path,
+    minifig_only: bool = False,
+) -> None:
+    """Draw the used colors as a staggered grid of discs and save the image.
+
+    Colors come from ``load_used_colors``. Marker size grows with the quantity
+    of each color. Translucent colors are drawn semi-transparent over a pale
+    halo and get a light outline; opaque ones get a dark outline.
+
+    Args:
+        parts_path: parts file forwarded to ``load_used_colors``.
+        colors_path: color table forwarded to ``load_used_colors``.
+        destination_path: output image path; ``ensure_parent_dir`` is called
+            on it before saving.
+        minifig_only: forwarded to ``load_used_colors``; also switches the
+            title and the legend height.
+
+    Raises:
+        ValueError: if there is no color to draw.
+    """
+    colors = load_used_colors(parts_path, colors_path, minifig_only=minifig_only)
+    if not colors:
+        # Fail before any figure exists; min()/max() below would otherwise
+        # raise a much less helpful "empty sequence" error.
+        raise ValueError(f"no colors to plot from {parts_path}")
+
+    positions = build_hex_positions(len(colors))
+    x_values = [x for x, _ in positions]
+    y_values = [y for _, y in positions]
+    x_min, x_max = min(x_values), max(x_values)
+    y_min, y_max = min(y_values), max(y_values)
+    width = x_max - x_min + 1.5
+    height = y_max - y_min + 1.5
+
+    max_quantity = max(color["quantity"] for color in colors)
+    min_marker = 720
+    max_marker = 1600
+
+    fig, ax = plt.subplots(figsize=(10, 10), facecolor="#0b0c10")
+    try:
+        ax.imshow(
+            build_background(width, height),
+            extent=[x_min - 0.75, x_min - 0.75 + width, y_min - 0.75, y_min - 0.75 + height],
+            origin="lower",
+            zorder=0,
+        )
+
+        for (x, y), color in zip(positions, colors):
+            is_translucent = color["is_translucent"] == "true"
+            alpha = 0.65 if is_translucent else 1.0
+            edge = "#f7f7f7" if is_translucent else "#0d0d0d"
+            # Guard against an all-zero quantity column (division by zero).
+            ratio = color["quantity"] / max_quantity if max_quantity else 0.0
+            size = min_marker + (max_marker - min_marker) * ratio
+            if is_translucent:
+                # Faint white halo under translucent discs.
+                ax.scatter(
+                    x,
+                    y,
+                    s=size * 1.25,
+                    c="#ffffff",
+                    alpha=0.18,
+                    edgecolors="none",
+                    linewidths=0,
+                    zorder=2,
+                )
+            ax.scatter(
+                x,
+                y,
+                s=size,
+                c=f"#{color['color_rgb']}",
+                alpha=alpha,
+                edgecolors=edge,
+                linewidths=1.1,
+                zorder=3,
+            )
+
+        legend_handles = [
+            Line2D([0], [0], marker="o", color="none", markerfacecolor="#cccccc", markeredgecolor="#0d0d0d", markersize=10, label="Opaque"),
+            Line2D(
+                [0],
+                [0],
+                marker="o",
+                color="none",
+                markerfacecolor="#cccccc",
+                markeredgecolor="#f7f7f7",
+                markersize=10,
+                alpha=0.65,
+                label="Translucide",
+            ),
+        ]
+        legend_y = 1.06 if not minifig_only else 1.08
+        ax.legend(
+            handles=legend_handles,
+            loc="upper center",
+            bbox_to_anchor=(0.5, legend_y),
+            ncol=2,
+            frameon=False,
+            labelcolor="#f0f0f0",
+        )
+
+        title_prefix = "Palette des couleurs utilisées (rechanges incluses)"
+        if minifig_only:
+            title_prefix = "Palette des couleurs de minifigs (rechanges incluses)"
+        ax.set_title(title_prefix, fontsize=14, color="#f0f0f0", pad=28)
+        ax.set_xticks([])
+        ax.set_yticks([])
+        ax.set_xlim(x_min - 1.0, x_max + 1.0)
+        ax.set_ylim(y_min - 1.0, y_max + 1.0)
+        for spine in ax.spines.values():
+            spine.set_visible(False)
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=200)
+    finally:
+        # Always release the figure, even when drawing or saving fails.
+        plt.close(fig)
--- a/lib/plots/parts_per_set.py
+++ b/lib/plots/parts_per_set.py
@@ -0,0 +1,110 @@
+"""Graphiques sur la taille moyenne des sets (pièces par set)."""
+
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.milestones import load_milestones
+from lib.rebrickable.stats import read_rows
+
+
+def compute_average_parts_per_set(rows: Iterable[dict]) -> List[Tuple[int, float]]:
+    """Return ``(year, mean parts per set)`` pairs sorted by year.
+
+    Each row contributes one set to its ``year`` and ``num_parts`` parts; both
+    fields are converted with ``int``.
+    """
+    parts_by_year: Dict[int, int] = {}
+    sets_by_year: Dict[int, int] = {}
+    for record in rows:
+        release_year = int(record["year"])
+        parts_by_year[release_year] = parts_by_year.get(release_year, 0) + int(record["num_parts"])
+        sets_by_year[release_year] = sets_by_year.get(release_year, 0) + 1
+    return [
+        (release_year, parts_by_year[release_year] / sets_by_year[release_year])
+        for release_year in sorted(parts_by_year)
+    ]
+
+
+def compute_rolling_mean(series: List[Tuple[int, float]], window: int) -> List[Tuple[int, float]]:
+    """Return the trailing rolling mean of ``series`` over ``window`` points.
+
+    The output has one ``(year, value)`` pair per input pair. Positions that
+    do not yet have ``window`` points behind them get the placeholder value
+    ``0.0`` rather than a partial mean.
+
+    Raises:
+        ValueError: if ``window`` is smaller than 1. A zero window would
+            divide by zero and a negative one would produce meaningless
+            slices.
+    """
+    if window < 1:
+        raise ValueError(f"window must be >= 1, got {window}")
+    values = [value for _, value in series]
+    rolling: List[Tuple[int, float]] = []
+    for index, (year, _) in enumerate(series):
+        if index + 1 < window:
+            # Not enough history yet: keep the placeholder.
+            rolling.append((year, 0.0))
+        else:
+            window_values = values[index - window + 1 : index + 1]
+            rolling.append((year, sum(window_values) / window))
+    return rolling
+
+
+def plot_parts_per_set(
+    enriched_sets_path: Path,
+    milestones_path: Path,
+    destination_path: Path,
+    rolling_window: int = 3,
+) -> None:
+    """Plot the yearly and rolling average number of parts per set.
+
+    Milestones whose year lies inside the plotted range are drawn as dashed
+    vertical lines with a rotated label. Labels sharing a year are shifted
+    alternately right and left of the line.
+
+    Args:
+        enriched_sets_path: sets file read with ``read_rows``; rows need the
+            ``year`` and ``num_parts`` columns.
+        milestones_path: file passed to ``load_milestones``; each milestone is
+            accessed through its ``year`` and ``description`` keys.
+        destination_path: output image path; ``ensure_parent_dir`` is called
+            on it before saving.
+        rolling_window: number of points in the rolling mean.
+
+    Raises:
+        ValueError: if the sets file yields no row.
+    """
+    sets_rows = read_rows(enriched_sets_path)
+    milestones = load_milestones(milestones_path)
+    annual_series = compute_average_parts_per_set(sets_rows)
+    if not annual_series:
+        # Fail before any figure exists instead of crashing later in min().
+        raise ValueError(f"no sets to plot from {enriched_sets_path}")
+    rolling_series = compute_rolling_mean(annual_series, rolling_window)
+    years = [year for year, _ in annual_series]
+    annual_values = [value for _, value in annual_series]
+    rolling_values = [value for _, value in rolling_series]
+    first_year, last_year = min(years), max(years)
+
+    # Leave the upper half of the plot free for the milestone labels.
+    peak = max(max(annual_values), max(rolling_values))
+    top_limit = peak * 2
+    milestones_in_range = sorted(
+        [m for m in milestones if first_year <= m["year"] <= last_year],
+        key=lambda m: (m["year"], m["description"]),
+    )
+
+    fig, ax = plt.subplots(figsize=(12, 6))
+    try:
+        ax.plot(years, annual_values, marker="o", color="#2ca02c", label="Moyenne annuelle (pièces/set)")
+        ax.plot(
+            years,
+            rolling_values,
+            marker="^",
+            color="#9467bd",
+            label=f"Moyenne glissante {rolling_window} ans (pièces/set)",
+        )
+        ax.set_xlabel("Année")
+        ax.set_ylabel("Pièces par set")
+        ax.set_title("Évolution de la taille moyenne des sets (thèmes filtrés)")
+        ax.grid(True, linestyle="--", alpha=0.3)
+        ax.set_xlim(first_year - 0.4, last_year + 0.4)
+        ax.set_xticks(list(range(first_year, last_year + 1)))
+        ax.tick_params(axis="x", labelrotation=45)
+
+        milestone_offsets: Dict[int, int] = {}
+        offset_step = 0.4
+        max_offset = 0
+        for milestone in milestones_in_range:
+            year = milestone["year"]
+            # Number of labels already placed for this year.
+            count_for_year = milestone_offsets.get(year, 0)
+            milestone_offsets[year] = count_for_year + 1
+            max_offset = max(max_offset, count_for_year)
+            # Alternate right (even) and left (odd), moving outwards in pairs.
+            horizontal_offset = offset_step * (count_for_year // 2 + 1)
+            if count_for_year % 2 == 1:
+                horizontal_offset *= -1
+            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
+            ax.text(
+                year + horizontal_offset,
+                top_limit,
+                milestone["description"],
+                rotation=90,
+                verticalalignment="top",
+                horizontalalignment="center",
+                fontsize=8,
+                color="#d62728",
+            )
+
+        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))
+        ax.legend(loc="upper left", bbox_to_anchor=(1.12, 1))
+
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=150)
+    finally:
+        # Always release the figure, even when drawing or saving fails.
+        plt.close(fig)
--- a/lib/plots/sets_per_year.py
+++ b/lib/plots/sets_per_year.py
@@ -0,0 +1,196 @@
+"""Graphiques montrant le nombre de sets sortis par année."""
+
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+import matplotlib.pyplot as plt
+
+from lib.filesystem import ensure_parent_dir
+from lib.milestones import load_milestones
+from lib.rebrickable.stats import read_rows
+
+
+def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
+    """Count rows per ``year`` and return ``(year, count)`` pairs by year."""
+    tally: Dict[int, int] = {}
+    for record in rows:
+        release_year = int(record["year"])
+        if release_year in tally:
+            tally[release_year] += 1
+        else:
+            tally[release_year] = 1
+    return [(release_year, tally[release_year]) for release_year in sorted(tally)]
+
+
+def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
+    """Sum ``num_parts`` per ``year`` and return ``(year, total)`` pairs by year."""
+    parts_by_year: Dict[int, int] = {}
+    for record in rows:
+        release_year = int(record["year"])
+        running_total = parts_by_year.get(release_year, 0)
+        parts_by_year[release_year] = running_total + int(record["num_parts"])
+    return [(release_year, parts_by_year[release_year]) for release_year in sorted(parts_by_year)]
+
+
+def plot_sets_per_year(
+    enriched_sets_path: Path,
+    milestones_path: Path,
+    destination_path: Path,
+) -> None:
+    """Plot sets and parts per year as stacked bars with cumulative means.
+
+    The left axis shows the number of sets per year (owned stacked under not
+    owned) and the running mean of sets per year. The right axis shows the
+    same split for part counts, plus the running parts-per-set ratio.
+    Milestones inside the year range are drawn as dashed vertical lines with
+    rotated labels.
+
+    Args:
+        enriched_sets_path: sets file read with ``read_rows``; rows need the
+            ``year``, ``num_parts`` and ``in_collection`` columns.
+        milestones_path: file passed to ``load_milestones``; each milestone is
+            accessed through its ``year`` and ``description`` keys.
+        destination_path: output image path; ``ensure_parent_dir`` is called
+            on it before saving.
+
+    Raises:
+        ValueError: if the sets file yields no row.
+    """
+    sets_rows = read_rows(enriched_sets_path)
+    milestones = load_milestones(milestones_path)
+    # Build each lookup once instead of once per plotted year.
+    counts_by_year = dict(compute_sets_per_year(sets_rows))
+    parts_by_year = dict(compute_parts_per_year(sets_rows))
+    if not counts_by_year:
+        # Fail before any figure exists instead of crashing later in min().
+        raise ValueError(f"no sets to plot from {enriched_sets_path}")
+    min_year = min(counts_by_year)
+    max_year = max(counts_by_year)
+    # Fill the gaps so every year of the range has a bar slot.
+    years = list(range(min_year, max_year + 1))
+    counts = [counts_by_year.get(year, 0) for year in years]
+    parts_totals = [parts_by_year.get(year, 0) for year in years]
+
+    owned_counts_map: Dict[int, int] = {}
+    owned_parts_map: Dict[int, int] = {}
+    for row in sets_rows:
+        year = int(row["year"])
+        if row["in_collection"] == "true":
+            owned_counts_map[year] = owned_counts_map.get(year, 0) + 1
+            owned_parts_map[year] = owned_parts_map.get(year, 0) + int(row["num_parts"])
+    owned_counts = [owned_counts_map.get(year, 0) for year in years]
+    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
+    owned_parts = [owned_parts_map.get(year, 0) for year in years]
+    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]
+
+    # Running mean of sets per year since the first plotted year.
+    cumulative_mean = []
+    total = 0
+    for elapsed_years, count in enumerate(counts, start=1):
+        total += count
+        cumulative_mean.append(total / elapsed_years)
+
+    # Running parts-per-set ratio. The range starts on a year that has at
+    # least one set, so ``rolling_sets`` is never zero here.
+    cumulative_parts_mean = []
+    rolling_sets = 0
+    rolling_parts = 0
+    for count, parts in zip(counts, parts_totals):
+        rolling_sets += count
+        rolling_parts += parts
+        cumulative_parts_mean.append(rolling_parts / rolling_sets)
+
+    milestones_in_range = sorted(
+        [m for m in milestones if min_year <= m["year"] <= max_year],
+        key=lambda m: (m["year"], m["description"]),
+    )
+
+    fig, ax = plt.subplots(figsize=(14, 6))
+    try:
+        bar_width = 0.35
+        # Set bars sit left of the year tick, part bars right of it.
+        x_sets = [year - bar_width / 2 for year in years]
+        bars_owned_sets = ax.bar(
+            x_sets,
+            owned_counts,
+            width=bar_width,
+            color="#1f77b4",
+            alpha=0.9,
+            label="Sets possédés",
+            zorder=2,
+        )
+        bars_missing_sets = ax.bar(
+            x_sets,
+            missing_counts,
+            width=bar_width,
+            bottom=owned_counts,
+            color="#9ecae1",
+            alpha=0.8,
+            label="Sets non possédés",
+        )
+        set_mean_line = ax.plot(
+            years,
+            cumulative_mean,
+            color="#ff7f0e",
+            marker="o",
+            label="Moyenne cumulative (sets)",
+            zorder=5,
+        )
+        ax2 = ax.twinx()
+        x_parts = [year + bar_width / 2 for year in years]
+        parts_bars_owned = ax2.bar(
+            x_parts,
+            owned_parts,
+            width=bar_width,
+            color="#2ca02c",
+            alpha=0.9,
+            label="Pièces (sets possédés)",
+            zorder=2,
+        )
+        parts_bars_missing = ax2.bar(
+            x_parts,
+            missing_parts,
+            width=bar_width,
+            bottom=owned_parts,
+            color="#c7e9c0",
+            alpha=0.85,
+            label="Pièces (sets non possédés)",
+        )
+        parts_mean_line = ax2.plot(
+            years,
+            cumulative_parts_mean,
+            color="#9467bd",
+            marker="^",
+            label="Moyenne cumulative (pièces/set)",
+            zorder=6,
+        )
+        # The extra 1 keeps the limit non-zero when every total is 0.
+        parts_peak = max(parts_totals + [1])
+        ax2.set_ylim(0, parts_peak * 1.1)
+        ax.set_xlabel("Année")
+        ax.set_ylabel("Nombre de sets")
+        ax2.set_ylabel("Nombre de pièces")
+        ax.set_title("Nombre de sets par année (thèmes filtrés)")
+        ax.grid(True, linestyle="--", alpha=0.3)
+        ax.set_xlim(min_year - 1, max_year + 0.4)
+        ax.set_xticks(years)
+        ax.tick_params(axis="x", labelrotation=45)
+
+        # Leave the upper half of the plot free for the milestone labels.
+        peak = max(max(counts), max(cumulative_mean))
+        top_limit = peak * 2
+        milestone_offsets: Dict[int, int] = {}
+        offset_step = 0.3
+        max_offset = 0
+        for milestone in milestones_in_range:
+            year = milestone["year"]
+            # Number of labels already placed for this year.
+            count_for_year = milestone_offsets.get(year, 0)
+            milestone_offsets[year] = count_for_year + 1
+            max_offset = max(max_offset, count_for_year)
+            # Alternate right (even) and left (odd), moving outwards in pairs.
+            horizontal_offset = offset_step * (count_for_year // 2 + 1)
+            if count_for_year % 2 == 1:
+                horizontal_offset *= -1
+            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
+            ax.text(
+                year + horizontal_offset,
+                top_limit,
+                milestone["description"],
+                rotation=90,
+                verticalalignment="top",
+                horizontalalignment="center",
+                fontsize=8,
+                color="#d62728",
+            )
+
+        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))
+        # One legend for the artists of both axes.
+        handles = [
+            bars_owned_sets,
+            bars_missing_sets,
+            parts_bars_owned,
+            parts_bars_missing,
+            set_mean_line[0],
+            parts_mean_line[0],
+        ]
+        labels = [
+            "Sets possédés",
+            "Sets non possédés",
+            "Pièces (sets possédés)",
+            "Pièces (sets non possédés)",
+            "Moyenne cumulative (sets)",
+            "Moyenne cumulative (pièces/set)",
+        ]
+        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))
+        ensure_parent_dir(destination_path)
+        fig.tight_layout()
+        fig.savefig(destination_path, dpi=150)
+    finally:
+        # Always release the figure, even when drawing or saving fails.
+        plt.close(fig)
				`@@ -0,0 +1 @@`
				`"""Utilitaires de visualisation des données LEGO."""`