1

Premiers éléments de l'étude

This commit is contained in:
2025-12-01 21:57:05 +01:00
commit 22b4dae0ba
46 changed files with 2595 additions and 0 deletions

1
lib/plots/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Utilitaires de visualisation des données LEGO."""

174
lib/plots/colors_grid.py Normal file
View File

@@ -0,0 +1,174 @@
"""Visualisation des couleurs utilisées dans l'inventaire filtré."""
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from lib.filesystem import ensure_parent_dir
from lib.color_sort import lab_sort_key, sort_hex_colors_lab
from lib.rebrickable.parts_inventory import normalize_boolean
from lib.rebrickable.stats import read_rows
def sort_colors_perceptually(colors: Iterable[dict]) -> List[dict]:
    """Order color records following the ranking given by ``sort_hex_colors_lab``.

    The input is materialized into a list first because it is traversed
    twice (once to collect the hex codes, once to order the records); a
    one-shot iterable such as a generator would otherwise be exhausted by
    the first pass and yield an empty result.

    Args:
        colors: Records exposing a ``"color_rgb"`` hex value.

    Returns:
        A new list of the same records, ordered by the position of their
        ``"color_rgb"`` in the sequence returned by ``sort_hex_colors_lab``
        (described by the original author as a Lab-space ordering on
        perceived hue, chroma and lightness). Records sharing a hex value
        keep their relative input order because the sort is stable.
    """
    records = list(colors)
    ordered_hex = sort_hex_colors_lab(record["color_rgb"] for record in records)
    rank_by_hex = {hex_value: position for position, hex_value in enumerate(ordered_hex)}
    return sorted(records, key=lambda record: rank_by_hex[record["color_rgb"]])
def load_used_colors(parts_path: Path, colors_path: Path, minifig_only: bool = False) -> List[dict]:
    """Load the colors in use together with their summed quantities.

    Part rows are grouped by ``(color_rgb, is_translucent)`` and their
    ``quantity_in_set`` values are added up. When ``minifig_only`` is true,
    only rows whose ``is_minifig_part`` equals ``"true"`` are kept;
    otherwise exactly those rows are dropped.

    NOTE(review): the original docstring stated that spare parts are
    excluded, but no such filter is visible in this function — presumably
    handled upstream, or the statement is stale; confirm.
    NOTE(review): lookup keys pass ``is_trans`` through
    ``normalize_boolean`` while part rows use ``is_translucent`` verbatim;
    this assumes both produce the same representation — confirm.

    Returns:
        One dict per color with the keys ``color_rgb``, ``is_translucent``,
        ``name`` (falls back to the hex code when the color is not found in
        the lookup) and ``quantity``, ordered by
        ``sort_colors_perceptually``.
    """
    part_rows = read_rows(parts_path)

    name_by_color = {}
    for color_row in read_rows(colors_path):
        lookup_key = (color_row["rgb"], normalize_boolean(color_row["is_trans"]))
        name_by_color[lookup_key] = color_row["name"]

    want_minifig = bool(minifig_only)
    quantity_by_color: Dict[Tuple[str, str], int] = {}
    for part in part_rows:
        part_is_minifig = part.get("is_minifig_part") == "true"
        if part_is_minifig != want_minifig:
            continue
        color_key = (part["color_rgb"], part["is_translucent"])
        quantity_by_color[color_key] = quantity_by_color.get(color_key, 0) + int(part["quantity_in_set"])

    palette = [
        {
            "color_rgb": color_key[0],
            "is_translucent": color_key[1],
            "name": name_by_color.get(color_key, color_key[0]),
            "quantity": total,
        }
        for color_key, total in quantity_by_color.items()
    ]
    return sort_colors_perceptually(palette)
def build_hex_positions(count: int, columns: int = 9, spacing: float = 1.1) -> List[Tuple[float, float]]:
    """Lay out ``count`` points on a staggered (hexagon-like) grid.

    Rows are filled left to right with ``columns`` points each; odd rows
    are shifted right by half the spacing, and each row sits
    ``0.85 * spacing`` below the previous one (y decreases downward).

    Returns:
        The ``(x, y)`` coordinates in fill order, exactly ``count`` of them
        for a positive ``columns``.
    """
    row_height = spacing * 0.85
    half_shift = spacing / 2
    total_rows = (count + columns - 1) // columns

    grid: List[Tuple[float, float]] = []
    for row_index in range(total_rows):
        shift = half_shift if row_index % 2 else 0.0
        y = -row_index * row_height
        # The last row may be partial: only place the points still missing.
        cells_in_row = min(columns, count - row_index * columns)
        grid.extend((col_index * spacing + shift, y) for col_index in range(cells_in_row))
    return grid
def build_background(width: float, height: float, resolution: int = 600) -> np.ndarray:
    """Build a gradient RGB image used as a backdrop for translucent colors.

    The brightness of each pixel combines a radial falloff from the image
    center with a diagonal term, then is clipped to ``[0.05, 0.95]``. The
    red and green channels are scaled by 0.9 and 0.92 relative to blue.

    Args:
        width: Accepted for interface compatibility; not used by the
            computation.
        height: Accepted for interface compatibility; not used by the
            computation.
        resolution: Number of samples along each axis.

    Returns:
        An array of shape ``(resolution, resolution, 3)``.
    """
    axis_x = np.linspace(-1.0, 1.0, resolution)
    axis_y = np.linspace(-1.0, 1.0, resolution)
    grid_x, grid_y = np.meshgrid(axis_x, axis_y)

    distance = np.sqrt(grid_x**2 + grid_y**2)
    tilt = (grid_x + grid_y) / 2
    brightness = np.clip(0.35 + 0.35 * (1 - distance) + 0.2 * tilt, 0.05, 0.95)

    channels = (brightness * 0.9, brightness * 0.92, brightness)
    return np.dstack(channels)
def plot_colors_grid(
    parts_path: Path,
    colors_path: Path,
    destination_path: Path,
    minifig_only: bool = False,
) -> None:
    """Draw the used colors as a staggered grid of discs and save the figure.

    Each color is a disc whose area grows linearly with its share of the
    largest quantity. Colors whose ``is_translucent`` equals ``"true"`` are
    drawn semi-transparent with a light edge and a faint white halo.

    Args:
        parts_path: Parts inventory passed to ``load_used_colors``.
        colors_path: Color reference passed to ``load_used_colors``.
        destination_path: Output image path; its parent directory is
            prepared through ``ensure_parent_dir``.
        minifig_only: Forwarded to ``load_used_colors``; also selects the
            title and the legend's vertical anchor.

    Raises:
        ValueError: If no color is available to plot. The original code
            failed with the same exception type but with an opaque
            ``max() arg is an empty sequence`` message.
    """
    colors = load_used_colors(parts_path, colors_path, minifig_only=minifig_only)
    if not colors:
        raise ValueError(f"No colors to plot from {parts_path} (minifig_only={minifig_only}).")

    positions = build_hex_positions(len(colors))
    x_values = [x for x, _ in positions]
    y_values = [y for _, y in positions]
    # Bounds are reused several times below; compute them once.
    x_min, x_max = min(x_values), max(x_values)
    y_min, y_max = min(y_values), max(y_values)
    width = x_max - x_min + 1.5
    height = y_max - y_min + 1.5

    fig, ax = plt.subplots(figsize=(10, 10), facecolor="#0b0c10")
    background = build_background(width, height)
    ax.imshow(
        background,
        extent=[x_min - 0.75, x_min - 0.75 + width, y_min - 0.75, y_min - 0.75 + height],
        origin="lower",
        zorder=0,
    )

    max_quantity = max(color["quantity"] for color in colors)
    # Avoid a ZeroDivisionError when every quantity is zero: all discs then
    # get the minimum size.
    quantity_scale = max_quantity if max_quantity else 1
    min_marker = 720
    max_marker = 1600
    for (x, y), color in zip(positions, colors):
        is_translucent = color["is_translucent"] == "true"
        alpha = 0.65 if is_translucent else 1.0
        edge = "#f7f7f7" if is_translucent else "#0d0d0d"
        size = min_marker + (max_marker - min_marker) * (color["quantity"] / quantity_scale)
        if is_translucent:
            # Faint white halo drawn underneath the translucent disc.
            ax.scatter(
                x,
                y,
                s=size * 1.25,
                c="#ffffff",
                alpha=0.18,
                edgecolors="none",
                linewidths=0,
                zorder=2,
            )
        ax.scatter(
            x,
            y,
            s=size,
            c=f"#{color['color_rgb']}",
            alpha=alpha,
            edgecolors=edge,
            linewidths=1.1,
            zorder=3,
        )

    legend_handles = [
        Line2D(
            [0],
            [0],
            marker="o",
            color="none",
            markerfacecolor="#cccccc",
            markeredgecolor="#0d0d0d",
            markersize=10,
            label="Opaque",
        ),
        Line2D(
            [0],
            [0],
            marker="o",
            color="none",
            markerfacecolor="#cccccc",
            markeredgecolor="#f7f7f7",
            markersize=10,
            alpha=0.65,
            label="Translucide",
        ),
    ]
    legend_y = 1.06 if not minifig_only else 1.08
    ax.legend(
        handles=legend_handles,
        loc="upper center",
        bbox_to_anchor=(0.5, legend_y),
        ncol=2,
        frameon=False,
        labelcolor="#f0f0f0",
    )

    title_prefix = "Palette des couleurs utilisées (rechanges incluses)"
    if minifig_only:
        title_prefix = "Palette des couleurs de minifigs (rechanges incluses)"
    ax.set_title(title_prefix, fontsize=14, color="#f0f0f0", pad=28)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(x_min - 1.0, x_max + 1.0)
    ax.set_ylim(y_min - 1.0, y_max + 1.0)
    for spine in ax.spines.values():
        spine.set_visible(False)

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=200)
    plt.close(fig)

110
lib/plots/parts_per_set.py Normal file
View File

@@ -0,0 +1,110 @@
"""Graphiques sur la taille moyenne des sets (pièces par set)."""
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows
def compute_average_parts_per_set(rows: Iterable[dict]) -> List[Tuple[int, float]]:
    """Compute, for each year, the mean number of parts per set.

    Each row contributes one set and ``int(row["num_parts"])`` parts to the
    year given by ``int(row["year"])``.

    Returns:
        ``(year, average)`` pairs sorted by ascending year.
    """
    parts_by_year: Dict[int, int] = {}
    sets_by_year: Dict[int, int] = {}
    for row in rows:
        year = int(row["year"])
        parts_by_year[year] = parts_by_year.get(year, 0) + int(row["num_parts"])
        sets_by_year[year] = sets_by_year.get(year, 0) + 1
    return [(year, parts_by_year[year] / sets_by_year[year]) for year in sorted(parts_by_year)]
def compute_rolling_mean(series: List[Tuple[int, float]], window: int) -> List[Tuple[int, float]]:
    """Compute a trailing rolling mean over ``window`` consecutive entries.

    Entries that do not yet have ``window`` values behind them (including
    themselves) receive a ``0.0`` placeholder instead of a partial mean.

    Returns:
        One ``(year, mean)`` pair per input entry, in input order.
    """
    averaged: List[Tuple[int, float]] = []
    for position, (year, _) in enumerate(series, start=1):
        if position < window:
            # Not enough history yet: emit the placeholder value.
            averaged.append((year, 0.0))
            continue
        recent = [value for _, value in series[position - window : position]]
        averaged.append((year, sum(recent) / window))
    return averaged
def plot_parts_per_set(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
    rolling_window: int = 3,
) -> None:
    """Plot the yearly and rolling average of parts per set, with milestones.

    Two lines are drawn (annual mean and rolling mean over
    ``rolling_window`` entries). Every milestone whose year lies within the
    plotted range is marked with a dashed vertical line and a rotated
    label; labels sharing a year are pushed alternately right and left so
    they do not overlap. The figure is saved to ``destination_path``.
    """
    set_rows = read_rows(enriched_sets_path)
    all_milestones = load_milestones(milestones_path)
    yearly = compute_average_parts_per_set(set_rows)
    smoothed = compute_rolling_mean(yearly, rolling_window)

    years = [entry[0] for entry in yearly]
    yearly_means = [entry[1] for entry in yearly]
    smoothed_means = [entry[1] for entry in smoothed]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(years, yearly_means, marker="o", color="#2ca02c", label="Moyenne annuelle (pièces/set)")
    ax.plot(
        years,
        smoothed_means,
        marker="^",
        color="#9467bd",
        label=f"Moyenne glissante {rolling_window} ans (pièces/set)",
    )
    ax.set_xlabel("Année")
    ax.set_ylabel("Pièces par set")
    ax.set_title("Évolution de la taille moyenne des sets (thèmes filtrés)")
    ax.grid(True, linestyle="--", alpha=0.3)

    first_year = min(years)
    last_year = max(years)
    ax.set_xlim(first_year - 0.4, last_year + 0.4)
    ax.set_xticks(list(range(first_year, last_year + 1)))
    ax.tick_params(axis="x", labelrotation=45)

    # Leave the upper half of the axes free for the milestone labels.
    label_top = max(max(yearly_means), max(smoothed_means)) * 2

    visible_milestones = [
        candidate for candidate in all_milestones if first_year <= candidate["year"] <= last_year
    ]
    visible_milestones.sort(key=lambda candidate: (candidate["year"], candidate["description"]))

    labels_seen_per_year: Dict[int, int] = {}
    deepest_rank = 0
    for marker in visible_milestones:
        marker_year = marker["year"]
        rank = labels_seen_per_year.get(marker_year, 0)
        labels_seen_per_year[marker_year] = rank + 1
        deepest_rank = max(deepest_rank, rank)
        # Even ranks go to the right of the line, odd ranks to the left,
        # one step further out for every pair already placed.
        direction = -1 if rank % 2 == 1 else 1
        shift = direction * (0.4 * (rank // 2 + 1))
        ax.axvline(marker_year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
        ax.text(
            marker_year + shift,
            label_top,
            marker["description"],
            rotation=90,
            verticalalignment="top",
            horizontalalignment="center",
            fontsize=8,
            color="#d62728",
        )

    ax.set_ylim(0, label_top * (1 + deepest_rank * 0.02))
    ax.legend(loc="upper left", bbox_to_anchor=(1.12, 1))
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=150)
    plt.close(fig)

196
lib/plots/sets_per_year.py Normal file
View File

@@ -0,0 +1,196 @@
"""Graphiques montrant le nombre de sets sortis par année."""
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows
def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Count the rows (sets) per year.

    Returns:
        ``(year, number of sets)`` pairs sorted by ascending year, where
        the year is ``int(row["year"])``.
    """
    tally: Dict[int, int] = {}
    for release_year in (int(row["year"]) for row in rows):
        tally[release_year] = tally.get(release_year, 0) + 1
    return [(release_year, tally[release_year]) for release_year in sorted(tally)]
def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Sum the part counts of all rows per year.

    Returns:
        ``(year, total parts)`` pairs sorted by ascending year, where the
        year is ``int(row["year"])`` and each row adds
        ``int(row["num_parts"])``.
    """
    parts_by_year: Dict[int, int] = {}
    for row in rows:
        release_year = int(row["year"])
        running_total = parts_by_year.get(release_year, 0)
        parts_by_year[release_year] = running_total + int(row["num_parts"])
    return [(release_year, parts_by_year[release_year]) for release_year in sorted(parts_by_year)]
def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Plot yearly set and part counts with cumulative means and milestones.

    For every year between the first and last year present in the data
    (gaps are filled with zeros), the chart shows:

    * on the left axis, a stacked bar of owned / not-owned sets (a row is
      owned when its ``in_collection`` equals ``"true"``) and the
      cumulative mean of sets per year;
    * on the right axis, a stacked bar of parts from owned / not-owned
      sets and the cumulative mean of parts per set.

    Milestones within the year range are drawn as dashed vertical lines
    with rotated labels; labels sharing a year alternate right and left of
    the line.

    Args:
        enriched_sets_path: Sets file read through ``read_rows``.
        milestones_path: Milestones file read through ``load_milestones``.
        destination_path: Output image path; its parent directory is
            prepared through ``ensure_parent_dir``.

    Raises:
        ValueError: If the sets file yields no rows (no year range can be
            derived).
    """
    sets_rows = read_rows(enriched_sets_path)
    milestones = load_milestones(milestones_path)

    # Build the per-year lookups once; the original rebuilt these dicts for
    # every year of the range inside a comprehension.
    sets_by_year = dict(compute_sets_per_year(sets_rows))
    parts_by_year = dict(compute_parts_per_year(sets_rows))
    min_year = min(sets_by_year)
    max_year = max(sets_by_year)

    years = list(range(min_year, max_year + 1))
    counts = [sets_by_year.get(year, 0) for year in years]
    parts_totals = [parts_by_year.get(year, 0) for year in years]

    owned_counts_map: Dict[int, int] = {}
    owned_parts_map: Dict[int, int] = {}
    for row in sets_rows:
        year = int(row["year"])
        if row["in_collection"] == "true":
            owned_counts_map[year] = owned_counts_map.get(year, 0) + 1
            owned_parts_map[year] = owned_parts_map.get(year, 0) + int(row["num_parts"])
    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    # Cumulative mean of sets per year (years without sets count as zero).
    cumulative_mean = []
    total = 0
    for index, count in enumerate(counts):
        total += count
        cumulative_mean.append(total / (index + 1))

    # Cumulative mean of parts per set; stays at 0 until a set has been
    # seen so the division never has a zero denominator.
    cumulative_parts_mean = []
    rolling_sets = 0
    rolling_parts = 0
    for count, parts in zip(counts, parts_totals):
        rolling_sets += count
        rolling_parts += parts
        cumulative_parts_mean.append(rolling_parts / rolling_sets if rolling_sets else 0)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    bar_width = 0.35

    # Left axis: sets, drawn half a bar to the left of each year tick.
    x_sets = [year - bar_width / 2 for year in years]
    bars_owned_sets = ax.bar(
        x_sets,
        owned_counts,
        width=bar_width,
        color="#1f77b4",
        alpha=0.9,
        label="Sets possédés",
        zorder=2,
    )
    bars_missing_sets = ax.bar(
        x_sets,
        missing_counts,
        width=bar_width,
        bottom=owned_counts,
        color="#9ecae1",
        alpha=0.8,
        label="Sets non possédés",
    )
    set_mean_line = ax.plot(
        years,
        cumulative_mean,
        color="#ff7f0e",
        marker="o",
        label="Moyenne cumulative (sets)",
        zorder=5,
    )

    # Right axis: parts, drawn half a bar to the right of each year tick.
    ax2 = ax.twinx()
    x_parts = [year + bar_width / 2 for year in years]
    parts_bars_owned = ax2.bar(
        x_parts,
        owned_parts,
        width=bar_width,
        color="#2ca02c",
        alpha=0.9,
        label="Pièces (sets possédés)",
        zorder=2,
    )
    parts_bars_missing = ax2.bar(
        x_parts,
        missing_parts,
        width=bar_width,
        bottom=owned_parts,
        color="#c7e9c0",
        alpha=0.85,
        label="Pièces (sets non possédés)",
    )
    parts_mean_line = ax2.plot(
        years,
        cumulative_parts_mean,
        color="#9467bd",
        marker="^",
        label="Moyenne cumulative (pièces/set)",
        zorder=6,
    )
    # The extra 1 keeps the axis limit positive when every total is zero.
    parts_peak = max(parts_totals + [1])
    ax2.set_ylim(0, parts_peak * 1.1)

    ax.set_xlabel("Année")
    ax.set_ylabel("Nombre de sets")
    ax2.set_ylabel("Nombre de pièces")
    ax.set_title("Nombre de sets par année (thèmes filtrés)")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.set_xlim(min_year - 1, max_year + 0.4)
    ax.set_xticks(list(range(min_year, max_year + 1)))
    ax.tick_params(axis="x", labelrotation=45)

    # Leave the upper half of the left axis free for the milestone labels.
    peak = max(max(counts), max(cumulative_mean))
    top_limit = peak * 2
    milestone_offsets: Dict[int, int] = {}
    offset_step = 0.3
    max_offset = 0
    for milestone in milestones_in_range:
        year = milestone["year"]
        count_for_year = milestone_offsets.get(year, 0)
        milestone_offsets[year] = count_for_year + 1
        max_offset = max(max_offset, count_for_year)
        # Labels sharing a year alternate right (even) and left (odd) of
        # the line, one step further out for each pair already placed.
        horizontal_offset = offset_step * (count_for_year // 2 + 1)
        if count_for_year % 2 == 1:
            horizontal_offset *= -1
        text_x = year + horizontal_offset
        ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
        ax.text(
            text_x,
            top_limit,
            milestone["description"],
            rotation=90,
            verticalalignment="top",
            horizontalalignment="center",
            fontsize=8,
            color="#d62728",
        )
    ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

    # A single legend gathers the artists of both axes.
    handles = [
        bars_owned_sets,
        bars_missing_sets,
        parts_bars_owned,
        parts_bars_missing,
        set_mean_line[0],
        parts_mean_line[0],
    ]
    labels = [
        "Sets possédés",
        "Sets non possédés",
        "Pièces (sets possédés)",
        "Pièces (sets non possédés)",
        "Moyenne cumulative (sets)",
        "Moyenne cumulative (pièces/set)",
    ]
    ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=150)
    plt.close(fig)