You've already forked etude_lego_jurassic_world
Premiers éléments de l'étude
This commit is contained in:
1
lib/plots/__init__.py
Normal file
1
lib/plots/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Utilitaires de visualisation des données LEGO."""
|
||||
174
lib/plots/colors_grid.py
Normal file
174
lib/plots/colors_grid.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""Visualisation des couleurs utilisées dans l'inventaire filtré."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.color_sort import lab_sort_key, sort_hex_colors_lab
|
||||
from lib.rebrickable.parts_inventory import normalize_boolean
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def sort_colors_perceptually(colors: Iterable[dict]) -> List[dict]:
    """Return the color records ordered by the ranking of their hex codes.

    The ordering itself is delegated to ``sort_hex_colors_lab`` (a Lab-space
    sort, judging by its name and the original docstring); this function only
    maps that ranking back onto the full records.

    Args:
        colors: Color records, each carrying a ``"color_rgb"`` entry. Any
            iterable is accepted, including one-shot generators.

    Returns:
        A new list with the same records, ordered by the position that
        ``sort_hex_colors_lab`` gives to each record's ``"color_rgb"``.
    """
    # Materialize once: the records are traversed twice below, and a
    # generator would already be exhausted when ``sorted`` runs.
    records = list(colors)
    ordered_hex = sort_hex_colors_lab(record["color_rgb"] for record in records)
    # Position of each hex code in the sorted sequence; duplicates share a rank.
    rank = {hex_value: position for position, hex_value in enumerate(ordered_hex)}
    return sorted(records, key=lambda record: rank[record["color_rgb"]])
|
||||
|
||||
|
||||
def load_used_colors(parts_path: Path, colors_path: Path, minifig_only: bool = False) -> List[dict]:
    """Sum the quantity of every color found in the parts file.

    Rows are grouped by the pair (``color_rgb``, ``is_translucent``) and their
    ``quantity_in_set`` values are added up. With ``minifig_only`` set, only
    rows whose ``is_minifig_part`` equals ``"true"`` are kept; otherwise
    exactly those rows are left out.

    Each result is a dict with the keys ``color_rgb``, ``is_translucent``,
    ``name`` and ``quantity``. The name comes from the colors file and falls
    back to the hex code when the pair is unknown there. The list is returned
    in the order given by ``sort_colors_perceptually``.

    NOTE(review): nothing here filters spare parts; presumably the parts file
    is already filtered upstream — confirm against the caller.
    """
    part_rows = read_rows(parts_path)
    name_by_color = {}
    for color_row in read_rows(colors_path):
        lookup_key = (color_row["rgb"], normalize_boolean(color_row["is_trans"]))
        name_by_color[lookup_key] = color_row["name"]

    wants_minifig = bool(minifig_only)
    quantities: Dict[Tuple[str, str], int] = {}
    for part in part_rows:
        # Keep a row only when its minifig flag matches the requested mode.
        if (part.get("is_minifig_part") == "true") != wants_minifig:
            continue
        color_key = (part["color_rgb"], part["is_translucent"])
        quantities[color_key] = quantities.get(color_key, 0) + int(part["quantity_in_set"])

    aggregated = [
        {
            "color_rgb": rgb,
            "is_translucent": translucent,
            "name": name_by_color.get((rgb, translucent), rgb),
            "quantity": amount,
        }
        for (rgb, translucent), amount in quantities.items()
    ]
    return sort_colors_perceptually(aggregated)
|
||||
|
||||
|
||||
def build_hex_positions(count: int, columns: int = 9, spacing: float = 1.1) -> List[Tuple[float, float]]:
    """Lay out ``count`` points on a staggered (honeycomb-like) grid.

    Points fill rows of ``columns`` cells from left to right. Rows go
    downwards (y decreases) by ``spacing * 0.85`` each, and every odd row is
    shifted right by half of ``spacing``.

    Returns:
        The ``(x, y)`` coordinates in filling order.
    """
    row_height = spacing * 0.85
    half_shift = spacing / 2
    row_total = (count + columns - 1) // columns
    cells = (
        (line, slot)
        for line in range(row_total)
        for slot in range(columns)
    )
    return [
        (slot * spacing + (half_shift if line % 2 else 0.0), -line * row_height)
        for line, slot in cells
        # The last row may be partial: drop the cells beyond ``count``.
        if line * columns + slot < count
    ]
|
||||
|
||||
|
||||
def build_background(width: float, height: float, resolution: int = 600) -> np.ndarray:
    """Build a gradient image used as a backdrop for the color discs.

    The image is computed on a square ``resolution`` x ``resolution`` grid
    spanning [-1, 1] on both axes. Brightness mixes a radial falloff from the
    centre with a diagonal ramp and is clamped to [0.05, 0.95]. The red and
    green channels are slightly dimmed (x0.9 and x0.92), giving a bluish tint.

    ``width`` and ``height`` are accepted but not used by the computation.

    Returns:
        An array of shape ``(resolution, resolution, 3)``.
    """
    axis = np.linspace(-1.0, 1.0, resolution)
    # Row and column views of the same axis broadcast into the full 2-D grid.
    horizontal = axis[np.newaxis, :]
    vertical = axis[:, np.newaxis]
    distance = np.sqrt(horizontal**2 + vertical**2)
    slope = (horizontal + vertical) / 2
    brightness = np.clip(0.35 + 0.35 * (1 - distance) + 0.2 * slope, 0.05, 0.95)
    return np.stack((brightness * 0.9, brightness * 0.92, brightness), axis=-1)
|
||||
|
||||
|
||||
def plot_colors_grid(
    parts_path: Path,
    colors_path: Path,
    destination_path: Path,
    minifig_only: bool = False,
) -> None:
    """Draw the used colors as a staggered grid of discs and save the figure.

    Each color returned by ``load_used_colors`` gets one disc placed on the
    layout from ``build_hex_positions``. The marker size grows linearly with
    the color's quantity, from 720 up to 1600 for the most used color.
    Translucent colors are drawn semi-transparent with a light edge and a
    faint white halo behind them.

    Args:
        parts_path: Parts file forwarded to ``load_used_colors``.
        colors_path: Colors file forwarded to ``load_used_colors``.
        destination_path: Image file to write; it is passed to
            ``ensure_parent_dir`` before saving.
        minifig_only: Forwarded to ``load_used_colors``; also selects the
            title and the vertical anchor of the legend.

    Raises:
        ValueError: If there is no color to draw.
    """
    colors = load_used_colors(parts_path, colors_path, minifig_only=minifig_only)
    if not colors:
        # Fail with an explicit message instead of the bare
        # "max() arg is an empty sequence" raised further down.
        raise ValueError(f"No colors to plot from {parts_path}")

    positions = build_hex_positions(len(colors))
    x_values = [x for x, _ in positions]
    y_values = [y for _, y in positions]
    x_min, x_max = min(x_values), max(x_values)
    y_min, y_max = min(y_values), max(y_values)
    width = x_max - x_min + 1.5
    height = y_max - y_min + 1.5

    max_quantity = max(color["quantity"] for color in colors)
    min_marker = 720
    max_marker = 1600

    fig, ax = plt.subplots(figsize=(10, 10), facecolor="#0b0c10")
    try:
        # The backdrop extends 0.75 units beyond the outermost disc centres.
        left = x_min - 0.75
        bottom = y_min - 0.75
        ax.imshow(
            build_background(width, height),
            extent=[left, left + width, bottom, bottom + height],
            origin="lower",
            zorder=0,
        )

        for (x, y), color in zip(positions, colors):
            is_translucent = color["is_translucent"] == "true"
            alpha = 0.65 if is_translucent else 1.0
            edge = "#f7f7f7" if is_translucent else "#0d0d0d"
            # When every quantity is zero all discs get the minimum size
            # instead of dividing by zero.
            share = color["quantity"] / max_quantity if max_quantity else 0.0
            size = min_marker + (max_marker - min_marker) * share
            if is_translucent:
                # Faint white halo drawn underneath translucent discs.
                ax.scatter(
                    x,
                    y,
                    s=size * 1.25,
                    c="#ffffff",
                    alpha=0.18,
                    edgecolors="none",
                    linewidths=0,
                    zorder=2,
                )
            ax.scatter(
                x,
                y,
                s=size,
                c=f"#{color['color_rgb']}",
                alpha=alpha,
                edgecolors=edge,
                linewidths=1.1,
                zorder=3,
            )

        legend_handles = [
            Line2D(
                [0],
                [0],
                marker="o",
                color="none",
                markerfacecolor="#cccccc",
                markeredgecolor="#0d0d0d",
                markersize=10,
                label="Opaque",
            ),
            Line2D(
                [0],
                [0],
                marker="o",
                color="none",
                markerfacecolor="#cccccc",
                markeredgecolor="#f7f7f7",
                markersize=10,
                alpha=0.65,
                label="Translucide",
            ),
        ]
        legend_y = 1.06 if not minifig_only else 1.08
        ax.legend(
            handles=legend_handles,
            loc="upper center",
            bbox_to_anchor=(0.5, legend_y),
            ncol=2,
            frameon=False,
            labelcolor="#f0f0f0",
        )

        title_prefix = "Palette des couleurs utilisées (rechanges incluses)"
        if minifig_only:
            title_prefix = "Palette des couleurs de minifigs (rechanges incluses)"
        ax.set_title(title_prefix, fontsize=14, color="#f0f0f0", pad=28)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(x_min - 1.0, x_max + 1.0)
        ax.set_ylim(y_min - 1.0, y_max + 1.0)
        for spine in ax.spines.values():
            spine.set_visible(False)

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=200)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
|
||||
110
lib/plots/parts_per_set.py
Normal file
110
lib/plots/parts_per_set.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Graphiques sur la taille moyenne des sets (pièces par set)."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.milestones import load_milestones
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def compute_average_parts_per_set(rows: Iterable[dict]) -> List[Tuple[int, float]]:
    """Compute, for every year, the mean number of parts per set.

    Each row contributes its ``num_parts`` to the year given by its ``year``
    field; both fields are converted with ``int``.

    Returns:
        ``(year, average)`` pairs in ascending year order.
    """
    parts_by_year: Dict[int, int] = {}
    sets_by_year: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        parts_by_year[release_year] = parts_by_year.get(release_year, 0) + int(entry["num_parts"])
        sets_by_year[release_year] = sets_by_year.get(release_year, 0) + 1
    return [
        (release_year, parts_by_year[release_year] / sets_by_year[release_year])
        for release_year in sorted(parts_by_year)
    ]
|
||||
|
||||
|
||||
def compute_rolling_mean(series: List[Tuple[int, float]], window: int) -> List[Tuple[int, float]]:
    """Compute the trailing moving average of a yearly series.

    Args:
        series: ``(year, value)`` pairs, in the order they should be averaged.
        window: Number of consecutive entries in each average; at least 1.

    Returns:
        One ``(year, mean)`` pair per input entry. Entries that do not yet
        have ``window`` values behind them (the first ``window - 1``) get the
        placeholder ``0.0``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # A zero window used to divide by zero and a negative one silently
        # produced meaningless values; reject both explicitly.
        raise ValueError(f"window must be at least 1, got {window}")

    years = [year for year, _ in series]
    values = [value for _, value in series]
    rolling: List[Tuple[int, float]] = []
    for index, year in enumerate(years):
        start = index - window + 1
        if start < 0:
            # Not enough history yet for a full window.
            rolling.append((year, 0.0))
        else:
            rolling.append((year, sum(values[start : index + 1]) / window))
    return rolling
|
||||
|
||||
|
||||
def plot_parts_per_set(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
    rolling_window: int = 3,
) -> None:
    """Plot the yearly and rolling average of parts per set, with milestones.

    Args:
        enriched_sets_path: Sets file read with ``read_rows``; rows need the
            ``year`` and ``num_parts`` fields.
        milestones_path: File read with ``load_milestones``; each milestone
            is used through its ``"year"`` and ``"description"`` entries.
        destination_path: Image file to write; it is passed to
            ``ensure_parent_dir`` before saving.
        rolling_window: Window, in years, of the moving average.

    Raises:
        ValueError: If the sets file yields no row to plot.
    """
    sets_rows = read_rows(enriched_sets_path)
    milestones = load_milestones(milestones_path)
    annual_series = compute_average_parts_per_set(sets_rows)
    if not annual_series:
        # Checked before the figure exists so that nothing is left open.
        raise ValueError(f"No sets to plot from {enriched_sets_path}")
    rolling_series = compute_rolling_mean(annual_series, rolling_window)

    years = [year for year, _ in annual_series]
    annual_values = [value for _, value in annual_series]
    rolling_values = [value for _, value in rolling_series]
    first_year, last_year = min(years), max(years)

    # The y axis is twice as tall as the data so milestone labels fit above.
    peak = max(max(annual_values), max(rolling_values))
    top_limit = peak * 2
    milestones_in_range = sorted(
        [m for m in milestones if first_year <= m["year"] <= last_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.plot(years, annual_values, marker="o", color="#2ca02c", label="Moyenne annuelle (pièces/set)")
        ax.plot(
            years,
            rolling_values,
            marker="^",
            color="#9467bd",
            label=f"Moyenne glissante {rolling_window} ans (pièces/set)",
        )
        ax.set_xlabel("Année")
        ax.set_ylabel("Pièces par set")
        ax.set_title("Évolution de la taille moyenne des sets (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(first_year - 0.4, last_year + 0.4)
        ax.set_xticks(list(range(first_year, last_year + 1)))
        ax.tick_params(axis="x", labelrotation=45)

        milestone_offsets: Dict[int, int] = {}
        offset_step = 0.4
        max_offset = 0
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = milestone_offsets.get(year, 0)
            milestone_offsets[year] = count_for_year + 1
            max_offset = max(max_offset, count_for_year)
            # Labels sharing a year alternate right/left of the line and
            # move one step further out every second label.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
            ax.text(
                text_x,
                top_limit,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )

        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))
        ax.legend(loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
|
||||
196
lib/plots/sets_per_year.py
Normal file
196
lib/plots/sets_per_year.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Graphiques montrant le nombre de sets sortis par année."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.milestones import load_milestones
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Count how many rows (sets) fall in each year.

    The year is read from each row's ``year`` field and converted with
    ``int``.

    Returns:
        ``(year, number of sets)`` pairs in ascending year order.
    """
    tally: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        tally.setdefault(release_year, 0)
        tally[release_year] += 1
    return [(release_year, tally[release_year]) for release_year in sorted(tally)]
|
||||
|
||||
|
||||
def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Add up the number of parts released in each year.

    Each row contributes ``int(row["num_parts"])`` to the year given by
    ``int(row["year"])``.

    Returns:
        ``(year, total parts)`` pairs in ascending year order.
    """
    accumulated: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        piece_count = int(entry["num_parts"])
        accumulated[release_year] = accumulated.get(release_year, 0) + piece_count
    # Years are unique keys, so plain tuple ordering sorts by year.
    return sorted(accumulated.items())
|
||||
|
||||
|
||||
def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Plot yearly set and part counts with cumulative means and milestones.

    For every year between the first and last release year the chart shows
    two stacked bars: sets (left axis) and parts (right axis), each split
    between rows whose ``in_collection`` field equals ``"true"`` and the
    rest. Two lines show the cumulative mean of sets per year and of parts
    per set. Milestones falling inside the year range are drawn as dashed
    vertical lines with a rotated label.

    Args:
        enriched_sets_path: Sets file read with ``read_rows``; rows need the
            ``year``, ``num_parts`` and ``in_collection`` fields.
        milestones_path: File read with ``load_milestones``; each milestone
            is used through its ``"year"`` and ``"description"`` entries.
        destination_path: Image file to write; it is passed to
            ``ensure_parent_dir`` before saving.

    Raises:
        ValueError: If the sets file yields no row to plot.
    """
    # The rows are traversed several times below, so hold them in a list.
    sets_rows = list(read_rows(enriched_sets_path))
    milestones = load_milestones(milestones_path)

    # Build the per-year lookups once instead of once per plotted year.
    sets_by_year = dict(compute_sets_per_year(sets_rows))
    parts_by_year = dict(compute_parts_per_year(sets_rows))
    if not sets_by_year:
        raise ValueError(f"No sets to plot from {enriched_sets_path}")

    min_year = min(sets_by_year)
    max_year = max(sets_by_year)
    # Years without any release still get a (zero-height) slot on the chart.
    years = list(range(min_year, max_year + 1))
    counts = [sets_by_year.get(year, 0) for year in years]
    parts_totals = [parts_by_year.get(year, 0) for year in years]

    owned_counts_map: Dict[int, int] = {}
    owned_parts_map: Dict[int, int] = {}
    for row in sets_rows:
        year = int(row["year"])
        if row["in_collection"] == "true":
            owned_counts_map[year] = owned_counts_map.get(year, 0) + 1
            owned_parts_map[year] = owned_parts_map.get(year, 0) + int(row["num_parts"])
    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    cumulative_mean = []
    cumulative_parts_mean = []
    running_sets = 0
    running_parts = 0
    for elapsed_years, (count, parts) in enumerate(zip(counts, parts_totals), start=1):
        running_sets += count
        running_parts += parts
        cumulative_mean.append(running_sets / elapsed_years)
        # The series starts at a year that has at least one set, so
        # ``running_sets`` is never zero here.
        cumulative_parts_mean.append(running_parts / running_sets)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        bar_width = 0.35
        # Set bars sit left of the year tick, part bars right of it.
        x_sets = [year - bar_width / 2 for year in years]
        bars_owned_sets = ax.bar(
            x_sets,
            owned_counts,
            width=bar_width,
            color="#1f77b4",
            alpha=0.9,
            label="Sets possédés",
            zorder=2,
        )
        bars_missing_sets = ax.bar(
            x_sets,
            missing_counts,
            width=bar_width,
            bottom=owned_counts,
            color="#9ecae1",
            alpha=0.8,
            label="Sets non possédés",
        )
        set_mean_line = ax.plot(
            years,
            cumulative_mean,
            color="#ff7f0e",
            marker="o",
            label="Moyenne cumulative (sets)",
            zorder=5,
        )

        ax2 = ax.twinx()
        x_parts = [year + bar_width / 2 for year in years]
        parts_bars_owned = ax2.bar(
            x_parts,
            owned_parts,
            width=bar_width,
            color="#2ca02c",
            alpha=0.9,
            label="Pièces (sets possédés)",
            zorder=2,
        )
        parts_bars_missing = ax2.bar(
            x_parts,
            missing_parts,
            width=bar_width,
            bottom=owned_parts,
            color="#c7e9c0",
            alpha=0.85,
            label="Pièces (sets non possédés)",
        )
        parts_mean_line = ax2.plot(
            years,
            cumulative_parts_mean,
            color="#9467bd",
            marker="^",
            label="Moyenne cumulative (pièces/set)",
            zorder=6,
        )
        # The extra 1 keeps the right axis non-degenerate if every total is 0.
        parts_peak = max(parts_totals + [1])
        ax2.set_ylim(0, parts_peak * 1.1)

        ax.set_xlabel("Année")
        ax.set_ylabel("Nombre de sets")
        ax2.set_ylabel("Nombre de pièces")
        ax.set_title("Nombre de sets par année (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(min_year - 1, max_year + 0.4)
        ax.set_xticks(years)
        ax.tick_params(axis="x", labelrotation=45)

        # The left axis is twice as tall as the data so milestone labels fit.
        peak = max(max(counts), max(cumulative_mean))
        top_limit = peak * 2
        milestone_offsets: Dict[int, int] = {}
        offset_step = 0.3
        max_offset = 0
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = milestone_offsets.get(year, 0)
            milestone_offsets[year] = count_for_year + 1
            max_offset = max(max_offset, count_for_year)
            # Labels sharing a year alternate right/left of the line and
            # move one step further out every second label.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
            ax.text(
                text_x,
                top_limit,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )

        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

        # One legend for both axes, in a fixed order.
        handles = [
            bars_owned_sets,
            bars_missing_sets,
            parts_bars_owned,
            parts_bars_missing,
            set_mean_line[0],
            parts_mean_line[0],
        ]
        labels = [
            "Sets possédés",
            "Sets non possédés",
            "Pièces (sets possédés)",
            "Pièces (sets non possédés)",
            "Moyenne cumulative (sets)",
            "Moyenne cumulative (pièces/set)",
        ]
        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
|
||||
Reference in New Issue
Block a user