1

Premiers éléments de l'étude

This commit is contained in:
2025-12-01 21:57:05 +01:00
commit 6494948b3c
50 changed files with 2595 additions and 0 deletions

196
lib/plots/sets_per_year.py Normal file
View File

@@ -0,0 +1,196 @@
"""Graphiques montrant le nombre de sets sortis par année."""
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows
def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Count how many sets were released each year.

    Args:
        rows: Records exposing a ``"year"`` entry that ``int()`` can convert.

    Returns:
        ``(year, number_of_sets)`` pairs ordered by ascending year. Years
        without any row are absent from the result.
    """
    tally: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        if release_year in tally:
            tally[release_year] += 1
        else:
            tally[release_year] = 1
    # Years are unique dict keys, so the default tuple ordering is decided by
    # the year alone and never falls through to the count.
    return sorted(tally.items())
def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Sum the number of parts of all sets released each year.

    Args:
        rows: Records exposing ``"year"`` and ``"num_parts"`` entries that
            ``int()`` can convert.

    Returns:
        ``(year, total_parts)`` pairs ordered by ascending year. Years
        without any row are absent from the result.
    """
    parts_by_year: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        piece_count = int(entry["num_parts"])
        if release_year not in parts_by_year:
            parts_by_year[release_year] = 0
        parts_by_year[release_year] += piece_count
    ordered = list(parts_by_year.items())
    ordered.sort(key=lambda pair: pair[0])
    return ordered
def _fill_missing_years(
    series: Iterable[Tuple[int, int]],
    first_year: int,
    last_year: int,
) -> List[int]:
    """Return one value per year of ``[first_year, last_year]``, 0 where absent."""
    # Build the lookup once instead of once per year.
    lookup = dict(series)
    return [lookup.get(year, 0) for year in range(first_year, last_year + 1)]


def _owned_totals_by_year(rows: Iterable[dict]) -> Tuple[Dict[int, int], Dict[int, int]]:
    """Return per-year (set count, part count) for rows flagged as owned."""
    owned_sets: Dict[int, int] = {}
    owned_parts: Dict[int, int] = {}
    for row in rows:
        # Only the exact string "true" marks a set as part of the collection.
        if row["in_collection"] != "true":
            continue
        year = int(row["year"])
        owned_sets[year] = owned_sets.get(year, 0) + 1
        owned_parts[year] = owned_parts.get(year, 0) + int(row["num_parts"])
    return owned_sets, owned_parts


def _cumulative_means(
    counts: List[int],
    parts_totals: List[int],
) -> Tuple[List[float], List[float]]:
    """Return the running (sets per year, parts per set) averages."""
    sets_mean: List[float] = []
    parts_per_set_mean: List[float] = []
    running_sets = 0
    running_parts = 0
    for elapsed_years, (count, parts) in enumerate(zip(counts, parts_totals), start=1):
        running_sets += count
        running_parts += parts
        sets_mean.append(running_sets / elapsed_years)
        # Guard the ratio so a leading run of empty years cannot divide by zero.
        parts_per_set_mean.append(running_parts / running_sets if running_sets else 0)
    return sets_mean, parts_per_set_mean


def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Draw the yearly sets/parts bar chart with cumulative means and milestones.

    For every year between the first and last year present in the data, the
    figure shows two stacked bars (owned / not owned): one for the number of
    sets on the left axis and one for the number of parts on a twin right
    axis. A cumulative mean line is drawn on each axis, and every milestone
    falling inside the year range is marked by a dashed vertical line and a
    rotated label. The figure is saved to ``destination_path``.

    Args:
        enriched_sets_path: File passed to ``read_rows``; each row must expose
            ``"year"``, ``"num_parts"`` and ``"in_collection"``.
        milestones_path: File passed to ``load_milestones``; each milestone is
            indexed with ``"year"`` and ``"description"``.
        destination_path: Where the image is written; its parent directory is
            prepared through ``ensure_parent_dir``.

    Raises:
        ValueError: If the sets file yields no row at all.
    """
    # The rows are traversed three times below. read_rows' return type is not
    # visible from here, so materialise it: a one-shot iterator would silently
    # appear empty on the second and third passes.
    sets_rows = list(read_rows(enriched_sets_path))
    if not sets_rows:
        raise ValueError(f"No set rows found in {enriched_sets_path}; nothing to plot.")
    milestones = load_milestones(milestones_path)

    raw_series = compute_sets_per_year(sets_rows)
    raw_parts_series = compute_parts_per_year(sets_rows)
    # raw_series is sorted by year, so its ends give the covered range.
    min_year = raw_series[0][0]
    max_year = raw_series[-1][0]
    years = list(range(min_year, max_year + 1))
    counts = _fill_missing_years(raw_series, min_year, max_year)
    parts_totals = _fill_missing_years(raw_parts_series, min_year, max_year)

    owned_counts_map, owned_parts_map = _owned_totals_by_year(sets_rows)
    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    cumulative_mean, cumulative_parts_mean = _cumulative_means(counts, parts_totals)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    # Always release the figure, even when drawing or saving fails.
    try:
        bar_width = 0.35
        # Set bars sit just left of the year tick, part bars just right of it.
        x_sets = [year - bar_width / 2 for year in years]
        bars_owned_sets = ax.bar(
            x_sets,
            owned_counts,
            width=bar_width,
            color="#1f77b4",
            alpha=0.9,
            label="Sets possédés",
            zorder=2,
        )
        bars_missing_sets = ax.bar(
            x_sets,
            missing_counts,
            width=bar_width,
            bottom=owned_counts,
            color="#9ecae1",
            alpha=0.8,
            label="Sets non possédés",
        )
        set_mean_line = ax.plot(
            years,
            cumulative_mean,
            color="#ff7f0e",
            marker="o",
            label="Moyenne cumulative (sets)",
            zorder=5,
        )

        # Part counts live on a second y-axis because their scale differs.
        ax2 = ax.twinx()
        x_parts = [year + bar_width / 2 for year in years]
        parts_bars_owned = ax2.bar(
            x_parts,
            owned_parts,
            width=bar_width,
            color="#2ca02c",
            alpha=0.9,
            label="Pièces (sets possédés)",
            zorder=2,
        )
        parts_bars_missing = ax2.bar(
            x_parts,
            missing_parts,
            width=bar_width,
            bottom=owned_parts,
            color="#c7e9c0",
            alpha=0.85,
            label="Pièces (sets non possédés)",
        )
        parts_mean_line = ax2.plot(
            years,
            cumulative_parts_mean,
            color="#9467bd",
            marker="^",
            label="Moyenne cumulative (pièces/set)",
            zorder=6,
        )
        # The extra 1 keeps the axis non-degenerate when every total is 0.
        parts_peak = max(parts_totals + [1])
        ax2.set_ylim(0, parts_peak * 1.1)

        ax.set_xlabel("Année")
        ax.set_ylabel("Nombre de sets")
        ax2.set_ylabel("Nombre de pièces")
        ax.set_title("Nombre de sets par année (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(min_year - 1, max_year + 0.4)
        ax.set_xticks(years)
        ax.tick_params(axis="x", labelrotation=45)

        # Double the data peak so the milestone labels have room above the bars.
        peak = max(max(counts), max(cumulative_mean))
        top_limit = peak * 2

        milestone_offsets: Dict[int, int] = {}
        offset_step = 0.3
        max_offset = 0
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = milestone_offsets.get(year, 0)
            milestone_offsets[year] = count_for_year + 1
            max_offset = max(max_offset, count_for_year)
            # Labels sharing a year alternate right/left of the line, moving
            # one step further out for every pair.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
            ax.text(
                text_x,
                top_limit,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )
        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

        # Both axes feed a single legend, so the handles are listed explicitly.
        handles = [
            bars_owned_sets,
            bars_missing_sets,
            parts_bars_owned,
            parts_bars_missing,
            set_mean_line[0],
            parts_mean_line[0],
        ]
        labels = [
            "Sets possédés",
            "Sets non possédés",
            "Pièces (sets possédés)",
            "Pièces (sets non possédés)",
            "Moyenne cumulative (sets)",
            "Moyenne cumulative (pièces/set)",
        ]
        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        plt.close(fig)