1

197 lines
6.6 KiB
Python

"""Graphiques montrant le nombre de sets sortis par année."""
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
import matplotlib.pyplot as plt
from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows
def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Count how many sets were released each year.

    Every row's ``"year"`` value is converted with ``int``. The result is a
    list of ``(year, set_count)`` pairs ordered by ascending year.
    """
    tally: Dict[int, int] = {}
    for record in rows:
        release_year = int(record["year"])
        if release_year in tally:
            tally[release_year] += 1
        else:
            tally[release_year] = 1
    pairs = list(tally.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Sum the part counts of all sets released each year.

    Every row's ``"year"`` and ``"num_parts"`` values are converted with
    ``int``. The result is a list of ``(year, total_parts)`` pairs ordered by
    ascending year.
    """
    parts_by_year: Dict[int, int] = {}
    for record in rows:
        release_year = int(record["year"])
        running_total = parts_by_year.get(release_year, 0)
        parts_by_year[release_year] = running_total + int(record["num_parts"])
    pairs = list(parts_by_year.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Render the yearly sets/parts bar chart with cumulative means and milestones.

    The figure has two y-axes sharing the year axis:

    * left axis: stacked bars of owned / not-owned set counts per year, plus
      the cumulative mean number of sets per year;
    * right axis: stacked bars of owned / not-owned part totals per year, plus
      the cumulative mean number of parts per set.

    Milestones whose year falls inside the plotted range are drawn as dashed
    vertical lines with a rotated text label.

    Args:
        enriched_sets_path: File passed to ``read_rows``. Each row must expose
            the keys ``"year"``, ``"num_parts"`` and ``"in_collection"``
            (a set counts as owned when that value equals ``"true"``).
        milestones_path: File passed to ``load_milestones``. Each milestone
            must expose the keys ``"year"`` and ``"description"``.
        destination_path: Where the figure is saved (150 dpi). Its parent
            directory is created through ``ensure_parent_dir``.

    Raises:
        ValueError: If no set rows are available.
    """
    # Materialise once: the rows are traversed three times below, which would
    # silently produce empty results if read_rows returned a one-shot iterator.
    sets_rows = list(read_rows(enriched_sets_path))
    milestones = load_milestones(milestones_path)
    if not sets_rows:
        raise ValueError(f"No set rows found in {enriched_sets_path}")

    # Build the lookup dicts once instead of once per plotted year.
    sets_by_year = dict(compute_sets_per_year(sets_rows))
    parts_by_year = dict(compute_parts_per_year(sets_rows))
    min_year = min(sets_by_year)
    max_year = max(sets_by_year)

    # Fill gaps so that every year in the range gets a (possibly empty) bar.
    years = list(range(min_year, max_year + 1))
    counts = [sets_by_year.get(year, 0) for year in years]
    parts_totals = [parts_by_year.get(year, 0) for year in years]

    owned_counts_map: Dict[int, int] = {}
    owned_parts_map: Dict[int, int] = {}
    for row in sets_rows:
        if row["in_collection"] != "true":
            continue
        year = int(row["year"])
        owned_counts_map[year] = owned_counts_map.get(year, 0) + 1
        owned_parts_map[year] = owned_parts_map.get(year, 0) + int(row["num_parts"])

    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    # The first plotted year is the earliest year that has rows, so the running
    # set count is strictly positive from the first iteration onwards and the
    # parts-per-set division below can never hit zero.
    cumulative_mean = []
    cumulative_parts_mean = []
    rolling_sets = 0
    rolling_parts = 0
    for index, (count, parts) in enumerate(zip(counts, parts_totals)):
        rolling_sets += count
        rolling_parts += parts
        cumulative_mean.append(rolling_sets / (index + 1))
        cumulative_parts_mean.append(rolling_parts / rolling_sets)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        bar_width = 0.35

        # Set bars sit just left of the year tick, part bars just right of it.
        x_sets = [year - bar_width / 2 for year in years]
        bars_owned_sets = ax.bar(
            x_sets,
            owned_counts,
            width=bar_width,
            color="#1f77b4",
            alpha=0.9,
            label="Sets possédés",
            zorder=2,
        )
        bars_missing_sets = ax.bar(
            x_sets,
            missing_counts,
            width=bar_width,
            bottom=owned_counts,
            color="#9ecae1",
            alpha=0.8,
            label="Sets non possédés",
        )
        set_mean_line = ax.plot(
            years,
            cumulative_mean,
            color="#ff7f0e",
            marker="o",
            label="Moyenne cumulative (sets)",
            zorder=5,
        )

        ax2 = ax.twinx()
        x_parts = [year + bar_width / 2 for year in years]
        parts_bars_owned = ax2.bar(
            x_parts,
            owned_parts,
            width=bar_width,
            color="#2ca02c",
            alpha=0.9,
            label="Pièces (sets possédés)",
            zorder=2,
        )
        parts_bars_missing = ax2.bar(
            x_parts,
            missing_parts,
            width=bar_width,
            bottom=owned_parts,
            color="#c7e9c0",
            alpha=0.85,
            label="Pièces (sets non possédés)",
        )
        parts_mean_line = ax2.plot(
            years,
            cumulative_parts_mean,
            color="#9467bd",
            marker="^",
            label="Moyenne cumulative (pièces/set)",
            zorder=6,
        )

        # Floor of 1 keeps the right axis non-degenerate when all totals are 0.
        parts_peak = max(parts_totals + [1])
        ax2.set_ylim(0, parts_peak * 1.1)

        ax.set_xlabel("Année")
        ax.set_ylabel("Nombre de sets")
        ax2.set_ylabel("Nombre de pièces")
        ax.set_title("Nombre de sets par année (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(min_year - 1, max_year + 0.4)
        ax.set_xticks(list(range(min_year, max_year + 1)))
        ax.tick_params(axis="x", labelrotation=45)

        # Doubling the peak leaves the upper half of the plot free for the
        # rotated milestone labels.
        peak = max(max(counts), max(cumulative_mean))
        top_limit = peak * 2

        milestone_offsets: Dict[int, int] = {}
        offset_step = 0.3
        max_offset = 0
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = milestone_offsets.get(year, 0)
            milestone_offsets[year] = count_for_year + 1
            max_offset = max(max_offset, count_for_year)
            # Labels sharing a year alternate right/left of the line, moving
            # one step further out every second label so they do not overlap.
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
            ax.text(
                text_x,
                top_limit,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )
        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

        # One legend for both axes, placed outside the plot area on the right.
        handles = [
            bars_owned_sets,
            bars_missing_sets,
            parts_bars_owned,
            parts_bars_missing,
            set_mean_line[0],
            parts_mean_line[0],
        ]
        labels = [
            "Sets possédés",
            "Sets non possédés",
            "Pièces (sets possédés)",
            "Pièces (sets non possédés)",
            "Moyenne cumulative (sets)",
            "Moyenne cumulative (pièces/set)",
        ]
        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)