"""Charts showing the number of sets released per year."""

from collections import Counter, defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Tuple

import matplotlib.pyplot as plt

from lib.filesystem import ensure_parent_dir
from lib.milestones import load_milestones
from lib.rebrickable.stats import read_rows


def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Return ``(year, number of sets)`` pairs sorted chronologically.

    Each row must expose a ``"year"`` value convertible with ``int``.
    """
    counts = Counter(int(row["year"]) for row in rows)
    # Years are unique keys, so sorting the pairs sorts by year.
    return sorted(counts.items())


def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Return ``(year, total number of parts)`` pairs sorted chronologically.

    Each row must expose ``"year"`` and ``"num_parts"`` values convertible
    with ``int``.
    """
    totals: Dict[int, int] = defaultdict(int)
    for row in rows:
        totals[int(row["year"])] += int(row["num_parts"])
    return sorted(totals.items())


def _owned_totals_per_year(
    rows: Iterable[dict],
) -> Tuple[Dict[int, int], Dict[int, int]]:
    """Return per-year set counts and part totals for owned sets only.

    A row counts as owned when its ``"in_collection"`` value is the string
    ``"true"``.
    """
    owned_counts: Dict[int, int] = defaultdict(int)
    owned_parts: Dict[int, int] = defaultdict(int)
    for row in rows:
        if row["in_collection"] != "true":
            continue
        year = int(row["year"])
        owned_counts[year] += 1
        owned_parts[year] += int(row["num_parts"])
    # Plain dicts so later lookups cannot insert keys by accident.
    return dict(owned_counts), dict(owned_parts)


def _cumulative_means(
    counts: List[int],
    parts_totals: List[int],
) -> Tuple[List[float], List[float]]:
    """Return the running sets-per-year and parts-per-set averages.

    The first list divides the running number of sets by the number of
    years elapsed; the second divides the running number of parts by the
    running number of sets (0 while no set has been seen yet).
    """
    sets_per_year: List[float] = []
    parts_per_set: List[float] = []
    sets_so_far = 0
    parts_so_far = 0
    for elapsed_years, (count, parts) in enumerate(zip(counts, parts_totals), start=1):
        sets_so_far += count
        parts_so_far += parts
        sets_per_year.append(sets_so_far / elapsed_years)
        # Guard the division directly instead of pre-scanning for the first
        # non-empty year: both conditions are equivalent.
        parts_per_set.append(parts_so_far / sets_so_far if sets_so_far else 0)
    return sets_per_year, parts_per_set


def _annotate_milestones(ax, milestones: Iterable[dict], top_limit: float) -> int:
    """Draw one dashed marker and rotated label per milestone.

    Labels sharing a year are spread horizontally, alternating right and
    left of the marker, so they do not overlap. Returns the highest
    zero-based rank reached by a milestone within a single year.
    """
    seen_per_year: Dict[int, int] = {}
    offset_step = 0.3
    max_offset = 0
    for milestone in milestones:
        year = milestone["year"]
        rank = seen_per_year.get(year, 0)
        seen_per_year[year] = rank + 1
        max_offset = max(max_offset, rank)
        horizontal_offset = offset_step * (rank // 2 + 1)
        if rank % 2 == 1:
            horizontal_offset *= -1
        ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
        ax.text(
            year + horizontal_offset,
            top_limit,
            milestone["description"],
            rotation=90,
            verticalalignment="top",
            horizontalalignment="center",
            fontsize=8,
            color="#d62728",
        )
    return max_offset


def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Render the yearly bar chart with cumulative means and milestones.

    Sets and parts are drawn as stacked bars (owned / not owned) on two
    y-axes, together with the cumulative averages and the milestones whose
    year falls within the plotted range. The figure is written to
    ``destination_path``.

    Raises:
        ValueError: if the input contains no set rows.
    """
    # Materialise once: the rows are traversed several times below.
    # NOTE(review): read_rows may already return a list - confirm.
    sets_rows = list(read_rows(enriched_sets_path))
    milestones = load_milestones(milestones_path)

    sets_by_year = dict(compute_sets_per_year(sets_rows))
    parts_by_year = dict(compute_parts_per_year(sets_rows))
    if not sets_by_year:
        raise ValueError(f"no set rows found in {enriched_sets_path}")

    min_year = min(sets_by_year)
    max_year = max(sets_by_year)
    # Fill the gaps so every year in the range gets a (possibly empty) bar.
    years = list(range(min_year, max_year + 1))
    counts = [sets_by_year.get(year, 0) for year in years]
    parts_totals = [parts_by_year.get(year, 0) for year in years]

    owned_counts_map, owned_parts_map = _owned_totals_per_year(sets_rows)
    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    cumulative_mean, cumulative_parts_mean = _cumulative_means(counts, parts_totals)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        bar_width = 0.35

        # Set counts: left-hand bars on the primary axis.
        x_sets = [year - bar_width / 2 for year in years]
        bars_owned_sets = ax.bar(
            x_sets,
            owned_counts,
            width=bar_width,
            color="#1f77b4",
            alpha=0.9,
            label="Sets possédés",
            zorder=2,
        )
        bars_missing_sets = ax.bar(
            x_sets,
            missing_counts,
            width=bar_width,
            bottom=owned_counts,
            color="#9ecae1",
            alpha=0.8,
            label="Sets non possédés",
        )
        set_mean_line = ax.plot(
            years,
            cumulative_mean,
            color="#ff7f0e",
            marker="o",
            label="Moyenne cumulative (sets)",
            zorder=5,
        )

        # Part totals: right-hand bars on the secondary axis.
        ax2 = ax.twinx()
        x_parts = [year + bar_width / 2 for year in years]
        parts_bars_owned = ax2.bar(
            x_parts,
            owned_parts,
            width=bar_width,
            color="#2ca02c",
            alpha=0.9,
            label="Pièces (sets possédés)",
            zorder=2,
        )
        parts_bars_missing = ax2.bar(
            x_parts,
            missing_parts,
            width=bar_width,
            bottom=owned_parts,
            color="#c7e9c0",
            alpha=0.85,
            label="Pièces (sets non possédés)",
        )
        parts_mean_line = ax2.plot(
            years,
            cumulative_parts_mean,
            color="#9467bd",
            marker="^",
            label="Moyenne cumulative (pièces/set)",
            zorder=6,
        )
        # Floor at 1 so the axis keeps a non-zero height when all totals are 0.
        parts_peak = max(parts_totals + [1])
        ax2.set_ylim(0, parts_peak * 1.1)

        ax.set_xlabel("Année")
        ax.set_ylabel("Nombre de sets")
        ax2.set_ylabel("Nombre de pièces")
        ax.set_title("Nombre de sets par année (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(min_year - 1, max_year + 0.4)
        ax.set_xticks(years)
        ax.tick_params(axis="x", labelrotation=45)

        # Double the data peak to leave room for the rotated milestone labels.
        peak = max(max(counts), max(cumulative_mean))
        top_limit = peak * 2
        max_offset = _annotate_milestones(ax, milestones_in_range, top_limit)
        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

        handles = [
            bars_owned_sets,
            bars_missing_sets,
            parts_bars_owned,
            parts_bars_missing,
            set_mean_line[0],
            parts_mean_line[0],
        ]
        labels = [
            "Sets possédés",
            "Sets non possédés",
            "Pièces (sets possédés)",
            "Pièces (sets non possédés)",
            "Moyenne cumulative (sets)",
            "Moyenne cumulative (pièces/set)",
        ]
        # Single legend, placed outside the plot, covering both axes.
        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)