You've already forked etude_lego_jurassic_world
Premiers éléments de l'étude
This commit is contained in:
196
lib/plots/sets_per_year.py
Normal file
196
lib/plots/sets_per_year.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Graphiques montrant le nombre de sets sortis par année."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.milestones import load_milestones
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def compute_sets_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Count how many sets were released each year.

    Args:
        rows: Set records; each one must expose a ``"year"`` entry that
            ``int()`` can convert.

    Returns:
        ``(year, number_of_sets)`` pairs in ascending year order. Years
        without any row are not present in the result.
    """
    tally: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        if release_year in tally:
            tally[release_year] += 1
        else:
            tally[release_year] = 1
    # Dict keys are unique, so the tuple ordering is decided by the year alone.
    return sorted(tally.items())
|
||||
|
||||
|
||||
def compute_parts_per_year(rows: Iterable[dict]) -> List[Tuple[int, int]]:
    """Sum the part counts of the sets released each year.

    Args:
        rows: Set records; each one must expose ``"year"`` and
            ``"num_parts"`` entries that ``int()`` can convert.

    Returns:
        ``(year, total_parts)`` pairs in ascending year order. Years
        without any row are not present in the result.
    """
    parts_by_year: Dict[int, int] = {}
    for entry in rows:
        release_year = int(entry["year"])
        parts_by_year.setdefault(release_year, 0)
        parts_by_year[release_year] += int(entry["num_parts"])
    # Dict keys are unique, so the tuple ordering is decided by the year alone.
    return sorted(parts_by_year.items())
|
||||
|
||||
|
||||
def _running_mean(values: List[int]) -> List[float]:
    """Return, for each position, the mean of all values up to and including it."""
    means: List[float] = []
    running_total = 0
    for position, value in enumerate(values, start=1):
        running_total += value
        means.append(running_total / position)
    return means


def _running_ratio(numerators: List[int], denominators: List[int]) -> List[float]:
    """Return the cumulative numerator total divided by the cumulative denominator total.

    Positions where the cumulative denominator is still zero yield ``0``.
    """
    ratios: List[float] = []
    numerator_total = 0
    denominator_total = 0
    for numerator, denominator in zip(numerators, denominators):
        numerator_total += numerator
        denominator_total += denominator
        ratios.append(numerator_total / denominator_total if denominator_total else 0)
    return ratios


def _annotate_milestones(ax, milestones: Iterable[dict], text_y: float) -> int:
    """Draw a dashed vertical line and a rotated label for each milestone.

    Labels that share a year are shifted alternately right and left of the
    line so they do not sit on top of each other.

    Args:
        ax: Matplotlib axes receiving the lines and labels.
        milestones: Records exposing ``"year"`` and ``"description"``.
        text_y: Vertical position (data coordinates) of the top of each label.

    Returns:
        The largest number of milestones already placed on a year when
        another one was added to it (``0`` when no year repeats).
    """
    placed_per_year: Dict[int, int] = {}
    offset_step = 0.3
    max_offset = 0
    for milestone in milestones:
        year = milestone["year"]
        rank = placed_per_year.get(year, 0)
        placed_per_year[year] = rank + 1
        max_offset = max(max_offset, rank)
        # Alternate sides: rank 0 -> +0.3, 1 -> -0.3, 2 -> +0.6, 3 -> -0.6, ...
        horizontal_offset = offset_step * (rank // 2 + 1)
        if rank % 2 == 1:
            horizontal_offset *= -1
        ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65)
        ax.text(
            year + horizontal_offset,
            text_y,
            milestone["description"],
            rotation=90,
            verticalalignment="top",
            horizontalalignment="center",
            fontsize=8,
            color="#d62728",
        )
    return max_offset


def plot_sets_per_year(
    enriched_sets_path: Path,
    milestones_path: Path,
    destination_path: Path,
) -> None:
    """Render the yearly sets/parts chart with cumulative means and milestones.

    The left axis shows, per year, a stacked bar of owned and not-owned sets
    plus the cumulative mean number of sets per year. A twin right axis shows
    a stacked bar of the parts in owned and not-owned sets plus the cumulative
    mean number of parts per set. Milestones whose year falls inside the
    plotted range are drawn as dashed vertical lines with a rotated label.

    Args:
        enriched_sets_path: File handed to ``read_rows``; the rows must expose
            ``"year"``, ``"num_parts"`` and ``"in_collection"`` (a set counts
            as owned when that value is the string ``"true"``).
        milestones_path: File handed to ``load_milestones``; the milestones
            must expose ``"year"`` and ``"description"``.
        destination_path: Where the figure is saved (at 150 dpi).

    Raises:
        ValueError: If no set rows are available, since no year range can be
            derived.
    """
    # The rows are traversed three times below; materialise them so that a
    # lazy reader would not be exhausted after the first pass.
    sets_rows = list(read_rows(enriched_sets_path))
    milestones = load_milestones(milestones_path)

    # Build each lookup once instead of once per plotted year.
    sets_by_year = dict(compute_sets_per_year(sets_rows))
    parts_by_year = dict(compute_parts_per_year(sets_rows))
    if not sets_by_year:
        raise ValueError(f"no set rows found in {enriched_sets_path}; nothing to plot")

    min_year = min(sets_by_year)
    max_year = max(sets_by_year)
    # Every year of the range is plotted, including years without any set.
    years = list(range(min_year, max_year + 1))
    counts = [sets_by_year.get(year, 0) for year in years]
    parts_totals = [parts_by_year.get(year, 0) for year in years]

    owned_counts_map: Dict[int, int] = {}
    owned_parts_map: Dict[int, int] = {}
    for row in sets_rows:
        if row["in_collection"] != "true":
            continue
        year = int(row["year"])
        owned_counts_map[year] = owned_counts_map.get(year, 0) + 1
        owned_parts_map[year] = owned_parts_map.get(year, 0) + int(row["num_parts"])

    owned_counts = [owned_counts_map.get(year, 0) for year in years]
    missing_counts = [total - owned for total, owned in zip(counts, owned_counts)]
    owned_parts = [owned_parts_map.get(year, 0) for year in years]
    missing_parts = [total - owned for total, owned in zip(parts_totals, owned_parts)]

    cumulative_mean = _running_mean(counts)
    cumulative_parts_mean = _running_ratio(parts_totals, counts)

    milestones_in_range = sorted(
        [m for m in milestones if min_year <= m["year"] <= max_year],
        key=lambda m: (m["year"], m["description"]),
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        bar_width = 0.35

        # Left axis: sets, bars shifted half a width to the left of the year.
        x_sets = [year - bar_width / 2 for year in years]
        bars_owned_sets = ax.bar(
            x_sets,
            owned_counts,
            width=bar_width,
            color="#1f77b4",
            alpha=0.9,
            label="Sets possédés",
            zorder=2,
        )
        bars_missing_sets = ax.bar(
            x_sets,
            missing_counts,
            width=bar_width,
            bottom=owned_counts,
            color="#9ecae1",
            alpha=0.8,
            label="Sets non possédés",
        )
        set_mean_line = ax.plot(
            years,
            cumulative_mean,
            color="#ff7f0e",
            marker="o",
            label="Moyenne cumulative (sets)",
            zorder=5,
        )

        # Right axis: parts, bars shifted half a width to the right of the year.
        ax2 = ax.twinx()
        x_parts = [year + bar_width / 2 for year in years]
        parts_bars_owned = ax2.bar(
            x_parts,
            owned_parts,
            width=bar_width,
            color="#2ca02c",
            alpha=0.9,
            label="Pièces (sets possédés)",
            zorder=2,
        )
        parts_bars_missing = ax2.bar(
            x_parts,
            missing_parts,
            width=bar_width,
            bottom=owned_parts,
            color="#c7e9c0",
            alpha=0.85,
            label="Pièces (sets non possédés)",
        )
        parts_mean_line = ax2.plot(
            years,
            cumulative_parts_mean,
            color="#9467bd",
            marker="^",
            label="Moyenne cumulative (pièces/set)",
            zorder=6,
        )
        # The extra 1 keeps the axis range non-degenerate when every total is 0.
        parts_peak = max(parts_totals + [1])
        ax2.set_ylim(0, parts_peak * 1.1)

        ax.set_xlabel("Année")
        ax.set_ylabel("Nombre de sets")
        ax2.set_ylabel("Nombre de pièces")
        ax.set_title("Nombre de sets par année (thèmes filtrés)")
        ax.grid(True, linestyle="--", alpha=0.3)
        ax.set_xlim(min_year - 1, max_year + 0.4)
        ax.set_xticks(years)
        ax.tick_params(axis="x", labelrotation=45)

        # Double the data peak so the milestone labels, anchored at the top,
        # get the upper half of the axes.
        peak = max(max(counts), max(cumulative_mean))
        top_limit = peak * 2
        max_offset = _annotate_milestones(ax, milestones_in_range, top_limit)
        ax.set_ylim(0, top_limit * (1 + max_offset * 0.02))

        handles = [
            bars_owned_sets,
            bars_missing_sets,
            parts_bars_owned,
            parts_bars_missing,
            set_mean_line[0],
            parts_mean_line[0],
        ]
        labels = [
            "Sets possédés",
            "Sets non possédés",
            "Pièces (sets possédés)",
            "Pièces (sets non possédés)",
            "Moyenne cumulative (sets)",
            "Moyenne cumulative (pièces/set)",
        ]
        # Anchored right of the axes so the legend clears the twin y-axis.
        ax.legend(handles, labels, loc="upper left", bbox_to_anchor=(1.12, 1))

        ensure_parent_dir(destination_path)
        fig.tight_layout()
        fig.savefig(destination_path, dpi=150)
    finally:
        # pyplot keeps every figure registered until it is closed; release it
        # even when drawing or saving fails.
        plt.close(fig)
|
||||
Reference in New Issue
Block a user