Complète l’étape 26 avec l’évolution minifigs/set
This commit is contained in:
parent
f23f54d040
commit
c9f1acee4b
@ -268,3 +268,4 @@ Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de mini
|
|||||||
2. `python -m scripts.plot_minifig_parts_correlation`
|
2. `python -m scripts.plot_minifig_parts_correlation`
|
||||||
|
|
||||||
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
|
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
|
||||||
|
Un second export `data/intermediate/minifigs_per_set_timeline.csv` est généré pour l'évolution annuelle du nombre moyen de minifigs par set, visualisé dans `figures/step26/minifigs_per_set_timeline.png` (courbes catalogue vs thèmes filtrés).
|
||||||
|
|||||||
45
lib/plots/minifig_parts_timeline.py
Normal file
45
lib/plots/minifig_parts_timeline.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
"""Évolution annuelle du nombre moyen de minifigs par set."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_minifigs_per_year(path: Path, scope: str) -> List[tuple[int, float]]:
    """Load the yearly averages recorded for a single scope.

    Rows returned by ``read_rows(path)`` whose ``"scope"`` value differs from
    ``scope`` are ignored. Every remaining row contributes one
    ``(year, average)`` pair, converted from its ``"year"`` and
    ``"average_minifigs_per_set"`` values.

    Returns:
        The pairs ordered by ascending year.
    """
    matching = [
        (int(record["year"]), float(record["average_minifigs_per_set"]))
        for record in read_rows(path)
        if record["scope"] == scope
    ]
    # Order on the year only, so rows sharing a year keep their input order.
    return sorted(matching, key=lambda pair: pair[0])
|
||||||
|
|
||||||
|
|
||||||
|
def plot_minifigs_per_set_timeline(path: Path, destination_path: Path) -> None:
    """Plot the yearly minifigs-per-set average (whole catalog vs filtered themes).

    The "filtered" and "catalog" series are loaded from ``path`` with
    ``load_minifigs_per_year``. When either series is empty the function
    returns without drawing or writing anything. Otherwise both curves are
    drawn on one figure, which is saved to ``destination_path`` and closed.
    """
    filtered_points = load_minifigs_per_year(path, "filtered")
    catalog_points = load_minifigs_per_year(path, "catalog")
    if not (filtered_points and catalog_points):
        return

    # The catalog curve is drawn first so the filtered curve sits on top of it.
    series = (
        (
            catalog_points,
            {"color": "#888888", "linestyle": "--", "linewidth": 1.6, "label": "Catalogue global"},
        ),
        (
            filtered_points,
            {"color": "#1f77b4", "linewidth": 2.2, "marker": "o", "label": "Thèmes filtrés"},
        ),
    )

    figure, axes = plt.subplots(figsize=(12, 6))
    for points, line_options in series:
        years, averages = zip(*points)
        axes.plot(years, averages, **line_options)

    axes.set_xlabel("Année")
    axes.set_ylabel("Nombre moyen de minifigs par set")
    axes.set_title("Évolution des minifigs par set")
    axes.grid(True, linestyle="--", alpha=0.3)
    axes.legend(loc="upper left")

    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=160)
    # Close the figure explicitly so it does not stay registered with pyplot.
    plt.close(figure)
|
||||||
@ -80,6 +80,62 @@ def build_correlation_rows(
|
|||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _add_set_to_year(totals: dict[int, dict[str, int]], year: int, minifigs: int) -> None:
    """Record one more set, carrying ``minifigs`` minifigs, in the totals of ``year``."""
    bucket = totals.setdefault(year, {"minifigs": 0, "sets": 0})
    bucket["minifigs"] += minifigs
    bucket["sets"] += 1


def _format_yearly_averages(scope: str, totals: dict[int, dict[str, int]]) -> List[dict]:
    """Turn per-year totals into output rows tagged with ``scope``, by ascending year."""
    formatted: List[dict] = []
    for year in sorted(totals):
        year_totals = totals[year]
        # A year only exists in ``totals`` once a set was recorded, so "sets" >= 1.
        average = year_totals["minifigs"] / year_totals["sets"]
        formatted.append(
            {
                "scope": scope,
                "year": str(year),
                "average_minifigs_per_set": f"{average:.3f}",
                "set_count": str(year_totals["sets"]),
            }
        )
    return formatted


def build_minifigs_per_year(
    filtered_counts_path: Path,
    all_sets_path: Path,
    inventories_path: Path,
    inventory_minifigs_path: Path,
) -> List[dict]:
    """Compute the average number of minifigs per set and per year (filtered vs catalog).

    Args:
        filtered_counts_path: CSV with one row per filtered set; its ``year``
            and ``minifig_count`` columns are read.
        all_sets_path: CSV with one row per catalog set; its ``year`` and
            ``set_num`` columns are read.
        inventories_path: Forwarded to ``build_global_minifig_counts``.
        inventory_minifigs_path: Forwarded to ``build_global_minifig_counts``.

    Returns:
        Rows with the string-valued keys ``scope``, ``year``,
        ``average_minifigs_per_set`` (three decimals) and ``set_count``: first
        every ``"filtered"`` row, then every ``"catalog"`` row, each group
        ordered by ascending year.

    Raises:
        ValueError: If a ``year`` or ``minifig_count`` cell is not an integer.
        KeyError: If one of the columns read above is missing.
    """
    filtered_totals: dict[int, dict[str, int]] = {}
    # newline="" leaves newline handling to the csv module, as its
    # documentation asks for any file object handed to a reader.
    with filtered_counts_path.open(newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            _add_set_to_year(filtered_totals, int(row["year"]), int(row["minifig_count"]))

    # Used below as a mapping from set_num to a minifig count.
    # NOTE(review): its exact construction lives in build_global_minifig_counts.
    global_minifigs = build_global_minifig_counts(inventories_path, inventory_minifigs_path)

    catalog_totals: dict[int, dict[str, int]] = {}
    with all_sets_path.open(newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            # A set missing from the global counts still counts as a set, with zero minifigs.
            _add_set_to_year(catalog_totals, int(row["year"]), global_minifigs.get(row["set_num"], 0))

    return _format_yearly_averages("filtered", filtered_totals) + _format_yearly_averages(
        "catalog", catalog_totals
    )
|
||||||
|
|
||||||
|
|
||||||
def write_correlation_rows(path: Path, rows: Sequence[dict]) -> None:
|
def write_correlation_rows(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit les lignes de corrélation pièces/minifigs."""
|
"""Écrit les lignes de corrélation pièces/minifigs."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
@ -91,6 +147,17 @@ def write_correlation_rows(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_minifigs_per_year(path: Path, rows: Sequence[dict]) -> None:
    """Write the yearly minifigs-per-set CSV.

    The parent directory of ``path`` is prepared with ``ensure_parent_dir``,
    then a header line and one line per entry of ``rows`` are written with the
    columns ``scope``, ``year``, ``average_minifigs_per_set`` and ``set_count``.
    """
    columns = ["scope", "year", "average_minifigs_per_set", "set_count"]
    ensure_parent_dir(path)
    with path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
def load_correlation_rows(path: Path) -> List[dict]:
|
def load_correlation_rows(path: Path) -> List[dict]:
|
||||||
"""Charge le CSV de corrélation pièces/minifigs."""
|
"""Charge le CSV de corrélation pièces/minifigs."""
|
||||||
return read_rows(path)
|
return read_rows(path)
|
||||||
|
|||||||
@ -3,7 +3,13 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.plots.minifig_parts_correlation import plot_minifig_parts_correlation
|
from lib.plots.minifig_parts_correlation import plot_minifig_parts_correlation
|
||||||
from lib.rebrickable.minifig_parts_correlation import build_correlation_rows, write_correlation_rows
|
from lib.plots.minifig_parts_timeline import plot_minifigs_per_set_timeline
|
||||||
|
from lib.rebrickable.minifig_parts_correlation import (
|
||||||
|
build_correlation_rows,
|
||||||
|
build_minifigs_per_year,
|
||||||
|
write_correlation_rows,
|
||||||
|
write_minifigs_per_year,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
FILTERED_MINIFIG_COUNTS_PATH = Path("data/intermediate/minifig_counts_by_set.csv")
|
FILTERED_MINIFIG_COUNTS_PATH = Path("data/intermediate/minifig_counts_by_set.csv")
|
||||||
@ -12,7 +18,9 @@ ALL_SETS_PATH = Path("data/raw/sets.csv")
|
|||||||
INVENTORIES_PATH = Path("data/raw/inventories.csv")
|
INVENTORIES_PATH = Path("data/raw/inventories.csv")
|
||||||
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
|
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
|
||||||
CORRELATION_PATH = Path("data/intermediate/minifig_parts_correlation.csv")
|
CORRELATION_PATH = Path("data/intermediate/minifig_parts_correlation.csv")
|
||||||
|
MINIFIGS_PER_YEAR_PATH = Path("data/intermediate/minifigs_per_set_timeline.csv")
|
||||||
DESTINATION_PATH = Path("figures/step26/minifig_parts_correlation.png")
|
DESTINATION_PATH = Path("figures/step26/minifig_parts_correlation.png")
|
||||||
|
TIMELINE_DESTINATION_PATH = Path("figures/step26/minifigs_per_set_timeline.png")
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
@ -26,6 +34,14 @@ def main() -> None:
|
|||||||
)
|
)
|
||||||
write_correlation_rows(CORRELATION_PATH, rows)
|
write_correlation_rows(CORRELATION_PATH, rows)
|
||||||
plot_minifig_parts_correlation(CORRELATION_PATH, DESTINATION_PATH)
|
plot_minifig_parts_correlation(CORRELATION_PATH, DESTINATION_PATH)
|
||||||
|
per_year = build_minifigs_per_year(
|
||||||
|
FILTERED_MINIFIG_COUNTS_PATH,
|
||||||
|
ALL_SETS_PATH,
|
||||||
|
INVENTORIES_PATH,
|
||||||
|
INVENTORY_MINIFIGS_PATH,
|
||||||
|
)
|
||||||
|
write_minifigs_per_year(MINIFIGS_PER_YEAR_PATH, per_year)
|
||||||
|
plot_minifigs_per_set_timeline(MINIFIGS_PER_YEAR_PATH, TIMELINE_DESTINATION_PATH)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from lib.rebrickable.minifig_parts_correlation import build_correlation_rows
|
from lib.rebrickable.minifig_parts_correlation import build_correlation_rows, build_minifigs_per_year
|
||||||
|
|
||||||
|
|
||||||
def write_csv(path: Path, content: str) -> None:
|
def write_csv(path: Path, content: str) -> None:
|
||||||
@ -68,3 +68,56 @@ def test_build_correlation_rows_merges_filtered_and_catalog(tmp_path: Path) -> N
|
|||||||
{"scope": "catalog", "set_num": "124-1", "num_parts": "150", "minifig_count": "1"},
|
{"scope": "catalog", "set_num": "124-1", "num_parts": "150", "minifig_count": "1"},
|
||||||
{"scope": "catalog", "set_num": "200-1", "num_parts": "100", "minifig_count": "3"},
|
{"scope": "catalog", "set_num": "200-1", "num_parts": "100", "minifig_count": "3"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_minifigs_per_year_computes_averages(tmp_path: Path) -> None:
    """Check the yearly minifigs-per-set averages for the filtered and catalog scopes."""
    # Insertion order matters: it matches the positional parameters of
    # build_minifigs_per_year (filtered counts, sets, inventories, inventory minifigs).
    fixtures = {
        "minifig_counts_by_set.csv": (
            "set_num,set_id,name,year,minifig_count\n"
            "123-1,123,Set A,2020,2\n"
            "124-1,124,Set B,2020,1\n"
            "125-1,125,Set C,2021,3\n"
        ),
        "sets.csv": (
            "set_num,name,year,theme_id,num_parts\n"
            "123-1,Set A,2020,1,300\n"
            "124-1,Set B,2020,1,150\n"
            "125-1,Set C,2021,1,100\n"
            "200-1,Set D,2020,1,50\n"
        ),
        "inventories.csv": (
            "id,version,set_num\n"
            "10,1,123-1\n"
            "20,1,124-1\n"
            "30,1,125-1\n"
            "40,1,200-1\n"
        ),
        "inventory_minifigs.csv": (
            "inventory_id,fig_num,quantity\n"
            "10,fig-a,2\n"
            "20,fig-b,1\n"
            "30,fig-c,3\n"
            "40,fig-d,4\n"
        ),
    }
    fixture_paths = [tmp_path / file_name for file_name in fixtures]
    for fixture_path, content in zip(fixture_paths, fixtures.values()):
        write_csv(fixture_path, content)

    computed = build_minifigs_per_year(*fixture_paths)

    # Set D (200-1) belongs to the catalog only: (2 + 1 + 4) / 3 sets in 2020.
    expected = [
        {"scope": "filtered", "year": "2020", "average_minifigs_per_set": "1.500", "set_count": "2"},
        {"scope": "filtered", "year": "2021", "average_minifigs_per_set": "3.000", "set_count": "1"},
        {"scope": "catalog", "year": "2020", "average_minifigs_per_set": "2.333", "set_count": "3"},
        {"scope": "catalog", "year": "2021", "average_minifigs_per_set": "3.000", "set_count": "1"},
    ]
    assert computed == expected
|
||||||
|
|||||||
27
tests/test_minifig_parts_timeline_plot.py
Normal file
27
tests/test_minifig_parts_timeline_plot.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
"""Tests du graphique d'évolution minifigs/set."""
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.minifig_parts_timeline import plot_minifigs_per_set_timeline
|
||||||
|
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_minifigs_per_set_timeline(tmp_path: Path) -> None:
    """Render the minifigs-per-set curve and check that a non-empty image is written."""
    csv_source = tmp_path.joinpath("minifigs_per_set_timeline.csv")
    image_target = tmp_path.joinpath("figures", "step26", "minifigs_per_set_timeline.png")
    csv_content = (
        "scope,year,average_minifigs_per_set,set_count\n"
        "filtered,2020,1.500,2\n"
        "filtered,2021,2.000,1\n"
        "catalog,2020,1.000,3\n"
        "catalog,2021,2.500,2\n"
    )
    csv_source.write_text(csv_content)

    plot_minifigs_per_set_timeline(csv_source, image_target)

    # The target sits in sub-directories that do not exist before the call.
    assert image_target.exists()
    assert image_target.stat().st_size > 0
|
||||||
Loading…
x
Reference in New Issue
Block a user