Ajoute l’étape 26 pièces/minifigs
This commit is contained in:
parent
71f3509cc8
commit
f23f54d040
@ -261,3 +261,10 @@ Le script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets
|
||||
2. `python -m scripts.plot_minifig_gender_share`
|
||||
|
||||
Le script lit `data/intermediate/minifigs_by_set.csv`, agrège le nombre de minifigs distinctes par genre (basé sur `config/known_character_genders.csv`), écrit `data/intermediate/minifig_gender_counts.csv`, puis trace `figures/step25/minifig_gender_share.png` (donut indiquant la part des personnages féminins, masculins ou inconnus).
|
||||
|
||||
### Étape 26 : corrélation pièces / minifigs
|
||||
|
||||
1. `source .venv/bin/activate`
|
||||
2. `python -m scripts.plot_minifig_parts_correlation`
|
||||
|
||||
Le script lit `data/intermediate/minifig_counts_by_set.csv`, `data/intermediate/sets_enriched.csv`, `data/raw/sets.csv`, `data/raw/inventories.csv` et `data/raw/inventory_minifigs.csv`, produit `data/intermediate/minifig_parts_correlation.csv` (pièces vs minifigs pour le catalogue global et les thèmes filtrés), puis trace `figures/step26/minifig_parts_correlation.png` en superposant les nuages de points et leurs tendances linéaires.
|
||||
|
||||
85
lib/plots/minifig_parts_correlation.py
Normal file
85
lib/plots/minifig_parts_correlation.py
Normal file
@ -0,0 +1,85 @@
|
||||
"""Diagramme de corrélation entre pièces et minifigs par set."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def load_points(path: Path, scope: str) -> Tuple[list[int], list[int]]:
    """Load the plot coordinates recorded for one scope of the correlation CSV.

    Rows come from ``read_rows(path)``; only rows whose ``scope`` column
    equals *scope* are kept.

    Returns:
        Two parallel lists, in the order the rows were returned: the
        ``num_parts`` values (x axis) and the ``minifig_count`` values
        (y axis), both converted to ``int``.
    """
    # Single pass over the rows: filter on the scope and convert both columns.
    pairs = [
        (int(row["num_parts"]), int(row["minifig_count"]))
        for row in read_rows(path)
        if row["scope"] == scope
    ]
    parts = [num_parts for num_parts, _ in pairs]
    minifigs = [minifig_count for _, minifig_count in pairs]
    return parts, minifigs
|
||||
|
||||
|
||||
def compute_regression(points: Iterable[Tuple[int, int]]) -> Tuple[float, float]:
    """Fit a simple least-squares line ``y = slope * x + intercept``.

    Args:
        points: ``(x, y)`` pairs. Any iterable is accepted, including
            one-shot iterators and generators.

    Returns:
        ``(slope, intercept)``. When every x value is identical the slope is
        reported as ``0.0`` and the intercept is the mean of the y values.

    Raises:
        ValueError: if *points* is empty (no line can be fitted).
    """
    # Materialize once: the data is traversed several times below, which
    # would silently exhaust a generator after the first pass.
    pairs = list(points)
    if not pairs:
        raise ValueError("compute_regression requires at least one point")

    n = len(pairs)
    mean_x = sum(x for x, _ in pairs) / n
    mean_y = sum(y for _, y in pairs) / n

    numerator = 0.0
    denominator = 0.0
    for x, y in pairs:
        dx = x - mean_x
        dy = y - mean_y
        numerator += dx * dy
        denominator += dx * dx

    # A zero denominator means there is no spread on the x axis; fall back to
    # a flat line instead of dividing by zero.
    slope = numerator / denominator if denominator != 0 else 0.0
    intercept = mean_y - slope * mean_x
    return slope, intercept
|
||||
|
||||
|
||||
def plot_minifig_parts_correlation(correlation_path: Path, destination_path: Path) -> None:
    """Draw the parts-vs-minifigs scatter plot for filtered sets and the catalogue.

    Points are read from *correlation_path* for the ``"filtered"`` and
    ``"catalog"`` scopes. Each scope gets a scatter cloud and a straight trend
    line (from ``compute_regression``) spanning the combined x range of both
    scopes. The figure is saved to *destination_path* and then closed.

    If either scope has no points, the function returns without saving
    anything.
    """
    filtered_x, filtered_y = load_points(correlation_path, "filtered")
    catalog_x, catalog_y = load_points(correlation_path, "catalog")
    filtered_points = list(zip(filtered_x, filtered_y))
    catalog_points = list(zip(catalog_x, catalog_y))
    if not (filtered_points and catalog_points):
        return

    filtered_slope, filtered_intercept = compute_regression(filtered_points)
    catalog_slope, catalog_intercept = compute_regression(catalog_points)

    # Both trend lines are drawn across the x range covered by either scope.
    all_x = filtered_x + catalog_x
    x_bounds = [min(all_x), max(all_x)]

    # (slope, intercept, line properties), listed in drawing order.
    trend_lines = (
        (
            catalog_slope,
            catalog_intercept,
            {
                "color": "#555555",
                "linestyle": "--",
                "linewidth": 1.4,
                "label": f"Tendance globale (pente {catalog_slope:.3f})",
            },
        ),
        (
            filtered_slope,
            filtered_intercept,
            {
                "color": "#1f77b4",
                "linestyle": "-",
                "linewidth": 1.6,
                "label": f"Tendance thèmes filtrés (pente {filtered_slope:.3f})",
            },
        ),
    )

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.scatter(catalog_x, catalog_y, color="#bbbbbb", alpha=0.25, s=18, label="Catalogue global")
    ax.scatter(filtered_x, filtered_y, color="#1f77b4", alpha=0.8, s=28, label="Thèmes filtrés")
    for slope, intercept, line_properties in trend_lines:
        ax.plot(
            x_bounds,
            [slope * x + intercept for x in x_bounds],
            **line_properties,
        )

    ax.set_xlabel("Nombre de pièces du set")
    ax.set_ylabel("Nombre de minifigs")
    ax.set_title("Corrélation pièces / minifigs")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.legend(loc="upper left")

    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=160)
    plt.close(fig)
|
||||
96
lib/rebrickable/minifig_parts_correlation.py
Normal file
96
lib/rebrickable/minifig_parts_correlation.py
Normal file
@ -0,0 +1,96 @@
|
||||
"""Prépare les données de corrélation pièces/minifigs par set."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Sequence
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
from lib.rebrickable.parts_inventory import index_inventory_minifigs_by_inventory, select_latest_inventories
|
||||
from lib.rebrickable.stats import read_rows
|
||||
|
||||
|
||||
def load_minifig_counts_by_set(path: Path) -> Dict[str, int]:
    """Index the minifig count of each filtered set by its set number.

    Args:
        path: CSV file with at least ``set_num`` and ``minifig_count`` columns.

    Returns:
        Mapping ``set_num -> minifig_count`` (as ``int``). When a set number
        appears on several rows, the last row wins.

    Raises:
        KeyError: if a required column is missing from the header.
        ValueError: if a ``minifig_count`` value is not an integer.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; without it, carriage returns inside quoted
    # fields are rewritten by universal-newline translation.
    with path.open(newline="") as csv_file:
        return {
            row["set_num"]: int(row["minifig_count"])
            for row in csv.DictReader(csv_file)
        }
|
||||
|
||||
|
||||
def load_num_parts(path: Path) -> Dict[str, int]:
    """Index the number of parts of each set by its set number.

    Args:
        path: CSV file with at least ``set_num`` and ``num_parts`` columns.

    Returns:
        Mapping ``set_num -> num_parts`` (as ``int``). When a set number
        appears on several rows, the last row wins.

    Raises:
        KeyError: if a required column is missing from the header.
        ValueError: if a ``num_parts`` value is not an integer.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; without it, carriage returns inside quoted
    # fields (e.g. set names) are rewritten by universal-newline translation.
    with path.open(newline="") as csv_file:
        return {
            row["set_num"]: int(row["num_parts"])
            for row in csv.DictReader(csv_file)
        }
|
||||
|
||||
|
||||
def build_global_minifig_counts(inventories_path: Path, inventory_minifigs_path: Path) -> Dict[str, int]:
    """Compute the total minifig quantity of every set in the full catalogue.

    For each ``set_num -> inventory`` entry returned by
    ``select_latest_inventories``, the ``quantity`` values of the minifig rows
    indexed under that inventory's ``id`` are summed. Sets whose inventory has
    no minifig rows get a count of ``0``.
    """
    latest_inventories = select_latest_inventories(inventories_path)
    minifig_rows_by_inventory = index_inventory_minifigs_by_inventory(inventory_minifigs_path)
    return {
        set_num: sum(
            int(minifig_row["quantity"])
            for minifig_row in minifig_rows_by_inventory.get(inventory["id"], [])
        )
        for set_num, inventory in latest_inventories.items()
    }
|
||||
|
||||
|
||||
def build_correlation_rows(
    filtered_counts_path: Path,
    filtered_sets_path: Path,
    all_sets_path: Path,
    inventories_path: Path,
    inventory_minifigs_path: Path,
) -> List[dict]:
    """Build the parts/minifigs correlation rows for both scopes.

    The result starts with one ``"filtered"`` row per set listed in
    *filtered_counts_path* (its part count is looked up in
    *filtered_sets_path*), followed by one ``"catalog"`` row per set of
    *all_sets_path* that has a strictly positive part count. Catalogue sets
    without a computed minifig total are given a count of ``0``.

    Returns:
        Dicts with the keys ``scope``, ``set_num``, ``num_parts`` and
        ``minifig_count``; the two numeric values are stored as strings.

    Raises:
        KeyError: if a filtered set has no entry in *filtered_sets_path*.
    """

    def as_row(scope: str, set_num: str, num_parts: int, minifig_count: int) -> dict:
        # Key order is the column order of the output rows.
        return {
            "scope": scope,
            "set_num": set_num,
            "num_parts": str(num_parts),
            "minifig_count": str(minifig_count),
        }

    minifigs_by_filtered_set = load_minifig_counts_by_set(filtered_counts_path)
    parts_by_filtered_set = load_num_parts(filtered_sets_path)
    rows: List[dict] = [
        as_row("filtered", set_num, parts_by_filtered_set[set_num], minifig_count)
        for set_num, minifig_count in minifigs_by_filtered_set.items()
    ]

    parts_by_catalog_set = load_num_parts(all_sets_path)
    minifigs_by_catalog_set = build_global_minifig_counts(inventories_path, inventory_minifigs_path)
    rows.extend(
        as_row("catalog", set_num, num_parts, minifigs_by_catalog_set.get(set_num, 0))
        for set_num, num_parts in parts_by_catalog_set.items()
        if num_parts > 0  # sets with no parts are left out of the catalogue scope
    )
    return rows
|
||||
|
||||
|
||||
def write_correlation_rows(path: Path, rows: Sequence[dict]) -> None:
    """Write the correlation rows to *path* as a CSV file with a header line.

    The columns are, in order: ``scope``, ``set_num``, ``num_parts`` and
    ``minifig_count``. ``ensure_parent_dir`` is called on *path* before the
    file is opened (presumably to create missing parent directories).
    """
    ensure_parent_dir(path)
    columns = ["scope", "set_num", "num_parts", "minifig_count"]
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
||||
|
||||
|
||||
def load_correlation_rows(path: Path) -> List[dict]:
    """Load the parts/minifigs correlation CSV.

    Thin wrapper: the rows are whatever ``read_rows`` returns for *path*.
    """
    correlation_rows = read_rows(path)
    return correlation_rows
|
||||
32
scripts/plot_minifig_parts_correlation.py
Normal file
32
scripts/plot_minifig_parts_correlation.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""Trace la corrélation entre nombre de pièces et nombre de minifigs par set."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from lib.plots.minifig_parts_correlation import plot_minifig_parts_correlation
|
||||
from lib.rebrickable.minifig_parts_correlation import build_correlation_rows, write_correlation_rows
|
||||
|
||||
|
||||
# Intermediate inputs used for the "filtered" scope.
FILTERED_MINIFIG_COUNTS_PATH = Path("data/intermediate/minifig_counts_by_set.csv")
FILTERED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
# Raw inputs used for the "catalog" scope.
ALL_SETS_PATH = Path("data/raw/sets.csv")
INVENTORIES_PATH = Path("data/raw/inventories.csv")
INVENTORY_MINIFIGS_PATH = Path("data/raw/inventory_minifigs.csv")
# Outputs of this step: the correlation CSV and the figure drawn from it.
CORRELATION_PATH = Path("data/intermediate/minifig_parts_correlation.csv")
DESTINATION_PATH = Path("figures/step26/minifig_parts_correlation.png")
|
||||
|
||||
|
||||
def main() -> None:
    """Build the correlation CSV, then draw the comparison figure from it."""
    correlation_rows = build_correlation_rows(
        filtered_counts_path=FILTERED_MINIFIG_COUNTS_PATH,
        filtered_sets_path=FILTERED_SETS_PATH,
        all_sets_path=ALL_SETS_PATH,
        inventories_path=INVENTORIES_PATH,
        inventory_minifigs_path=INVENTORY_MINIFIGS_PATH,
    )
    write_correlation_rows(path=CORRELATION_PATH, rows=correlation_rows)
    # The plot reads the CSV that was just written, not the in-memory rows.
    plot_minifig_parts_correlation(
        correlation_path=CORRELATION_PATH,
        destination_path=DESTINATION_PATH,
    )


if __name__ == "__main__":
    main()
|
||||
70
tests/test_minifig_parts_correlation.py
Normal file
70
tests/test_minifig_parts_correlation.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""Tests de la préparation de corrélation pièces/minifigs."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from lib.rebrickable.minifig_parts_correlation import build_correlation_rows
|
||||
|
||||
|
||||
def write_csv(path: Path, content: str) -> None:
    """Write *content* verbatim to *path*, replacing any existing file."""
    with path.open("w") as handle:
        handle.write(content)
|
||||
|
||||
|
||||
def test_build_correlation_rows_merges_filtered_and_catalog(tmp_path: Path) -> None:
    """Check that filtered rows come first, followed by one row per catalogue set.

    Set ``123-1`` has two inventories in the fixture (id 10 / version 1 with
    quantity 1, id 20 / version 2 with quantity 2); the expected catalogue
    count of 2 matches the version 2 inventory.
    """
    # File name -> CSV content, written in this order.
    fixtures = {
        "minifig_counts_by_set.csv": (
            "set_num,set_id,name,year,minifig_count\n"
            "123-1,123,Set A,2020,2\n"
            "124-1,124,Set B,2021,1\n"
        ),
        "sets_enriched.csv": (
            "set_num,num_parts\n"
            "123-1,300\n"
            "124-1,150\n"
        ),
        "sets.csv": (
            "set_num,name,year,theme_id,num_parts\n"
            "123-1,Set A,2020,1,300\n"
            "124-1,Set B,2021,1,150\n"
            "200-1,Set C,2019,1,100\n"
        ),
        "inventories.csv": (
            "id,version,set_num\n"
            "10,1,123-1\n"
            "20,2,123-1\n"
            "30,1,124-1\n"
            "40,1,200-1\n"
        ),
        "inventory_minifigs.csv": (
            "inventory_id,fig_num,quantity\n"
            "10,fig-a,1\n"
            "20,fig-a,2\n"
            "30,fig-b,1\n"
            "40,fig-c,3\n"
        ),
    }
    for file_name, content in fixtures.items():
        write_csv(tmp_path / file_name, content)

    rows = build_correlation_rows(
        tmp_path / "minifig_counts_by_set.csv",
        tmp_path / "sets_enriched.csv",
        tmp_path / "sets.csv",
        tmp_path / "inventories.csv",
        tmp_path / "inventory_minifigs.csv",
    )

    expected = [
        {"scope": "filtered", "set_num": "123-1", "num_parts": "300", "minifig_count": "2"},
        {"scope": "filtered", "set_num": "124-1", "num_parts": "150", "minifig_count": "1"},
        {"scope": "catalog", "set_num": "123-1", "num_parts": "300", "minifig_count": "2"},
        {"scope": "catalog", "set_num": "124-1", "num_parts": "150", "minifig_count": "1"},
        {"scope": "catalog", "set_num": "200-1", "num_parts": "100", "minifig_count": "3"},
    ]
    assert rows == expected
|
||||
28
tests/test_minifig_parts_correlation_plot.py
Normal file
28
tests/test_minifig_parts_correlation_plot.py
Normal file
@ -0,0 +1,28 @@
|
||||
"""Tests du graphique de corrélation pièces/minifigs."""
|
||||
|
||||
import matplotlib
|
||||
from pathlib import Path
|
||||
|
||||
from lib.plots.minifig_parts_correlation import plot_minifig_parts_correlation
|
||||
|
||||
|
||||
matplotlib.use("Agg")
|
||||
|
||||
|
||||
def test_plot_minifig_parts_correlation(tmp_path: Path) -> None:
    """Check that plotting a small two-scope CSV produces a non-empty image file."""
    csv_content = (
        "scope,set_num,num_parts,minifig_count\n"
        "filtered,123-1,300,2\n"
        "filtered,124-1,150,1\n"
        "catalog,123-1,300,2\n"
        "catalog,124-1,150,1\n"
        "catalog,200-1,100,3\n"
    )
    source_csv = tmp_path / "minifig_parts_correlation.csv"
    source_csv.write_text(csv_content)
    # The destination sits in sub-directories that do not exist yet.
    output_png = tmp_path / "figures" / "step26" / "minifig_parts_correlation.png"

    plot_minifig_parts_correlation(source_csv, output_png)

    assert output_png.exists()
    assert output_png.stat().st_size > 0
|
||||
Loading…
x
Reference in New Issue
Block a user