Ajoute la richesse chromatique par set
This commit is contained in:
parent
f94669d82e
commit
d067e2075f
13
README.md
13
README.md
@ -285,3 +285,16 @@ Le calcul lit `data/intermediate/parts_filtered.csv`, `data/intermediate/sets_en
|
|||||||
- `data/intermediate/rare_parts_by_set.csv` : agrégat par set (comptes distincts, quantités, focus minifigs).
|
- `data/intermediate/rare_parts_by_set.csv` : agrégat par set (comptes distincts, quantités, focus minifigs).
|
||||||
|
|
||||||
Le tracé `figures/step27/rare_parts_per_set.png` met en scène le top des sets contenant le plus de variantes exclusives, en distinguant les pièces de minifigs et l’état de possession.
|
Le tracé `figures/step27/rare_parts_per_set.png` met en scène le top des sets contenant le plus de variantes exclusives, en distinguant les pièces de minifigs et l’état de possession.
|
||||||
|
|
||||||
|
### Étape 28 : richesse chromatique par set
|
||||||
|
|
||||||
|
1. `source .venv/bin/activate`
|
||||||
|
2. `python -m scripts.compute_color_richness`
|
||||||
|
3. `python -m scripts.plot_color_richness`
|
||||||
|
|
||||||
|
Le calcul lit `data/intermediate/colors_by_set.csv` et `data/intermediate/sets_enriched.csv` pour mesurer la diversité des palettes (nombre de couleurs distinctes hors rechanges, part des 3 couleurs principales, part de couleurs de minifigs). Il produit :
|
||||||
|
|
||||||
|
- `data/intermediate/color_richness_by_set.csv` : métriques détaillées par set (comptes et parts principales, possession).
|
||||||
|
- `data/intermediate/color_richness_by_year.csv` : agrégat annuel (moyenne, médiane, bornes de diversité et concentration).
|
||||||
|
|
||||||
|
Les graphiques `figures/step28/color_richness_boxplot.png`, `figures/step28/color_richness_top_sets.png` et `figures/step28/color_concentration_scatter.png` montrent respectivement la répartition annuelle, le top des sets les plus colorés et la concentration des palettes (part des 3 couleurs dominantes vs nombre de couleurs).
|
||||||
|
|||||||
130
lib/plots/color_richness.py
Normal file
130
lib/plots/color_richness.py
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
"""Visualisations de la richesse chromatique par set."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable, List, Tuple
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import read_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_richness_rows(path: Path) -> List[dict]:
    """Load the per-set colour richness metrics stored at ``path``.

    Thin wrapper that delegates to ``read_rows``; the rows are returned
    exactly as that helper produces them (presumably one dict per CSV line,
    keyed by column name — confirm in ``lib.rebrickable.stats``).
    """
    richness_rows = read_rows(path)
    return richness_rows
|
||||||
|
|
||||||
|
|
||||||
|
def build_boxplot_data(rows: Iterable[dict]) -> Tuple[List[List[int]], List[str]]:
    """Group the distinct-colour counts by year for a box plot.

    Returns a ``(data, years)`` pair: ``years`` holds the year labels sorted
    by their integer value, and ``data[i]`` holds the ``colors_distinct``
    values (converted to int) of the rows whose year is ``years[i]``, in
    input order.
    """
    counts_by_year: dict[str, List[int]] = {}
    for entry in rows:
        counts_by_year.setdefault(entry["year"], []).append(int(entry["colors_distinct"]))
    ordered_years = sorted(counts_by_year, key=int)
    return [counts_by_year[label] for label in ordered_years], ordered_years
|
||||||
|
|
||||||
|
|
||||||
|
def plot_richness_boxplot(richness_path: Path, destination_path: Path) -> None:
    """Draw the yearly box plot of distinct colour counts per set.

    Loads the richness rows from ``richness_path``. When there are none the
    function returns without creating any file; otherwise the figure is
    saved to ``destination_path`` at 170 dpi and then closed.
    """
    richness_rows = load_richness_rows(richness_path)
    if not richness_rows:
        return
    samples, year_labels = build_boxplot_data(richness_rows)

    accent = "#1f77b4"
    whisker_grey = "#555555"
    figure, axes = plt.subplots(figsize=(12, 7))
    artists = axes.boxplot(
        samples,
        orientation="vertical",
        patch_artist=True,
        tick_labels=year_labels,
        boxprops={"facecolor": accent, "alpha": 0.3},
        medianprops={"color": "#0d0d0d", "linewidth": 1.5},
        whiskerprops={"color": whisker_grey, "linestyle": "--"},
        capprops={"color": whisker_grey},
    )
    # The box outline reuses the (translucent) fill colour.
    for box_patch in artists["boxes"]:
        box_patch.set_edgecolor(accent)
    axes.set_xlabel("Année")
    axes.set_ylabel("Nombre de couleurs distinctes (hors rechanges)")
    axes.set_title("Richesse chromatique par set (répartition annuelle)")
    axes.grid(axis="y", linestyle="--", alpha=0.3)

    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=170)
    plt.close(figure)
|
||||||
|
|
||||||
|
|
||||||
|
def select_top_sets(rows: Iterable[dict], limit: int = 15) -> List[dict]:
    """Return the ``limit`` sets with the most distinct colours.

    Rows are ranked by descending ``colors_distinct``; ties are broken by
    ascending ``top3_share`` (the less concentrated palette comes first) and
    then by ``set_num``.
    """

    def ranking_key(entry: dict) -> tuple:
        return (-int(entry["colors_distinct"]), float(entry["top3_share"]), entry["set_num"])

    ranked = list(rows)
    ranked.sort(key=ranking_key)
    return ranked[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def plot_richness_top_sets(richness_path: Path, destination_path: Path) -> None:
    """Draw a horizontal bar chart of the sets with the most distinct colours.

    Loads the richness rows from ``richness_path`` and keeps the selection
    made by ``select_top_sets`` with its default limit. Sets whose
    ``in_collection`` value is ``"true"`` are drawn with a stronger opacity
    than the others. Nothing is written when the input has no rows;
    otherwise the figure is saved to ``destination_path`` at 170 dpi.
    """
    richness_rows = load_richness_rows(richness_path)
    if not richness_rows:
        return
    ranked = select_top_sets(richness_rows)
    positions = np.arange(len(ranked))
    bar_lengths = [int(entry["colors_distinct"]) for entry in ranked]
    tick_texts = [f"{entry['set_num']} · {entry['name']} ({entry['year']})" for entry in ranked]
    owned_flags = [entry["in_collection"] == "true" for entry in ranked]

    bar_color = "#2ca02c"
    owned_alpha, missing_alpha = 0.92, 0.45
    figure, axes = plt.subplots(figsize=(11, 8))
    # One barh call per set so each bar can carry its own opacity.
    for position, length, is_owned in zip(positions, bar_lengths, owned_flags):
        opacity = owned_alpha if is_owned else missing_alpha
        axes.barh(position, length, color=bar_color, alpha=opacity)
    axes.set_yticks(positions)
    axes.set_yticklabels(tick_texts)
    axes.invert_yaxis()  # first-ranked set at the top
    axes.set_xlabel("Couleurs distinctes (hors rechanges)")
    axes.set_title("Top des sets les plus colorés")
    axes.grid(axis="x", linestyle="--", alpha=0.3)
    legend_handles = [
        Patch(facecolor=bar_color, edgecolor="none", alpha=owned_alpha, label="Set possédé"),
        Patch(facecolor=bar_color, edgecolor="none", alpha=missing_alpha, label="Set manquant"),
    ]
    axes.legend(handles=legend_handles, loc="lower right", frameon=False)

    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=170)
    plt.close(figure)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_concentration_scatter(richness_path: Path, destination_path: Path) -> None:
    """Scatter the top-3 colour share against the number of distinct colours.

    Each row loaded from ``richness_path`` becomes one point: x is
    ``colors_distinct``, y is ``top3_share``. Points are blue when
    ``in_collection`` equals ``"true"`` and grey otherwise. Nothing is
    written when the input has no rows; otherwise the figure is saved to
    ``destination_path`` at 170 dpi.
    """
    richness_rows = load_richness_rows(richness_path)
    if not richness_rows:
        return
    owned_color, missing_color = "#1f77b4", "#bbbbbb"
    distinct_counts = [int(entry["colors_distinct"]) for entry in richness_rows]
    top3_shares = [float(entry["top3_share"]) for entry in richness_rows]
    owned_flags = [entry["in_collection"] == "true" for entry in richness_rows]
    point_colors = [owned_color if is_owned else missing_color for is_owned in owned_flags]

    figure, axes = plt.subplots(figsize=(10, 7))
    axes.scatter(distinct_counts, top3_shares, c=point_colors, alpha=0.7, s=32)
    axes.set_xlabel("Nombre de couleurs distinctes (hors rechanges)")
    axes.set_ylabel("Part des 3 couleurs principales")
    axes.set_title("Concentration des palettes")
    axes.grid(True, linestyle="--", alpha=0.3)
    legend_handles = [
        Patch(facecolor=owned_color, edgecolor="none", alpha=0.7, label="Set possédé"),
        Patch(facecolor=missing_color, edgecolor="none", alpha=0.7, label="Set manquant"),
    ]
    axes.legend(handles=legend_handles, loc="upper right", frameon=False)

    ensure_parent_dir(destination_path)
    figure.tight_layout()
    figure.savefig(destination_path, dpi=170)
    plt.close(figure)
|
||||||
150
lib/rebrickable/color_richness.py
Normal file
150
lib/rebrickable/color_richness.py
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
"""Métriques de richesse chromatique par set."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Sequence
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.stats import compute_median, read_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_colors_by_set(path: Path) -> List[dict]:
    """Load the ``colors_by_set.csv`` rows into memory.

    Delegates to ``read_rows`` and returns its result unchanged.
    """
    color_rows = read_rows(path)
    return color_rows
|
||||||
|
|
||||||
|
|
||||||
|
def load_sets(path: Path) -> Dict[str, dict]:
    """Index the enriched sets CSV by ``set_num``.

    Args:
        path: CSV file with a header line containing a ``set_num`` column.

    Returns:
        A mapping from ``set_num`` to the full row (a dict keyed by column
        name). When a ``set_num`` appears several times the last row wins.

    Raises:
        KeyError: if a row has no ``set_num`` column.
    """
    # newline="" is what the csv module requires: without it, universal
    # newline translation rewrites line breaks embedded in quoted fields.
    with path.open(newline="") as csv_file:
        return {row["set_num"]: row for row in csv.DictReader(csv_file)}
|
||||||
|
|
||||||
|
|
||||||
|
def group_by_set(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket colour rows by their ``set_num``.

    Sets appear in the result in order of first occurrence, and each bucket
    keeps its rows in input order.
    """
    buckets: Dict[str, List[dict]] = {}
    for entry in rows:
        buckets.setdefault(entry["set_num"], []).append(entry)
    return buckets
|
||||||
|
|
||||||
|
|
||||||
|
def build_richness_by_set(
    colors_by_set_path: Path,
    sets_enriched_path: Path,
) -> List[dict]:
    """Build the colour richness metrics of every set.

    Args:
        colors_by_set_path: CSV with one row per (set, colour) carrying the
            ``quantity_non_spare``, ``quantity_minifig``,
            ``quantity_non_minifig`` and ``color_name`` columns.
        sets_enriched_path: CSV providing ``set_id``, ``name``, ``year`` and
            ``in_collection`` for each ``set_num``.

    Returns:
        One dict of strings per set, sorted by descending
        ``colors_distinct`` then ``set_num``. Shares are formatted with four
        decimals.

    Raises:
        KeyError: if a set present in the colours file is missing from the
            enriched sets file.
    """
    colors = load_colors_by_set(colors_by_set_path)
    sets_lookup = load_sets(sets_enriched_path)
    richness: List[dict] = []
    for set_num, set_rows in group_by_set(colors).items():
        total_non_spare = sum(int(row["quantity_non_spare"]) for row in set_rows)
        # NOTE(review): every colour row counts as distinct, including rows
        # whose non-spare quantity might be 0 — confirm the upstream file
        # already excludes spare-only colours.
        colors_distinct = len(set_rows)
        colors_minifig = sum(1 for row in set_rows if int(row["quantity_minifig"]) > 0)
        colors_non_minifig = sum(1 for row in set_rows if int(row["quantity_non_minifig"]) > 0)
        # Stable descending sort: colours with equal quantities keep file order.
        sorted_by_quantity = sorted(
            set_rows,
            key=lambda row: int(row["quantity_non_spare"]),
            reverse=True,
        )
        top_color = sorted_by_quantity[0]
        top3_total = sum(int(row["quantity_non_spare"]) for row in sorted_by_quantity[:3])
        if total_non_spare:
            top_share = int(top_color["quantity_non_spare"]) / total_non_spare
            top3_share = top3_total / total_non_spare
        else:
            # A set whose non-spare quantities sum to zero has no meaningful
            # share; report 0 instead of raising ZeroDivisionError.
            top_share = 0.0
            top3_share = 0.0
        set_row = sets_lookup[set_num]
        richness.append(
            {
                "set_num": set_num,
                "set_id": set_row["set_id"],
                "name": set_row["name"],
                "year": set_row["year"],
                "in_collection": set_row["in_collection"],
                "colors_distinct": str(colors_distinct),
                "colors_minifig": str(colors_minifig),
                "colors_non_minifig": str(colors_non_minifig),
                "total_parts_non_spare": str(total_non_spare),
                "top_color_name": top_color["color_name"],
                "top_color_share": f"{top_share:.4f}",
                "top3_share": f"{top3_share:.4f}",
            }
        )
    richness.sort(key=lambda row: (-int(row["colors_distinct"]), row["set_num"]))
    return richness
|
||||||
|
|
||||||
|
|
||||||
|
def build_richness_by_year(richness_rows: Iterable[dict]) -> List[dict]:
    """Aggregate the per-set richness metrics by year.

    Args:
        richness_rows: per-set rows carrying ``year``, ``colors_distinct``
            and ``top3_share`` as strings.

    Returns:
        One dict of strings per year, sorted by the integer value of the
        year, with the average, median, maximum and minimum of
        ``colors_distinct`` and the average and median of ``top3_share``.
    """
    grouped: Dict[str, List[dict]] = {}
    for row in richness_rows:
        grouped.setdefault(row["year"], []).append(row)

    yearly: List[dict] = []
    for year, rows in grouped.items():
        distinct_counts = [int(row["colors_distinct"]) for row in rows]
        top3_shares = [float(row["top3_share"]) for row in rows]
        average_distinct = sum(distinct_counts) / len(distinct_counts)
        median_distinct = compute_median(distinct_counts)
        average_top3 = sum(top3_shares) / len(top3_shares)
        # The median helper is fed integers, so shares are scaled to
        # ten-thousandths first. round() is required here: the float product
        # can land just below the exact integer, and int() would truncate it
        # to one ten-thousandth less.
        scaled_shares = [round(share * 10000) for share in top3_shares]
        median_top3 = compute_median(scaled_shares) / 10000
        yearly.append(
            {
                "year": year,
                "average_colors_distinct": f"{average_distinct:.2f}",
                "median_colors_distinct": f"{median_distinct:.2f}",
                "max_colors_distinct": str(max(distinct_counts)),
                "min_colors_distinct": str(min(distinct_counts)),
                "average_top3_share": f"{average_top3:.4f}",
                "median_top3_share": f"{median_top3:.4f}",
            }
        )
    yearly.sort(key=lambda row: int(row["year"]))
    return yearly
|
||||||
|
|
||||||
|
|
||||||
|
def write_richness_by_set(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the per-set richness metrics to ``destination_path`` as CSV.

    The parent directory is prepared through ``ensure_parent_dir`` and the
    file is overwritten. A header line is always written, followed by one
    line per row in the given order.
    """
    columns = [
        "set_num",
        "set_id",
        "name",
        "year",
        "in_collection",
        "colors_distinct",
        "colors_minifig",
        "colors_non_minifig",
        "total_parts_non_spare",
        "top_color_name",
        "top_color_share",
        "top3_share",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def write_richness_by_year(destination_path: Path, rows: Sequence[dict]) -> None:
    """Write the yearly richness aggregate to ``destination_path`` as CSV.

    The parent directory is prepared through ``ensure_parent_dir`` and the
    file is overwritten. A header line is always written, followed by one
    line per row in the given order.
    """
    columns = [
        "year",
        "average_colors_distinct",
        "median_colors_distinct",
        "max_colors_distinct",
        "min_colors_distinct",
        "average_top3_share",
        "median_top3_share",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
|
||||||
28
scripts/compute_color_richness.py
Normal file
28
scripts/compute_color_richness.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
"""Calcule la richesse chromatique par set et par année."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.color_richness import (
|
||||||
|
build_richness_by_set,
|
||||||
|
build_richness_by_year,
|
||||||
|
write_richness_by_set,
|
||||||
|
write_richness_by_year,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# All inputs and outputs of this step live in the intermediate data folder.
_INTERMEDIATE_DIR = Path("data/intermediate")

COLORS_BY_SET_PATH = _INTERMEDIATE_DIR / "colors_by_set.csv"
SETS_ENRICHED_PATH = _INTERMEDIATE_DIR / "sets_enriched.csv"
RICHNESS_BY_SET_PATH = _INTERMEDIATE_DIR / "color_richness_by_set.csv"
RICHNESS_BY_YEAR_PATH = _INTERMEDIATE_DIR / "color_richness_by_year.csv"


def main() -> None:
    """Build the per-set and per-year colour richness CSV files."""
    # Both tables are computed before anything is written.
    per_set_rows = build_richness_by_set(COLORS_BY_SET_PATH, SETS_ENRICHED_PATH)
    per_year_rows = build_richness_by_year(per_set_rows)
    write_richness_by_set(RICHNESS_BY_SET_PATH, per_set_rows)
    write_richness_by_year(RICHNESS_BY_YEAR_PATH, per_year_rows)


if __name__ == "__main__":
    main()
|
||||||
26
scripts/plot_color_richness.py
Normal file
26
scripts/plot_color_richness.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
"""Trace les graphiques de richesse chromatique par set."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.color_richness import (
|
||||||
|
plot_concentration_scatter,
|
||||||
|
plot_richness_boxplot,
|
||||||
|
plot_richness_top_sets,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Every figure of this step is written to the same folder.
_FIGURES_DIR = Path("figures/step28")

RICHNESS_PATH = Path("data/intermediate/color_richness_by_set.csv")
BOXPLOT_DESTINATION = _FIGURES_DIR / "color_richness_boxplot.png"
TOP_DESTINATION = _FIGURES_DIR / "color_richness_top_sets.png"
CONCENTRATION_DESTINATION = _FIGURES_DIR / "color_concentration_scatter.png"


def main() -> None:
    """Render the three colour richness figures from the per-set CSV."""
    renderers = (
        (plot_richness_boxplot, BOXPLOT_DESTINATION),
        (plot_richness_top_sets, TOP_DESTINATION),
        (plot_concentration_scatter, CONCENTRATION_DESTINATION),
    )
    for render, destination in renderers:
        render(RICHNESS_PATH, destination)


if __name__ == "__main__":
    main()
|
||||||
196
tests/test_color_richness.py
Normal file
196
tests/test_color_richness.py
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
"""Tests des métriques de richesse chromatique."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.color_richness import (
|
||||||
|
build_richness_by_set,
|
||||||
|
build_richness_by_year,
|
||||||
|
write_richness_by_set,
|
||||||
|
write_richness_by_year,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a small CSV fixture: the header line followed by ``rows``."""
    with path.open("w", newline="") as output:
        csv.writer(output).writerows([headers, *rows])
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_richness_by_set_computes_shares_and_counts(tmp_path: Path) -> None:
    """Check the per-set colour counts and the dominant colour shares."""
    color_columns = [
        "set_num",
        "set_id",
        "year",
        "color_rgb",
        "is_translucent",
        "color_name",
        "quantity_total",
        "quantity_non_spare",
        "quantity_minifig",
        "quantity_non_minifig",
    ]
    color_lines = [
        ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "10", "10", "0", "10"],
        ["1000-1", "1000", "2020", "BBBBBB", "false", "Blue", "5", "5", "5", "0"],
        ["2000-1", "2000", "2021", "CCCCCC", "true", "Trans", "3", "3", "0", "3"],
    ]
    set_lines = [
        ["1000-1", "1000", "Set A", "2020", "true"],
        ["2000-1", "2000", "Set B", "2021", "false"],
    ]
    colors_by_set = tmp_path / "colors_by_set.csv"
    sets_enriched = tmp_path / "sets_enriched.csv"
    write_csv(colors_by_set, color_columns, color_lines)
    write_csv(sets_enriched, ["set_num", "set_id", "name", "year", "in_collection"], set_lines)

    richness = build_richness_by_set(colors_by_set, sets_enriched)

    richness_columns = (
        "set_num",
        "set_id",
        "name",
        "year",
        "in_collection",
        "colors_distinct",
        "colors_minifig",
        "colors_non_minifig",
        "total_parts_non_spare",
        "top_color_name",
        "top_color_share",
        "top3_share",
    )
    expected_values = [
        ("1000-1", "1000", "Set A", "2020", "true", "2", "1", "1", "15", "Gray", "0.6667", "1.0000"),
        ("2000-1", "2000", "Set B", "2021", "false", "1", "0", "1", "3", "Trans", "1.0000", "1.0000"),
    ]
    # The set with the most distinct colours is expected first.
    assert richness == [dict(zip(richness_columns, values)) for values in expected_values]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_richness_by_year_aggregates_metrics(tmp_path: Path) -> None:
    """Check the yearly aggregation of the per-set metrics."""
    set_columns = (
        "set_num",
        "set_id",
        "name",
        "year",
        "in_collection",
        "colors_distinct",
        "colors_minifig",
        "colors_non_minifig",
        "total_parts_non_spare",
        "top_color_name",
        "top_color_share",
        "top3_share",
    )
    set_values = [
        ("s1", "1", "A", "2020", "true", "4", "1", "3", "10", "Red", "0.5000", "0.9000"),
        ("s2", "2", "B", "2020", "false", "2", "0", "2", "5", "Blue", "0.6000", "1.0000"),
        ("s3", "3", "C", "2021", "true", "3", "1", "3", "7", "Green", "0.5714", "1.0000"),
    ]
    richness_rows = [dict(zip(set_columns, values)) for values in set_values]

    yearly = build_richness_by_year(richness_rows)

    year_columns = (
        "year",
        "average_colors_distinct",
        "median_colors_distinct",
        "max_colors_distinct",
        "min_colors_distinct",
        "average_top3_share",
        "median_top3_share",
    )
    expected_values = [
        ("2020", "3.00", "3.00", "4", "2", "0.9500", "0.9500"),
        ("2021", "3.00", "3.00", "3", "3", "1.0000", "1.0000"),
    ]
    assert yearly == [dict(zip(year_columns, values)) for values in expected_values]
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_richness_outputs_csv(tmp_path: Path) -> None:
    """Check that both writers serialise their rows faithfully.

    Beyond the existence of the files, the content is read back and compared
    with the input rows so that an empty or mangled output fails the test.
    """
    by_set_path = tmp_path / "color_richness_by_set.csv"
    by_year_path = tmp_path / "color_richness_by_year.csv"
    sample_set_rows = [
        {
            "set_num": "s1",
            "set_id": "1",
            "name": "A",
            "year": "2020",
            "in_collection": "true",
            "colors_distinct": "1",
            "colors_minifig": "1",
            "colors_non_minifig": "1",
            "total_parts_non_spare": "5",
            "top_color_name": "Red",
            "top_color_share": "1.0000",
            "top3_share": "1.0000",
        }
    ]
    sample_year_rows = [
        {
            "year": "2020",
            "average_colors_distinct": "1.00",
            "median_colors_distinct": "1.00",
            "max_colors_distinct": "1",
            "min_colors_distinct": "1",
            "average_top3_share": "1.0000",
            "median_top3_share": "1.0000",
        }
    ]

    write_richness_by_set(by_set_path, sample_set_rows)
    write_richness_by_year(by_year_path, sample_year_rows)

    assert by_set_path.exists()
    assert by_year_path.exists()
    # Round-trip: every value is a string, so the rows must come back equal.
    with by_set_path.open(newline="") as csv_file:
        assert list(csv.DictReader(csv_file)) == sample_set_rows
    with by_year_path.open(newline="") as csv_file:
        assert list(csv.DictReader(csv_file)) == sample_year_rows
|
||||||
38
tests/test_color_richness_plot.py
Normal file
38
tests/test_color_richness_plot.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
"""Tests des visuels de richesse chromatique."""
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.plots.color_richness import (
|
||||||
|
plot_concentration_scatter,
|
||||||
|
plot_richness_boxplot,
|
||||||
|
plot_richness_top_sets,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
|
||||||
|
|
||||||
|
def test_plot_richness_outputs_images(tmp_path: Path) -> None:
    """Check that the three plotting functions each produce a non-empty file."""
    csv_lines = [
        "set_num,set_id,name,year,in_collection,colors_distinct,colors_minifig,colors_non_minifig,total_parts_non_spare,top_color_name,top_color_share,top3_share",
        "1000-1,1000,Set A,2020,true,6,2,5,50,Red,0.4000,0.6500",
        "2000-1,2000,Set B,2021,false,4,1,3,30,Blue,0.5000,0.7500",
        "3000-1,3000,Set C,2021,true,5,1,4,40,Green,0.3000,0.5500",
    ]
    richness_path = tmp_path / "color_richness_by_set.csv"
    richness_path.write_text("".join(f"{line}\n" for line in csv_lines))

    # The target folder does not exist yet: the plot functions must create it.
    figures_dir = tmp_path / "figures" / "step28"
    boxplot_dest = figures_dir / "color_richness_boxplot.png"
    top_dest = figures_dir / "color_richness_top_sets.png"
    scatter_dest = figures_dir / "color_concentration_scatter.png"

    plot_richness_boxplot(richness_path, boxplot_dest)
    plot_richness_top_sets(richness_path, top_dest)
    plot_concentration_scatter(richness_path, scatter_dest)

    destinations = (boxplot_dest, top_dest, scatter_dest)
    for destination in destinations:
        assert destination.exists()
    for destination in destinations:
        assert destination.stat().st_size > 0
|
||||||
@ -102,17 +102,6 @@ def test_build_rare_parts_detects_exclusive_variations(tmp_path: Path) -> None:
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
assert rare_by_set == [
|
assert rare_by_set == [
|
||||||
{
|
|
||||||
"set_num": "1000-1",
|
|
||||||
"set_id": "1000",
|
|
||||||
"name": "Set A",
|
|
||||||
"year": "2020",
|
|
||||||
"in_collection": "true",
|
|
||||||
"rare_parts_distinct": "1",
|
|
||||||
"rare_parts_quantity": "1",
|
|
||||||
"rare_minifig_parts_distinct": "1",
|
|
||||||
"rare_minifig_quantity": "1",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"set_num": "2000-1",
|
"set_num": "2000-1",
|
||||||
"set_id": "2000",
|
"set_id": "2000",
|
||||||
@ -124,6 +113,17 @@ def test_build_rare_parts_detects_exclusive_variations(tmp_path: Path) -> None:
|
|||||||
"rare_minifig_parts_distinct": "0",
|
"rare_minifig_parts_distinct": "0",
|
||||||
"rare_minifig_quantity": "0",
|
"rare_minifig_quantity": "0",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"set_num": "1000-1",
|
||||||
|
"set_id": "1000",
|
||||||
|
"name": "Set A",
|
||||||
|
"year": "2020",
|
||||||
|
"in_collection": "true",
|
||||||
|
"rare_parts_distinct": "1",
|
||||||
|
"rare_parts_quantity": "1",
|
||||||
|
"rare_minifig_parts_distinct": "1",
|
||||||
|
"rare_minifig_quantity": "1",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user