1

Ajoute l'agrégation annuelle des palettes de couleurs

This commit is contained in:
Richard Dern 2025-12-01 22:40:56 +01:00
parent f8a2464447
commit fb2ef5f16f
4 changed files with 267 additions and 0 deletions

View File

@ -170,3 +170,10 @@ Le script lit `data/intermediate/parts_filtered.csv` et `data/raw/colors.csv`, p
2. `python -m scripts.build_colors_by_set`
Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, les quantités totales, hors rechanges, issues des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et matrices de palette.
### Étape 14 : évolution annuelle des palettes
1. `source .venv/bin/activate`
2. `python -m scripts.compute_colors_timeline`
Le script lit `data/intermediate/colors_by_set.csv` et produit deux agrégats : `data/intermediate/colors_timeline.csv` (statistiques annuelles : nombre de couleurs distinctes, nouvelles, perdues, part des translucides, top couleurs) et `data/intermediate/colors_year_color_matrix.csv` (quantités totales année × couleur) pour préparer heatmaps et analyses temporelles.

View File

@ -0,0 +1,121 @@
"""Agrégation annuelle des palettes de couleurs."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from lib.filesystem import ensure_parent_dir
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load every row of the colors_by_set CSV into memory.

    Args:
        colors_by_set_path: Location of the CSV file to read. Its first line
            is used as the header row.

    Returns:
        One dict per data row, keyed by the header names. All values are
        strings, as produced by ``csv.DictReader``.
    """
    # The csv module expects files opened with newline="" so that line breaks
    # embedded in quoted fields reach the parser untouched instead of being
    # rewritten by universal-newline translation.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket rows by their ``year`` value.

    Years appear in the result in the order they are first encountered, and
    rows keep their input order inside each bucket.
    """
    buckets: Dict[str, List[dict]] = {}
    for entry in rows:
        # setdefault creates the year's bucket on first sight, then reuses it.
        buckets.setdefault(entry["year"], []).append(entry)
    return buckets
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build per-year palette metrics from colors_by_set rows.

    A color is identified by the pair ``(color_rgb, is_translucent)``. Years
    are processed in ascending numeric order, and each result entry holds:

    * ``colors_distinct``: number of colors present that year;
    * ``colors_new``: colors not seen in any earlier processed year;
    * ``colors_lost``: colors of the previously processed year that are
      absent from this one;
    * ``share_translucent``: translucent quantity divided by total quantity,
      formatted with four decimals (``0.0000`` when the total is zero);
    * ``total_quantity``: sum of ``quantity_total`` over the year;
    * ``top_colors``: up to five colors rendered as ``"name (quantity)"``,
      ordered by descending quantity, then name, then translucency flag.

    Args:
        rows: Dict rows providing ``year``, ``color_rgb``, ``is_translucent``,
            ``color_name`` and ``quantity_total`` as strings.

    Returns:
        One dict of string values per year, ordered by year.

    Raises:
        ValueError: If a ``year`` or ``quantity_total`` is not an integer.
        KeyError: If a row lacks one of the required columns.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        # A single pass over the year's rows yields every aggregate we need.
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            totals_by_color[key] = totals_by_color.get(key, 0) + int(row["quantity_total"])
            # If one color carries several labels, the last one read wins.
            names_by_color[key] = row["color_name"]

        colors = set(totals_by_color)
        total_quantity = sum(totals_by_color.values())
        translucent_quantity = sum(
            quantity
            for (_, is_translucent), quantity in totals_by_color.items()
            if is_translucent == "true"
        )
        # A year whose quantities sum to zero would otherwise raise
        # ZeroDivisionError; report a null share instead.
        share_translucent = translucent_quantity / total_quantity if total_quantity else 0.0

        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(
            f"{names_by_color[key]} ({value})" for key, value in top_colors
        )

        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Aggregate total quantities into a year x color matrix.

    Rows sharing the same year, RGB value, translucency flag and color name
    are summed into one cell. The result is ordered by numeric year, then
    color name, then translucency flag, and every value is a string.
    """
    quantity_by_cell: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (
            entry["year"],
            entry["color_rgb"],
            entry["is_translucent"],
            entry["color_name"],
        )
        quantity_by_cell[cell] = quantity_by_cell.get(cell, 0) + int(entry["quantity_total"])

    # Cell tuple layout: (year, color_rgb, is_translucent, color_name).
    ordered_cells = sorted(
        quantity_by_cell.items(),
        key=lambda item: (int(item[0][0]), item[0][3], item[0][2]),
    )
    return [
        {
            "year": year,
            "color_rgb": color_rgb,
            "is_translucent": is_translucent,
            "color_name": color_name,
            "quantity_total": str(quantity),
        }
        for (year, color_rgb, is_translucent, color_name), quantity in ordered_cells
    ]
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to their own CSV file.

    ``ensure_parent_dir`` is called on the destination first, then a header
    line is written followed by one line per entry of ``stats``.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats)
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialize the year x color matrix to CSV so it can feed heatmaps.

    ``ensure_parent_dir`` is called on the destination first, then a header
    line is written followed by one line per entry of ``rows``.
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)

View File

@ -0,0 +1,29 @@
"""Calcule l'évolution annuelle des palettes de couleurs."""
from pathlib import Path
from lib.rebrickable.colors_timeline import (
build_year_color_matrix,
compute_yearly_stats,
load_colors_by_set,
write_year_color_matrix,
write_yearly_stats,
)
# Input CSV loaded by main(): per-set, per-color quantities.
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
# Output CSV written by main(): yearly palette statistics.
TIMELINE_PATH = Path("data/intermediate/colors_timeline.csv")
# Output CSV written by main(): year x color total quantities.
MATRIX_PATH = Path("data/intermediate/colors_year_color_matrix.csv")
def main() -> None:
    """Build the yearly aggregates and the year x color matrix, then save both."""
    source_rows = load_colors_by_set(COLORS_BY_SET_PATH)

    # Both aggregates are computed before anything is written to disk.
    yearly_stats = compute_yearly_stats(source_rows)
    year_color_matrix = build_year_color_matrix(source_rows)

    write_yearly_stats(TIMELINE_PATH, yearly_stats)
    write_year_color_matrix(MATRIX_PATH, year_color_matrix)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,110 @@
"""Tests des agrégats annuels de palettes de couleurs."""
import csv
from pathlib import Path
from lib.rebrickable.colors_timeline import (
build_year_color_matrix,
compute_yearly_stats,
load_colors_by_set,
write_year_color_matrix,
write_yearly_stats,
)
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a minimal CSV file (header line, then data rows) for the tests."""
    with path.open("w", newline="") as handle:
        # The header is simply the first record handed to the writer.
        csv.writer(handle).writerows([headers, *rows])
def test_compute_yearly_stats(tmp_path: Path) -> None:
    """Check the yearly metrics and the year x color matrix end to end."""
    source_path = tmp_path / "colors_by_set.csv"
    timeline_path = tmp_path / "colors_timeline.csv"
    matrix_path = tmp_path / "colors_year_color_matrix.csv"

    # Fixture: two sets, one per year, sharing the translucent color BBBBBB.
    source_headers = [
        "set_num",
        "set_id",
        "year",
        "color_rgb",
        "is_translucent",
        "color_name",
        "quantity_total",
        "quantity_non_spare",
        "quantity_minifig",
        "quantity_non_minifig",
    ]
    source_rows = [
        ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "2", "2", "0", "2"],
        ["1000-1", "1000", "2020", "BBBBBB", "true", "Trans-Black", "1", "1", "0", "1"],
        ["2000-1", "2000", "2021", "BBBBBB", "true", "Trans-Black", "3", "3", "0", "3"],
        ["2000-1", "2000", "2021", "CCCCCC", "false", "Blue", "4", "4", "4", "0"],
    ]
    write_csv(source_path, source_headers, source_rows)

    rows = load_colors_by_set(source_path)
    timeline = compute_yearly_stats(rows)
    matrix = build_year_color_matrix(rows)
    write_yearly_stats(timeline_path, timeline)
    write_year_color_matrix(matrix_path, matrix)

    # Read both outputs back from disk so the writers are covered as well.
    with timeline_path.open() as csv_file:
        timeline_rows = list(csv.DictReader(csv_file))
    with matrix_path.open() as csv_file:
        matrix_rows = list(csv.DictReader(csv_file))

    timeline_fields = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    expected_timeline = [
        dict(zip(timeline_fields, values))
        for values in (
            ("2020", "2", "2", "0", "0.3333", "3", "Gray (2), Trans-Black (1)"),
            ("2021", "2", "1", "1", "0.4286", "7", "Blue (4), Trans-Black (3)"),
        )
    ]
    assert timeline_rows == expected_timeline

    matrix_fields = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    expected_matrix = [
        dict(zip(matrix_fields, values))
        for values in (
            ("2020", "AAAAAA", "false", "Gray", "2"),
            ("2020", "BBBBBB", "true", "Trans-Black", "1"),
            ("2021", "CCCCCC", "false", "Blue", "4"),
            ("2021", "BBBBBB", "true", "Trans-Black", "3"),
        )
    ]
    assert matrix_rows == expected_matrix