1

Ajoute l'agrégation annuelle des palettes de couleurs

This commit is contained in:
Richard Dern 2025-12-01 22:40:56 +01:00
parent f8a2464447
commit fb2ef5f16f
4 changed files with 267 additions and 0 deletions

View File

@ -170,3 +170,10 @@ Le script lit `data/intermediate/parts_filtered.csv` et `data/raw/colors.csv`, p
2. `python -m scripts.build_colors_by_set`
Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, les quantités totales, hors rechanges, issues des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et matrices de palette.
### Étape 14 : évolution annuelle des palettes
1. `source .venv/bin/activate`
2. `python -m scripts.compute_colors_timeline`
Le script lit `data/intermediate/colors_by_set.csv` et produit deux agrégats : `data/intermediate/colors_timeline.csv` (statistiques annuelles : nombre de couleurs distinctes, nouvelles, perdues, part des translucides, top couleurs) et `data/intermediate/colors_year_color_matrix.csv` (quantités totales année × couleur) pour préparer heatmaps et analyses temporelles.

View File

@ -0,0 +1,121 @@
"""Agrégation annuelle des palettes de couleurs."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from lib.filesystem import ensure_parent_dir
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load every row of the colors_by_set CSV file into memory.

    Args:
        colors_by_set_path: Location of the CSV file; its first line is used
            as the header row.

    Returns:
        One dict per data row, keyed by the header names, with the values
        exactly as parsed by ``csv.DictReader``.
    """
    # The csv module asks for files opened with newline="" so that it does its
    # own newline handling; without it, line breaks embedded in quoted fields
    # are rewritten by universal-newline translation.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket rows by the value of their "year" field.

    Args:
        rows: Records that each expose a "year" key.

    Returns:
        A mapping from year value to the list of rows carrying it. Years
        appear in order of first occurrence and rows keep their input order.
    """
    by_year: Dict[str, List[dict]] = {}
    for entry in rows:
        # setdefault creates the bucket on first sight of a year.
        by_year.setdefault(entry["year"], []).append(entry)
    return by_year
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build one record of palette metrics per year.

    A color is identified by the pair (color_rgb, is_translucent). Years are
    processed in ascending numeric order.

    Args:
        rows: Records exposing the keys "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total" (text parseable by ``int``).

    Returns:
        One dict per year, in ascending year order, whose values are all
        strings:
          - "year": the year as found in the input;
          - "colors_distinct": number of distinct colors used that year;
          - "colors_new": colors that appear in no earlier year;
          - "colors_lost": colors used in the previously processed year but
            not in this one;
          - "share_translucent": translucent quantity divided by the total
            quantity, with four decimals ("0.0000" when the total is zero);
          - "total_quantity": sum of "quantity_total" over the year;
          - "top_colors": up to five "name (quantity)" labels joined by ", ",
            ordered by descending quantity, then name, then translucency flag.

    Raises:
        KeyError: If a row lacks one of the expected keys.
        ValueError: If a year or a quantity is not an integer literal.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        translucent_quantity = 0
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            quantity = int(row["quantity_total"])
            totals_by_color[key] = totals_by_color.get(key, 0) + quantity
            # The last name seen for a color wins when names differ.
            names_by_color[key] = row["color_name"]
            if row["is_translucent"] == "true":
                translucent_quantity += quantity
        colors = set(totals_by_color)
        total_quantity = sum(totals_by_color.values())
        # A year whose quantities sum to zero has no meaningful share; report
        # 0 instead of failing with ZeroDivisionError.
        share_translucent = translucent_quantity / total_quantity if total_quantity else 0.0
        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(f"{names_by_color[key]} ({value})" for key, value in top_colors)
        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Aggregate total quantities per (year, color) cell.

    Args:
        rows: Records exposing the keys "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total" (text parseable by ``int``).

    Returns:
        One dict per distinct (year, color_rgb, is_translucent, color_name)
        combination, with the summed "quantity_total" rendered as a string.
        Records are ordered by numeric year, then color name, then
        translucency flag.
    """
    quantities: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (entry["year"], entry["color_rgb"], entry["is_translucent"], entry["color_name"])
        quantities[cell] = quantities.get(cell, 0) + int(entry["quantity_total"])

    columns = ("year", "color_rgb", "is_translucent", "color_name")
    records = [
        {**dict(zip(columns, cell)), "quantity_total": str(amount)}
        for cell, amount in quantities.items()
    ]
    return sorted(
        records,
        key=lambda record: (int(record["year"]), record["color_name"], record["is_translucent"]),
    )
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to a dedicated CSV file.

    Args:
        destination_path: File to create or overwrite.
        stats: Records whose keys are among the columns listed below; they
            are written in iteration order after a header line.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    # NOTE(review): ensure_parent_dir comes from lib.filesystem and presumably
    # creates the missing parent directory; confirm against that module.
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats)
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialize the year × color matrix to CSV, e.g. to feed heatmaps.

    Args:
        destination_path: File to create or overwrite.
        rows: Records with the keys "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total"; they are written in iteration
            order after a header line.
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    # NOTE(review): ensure_parent_dir comes from lib.filesystem and presumably
    # creates the missing parent directory; confirm against that module.
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)

View File

@ -0,0 +1,29 @@
"""Calcule l'évolution annuelle des palettes de couleurs."""
from pathlib import Path
from lib.rebrickable.colors_timeline import (
build_year_color_matrix,
compute_yearly_stats,
load_colors_by_set,
write_year_color_matrix,
write_yearly_stats,
)
# Input read by main(): per-set, per-color quantities.
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
# Output written by main(): one row of palette statistics per year.
TIMELINE_PATH = Path("data/intermediate/colors_timeline.csv")
# Output written by main(): total quantities per (year, color) pair.
MATRIX_PATH = Path("data/intermediate/colors_year_color_matrix.csv")
def main() -> None:
    """Build the yearly aggregates and the year × color matrix, then write both.

    Reads COLORS_BY_SET_PATH once, derives both aggregates from the same rows,
    and writes them to TIMELINE_PATH and MATRIX_PATH respectively.
    """
    source_rows = load_colors_by_set(COLORS_BY_SET_PATH)
    yearly_stats = compute_yearly_stats(source_rows)
    year_color_matrix = build_year_color_matrix(source_rows)
    write_yearly_stats(TIMELINE_PATH, yearly_stats)
    write_year_color_matrix(MATRIX_PATH, year_color_matrix)


# Run the aggregation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,110 @@
"""Tests des agrégats annuels de palettes de couleurs."""
import csv
from pathlib import Path
from lib.rebrickable.colors_timeline import (
build_year_color_matrix,
compute_yearly_stats,
load_colors_by_set,
write_year_color_matrix,
write_yearly_stats,
)
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a simple CSV file (header line followed by the rows) for tests."""
    with path.open("w", newline="") as output:
        # Header and data rows go through the same writer in a single call.
        csv.writer(output).writerows([headers, *rows])
def test_compute_yearly_stats(tmp_path: Path) -> None:
    """Check the yearly palette metrics and the year × color matrix end to end.

    A two-year fixture is written to disk, loaded, aggregated, written back
    and re-read, so that both the computations and the CSV round trip are
    covered.
    """
    source_path = tmp_path / "colors_by_set.csv"
    timeline_path = tmp_path / "colors_timeline.csv"
    matrix_path = tmp_path / "colors_year_color_matrix.csv"

    headers = [
        "set_num",
        "set_id",
        "year",
        "color_rgb",
        "is_translucent",
        "color_name",
        "quantity_total",
        "quantity_non_spare",
        "quantity_minifig",
        "quantity_non_minifig",
    ]
    fixture_rows = [
        ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "2", "2", "0", "2"],
        ["1000-1", "1000", "2020", "BBBBBB", "true", "Trans-Black", "1", "1", "0", "1"],
        ["2000-1", "2000", "2021", "BBBBBB", "true", "Trans-Black", "3", "3", "0", "3"],
        ["2000-1", "2000", "2021", "CCCCCC", "false", "Blue", "4", "4", "4", "0"],
    ]
    write_csv(source_path, headers, fixture_rows)

    loaded = load_colors_by_set(source_path)
    computed_timeline = compute_yearly_stats(loaded)
    computed_matrix = build_year_color_matrix(loaded)
    write_yearly_stats(timeline_path, computed_timeline)
    write_year_color_matrix(matrix_path, computed_matrix)

    with timeline_path.open() as timeline_file:
        timeline_rows = list(csv.DictReader(timeline_file))
    with matrix_path.open() as matrix_file:
        matrix_rows = list(csv.DictReader(matrix_file))

    expected_timeline = [
        {
            "year": "2020",
            "colors_distinct": "2",
            "colors_new": "2",
            "colors_lost": "0",
            "share_translucent": "0.3333",
            "total_quantity": "3",
            "top_colors": "Gray (2), Trans-Black (1)",
        },
        {
            "year": "2021",
            "colors_distinct": "2",
            "colors_new": "1",
            "colors_lost": "1",
            "share_translucent": "0.4286",
            "total_quantity": "7",
            "top_colors": "Blue (4), Trans-Black (3)",
        },
    ]
    # Within a year, matrix rows are ordered by color name.
    expected_matrix = [
        {
            "year": "2020",
            "color_rgb": "AAAAAA",
            "is_translucent": "false",
            "color_name": "Gray",
            "quantity_total": "2",
        },
        {
            "year": "2020",
            "color_rgb": "BBBBBB",
            "is_translucent": "true",
            "color_name": "Trans-Black",
            "quantity_total": "1",
        },
        {
            "year": "2021",
            "color_rgb": "CCCCCC",
            "is_translucent": "false",
            "color_name": "Blue",
            "quantity_total": "4",
        },
        {
            "year": "2021",
            "color_rgb": "BBBBBB",
            "is_translucent": "true",
            "color_name": "Trans-Black",
            "quantity_total": "3",
        },
    ]

    assert timeline_rows == expected_timeline
    assert matrix_rows == expected_matrix