From fb2ef5f16faba9763e095109e5ed5f21279ce97b Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Mon, 1 Dec 2025 22:40:56 +0100 Subject: [PATCH] =?UTF-8?q?Ajoute=20l'agr=C3=A9gation=20annuelle=20des=20p?= =?UTF-8?q?alettes=20de=20couleurs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 7 ++ lib/rebrickable/colors_timeline.py | 121 +++++++++++++++++++++++++++++ scripts/compute_colors_timeline.py | 29 +++++++ tests/test_colors_timeline.py | 110 ++++++++++++++++++++++++++ 4 files changed, 267 insertions(+) create mode 100644 lib/rebrickable/colors_timeline.py create mode 100644 scripts/compute_colors_timeline.py create mode 100644 tests/test_colors_timeline.py diff --git a/README.md b/README.md index a052b7f..c69b17d 100644 --- a/README.md +++ b/README.md @@ -170,3 +170,10 @@ Le script lit `data/intermediate/parts_filtered.csv` et `data/raw/colors.csv`, p 2. `python -m scripts.build_colors_by_set` Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, les quantités totales, hors rechanges, issues des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et matrices de palette. + +### Étape 14 : évolution annuelle des palettes + +1. `source .venv/bin/activate` +2. `python -m scripts.compute_colors_timeline` + +Le script lit `data/intermediate/colors_by_set.csv` et produit deux agrégats : `data/intermediate/colors_timeline.csv` (statistiques annuelles : nombre de couleurs distinctes, nouvelles, perdues, part des translucides, top couleurs) et `data/intermediate/colors_year_color_matrix.csv` (quantités totales année × couleur) pour préparer heatmaps et analyses temporelles. 
diff --git a/lib/rebrickable/colors_timeline.py b/lib/rebrickable/colors_timeline.py
new file mode 100644
index 0000000..34cfc34
--- /dev/null
+++ b/lib/rebrickable/colors_timeline.py
@@ -0,0 +1,121 @@
+"""Yearly aggregation of color palettes."""
+
+import csv
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+
+from lib.filesystem import ensure_parent_dir
+
+
+def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
+    """Load colors_by_set.csv into memory and return one dict per data row."""
+    # newline="" lets the csv module decode line endings itself, as its documentation recommends.
+    with colors_by_set_path.open(newline="") as csv_file:
+        return list(csv.DictReader(csv_file))
+
+
+def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
+    """Group the rows by their "year" value, keeping the input order inside each group."""
+    grouped: Dict[str, List[dict]] = {}
+    for row in rows:
+        grouped.setdefault(row["year"], []).append(row)
+    return grouped
+
+
+def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
+    """Build the yearly palette metrics: one dict of strings per year, in ascending year order.
+
+    A color is identified by its (color_rgb, is_translucent) pair. "colors_new" counts colors
+    absent from every earlier year, "colors_lost" counts colors of the preceding year in the
+    data that are missing from this one, and "top_colors" lists up to five largest quantities.
+    """
+    grouped = group_rows_by_year(rows)
+    seen_colors: set[Tuple[str, str]] = set()
+    previous_colors: set[Tuple[str, str]] = set()
+    stats: List[dict] = []
+    for year in sorted(grouped, key=int):
+        year_rows = grouped[year]
+        colors = {(row["color_rgb"], row["is_translucent"]) for row in year_rows}
+        colors_distinct = len(colors)
+        colors_new = len(colors - seen_colors)
+        colors_lost = len(previous_colors - colors)
+        total_quantity = sum(int(row["quantity_total"]) for row in year_rows)
+        translucent_quantity = sum(
+            int(row["quantity_total"]) for row in year_rows if row["is_translucent"] == "true"
+        )
+        # A year whose quantities are all zero has no meaningful share: report 0 rather than
+        # raising ZeroDivisionError.
+        share_translucent = translucent_quantity / total_quantity if total_quantity else 0.0
+        totals_by_color: Dict[Tuple[str, str], int] = {}
+        names_by_color: Dict[Tuple[str, str], str] = {}
+        for row in year_rows:
+            key = (row["color_rgb"], row["is_translucent"])
+            totals_by_color[key] = totals_by_color.get(key, 0) + int(row["quantity_total"])
+            names_by_color[key] = row["color_name"]
+        top_colors = sorted(
+            totals_by_color.items(),
+            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
+        )[:5]
+        top_colors_label = ", ".join(f"{names_by_color[key]} ({value})" for key, value in top_colors)
+        stats.append(
+            {
+                "year": year,
+                "colors_distinct": str(colors_distinct),
+                "colors_new": str(colors_new),
+                "colors_lost": str(colors_lost),
+                "share_translucent": f"{share_translucent:.4f}",
+                "total_quantity": str(total_quantity),
+                "top_colors": top_colors_label,
+            }
+        )
+        seen_colors.update(colors)
+        previous_colors = colors
+    return stats
+
+
+def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
+    """Build the year × color matrix of total quantities, sorted by year, name, translucency."""
+    totals: Dict[Tuple[str, str, str, str], int] = {}
+    for row in rows:
+        key = (row["year"], row["color_rgb"], row["is_translucent"], row["color_name"])
+        totals[key] = totals.get(key, 0) + int(row["quantity_total"])
+    matrix = [
+        {
+            "year": year,
+            "color_rgb": color_rgb,
+            "is_translucent": is_translucent,
+            "color_name": color_name,
+            "quantity_total": str(quantity),
+        }
+        for (year, color_rgb, is_translucent, color_name), quantity in totals.items()
+    ]
+    matrix.sort(key=lambda row: (int(row["year"]), row["color_name"], row["is_translucent"]))
+    return matrix
+
+
+def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
+    """Write the yearly statistics to a dedicated CSV file."""
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as csv_file:
+        fieldnames = [
+            "year",
+            "colors_distinct",
+            "colors_new",
+            "colors_lost",
+            "share_translucent",
+            "total_quantity",
+            "top_colors",
+        ]
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(stats)
+
+
+def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
+    """Serialize the year × color matrix to CSV to feed heatmaps."""
+    ensure_parent_dir(destination_path)
+    with destination_path.open("w", newline="") as csv_file:
+        fieldnames = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
diff --git a/scripts/compute_colors_timeline.py b/scripts/compute_colors_timeline.py
new file mode 100644
index 0000000..3875615
--- /dev/null
+++ b/scripts/compute_colors_timeline.py
@@ -0,0 +1,29 @@
+"""Compute the yearly evolution of color palettes."""
+
+from pathlib import Path
+
+from lib.rebrickable.colors_timeline import (
+    build_year_color_matrix,
+    compute_yearly_stats,
+    load_colors_by_set,
+    write_year_color_matrix,
+    write_yearly_stats,
+)
+
+
+COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
+TIMELINE_PATH = Path("data/intermediate/colors_timeline.csv")
+MATRIX_PATH = Path("data/intermediate/colors_year_color_matrix.csv")
+
+
+def main() -> None:
+    """Build the yearly aggregates and the year × color matrix."""
+    colors_by_set = load_colors_by_set(COLORS_BY_SET_PATH)
+    timeline = compute_yearly_stats(colors_by_set)
+    matrix = build_year_color_matrix(colors_by_set)
+    write_yearly_stats(TIMELINE_PATH, timeline)
+    write_year_color_matrix(MATRIX_PATH, matrix)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_colors_timeline.py b/tests/test_colors_timeline.py
new file mode 100644
index 0000000..6830a8f
--- /dev/null
+++ b/tests/test_colors_timeline.py
@@ -0,0 +1,110 @@
+"""Tests for the yearly color palette aggregates."""
+
+import csv
+from pathlib import Path
+
+from lib.rebrickable.colors_timeline import (
+    build_year_color_matrix,
+    compute_yearly_stats,
+    load_colors_by_set,
+    write_year_color_matrix,
+    write_yearly_stats,
+)
+
+
+def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
+    """Write a simple CSV file for the needs of the tests."""
+    with path.open("w", newline="") as csv_file:
+        writer = csv.writer(csv_file)
+        writer.writerow(headers)
+        writer.writerows(rows)
+
+
+def test_compute_yearly_stats(tmp_path: Path) -> None:
+    """Compute the yearly metrics on the palettes."""
+    source_path = tmp_path / "colors_by_set.csv"
+    timeline_path = tmp_path / "colors_timeline.csv"
+    matrix_path = tmp_path / "colors_year_color_matrix.csv"
+    write_csv(
+        source_path,
+        [
+            "set_num",
+            "set_id",
+            "year",
+            "color_rgb",
+            "is_translucent",
+            "color_name",
+            "quantity_total",
+            "quantity_non_spare",
+            "quantity_minifig",
+            "quantity_non_minifig",
+        ],
+        [
+            ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "2", "2", "0", "2"],
+            ["1000-1", "1000", "2020", "BBBBBB", "true", "Trans-Black", "1", "1", "0", "1"],
+            ["2000-1", "2000", "2021", "BBBBBB", "true", "Trans-Black", "3", "3", "0", "3"],
+            ["2000-1", "2000", "2021", "CCCCCC", "false", "Blue", "4", "4", "4", "0"],
+        ],
+    )
+
+    rows = load_colors_by_set(source_path)
+    timeline = compute_yearly_stats(rows)
+    matrix = build_year_color_matrix(rows)
+    write_yearly_stats(timeline_path, timeline)
+    write_year_color_matrix(matrix_path, matrix)
+
+    with timeline_path.open() as csv_file:
+        timeline_rows = list(csv.DictReader(csv_file))
+    with matrix_path.open() as csv_file:
+        matrix_rows = list(csv.DictReader(csv_file))
+
+    assert timeline_rows == [
+        {
+            "year": "2020",
+            "colors_distinct": "2",
+            "colors_new": "2",
+            "colors_lost": "0",
+            "share_translucent": "0.3333",
+            "total_quantity": "3",
+            "top_colors": "Gray (2), Trans-Black (1)",
+        },
+        {
+            "year": "2021",
+            "colors_distinct": "2",
+            "colors_new": "1",
+            "colors_lost": "1",
+            "share_translucent": "0.4286",
+            "total_quantity": "7",
+            "top_colors": "Blue (4), Trans-Black (3)",
+        },
+    ]
+    assert matrix_rows == [
+        {
+            "year": "2020",
+            "color_rgb": "AAAAAA",
+            "is_translucent": "false",
+            "color_name": "Gray",
+            "quantity_total": "2",
+        },
+        {
+            "year": "2020",
+            "color_rgb": "BBBBBB",
+            "is_translucent": "true",
+            "color_name": "Trans-Black",
+            "quantity_total": "1",
+        },
+        {
+            "year": "2021",
+            "color_rgb": "CCCCCC",
+            "is_translucent": "false",
+            "color_name": "Blue",
+            "quantity_total": "4",
+        },
+        {
+            "year": "2021",
+            "color_rgb": "BBBBBB",
+            "is_translucent": "true",
+            "color_name": "Trans-Black",
+            "quantity_total": "3",
+        },
+    ]