1

Ajoute l'agrégation annuelle des palettes de couleurs

This commit is contained in:
2025-12-01 22:40:56 +01:00
parent 696c2606e1
commit 19508a3c0f
4 changed files with 267 additions and 0 deletions

View File

@@ -0,0 +1,121 @@
"""Agrégation annuelle des palettes de couleurs."""
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
from lib.filesystem import ensure_parent_dir
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load every row of the colors-by-set CSV file into memory.

    Args:
        colors_by_set_path: Path of the CSV file to read. Its first line is
            used as the header row.

    Returns:
        One dict per data row, keyed by the header names, in file order.
    """
    # newline="" hands line-ending handling to the csv module, so a newline
    # embedded in a quoted field is kept verbatim instead of being rewritten
    # by universal-newline translation.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket rows by the value of their ``"year"`` key.

    Years appear in the result in order of first occurrence, and rows keep
    their input order within each year.
    """
    rows_per_year: Dict[str, List[dict]] = {}
    for entry in rows:
        # setdefault creates the bucket the first time a year is met.
        rows_per_year.setdefault(entry["year"], []).append(entry)
    return rows_per_year
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build the yearly metrics describing the color palettes.

    A color is identified by the pair ``(color_rgb, is_translucent)``. Years
    are processed in ascending numeric order.

    Args:
        rows: Dicts providing the keys ``year``, ``color_rgb``,
            ``is_translucent``, ``color_name`` and ``quantity_total``;
            ``year`` and ``quantity_total`` must be parseable by ``int``.

    Returns:
        One dict per year, every value being a string:

        * ``year``: the year as found in the rows.
        * ``colors_distinct``: number of distinct colors in the year.
        * ``colors_new``: colors never seen in any earlier year.
        * ``colors_lost``: colors of the previously processed year that are
          absent from this one.
        * ``share_translucent``: share of the year's quantity carried by rows
          whose ``is_translucent`` equals ``"true"``, with four decimals;
          ``"0.0000"`` when the year's total quantity is zero.
        * ``total_quantity``: sum of ``quantity_total`` over the year.
        * ``top_colors``: up to five ``"name (quantity)"`` entries joined by
          ``", "``, ordered by descending quantity, then color name, then
          ``is_translucent``.

    Raises:
        KeyError: If a row lacks one of the expected keys.
        ValueError: If ``year`` or ``quantity_total`` is not an integer string.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        total_quantity = 0
        translucent_quantity = 0
        # Single pass over the year's rows: per-color totals, display names
        # and the overall / translucent quantities are accumulated together.
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            quantity = int(row["quantity_total"])
            totals_by_color[key] = totals_by_color.get(key, 0) + quantity
            # When a color appears under several names, the last row wins.
            names_by_color[key] = row["color_name"]
            total_quantity += quantity
            if row["is_translucent"] == "true":
                translucent_quantity += quantity

        colors = set(totals_by_color)
        # A year whose quantities sum to zero would otherwise raise
        # ZeroDivisionError; report a zero share instead.
        share_translucent = (
            translucent_quantity / total_quantity if total_quantity else 0.0
        )
        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(
            f"{names_by_color[key]} ({value})" for key, value in top_colors
        )
        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Build a year x color matrix based on total quantities.

    Rows sharing the same ``year``, ``color_rgb``, ``is_translucent`` and
    ``color_name`` are merged by summing their ``quantity_total``. The result
    holds one dict per combination, with ``quantity_total`` rendered as a
    string, sorted by numeric year, then color name, then ``is_translucent``.
    """
    quantity_by_cell: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (
            entry["year"],
            entry["color_rgb"],
            entry["is_translucent"],
            entry["color_name"],
        )
        quantity_by_cell[cell] = quantity_by_cell.get(cell, 0) + int(entry["quantity_total"])

    cells = [
        {
            "year": year,
            "color_rgb": rgb,
            "is_translucent": translucent,
            "color_name": name,
            "quantity_total": str(quantity),
        }
        for (year, rgb, translucent, name), quantity in quantity_by_cell.items()
    ]
    # The sort is stable, so cells with equal keys keep first-seen order.
    return sorted(
        cells,
        key=lambda cell: (int(cell["year"]), cell["color_name"], cell["is_translucent"]),
    )
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to a dedicated CSV file.

    ``ensure_parent_dir`` is called on the destination first (presumably to
    create the missing parent directory; confirm in ``lib.filesystem``), then
    a header row and one line per entry of ``stats`` are written.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(stats)
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialize the year x color matrix to CSV, e.g. to feed heatmaps.

    ``ensure_parent_dir`` is called on the destination first (presumably to
    create the missing parent directory; confirm in ``lib.filesystem``), then
    a header row and one line per entry of ``rows`` are written.
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        csv_writer = csv.DictWriter(output, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(rows)