You've already forked etude_lego_jurassic_world
Ajoute l'agrégation annuelle des palettes de couleurs
This commit is contained in:
121
lib/rebrickable/colors_timeline.py
Normal file
121
lib/rebrickable/colors_timeline.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Agrégation annuelle des palettes de couleurs."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
|
||||
|
||||
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load the colors-by-set CSV file into memory.

    Args:
        colors_by_set_path: Path of the CSV file to read. Its first line is
            used as the header row.

    Returns:
        One dict per data row, keyed by the header names. All values are
        strings, exactly as found in the file.
    """
    # newline="" lets the csv module handle line endings itself; without it,
    # universal-newline translation alters line breaks embedded in quoted
    # fields.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
|
||||
|
||||
|
||||
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Group rows by the value of their ``"year"`` key.

    Args:
        rows: Row dicts, each of which must contain a ``"year"`` key.

    Returns:
        A dict mapping each year value to the list of rows carrying it.
        Years appear in order of first occurrence and rows keep their
        input order within each year.
    """
    by_year: Dict[str, List[dict]] = {}
    for entry in rows:
        by_year.setdefault(entry["year"], []).append(entry)
    return by_year
|
||||
|
||||
|
||||
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build per-year metrics describing the color palettes.

    A color is identified by the pair ``(color_rgb, is_translucent)``.

    Args:
        rows: Row dicts providing the keys ``year``, ``color_rgb``,
            ``is_translucent``, ``color_name`` and ``quantity_total``.
            ``year`` and ``quantity_total`` must be parseable by ``int``.

    Returns:
        One dict per year, sorted by numeric year, with string values:

        * ``year``: the year as found in the rows.
        * ``colors_distinct``: number of distinct colors used that year.
        * ``colors_new``: colors never seen in any earlier year.
        * ``colors_lost``: colors present in the previous listed year but
          absent from this one.
        * ``share_translucent``: share of the quantity whose
          ``is_translucent`` equals ``"true"``, with four decimals
          (``"0.0000"`` when the year's total quantity is zero).
        * ``total_quantity``: sum of ``quantity_total`` for the year.
        * ``top_colors``: up to five ``"name (quantity)"`` labels joined by
          ``", "``, largest quantity first.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        total_quantity = 0
        translucent_quantity = 0
        # Single pass over the year's rows: parse each quantity once and
        # feed every accumulator from it.
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            quantity = int(row["quantity_total"])
            totals_by_color[key] = totals_by_color.get(key, 0) + quantity
            # The last row seen for a color decides its display name.
            names_by_color[key] = row["color_name"]
            total_quantity += quantity
            if row["is_translucent"] == "true":
                translucent_quantity += quantity

        colors = set(totals_by_color)
        # A year whose quantities sum to zero has no meaningful share;
        # report 0 instead of raising ZeroDivisionError.
        share_translucent = (
            translucent_quantity / total_quantity if total_quantity else 0.0
        )
        # Largest quantity first; ties broken by name, then translucency.
        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(
            f"{names_by_color[key]} ({value})" for key, value in top_colors
        )
        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
|
||||
|
||||
|
||||
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Build a year x color matrix of summed quantities.

    Rows sharing the same ``year``, ``color_rgb``, ``is_translucent`` and
    ``color_name`` are merged and their ``quantity_total`` values (parsed
    with ``int``) are added together.

    Args:
        rows: Row dicts providing the five keys listed above.

    Returns:
        One dict per distinct combination, with ``quantity_total`` rendered
        as a string, ordered by numeric year, then color name, then
        translucency flag.
    """
    quantities: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (
            entry["year"],
            entry["color_rgb"],
            entry["is_translucent"],
            entry["color_name"],
        )
        quantities[cell] = quantities.get(cell, 0) + int(entry["quantity_total"])

    # sorted() is stable, so cells that tie on the sort key keep their
    # first-seen order.
    ordered = sorted(
        quantities.items(),
        key=lambda pair: (int(pair[0][0]), pair[0][3], pair[0][2]),
    )
    return [
        {
            "year": year,
            "color_rgb": color_rgb,
            "is_translucent": is_translucent,
            "color_name": color_name,
            "quantity_total": str(quantity),
        }
        for (year, color_rgb, is_translucent, color_name), quantity in ordered
    ]
|
||||
|
||||
|
||||
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to a dedicated CSV file.

    Args:
        destination_path: Path of the CSV file to write; it is opened in
            ``"w"`` mode, so existing content is overwritten.
        stats: Row dicts whose keys match the columns written here.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    # Presumably creates the missing parent directory; the helper is defined
    # in lib.filesystem -- confirm there.
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats)
|
||||
|
||||
|
||||
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialize the year x color matrix to CSV, e.g. to feed heatmaps.

    Args:
        destination_path: Path of the CSV file to write; it is opened in
            ``"w"`` mode, so existing content is overwritten.
        rows: Row dicts whose keys match the columns written here.
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    # Presumably creates the missing parent directory; the helper is defined
    # in lib.filesystem -- confirm there.
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
||||
Reference in New Issue
Block a user