Ajoute l'agrégation annuelle des palettes de couleurs
This commit is contained in:
parent
f8a2464447
commit
fb2ef5f16f
@ -170,3 +170,10 @@ Le script lit `data/intermediate/parts_filtered.csv` et `data/raw/colors.csv`, p
|
||||
2. `python -m scripts.build_colors_by_set`
|
||||
|
||||
Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs de `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, quatre quantités : totale, hors pièces de rechange, issue des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et aux matrices de palette.
|
||||
|
||||
### Étape 14 : évolution annuelle des palettes
|
||||
|
||||
1. `source .venv/bin/activate`
|
||||
2. `python -m scripts.compute_colors_timeline`
|
||||
|
||||
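Le script lit `data/intermediate/colors_by_set.csv` et produit deux agrégats : `data/intermediate/colors_timeline.csv` (statistiques annuelles : nombre de couleurs distinctes, nombre de couleurs nouvelles, c'est-à-dire jamais vues les années antérieures, nombre de couleurs perdues par rapport à l'année précédente présente dans les données, part des pièces translucides, quantité totale et cinq couleurs les plus utilisées) et `data/intermediate/colors_year_color_matrix.csv` (quantités totales année × couleur) pour préparer heatmaps et analyses temporelles.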
|
||||
|
||||
121
lib/rebrickable/colors_timeline.py
Normal file
121
lib/rebrickable/colors_timeline.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""Agrégation annuelle des palettes de couleurs."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple
|
||||
|
||||
from lib.filesystem import ensure_parent_dir
|
||||
|
||||
|
||||
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load every row of colors_by_set.csv into memory.

    Args:
        colors_by_set_path: Location of the CSV file to read.

    Returns:
        One dict per data row, keyed by the column names found on the
        header line. Values are returned as read, without any conversion.
    """
    # newline="" hands line-ending handling over to the csv module, so line
    # breaks embedded in quoted fields are preserved exactly as written.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
|
||||
|
||||
|
||||
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket rows by the value of their "year" key.

    Args:
        rows: Rows that each expose a "year" key.

    Returns:
        A mapping from year to the rows of that year. Years appear in
        first-seen order and rows keep their input order within a year.
    """
    by_year: Dict[str, List[dict]] = {}
    for entry in rows:
        # setdefault creates the bucket the first time a year shows up.
        by_year.setdefault(entry["year"], []).append(entry)
    return by_year
|
||||
|
||||
|
||||
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build one record of colour-palette metrics per year.

    A colour is identified by the pair (color_rgb, is_translucent). Years
    are processed in ascending numeric order.

    Args:
        rows: Rows exposing the "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total" keys. "year" and
            "quantity_total" must be parseable by int().

    Returns:
        One dict per year, every value being a string:
        - "year": the year as found in the input;
        - "colors_distinct": number of colours present that year;
        - "colors_new": colours that appear in no earlier year;
        - "colors_lost": colours present in the previous processed year
          (not necessarily the preceding calendar year) but absent now;
        - "share_translucent": quantity of rows whose is_translucent is
          "true" divided by the total quantity, with four decimals
          ("0.0000" when the total quantity of the year is zero);
        - "total_quantity": sum of quantity_total over the year;
        - "top_colors": up to five "name (quantity)" labels joined by
          ", ", ordered by descending quantity, then name, then
          translucency flag.

    Raises:
        KeyError: If a row lacks one of the expected keys.
        ValueError: If a year or a quantity is not an integer literal.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        # Single pass over the year's rows: per-colour totals and labels.
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            totals_by_color[key] = totals_by_color.get(key, 0) + int(row["quantity_total"])
            # The last row seen for a colour decides its display name.
            names_by_color[key] = row["color_name"]

        colors = set(totals_by_color)
        total_quantity = sum(totals_by_color.values())
        translucent_quantity = sum(
            quantity
            for (_, is_translucent), quantity in totals_by_color.items()
            if is_translucent == "true"
        )
        # A year whose quantities are all zero must not abort the whole
        # timeline with a ZeroDivisionError; report a null share instead.
        share_translucent = translucent_quantity / total_quantity if total_quantity else 0.0

        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(
            f"{names_by_color[key]} ({value})" for key, value in top_colors
        )

        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
|
||||
|
||||
|
||||
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Aggregate total quantities into a year × colour matrix.

    Rows sharing the same (year, color_rgb, is_translucent, color_name)
    are merged by summing their "quantity_total".

    Args:
        rows: Rows exposing the "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total" keys.

    Returns:
        One dict per distinct cell with the keys "year", "color_rgb",
        "is_translucent", "color_name" and "quantity_total" (the sum, as a
        string), sorted by numeric year, then colour name, then
        translucency flag.
    """
    quantity_by_cell: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (
            entry["year"],
            entry["color_rgb"],
            entry["is_translucent"],
            entry["color_name"],
        )
        quantity_by_cell[cell] = quantity_by_cell.get(cell, 0) + int(entry["quantity_total"])

    def sort_key(record: dict) -> Tuple[int, str, str]:
        return int(record["year"]), record["color_name"], record["is_translucent"]

    # sorted() is stable, so cells with equal keys keep first-seen order.
    return sorted(
        (
            {
                "year": year,
                "color_rgb": color_rgb,
                "is_translucent": is_translucent,
                "color_name": color_name,
                "quantity_total": str(quantity),
            }
            for (year, color_rgb, is_translucent, color_name), quantity in quantity_by_cell.items()
        ),
        key=sort_key,
    )
|
||||
|
||||
|
||||
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to a dedicated CSV file.

    Args:
        destination_path: File to create or overwrite. It is passed to
            ensure_parent_dir first (imported from lib.filesystem;
            presumably creates the parent directory, not shown here).
        stats: Records whose keys are the columns listed below.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats)
|
||||
|
||||
|
||||
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialise the year × colour matrix to CSV, e.g. to feed heatmaps.

    Args:
        destination_path: File to create or overwrite. It is passed to
            ensure_parent_dir first (imported from lib.filesystem;
            presumably creates the parent directory, not shown here).
        rows: Records whose keys are "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total".
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
||||
29
scripts/compute_colors_timeline.py
Normal file
29
scripts/compute_colors_timeline.py
Normal file
@ -0,0 +1,29 @@
|
||||
"""Calcule l'évolution annuelle des palettes de couleurs."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from lib.rebrickable.colors_timeline import (
|
||||
build_year_color_matrix,
|
||||
compute_yearly_stats,
|
||||
load_colors_by_set,
|
||||
write_year_color_matrix,
|
||||
write_yearly_stats,
|
||||
)
|
||||
|
||||
|
||||
# Input: per-set, per-colour quantities.
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
# Outputs: yearly statistics and the year × colour quantity matrix.
TIMELINE_PATH = Path("data/intermediate/colors_timeline.csv")
MATRIX_PATH = Path("data/intermediate/colors_year_color_matrix.csv")


def main() -> None:
    """Build the yearly aggregates and the year × colour matrix."""
    rows = load_colors_by_set(COLORS_BY_SET_PATH)
    # Both aggregates are computed before anything is written to disk.
    yearly_stats = compute_yearly_stats(rows)
    year_color_matrix = build_year_color_matrix(rows)
    write_yearly_stats(TIMELINE_PATH, yearly_stats)
    write_year_color_matrix(MATRIX_PATH, year_color_matrix)


if __name__ == "__main__":
    main()
|
||||
110
tests/test_colors_timeline.py
Normal file
110
tests/test_colors_timeline.py
Normal file
@ -0,0 +1,110 @@
|
||||
"""Tests des agrégats annuels de palettes de couleurs."""
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from lib.rebrickable.colors_timeline import (
|
||||
build_year_color_matrix,
|
||||
compute_yearly_stats,
|
||||
load_colors_by_set,
|
||||
write_year_color_matrix,
|
||||
write_yearly_stats,
|
||||
)
|
||||
|
||||
|
||||
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a minimal CSV fixture: the header line followed by the rows."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
|
||||
|
||||
|
||||
def test_compute_yearly_stats(tmp_path: Path) -> None:
    """Check the yearly metrics and the year × colour matrix end to end."""
    source_path = tmp_path / "colors_by_set.csv"
    timeline_path = tmp_path / "colors_timeline.csv"
    matrix_path = tmp_path / "colors_year_color_matrix.csv"

    source_headers = [
        "set_num",
        "set_id",
        "year",
        "color_rgb",
        "is_translucent",
        "color_name",
        "quantity_total",
        "quantity_non_spare",
        "quantity_minifig",
        "quantity_non_minifig",
    ]
    source_rows = [
        ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "2", "2", "0", "2"],
        ["1000-1", "1000", "2020", "BBBBBB", "true", "Trans-Black", "1", "1", "0", "1"],
        ["2000-1", "2000", "2021", "BBBBBB", "true", "Trans-Black", "3", "3", "0", "3"],
        ["2000-1", "2000", "2021", "CCCCCC", "false", "Blue", "4", "4", "4", "0"],
    ]
    write_csv(source_path, source_headers, source_rows)

    # Run the full pipeline: load, aggregate, then serialise both outputs.
    loaded_rows = load_colors_by_set(source_path)
    computed_timeline = compute_yearly_stats(loaded_rows)
    computed_matrix = build_year_color_matrix(loaded_rows)
    write_yearly_stats(timeline_path, computed_timeline)
    write_year_color_matrix(matrix_path, computed_matrix)

    with timeline_path.open() as csv_file:
        timeline_rows = list(csv.DictReader(csv_file))
    with matrix_path.open() as csv_file:
        matrix_rows = list(csv.DictReader(csv_file))

    timeline_columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    expected_timeline = [
        dict(zip(timeline_columns, values))
        for values in (
            ("2020", "2", "2", "0", "0.3333", "3", "Gray (2), Trans-Black (1)"),
            ("2021", "2", "1", "1", "0.4286", "7", "Blue (4), Trans-Black (3)"),
        )
    ]
    assert timeline_rows == expected_timeline

    matrix_columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    # Within a year, cells are ordered by colour name.
    expected_matrix = [
        dict(zip(matrix_columns, values))
        for values in (
            ("2020", "AAAAAA", "false", "Gray", "2"),
            ("2020", "BBBBBB", "true", "Trans-Black", "1"),
            ("2021", "CCCCCC", "false", "Blue", "4"),
            ("2021", "BBBBBB", "true", "Trans-Black", "3"),
        )
    ]
    assert matrix_rows == expected_matrix
|
||||
Loading…
x
Reference in New Issue
Block a user