Ajoute l'agrégation annuelle des palettes de couleurs
This commit is contained in:
parent
f8a2464447
commit
fb2ef5f16f
@ -170,3 +170,10 @@ Le script lit `data/intermediate/parts_filtered.csv` et `data/raw/colors.csv`, p
|
|||||||
2. `python -m scripts.build_colors_by_set`
|
2. `python -m scripts.build_colors_by_set`
|
||||||
|
|
||||||
Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, les quantités totales, hors rechanges, issues des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et matrices de palette.
|
Le script agrège `data/intermediate/parts_filtered.csv` avec les libellés de couleurs `data/raw/colors.csv` et produit `data/intermediate/colors_by_set.csv` contenant, pour chaque set et chaque couleur, les quantités totales, hors rechanges, issues des minifigs et hors minifigs. Ce fichier sert de base aux visualisations et matrices de palette.
|
||||||
|
|
||||||
|
### Étape 14 : évolution annuelle des palettes
|
||||||
|
|
||||||
|
1. `source .venv/bin/activate`
|
||||||
|
2. `python -m scripts.compute_colors_timeline`
|
||||||
|
|
||||||
|
Le script lit `data/intermediate/colors_by_set.csv` et produit deux agrégats : `data/intermediate/colors_timeline.csv` (statistiques annuelles : nombre de couleurs distinctes, nouvelles, perdues, part des translucides, top couleurs) et `data/intermediate/colors_year_color_matrix.csv` (quantités totales année × couleur) pour préparer heatmaps et analyses temporelles.
|
||||||
|
|||||||
121
lib/rebrickable/colors_timeline.py
Normal file
121
lib/rebrickable/colors_timeline.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
"""Agrégation annuelle des palettes de couleurs."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, List, Tuple
|
||||||
|
|
||||||
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
|
||||||
|
|
||||||
|
def load_colors_by_set(colors_by_set_path: Path) -> List[dict]:
    """Load the colors_by_set CSV file fully into memory.

    Args:
        colors_by_set_path: Path of the CSV file to read. Its first line is
            used as the header.

    Returns:
        One dict per data row, keyed by the header names, in file order.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields containing line breaks are read back unchanged. It also mirrors
    # how the CSV writers of this module open their files.
    with colors_by_set_path.open(newline="") as csv_file:
        return list(csv.DictReader(csv_file))
|
||||||
|
|
||||||
|
|
||||||
|
def group_rows_by_year(rows: Iterable[dict]) -> Dict[str, List[dict]]:
    """Bucket rows by the value of their "year" key.

    Returns a dict mapping each year value found in ``rows`` to the list of
    rows carrying it. Rows keep their input order inside each bucket, and
    buckets appear in first-seen order.
    """
    by_year: Dict[str, List[dict]] = {}
    for entry in rows:
        by_year.setdefault(entry["year"], []).append(entry)
    return by_year
|
||||||
|
|
||||||
|
|
||||||
|
def compute_yearly_stats(rows: Iterable[dict]) -> List[dict]:
    """Build per-year palette metrics from colors_by_set rows.

    A colour is identified by its ``(color_rgb, is_translucent)`` pair. Years
    are processed in ascending numeric order.

    Args:
        rows: Dicts providing the keys "year", "color_rgb", "is_translucent",
            "color_name" and "quantity_total". "year" and "quantity_total"
            must be parseable by ``int``.

    Returns:
        One dict per year, ordered by year, whose values are all strings:

        - "year": the year as found in the input.
        - "colors_distinct": number of distinct colours that year.
        - "colors_new": colours absent from every earlier year.
        - "colors_lost": colours present in the previously processed year
          but absent from this one.
        - "share_translucent": translucent quantity divided by the total
          quantity, with four decimals ("0.0000" when the total is zero).
        - "total_quantity": sum of "quantity_total" over the year.
        - "top_colors": up to five "name (quantity)" labels joined by ", ",
          ordered by descending quantity, then name, then translucency flag.

    Raises:
        KeyError: If a row lacks one of the required keys.
        ValueError: If "year" or "quantity_total" is not an integer string.
    """
    grouped = group_rows_by_year(rows)
    seen_colors: set[Tuple[str, str]] = set()
    previous_colors: set[Tuple[str, str]] = set()
    stats: List[dict] = []
    for year in sorted(grouped, key=int):
        # Single pass over the year's rows: every other metric derives from
        # the per-colour totals.
        totals_by_color: Dict[Tuple[str, str], int] = {}
        names_by_color: Dict[Tuple[str, str], str] = {}
        for row in grouped[year]:
            key = (row["color_rgb"], row["is_translucent"])
            totals_by_color[key] = totals_by_color.get(key, 0) + int(row["quantity_total"])
            # The last name seen for a colour wins.
            names_by_color[key] = row["color_name"]

        colors = set(totals_by_color)
        total_quantity = sum(totals_by_color.values())
        translucent_quantity = sum(
            quantity
            for (_, is_translucent), quantity in totals_by_color.items()
            if is_translucent == "true"
        )
        # A year whose quantities sum to zero must not abort the whole
        # timeline with a ZeroDivisionError; report a zero share instead.
        share_translucent = translucent_quantity / total_quantity if total_quantity else 0.0

        top_colors = sorted(
            totals_by_color.items(),
            key=lambda item: (-item[1], names_by_color[item[0]], item[0][1]),
        )[:5]
        top_colors_label = ", ".join(f"{names_by_color[key]} ({value})" for key, value in top_colors)

        stats.append(
            {
                "year": year,
                "colors_distinct": str(len(colors)),
                "colors_new": str(len(colors - seen_colors)),
                "colors_lost": str(len(previous_colors - colors)),
                "share_translucent": f"{share_translucent:.4f}",
                "total_quantity": str(total_quantity),
                "top_colors": top_colors_label,
            }
        )
        seen_colors.update(colors)
        previous_colors = colors
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def build_year_color_matrix(rows: Iterable[dict]) -> List[dict]:
    """Aggregate total quantities into a year × colour matrix.

    Rows sharing the same ("year", "color_rgb", "is_translucent",
    "color_name") combination are merged by summing "quantity_total".

    Returns one dict per combination with the keys "year", "color_rgb",
    "is_translucent", "color_name" and "quantity_total" (the sum, as a
    string), sorted by numeric year, then colour name, then translucency flag.
    """
    quantity_by_cell: Dict[Tuple[str, str, str, str], int] = {}
    for entry in rows:
        cell = (entry["year"], entry["color_rgb"], entry["is_translucent"], entry["color_name"])
        quantity_by_cell[cell] = quantity_by_cell.get(cell, 0) + int(entry["quantity_total"])

    cells = [
        {
            "year": year,
            "color_rgb": color_rgb,
            "is_translucent": is_translucent,
            "color_name": color_name,
            "quantity_total": str(quantity),
        }
        for (year, color_rgb, is_translucent, color_name), quantity in quantity_by_cell.items()
    ]
    # sorted() is stable, so cells with equal sort keys keep first-seen order.
    return sorted(
        cells,
        key=lambda cell: (int(cell["year"]), cell["color_name"], cell["is_translucent"]),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def write_yearly_stats(destination_path: Path, stats: Iterable[dict]) -> None:
    """Write the yearly statistics to a dedicated CSV file.

    Calls ``ensure_parent_dir`` on the destination (presumably creating the
    missing parent directory; confirm in lib.filesystem), then overwrites the
    file with a header line followed by one line per entry of ``stats``.
    """
    columns = [
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    ]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats)
|
||||||
|
|
||||||
|
|
||||||
|
def write_year_color_matrix(destination_path: Path, rows: Iterable[dict]) -> None:
    """Serialise the year × colour matrix to CSV, e.g. to feed heatmaps.

    Calls ``ensure_parent_dir`` on the destination (presumably creating the
    missing parent directory; confirm in lib.filesystem), then overwrites the
    file with a header line followed by one line per entry of ``rows``.
    """
    columns = ["year", "color_rgb", "is_translucent", "color_name", "quantity_total"]
    ensure_parent_dir(destination_path)
    with destination_path.open("w", newline="") as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        writer.writerows(rows)
|
||||||
29
scripts/compute_colors_timeline.py
Normal file
29
scripts/compute_colors_timeline.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
"""Calcule l'évolution annuelle des palettes de couleurs."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.colors_timeline import (
|
||||||
|
build_year_color_matrix,
|
||||||
|
compute_yearly_stats,
|
||||||
|
load_colors_by_set,
|
||||||
|
write_year_color_matrix,
|
||||||
|
write_yearly_stats,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Input: per-set colour aggregate, read by load_colors_by_set() in main().
COLORS_BY_SET_PATH = Path("data/intermediate/colors_by_set.csv")
# Output: yearly statistics, written by write_yearly_stats() in main().
TIMELINE_PATH = Path("data/intermediate/colors_timeline.csv")
# Output: year × colour matrix, written by write_year_color_matrix() in main().
MATRIX_PATH = Path("data/intermediate/colors_year_color_matrix.csv")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the yearly aggregates and the year × colour matrix.

    The source CSV is loaded once; both aggregates are computed from that
    in-memory copy before either output file is written.
    """
    source_rows = load_colors_by_set(COLORS_BY_SET_PATH)
    yearly_stats, year_color_matrix = (
        compute_yearly_stats(source_rows),
        build_year_color_matrix(source_rows),
    )
    write_yearly_stats(TIMELINE_PATH, yearly_stats)
    write_year_color_matrix(MATRIX_PATH, year_color_matrix)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the pipeline only when this module is executed as a script
# (e.g. `python -m scripts.compute_colors_timeline`), not when it is imported.
if __name__ == "__main__":
    main()
|
||||||
110
tests/test_colors_timeline.py
Normal file
110
tests/test_colors_timeline.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
"""Tests des agrégats annuels de palettes de couleurs."""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.colors_timeline import (
|
||||||
|
build_year_color_matrix,
|
||||||
|
compute_yearly_stats,
|
||||||
|
load_colors_by_set,
|
||||||
|
write_year_color_matrix,
|
||||||
|
write_yearly_stats,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a minimal CSV fixture: the header line, then each data row."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
|
||||||
|
|
||||||
|
|
||||||
|
def test_compute_yearly_stats(tmp_path: Path) -> None:
    """Check the yearly metrics and the year × colour matrix end to end.

    A four-row fixture covering two years goes through load → compute →
    write, and both resulting CSV files are read back and compared with the
    expected rows.
    """
    source_path = tmp_path / "colors_by_set.csv"
    timeline_path = tmp_path / "colors_timeline.csv"
    matrix_path = tmp_path / "colors_year_color_matrix.csv"

    source_headers = [
        "set_num",
        "set_id",
        "year",
        "color_rgb",
        "is_translucent",
        "color_name",
        "quantity_total",
        "quantity_non_spare",
        "quantity_minifig",
        "quantity_non_minifig",
    ]
    source_rows = [
        ["1000-1", "1000", "2020", "AAAAAA", "false", "Gray", "2", "2", "0", "2"],
        ["1000-1", "1000", "2020", "BBBBBB", "true", "Trans-Black", "1", "1", "0", "1"],
        ["2000-1", "2000", "2021", "BBBBBB", "true", "Trans-Black", "3", "3", "0", "3"],
        ["2000-1", "2000", "2021", "CCCCCC", "false", "Blue", "4", "4", "4", "0"],
    ]
    write_csv(source_path, source_headers, source_rows)

    loaded = load_colors_by_set(source_path)
    computed_timeline = compute_yearly_stats(loaded)
    computed_matrix = build_year_color_matrix(loaded)
    write_yearly_stats(timeline_path, computed_timeline)
    write_year_color_matrix(matrix_path, computed_matrix)

    with timeline_path.open() as handle:
        timeline_rows = list(csv.DictReader(handle))
    with matrix_path.open() as handle:
        matrix_rows = list(csv.DictReader(handle))

    timeline_columns = (
        "year",
        "colors_distinct",
        "colors_new",
        "colors_lost",
        "share_translucent",
        "total_quantity",
        "top_colors",
    )
    expected_timeline = [
        dict(zip(timeline_columns, values))
        for values in (
            ("2020", "2", "2", "0", "0.3333", "3", "Gray (2), Trans-Black (1)"),
            ("2021", "2", "1", "1", "0.4286", "7", "Blue (4), Trans-Black (3)"),
        )
    ]
    matrix_columns = ("year", "color_rgb", "is_translucent", "color_name", "quantity_total")
    expected_matrix = [
        dict(zip(matrix_columns, values))
        for values in (
            ("2020", "AAAAAA", "false", "Gray", "2"),
            ("2020", "BBBBBB", "true", "Trans-Black", "1"),
            ("2021", "CCCCCC", "false", "Blue", "4"),
            ("2021", "BBBBBB", "true", "Trans-Black", "3"),
        )
    ]

    assert timeline_rows == expected_timeline
    assert matrix_rows == expected_matrix
|
||||||
Loading…
x
Reference in New Issue
Block a user