Ajoute les agrégats et visualisations globales des couleurs de têtes

2025-12-01 23:56:03 +01:00
parent d7b4ad8031
commit 47ee76cacf
10 changed files with 502 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -200,3 +200,11 @@ Le script identifie les têtes de minifigs via la catégorie Rebrickable dédié
 2. `python -m scripts.plot_minifig_heads`
 Le script lit `data/intermediate/minifig_heads_by_year.csv` et produit `figures/step16/minifig_heads_shares.png` (répartition annuelle des couleurs de têtes, en parts empilées) et `figures/step16/minifig_heads_global.png` (donut global des parts cumulées). Les couleurs sont limitées aux plus fréquentes (avec regroupement des autres).
 ### Étape 17 : usage global de la couleur Yellow pour les têtes
 1. `source .venv/bin/activate`
 2. `python -m scripts.compute_global_minifig_heads`
 3. `python -m scripts.plot_global_minifig_skin_tones`
 Ces scripts lisent les CSV bruts du catalogue complet (`data/raw/inventories.csv`, `inventory_parts.csv`, `parts.csv`, `colors.csv`, `sets.csv`), extraient les têtes de minifigs via `part_cat_id=59`, agrègent les couleurs par année dans `data/intermediate/global_minifig_heads_by_year.csv`, puis tracent `figures/step17/global_minifig_heads_yellow_share.png` montrant la part annuelle de la couleur Yellow comparée au reste, jalons inclus.
--- a/lib/plots/global_minifig_heads.py
+++ b/lib/plots/global_minifig_heads.py
@@ -0,0 +1,90 @@
 """Visualisation des couleurs de têtes de minifigs sur le catalogue complet."""
 from pathlib import Path
 from typing import Dict, Iterable, List, Tuple
 import matplotlib.pyplot as plt
 from lib.filesystem import ensure_parent_dir
 from lib.rebrickable.stats import read_rows
 def load_global_heads(heads_path: Path) -> List[dict]:
    """Read the global per-year head aggregate stored at ``heads_path``.

    The file is parsed by ``read_rows`` and its result is returned unchanged.
    """
    head_rows = read_rows(heads_path)
    return head_rows
 def select_top_colors(rows: Iterable[dict], limit: int = 12) -> List[Tuple[str, str, str]]:
    """Return the ``limit`` colours with the largest summed quantity.

    Each colour is identified by the triple (color_name, color_rgb,
    is_translucent). Colours are ranked by descending total quantity, ties
    being broken by name and then by RGB value.
    """
    quantity_by_color: Dict[Tuple[str, str, str], int] = {}
    for entry in rows:
        identity = (entry["color_name"], entry["color_rgb"], entry["is_translucent"])
        amount = int(entry["quantity"])
        quantity_by_color[identity] = quantity_by_color.get(identity, 0) + amount

    def ranking(identity: Tuple[str, str, str]) -> Tuple[int, str, str]:
        # Negated quantity so that the biggest totals come first.
        name, rgb, _ = identity
        return (-quantity_by_color[identity], name, rgb)

    ranked = sorted(quantity_by_color, key=ranking)
    return ranked[:limit]
 def build_share_matrix(
    rows: Iterable[dict], top_colors: List[Tuple[str, str, str]]
 ) -> Tuple[List[int], List[Tuple[str, str, str]], List[Dict[str, float]]]:
    """Compute per-year colour shares, folding non-top colours into "Autres".

    Args:
        rows: aggregate rows exposing the keys ``year``, ``color_name``,
            ``color_rgb``, ``is_translucent`` and ``quantity``. Any iterable
            is accepted, including one-shot iterators.
        top_colors: (name, rgb, is_translucent) triples that keep their own
            series; every other colour is added to the "Autres" series.

    Returns:
        A triple ``(years, colors, shares_by_year)`` where ``years`` is the
        sorted list of distinct years, ``colors`` is ``top_colors`` followed
        by the ("Autres", "444444", "false") entry, and ``shares_by_year``
        holds, for each year in order, a mapping from colour name to its
        fraction of that year's total quantity. A year whose total is not
        positive gets 0.0 for every colour.
    """
    # The rows are walked more than once, so materialise them first: a
    # generator would otherwise be exhausted after the first pass.
    all_rows = list(rows)
    colors = top_colors + [("Autres", "444444", "false")]
    rows_by_year: Dict[int, List[dict]] = {}
    for row in all_rows:
        rows_by_year.setdefault(int(row["year"]), []).append(row)
    years = sorted(rows_by_year)
    shares_by_year: List[Dict[str, float]] = []
    for year in years:
        year_rows = rows_by_year[year]
        total = sum(int(r["quantity"]) for r in year_rows)
        shares: Dict[str, float] = {color[0]: 0.0 for color in colors}
        # Explicit guard instead of a trailing conditional expression: no
        # division happens unless the year's total is strictly positive.
        if total > 0:
            for r in year_rows:
                key = (r["color_name"], r["color_rgb"], r["is_translucent"])
                target = r["color_name"] if key in top_colors else "Autres"
                shares[target] = shares.get(target, 0.0) + int(r["quantity"]) / total
        shares_by_year.append(shares)
    return years, colors, shares_by_year
 def plot_global_head_shares(
    heads_path: Path,
    destination_path: Path,
    top_limit: int = 12,
 ) -> None:
    """Draw the yearly stacked-bar chart of minifig head colour shares.

    The aggregate is loaded from ``heads_path``, the ``top_limit`` most
    frequent colours are kept as individual series (the remainder is grouped
    by ``build_share_matrix``), and one stacked bar per year is saved to
    ``destination_path``.

    Args:
        heads_path: CSV aggregate of head colours per year.
        destination_path: image file to write.
        top_limit: number of colours that get their own series.
    """
    rows = load_global_heads(heads_path)
    top_colors = select_top_colors(rows, limit=top_limit)
    years, colors, shares_by_year = build_share_matrix(rows, top_colors)
    fig, ax = plt.subplots(figsize=(14, 6))
    # Running height of each year's stack; every series is drawn on top of it.
    bottoms = [0.0] * len(years)
    for name, color_rgb, is_trans in colors:
        values = [shares[name] for shares in shares_by_year]
        # Light outline for translucent colours, dark outline otherwise.
        edge = "#f2f2f2" if is_trans == "true" else "#0d0d0d"
        ax.bar(
            years,
            values,
            bottom=bottoms,
            color=f"#{color_rgb}",
            edgecolor=edge,
            label=name,
            linewidth=0.7,
        )
        bottoms = [b + v for b, v in zip(bottoms, values)]
    ax.set_ylim(0, 1.05)
    ax.set_ylabel("Part des couleurs (têtes de minifigs, catalogue complet)")
    ax.set_xlabel("Année")
    # Thin out the x ticks to roughly ten labels when many years are plotted.
    if len(years) > 15:
        step = max(1, len(years) // 10)
        ax.set_xticks(years[::step])
    else:
        ax.set_xticks(years)
    ax.set_title("Répartition des couleurs de têtes de minifigs par année (catalogue complet)")
    # The legend is anchored outside the axes, to the right of the plot area.
    ax.legend(loc="upper left", bbox_to_anchor=(1.02, 1), frameon=False)
    ax.grid(True, axis="y", linestyle="--", alpha=0.25)
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=170)
    plt.close(fig)
--- a/lib/plots/minifig_skin_tones.py
+++ b/lib/plots/minifig_skin_tones.py
@@ -0,0 +1,86 @@
 """Visualisation de la part des têtes jaunes sur le catalogue global."""
 from pathlib import Path
 from typing import Dict, List
 import matplotlib.pyplot as plt
 from lib.filesystem import ensure_parent_dir
 from lib.milestones import load_milestones
 from lib.rebrickable.stats import read_rows
 def compute_yellow_share(rows: List[dict]) -> List[dict]:
    """Compute, for every year, the fraction of head quantity that is Yellow.

    A row counts as Yellow when its colour name equals "yellow"
    (case-insensitive) or its RGB value equals "FFFF00" (case-insensitive).
    The result is ordered by numeric year; each item carries ``year`` (int),
    ``yellow_share`` and ``total``. A year with a non-positive total gets a
    share of 0.
    """
    per_year: Dict[str, Dict[str, int]] = {}
    for entry in rows:
        bucket = per_year.setdefault(entry["year"], {"yellow": 0, "total": 0})
        bucket["total"] += int(entry["quantity"])
        named_yellow = entry["color_name"].lower() == "yellow"
        if named_yellow or entry["color_rgb"].upper() == "FFFF00":
            bucket["yellow"] += int(entry["quantity"])

    series = []
    for label in sorted(per_year, key=int):
        counts = per_year[label]
        if counts["total"] > 0:
            ratio = counts["yellow"] / counts["total"]
        else:
            ratio = 0
        series.append({"year": int(label), "yellow_share": ratio, "total": counts["total"]})
    return series
 def plot_yellow_share(heads_path: Path, milestones_path: Path, destination_path: Path) -> None:
    """Plot the yearly share of Yellow minifig heads, annotated with milestones.

    Args:
        heads_path: CSV aggregate of head colours per year.
        milestones_path: milestones file handed to ``load_milestones``; each
            milestone is used through its ``year`` and ``description`` keys.
        destination_path: image file to write.

    Milestones are only drawn when the aggregate contains at least one year,
    so an empty aggregate produces an empty chart instead of an error.
    """
    rows = read_rows(heads_path)
    milestones = load_milestones(milestones_path)
    series = compute_yellow_share(rows)
    years = [item["year"] for item in series]
    shares = [item["yellow_share"] for item in series]
    fig, ax = plt.subplots(figsize=(13, 5.5))
    ax.plot(years, shares, color="#f2c300", marker="o", linewidth=2.4, label="Part Yellow")
    ax.fill_between(years, shares, color="#f2c300", alpha=0.18)
    # 10% headroom above the highest share, capped at 1.0; the 0.01 floor
    # keeps the axis valid when there is no data or every share is zero.
    ax.set_ylim(0, min(1.0, max(shares + [0.01]) * 1.1))
    ax.set_ylabel("Part de têtes Yellow")
    ax.set_xlabel("Année")
    # Thin out the x ticks to roughly ten labels when many years are plotted.
    if len(years) > 15:
        step = max(1, len(years) // 10)
        ax.set_xticks(years[::step])
    else:
        ax.set_xticks(years)
    ax.set_title("Evolution de l'usage des têtes Yellow (catalogue complet)")
    ax.grid(True, linestyle="--", alpha=0.3)
    # min()/max() below need at least one year, hence the extra guard.
    if milestones and years:
        min_year = min(years)
        max_year = max(years)
        milestones_in_range = sorted(
            [m for m in milestones if min_year <= m["year"] <= max_year],
            key=lambda m: (m["year"], m["description"]),
        )
        # Number of labels already placed for each year.
        offset_map: Dict[int, int] = {}
        offset_step = 0.35
        top_limit = ax.get_ylim()[1] * 1.05
        for milestone in milestones_in_range:
            year = milestone["year"]
            count_for_year = offset_map.get(year, 0)
            offset_map[year] = count_for_year + 1
            # Labels sharing a year alternate right/left of the marker line
            # and move further out every second label: +0.35, -0.35, +0.70...
            horizontal_offset = offset_step * (count_for_year // 2 + 1)
            if count_for_year % 2 == 1:
                horizontal_offset *= -1
            text_x = year + horizontal_offset
            ax.axvline(year, color="#d62728", linestyle="--", linewidth=1, alpha=0.65, zorder=1)
            ax.text(
                text_x,
                top_limit,
                milestone["description"],
                rotation=90,
                verticalalignment="top",
                horizontalalignment="center",
                fontsize=8,
                color="#d62728",
            )
        # Raise the upper limit so the label anchor height lies inside the axes.
        ax.set_ylim(ax.get_ylim()[0], top_limit * (1 + max(offset_map.values(), default=0) * 0.02))
    ensure_parent_dir(destination_path)
    fig.tight_layout()
    fig.savefig(destination_path, dpi=170)
    plt.close(fig)
--- a/lib/rebrickable/global_minifig_heads.py
+++ b/lib/rebrickable/global_minifig_heads.py
@@ -0,0 +1,103 @@
 """Extraction des couleurs de têtes de minifigs sur le catalogue complet."""
 import csv
 from pathlib import Path
 from typing import Dict, Iterable, List, Set, Tuple
 from lib.rebrickable.parts_inventory import normalize_boolean, select_latest_inventories
 # Default part category ids treated as minifig heads; kept as strings because
 # they are compared with the raw ``part_cat_id`` values read from the parts CSV.
 HEAD_CATEGORIES = {"59"}
 def load_head_parts(parts_path: Path, head_categories: Set[str] | None = None) -> Set[str]:
    """Construit l'ensemble des références de têtes via leur catégorie."""
    categories = head_categories or HEAD_CATEGORIES
    head_parts: Set[str] = set()
    with parts_path.open() as parts_file:
        reader = csv.DictReader(parts_file)
        for row in reader:
            if row["part_cat_id"] in categories:
                head_parts.add(row["part_num"])
    return head_parts
 def build_sets_year_lookup(sets_path: Path) -> Dict[str, str]:
    """Map every ``set_num`` of the sets CSV to its ``year`` (kept as text).

    When a set number appears several times, the last row wins.
    """
    with sets_path.open() as handle:
        return {record["set_num"]: record["year"] for record in csv.DictReader(handle)}
 def build_color_lookup(colors_path: Path) -> Dict[str, dict]:
    """Index the colours CSV by colour ``id``.

    Each entry holds ``rgb``, ``is_translucent`` (the lower-cased ``is_trans``
    column) and ``name``.
    """
    with colors_path.open() as handle:
        return {
            record["id"]: {
                "rgb": record["rgb"],
                "is_translucent": record["is_trans"].lower(),
                "name": record["name"],
            }
            for record in csv.DictReader(handle)
        }
 def aggregate_global_heads_by_year(
    inventories_path: Path,
    inventory_parts_path: Path,
    parts_path: Path,
    colors_path: Path,
    sets_path: Path,
    head_categories: Set[str] | None = None,
 ) -> List[dict]:
    """Sum minifig head quantities per year and colour over the whole catalogue.

    Only inventory rows that belong to an inventory returned by
    ``select_latest_inventories``, reference a head part, are not flagged as
    spare and whose set has a known year are counted. The result is sorted by
    numeric year, colour name and translucency flag.
    """
    head_parts = load_head_parts(parts_path, head_categories)
    # inventory id -> set number, limited to the selected inventories.
    set_by_inventory = {
        data["id"]: set_num
        for set_num, data in select_latest_inventories(inventories_path).items()
    }
    colors_lookup = build_color_lookup(colors_path)
    sets_year = build_sets_year_lookup(sets_path)
    buckets: Dict[Tuple[str, str, str], dict] = {}
    with inventory_parts_path.open() as parts_file:
        for record in csv.DictReader(parts_file):
            inventory_id = record["inventory_id"]
            ignored = (
                inventory_id not in set_by_inventory
                or record["part_num"] not in head_parts
                or normalize_boolean(record["is_spare"]) == "true"
            )
            if ignored:
                continue
            year = sets_year.get(set_by_inventory[inventory_id])
            if year is None:
                continue
            color = colors_lookup[record["color_id"]]
            # NOTE(review): the bucket key ignores the colour name, so colours
            # sharing rgb and translucency are merged under the first name
            # encountered - confirm this is intended.
            bucket = buckets.setdefault(
                (year, color["rgb"], color["is_translucent"]),
                {
                    "year": year,
                    "color_rgb": color["rgb"],
                    "is_translucent": color["is_translucent"],
                    "color_name": color["name"],
                    "quantity": 0,
                },
            )
            bucket["quantity"] += int(record["quantity"])
    return sorted(
        buckets.values(),
        key=lambda r: (int(r["year"]), r["color_name"], r["is_translucent"]),
    )
 def write_global_heads_by_year(destination_path: Path, rows: Iterable[dict]) -> None:
    """Write the per-year head aggregate to ``destination_path`` as CSV.

    The columns are, in order: ``year``, ``color_rgb``, ``is_translucent``,
    ``color_name`` and ``quantity``. Missing parent directories are created
    first, so the target folder does not have to exist beforehand.

    Args:
        destination_path: CSV file to create or overwrite.
        rows: dictionaries providing the columns listed above.
    """
    fieldnames = ["year", "color_rgb", "is_translucent", "color_name", "quantity"]
    # Same guarantee as the plot writers: never fail on a missing folder.
    destination_path.parent.mkdir(parents=True, exist_ok=True)
    with destination_path.open("w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
--- a/scripts/compute_global_minifig_heads.py
+++ b/scripts/compute_global_minifig_heads.py
@@ -0,0 +1,29 @@
 """Agrégation globale des couleurs de têtes de minifigs (catalogue complet)."""
 from pathlib import Path
 from lib.rebrickable.global_minifig_heads import aggregate_global_heads_by_year, write_global_heads_by_year
 # Raw catalogue CSV files passed to the aggregation.
 INVENTORIES_PATH = Path("data/raw/inventories.csv")
 INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
 PARTS_PATH = Path("data/raw/parts.csv")
 COLORS_PATH = Path("data/raw/colors.csv")
 SETS_PATH = Path("data/raw/sets.csv")
 # CSV file receiving the per-year head colour aggregate.
 DESTINATION_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv")
 def main() -> None:
    """Aggregate head colours per year over the catalogue and write the CSV."""
    write_global_heads_by_year(
        DESTINATION_PATH,
        aggregate_global_heads_by_year(
            inventories_path=INVENTORIES_PATH,
            inventory_parts_path=INVENTORY_PARTS_PATH,
            parts_path=PARTS_PATH,
            colors_path=COLORS_PATH,
            sets_path=SETS_PATH,
        ),
    )
 if __name__ == "__main__":
    main()
--- a/scripts/plot_global_minifig_heads.py
+++ b/scripts/plot_global_minifig_heads.py
@@ -0,0 +1,18 @@
 """Répartition annuelle des couleurs de têtes (catalogue complet)."""
 from pathlib import Path
 from lib.plots.global_minifig_heads import plot_global_head_shares
 # Input: per-year head colour aggregate (CSV).
 HEADS_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv")
 # Output: image of the yearly colour shares.
 DESTINATION_PATH = Path("figures/step17/global_minifig_heads_shares.png")
 def main() -> None:
    """Render the stacked chart of yearly head colour shares."""
    plot_global_head_shares(heads_path=HEADS_PATH, destination_path=DESTINATION_PATH)
 if __name__ == "__main__":
    main()
--- a/scripts/plot_global_minifig_skin_tones.py
+++ b/scripts/plot_global_minifig_skin_tones.py
@@ -0,0 +1,19 @@
 """Evolution de l'usage du Yellow pour les têtes minifigs (catalogue complet)."""
 from pathlib import Path
 from lib.plots.minifig_skin_tones import plot_yellow_share
 # Input: per-year head colour aggregate (CSV).
 HEADS_PATH = Path("data/intermediate/global_minifig_heads_by_year.csv")
 # Input: milestones file passed to the plot for annotation.
 MILESTONES_PATH = Path("config/milestones.csv")
 # Output: image of the yearly Yellow share.
 DESTINATION_PATH = Path("figures/step17/global_minifig_heads_yellow_share.png")
 def main() -> None:
    """Render the yearly share of Yellow heads with its milestones."""
    plot_yellow_share(
        heads_path=HEADS_PATH,
        milestones_path=MILESTONES_PATH,
        destination_path=DESTINATION_PATH,
    )
 if __name__ == "__main__":
    main()
--- a/tests/test_global_minifig_heads.py
+++ b/tests/test_global_minifig_heads.py
@@ -0,0 +1,92 @@
 """Tests de l'agrégation globale des têtes de minifigs."""
 import csv
 from pathlib import Path
 from lib.rebrickable.global_minifig_heads import (
    aggregate_global_heads_by_year,
    write_global_heads_by_year,
 )
 def write_csv(path: Path, headers: list[str], rows: list[list[str]]) -> None:
    """Write a small CSV fixture: the header line followed by the data rows."""
    with path.open("w", newline="") as handle:
        csv.writer(handle).writerows([headers, *rows])
 def test_aggregate_global_heads_by_year(tmp_path: Path) -> None:
    """Check the global per-year aggregate end to end, from raw CSVs to output CSV."""
    inventories = tmp_path / "inventories.csv"
    inventory_parts = tmp_path / "inventory_parts.csv"
    parts = tmp_path / "parts.csv"
    colors = tmp_path / "colors.csv"
    sets = tmp_path / "sets.csv"
    destination = tmp_path / "global_heads.csv"
    # Set 1000-1 has two inventory versions (ids 1 and 2); set 2000-1 has one.
    write_csv(
        inventories,
        ["id", "version", "set_num"],
        [
            ["1", "1", "1000-1"],
            ["2", "2", "1000-1"],
            ["3", "1", "2000-1"],
        ],
    )
    # Head rows hang off inventories 2 and 3; inventory 3 also holds a brick
    # (3001) that must not appear in the result.
    write_csv(
        inventory_parts,
        ["inventory_id", "part_num", "color_id", "quantity", "is_spare", "img_url"],
        [
            ["2", "3626b", "1", "2", "False", ""],
            ["3", "3626b", "2", "1", "False", ""],
            ["3", "3001", "1", "10", "False", ""],
        ],
    )
    # Only 3626b is in category 59, the default head category.
    write_csv(
        parts,
        ["part_num", "name", "part_cat_id", "part_material"],
        [
            ["3626b", "Minifig Head", "59", "Plastic"],
            ["3001", "Brick 2 x 4", "11", "Plastic"],
        ],
    )
    write_csv(
        colors,
        ["id", "name", "rgb", "is_trans", "num_parts", "num_sets", "y1", "y2"],
        [
            ["1", "Yellow", "FFFF00", "False", "0", "0", "0", "0"],
            ["2", "Light Flesh", "FFE1BD", "False", "0", "0", "0", "0"],
        ],
    )
    write_csv(
        sets,
        ["set_num", "name", "year", "theme_id", "num_parts", "img_url"],
        [
            ["1000-1", "Set A", "2020", "1", "0", ""],
            ["2000-1", "Set B", "2021", "1", "0", ""],
        ],
    )
    rows = aggregate_global_heads_by_year(inventories, inventory_parts, parts, colors, sets)
    write_global_heads_by_year(destination, rows)
    # Read the file back so that both aggregation and serialisation are checked;
    # every value comes back as text, including the quantities.
    with destination.open() as csv_file:
        written = list(csv.DictReader(csv_file))
    assert written == [
        {
            "year": "2020",
            "color_rgb": "FFFF00",
            "is_translucent": "false",
            "color_name": "Yellow",
            "quantity": "2",
        },
        {
            "year": "2021",
            "color_rgb": "FFE1BD",
            "is_translucent": "false",
            "color_name": "Light Flesh",
            "quantity": "1",
        },
    ]
--- a/tests/test_global_minifig_heads_plot.py
+++ b/tests/test_global_minifig_heads_plot.py
@@ -0,0 +1,28 @@
 """Tests des visualisations globales des têtes de minifigs."""
 import matplotlib
 from pathlib import Path
 from lib.plots.global_minifig_heads import plot_global_head_shares
 matplotlib.use("Agg")
 def test_plot_global_head_shares(tmp_path: Path) -> None:
    """The colour-share chart is written as a non-empty file."""
    csv_lines = (
        "year,color_rgb,is_translucent,color_name,quantity\n",
        "2020,FFFF00,false,Yellow,2\n",
        "2020,FFE1BD,false,Light Flesh,1\n",
        "2021,FFE1BD,false,Light Flesh,3\n",
        "2021,E7B68F,false,Medium Dark Flesh,1\n",
        "2021,FFFF00,false,Yellow,2\n",
    )
    aggregate_csv = tmp_path / "global_minifig_heads_by_year.csv"
    aggregate_csv.write_text("".join(csv_lines))
    # The figure folder does not exist yet: the plot has to create it.
    figure_path = tmp_path / "figures" / "step17" / "global_minifig_heads_shares.png"
    plot_global_head_shares(aggregate_csv, figure_path)
    assert figure_path.exists()
    assert figure_path.stat().st_size > 0
--- a/tests/test_global_minifig_skin_tones_plot.py
+++ b/tests/test_global_minifig_skin_tones_plot.py
@@ -0,0 +1,29 @@
 """Tests du graphique global sur la part de têtes Yellow."""
 import matplotlib
 from pathlib import Path
 from lib.plots.minifig_skin_tones import plot_yellow_share
 matplotlib.use("Agg")
 def test_plot_yellow_share(tmp_path: Path) -> None:
    """The Yellow share chart is written as a non-empty file."""
    csv_lines = (
        "year,color_rgb,is_translucent,color_name,quantity\n",
        "2020,FFFF00,false,Yellow,2\n",
        "2020,FFE1BD,false,Light Flesh,1\n",
        "2021,FFE1BD,false,Light Flesh,3\n",
        "2021,FFFF00,false,Yellow,1\n",
    )
    aggregate_csv = tmp_path / "global_minifig_heads_by_year.csv"
    aggregate_csv.write_text("".join(csv_lines))
    milestones_csv = tmp_path / "milestones.csv"
    milestones_csv.write_text("year,description\n2020,Lancement\n")
    # The figure folder does not exist yet: the plot has to create it.
    figure_path = tmp_path / "figures" / "step17" / "global_minifig_heads_yellow_share.png"
    plot_yellow_share(aggregate_csv, milestones_csv, figure_path)
    assert figure_path.exists()
    assert figure_path.stat().st_size > 0