Trie le graphique de réutilisation des têtes par usage décroissant
This commit is contained in:
parent
14a7dc8561
commit
6dc1f1cac5
@ -5,8 +5,11 @@ from pathlib import Path
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.resources import sanitize_name
|
||||||
|
|
||||||
|
|
||||||
def load_head_reuse(path: Path) -> List[dict]:
|
def load_head_reuse(path: Path) -> List[dict]:
|
||||||
@ -27,25 +30,65 @@ def format_label(row: dict) -> str:
|
|||||||
return row["part_num"]
|
return row["part_num"]
|
||||||
|
|
||||||
|
|
||||||
def plot_head_reuse(path: Path, destination_path: Path, top: int = 30) -> None:
|
def load_head_image(row: dict, resources_dir: Path) -> Image.Image | None:
|
||||||
|
"""Charge l'image d'une tête si disponible localement."""
|
||||||
|
set_id = row.get("sample_set_id", "").strip()
|
||||||
|
character = row.get("known_character", "").strip()
|
||||||
|
if set_id == "" or character == "":
|
||||||
|
return None
|
||||||
|
path = resources_dir / set_id / sanitize_name(character) / "head.jpg"
|
||||||
|
if not path.exists():
|
||||||
|
return None
|
||||||
|
return Image.open(path)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_head_reuse(
|
||||||
|
path: Path,
|
||||||
|
destination_path: Path,
|
||||||
|
top: int | None = None,
|
||||||
|
resources_dir: Path = Path("figures/rebrickable"),
|
||||||
|
show_images: bool = True,
|
||||||
|
) -> None:
|
||||||
"""Trace un bar chart horizontal mettant en avant les têtes exclusives ou rares."""
|
"""Trace un bar chart horizontal mettant en avant les têtes exclusives ou rares."""
|
||||||
rows = load_head_reuse(path)
|
rows = load_head_reuse(path)
|
||||||
rows.sort(key=lambda r: (int(r["other_sets"]), -int(r["filtered_sets"]), r["part_num"]))
|
rows.sort(key=lambda r: (int(r["total_sets"]), int(r["other_sets"]), r["part_num"]))
|
||||||
selected = rows[:top]
|
selected = rows if top is None else rows[:top]
|
||||||
labels = [format_label(r) for r in selected]
|
labels = [format_label(r) for r in selected]
|
||||||
filtered_counts = [int(r["filtered_sets"]) for r in selected]
|
filtered_counts = [int(r["filtered_sets"]) for r in selected]
|
||||||
other_counts = [int(r["other_sets"]) for r in selected]
|
other_counts = [int(r["other_sets"]) for r in selected]
|
||||||
positions = list(reversed(range(len(selected))))
|
positions = list(range(len(selected)))
|
||||||
|
|
||||||
fig, ax = plt.subplots(figsize=(12, 0.5 * len(selected) + 1.5))
|
fig, ax = plt.subplots(figsize=(13, 0.5 * len(selected) + 1.5))
|
||||||
ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
|
ax.barh(positions, filtered_counts, color="#1f78b4", label="Sets filtrés")
|
||||||
ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
|
ax.barh(positions, other_counts, left=filtered_counts, color="#b2df8a", label="Autres sets")
|
||||||
ax.set_yticks(positions)
|
ax.set_yticks(positions)
|
||||||
ax.set_yticklabels(reversed(labels))
|
ax.set_yticklabels(labels)
|
||||||
ax.set_xlabel("Nombre de sets contenant la tête")
|
ax.set_xlabel("Nombre de sets contenant la tête")
|
||||||
ax.invert_yaxis()
|
|
||||||
ax.grid(axis="x", linestyle="--", alpha=0.4)
|
ax.grid(axis="x", linestyle="--", alpha=0.4)
|
||||||
ax.legend()
|
ax.legend()
|
||||||
|
|
||||||
|
if show_images:
|
||||||
|
max_count = max((f + o) for f, o in zip(filtered_counts, other_counts))
|
||||||
|
pad = max_count * 0.15 if max_count > 0 else 1.0
|
||||||
|
ax.set_xlim(left=-pad, right=max_count + pad * 0.2)
|
||||||
|
for row, pos in zip(selected, positions):
|
||||||
|
image = load_head_image(row, resources_dir)
|
||||||
|
if image is None:
|
||||||
|
continue
|
||||||
|
target_height = 24
|
||||||
|
ratio = target_height / image.height
|
||||||
|
resized = image.resize((int(image.width * ratio), target_height))
|
||||||
|
imagebox = OffsetImage(resized)
|
||||||
|
ab = AnnotationBbox(
|
||||||
|
imagebox,
|
||||||
|
(-pad * 0.4, pos),
|
||||||
|
xycoords=("data", "data"),
|
||||||
|
box_alignment=(0.5, 0.5),
|
||||||
|
frameon=False,
|
||||||
|
)
|
||||||
|
ax.add_artist(ab)
|
||||||
|
|
||||||
|
fig.subplots_adjust(left=0.42)
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
|
|
||||||
ensure_parent_dir(destination_path)
|
ensure_parent_dir(destination_path)
|
||||||
|
|||||||
@ -6,6 +6,7 @@ from pathlib import Path
|
|||||||
from typing import Dict, Iterable, List, Sequence, Set
|
from typing import Dict, Iterable, List, Sequence, Set
|
||||||
|
|
||||||
from lib.filesystem import ensure_parent_dir
|
from lib.filesystem import ensure_parent_dir
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
|
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
|
||||||
from lib.rebrickable.parts_inventory import (
|
from lib.rebrickable.parts_inventory import (
|
||||||
index_inventory_parts_by_inventory,
|
index_inventory_parts_by_inventory,
|
||||||
@ -76,19 +77,24 @@ def aggregate_head_reuse(
|
|||||||
minifigs_rows: Iterable[dict],
|
minifigs_rows: Iterable[dict],
|
||||||
parts_catalog: Dict[str, dict],
|
parts_catalog: Dict[str, dict],
|
||||||
head_presence: Dict[str, Set[str]],
|
head_presence: Dict[str, Set[str]],
|
||||||
|
sets_lookup: Dict[str, dict],
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""Construit le tableau des têtes présentes dans les sets filtrés avec leur réutilisation globale."""
|
"""Construit le tableau des têtes présentes dans les sets filtrés avec leur réutilisation globale."""
|
||||||
filtered_presence = build_filtered_presence(minifigs_rows)
|
filtered_presence = build_filtered_presence(minifigs_rows)
|
||||||
labels = build_character_labels(minifigs_rows)
|
labels = build_character_labels(minifigs_rows)
|
||||||
aggregates: List[dict] = []
|
aggregates: List[dict] = []
|
||||||
for part_num, filtered_sets in filtered_presence.items():
|
for part_num, filtered_sets in filtered_presence.items():
|
||||||
all_sets = head_presence.get(part_num, set())
|
all_sets = set(head_presence.get(part_num, set()))
|
||||||
|
all_sets.update(filtered_sets)
|
||||||
other_sets = all_sets - filtered_sets
|
other_sets = all_sets - filtered_sets
|
||||||
|
sample_set = sorted(filtered_sets)[0]
|
||||||
|
sample_set_id = sets_lookup.get(sample_set, {}).get("set_id", sample_set.split("-")[0])
|
||||||
aggregates.append(
|
aggregates.append(
|
||||||
{
|
{
|
||||||
"part_num": part_num,
|
"part_num": part_num,
|
||||||
"part_name": parts_catalog[part_num]["name"],
|
"part_name": parts_catalog[part_num]["name"],
|
||||||
"known_character": labels.get(part_num, ""),
|
"known_character": labels.get(part_num, ""),
|
||||||
|
"sample_set_id": sample_set_id,
|
||||||
"filtered_sets": str(len(filtered_sets)),
|
"filtered_sets": str(len(filtered_sets)),
|
||||||
"other_sets": str(len(other_sets)),
|
"other_sets": str(len(other_sets)),
|
||||||
"total_sets": str(len(all_sets)),
|
"total_sets": str(len(all_sets)),
|
||||||
@ -101,7 +107,7 @@ def aggregate_head_reuse(
|
|||||||
def write_head_reuse(destination_path: Path, rows: Sequence[dict]) -> None:
|
def write_head_reuse(destination_path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des usages de têtes filtrées vs reste du catalogue."""
|
"""Écrit le CSV des usages de têtes filtrées vs reste du catalogue."""
|
||||||
ensure_parent_dir(destination_path)
|
ensure_parent_dir(destination_path)
|
||||||
fieldnames = ["part_num", "part_name", "known_character", "filtered_sets", "other_sets", "total_sets"]
|
fieldnames = ["part_num", "part_name", "known_character", "sample_set_id", "filtered_sets", "other_sets", "total_sets"]
|
||||||
with destination_path.open("w", newline="") as csv_file:
|
with destination_path.open("w", newline="") as csv_file:
|
||||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|||||||
@ -4,12 +4,14 @@ from pathlib import Path
|
|||||||
|
|
||||||
from lib.rebrickable.head_reuse import aggregate_head_reuse, build_head_presence, load_minifigs_by_set, write_head_reuse
|
from lib.rebrickable.head_reuse import aggregate_head_reuse, build_head_presence, load_minifigs_by_set, write_head_reuse
|
||||||
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
|
from lib.rebrickable.minifigs_by_set import load_parts_catalog, select_head_parts
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
|
|
||||||
|
|
||||||
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
|
||||||
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
|
PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
|
||||||
INVENTORIES_PATH = Path("data/raw/inventories.csv")
|
INVENTORIES_PATH = Path("data/raw/inventories.csv")
|
||||||
INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
|
INVENTORY_PARTS_PATH = Path("data/raw/inventory_parts.csv")
|
||||||
|
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
|
||||||
DESTINATION_PATH = Path("data/intermediate/head_reuse.csv")
|
DESTINATION_PATH = Path("data/intermediate/head_reuse.csv")
|
||||||
|
|
||||||
|
|
||||||
@ -18,8 +20,9 @@ def main() -> None:
|
|||||||
minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
|
minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH)
|
||||||
parts_catalog = load_parts_catalog(PARTS_CATALOG_PATH)
|
parts_catalog = load_parts_catalog(PARTS_CATALOG_PATH)
|
||||||
head_parts = select_head_parts(parts_catalog)
|
head_parts = select_head_parts(parts_catalog)
|
||||||
|
sets_lookup = load_sets(SETS_ENRICHED_PATH)
|
||||||
presence = build_head_presence(INVENTORIES_PATH, INVENTORY_PARTS_PATH, head_parts)
|
presence = build_head_presence(INVENTORIES_PATH, INVENTORY_PARTS_PATH, head_parts)
|
||||||
reuse = aggregate_head_reuse(minifigs, parts_catalog, presence)
|
reuse = aggregate_head_reuse(minifigs, parts_catalog, presence, sets_lookup)
|
||||||
write_head_reuse(DESTINATION_PATH, reuse)
|
write_head_reuse(DESTINATION_PATH, reuse)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,11 +7,12 @@ from lib.plots.head_reuse import plot_head_reuse
|
|||||||
|
|
||||||
HEAD_REUSE_PATH = Path("data/intermediate/head_reuse.csv")
|
HEAD_REUSE_PATH = Path("data/intermediate/head_reuse.csv")
|
||||||
DESTINATION_PATH = Path("figures/step33/head_reuse.png")
|
DESTINATION_PATH = Path("figures/step33/head_reuse.png")
|
||||||
|
RESOURCES_DIR = Path("figures/rebrickable")
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
"""Charge les données d'usage des têtes et produit le graphique associé."""
|
"""Charge les données d'usage des têtes et produit le graphique associé."""
|
||||||
plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH)
|
plot_head_reuse(HEAD_REUSE_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -75,12 +75,19 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
|
|||||||
],
|
],
|
||||||
load_parts_catalog(parts_catalog),
|
load_parts_catalog(parts_catalog),
|
||||||
presence,
|
presence,
|
||||||
|
{
|
||||||
|
"s1-1": {"set_id": "s1"},
|
||||||
|
"s2-1": {"set_id": "s2"},
|
||||||
|
"s3-1": {"set_id": "s3"},
|
||||||
|
"s4-1": {"set_id": "s4"},
|
||||||
|
},
|
||||||
)
|
)
|
||||||
assert reuse == [
|
assert reuse == [
|
||||||
{
|
{
|
||||||
"part_num": "p1",
|
"part_num": "p1",
|
||||||
"part_name": "Head 1",
|
"part_name": "Head 1",
|
||||||
"known_character": "Alice",
|
"known_character": "Alice",
|
||||||
|
"sample_set_id": "s1",
|
||||||
"filtered_sets": "1",
|
"filtered_sets": "1",
|
||||||
"other_sets": "0",
|
"other_sets": "0",
|
||||||
"total_sets": "1",
|
"total_sets": "1",
|
||||||
@ -89,8 +96,9 @@ def test_head_reuse_counts_sets_and_catalog(tmp_path: Path) -> None:
|
|||||||
"part_num": "p2",
|
"part_num": "p2",
|
||||||
"part_name": "Head 2",
|
"part_name": "Head 2",
|
||||||
"known_character": "Bob",
|
"known_character": "Bob",
|
||||||
|
"sample_set_id": "s1",
|
||||||
"filtered_sets": "2",
|
"filtered_sets": "2",
|
||||||
"other_sets": "1",
|
"other_sets": "1",
|
||||||
"total_sets": "2",
|
"total_sets": "3",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user