1

Exclusion des pièces imprimées dans la recherche de rareté

This commit is contained in:
Richard Dern 2025-12-03 17:21:00 +01:00
parent 3f81f0bbde
commit a474e57694
4 changed files with 40 additions and 1 deletions

View File

@@ -46,6 +46,7 @@ def aggregate_filtered_parts(
parts_catalog: Dict[str, dict], parts_catalog: Dict[str, dict],
ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS, ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS, ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
exclude_printed: bool = False,
) -> Dict[str, dict]: ) -> Dict[str, dict]:
"""Agrège les quantités par pièce pour les sets filtrés (rechanges incluses).""" """Agrège les quantités par pièce pour les sets filtrés (rechanges incluses)."""
aggregated: Dict[str, dict] = {} aggregated: Dict[str, dict] = {}
@@ -57,6 +58,8 @@ def aggregate_filtered_parts(
continue continue
if part["part_cat_id"] in ignored_minifig_categories: if part["part_cat_id"] in ignored_minifig_categories:
continue continue
if exclude_printed and "print" in part["name"].lower():
continue
entry = aggregated.get(row["part_num"]) entry = aggregated.get(row["part_num"])
if entry is None: if entry is None:
entry = {"quantity": 0, "set_numbers": set()} entry = {"quantity": 0, "set_numbers": set()}
@@ -73,6 +76,7 @@ def compute_other_set_usage(
filtered_set_numbers: Set[str], filtered_set_numbers: Set[str],
ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS, ignored_categories: Set[str] = IGNORED_PART_CATEGORY_IDS,
ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS, ignored_minifig_categories: Set[str] = MINIFIG_PART_CATEGORY_IDS,
exclude_printed: bool = False,
) -> Dict[str, int]: ) -> Dict[str, int]:
"""Compte les occurrences des pièces dans le reste du catalogue (rechanges incluses).""" """Compte les occurrences des pièces dans le reste du catalogue (rechanges incluses)."""
inventories = select_latest_inventories(inventories_path) inventories = select_latest_inventories(inventories_path)
@@ -87,6 +91,8 @@ def compute_other_set_usage(
continue continue
if part["part_cat_id"] in ignored_minifig_categories: if part["part_cat_id"] in ignored_minifig_categories:
continue continue
if exclude_printed and "print" in part["name"].lower():
continue
totals[row["part_num"]] = totals.get(row["part_num"], 0) + int(row["quantity"]) totals[row["part_num"]] = totals.get(row["part_num"], 0) + int(row["quantity"])
return totals return totals
@@ -98,6 +104,7 @@ def build_part_rarity(
parts_catalog_path: Path, parts_catalog_path: Path,
part_categories_path: Path, part_categories_path: Path,
filtered_sets_path: Path, filtered_sets_path: Path,
exclude_printed: bool = False,
) -> List[dict]: ) -> List[dict]:
"""Construit le classement de rareté des pièces filtrées.""" """Construit le classement de rareté des pièces filtrées."""
parts_catalog = load_parts_catalog(parts_catalog_path) parts_catalog = load_parts_catalog(parts_catalog_path)
@@ -105,12 +112,13 @@ def build_part_rarity(
filtered_sets = load_filtered_sets(filtered_sets_path) filtered_sets = load_filtered_sets(filtered_sets_path)
filtered_set_numbers = set(filtered_sets.keys()) filtered_set_numbers = set(filtered_sets.keys())
filtered_rows = read_rows(parts_filtered_path) filtered_rows = read_rows(parts_filtered_path)
filtered_usage = aggregate_filtered_parts(filtered_rows, parts_catalog) filtered_usage = aggregate_filtered_parts(filtered_rows, parts_catalog, exclude_printed=exclude_printed)
other_usage = compute_other_set_usage( other_usage = compute_other_set_usage(
inventories_path, inventories_path,
inventory_parts_path, inventory_parts_path,
parts_catalog, parts_catalog,
filtered_set_numbers, filtered_set_numbers,
exclude_printed=exclude_printed,
) )
rows: List[dict] = [] rows: List[dict] = []
for part_num, entry in filtered_usage.items(): for part_num, entry in filtered_usage.items():

View File

@@ -12,7 +12,9 @@ PARTS_CATALOG_PATH = Path("data/raw/parts.csv")
PART_CATEGORIES_PATH = Path("data/raw/part_categories.csv") PART_CATEGORIES_PATH = Path("data/raw/part_categories.csv")
FILTERED_SETS_PATH = Path("data/intermediate/sets_enriched.csv") FILTERED_SETS_PATH = Path("data/intermediate/sets_enriched.csv")
DESTINATION_PATH = Path("data/intermediate/part_rarity.csv") DESTINATION_PATH = Path("data/intermediate/part_rarity.csv")
DESTINATION_PRINTED_EXCLUDED_PATH = Path("data/intermediate/part_rarity_no_print.csv")
TOP_DESTINATION_PATH = Path("data/intermediate/part_rarity_exclusive.csv") TOP_DESTINATION_PATH = Path("data/intermediate/part_rarity_exclusive.csv")
TOP_PRINTED_EXCLUDED_PATH = Path("data/intermediate/part_rarity_exclusive_no_print.csv")
def main() -> None: def main() -> None:
@@ -29,6 +31,19 @@ def main() -> None:
top_rows = select_until_reused(rows) top_rows = select_until_reused(rows)
write_part_rarity(TOP_DESTINATION_PATH, top_rows) write_part_rarity(TOP_DESTINATION_PATH, top_rows)
rows_no_print = build_part_rarity(
PARTS_FILTERED_PATH,
INVENTORIES_PATH,
INVENTORY_PARTS_PATH,
PARTS_CATALOG_PATH,
PART_CATEGORIES_PATH,
FILTERED_SETS_PATH,
exclude_printed=True,
)
write_part_rarity(DESTINATION_PRINTED_EXCLUDED_PATH, rows_no_print)
top_rows_no_print = select_until_reused(rows_no_print)
write_part_rarity(TOP_PRINTED_EXCLUDED_PATH, top_rows_no_print)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -8,11 +8,14 @@ from lib.plots.part_rarity import plot_part_rarity
PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv") PART_RARITY_TOP_PATH = Path("data/intermediate/part_rarity_exclusive.csv")
DESTINATION_PATH = Path("figures/step34/part_rarity.png") DESTINATION_PATH = Path("figures/step34/part_rarity.png")
RESOURCES_DIR = Path("figures/rebrickable") RESOURCES_DIR = Path("figures/rebrickable")
PART_RARITY_NO_PRINT_PATH = Path("data/intermediate/part_rarity_exclusive_no_print.csv")
DESTINATION_NO_PRINT = Path("figures/step34/part_rarity_no_print.png")
def main() -> None: def main() -> None:
"""Charge le top des pièces rares et produit le graphique illustré.""" """Charge le top des pièces rares et produit le graphique illustré."""
plot_part_rarity(PART_RARITY_TOP_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR) plot_part_rarity(PART_RARITY_TOP_PATH, DESTINATION_PATH, resources_dir=RESOURCES_DIR)
plot_part_rarity(PART_RARITY_NO_PRINT_PATH, DESTINATION_NO_PRINT, resources_dir=RESOURCES_DIR)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -58,6 +58,7 @@ def test_build_part_rarity_counts_spares_and_ignores_categories(tmp_path: Path)
["p4", "Figure Limb", "41", "Plastic"], ["p4", "Figure Limb", "41", "Plastic"],
["p5", "Sticker Sheet", "58", "Plastic"], ["p5", "Sticker Sheet", "58", "Plastic"],
["p6", "Exclusive Tile", "1", "Plastic"], ["p6", "Exclusive Tile", "1", "Plastic"],
["p7", "Slope 45 print", "1", "Plastic"],
], ],
) )
part_categories = tmp_path / "part_categories.csv" part_categories = tmp_path / "part_categories.csv"
@@ -95,6 +96,7 @@ def test_build_part_rarity_counts_spares_and_ignores_categories(tmp_path: Path)
["3", "p4", "1", "4", "True", ""], ["3", "p4", "1", "4", "True", ""],
["4", "p1", "1", "8", "False", ""], ["4", "p1", "1", "8", "False", ""],
["5", "p5", "1", "9", "False", ""], ["5", "p5", "1", "9", "False", ""],
["5", "p7", "1", "5", "False", ""],
], ],
) )
@@ -150,6 +152,17 @@ def test_build_part_rarity_counts_spares_and_ignores_categories(tmp_path: Path)
] ]
assert select_until_reused(rows) == [rows[0], rows[1]] assert select_until_reused(rows) == [rows[0], rows[1]]
rows_no_print = build_part_rarity(
parts_filtered,
inventories,
inventory_parts,
parts_catalog,
part_categories,
sets_enriched,
exclude_printed=True,
)
assert all(r["part_num"] != "p7" for r in rows_no_print)
def test_write_part_rarity_outputs_csv(tmp_path: Path) -> None: def test_write_part_rarity_outputs_csv(tmp_path: Path) -> None:
"""Sérialise le classement de rareté.""" """Sérialise le classement de rareté."""