From d5ff3cd0765dfcf4aebec14d682dc81ef21f16a1 Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Wed, 3 Dec 2025 22:41:06 +0100 Subject: [PATCH] =?UTF-8?q?Liste=20les=20nouveaux=20personnages=20par=20an?= =?UTF-8?q?n=C3=A9e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 + lib/rebrickable/minifig_characters.py | 95 +++++++++++++++++++++++ scripts/list_new_minifig_characters.py | 39 ++++++++++ tests/test_minifig_characters.py | 103 +++++++++++++++++++++++-- 4 files changed, 235 insertions(+), 5 deletions(-) create mode 100644 scripts/list_new_minifig_characters.py diff --git a/README.md b/README.md index 1e4a70a..c681a83 100644 --- a/README.md +++ b/README.md @@ -245,6 +245,7 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_ 2. `python -m scripts.plot_minifig_characters` 3. `python -m scripts.plot_minifig_character_variations` 4. `python -m scripts.plot_minifig_new_characters` +5. `python -m scripts.list_new_minifig_characters` Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`. @@ -252,6 +253,8 @@ Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figuran Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`). +Le quatrième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, liste pour chaque année (2015-2025) les personnages introduits et les sets de cette année contenant ces minifigs, puis produit `data/final/minifig_new_characters_by_year.csv` et `data/final/minifig_new_characters_by_year.md` (format markdown minimal pour le blog). + ### Étape 23 : présence annuelle des personnages 1. `source .venv/bin/activate` diff --git a/lib/rebrickable/minifig_characters.py b/lib/rebrickable/minifig_characters.py index 03c23a3..1d698f6 100644 --- a/lib/rebrickable/minifig_characters.py +++ b/lib/rebrickable/minifig_characters.py @@ -110,6 +110,69 @@ def aggregate_new_characters_by_year( return results +def aggregate_new_character_sets( + minifigs_rows: Iterable[dict], + sets_lookup: Dict[str, dict], + excluded_characters: Sequence[str] | None = None, + start_year: int | None = None, + end_year: int | None = None, +) -> List[dict]: + """Liste les personnages introduits par année avec les sets correspondants.""" + excluded = set(excluded_characters or []) + first_year: Dict[str, int] = {} + for row in minifigs_rows: + character = row["known_character"].strip() + fig_num = row["fig_num"].strip() + if character == "" or fig_num == "": + continue + if character in excluded: + continue + set_row = sets_lookup.get(row["set_num"]) + if set_row is None: + continue + year_int = int(set_row["year"]) + current = first_year.get(character) + if current is None or year_int < current: + first_year[character] = year_int + rows: List[dict] = [] + seen: set[tuple[str, str]] = set() + for row in minifigs_rows: + character = row["known_character"].strip() + fig_num = row["fig_num"].strip() + if character == "" or fig_num == "": + continue + if character in excluded: + continue + set_row = sets_lookup.get(row["set_num"]) + if set_row is None: + continue + intro_year = first_year.get(character) + if intro_year is None: + continue + if start_year is not None and intro_year < start_year: + continue + if end_year is not None and intro_year > end_year: + continue + if int(set_row["year"]) != intro_year: + continue + key = (character, set_row["set_num"]) + if key in seen: + continue + rows.append( + { + "year": str(int(set_row["year"])), + "known_character": character, + "set_num": set_row["set_num"], + "set_id": set_row.get("set_id", ""), + "set_name": set_row.get("name", ""), + "rebrickable_url": set_row.get("rebrickable_url", ""), + } + ) + seen.add(key) + rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"])) + return rows + + def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]: """Compte les minifigs distinctes par genre (fig_num unique).""" genders_by_fig: Dict[str, str] = {} @@ -154,6 +217,38 @@ def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None: writer.writerow(row) +def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None: + """Écrit le CSV listant les personnages introduits et leurs sets.""" + ensure_parent_dir(path) + fieldnames = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"] + with path.open("w", newline="") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + + +def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None: + """Écrit un Markdown listant les personnages introduits par année et leurs sets.""" + ensure_parent_dir(path) + grouped: Dict[str, Dict[str, List[dict]]] = {} + for row in rows: + year_group = grouped.setdefault(row["year"], {}) + characters = year_group.setdefault(row["known_character"], []) + characters.append(row) + with path.open("w") as md_file: + for year in sorted(grouped.keys(), key=int): + md_file.write(f"##### {year}\n\n") + for character in sorted(grouped[year].keys()): + md_file.write(f"- {character}\n") + for entry in sorted(grouped[year][character], key=lambda r: r["set_id"]): + link = entry["rebrickable_url"] or "" + set_id = entry["set_id"] + name = entry["set_name"] + md_file.write(f" - [{set_id}]({link}) - {name}\n") + md_file.write("\n") + + def write_gender_counts(path: Path, rows: Sequence[dict]) -> None: """Écrit le CSV des comptes par genre.""" ensure_parent_dir(path) diff --git a/scripts/list_new_minifig_characters.py b/scripts/list_new_minifig_characters.py new file mode 100644 index 0000000..3659eff --- /dev/null +++ b/scripts/list_new_minifig_characters.py @@ -0,0 +1,39 @@ +"""Liste les nouveaux personnages introduits chaque année et leurs sets associés.""" + +from pathlib import Path + +from lib.rebrickable.minifig_character_sets import load_sets +from lib.rebrickable.minifig_characters import ( + aggregate_new_character_sets, + load_minifigs_by_set, + write_new_character_sets_csv, + write_new_character_sets_markdown, +) + + +MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv") +SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv") +CSV_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.csv") +MARKDOWN_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.md") +EXCLUDED_CHARACTERS = ["Figurant"] +START_YEAR = 2015 +END_YEAR = 2025 + + +def main() -> None: + """Construit le CSV et le Markdown listant les personnages introduits chaque année.""" + minifigs = load_minifigs_by_set(MINIFIGS_BY_SET_PATH) + sets_lookup = load_sets(SETS_ENRICHED_PATH) + rows = aggregate_new_character_sets( + minifigs, + sets_lookup, + excluded_characters=EXCLUDED_CHARACTERS, + start_year=START_YEAR, + end_year=END_YEAR, + ) + write_new_character_sets_csv(CSV_DESTINATION_PATH, rows) + write_new_character_sets_markdown(MARKDOWN_DESTINATION_PATH, rows) + + +if __name__ == "__main__": + main() diff --git a/tests/test_minifig_characters.py b/tests/test_minifig_characters.py index 24441ff..951115d 100644 --- a/tests/test_minifig_characters.py +++ b/tests/test_minifig_characters.py @@ -5,16 +5,20 @@ from pathlib import Path from lib.rebrickable.minifig_characters import ( aggregate_by_character, aggregate_by_gender, + aggregate_new_character_sets, aggregate_new_characters_by_year, aggregate_variations_and_totals, aggregate_character_spans, aggregate_presence_by_year, load_sets_enriched, write_character_counts, + write_new_character_sets_csv, + write_new_character_sets_markdown, write_new_characters_by_year, write_character_variations_totals, write_gender_counts, ) +from lib.rebrickable.minifig_character_sets import load_sets def test_aggregate_by_character_counts_unique_figs() -> None: @@ -143,11 +147,11 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None: """Compter les nouveaux personnages par année en respectant la plage.""" sets_path = tmp_path / "sets_enriched.csv" sets_path.write_text( - "set_num,year\n" - "123-1,2015\n" - "124-1,2016\n" - "125-1,2017\n" - "126-1,2014\n" + "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n" + "123-1,Set A,2015,0,0,,-,http://r/123-1,true\n" + "124-1,Set B,2016,0,0,,-,http://r/124-1,true\n" + "125-1,Set C,2017,0,0,,-,http://r/125-1,true\n" + "126-1,Set D,2014,0,0,,-,http://r/126-1,true\n" ) sets_years = load_sets_enriched(sets_path) minifigs_rows = [ @@ -172,6 +176,59 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None: ] +def test_aggregate_new_character_sets_returns_intro_sets(tmp_path: Path) -> None: + """Lister les personnages introduits avec les sets de l'année d'introduction.""" + sets_path = tmp_path / "sets_enriched.csv" + sets_path.write_text( + "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n" + "123-1,Set A,2015,0,0,,123,http://r/123-1,true\n" + "124-1,Set B,2015,0,0,,124,http://r/124-1,true\n" + "125-1,Set C,2016,0,0,,125,http://r/125-1,true\n" + ) + sets_lookup = load_sets(sets_path) + minifigs_rows = [ + {"set_num": "123-1", "known_character": "Owen Grady", "fig_num": "fig-owen-1", "part_num": "head-a"}, + {"set_num": "124-1", "known_character": "Owen Grady", "fig_num": "fig-owen-2", "part_num": "head-b"}, + {"set_num": "125-1", "known_character": "Owen Grady", "fig_num": "fig-owen-3", "part_num": "head-c"}, + {"set_num": "125-1", "known_character": "Ellie Sattler", "fig_num": "fig-ellie-1", "part_num": "head-d"}, + ] + + rows = aggregate_new_character_sets( + minifigs_rows, + sets_lookup, + excluded_characters=["Figurant"], + start_year=2015, + end_year=2016, + ) + + assert rows == [ + { + "year": "2015", + "known_character": "Owen Grady", + "set_num": "123-1", + "set_id": "123", + "set_name": "Set A", + "rebrickable_url": "http://r/123-1", + }, + { + "year": "2015", + "known_character": "Owen Grady", + "set_num": "124-1", + "set_id": "124", + "set_name": "Set B", + "rebrickable_url": "http://r/124-1", + }, + { + "year": "2016", + "known_character": "Ellie Sattler", + "set_num": "125-1", + "set_id": "125", + "set_name": "Set C", + "rebrickable_url": "http://r/125-1", + }, + ] + + def test_write_character_counts_outputs_csv(tmp_path: Path) -> None: """Écrit le CSV des comptes par personnage.""" destination = tmp_path / "counts.csv" @@ -224,6 +281,42 @@ def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None: assert destination.read_text() == "year,new_characters\n2015,3\n2016,1\n" +def test_write_new_character_sets_markdown_outputs_md(tmp_path: Path) -> None: + """Écrit le Markdown listant les nouveaux personnages et leurs sets.""" + destination = tmp_path / "new_characters.md" + rows = [ + { + "year": "2015", + "known_character": "Owen Grady", + "set_num": "123-1", + "set_id": "123", + "set_name": "Set A", + "rebrickable_url": "http://r/123-1", + }, + { + "year": "2016", + "known_character": "Ellie Sattler", + "set_num": "125-1", + "set_id": "125", + "set_name": "Set C", + "rebrickable_url": "http://r/125-1", + }, + ] + + write_new_character_sets_markdown(destination, rows) + + assert destination.read_text() == ( + "##### 2015\n\n" + "- Owen Grady\n" + " - [123](http://r/123-1) - Set A\n" + "\n" + "##### 2016\n\n" + "- Ellie Sattler\n" + " - [125](http://r/125-1) - Set C\n" + "\n" + ) + + def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None: """Calcule le total annuel en excluant les figurants.""" sets_path = tmp_path / "sets_enriched.csv"