1

Liste les nouveaux personnages par année

This commit is contained in:
Richard Dern 2025-12-03 22:41:06 +01:00
parent 46cef55a75
commit d5ff3cd076
4 changed files with 235 additions and 5 deletions

View File

@ -245,6 +245,7 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
2. `python -m scripts.plot_minifig_characters`
3. `python -m scripts.plot_minifig_character_variations`
4. `python -m scripts.plot_minifig_new_characters`
5. `python -m scripts.list_new_minifig_characters`
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
@ -252,6 +253,8 @@ Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figuran
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
Le quatrième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, liste pour chaque année (2015-2025) les personnages introduits et les sets de cette année contenant ces minifigs, puis produit `data/final/minifig_new_characters_by_year.csv` et `data/final/minifig_new_characters_by_year.md` (format markdown minimal pour le blog).
### Étape 23 : présence annuelle des personnages
1. `source .venv/bin/activate`

View File

@ -110,6 +110,69 @@ def aggregate_new_characters_by_year(
return results
def aggregate_new_character_sets(
minifigs_rows: Iterable[dict],
sets_lookup: Dict[str, dict],
excluded_characters: Sequence[str] | None = None,
start_year: int | None = None,
end_year: int | None = None,
) -> List[dict]:
"""Liste les personnages introduits par année avec les sets correspondants."""
excluded = set(excluded_characters or [])
first_year: Dict[str, int] = {}
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
year_int = int(set_row["year"])
current = first_year.get(character)
if current is None or year_int < current:
first_year[character] = year_int
rows: List[dict] = []
seen: set[tuple[str, str]] = set()
for row in minifigs_rows:
character = row["known_character"].strip()
fig_num = row["fig_num"].strip()
if character == "" or fig_num == "":
continue
if character in excluded:
continue
set_row = sets_lookup.get(row["set_num"])
if set_row is None:
continue
intro_year = first_year.get(character)
if intro_year is None:
continue
if start_year is not None and intro_year < start_year:
continue
if end_year is not None and intro_year > end_year:
continue
if int(set_row["year"]) != intro_year:
continue
key = (character, set_row["set_num"])
if key in seen:
continue
rows.append(
{
"year": str(int(set_row["year"])),
"known_character": character,
"set_num": set_row["set_num"],
"set_id": set_row.get("set_id", ""),
"set_name": set_row.get("name", ""),
"rebrickable_url": set_row.get("rebrickable_url", ""),
}
)
seen.add(key)
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
return rows
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
"""Compte les minifigs distinctes par genre (fig_num unique)."""
genders_by_fig: Dict[str, str] = {}
@ -154,6 +217,38 @@ def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
writer.writerow(row)
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV listing newly introduced characters and their sets.

    The parent directory is prepared through ``ensure_parent_dir`` first. The
    file starts with a header row, followed by one line per entry of ``rows``
    in the order given.
    """
    ensure_parent_dir(path)
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown file listing, per year, the introduced characters and their sets.

    Rows are grouped by ``year`` and then by ``known_character``. Years are
    emitted in ascending numeric order under a ``#####`` heading, characters
    in alphabetical order as list items, and each character's sets (sorted by
    ``set_id``) as nested list items linking to ``rebrickable_url``.

    Each row must provide ``year``, ``known_character``, ``set_id``,
    ``set_name`` and ``rebrickable_url``.
    """
    ensure_parent_dir(path)
    # year -> character -> rows belonging to that character
    grouped: Dict[str, Dict[str, List[dict]]] = {}
    for row in rows:
        year_group = grouped.setdefault(row["year"], {})
        characters = year_group.setdefault(row["known_character"], [])
        characters.append(row)
    with path.open("w") as md_file:
        # Years are strings in the rows; key=int makes the ordering numeric.
        for year in sorted(grouped.keys(), key=int):
            md_file.write(f"##### {year}\n\n")
            for character in sorted(grouped[year].keys()):
                md_file.write(f"- {character}\n")
                for entry in sorted(grouped[year][character], key=lambda r: r["set_id"]):
                    # A falsy URL (e.g. None or "") yields an empty link target.
                    link = entry["rebrickable_url"] or ""
                    set_id = entry["set_id"]
                    name = entry["set_name"]
                    md_file.write(f" - [{set_id}]({link}) - {name}\n")
            # NOTE(review): indentation was lost in the reviewed copy; this blank
            # line is assumed to close each year section (not each character) —
            # confirm against the repository.
            md_file.write("\n")
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
"""Écrit le CSV des comptes par genre."""
ensure_parent_dir(path)

View File

@ -0,0 +1,39 @@
"""Liste les nouveaux personnages introduits chaque année et leurs sets associés."""
from pathlib import Path
from lib.rebrickable.minifig_character_sets import load_sets
from lib.rebrickable.minifig_characters import (
aggregate_new_character_sets,
load_minifigs_by_set,
write_new_character_sets_csv,
write_new_character_sets_markdown,
)
# Inputs read by main(): minifig rows per set and the enriched set table.
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Outputs written by main(): the same rows as CSV and as Markdown.
CSV_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.csv")
MARKDOWN_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.md")
# Character names passed as `excluded_characters` to the aggregation.
EXCLUDED_CHARACTERS = ["Figurant"]
# Year bounds passed as `start_year` / `end_year` to the aggregation.
START_YEAR = 2015
END_YEAR = 2025
def main() -> None:
    """Build the CSV and the Markdown listing the characters introduced each year."""
    new_character_rows = aggregate_new_character_sets(
        load_minifigs_by_set(MINIFIGS_BY_SET_PATH),
        load_sets(SETS_ENRICHED_PATH),
        excluded_characters=EXCLUDED_CHARACTERS,
        start_year=START_YEAR,
        end_year=END_YEAR,
    )
    # Same rows, two renderings: CSV first, then Markdown.
    exports = (
        (write_new_character_sets_csv, CSV_DESTINATION_PATH),
        (write_new_character_sets_markdown, MARKDOWN_DESTINATION_PATH),
    )
    for export, destination in exports:
        export(destination, new_character_rows)


if __name__ == "__main__":
    main()

View File

@ -5,16 +5,20 @@ from pathlib import Path
from lib.rebrickable.minifig_characters import (
aggregate_by_character,
aggregate_by_gender,
aggregate_new_character_sets,
aggregate_new_characters_by_year,
aggregate_variations_and_totals,
aggregate_character_spans,
aggregate_presence_by_year,
load_sets_enriched,
write_character_counts,
write_new_character_sets_csv,
write_new_character_sets_markdown,
write_new_characters_by_year,
write_character_variations_totals,
write_gender_counts,
)
from lib.rebrickable.minifig_character_sets import load_sets
def test_aggregate_by_character_counts_unique_figs() -> None:
@ -143,11 +147,11 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
"""Compter les nouveaux personnages par année en respectant la plage."""
sets_path = tmp_path / "sets_enriched.csv"
sets_path.write_text(
"set_num,year\n"
"123-1,2015\n"
"124-1,2016\n"
"125-1,2017\n"
"126-1,2014\n"
"set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
"123-1,Set A,2015,0,0,,-,http://r/123-1,true\n"
"124-1,Set B,2016,0,0,,-,http://r/124-1,true\n"
"125-1,Set C,2017,0,0,,-,http://r/125-1,true\n"
"126-1,Set D,2014,0,0,,-,http://r/126-1,true\n"
)
sets_years = load_sets_enriched(sets_path)
minifigs_rows = [
@ -172,6 +176,59 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
]
def test_aggregate_new_character_sets_returns_intro_sets(tmp_path: Path) -> None:
    """List introduced characters with the sets released in their introduction year."""
    csv_file = tmp_path / "sets_enriched.csv"
    csv_file.write_text(
        "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
        "123-1,Set A,2015,0,0,,123,http://r/123-1,true\n"
        "124-1,Set B,2015,0,0,,124,http://r/124-1,true\n"
        "125-1,Set C,2016,0,0,,125,http://r/125-1,true\n"
    )
    minifig_columns = ("set_num", "known_character", "fig_num", "part_num")
    minifigs_rows = [
        dict(zip(minifig_columns, values))
        for values in (
            ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
            ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
            ("125-1", "Owen Grady", "fig-owen-3", "head-c"),
            ("125-1", "Ellie Sattler", "fig-ellie-1", "head-d"),
        )
    ]

    result = aggregate_new_character_sets(
        minifigs_rows,
        load_sets(csv_file),
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2016,
    )

    # Owen Grady debuts in 2015 (two sets that year); his 2016 set is not listed.
    result_columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    expected = [
        dict(zip(result_columns, values))
        for values in (
            ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
            ("2015", "Owen Grady", "124-1", "124", "Set B", "http://r/124-1"),
            ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
        )
    ]
    assert result == expected
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
"""Écrit le CSV des comptes par personnage."""
destination = tmp_path / "counts.csv"
@ -224,6 +281,42 @@ def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
assert destination.read_text() == "year,new_characters\n2015,3\n2016,1\n"
def test_write_new_character_sets_markdown_outputs_md(tmp_path: Path) -> None:
    """Write the Markdown listing the new characters and their sets."""
    target = tmp_path / "new_characters.md"
    columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    entries = [
        dict(zip(columns, values))
        for values in (
            ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
            ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
        )
    ]

    write_new_character_sets_markdown(target, entries)

    expected_markdown = (
        "##### 2015\n\n"
        "- Owen Grady\n"
        " - [123](http://r/123-1) - Set A\n"
        "\n"
        "##### 2016\n\n"
        "- Ellie Sattler\n"
        " - [125](http://r/125-1) - Set C\n"
        "\n"
    )
    assert target.read_text() == expected_markdown
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
"""Calcule le total annuel en excluant les figurants."""
sets_path = tmp_path / "sets_enriched.csv"