Liste les nouveaux personnages par année
This commit is contained in:
parent
46cef55a75
commit
d5ff3cd076
@ -245,6 +245,7 @@ Le script relit `data/intermediate/sets_enriched.csv`, `data/intermediate/parts_
|
|||||||
2. `python -m scripts.plot_minifig_characters`
|
2. `python -m scripts.plot_minifig_characters`
|
||||||
3. `python -m scripts.plot_minifig_character_variations`
|
3. `python -m scripts.plot_minifig_character_variations`
|
||||||
4. `python -m scripts.plot_minifig_new_characters`
|
4. `python -m scripts.plot_minifig_new_characters`
|
||||||
|
5. `python -m scripts.list_new_minifig_characters`
|
||||||
|
|
||||||
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
Le premier script lit `data/intermediate/minifigs_by_set.csv`, compte le nombre de minifigs distinctes par personnage (`known_character` + `fig_num`), écrit `data/intermediate/minifig_characters_counts.csv`, puis trace `figures/step22/minifig_characters.png` (barres horizontales triées). Les barres sont colorées en fonction du genre renseigné dans `config/known_character_genders.csv`.
|
||||||
|
|
||||||
@ -252,6 +253,8 @@ Le second script lit `data/intermediate/minifigs_by_set.csv`, exclut les figuran
|
|||||||
|
|
||||||
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
|
Le troisième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, compte les nouveaux personnages introduits chaque année entre 2015 et 2025, sérialise `data/intermediate/minifig_new_characters_by_year.csv`, puis trace `figures/step23/minifig_new_characters_per_year.png` (barres avec jalons issus de `config/milestones.csv`).
|
||||||
|
|
||||||
|
Le quatrième script lit `data/intermediate/minifigs_by_set.csv` et `data/intermediate/sets_enriched.csv`, exclut les figurants, liste pour chaque année (2015-2025) les personnages introduits et les sets de cette année contenant ces minifigs, puis produit `data/final/minifig_new_characters_by_year.csv` et `data/final/minifig_new_characters_by_year.md` (format markdown minimal pour le blog).
|
||||||
|
|
||||||
### Étape 23 : présence annuelle des personnages
|
### Étape 23 : présence annuelle des personnages
|
||||||
|
|
||||||
1. `source .venv/bin/activate`
|
1. `source .venv/bin/activate`
|
||||||
|
|||||||
@ -110,6 +110,69 @@ def aggregate_new_characters_by_year(
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_new_character_sets(
|
||||||
|
minifigs_rows: Iterable[dict],
|
||||||
|
sets_lookup: Dict[str, dict],
|
||||||
|
excluded_characters: Sequence[str] | None = None,
|
||||||
|
start_year: int | None = None,
|
||||||
|
end_year: int | None = None,
|
||||||
|
) -> List[dict]:
|
||||||
|
"""Liste les personnages introduits par année avec les sets correspondants."""
|
||||||
|
excluded = set(excluded_characters or [])
|
||||||
|
first_year: Dict[str, int] = {}
|
||||||
|
for row in minifigs_rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
set_row = sets_lookup.get(row["set_num"])
|
||||||
|
if set_row is None:
|
||||||
|
continue
|
||||||
|
year_int = int(set_row["year"])
|
||||||
|
current = first_year.get(character)
|
||||||
|
if current is None or year_int < current:
|
||||||
|
first_year[character] = year_int
|
||||||
|
rows: List[dict] = []
|
||||||
|
seen: set[tuple[str, str]] = set()
|
||||||
|
for row in minifigs_rows:
|
||||||
|
character = row["known_character"].strip()
|
||||||
|
fig_num = row["fig_num"].strip()
|
||||||
|
if character == "" or fig_num == "":
|
||||||
|
continue
|
||||||
|
if character in excluded:
|
||||||
|
continue
|
||||||
|
set_row = sets_lookup.get(row["set_num"])
|
||||||
|
if set_row is None:
|
||||||
|
continue
|
||||||
|
intro_year = first_year.get(character)
|
||||||
|
if intro_year is None:
|
||||||
|
continue
|
||||||
|
if start_year is not None and intro_year < start_year:
|
||||||
|
continue
|
||||||
|
if end_year is not None and intro_year > end_year:
|
||||||
|
continue
|
||||||
|
if int(set_row["year"]) != intro_year:
|
||||||
|
continue
|
||||||
|
key = (character, set_row["set_num"])
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
rows.append(
|
||||||
|
{
|
||||||
|
"year": str(int(set_row["year"])),
|
||||||
|
"known_character": character,
|
||||||
|
"set_num": set_row["set_num"],
|
||||||
|
"set_id": set_row.get("set_id", ""),
|
||||||
|
"set_name": set_row.get("name", ""),
|
||||||
|
"rebrickable_url": set_row.get("rebrickable_url", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
seen.add(key)
|
||||||
|
rows.sort(key=lambda r: (int(r["year"]), r["known_character"], r["set_id"]))
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
def aggregate_by_gender(rows: Iterable[dict]) -> List[dict]:
|
||||||
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
"""Compte les minifigs distinctes par genre (fig_num unique)."""
|
||||||
genders_by_fig: Dict[str, str] = {}
|
genders_by_fig: Dict[str, str] = {}
|
||||||
@ -154,6 +217,38 @@ def write_new_characters_by_year(path: Path, rows: Sequence[dict]) -> None:
|
|||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
|
||||||
|
def write_new_character_sets_csv(path: Path, rows: Sequence[dict]) -> None:
    """Write the CSV listing introduced characters and the sets carrying them.

    The parent directory is prepared through ``ensure_parent_dir`` before the
    file is opened. A header line is always written, followed by one line
    per entry of ``rows`` in the order given.
    """
    columns = ["year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url"]
    ensure_parent_dir(path)
    with path.open("w", newline="") as handle:
        output = csv.DictWriter(handle, fieldnames=columns)
        output.writeheader()
        output.writerows(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def write_new_character_sets_markdown(path: Path, rows: Sequence[dict]) -> None:
    """Write a Markdown digest of introduced characters, grouped by year.

    Layout: one ``#####`` heading per year (ascending, compared as integers),
    one bullet per character (alphabetical), and under each character one
    linked sub-bullet per set ordered by ``set_id``. A blank line closes
    every year section.
    """
    ensure_parent_dir(path)

    # year -> character -> rows for that character in that year
    by_year: Dict[str, Dict[str, List[dict]]] = {}
    for item in rows:
        by_year.setdefault(item["year"], {}).setdefault(item["known_character"], []).append(item)

    def render():
        """Yield the document piece by piece, in output order."""
        for year_label in sorted(by_year, key=int):
            yield f"##### {year_label}\n\n"
            per_character = by_year[year_label]
            for character_name in sorted(per_character):
                yield f"- {character_name}\n"
                for item in sorted(per_character[character_name], key=lambda r: r["set_id"]):
                    url = item["rebrickable_url"] or ""
                    yield " - [{}]({}) - {}\n".format(item["set_id"], url, item["set_name"])
            yield "\n"

    with path.open("w") as md_file:
        md_file.writelines(render())
|
||||||
|
|
||||||
|
|
||||||
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
def write_gender_counts(path: Path, rows: Sequence[dict]) -> None:
|
||||||
"""Écrit le CSV des comptes par genre."""
|
"""Écrit le CSV des comptes par genre."""
|
||||||
ensure_parent_dir(path)
|
ensure_parent_dir(path)
|
||||||
|
|||||||
39
scripts/list_new_minifig_characters.py
Normal file
39
scripts/list_new_minifig_characters.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
"""Liste les nouveaux personnages introduits chaque année et leurs sets associés."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
|
from lib.rebrickable.minifig_characters import (
|
||||||
|
aggregate_new_character_sets,
|
||||||
|
load_minifigs_by_set,
|
||||||
|
write_new_character_sets_csv,
|
||||||
|
write_new_character_sets_markdown,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Input files read by the script.
MINIFIGS_BY_SET_PATH = Path("data/intermediate/minifigs_by_set.csv")
SETS_ENRICHED_PATH = Path("data/intermediate/sets_enriched.csv")
# Output files written by the script.
CSV_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.csv")
MARKDOWN_DESTINATION_PATH = Path("data/final/minifig_new_characters_by_year.md")
# Character names left out of the listing.
EXCLUDED_CHARACTERS = ["Figurant"]
# Inclusive range of introduction years to report.
START_YEAR = 2015
END_YEAR = 2025


def main() -> None:
    """Build the CSV and Markdown listings of characters introduced each year."""
    new_character_rows = aggregate_new_character_sets(
        load_minifigs_by_set(MINIFIGS_BY_SET_PATH),
        load_sets(SETS_ENRICHED_PATH),
        excluded_characters=EXCLUDED_CHARACTERS,
        start_year=START_YEAR,
        end_year=END_YEAR,
    )
    # CSV first, then Markdown; both are rendered from the same rows.
    exports = (
        (write_new_character_sets_csv, CSV_DESTINATION_PATH),
        (write_new_character_sets_markdown, MARKDOWN_DESTINATION_PATH),
    )
    for export, destination in exports:
        export(destination, new_character_rows)


if __name__ == "__main__":
    main()
|
||||||
@ -5,16 +5,20 @@ from pathlib import Path
|
|||||||
from lib.rebrickable.minifig_characters import (
|
from lib.rebrickable.minifig_characters import (
|
||||||
aggregate_by_character,
|
aggregate_by_character,
|
||||||
aggregate_by_gender,
|
aggregate_by_gender,
|
||||||
|
aggregate_new_character_sets,
|
||||||
aggregate_new_characters_by_year,
|
aggregate_new_characters_by_year,
|
||||||
aggregate_variations_and_totals,
|
aggregate_variations_and_totals,
|
||||||
aggregate_character_spans,
|
aggregate_character_spans,
|
||||||
aggregate_presence_by_year,
|
aggregate_presence_by_year,
|
||||||
load_sets_enriched,
|
load_sets_enriched,
|
||||||
write_character_counts,
|
write_character_counts,
|
||||||
|
write_new_character_sets_csv,
|
||||||
|
write_new_character_sets_markdown,
|
||||||
write_new_characters_by_year,
|
write_new_characters_by_year,
|
||||||
write_character_variations_totals,
|
write_character_variations_totals,
|
||||||
write_gender_counts,
|
write_gender_counts,
|
||||||
)
|
)
|
||||||
|
from lib.rebrickable.minifig_character_sets import load_sets
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_by_character_counts_unique_figs() -> None:
|
def test_aggregate_by_character_counts_unique_figs() -> None:
|
||||||
@ -143,11 +147,11 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
|
|||||||
"""Compter les nouveaux personnages par année en respectant la plage."""
|
"""Compter les nouveaux personnages par année en respectant la plage."""
|
||||||
sets_path = tmp_path / "sets_enriched.csv"
|
sets_path = tmp_path / "sets_enriched.csv"
|
||||||
sets_path.write_text(
|
sets_path.write_text(
|
||||||
"set_num,year\n"
|
"set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection\n"
|
||||||
"123-1,2015\n"
|
"123-1,Set A,2015,0,0,,-,http://r/123-1,true\n"
|
||||||
"124-1,2016\n"
|
"124-1,Set B,2016,0,0,,-,http://r/124-1,true\n"
|
||||||
"125-1,2017\n"
|
"125-1,Set C,2017,0,0,,-,http://r/125-1,true\n"
|
||||||
"126-1,2014\n"
|
"126-1,Set D,2014,0,0,,-,http://r/126-1,true\n"
|
||||||
)
|
)
|
||||||
sets_years = load_sets_enriched(sets_path)
|
sets_years = load_sets_enriched(sets_path)
|
||||||
minifigs_rows = [
|
minifigs_rows = [
|
||||||
@ -172,6 +176,59 @@ def test_aggregate_new_characters_by_year_limits_range(tmp_path: Path) -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_new_character_sets_returns_intro_sets(tmp_path: Path) -> None:
    """Introduced characters are listed with the sets of their introduction year."""
    csv_lines = [
        "set_num,name,year,theme_id,num_parts,img_url,set_id,rebrickable_url,in_collection",
        "123-1,Set A,2015,0,0,,123,http://r/123-1,true",
        "124-1,Set B,2015,0,0,,124,http://r/124-1,true",
        "125-1,Set C,2016,0,0,,125,http://r/125-1,true",
    ]
    sets_path = tmp_path / "sets_enriched.csv"
    sets_path.write_text("".join(line + "\n" for line in csv_lines))
    sets_lookup = load_sets(sets_path)

    appearances = [
        ("123-1", "Owen Grady", "fig-owen-1", "head-a"),
        ("124-1", "Owen Grady", "fig-owen-2", "head-b"),
        ("125-1", "Owen Grady", "fig-owen-3", "head-c"),
        ("125-1", "Ellie Sattler", "fig-ellie-1", "head-d"),
    ]
    minifigs_rows = [
        {"set_num": set_num, "known_character": character, "fig_num": fig_num, "part_num": part_num}
        for set_num, character, fig_num, part_num in appearances
    ]

    rows = aggregate_new_character_sets(
        minifigs_rows,
        sets_lookup,
        excluded_characters=["Figurant"],
        start_year=2015,
        end_year=2016,
    )

    # Owen Grady's 2016 appearance is absent: he was introduced in 2015.
    expected_entries = [
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2015", "Owen Grady", "124-1", "124", "Set B", "http://r/124-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    ]
    columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    assert rows == [dict(zip(columns, entry)) for entry in expected_entries]
|
||||||
|
|
||||||
|
|
||||||
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
|
def test_write_character_counts_outputs_csv(tmp_path: Path) -> None:
|
||||||
"""Écrit le CSV des comptes par personnage."""
|
"""Écrit le CSV des comptes par personnage."""
|
||||||
destination = tmp_path / "counts.csv"
|
destination = tmp_path / "counts.csv"
|
||||||
@ -224,6 +281,42 @@ def test_write_new_characters_by_year_outputs_csv(tmp_path: Path) -> None:
|
|||||||
assert destination.read_text() == "year,new_characters\n2015,3\n2016,1\n"
|
assert destination.read_text() == "year,new_characters\n2015,3\n2016,1\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_new_character_sets_markdown_outputs_md(tmp_path: Path) -> None:
    """The Markdown export lists new characters and their sets under each year."""
    columns = ("year", "known_character", "set_num", "set_id", "set_name", "rebrickable_url")
    entries = [
        ("2015", "Owen Grady", "123-1", "123", "Set A", "http://r/123-1"),
        ("2016", "Ellie Sattler", "125-1", "125", "Set C", "http://r/125-1"),
    ]
    rows = [dict(zip(columns, entry)) for entry in entries]
    destination = tmp_path / "new_characters.md"

    write_new_character_sets_markdown(destination, rows)

    expected = "".join(
        (
            "##### 2015\n\n",
            "- Owen Grady\n",
            " - [123](http://r/123-1) - Set A\n",
            "\n",
            "##### 2016\n\n",
            "- Ellie Sattler\n",
            " - [125](http://r/125-1) - Set C\n",
            "\n",
        )
    )
    assert destination.read_text() == expected
|
||||||
|
|
||||||
|
|
||||||
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
def test_aggregate_presence_by_year_excludes_figurants(tmp_path: Path) -> None:
|
||||||
"""Calcule le total annuel en excluant les figurants."""
|
"""Calcule le total annuel en excluant les figurants."""
|
||||||
sets_path = tmp_path / "sets_enriched.csv"
|
sets_path = tmp_path / "sets_enriched.csv"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user