#!/usr/bin/env node
/**
 * Synchronise Hugo critique frontmatter with Wikidata metadata.
 *
 * The script:
 * 1. Reads the critique bundle (expects an index.md with YAML frontmatter).
 * 2. Uses the stored Wikidata id when the frontmatter already has one; otherwise
 *    uses the frontmatter title (or --query) plus the bundle type (film, série, etc.)
 *    to search Wikidata and lets the user confirm the right entity.
 * 3. Fetches structured data (genres, cast, crew, companies...) according to the
 *    per-type taxonomy map declared in TYPE_CONFIG below.
 *    NOTE(review): TYPE_CONFIG is presumably meant to mirror the Hugo taxonomies in
 *    config/_default/taxonomies.yaml, but this script never reads that file.
 * 4. Adds the missing taxonomy terms, external links and the Wikidata id to the
 *    frontmatter without removing anything.
 */

const fs = require("node:fs");
const path = require("node:path");
const yaml = require("js-yaml");
const readline = require("node:readline/promises");
const { stdin, stdout } = require("node:process");

// Languages tried, in order, after the user's primary language when picking labels.
const LANGUAGE_FALLBACK = ["fr", "fr-ca", "fr-fr", "en", "en-gb", "en-ca"];
// Chunk size used when resolving labels with wbgetentities.
const MAX_WIKIDATA_IDS_PER_REQUEST = 50;
// Frontmatter key under which the confirmed Wikidata entity id is stored.
const WIKIDATA_ID_FIELD = "wikidata_id";
const PROJECT_ROOT = path.resolve(__dirname, "..");
const DEFAULT_CRITIQUES_ROOT = path.join(PROJECT_ROOT, "content", "critiques");
const WIKIPEDIA_PREFERRED_LANGS = ["fr", "en"];
const OFFICIAL_SITE_PROPERTY = "P856";
const WIKIDATA_ID_PATTERN = /^Q\d+$/i;

/**
 * Per-critique-type configuration, keyed by the directory name found right after
 * "critiques" in the bundle path.
 *
 * - label: human readable name of the type.
 * - queryAugment: text appended to the title for the first search attempt.
 * - descriptionHints: substrings used to prefer search results by description.
 * - taxonomyMap: Hugo taxonomy name -> Wikidata properties feeding it.
 * - roleQualifiers: claim/qualifier pairs whose qualifier values are collected
 *   as fictional characters.
 *
 * Wikidata properties used: P136 genre, P57 director, P58 screenwriter,
 * P162 producer, P86 composer, P161 cast member, P272 production company,
 * P750 distributed by, P449 original broadcaster, P178 developer,
 * P123 publisher, P50 author, P110 illustrator, P453 character role.
 */
const TYPE_CONFIG = {
  films: {
    label: "film",
    queryAugment: "film",
    descriptionHints: ["film", "movie"],
    taxonomyMap: {
      genres: ["P136"],
      personnalites: ["P57", "P58", "P162", "P86", "P161"],
      entreprises: ["P272", "P750"],
    },
    roleQualifiers: [{ claim: "P161", qualifier: "P453" }],
  },
  series: {
    label: "série TV",
    queryAugment: '"série télévisée"',
    descriptionHints: ["série", "tv series", "télévisée", "television"],
    taxonomyMap: {
      genres: ["P136"],
      personnalites: ["P57", "P58", "P162", "P86", "P161"],
      entreprises: ["P272", "P449", "P750"],
    },
    roleQualifiers: [{ claim: "P161", qualifier: "P453" }],
  },
  "jeux-video": {
    label: "jeu vidéo",
    queryAugment: '"jeu vidéo"',
    descriptionHints: ["jeu vidéo", "video game", "jeu-vidéo", "jeu video"],
    taxonomyMap: {
      genres: ["P136"],
      personnalites: ["P57", "P58", "P162", "P86", "P161"],
      entreprises: ["P178", "P123", "P750"],
    },
    roleQualifiers: [{ claim: "P161", qualifier: "P453" }],
  },
  livres: {
    label: "livre",
    queryAugment: "livre",
    descriptionHints: ["roman", "novel", "book", "livre", "comic", "nouvelle", "script"],
    taxonomyMap: {
      genres: ["P136"],
      personnalites: ["P50", "P110"],
      entreprises: ["P123"],
    },
  },
};

/**
 * Parse the command line.
 *
 * @param {string[]} argv - Full process.argv (the first two entries are skipped).
 * @returns {{options: object, targets: string[]}} Parsed options and the
 *   positional arguments, which are treated as bundle paths.
 */
function parseArgs(argv) {
  const args = argv.slice(2);
  const options = {
    language: "fr",
    limit: 8,
    query: null,
    autoSelect: false,
    help: false,
  };
  const targets = [];

  for (const arg of args) {
    if (arg.startsWith("--lang=")) {
      options.language = arg.slice("--lang=".length).trim() || options.language;
    } else if (arg.startsWith("--limit=")) {
      const value = Number.parseInt(arg.slice("--limit=".length), 10);
      // Invalid or non-positive limits silently keep the default.
      if (!Number.isNaN(value) && value > 0) {
        options.limit = value;
      }
    } else if (arg.startsWith("--query=")) {
      options.query = arg.slice("--query=".length).trim() || null;
    } else if (arg === "--auto" || arg === "--yes") {
      options.autoSelect = true;
    } else if (arg === "--help" || arg === "-h") {
      options.help = true;
    } else if (arg.length > 0) {
      // Anything else (including unknown flags) is treated as a target path.
      targets.push(arg);
    }
  }

  return { options, targets };
}

/** Print the command line help to stdout. */
function showUsage() {
  console.log(`Usage: node tools/sync_wiki_metadata.js

Options
  --lang=fr Primary language used for Wikidata labels (default: fr)
  --limit=8 Max number of Wikidata search results to show
  --query="..." Override the query derived from the frontmatter title
  --auto Skip the interactive prompt and pick the first result

Notes
  • Without arguments, every critique bundle under content/critiques is processed.
  • Provide one or more bundle paths to limit the scope manually.

Examples
  node tools/sync_wiki_metadata.js content/critiques/films/crocodile-dunde-ii
  node tools/sync_wiki_metadata.js --lang=en --query="Galaxy Quest film" content/critiques/films/galaxy-quest
`);
}

/**
 * Resolve a user supplied path to the bundle directory.
 *
 * @param {string} targetPath - A bundle directory or the path of its index.md.
 * @returns {string} Absolute path of the bundle directory.
 * @throws {Error} When the path does not exist, is a file other than index.md,
 *   or is neither a file nor a directory.
 */
function resolveArticleDir(targetPath) {
  const absolute = path.resolve(targetPath);
  if (!fs.existsSync(absolute)) {
    throw new Error(`Path not found: ${absolute}`);
  }

  const stats = fs.statSync(absolute);
  if (stats.isDirectory()) {
    return absolute;
  }
  if (stats.isFile()) {
    if (path.basename(absolute) !== "index.md") {
      throw new Error(`Expected an index.md, got ${absolute}`);
    }
    return path.dirname(absolute);
  }
  throw new Error(`Unsupported path type: ${absolute}`);
}

/**
 * Return the path of the bundle's index.md.
 *
 * @param {string} articleDir - Bundle directory.
 * @returns {string} Path of the index.md inside the bundle.
 * @throws {Error} When the bundle has no index.md.
 */
function getIndexPath(articleDir) {
  const indexPath = path.join(articleDir, "index.md");
  if (!fs.existsSync(indexPath)) {
    throw new Error(`Missing index.md in ${articleDir}`);
  }
  return indexPath;
}

/**
 * Read a Markdown file and split it into parsed YAML frontmatter and body.
 *
 * Both LF and CRLF line endings are accepted around the `---` fences. The body
 * is returned untouched, so a CRLF body keeps its line endings when the file is
 * rewritten (the regenerated frontmatter itself uses LF).
 *
 * @param {string} indexPath - Path of the index.md to read.
 * @returns {{data: object, body: string}} Parsed frontmatter and raw body.
 * @throws {Error} When no frontmatter block is found.
 */
function readFrontmatter(indexPath) {
  const raw = fs.readFileSync(indexPath, "utf8");
  const match = raw.match(/^---\r?\n([\s\S]+?)\r?\n---\r?\n?([\s\S]*)$/);
  if (!match) {
    throw new Error(`No valid frontmatter found in ${indexPath}`);
  }
  // NOTE(review): yaml.load is safe by default in js-yaml >= 4 only; confirm the
  // installed version before feeding it frontmatter from untrusted sources.
  const data = yaml.load(match[1]) || {};
  const body = match[2] || "";
  return { data, body };
}

/**
 * Deduce the critique type from the bundle path.
 *
 * @param {string} articleDir - Bundle directory.
 * @returns {string|null} The path segment following the last "critiques"
 *   segment, or null when there is none.
 */
function detectCritiqueType(articleDir) {
  const segments = articleDir.split(path.sep);
  const idx = segments.lastIndexOf("critiques");
  if (idx === -1 || idx + 1 >= segments.length) {
    return null;
  }
  return segments[idx + 1];
}

/**
 * Find every bundle (directory directly containing an index.md) under rootDir.
 *
 * A directory that holds an index.md is recorded and not descended into;
 * directories whose name starts with a dot are skipped.
 *
 * @param {string} rootDir - Directory to scan.
 * @returns {string[]} Bundle directories, sorted with French collation. Empty
 *   when rootDir does not exist.
 */
function collectCritiqueBundles(rootDir) {
  if (!fs.existsSync(rootDir)) {
    return [];
  }

  const bundles = [];
  const stack = [rootDir];
  while (stack.length > 0) {
    const currentDir = stack.pop();
    const entries = fs.readdirSync(currentDir, { withFileTypes: true });

    const hasIndex = entries.some((entry) => entry.isFile() && entry.name === "index.md");
    if (hasIndex) {
      bundles.push(currentDir);
      continue;
    }

    for (const entry of entries) {
      if (!entry.isDirectory() || entry.name.startsWith(".")) {
        continue;
      }
      stack.push(path.join(currentDir, entry.name));
    }
  }

  return bundles.sort((a, b) => a.localeCompare(b, "fr"));
}

/**
 * Build the ordered, de-duplicated list of languages used to pick labels.
 *
 * @param {string} primary - Preferred language code.
 * @returns {string[]} primary followed by LANGUAGE_FALLBACK, without duplicates.
 */
function buildLanguageOrder(primary) {
  // A Set keeps first-insertion order, so the primary language stays first.
  return [...new Set([primary, ...LANGUAGE_FALLBACK])];
}

/**
 * Call the Wikidata Action API and return the decoded JSON response.
 *
 * Parameters that are undefined or null are omitted. Boolean parameters follow
 * the MediaWiki convention: a boolean is true as soon as it is present in the
 * request, whatever its value, so `false` is omitted and `true` is sent as "1".
 *
 * @param {Object<string, *>} params - Query parameters (format=json is forced).
 * @returns {Promise<object>} Parsed JSON body.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function wikidataApi(params) {
  const url = new URL("https://www.wikidata.org/w/api.php");
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined || value === null || value === false) {
      continue;
    }
    url.searchParams.set(key, value === true ? "1" : String(value));
  }
  url.searchParams.set("format", "json");

  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Wikidata API error ${response.status}: ${response.statusText}`);
  }
  return response.json();
}

/**
 * Search Wikidata items matching a title.
 *
 * The title augmented with typeConfig.queryAugment is tried first, then the bare
 * title; the first query returning anything wins. When some of its results have
 * a description containing one of typeConfig.descriptionHints, only those are
 * returned; otherwise the unfiltered results are returned.
 *
 * @param {string} term - Title to search for.
 * @param {object} typeConfig - Entry of TYPE_CONFIG.
 * @param {{language: string, limit: number}} options - CLI options.
 * @returns {Promise<object[]>} wbsearchentities result entries (possibly empty).
 */
async function searchEntities(term, typeConfig, options) {
  const queries = [];
  if (typeConfig.queryAugment) {
    queries.push(`${term} ${typeConfig.queryAugment}`);
  }
  queries.push(term);

  for (const query of queries) {
    const data = await wikidataApi({
      action: "wbsearchentities",
      search: query,
      language: options.language,
      uselang: options.language,
      type: "item",
      limit: String(options.limit),
      // false => wikidataApi leaves the parameter out, keeping language fallback on.
      strictlanguage: false,
      origin: "*",
    });

    if (!data.search || data.search.length === 0) {
      continue;
    }

    let results = data.search;
    if (typeConfig.descriptionHints && typeConfig.descriptionHints.length > 0) {
      const hints = typeConfig.descriptionHints.map((hint) => hint.toLowerCase());
      const filtered = results.filter((entry) => {
        if (!entry.description) {
          return false;
        }
        const desc = entry.description.toLowerCase();
        return hints.some((hint) => desc.includes(hint));
      });
      // Hints only narrow the list; they never empty it.
      if (filtered.length > 0) {
        results = filtered;
      }
    }
    return results;
  }

  return [];
}

/**
 * Load one Wikidata entity with its labels, descriptions, claims and sitelinks.
 *
 * @param {string} entityId - Wikidata id of the entity.
 * @param {string[]} languages - Languages requested for labels/descriptions.
 * @returns {Promise<object>} The entity object from wbgetentities.
 * @throws {Error} When the response does not contain the entity.
 */
async function fetchEntity(entityId, languages) {
  const data = await wikidataApi({
    action: "wbgetentities",
    ids: entityId,
    props: "labels|descriptions|claims|sitelinks",
    languages: languages.join("|"),
    origin: "*",
  });

  if (!data.entities || !data.entities[entityId]) {
    throw new Error(`Unable to load Wikidata entity ${entityId}`);
  }
  return data.entities[entityId];
}

/**
 * Collect the entity ids referenced by the claims of one property.
 *
 * @param {object} entity - Wikidata entity.
 * @param {string} property - Property id (e.g. "P136").
 * @returns {string[]} Ids of the claim values that are objects carrying an id;
 *   duplicates are kept.
 */
function collectClaimIds(entity, property) {
  const claims = entity.claims?.[property];
  if (!claims) {
    return [];
  }

  const ids = [];
  for (const claim of claims) {
    const value = claim.mainsnak?.datavalue?.value;
    if (value && typeof value === "object" && value.id) {
      ids.push(value.id);
    }
  }
  return ids;
}

/**
 * Collect the string values (URLs) of the claims of one property.
 *
 * @param {object} entity - Wikidata entity.
 * @param {string} property - Property id (e.g. "P856").
 * @returns {string[]} Unique string claim values, in first-seen order.
 */
function collectClaimUrls(entity, property) {
  const claims = entity.claims?.[property];
  if (!claims) {
    return [];
  }

  const urls = [];
  for (const claim of claims) {
    const value = claim.mainsnak?.datavalue?.value;
    if (typeof value === "string") {
      urls.push(value);
    }
  }
  return [...new Set(urls)];
}

/**
 * Collect qualifier values attached to claims, as configured by roleConfig.
 *
 * @param {object} entity - Wikidata entity.
 * @param {{claim: string, qualifier: string}[]} [roleConfig] - Claim/qualifier pairs.
 * @returns {string[]} For each qualifier value: its entity id when it has one,
 *   otherwise its `text` when present. Duplicates are kept.
 */
function collectRoleIds(entity, roleConfig) {
  const ids = [];
  if (!roleConfig) {
    return ids;
  }

  for (const { claim, qualifier } of roleConfig) {
    const claims = entity.claims?.[claim];
    if (!claims) {
      continue;
    }
    for (const entry of claims) {
      const qualifiers = entry.qualifiers?.[qualifier];
      if (!qualifiers) {
        continue;
      }
      for (const qual of qualifiers) {
        const value = qual.datavalue?.value;
        if (!value || typeof value !== "object") {
          continue;
        }
        if (value.id) {
          ids.push(value.id);
        } else if (value.text) {
          ids.push(value.text);
        }
      }
    }
  }
  return ids;
}

/**
 * Resolve labels for a list of Wikidata ids.
 *
 * Only strings starting with "Q" are looked up, de-duplicated and requested in
 * chunks of MAX_WIKIDATA_IDS_PER_REQUEST.
 *
 * @param {Array<*>} ids - Candidate ids.
 * @param {string[]} languages - Label languages, by decreasing preference.
 * @returns {Promise<Object<string, string>>} Map id -> label; the id itself is
 *   used when the entity has no label.
 */
async function fetchLabels(ids, languages) {
  const uniqueIds = [
    ...new Set(ids.filter((value) => typeof value === "string" && value.startsWith("Q"))),
  ];
  const labelMap = {};
  if (uniqueIds.length === 0) {
    return labelMap;
  }

  for (let i = 0; i < uniqueIds.length; i += MAX_WIKIDATA_IDS_PER_REQUEST) {
    const chunk = uniqueIds.slice(i, i + MAX_WIKIDATA_IDS_PER_REQUEST);
    const data = await wikidataApi({
      action: "wbgetentities",
      ids: chunk.join("|"),
      props: "labels",
      languages: languages.join("|"),
      origin: "*",
    });
    for (const [id, entity] of Object.entries(data.entities || {})) {
      labelMap[id] = pickLabel(entity.labels, languages) || id;
    }
  }
  return labelMap;
}

/**
 * Pick the best label from a Wikidata labels object.
 *
 * @param {Object<string, {value: string}>} [labels] - Labels keyed by language.
 * @param {string[]} languages - Languages by decreasing preference.
 * @returns {string|null} The first label available in `languages`, else the
 *   first label of the object, else null.
 */
function pickLabel(labels = {}, languages) {
  for (const lang of languages) {
    if (labels[lang]) {
      return labels[lang].value;
    }
  }
  const fallback = Object.values(labels)[0];
  return fallback ? fallback.value : null;
}

/**
 * Pick the Wikipedia article link in the first preferred language available.
 *
 * @param {Object<string, {url?: string, title?: string}>} [sitelinks] - Entity sitelinks.
 * @returns {{lang: string, url: string}|null} The sitelink URL when provided,
 *   otherwise one built from the article title; null when no preferred
 *   language has a usable sitelink.
 */
function pickWikipediaLink(sitelinks = {}) {
  for (const lang of WIKIPEDIA_PREFERRED_LANGS) {
    const link = sitelinks[`${lang}wiki`];
    if (!link) {
      continue;
    }
    if (link.url) {
      return { lang, url: link.url };
    }
    if (link.title) {
      const encodedTitle = encodeURIComponent(link.title.replace(/ /g, "_"));
      return { lang, url: `https://${lang}.wikipedia.org/wiki/${encodedTitle}` };
    }
  }
  return null;
}

/**
 * Guess the language of a website from its URL.
 *
 * Country TLDs are checked first. Path based detection only matches whole path
 * segments ("en", "en-us", "fr_CA"...), so "/entertainment" is not mistaken for
 * English. English is only inferred on .com/.uk/.us hosts and only from the
 * path; French can be inferred from the path on any host.
 *
 * @param {string} rawUrl - URL to inspect.
 * @returns {string|null} Language code, or null when unknown or the URL is invalid.
 */
function inferLanguageFromUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    // Unparseable URL: no language can be inferred.
    return null;
  }

  const host = parsed.hostname.toLowerCase();
  const segments = parsed.pathname.toLowerCase().split("/");
  const hasLanguageSegment = (lang) =>
    segments.some(
      (segment) =>
        segment === lang || segment.startsWith(`${lang}-`) || segment.startsWith(`${lang}_`)
    );

  if (host.endsWith(".fr") || host.includes(".fr.")) {
    return "fr";
  }
  if (host.endsWith(".de")) {
    return "de";
  }
  if (host.endsWith(".es")) {
    return "es";
  }
  if (host.endsWith(".it")) {
    return "it";
  }
  if (host.endsWith(".pt") || host.endsWith(".br")) {
    return "pt";
  }

  const isEnglishLeaningHost =
    host.endsWith(".co.uk") || host.endsWith(".uk") || host.endsWith(".us") || host.endsWith(".com");
  // Only infer English when explicitly present in the path.
  if (isEnglishLeaningHost && hasLanguageSegment("en")) {
    return "en";
  }
  if (hasLanguageSegment("fr")) {
    return "fr";
  }
  return null;
}

/**
 * Build the list of external links (Wikipedia page, official sites) of an entity.
 *
 * @param {object} entity - Wikidata entity with sitelinks and claims.
 * @returns {{name: string, url: string, lang?: string}[]} Links without
 *   duplicate URLs, Wikipedia first.
 */
function buildExternalLinks(entity) {
  const links = [];
  const seen = new Set();

  const addLink = (entry) => {
    if (!entry || !entry.url || seen.has(entry.url)) {
      return;
    }
    const normalized = {
      name: entry.name || "Lien",
      url: entry.url,
    };
    if (entry.lang) {
      normalized.lang = entry.lang;
    }
    links.push(normalized);
    seen.add(entry.url);
  };

  const wikiLink = pickWikipediaLink(entity.sitelinks);
  if (wikiLink) {
    addLink({
      name: "Page Wikipédia",
      url: wikiLink.url,
      lang: wikiLink.lang,
    });
  }

  for (const url of collectClaimUrls(entity, OFFICIAL_SITE_PROPERTY)) {
    const link = {
      name: "Site officiel",
      url,
    };
    const detectedLang = inferLanguageFromUrl(url);
    if (detectedLang) {
      link.lang = detectedLang;
    }
    addLink(link);
  }

  return links;
}

/**
 * Turn the claims of an entity into taxonomy terms.
 *
 * @param {object} entity - Wikidata entity.
 * @param {object} typeConfig - Entry of TYPE_CONFIG.
 * @param {function(string): string} labelLookup - Returns the label of an id.
 * @returns {Object<string, string[]>} Taxonomy name -> unique terms. Ids whose
 *   label could not be resolved (the lookup still yields a Q-id) are dropped.
 *   Role qualifier values go to "personnages_de_fiction".
 */
function buildTaxonomyValues(entity, typeConfig, labelLookup) {
  const taxonomyData = {};
  const addValue = (taxonomy, value) => {
    if (!value) {
      return;
    }
    if (!taxonomyData[taxonomy]) {
      taxonomyData[taxonomy] = new Set();
    }
    taxonomyData[taxonomy].add(value);
  };

  for (const [taxonomy, properties] of Object.entries(typeConfig.taxonomyMap)) {
    for (const property of properties) {
      for (const id of collectClaimIds(entity, property)) {
        const label = labelLookup(id);
        if (!label || WIKIDATA_ID_PATTERN.test(label)) {
          continue;
        }
        addValue(taxonomy, label);
      }
    }
  }

  if (typeConfig.roleQualifiers) {
    for (const roleId of collectRoleIds(entity, typeConfig.roleQualifiers)) {
      if (typeof roleId === "string" && WIKIDATA_ID_PATTERN.test(roleId)) {
        const resolved = labelLookup(roleId);
        if (!resolved || WIKIDATA_ID_PATTERN.test(resolved)) {
          continue;
        }
        addValue("personnages_de_fiction", resolved);
      } else {
        // Not an entity id: the qualifier already carried plain text.
        addValue("personnages_de_fiction", roleId);
      }
    }
  }

  return Object.fromEntries(
    Object.entries(taxonomyData).map(([taxonomy, values]) => [taxonomy, [...values]])
  );
}

/**
 * Add missing taxonomy terms to the frontmatter (mutated in place).
 *
 * A taxonomy is only rewritten (and then sorted with French collation) when at
 * least one term is added; nothing is ever removed.
 *
 * @param {object} frontmatter - Parsed frontmatter, modified in place.
 * @param {Object<string, string[]>} newValues - Taxonomy name -> candidate terms.
 * @returns {boolean} true when at least one term was added.
 */
function mergeFrontmatter(frontmatter, newValues) {
  let updated = false;

  for (const [taxonomy, values] of Object.entries(newValues)) {
    if (!values || values.length === 0) {
      continue;
    }

    const current = frontmatter[taxonomy];
    let list;
    if (Array.isArray(current)) {
      list = [...current];
    } else if (current) {
      // A scalar value is promoted to a one-element list.
      list = [current];
    } else {
      list = [];
    }

    const existing = new Set(list);
    let added = 0;
    for (const value of values) {
      if (!existing.has(value)) {
        list.push(value);
        existing.add(value);
        added += 1;
      }
    }

    if (added > 0) {
      // Existing YAML values may be numbers or other scalars: compare as strings
      // so the sort never throws.
      list.sort((a, b) => String(a).localeCompare(String(b), "fr"));
      frontmatter[taxonomy] = list;
      updated = true;
      console.log(` ↳ Added ${added} value(s) to "${taxonomy}"`);
    }
  }

  return updated;
}

/**
 * Set the "links" frontmatter field when it does not exist yet.
 *
 * @param {object} frontmatter - Parsed frontmatter, modified in place.
 * @param {object[]} linksToAdd - Links to store.
 * @returns {boolean} true when the field was created; false when there is
 *   nothing to add or when a "links" key already exists (it is never edited).
 */
function mergeLinks(frontmatter, linksToAdd) {
  if (!linksToAdd || linksToAdd.length === 0) {
    return false;
  }
  if (Object.prototype.hasOwnProperty.call(frontmatter, "links")) {
    return false;
  }
  frontmatter.links = [...linksToAdd];
  console.log(` ↳ Added ${linksToAdd.length} link(s) to "links"`);
  return true;
}

/**
 * Ask the user which search result is the right one.
 *
 * @param {object[]} results - Non-empty list of search results.
 * @param {object} rl - readline/promises interface used to ask questions.
 * @returns {Promise<object|null>} The chosen result, or null when cancelled.
 */
async function promptForSelection(results, rl) {
  if (results.length === 1) {
    const only = results[0];
    const answer = await rl.question(
      `Found a single match: ${only.label} — ${only.description || "sans description"} [${only.id}]. Use it? (Y/n) `
    );
    const normalized = answer.trim();
    // An empty answer accepts the default (yes).
    if (normalized === "" || /^y(es)?$/i.test(normalized)) {
      return only;
    }
    return null;
  }

  console.log("Sélectionnez l'œuvre correspondante :");
  results.forEach((result, index) => {
    console.log(
      ` ${index + 1}. ${result.label} — ${result.description || "sans description"} [${result.id}]`
    );
  });
  console.log(" 0. Annuler");

  // Loop until a valid number is entered.
  while (true) {
    const answer = await rl.question("Choix : ");
    const choice = Number.parseInt(answer, 10);
    if (!Number.isNaN(choice)) {
      if (choice === 0) {
        return null;
      }
      if (choice >= 1 && choice <= results.length) {
        return results[choice - 1];
      }
    }
    console.log("Veuillez saisir un numéro valide ou 0 pour annuler.");
  }
}

/**
 * Synchronise one critique bundle with Wikidata and rewrite its index.md when
 * something changed.
 *
 * @param {string} target - Bundle directory or index.md path.
 * @param {object} options - Options returned by parseArgs.
 * @param {object|null} rl - readline interface; may be null when
 *   options.autoSelect is set, since no prompt is shown in that mode.
 * @returns {Promise<void>}
 * @throws {Error} On invalid paths, missing frontmatter or Wikidata API errors.
 */
async function processCritique(target, options, rl) {
  const articleDir = resolveArticleDir(target);
  const typeKey = detectCritiqueType(articleDir);
  if (!typeKey) {
    console.log(`⚠️ Impossible de déduire le type pour ${articleDir}. Ignoré.`);
    return;
  }

  const typeConfig = TYPE_CONFIG[typeKey];
  if (!typeConfig) {
    console.log(`⚠️ Type "${typeKey}" non pris en charge pour ${articleDir}.`);
    return;
  }

  const indexPath = getIndexPath(articleDir);
  const { data: frontmatter, body } = readFrontmatter(indexPath);
  const storedEntityId = frontmatter[WIKIDATA_ID_FIELD];
  const searchTerm = options.query || frontmatter?.title;
  if (!storedEntityId && !searchTerm) {
    console.log(`⚠️ Aucun titre trouvé dans ${indexPath}.`);
    return;
  }

  console.log(`\n📄 ${indexPath}`);
  console.log(` Type détecté : ${typeKey}`);

  let entityId = storedEntityId;
  let selection = null;

  if (entityId) {
    // A previously confirmed id short-circuits the search.
    console.log(` 🆔 Identifiant Wikidata déjà enregistré : ${entityId}`);
  } else {
    console.log(` Recherche Wikidata : "${searchTerm}"`);
    const results = await searchEntities(searchTerm, typeConfig, options);
    if (!results.length) {
      console.log(" ❌ Aucun résultat Wikidata trouvé.");
      return;
    }

    if (options.autoSelect) {
      selection = results[0];
      console.log(
        ` ⚙️ Mode automatique: sélection du premier résultat (${selection.label} — ${selection.description || "sans description"})`
      );
    } else {
      selection = await promptForSelection(results, rl);
    }

    if (!selection) {
      console.log(" ❎ Sélection annulée.");
      return;
    }
    entityId = selection.id;
  }

  const languages = buildLanguageOrder(options.language);
  const entity = await fetchEntity(entityId, languages);
  const entityLabel = pickLabel(entity.labels, languages) || selection?.label || entityId;
  if (storedEntityId) {
    console.log(` ✔ Entité chargée : ${entityLabel} (${entityId})`);
  } else {
    console.log(` ✔ Entité sélectionnée : ${entityLabel} (${entityId})`);
  }

  // Gather every referenced entity id first so labels are resolved in bulk.
  const idsToResolve = new Set();
  for (const properties of Object.values(typeConfig.taxonomyMap)) {
    for (const property of properties) {
      for (const id of collectClaimIds(entity, property)) {
        idsToResolve.add(id);
      }
    }
  }
  if (typeConfig.roleQualifiers) {
    for (const id of collectRoleIds(entity, typeConfig.roleQualifiers)) {
      if (typeof id === "string" && id.startsWith("Q")) {
        idsToResolve.add(id);
      }
    }
  }

  const labelMap = await fetchLabels([...idsToResolve], languages);
  const lookup = (id) => labelMap[id] || id;

  const taxonomyValues = buildTaxonomyValues(entity, typeConfig, lookup);
  const externalLinks = buildExternalLinks(entity);

  let updated = mergeFrontmatter(frontmatter, taxonomyValues);
  if (mergeLinks(frontmatter, externalLinks)) {
    updated = true;
  }
  if (frontmatter[WIKIDATA_ID_FIELD] !== entityId) {
    frontmatter[WIKIDATA_ID_FIELD] = entityId;
    updated = true;
    console.log(` ↳ Champ ${WIKIDATA_ID_FIELD} ajouté/mis à jour`);
  }

  if (!updated) {
    console.log(" ℹ️ Aucun nouveau terme à ajouter.");
    return;
  }

  // lineWidth: -1 disables line folding in the dumped YAML.
  const newFrontmatter = yaml.dump(frontmatter, { lineWidth: -1 });
  fs.writeFileSync(indexPath, `---\n${newFrontmatter}---\n${body}`, "utf8");
  console.log(" 💾 Frontmatter mis à jour.");
}

/**
 * Entry point: parse the CLI, determine the bundles to process and run them
 * sequentially. The first error stops the run and sets the exit code to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { options, targets: cliTargets } = parseArgs(process.argv);
  if (options.help) {
    showUsage();
    return;
  }

  let targets = [...cliTargets];
  if (targets.length === 0) {
    console.log(`🔄 Recherche de critiques dans ${DEFAULT_CRITIQUES_ROOT}...`);
    targets = collectCritiqueBundles(DEFAULT_CRITIQUES_ROOT);
    if (targets.length === 0) {
      console.log("Aucune critique à traiter. Veuillez fournir un chemin explicite.");
      return;
    }
    console.log(` → ${targets.length} critique(s) détectée(s).`);
  }

  // No prompt is shown in auto mode, so no readline interface is needed.
  const rl = options.autoSelect ? null : readline.createInterface({ input: stdin, output: stdout });

  try {
    for (const target of targets) {
      await processCritique(target, options, rl);
    }
  } catch (error) {
    console.error(`Erreur: ${error.message}`);
    process.exitCode = 1;
  } finally {
    if (rl) {
      rl.close();
    }
  }
}

// Errors raised before main's own try block (e.g. while scanning the critiques
// directory) are reported the same way instead of surfacing as an unhandled rejection.
main().catch((error) => {
  console.error(`Erreur: ${error.message}`);
  process.exitCode = 1;
});