From e11e4ee591f113a499335289c91859b84423129a Mon Sep 17 00:00:00 2001
From: Richard Dern
Date: Fri, 20 Mar 2026 01:41:27 +0100
Subject: [PATCH] Ajoute un script d'import d'images Wikimedia

---
 tools/add_wikimedia_image.js       | 131 +++++++++++
 tools/generate_mermaid_diagrams.js |  62 +----
 tools/generate_metadata_files.js   |  62 +----
 tools/lib/article_move.js          |  32 +--
 tools/lib/bundles.js               | 163 +++++++++++++
 tools/lib/wikimedia.js             | 355 +++++++++++++++++++++++++++++
 tools/tests/bundles.test.js        |  42 ++++
 tools/tests/wikimedia.test.js      |  65 ++++++
 8 files changed, 761 insertions(+), 151 deletions(-)
 create mode 100644 tools/add_wikimedia_image.js
 create mode 100644 tools/lib/bundles.js
 create mode 100644 tools/lib/wikimedia.js
 create mode 100644 tools/tests/bundles.test.js
 create mode 100644 tools/tests/wikimedia.test.js

diff --git a/tools/add_wikimedia_image.js b/tools/add_wikimedia_image.js
new file mode 100644
index 00000000..c4b6d354
--- /dev/null
+++ b/tools/add_wikimedia_image.js
@@ -0,0 +1,131 @@
+#!/usr/bin/env node
+
+const fs = require("node:fs");
+const fsPromises = require("node:fs/promises");
+const path = require("node:path");
+const { ensureBundleExists, promptForBundlePath } = require("./lib/bundles");
+const {
+  extractFileTitleFromUrl,
+  fetchWikimediaAsset,
+  downloadFile,
+} = require("./lib/wikimedia");
+
+const CONTENT_DIR = path.resolve("content");
+const TEMPLATE_PATH = path.resolve("data/metadata_template.yaml");
+const IMAGES_DIR = "images";
+const DATA_IMAGES_DIR = path.join("data", "images");
+
+/**
+ * Prints the minimal usage help of the script to stderr.
+ * The first argument (the Wikimedia URL) is mandatory; the bundle path is optional.
+ */
+function showUsage() {
+  console.error("Usage: node tools/add_wikimedia_image.js <wikimedia_url> [bundle_path]");
+}
+
+/**
+ * Loads the YAML template used for attachment metadata.
+ * @returns {Promise<string>} Raw template content.
+ */
+async function loadMetadataTemplate() {
+  return fsPromises.readFile(TEMPLATE_PATH, "utf8");
+}
+
+/**
+ * Injects the attribution and description into the existing YAML template.
+ * Only the first occurrence of each commented placeholder is replaced; a
+ * template without the placeholder is returned unchanged for that field.
+ * @param {string} template Raw template.
+ * @param {{ attribution: string, description: string }} metadata Values to inject.
+ * @returns {string} Final YAML ready to be written.
+ */
+function fillMetadataTemplate(template, metadata) {
+  // Replacer functions are used on purpose: with a plain string replacement,
+  // sequences such as "$&", "$$" or "$'" inside the metadata would be
+  // interpreted as substitution patterns and corrupt the written value.
+  let output = template;
+  output = output.replace('#attribution: ""', () => `attribution: ${JSON.stringify(metadata.attribution)}`);
+  output = output.replace('#description: ""', () => `description: ${JSON.stringify(metadata.description)}`);
+  return output;
+}
+
+/**
+ * Returns the final paths of the image and of its metadata file.
+ * @param {string} bundlePath Target bundle.
+ * @param {string} fileName Downloaded file name.
+ * @returns {{ imagePath: string, metadataPath: string }}
+ */
+function buildTargetPaths(bundlePath, fileName) {
+  // The file name comes from a remote, percent-decoded value: keep only its
+  // last path segment so it can never point outside the bundle directories.
+  const safeFileName = path.basename(fileName);
+  const extension = path.extname(safeFileName);
+  const baseName = path.basename(safeFileName, extension);
+
+  return {
+    imagePath: path.join(bundlePath, IMAGES_DIR, safeFileName),
+    metadataPath: path.join(bundlePath, DATA_IMAGES_DIR, `${baseName}.yaml`),
+  };
+}
+
+/**
+ * Checks that no existing file would be overwritten.
+ * @param {string} imagePath Final image path.
+ * @param {string} metadataPath Final metadata path.
+ * @throws {Error} When either target already exists.
+ */
+function ensureTargetsDoNotExist(imagePath, metadataPath) {
+  if (fs.existsSync(imagePath)) {
+    throw new Error(`L'image ${imagePath} existe déjà.`);
+  }
+
+  if (fs.existsSync(metadataPath)) {
+    throw new Error(`Le fichier de métadonnées ${metadataPath} existe déjà.`);
+  }
+}
+
+/**
+ * Ensures that the required parent directories exist.
+ * @param {string} imagePath Final image path.
+ * @param {string} metadataPath Final metadata path.
+ */
+async function ensureTargetDirectories(imagePath, metadataPath) {
+  await fsPromises.mkdir(path.dirname(imagePath), { recursive: true });
+  await fsPromises.mkdir(path.dirname(metadataPath), { recursive: true });
+}
+
+/**
+ * Main entry point of the script.
+ * Reads the Wikimedia URL (argv[2]) and the optional bundle path (argv[3]),
+ * resolves the bundle, fetches the asset description from Commons, then
+ * writes the image and its metadata file into the bundle.
+ */
+async function main() {
+  const rawUrl = process.argv[2];
+  const manualBundlePath = process.argv[3];
+
+  if (!rawUrl) {
+    showUsage();
+    process.exit(1);
+  }
+
+  const bundlePath = await promptForBundlePath(manualBundlePath, {
+    contentDir: CONTENT_DIR,
+    prompts: {
+      confirmLatest(latest) {
+        return `Utiliser le dernier bundle trouve : ${latest} ? (Y/n) `;
+      },
+      manualPath: "Saisissez le chemin relatif du bundle : ",
+    },
+  });
+
+  ensureBundleExists(bundlePath);
+
+  const fileTitle = extractFileTitleFromUrl(rawUrl);
+  console.log(`Recuperation des metadonnees Wikimedia pour ${fileTitle}...`);
+
+  const asset = await fetchWikimediaAsset(fileTitle);
+  const targets = buildTargetPaths(bundlePath, asset.fileName);
+
+  ensureTargetsDoNotExist(targets.imagePath, targets.metadataPath);
+
+  // Build the metadata before touching the disk: a missing or unreadable
+  // template must not leave a downloaded image without its metadata file.
+  const template = await loadMetadataTemplate();
+  const metadataContent = fillMetadataTemplate(template, asset);
+
+  await ensureTargetDirectories(targets.imagePath, targets.metadataPath);
+
+  console.log(`Telechargement de ${asset.fileName}...`);
+  await downloadFile(asset.imageUrl, targets.imagePath);
+
+  await fsPromises.writeFile(targets.metadataPath, metadataContent, "utf8");
+
+  console.log(`Image enregistree : ${targets.imagePath}`);
+  console.log(`Metadonnees enregistrees : ${targets.metadataPath}`);
+}
+
+// Never leave the promise floating: report the failure message and exit
+// with a non-zero status instead of relying on the unhandled-rejection path.
+main().catch((error) => {
+  console.error(error instanceof Error ? error.message : error);
+  process.exit(1);
+});
diff --git a/tools/generate_mermaid_diagrams.js b/tools/generate_mermaid_diagrams.js
index 3f683b11..b0d40ab2 100644
--- a/tools/generate_mermaid_diagrams.js
+++ b/tools/generate_mermaid_diagrams.js
@@ -1,50 +1,14 @@
 const fs = require('fs/promises');
 const path = require('path');
-const readline = require('readline');
 const { spawn } = require('child_process');
 const os = require('os');
+const { 
promptForBundlePath } = require('./lib/bundles'); const CONTENT_DIR = path.resolve('content'); const DIAGRAMS_DIR = 'diagrams'; const OUTPUT_DIR = 'images'; const MERMAID_EXTENSION = '.mermaid'; -function askQuestion(query) { - const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); - - return new Promise(resolve => rl.question(query, answer => { rl.close(); resolve(answer.trim()); })); -} - -async function findLatestBundle(dir) { - let latest = { path: null, time: 0 }; - - async function search(current) { - const entries = await fs.readdir(current, { withFileTypes: true }); - - for (const entry of entries) { - const fullPath = path.join(current, entry.name); - - if (entry.isDirectory()) { - const hasIndex = (await fs.readdir(fullPath)).includes('index.md'); - - if (hasIndex) { - const stat = await fs.stat(fullPath); - - if (stat.mtimeMs > latest.time) { - latest = { path: fullPath, time: stat.mtimeMs }; - } - } else { - await search(fullPath); - } - } - } - } - - await search(dir); - - return latest.path; -} - async function directoryExists(dirPath) { try { const stat = await fs.stat(dirPath); @@ -221,29 +185,7 @@ async function generateDiagrams(bundlePath) { async function main() { const manualPath = process.argv[2]; - - let bundle; - - if (manualPath) { - bundle = path.resolve(manualPath); - } else { - const latest = await findLatestBundle(CONTENT_DIR); - - if (!latest) { - console.error('No bundle found in content/.'); - return; - } - - const confirm = await askQuestion(`Use latest bundle found: ${latest}? 
(Y/n) `); - - if (confirm.toLowerCase() === 'n') { - const inputPath = await askQuestion('Enter the relative path to your bundle: '); - - bundle = path.resolve(inputPath); - } else { - bundle = latest; - } - } + const bundle = await promptForBundlePath(manualPath, { contentDir: CONTENT_DIR }); try { await generateDiagrams(bundle); diff --git a/tools/generate_metadata_files.js b/tools/generate_metadata_files.js index 9ea0406b..754200db 100644 --- a/tools/generate_metadata_files.js +++ b/tools/generate_metadata_files.js @@ -1,48 +1,12 @@ const fs = require('fs/promises'); const fsSync = require('fs'); const path = require('path'); -const readline = require('readline'); +const { promptForBundlePath } = require('./lib/bundles'); const CONTENT_DIR = path.resolve('content'); const TEMPLATE_PATH = path.resolve('data/metadata_template.yaml'); const MEDIA_TYPES = ['images', 'sounds', 'videos']; -function askQuestion(query) { - const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); - - return new Promise(resolve => rl.question(query, answer => { rl.close(); resolve(answer.trim()); })); -} - -async function findLatestBundle(dir) { - let latest = { path: null, time: 0 }; - - async function search(current) { - const entries = await fs.readdir(current, { withFileTypes: true }); - - for (const entry of entries) { - const fullPath = path.join(current, entry.name); - - if (entry.isDirectory()) { - const hasIndex = (await fs.readdir(fullPath)).includes('index.md'); - - if (hasIndex) { - const stat = await fs.stat(fullPath); - - if (stat.mtimeMs > latest.time) { - latest = { path: fullPath, time: stat.mtimeMs }; - } - } else { - await search(fullPath); - } - } - } - } - - await search(dir); - - return latest.path; -} - async function loadTemplate() { return fs.readFile(TEMPLATE_PATH, 'utf8'); } @@ -97,29 +61,7 @@ async function generateYamlFiles(bundlePath, yamlTemplate) { async function main() { const manualPath = process.argv[2]; - - let bundle; - - 
if (manualPath) { - bundle = path.resolve(manualPath); - } else { - const latest = await findLatestBundle(CONTENT_DIR); - - if (!latest) { - console.error('No bundle found in content/.'); - return; - } - - const confirm = await askQuestion(`Use latest bundle found: ${latest}? (Y/n) `); - - if (confirm.toLowerCase() === 'n') { - const inputPath = await askQuestion('Enter the relative path to your bundle: '); - - bundle = path.resolve(inputPath); - } else { - bundle = latest; - } - } + const bundle = await promptForBundlePath(manualPath, { contentDir: CONTENT_DIR }); const template = await loadTemplate(); diff --git a/tools/lib/article_move.js b/tools/lib/article_move.js index d9957c5a..ce61d5b7 100644 --- a/tools/lib/article_move.js +++ b/tools/lib/article_move.js @@ -1,37 +1,7 @@ const fs = require("node:fs"); const path = require("node:path"); const { readFrontmatterFile, writeFrontmatterFile } = require("./frontmatter"); - -/** - * Résout un chemin source vers le dossier du bundle. - * @param {string} input Chemin fourni par l'utilisateur. - * @returns {string} Chemin absolu du bundle. - */ -function resolveBundlePath(input) { - const resolved = path.resolve(input); - if (resolved.toLowerCase().endsWith(`${path.sep}index.md`)) { - return path.dirname(resolved); - } - return resolved; -} - -/** - * Vérifie la présence d'un bundle Hugo. - * @param {string} bundleDir Chemin absolu du bundle. - */ -function ensureBundleExists(bundleDir) { - if (!fs.existsSync(bundleDir)) { - throw new Error(`Le bundle ${bundleDir} est introuvable.`); - } - const stats = fs.statSync(bundleDir); - if (!stats.isDirectory()) { - throw new Error(`Le bundle ${bundleDir} n'est pas un dossier.`); - } - const indexPath = path.join(bundleDir, "index.md"); - if (!fs.existsSync(indexPath)) { - throw new Error(`Le bundle ${bundleDir} ne contient pas index.md.`); - } -} +const { resolveBundlePath, ensureBundleExists } = require("./bundles"); /** * Vérifie que le chemin reste sous content/. 
diff --git a/tools/lib/bundles.js b/tools/lib/bundles.js
new file mode 100644
index 00000000..43cf0b25
--- /dev/null
+++ b/tools/lib/bundles.js
@@ -0,0 +1,163 @@
+const fs = require("node:fs");
+const fsPromises = require("node:fs/promises");
+const path = require("node:path");
+const readline = require("node:readline/promises");
+const { stdin, stdout } = require("node:process");
+
+/**
+ * Normalizes a user-supplied path to the bundle directory.
+ * A path ending in `index.md` (case-insensitive) resolves to its parent directory.
+ * @param {string} input Path typed by the user.
+ * @returns {string} Absolute path of the bundle.
+ * @throws {Error} When the input is not a non-blank string.
+ */
+function resolveBundlePath(input) {
+  if (typeof input !== "string" || !input.trim()) {
+    throw new Error("Le chemin du bundle est vide.");
+  }
+
+  const resolved = path.resolve(input);
+  if (resolved.toLowerCase().endsWith(`${path.sep}index.md`)) {
+    return path.dirname(resolved);
+  }
+  return resolved;
+}
+
+/**
+ * Checks that a directory really is a Hugo bundle (a directory holding index.md).
+ * @param {string} bundleDir Absolute path of the bundle.
+ * @throws {Error} When the path is missing, is not a directory, or has no index.md.
+ */
+function ensureBundleExists(bundleDir) {
+  if (!fs.existsSync(bundleDir)) {
+    throw new Error(`Le bundle ${bundleDir} est introuvable.`);
+  }
+
+  const stats = fs.statSync(bundleDir);
+  if (!stats.isDirectory()) {
+    throw new Error(`Le bundle ${bundleDir} n'est pas un dossier.`);
+  }
+
+  const indexPath = path.join(bundleDir, "index.md");
+  if (!fs.existsSync(indexPath)) {
+    throw new Error(`Le bundle ${bundleDir} ne contient pas index.md.`);
+  }
+}
+
+/**
+ * Asks the user a single question on the terminal.
+ * @param {string} query Text shown in the terminal.
+ * @returns {Promise<string>} Trimmed answer.
+ */
+async function askQuestion(query) {
+  const rl = readline.createInterface({ input: stdin, output: stdout });
+  try {
+    const answer = await rl.question(query);
+    return answer.trim();
+  } finally {
+    // Always release stdin, even when the question rejects, so the
+    // interface cannot keep the process alive or leak a listener.
+    rl.close();
+  }
+}
+
+/**
+ * Finds the most recently modified bundle under a root directory.
+ * @param {string} rootDir Root directory to walk.
+ * @returns {Promise<string|null>} Absolute path of the latest bundle found, or null when none exists.
+ */
+async function findLatestBundle(rootDir) {
+  let latestPath = null;
+  let latestTime = 0;
+
+  await walk(rootDir);
+
+  return latestPath;
+
+  /**
+   * Walks the tree recursively and keeps track of the most recent bundle.
+   * A directory containing index.md counts as a bundle and is not descended into.
+   * @param {string} currentDir Directory being inspected.
+   */
+  async function walk(currentDir) {
+    const entries = await fsPromises.readdir(currentDir, { withFileTypes: true });
+    let hasIndex = false;
+
+    for (const entry of entries) {
+      if (entry.isFile() && entry.name === "index.md") {
+        hasIndex = true;
+        break;
+      }
+    }
+
+    if (hasIndex) {
+      // Recency is the mtime of the bundle directory itself, not of index.md.
+      const stats = await fsPromises.stat(currentDir);
+      if (stats.mtimeMs > latestTime) {
+        latestTime = stats.mtimeMs;
+        latestPath = currentDir;
+      }
+      return;
+    }
+
+    for (const entry of entries) {
+      if (!entry.isDirectory()) {
+        continue;
+      }
+
+      const childDir = path.join(currentDir, entry.name);
+      await walk(childDir);
+    }
+  }
+}
+
+/**
+ * Resolves the target bundle from a manual path or from the latest bundle found.
+ * A non-blank manual path is used directly, without prompting. Otherwise the
+ * user is asked to confirm the latest bundle; answering "n" (any case) asks
+ * for a path instead, any other answer accepts the latest bundle.
+ * @param {string|null|undefined} manualPath Optional path given as an argument.
+ * @param {{ contentDir: string, prompts?: { confirmLatest: Function, manualPath: string } }} options Resolution options.
+ * @returns {Promise<string>} Absolute path of the selected bundle.
+ * @throws {Error} When no bundle exists under the content directory.
+ */
+async function promptForBundlePath(manualPath, options) {
+  // Fall back to ./content when no usable contentDir option is given.
+  let contentDir = path.resolve("content");
+  if (options && typeof options.contentDir === "string" && options.contentDir.trim()) {
+    contentDir = path.resolve(options.contentDir);
+  }
+
+  const defaultPrompts = {
+    confirmLatest(latest) {
+      return `Use latest bundle found: ${latest}? (Y/n) `;
+    },
+    manualPath: "Enter the relative path to your bundle: ",
+  };
+  let prompts = defaultPrompts;
+
+  // Each custom prompt overrides its default only when it has the expected type.
+  if (options && options.prompts && typeof options.prompts === "object") {
+    prompts = {
+      confirmLatest: defaultPrompts.confirmLatest,
+      manualPath: defaultPrompts.manualPath,
+    };
+
+    if (typeof options.prompts.confirmLatest === "function") {
+      prompts.confirmLatest = options.prompts.confirmLatest;
+    }
+
+    if (typeof options.prompts.manualPath === "string" && options.prompts.manualPath.trim()) {
+      prompts.manualPath = options.prompts.manualPath;
+    }
+  }
+
+  if (typeof manualPath === "string" && manualPath.trim()) {
+    return resolveBundlePath(manualPath);
+  }
+
+  const latest = await findLatestBundle(contentDir);
+  if (!latest) {
+    throw new Error("Aucun bundle n'a été trouvé sous content/.");
+  }
+
+  const confirm = await askQuestion(prompts.confirmLatest(latest));
+  if (confirm.toLowerCase() === "n") {
+    const inputPath = await askQuestion(prompts.manualPath);
+    return resolveBundlePath(inputPath);
+  }
+
+  return latest;
+}
+
+module.exports = {
+  resolveBundlePath,
+  ensureBundleExists,
+  askQuestion,
+  findLatestBundle,
+  promptForBundlePath,
+};
diff --git a/tools/lib/wikimedia.js b/tools/lib/wikimedia.js
new file mode 100644
index 00000000..aaf6cb78
--- /dev/null
+++ b/tools/lib/wikimedia.js
@@ -0,0 +1,355 @@
+const fs = require("node:fs/promises");
+const path = require("node:path");
+const { fetch } = require("undici");
+
+const COMMONS_API_URL = "https://commons.wikimedia.org/w/api.php";
+const COMMONS_HOST = "commons.wikimedia.org";
+const UPLOAD_HOST = "upload.wikimedia.org";
+
+/**
+ * Extracts a MediaWiki file title from a Wikipedia or Commons URL.
+ * @param {string} rawUrl URL supplied by the user.
+ * @returns {string} Canonical title of the form `File:Name.ext`.
+ * @throws {Error} When the URL does not designate a file page or file upload.
+ */
+function extractFileTitleFromUrl(rawUrl) {
+  const url = new URL(rawUrl);
+  const hostname = url.hostname.toLowerCase();
+
+  // Media-viewer links carry the file title in the fragment: `#/media/File:...`.
+  // This check and the /wiki/ check below run before any hostname test,
+  // so they accept the URL whatever its host is.
+  if (url.hash) {
+    const hash = decodeURIComponent(url.hash.slice(1));
+    if (hash.startsWith("/media/")) {
+      const fileTitle = hash.slice("/media/".length);
+      return normalizeFileTitle(fileTitle);
+    }
+  }
+
+  if (pathnameLooksLikeFilePage(url.pathname)) {
+    const title = decodeURIComponent(url.pathname.slice("/wiki/".length));
+    return normalizeFileTitle(title);
+  }
+
+  // Direct upload URLs: the last path segment is used as the file name.
+  // NOTE(review): for thumbnail URLs this segment is presumably the resized
+  // variant name rather than the original file name; confirm if those must work.
+  if (hostname === UPLOAD_HOST) {
+    const fileName = decodeURIComponent(path.basename(url.pathname));
+    return normalizeFileTitle(`File:${fileName}`);
+  }
+
+  if (hostname === COMMONS_HOST || hostname.endsWith(".wikipedia.org")) {
+    throw new Error(`L'URL ${rawUrl} ne pointe pas vers une page de fichier Wikimedia.`);
+  }
+
+  throw new Error(`L'URL ${rawUrl} n'appartient pas à Wikipédia ou Wikimedia Commons.`);
+}
+
+/**
+ * Tells whether a URL path targets a MediaWiki file page.
+ * Only the `File:` and `Fichier:` prefixes are recognized, case-sensitively.
+ * @param {string} pathname Pathname part of the URL.
+ * @returns {boolean} `true` when the path targets a file page.
+ */
+function pathnameLooksLikeFilePage(pathname) {
+  if (!pathname.startsWith("/wiki/")) {
+    return false;
+  }
+
+  const decoded = decodeURIComponent(pathname.slice("/wiki/".length));
+  if (decoded.startsWith("File:")) {
+    return true;
+  }
+  if (decoded.startsWith("Fichier:")) {
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Normalizes a Wikimedia file title to the `File:` namespace.
+ * @param {string} rawTitle Raw title extracted from a URL.
+ * @returns {string} Normalized title.
+ * @throws {Error} When the title is blank or has neither a `File:` nor a `Fichier:` prefix.
+ */
+function normalizeFileTitle(rawTitle) {
+  const cleaned = rawTitle.trim();
+  if (!cleaned) {
+    throw new Error("Le titre du fichier Wikimedia est vide.");
+  }
+
+  if (cleaned.startsWith("File:")) {
+    return cleaned;
+  }
+
+  // The French namespace alias is rewritten to the canonical `File:` prefix.
+  if (cleaned.startsWith("Fichier:")) {
+    return `File:${cleaned.slice("Fichier:".length)}`;
+  }
+
+  throw new Error(`Le titre ${rawTitle} ne correspond pas à un fichier Wikimedia.`);
+}
+
+/**
+ * Queries the Commons API to retrieve the image and its metadata.
+ * The request asks for `imageinfo` with the `url` and `extmetadata`
+ * properties, in English, limited to one revision, as JSON.
+ * @param {string} fileTitle Title of the targeted file.
+ * @returns {Promise<{ fileTitle: string, fileName: string, imageUrl: string, descriptionUrl: string, descriptionShortUrl: string, description: string, attribution: string }>}
+ * @throws {Error} When the API answers with a non-2xx status or an unusable payload.
+ */
+async function fetchWikimediaAsset(fileTitle) {
+  const url = new URL(COMMONS_API_URL);
+  url.searchParams.set("action", "query");
+  url.searchParams.set("titles", fileTitle);
+  url.searchParams.set("prop", "imageinfo");
+  url.searchParams.set("iiprop", "url|extmetadata");
+  url.searchParams.set("iiextmetadatalanguage", "en");
+  url.searchParams.set("iilimit", "1");
+  url.searchParams.set("format", "json");
+
+  // NOTE(review): no explicit User-Agent header is sent; confirm this is
+  // acceptable for the Wikimedia endpoints being called.
+  const response = await fetch(url, {
+    headers: {
+      accept: "application/json",
+    },
+  });
+
+  if (!response.ok) {
+    throw new Error(`L'API Wikimedia Commons a répondu ${response.status} pour ${fileTitle}.`);
+  }
+
+  const data = await response.json();
+  return extractAssetFromApiResponse(data);
+}
+
+/**
+ * Extracts the useful information from a Commons API JSON response.
+ * @param {Record<string, unknown>} data Raw JSON response.
+ * @returns {{ fileTitle: string, fileName: string, imageUrl: string, descriptionUrl: string, descriptionShortUrl: string, description: string, attribution: string }}
+ * @throws {Error} When any required part of the response is missing or malformed.
+ */
+function extractAssetFromApiResponse(data) {
+  if (!data || typeof data !== "object") {
+    throw new Error("La réponse de l'API Wikimedia Commons est invalide.");
+  }
+
+  const query = data.query;
+  if (!query || typeof query !== "object") {
+    throw new Error("La réponse de l'API Wikimedia Commons ne contient pas de section query.");
+  }
+
+  const pages = query.pages;
+  if (!pages || typeof pages !== "object") {
+    throw new Error("La réponse de l'API Wikimedia Commons ne contient pas de pages.");
+  }
+
+  const pageIds = Object.keys(pages);
+  if (pageIds.length === 0) {
+    throw new Error("La réponse de l'API Wikimedia Commons ne contient aucune page.");
+  }
+
+  // Only the first page of the response is considered.
+  const page = pages[pageIds[0]];
+  if (!page || typeof page !== "object") {
+    throw new Error("La page Wikimedia Commons retournée est invalide.");
+  }
+
+  // The mere presence of a `missing` key marks the page as not found.
+  if (Object.prototype.hasOwnProperty.call(page, "missing")) {
+    throw new Error(`Le fichier Wikimedia ${page.title} est introuvable.`);
+  }
+
+  const imageInfoList = page.imageinfo;
+  if (!Array.isArray(imageInfoList) || imageInfoList.length === 0) {
+    throw new Error(`Aucune information image n'a été retournée pour ${page.title}.`);
+  }
+
+  const imageInfo = imageInfoList[0];
+  const extmetadata = imageInfo.extmetadata;
+  if (!extmetadata || typeof extmetadata !== "object") {
+    throw new Error(`Les métadonnées étendues sont absentes pour ${page.title}.`);
+  }
+
+  const imageUrl = imageInfo.url;
+  const descriptionUrl = imageInfo.descriptionurl;
+  const descriptionShortUrl = imageInfo.descriptionshorturl;
+
+  if (typeof imageUrl !== "string" || !imageUrl) {
+    throw new Error(`L'URL de téléchargement est absente pour ${page.title}.`);
+  }
+
+  if (typeof descriptionUrl !== "string" || !descriptionUrl) {
+    throw new Error(`L'URL de description est absente pour ${page.title}.`);
+  }
+
+  if (typeof descriptionShortUrl !== "string" || !descriptionShortUrl) {
+    throw new Error(`L'URL courte de description est absente pour ${page.title}.`);
+  }
+
+  const imageDescription = readExtMetadataValue(extmetadata, "ImageDescription");
+  const artist = readExtMetadataValue(extmetadata, "Artist");
+  const credit = readExtMetadataValue(extmetadata, "Credit");
+  const licenseShortName = normalizeLicenseName(readExtMetadataValue(extmetadata, "LicenseShortName"));
+  const attribution = buildAttribution(artist, credit, licenseShortName, descriptionShortUrl);
+  // The local file name is the percent-decoded last segment of the download URL.
+  const fileName = decodeURIComponent(path.basename(new URL(imageUrl).pathname));
+
+  if (!imageDescription) {
+    throw new Error(`La description Wikimedia est absente pour ${page.title}.`);
+  }
+
+  if (!attribution) {
+    throw new Error(`L'attribution Wikimedia est absente pour ${page.title}.`);
+  }
+
+  return {
+    fileTitle: page.title,
+    fileName,
+    imageUrl,
+    descriptionUrl,
+    descriptionShortUrl,
+    description: imageDescription,
+    attribution,
+  };
+}
+
+/**
+ * Reads an extmetadata field and converts it to plain text.
+ * @param {Record<string, unknown>} extmetadata Extended metadata.
+ * @param {string} key Name of the field to read.
+ * @returns {string} Cleaned value, possibly empty when the field is absent or not a string.
+ */
+function readExtMetadataValue(extmetadata, key) {
+  const entry = extmetadata[key];
+  if (!entry || typeof entry !== "object") {
+    return "";
+  }
+
+  if (typeof entry.value !== "string") {
+    return "";
+  }
+
+  return sanitizeMetadataText(entry.value);
+}
+
+/**
+ * Cleans an HTML value coming from Commons and reduces it to text.
+ * @param {string} value Raw value.
+ * @returns {string} Cleaned plain text.
+ */
+function sanitizeMetadataText(value) {
+  // Entities are decoded first so that escaped markup (`&lt;br&gt;`) is
+  // stripped like real markup, then once more after tag removal.
+  let sanitized = decodeHtmlEntities(value);
+  // Line breaks become a space; every form (<br>, <br/>, <br />) is covered.
+  sanitized = sanitized.replace(/<br\s*\/?>/gi, " ");
+  sanitized = sanitized.replace(/<[^>]+>/g, " ");
+  sanitized = decodeHtmlEntities(sanitized);
+  sanitized = sanitized.replace(/\s+/g, " ").trim();
+  return sanitized;
+}
+
+/**
+ * Decodes a sufficient subset of the HTML entities used by Commons:
+ * hexadecimal and decimal numeric references, plus a few named entities.
+ * Unknown or out-of-range references are left untouched.
+ * @param {string} value HTML-encoded value.
+ * @returns {string} Decoded value.
+ */
+function decodeHtmlEntities(value) {
+  const namedEntities = {
+    amp: "&",
+    apos: "'",
+    gt: ">",
+    lt: "<",
+    nbsp: " ",
+    quot: "\"",
+  };
+
+  let decoded = value.replace(/&#x([0-9a-f]+);/gi, (match, digits) => {
+    const codePoint = Number.parseInt(digits, 16);
+    if (!Number.isInteger(codePoint)) {
+      return match;
+    }
+    if (codePoint < 0 || codePoint > 0x10ffff) {
+      return match;
+    }
+    return String.fromCodePoint(codePoint);
+  });
+
+  decoded = decoded.replace(/&#([0-9]+);/g, (match, digits) => {
+    const codePoint = Number.parseInt(digits, 10);
+    if (!Number.isInteger(codePoint)) {
+      return match;
+    }
+    if (codePoint < 0 || codePoint > 0x10ffff) {
+      return match;
+    }
+    return String.fromCodePoint(codePoint);
+  });
+
+  decoded = decoded.replace(/&([a-z]+);/gi, (match, name) => {
+    const key = name.toLowerCase();
+    if (Object.prototype.hasOwnProperty.call(namedEntities, key)) {
+      return namedEntities[key];
+    }
+    return match;
+  });
+
+  return decoded;
+}
+
+/**
+ * Assembles the final attribution as it will be written to the YAML file.
+ * @param {string} artist Cleaned author.
+ * @param {string} credit Cleaned credit.
+ * @param {string} licenseShortName Short license name.
+ * @param {string} descriptionShortUrl Short Commons URL.
+ * @returns {string} Concatenated attribution.
+ */
+function buildAttribution(artist, credit, licenseShortName, descriptionShortUrl) {
+  // Author ("By ...") and credit form one segment, joined by " - " when both exist.
+  const authorPart = artist ? `By ${artist}` : "";
+  const creditLine = [authorPart, credit].filter(Boolean).join(" - ");
+
+  // Empty segments are dropped; the remaining ones are comma-separated.
+  return [creditLine, licenseShortName, descriptionShortUrl].filter(Boolean).join(", ");
+}
+
+/**
+ * Harmonizes some license labels so they stay consistent with existing data.
+ * Only "Public domain" is rewritten (to "Public Domain"); any other label is
+ * returned as is.
+ * @param {string} licenseShortName Raw label provided by Commons.
+ * @returns {string} Normalized label.
+ */
+function normalizeLicenseName(licenseShortName) {
+  return licenseShortName === "Public domain" ? "Public Domain" : licenseShortName;
+}
+
+/**
+ * Downloads a remote binary file to disk.
+ * @param {string} url Source URL.
+ * @param {string} targetPath Target path.
+ * @throws {Error} When the server answers with a non-2xx status.
+ */
+async function downloadFile(url, targetPath) {
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(`Le téléchargement de ${url} a échoué avec le code ${response.status}.`);
+  }
+
+  // The whole body is buffered in memory before being written in one call.
+  const buffer = Buffer.from(await response.arrayBuffer());
+  await fs.writeFile(targetPath, buffer);
+}
+
+module.exports = {
+  extractFileTitleFromUrl,
+  fetchWikimediaAsset,
+  extractAssetFromApiResponse,
+  sanitizeMetadataText,
+  buildAttribution,
+  downloadFile,
+};
diff --git a/tools/tests/bundles.test.js b/tools/tests/bundles.test.js
new file mode 100644
index 00000000..ca924cb1
--- /dev/null
+++ b/tools/tests/bundles.test.js
@@ -0,0 +1,42 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+const fs = require("node:fs/promises");
+const os = require("node:os");
+const path = require("node:path");
+const { findLatestBundle, resolveBundlePath } = require("../lib/bundles");
+
+/**
+ * Creates a minimal Hugo bundle for the tests.
+ * @param {string} rootDir Temporary root directory.
+ * @param {string} relativePath Relative path of the bundle.
+ * @returns {Promise<string>} Absolute path of the created bundle.
+ */
+async function createBundle(rootDir, relativePath) {
+  const bundleDir = path.join(rootDir, relativePath);
+  await fs.mkdir(bundleDir, { recursive: true });
+  await fs.writeFile(path.join(bundleDir, "index.md"), "---\ntitle: Test\ndate: 2026-03-20T12:00:00+01:00\n---\n", "utf8");
+  return bundleDir;
+}
+
+test("resolveBundlePath accepte un dossier de bundle ou un index.md", () => {
+  const bundleDir = path.resolve("content/example/bundle");
+  const indexPath = path.join(bundleDir, "index.md");
+
+  assert.equal(resolveBundlePath(bundleDir), bundleDir);
+  assert.equal(resolveBundlePath(indexPath), bundleDir);
+});
+
+test("findLatestBundle retourne le bundle modifie le plus recemment", async () => {
+  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "bundles-test-"));
+
+  try {
+    const olderBundle = await createBundle(tempRoot, "alpha/article-a");
+    const newerBundle = await createBundle(tempRoot, "beta/article-b");
+
+    // Directory mtimes are forced so the result does not depend on creation order.
+    await fs.utimes(olderBundle, new Date("2026-03-20T10:00:00Z"), new Date("2026-03-20T10:00:00Z"));
+    await fs.utimes(newerBundle, new Date("2026-03-20T11:00:00Z"), new Date("2026-03-20T11:00:00Z"));
+
+    const latestBundle = await findLatestBundle(tempRoot);
+
+    assert.equal(latestBundle, newerBundle);
+  } finally {
+    // Remove the temporary tree even when an assertion above fails.
+    await fs.rm(tempRoot, { recursive: true, force: true });
+  }
+});
diff --git a/tools/tests/wikimedia.test.js b/tools/tests/wikimedia.test.js
new file mode 100644
index 00000000..87ef6d80
--- /dev/null
+++ b/tools/tests/wikimedia.test.js
@@ -0,0 +1,65 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+const {
+  extractFileTitleFromUrl,
+  extractAssetFromApiResponse,
+  sanitizeMetadataText,
+} = require("../lib/wikimedia");
+
+test("extractFileTitleFromUrl supporte les URLs Commons et les fragments media de Wikipedia", () => {
+  const commonsUrl = "https://commons.wikimedia.org/wiki/File:IBookG3_Palourde2.png";
+  const wikipediaMediaUrl = "https://en.wikipedia.org/wiki/IBook#/media/File:IBookG3_Palourde2.png";
+
+  assert.equal(extractFileTitleFromUrl(commonsUrl), "File:IBookG3_Palourde2.png");
+  assert.equal(extractFileTitleFromUrl(wikipediaMediaUrl), "File:IBookG3_Palourde2.png");
+});
+
+test("sanitizeMetadataText decode le HTML de Commons", () => {
+  // The input must really contain markup and an entity, otherwise the test
+  // passes without exercising tag stripping or entity decoding.
+  const rawValue = "No machine-readable author provided. <a href=\"//commons.wikimedia.org/wiki/User:Ocmey\" title=\"User:Ocmey\">Ocmey</a> assumed &amp; credited.";
+  assert.equal(
+    sanitizeMetadataText(rawValue),
+    "No machine-readable author provided. Ocmey assumed & credited."
+  );
+});
+
+test("extractAssetFromApiResponse reconstruit l'attribution et la description", () => {
+  const response = {
+    query: {
+      pages: {
+        "903939": {
+          title: "File:IBookG3 Palourde2.png",
+          imageinfo: [
+            {
+              url: "https://upload.wikimedia.org/wikipedia/commons/b/b3/IBookG3_Palourde2.png",
+              descriptionurl: "https://commons.wikimedia.org/wiki/File:IBookG3_Palourde2.png",
+              descriptionshorturl: "https://commons.wikimedia.org/w/index.php?curid=903939",
+              extmetadata: {
+                ImageDescription: {
+                  value: "iBook G3 Open and Closed",
+                },
+                Credit: {
+                  value: "No machine-readable source provided. Own work assumed (based on copyright claims).",
+                },
+                Artist: {
+                  value: "No machine-readable author provided. Ocmey assumed (based on copyright claims).",
+                },
+                LicenseShortName: {
+                  value: "Public domain",
+                },
+              },
+            },
+          ],
+        },
+      },
+    },
+  };
+
+  const asset = extractAssetFromApiResponse(response);
+
+  assert.equal(asset.fileName, "IBookG3_Palourde2.png");
+  assert.equal(asset.description, "iBook G3 Open and Closed");
+  assert.equal(
+    asset.attribution,
+    "By No machine-readable author provided. Ocmey assumed (based on copyright claims). - No machine-readable source provided. Own work assumed (based on copyright claims)., Public Domain, https://commons.wikimedia.org/w/index.php?curid=903939"
+  );
+});