diff --git a/package.json b/package.json index 95972745..d10e215f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,7 @@ { "scripts": { "icons:generate": "node tools/generate_apple_touch_icons.js", + "links:manage-dead": "node tools/manage_dead_links.js", "links:refresh": "node tools/check_external_links.js", "stats:generate": "node tools/generate_stats.js" }, diff --git a/tools/lib/archive.js b/tools/lib/archive.js index 3b4f871d..2c8ec3fe 100644 --- a/tools/lib/archive.js +++ b/tools/lib/archive.js @@ -1,37 +1,167 @@ -const ARCHIVE_API_URL = "https://archive.org/wayback/available?url="; +const { fetch } = require("undici"); + +const ARCHIVE_CDX_URL = "https://web.archive.org/cdx/search/cdx"; const ARCHIVE_SAVE_URL = "https://web.archive.org/save/"; +const ARCHIVE_REQUEST_TIMEOUT_MS = 15000; /** - * Check if a given URL exists in Archive.org. - * @param {string} url - The URL to check. - * @returns {Promise} - The archive URL if found, otherwise null. + * Construit l'URL publique d'une capture Wayback. + * @param {string} originalUrl URL d'origine. + * @param {string} timestamp Horodatage Wayback. + * @returns {string} URL archive.org utilisable directement. + */ +function buildArchiveCaptureUrl(originalUrl, timestamp) { + return `https://web.archive.org/web/${timestamp}/${originalUrl}`; +} + +/** + * Borne une valeur numerique a un entier strictement positif. + * @param {unknown} value Valeur a verifier. + * @param {number} fallback Valeur par defaut. + * @returns {number} Entier positif. + */ +function normalizePositiveInteger(value, fallback) { + const parsed = Number.parseInt(String(value), 10); + if (Number.isNaN(parsed)) { + return fallback; + } + if (parsed <= 0) { + return fallback; + } + return parsed; +} + +/** + * Charge un document JSON Archive.org avec un delai maximal. + * @param {string|URL} url URL a appeler. + * @returns {Promise} Document JSON decode. 
+ */ +async function fetchArchiveJson(url) { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS); + const response = await fetch(url, { signal: controller.signal }).finally(() => clearTimeout(timer)); + if (!response.ok) { + throw new Error(`Erreur de l'API Archive.org (${response.status})`); + } + return response.json(); +} + +/** + * Liste les captures Wayback recentes disponibles pour une URL. + * @param {string} url URL d'origine a rechercher. + * @param {{ limit?: number }} options Options de requete. + * @returns {Promise>} + */ +async function listArchiveCaptures(url, options = {}) { + const limit = normalizePositiveInteger(options.limit, 10); + const requestUrl = new URL(ARCHIVE_CDX_URL); + requestUrl.searchParams.set("url", url); + requestUrl.searchParams.set("output", "json"); + requestUrl.searchParams.set("fl", "timestamp,original,statuscode,mimetype,digest"); + requestUrl.searchParams.set("filter", "statuscode:200"); + requestUrl.searchParams.set("collapse", "digest"); + requestUrl.searchParams.set("fastLatest", "true"); + requestUrl.searchParams.set("limit", `-${limit}`); + + const rows = await fetchArchiveJson(requestUrl); + if (!Array.isArray(rows)) { + return []; + } + if (rows.length <= 1) { + return []; + } + + const header = rows[0]; + if (!Array.isArray(header)) { + return []; + } + + const timestampIndex = header.indexOf("timestamp"); + const originalIndex = header.indexOf("original"); + const statusCodeIndex = header.indexOf("statuscode"); + const mimetypeIndex = header.indexOf("mimetype"); + + const captures = []; + for (const row of rows.slice(1)) { + if (!Array.isArray(row)) { + continue; + } + + const timestamp = row[timestampIndex]; + const originalUrl = row[originalIndex]; + if (typeof timestamp !== "string") { + continue; + } + if (typeof originalUrl !== "string") { + continue; + } + + let statusCode = null; + if (statusCodeIndex > -1) { + const parsedStatusCode = 
Number.parseInt(row[statusCodeIndex], 10); + if (!Number.isNaN(parsedStatusCode)) { + statusCode = parsedStatusCode; + } + } + + let mimetype = null; + if (mimetypeIndex > -1) { + const rawMimetype = row[mimetypeIndex]; + if (typeof rawMimetype === "string" && rawMimetype.trim()) { + mimetype = rawMimetype.trim(); + } + } + + captures.push({ + timestamp, + originalUrl, + statusCode, + mimetype, + url: buildArchiveCaptureUrl(originalUrl, timestamp), + }); + } + + captures.sort((left, right) => right.timestamp.localeCompare(left.timestamp)); + return captures.slice(0, limit); +} + +/** + * Retourne la capture la plus recente disponible pour une URL. + * @param {string} url URL d'origine. + * @returns {Promise} URL archive.org, ou null si aucune capture n'existe. */ async function getArchiveUrl(url) { - try { - const response = await fetch(`${ARCHIVE_API_URL}${encodeURIComponent(url)}`); - if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`); - const data = await response.json(); - return data.archived_snapshots?.closest?.url || null; - } catch (error) { - console.error(`❌ Archive.org API error: ${error.message}`); + const captures = await listArchiveCaptures(url, { limit: 1 }); + if (captures.length === 0) { return null; } + return captures[0].url; } /** - * Request Archive.org to save the given URL. - * @param {string} url - The URL to archive. - * @returns {Promise} - The permalink of the archived page if successful, otherwise null. + * Demande a Archive.org d'archiver une URL. + * @param {string} url URL a archiver. + * @returns {Promise} URL finale de la capture si disponible. */ async function saveToArchive(url) { - try { - const response = await fetch(`${ARCHIVE_SAVE_URL}${encodeURIComponent(url)}`, { method: "POST" }); - if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`); - return response.url.includes("/save/") ? 
null : response.url; - } catch (error) { - console.error(`❌ Failed to save URL to Archive.org: ${error.message}`); + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS); + const response = await fetch(`${ARCHIVE_SAVE_URL}${encodeURIComponent(url)}`, { + method: "POST", + signal: controller.signal, + }).finally(() => clearTimeout(timer)); + if (!response.ok) { + throw new Error(`Erreur de sauvegarde Archive.org (${response.status})`); + } + if (response.url.includes("/save/")) { return null; } + return response.url; } -module.exports = { getArchiveUrl, saveToArchive }; +module.exports = { + buildArchiveCaptureUrl, + listArchiveCaptures, + getArchiveUrl, + saveToArchive, +}; diff --git a/tools/lib/content.js b/tools/lib/content.js index ab12911e..fc81a9ad 100644 --- a/tools/lib/content.js +++ b/tools/lib/content.js @@ -92,6 +92,77 @@ async function resolveMarkdownTargets(inputs, { rootDir = process.cwd(), skipInd return Array.from(targets); } +/** + * Collecte tous les fichiers correspondant a une liste d'extensions. + * @param {string} rootDir Racine a parcourir. + * @param {string[]} extensions Extensions attendues, avec le point. + * @param {{ skipDirs?: string[] }} options Options de parcours. + * @returns {Promise} Fichiers trouves, tries par chemin. 
+ */ +async function collectFilesByExtensions(rootDir, extensions, options = {}) { + const normalizedExtensions = new Set(); + for (const extension of extensions) { + if (typeof extension !== "string") { + continue; + } + const candidate = extension.trim().toLowerCase(); + if (!candidate) { + continue; + } + normalizedExtensions.add(candidate); + } + + if (normalizedExtensions.size === 0) { + return []; + } + + const skipDirs = new Set([".git", "node_modules"]); + if (Array.isArray(options.skipDirs)) { + for (const directoryName of options.skipDirs) { + if (typeof directoryName !== "string") { + continue; + } + const candidate = directoryName.trim(); + if (!candidate) { + continue; + } + skipDirs.add(candidate); + } + } + + const files = []; + await walk(rootDir); + files.sort((a, b) => a.localeCompare(b)); + return files; + + async function walk(currentDir) { + const entries = await fs.readdir(currentDir, { withFileTypes: true }); + + for (const entry of entries) { + const fullPath = path.join(currentDir, entry.name); + + if (entry.isDirectory()) { + if (skipDirs.has(entry.name)) { + continue; + } + await walk(fullPath); + continue; + } + + if (!entry.isFile()) { + continue; + } + + const extension = path.extname(entry.name).toLowerCase(); + if (!normalizedExtensions.has(extension)) { + continue; + } + + files.push(fullPath); + } + } +} + async function collectBundles(rootDir) { const bundles = []; await walk(rootDir, rootDir, bundles); @@ -140,5 +211,6 @@ module.exports = { collectMarkdownFiles, collectSectionIndexDirs, resolveMarkdownTargets, + collectFilesByExtensions, collectBundles, }; diff --git a/tools/lib/external_links_report.js b/tools/lib/external_links_report.js new file mode 100644 index 00000000..b7f6f17f --- /dev/null +++ b/tools/lib/external_links_report.js @@ -0,0 +1,198 @@ +const fs = require("node:fs"); +const path = require("node:path"); +const yaml = require("js-yaml"); +const { loadToolsConfig } = require("./config"); + +const 
DEFAULT_CACHE_DIR = "tools/cache"; +const DEFAULT_CACHE_FILE = "external_links.yaml"; + +/** + * Resout le chemin du rapport des liens externes a partir de la configuration. + * @param {string} siteRoot Racine du projet. + * @returns {Promise} Chemin absolu du rapport YAML. + */ +async function resolveExternalLinksReportPath(siteRoot) { + const rootDir = path.resolve(siteRoot); + const configPath = path.join(rootDir, "tools", "config", "config.json"); + const config = await loadToolsConfig(configPath); + + let cacheDir = DEFAULT_CACHE_DIR; + const externalLinks = config.externalLinks; + if (externalLinks && typeof externalLinks.cacheDir === "string" && externalLinks.cacheDir.trim()) { + cacheDir = externalLinks.cacheDir.trim(); + } + + let cacheFile = DEFAULT_CACHE_FILE; + if (externalLinks && typeof externalLinks.cacheFile === "string" && externalLinks.cacheFile.trim()) { + cacheFile = externalLinks.cacheFile.trim(); + } + + let resolvedCacheDir = cacheDir; + if (!path.isAbsolute(resolvedCacheDir)) { + resolvedCacheDir = path.join(rootDir, resolvedCacheDir); + } + + if (path.isAbsolute(cacheFile)) { + return cacheFile; + } + + return path.join(resolvedCacheDir, cacheFile); +} + +/** + * Normalise la liste des emplacements associes a un lien. + * @param {unknown[]} rawLocations Emplacements bruts. 
+ * @returns {Array<{ file: string, line: number|null, page: string|null }>} + */ +function normalizeLocations(rawLocations) { + if (!Array.isArray(rawLocations)) { + return []; + } + + const locations = []; + for (const rawLocation of rawLocations) { + if (!rawLocation || typeof rawLocation !== "object") { + continue; + } + + let file = null; + if (typeof rawLocation.file === "string" && rawLocation.file.trim()) { + file = rawLocation.file.trim(); + } + if (!file) { + continue; + } + + let line = null; + if (typeof rawLocation.line === "number" && Number.isFinite(rawLocation.line)) { + line = rawLocation.line; + } + + let page = null; + if (typeof rawLocation.page === "string" && rawLocation.page.trim()) { + page = rawLocation.page.trim(); + } + + locations.push({ file, line, page }); + } + + return locations; +} + +/** + * Normalise une entree du rapport. + * @param {unknown} rawLink Entree brute. + * @returns {{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }|null} + */ +function normalizeLink(rawLink) { + if (!rawLink || typeof rawLink !== "object") { + return null; + } + if (typeof rawLink.url !== "string" || !rawLink.url.trim()) { + return null; + } + + let status = null; + if (typeof rawLink.status === "number" && Number.isFinite(rawLink.status)) { + status = rawLink.status; + } + if (typeof rawLink.status === "string" && rawLink.status.trim()) { + const parsedStatus = Number.parseInt(rawLink.status, 10); + if (!Number.isNaN(parsedStatus)) { + status = parsedStatus; + } + } + + return { + url: rawLink.url.trim(), + status, + locations: normalizeLocations(rawLink.locations), + }; +} + +/** + * Reconstitue une liste de liens a partir de la section entries du cache. + * @param {Record} entries Entrees brutes. 
+ * @returns {Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }>} + */ +function buildLinksFromEntries(entries) { + const links = []; + for (const [url, rawEntry] of Object.entries(entries)) { + let status = null; + let locations = null; + if (rawEntry && typeof rawEntry === "object") { + status = rawEntry.status; + locations = rawEntry.locations; + } + const normalized = normalizeLink({ + url, + status, + locations, + }); + if (normalized) { + links.push(normalized); + } + } + return links; +} + +/** + * Charge le rapport des liens externes. + * @param {string} reportPath Chemin absolu ou relatif du rapport YAML. + * @returns {{ generatedAt: string|null, links: Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }> }} + */ +function loadExternalLinksReport(reportPath) { + const resolvedPath = path.resolve(reportPath); + if (!fs.existsSync(resolvedPath)) { + return { generatedAt: null, links: [] }; + } + + const raw = yaml.load(fs.readFileSync(resolvedPath, "utf8")) || {}; + let links = []; + if (Array.isArray(raw.links)) { + for (const rawLink of raw.links) { + const normalized = normalizeLink(rawLink); + if (normalized) { + links.push(normalized); + } + } + } else if (raw.entries && typeof raw.entries === "object") { + links = buildLinksFromEntries(raw.entries); + } + + return { + generatedAt: raw.generatedAt || null, + links, + }; +} + +/** + * Filtre les liens du rapport par code de statut HTTP. + * @param {{ links?: Array<{ status: number|null }> }} report Rapport charge. + * @param {number} statusCode Code a retenir. 
+ * @returns {Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }>} + */ +function getLinksByStatus(report, statusCode) { + if (!report || !Array.isArray(report.links)) { + return []; + } + + const links = []; + for (const link of report.links) { + if (!link || typeof link !== "object") { + continue; + } + if (link.status !== statusCode) { + continue; + } + links.push(link); + } + + return links; +} + +module.exports = { + resolveExternalLinksReportPath, + loadExternalLinksReport, + getLinksByStatus, +}; diff --git a/tools/lib/url_replacements.js b/tools/lib/url_replacements.js new file mode 100644 index 00000000..0ed6881c --- /dev/null +++ b/tools/lib/url_replacements.js @@ -0,0 +1,92 @@ +const fs = require("node:fs/promises"); +const path = require("node:path"); +const { collectFilesByExtensions } = require("./content"); + +const DEFAULT_URL_TEXT_EXTENSIONS = Object.freeze([ + ".json", + ".markdown", + ".md", + ".yaml", + ".yml", +]); + +/** + * Compte les occurrences exactes d'une chaine dans un texte. + * @param {string} text Texte a analyser. + * @param {string} needle Chaine recherchee. + * @returns {number} Nombre d'occurrences trouvees. + */ +function countOccurrences(text, needle) { + if (typeof text !== "string") { + return 0; + } + if (typeof needle !== "string" || !needle) { + return 0; + } + return text.split(needle).length - 1; +} + +/** + * Retourne la liste des fichiers textuels contenant une URL donnee. + * @param {string} rootDir Racine a parcourir. + * @param {string} targetUrl URL a rechercher. + * @param {{ extensions?: string[] }} options Options de recherche. 
+ * @returns {Promise>} + */ +async function findUrlOccurrences(rootDir, targetUrl, options = {}) { + let extensions = DEFAULT_URL_TEXT_EXTENSIONS; + if (Array.isArray(options.extensions)) { + extensions = options.extensions; + } + const files = await collectFilesByExtensions(rootDir, extensions); + const matches = []; + + for (const filePath of files) { + const content = await fs.readFile(filePath, "utf8"); + const occurrences = countOccurrences(content, targetUrl); + if (occurrences <= 0) { + continue; + } + matches.push({ filePath, occurrences }); + } + + return matches; +} + +/** + * Remplace toutes les occurrences exactes d'une URL dans une liste de fichiers. + * @param {string} rootDir Racine de recherche. + * @param {string} targetUrl URL a remplacer. + * @param {string} replacementUrl URL de remplacement. + * @param {{ extensions?: string[], matches?: Array<{ filePath: string, occurrences: number }> }} options Options d'ecriture. + * @returns {Promise<{ changedFiles: string[], totalOccurrences: number }>} + */ +async function replaceUrlInFiles(rootDir, targetUrl, replacementUrl, options = {}) { + let matches = []; + if (Array.isArray(options.matches)) { + matches = options.matches; + } else { + matches = await findUrlOccurrences(rootDir, targetUrl, options); + } + + const changedFiles = []; + let totalOccurrences = 0; + + for (const match of matches) { + const filePath = path.resolve(match.filePath); + const content = await fs.readFile(filePath, "utf8"); + const updatedContent = content.split(targetUrl).join(replacementUrl); + await fs.writeFile(filePath, updatedContent, "utf8"); + changedFiles.push(filePath); + totalOccurrences += match.occurrences; + } + + return { changedFiles, totalOccurrences }; +} + +module.exports = { + DEFAULT_URL_TEXT_EXTENSIONS, + countOccurrences, + findUrlOccurrences, + replaceUrlInFiles, +}; diff --git a/tools/manage_dead_links.js b/tools/manage_dead_links.js new file mode 100644 index 00000000..393a4a29 --- /dev/null +++ 
b/tools/manage_dead_links.js @@ -0,0 +1,508 @@ +#!/usr/bin/env node + +const path = require("node:path"); +const { spawnSync } = require("node:child_process"); +const readline = require("node:readline/promises"); +const { stdin, stdout } = require("node:process"); +const { DateTime } = require("luxon"); +const { listArchiveCaptures } = require("./lib/archive"); +const { + resolveExternalLinksReportPath, + loadExternalLinksReport, + getLinksByStatus, +} = require("./lib/external_links_report"); +const { findUrlOccurrences, replaceUrlInFiles } = require("./lib/url_replacements"); + +const PROJECT_ROOT = path.resolve(__dirname, ".."); +const DEFAULT_CONTENT_DIR = path.join(PROJECT_ROOT, "content"); +const DEFAULT_STATUS_CODE = 404; + +/** + * Convertit une reponse utilisateur en boolen simple. + * @param {string} answer Reponse brute. + * @returns {boolean} true si la reponse signifie oui. + */ +function isYes(answer) { + if (typeof answer !== "string") { + return false; + } + const normalized = answer.trim().toLowerCase(); + return normalized === "o" || normalized === "oui" || normalized === "y" || normalized === "yes"; +} + +/** + * Resout un chemin CLI par rapport a la racine du projet. + * @param {string} value Valeur fournie en argument. + * @returns {string} Chemin absolu. + */ +function resolveCliPath(value) { + if (typeof value !== "string" || !value.trim()) { + throw new Error("Le chemin fourni est vide."); + } + const trimmed = value.trim(); + if (path.isAbsolute(trimmed)) { + return trimmed; + } + return path.resolve(PROJECT_ROOT, trimmed); +} + +/** + * Analyse les arguments de la ligne de commande. + * @param {string[]} argv Arguments bruts. 
+ * @returns {{ help: boolean, refresh: boolean, contentDir: string, reportPath: string|null }} + */ +function parseArgs(argv) { + const options = { + help: false, + refresh: true, + contentDir: DEFAULT_CONTENT_DIR, + reportPath: null, + }; + + for (const arg of argv.slice(2)) { + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--no-refresh") { + options.refresh = false; + continue; + } + if (arg.startsWith("--content-dir=")) { + options.contentDir = resolveCliPath(arg.slice("--content-dir=".length)); + continue; + } + if (arg.startsWith("--report-path=")) { + options.reportPath = resolveCliPath(arg.slice("--report-path=".length)); + continue; + } + throw new Error(`Argument non pris en charge : ${arg}`); + } + + return options; +} + +/** + * Affiche l'aide du script. + */ +function showUsage() { + console.log(`Utilisation : node tools/manage_dead_links.js [options] + +Options + --no-refresh Réutilise le rapport existant au lieu de relancer la vérification + --content-dir= Racine à parcourir pour les remplacements + --report-path= Rapport YAML à lire + --help, -h Affiche cette aide + +Notes + - Par défaut, le script relance tools/check_external_links.js avant le traitement. + - Les remplacements portent sur les fichiers .md, .markdown, .json, .yaml et .yml. + - L'action de suppression est réservée pour plus tard et n'est pas encore implémentée.`); +} + +/** + * Verifie que l'on ne combine pas des options incompatibles. + * @param {{ refresh: boolean, contentDir: string, reportPath: string|null }} options Options retenues. 
+ */ +function validateOptions(options) { + const usesCustomContentDir = path.resolve(options.contentDir) !== path.resolve(DEFAULT_CONTENT_DIR); + const usesCustomReportPath = options.reportPath !== null; + if (!options.refresh) { + return; + } + if (usesCustomContentDir || usesCustomReportPath) { + throw new Error( + "Les options --content-dir et --report-path nécessitent --no-refresh, car le contrôleur de liens actuel travaille sur le projet courant." + ); + } +} + +/** + * Relance la generation du rapport des liens externes. + */ +function refreshExternalLinksReport() { + const scriptPath = path.join(PROJECT_ROOT, "tools", "check_external_links.js"); + const result = spawnSync(process.execPath, [scriptPath], { + cwd: PROJECT_ROOT, + stdio: "inherit", + }); + + if (result.error) { + throw result.error; + } + if (result.status !== 0) { + throw new Error("La mise à jour du rapport des liens externes a échoué."); + } +} + +/** + * Formate un horodatage Archive.org. + * @param {string} timestamp Horodatage brut. + * @returns {string} Representation lisible. + */ +function formatArchiveTimestamp(timestamp) { + if (typeof timestamp !== "string") { + return "horodatage inconnu"; + } + const date = DateTime.fromFormat(timestamp, "yyyyLLddHHmmss", { zone: "utc" }); + if (!date.isValid) { + return timestamp; + } + return date.setLocale("fr").toFormat("dd/LL/yyyy HH:mm:ss 'UTC'"); +} + +/** + * Rend un chemin plus lisible pour la console. + * @param {string} filePath Chemin absolu ou relatif. + * @returns {string} Chemin affiche. + */ +function formatFilePath(filePath) { + const resolvedPath = path.resolve(filePath); + const relativePath = path.relative(PROJECT_ROOT, resolvedPath); + if (relativePath && !relativePath.startsWith("..")) { + return relativePath; + } + return resolvedPath; +} + +/** + * Affiche les emplacements connus pour un lien mort. + * @param {{ locations?: Array<{ file: string, line: number|null }> }} link Lien courant. 
+ */ +function showLocations(link) { + let locations = []; + if (Array.isArray(link.locations)) { + locations = link.locations; + } + if (locations.length === 0) { + console.log("Emplacements connus : aucun"); + return; + } + + console.log("Emplacements connus :"); + for (const location of locations) { + let label = ` - ${location.file}`; + if (typeof location.line === "number" && Number.isFinite(location.line)) { + label += `:${location.line}`; + } + console.log(label); + } +} + +/** + * Affiche les actions disponibles pour le lien courant. + * @param {boolean} allowArchive Indique si Archive.org reste propose. + */ +function showActions(allowArchive) { + console.log("Actions :"); + if (allowArchive) { + console.log(" 1. Remplacer par une capture Archive.org"); + } + console.log(" 2. Remplacer par une autre URL"); + console.log(" 3. Supprimer le lien (non implémenté)"); + console.log(" s. Passer au lien suivant"); + console.log(" q. Quitter"); +} + +/** + * Demande l'action a executer pour un lien. + * @param {readline.Interface} rl Interface readline. + * @param {boolean} allowArchive Indique si l'action Archive.org est disponible. + * @returns {Promise<"archive"|"custom"|"remove"|"skip"|"quit">} + */ +async function promptAction(rl, allowArchive) { + while (true) { + showActions(allowArchive); + const answer = (await rl.question("> ")).trim().toLowerCase(); + + if (answer === "1" && allowArchive) { + return "archive"; + } + if (answer === "2") { + return "custom"; + } + if (answer === "3") { + return "remove"; + } + if (answer === "s") { + return "skip"; + } + if (answer === "q") { + return "quit"; + } + + console.log("Choix invalide."); + } +} + +/** + * Propose une selection de captures Wayback a l'utilisateur. + * @param {readline.Interface} rl Interface readline. + * @param {string} deadUrl URL d'origine. 
+ * @returns {Promise<{ type: "selected", replacementUrl: string }|{ type: "unavailable" }|{ type: "cancel" }>} + */ +async function promptArchiveReplacement(rl, deadUrl) { + const captures = await listArchiveCaptures(deadUrl, { limit: 10 }); + + if (captures.length === 0) { + console.log("Aucune capture Archive.org exploitable n'a été trouvée pour cette URL."); + return { type: "unavailable" }; + } + + if (captures.length === 1) { + const capture = captures[0]; + console.log("Une capture Archive.org a été trouvée :"); + console.log(` - ${formatArchiveTimestamp(capture.timestamp)}`); + console.log(` ${capture.url}`); + const confirm = await rl.question("Utiliser cette capture ? (o/N) "); + if (isYes(confirm)) { + return { type: "selected", replacementUrl: capture.url }; + } + return { type: "cancel" }; + } + + console.log("Captures Archive.org disponibles (10 plus récentes) :"); + for (const [index, capture] of captures.entries()) { + console.log(` ${index + 1}. ${formatArchiveTimestamp(capture.timestamp)}`); + console.log(` ${capture.url}`); + } + + while (true) { + const answer = ( + await rl.question(`Choisissez une capture (1-${captures.length}) ou Entrée pour revenir au menu : `) + ).trim(); + if (!answer) { + return { type: "cancel" }; + } + + const selectedIndex = Number.parseInt(answer, 10); + if (Number.isNaN(selectedIndex)) { + console.log("Sélection invalide."); + continue; + } + if (selectedIndex < 1 || selectedIndex > captures.length) { + console.log("Sélection hors plage."); + continue; + } + + const capture = captures[selectedIndex - 1]; + return { type: "selected", replacementUrl: capture.url }; + } +} + +/** + * Demande une URL de remplacement manuelle. + * @param {readline.Interface} rl Interface readline. + * @param {string} deadUrl URL remplacee. + * @returns {Promise} URL choisie ou null si abandon. 
+ */ +async function promptCustomReplacement(rl, deadUrl) { + while (true) { + const answer = (await rl.question("Nouvelle URL (laisser vide pour revenir au menu) : ")).trim(); + if (!answer) { + return null; + } + + if (!URL.canParse(answer)) { + console.log("Cette URL n'est pas valide."); + continue; + } + const parsed = new URL(answer); + const protocol = parsed.protocol.toLowerCase(); + if (protocol !== "http:" && protocol !== "https:") { + console.log("Seules les URL http et https sont acceptées."); + continue; + } + if (answer === deadUrl) { + console.log("La nouvelle URL est identique à l'ancienne."); + continue; + } + + return answer; + } +} + +/** + * Affiche le plan de remplacement avant confirmation. + * @param {{ url: string }} link Lien traite. + * @param {string} replacementUrl URL de destination. + * @param {Array<{ filePath: string, occurrences: number }>} matches Fichiers concernes. + * @param {readline.Interface} rl Interface readline. + * @returns {Promise} true si l'utilisateur confirme. + */ +async function confirmReplacement(link, replacementUrl, matches, rl) { + let totalOccurrences = 0; + for (const match of matches) { + totalOccurrences += match.occurrences; + } + + console.log(`Remplacement prévu : ${link.url}`); + console.log(` vers ${replacementUrl}`); + console.log(`Occurrences : ${totalOccurrences} dans ${matches.length} fichier(s)`); + + for (const match of matches.slice(0, 10)) { + console.log(` - ${formatFilePath(match.filePath)} (${match.occurrences})`); + } + if (matches.length > 10) { + console.log(` - ... ${matches.length - 10} fichier(s) supplémentaire(s)`); + } + + const answer = await rl.question("Confirmer le remplacement ? (o/N) "); + return isYes(answer); +} + +/** + * Applique un remplacement deja choisi. + * @param {{ url: string }} link Lien courant. + * @param {string} replacementUrl URL de remplacement. + * @param {string} contentDir Racine de recherche. + * @param {readline.Interface} rl Interface readline. 
+ * @returns {Promise} true si des fichiers ont ete modifies. + */ +async function applyReplacement(link, replacementUrl, contentDir, rl) { + const matches = await findUrlOccurrences(contentDir, link.url); + if (matches.length === 0) { + console.log("Aucune occurrence exacte n'a été trouvée dans le contenu cible."); + return false; + } + + const confirmed = await confirmReplacement(link, replacementUrl, matches, rl); + if (!confirmed) { + console.log("Remplacement annulé."); + return false; + } + + const result = await replaceUrlInFiles(contentDir, link.url, replacementUrl, { matches }); + console.log( + `${result.totalOccurrences} occurrence(s) remplacee(s) dans ${result.changedFiles.length} fichier(s).` + ); + return result.changedFiles.length > 0; +} + +/** + * Traite un lien 404 dans une boucle interactive. + * @param {readline.Interface} rl Interface readline. + * @param {{ url: string, locations?: Array<{ file: string, line: number|null }> }} link Lien a gerer. + * @param {number} index Index humain. + * @param {number} total Nombre total de liens. + * @param {string} contentDir Racine du contenu. + * @returns {Promise<"changed"|"skipped"|"quit">} + */ +async function processLink(rl, link, index, total, contentDir) { + let allowArchive = true; + + while (true) { + console.log(""); + console.log(`[${index}/${total}] Lien 404`); + console.log(link.url); + showLocations(link); + + const action = await promptAction(rl, allowArchive); + + if (action === "quit") { + return "quit"; + } + if (action === "skip") { + return "skipped"; + } + if (action === "remove") { + console.log("La suppression n'est pas encore implémentée. 
Retour au menu."); + continue; + } + if (action === "custom") { + const replacementUrl = await promptCustomReplacement(rl, link.url); + if (!replacementUrl) { + continue; + } + const changed = await applyReplacement(link, replacementUrl, contentDir, rl); + if (changed) { + return "changed"; + } + continue; + } + if (action === "archive") { + const archiveSelection = await promptArchiveReplacement(rl, link.url); + if (archiveSelection.type === "unavailable") { + allowArchive = false; + continue; + } + if (archiveSelection.type === "cancel") { + continue; + } + const changed = await applyReplacement(link, archiveSelection.replacementUrl, contentDir, rl); + if (changed) { + return "changed"; + } + continue; + } + } +} + +/** + * Loads the list of 404 links to process. + * Uses options.reportPath when it is set; otherwise the path comes from + * resolveExternalLinksReportPath(PROJECT_ROOT). + * @param {{ reportPath: string|null }} options Script options. + * @returns {Promise<Array<object>>} Result of getLinksByStatus(report, DEFAULT_STATUS_CODE); the entry shape is not visible here - TODO confirm. + */ +async function load404Links(options) { + let reportPath = options.reportPath; + if (!reportPath) { + reportPath = await resolveExternalLinksReportPath(PROJECT_ROOT); + } + const report = loadExternalLinksReport(reportPath); + return getLinksByStatus(report, DEFAULT_STATUS_CODE); +} + /** + * Command-line entry point of the script. + * Parses process.argv; prints usage and returns when options.help is set. + * Otherwise validates the options, calls refreshExternalLinksReport() when options.refresh is set, + * loads the 404 links, passes each one to processLink, then prints the changed and skipped totals. + * Returns early with a message when there is no 404 link to process. + * @returns {Promise<void>} + */ +async function main() { + const options = parseArgs(process.argv); + if (options.help) { + showUsage(); + return; + } + + validateOptions(options); + + if (options.refresh) { + console.log("Actualisation du rapport des liens externes..."); + refreshExternalLinksReport(); + } + + const links = await load404Links(options); + if (links.length === 0) { + console.log("Aucun lien 404 à traiter."); + return; + } + + const rl = readline.createInterface({ input: stdin, output: stdout }); + let changedCount = 0; + let skippedCount = 0; + const interactiveRun = async () => { + for (const [index, link] of links.entries()) { + /* processLink receives the 1-based position of the link and the total number of links. */ const outcome = await processLink(rl, link, index + 1, links.length, options.contentDir); + if (outcome === "quit") { + break; + } + if (outcome === "changed") { + changedCount += 1; + continue; + } + /* Every outcome other than quit or changed is counted as skipped. */ skippedCount
+= 1; + } + }; + /* Close the readline interface whether the loop finishes or throws. */ await interactiveRun().finally(() => rl.close()); + + console.log(""); + console.log(`Traitement terminé : ${changedCount} lien(s) traité(s), ${skippedCount} lien(s) ignoré(s).`); + + if (changedCount > 0 && options.refresh) { + console.log("Régénération du rapport après modifications..."); + refreshExternalLinksReport(); + } + if (changedCount > 0 && !options.refresh) { + console.log("Le rapport n'a pas été régénéré car --no-refresh est actif."); + } +} + /* Run the script; on failure, print the error message and mark the run as failed through process.exitCode. */ +main().catch((error) => { + console.error("Erreur lors de la gestion des liens morts :", error.message); + process.exitCode = 1; +}); diff --git a/tools/tests/dead_links_tools.test.js b/tools/tests/dead_links_tools.test.js new file mode 100644 index 00000000..5af9f500 --- /dev/null +++ b/tools/tests/dead_links_tools.test.js @@ -0,0 +1,93 @@ +const test = require("node:test"); +const assert = require("node:assert/strict"); +const fs = require("node:fs/promises"); +const os = require("node:os"); +const path = require("node:path"); +const { findUrlOccurrences, replaceUrlInFiles } = require("../lib/url_replacements"); +const { loadExternalLinksReport, getLinksByStatus } = require("../lib/external_links_report"); + +/** + * Writes a text file, creating its parent directory first. + * @param {string} filePath Absolute path of the file. + * @param {string} content Content to write.
+ */ +async function writeFixture(filePath, content) { + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.writeFile(filePath, content, "utf8"); +} + +test("replaceUrlInFiles remplace les occurrences exactes dans markdown, yaml et json", async () => { + /* Fixtures: the dead URL appears twice in the markdown file and once in each of the YAML and JSON files. */ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "dead-links-replace-")); + const contentRoot = path.join(tempRoot, "content"); + const deadUrl = "https://dead.example.com/page"; + const replacementUrl = "https://archive.example.com/page"; + + await writeFixture( + path.join(contentRoot, "article", "index.md"), + `Lien [mort](${deadUrl})\nEncore ${deadUrl}\n` + ); + await writeFixture( + path.join(contentRoot, "article", "data", "meta.yaml"), + `source: "${deadUrl}"\n` + ); + await writeFixture( + path.join(contentRoot, "stats", "data.json"), + JSON.stringify({ url: deadUrl, untouched: "https://ok.example.com" }, null, 2) + ); + /* The assertion below expects one entry per file, in this exact order, with its occurrence count. NOTE(review): the expected relative paths use forward slashes while path.relative returns platform-specific separators - confirm whether this test must pass on Windows. */ + const matches = await findUrlOccurrences(contentRoot, deadUrl); + assert.deepStrictEqual( + matches.map((match) => [path.relative(contentRoot, match.filePath), match.occurrences]), + [ + ["article/data/meta.yaml", 1], + ["article/index.md", 2], + ["stats/data.json", 1], + ] + ); + /* The matches found above are handed to replaceUrlInFiles through its options argument. */ + const result = await replaceUrlInFiles(contentRoot, deadUrl, replacementUrl, { matches }); + assert.equal(result.totalOccurrences, 4); + assert.equal(result.changedFiles.length, 3); + + const markdown = await fs.readFile(path.join(contentRoot, "article", "index.md"), "utf8"); + const yaml = await fs.readFile(path.join(contentRoot, "article", "data", "meta.yaml"), "utf8"); + const json = await fs.readFile(path.join(contentRoot, "stats", "data.json"), "utf8"); + /* Only the markdown file is also checked for the absence of the dead URL. */ + assert.ok(markdown.includes(replacementUrl)); + assert.ok(!markdown.includes(deadUrl)); + assert.ok(yaml.includes(replacementUrl)); + assert.ok(json.includes(replacementUrl)); + /* NOTE(review): this cleanup is skipped when an assertion above throws; a try/finally would remove tempRoot in that case too. */ + await fs.rm(tempRoot, { recursive: true, force: true }); +}); + +test("loadExternalLinksReport retourne correctement les liens 404 du cache
YAML", async () => { + /* Write a minimal YAML report holding a single link with status 404 and one location. */ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "dead-links-cli-")); + const reportPath = path.join(tempRoot, "external_links.yaml"); + const deadUrl = "https://dead.example.com/article"; + await writeFixture( + reportPath, + [ + "generatedAt: '2026-03-25T21:00:00.000Z'", + "links:", + ` - url: ${deadUrl}`, + " status: 404", + " locations:", + " - file: content/demo/index.md", + " line: 5", + " page: /demo", + "", + ].join("\n") + ); + /* loadExternalLinksReport is called without await, so this test relies on it returning the report synchronously. */ + const report = loadExternalLinksReport(reportPath); + const deadLinks = getLinksByStatus(report, 404); + + assert.equal(report.generatedAt, "2026-03-25T21:00:00.000Z"); + assert.equal(deadLinks.length, 1); + assert.equal(deadLinks[0].url, deadUrl); + assert.equal(deadLinks[0].locations[0].file, "content/demo/index.md"); + assert.equal(deadLinks[0].locations[0].line, 5); + /* NOTE(review): this cleanup is skipped when an assertion above throws; a try/finally would remove tempRoot in that case too. */ + await fs.rm(tempRoot, { recursive: true, force: true }); +});