#!/usr/bin/env node
/**
 * External link checker: configuration, path helpers, cached-state handling
 * and YAML text helpers.
 *
 * This part of the script resolves the effective settings (defaults overlaid
 * with the `externalLinks` section of the JSON config file), defines how
 * content file paths are mapped to page paths, and loads / normalizes /
 * saves the YAML report that doubles as a cache of previous checks.
 */

const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { buildUserAgent, probeUrl, shouldRetry } = require("./lib/http");
const {
  collectMarkdownLinksFromFile,
  extractLinksFromText,
} = require("./lib/markdown_links");

// The site root is the parent of the directory holding this script.
const SITE_ROOT = path.resolve(__dirname, "..");
const CONTENT_DIR = path.join(SITE_ROOT, "content");
const CONFIG_PATH = path.join(__dirname, "config", "config.json");
const DAY_MS = 24 * 60 * 60 * 1000;

// Built-in settings; any key can be overridden by `externalLinks` in the
// config file.
const DEFAULT_CONFIG = {
  cacheDir: path.join(__dirname, "cache"),
  cacheFile: "external_links.yaml",
  hostDelayMs: 2000,
  retryDelayMs: 5000,
  requestTimeoutSeconds: 5,
  cacheTtlSuccessDays: 30,
  cacheTtlClientErrorDays: 7,
  cacheTtlServerErrorDays: 1,
  cacheTtlTimeoutDays: 7,
  maxConcurrentHosts: 4,
  maxRedirects: 5,
  userAgent: null,
};

/**
 * Reads and parses the JSON configuration file.
 *
 * @returns {object} The parsed configuration, or an empty object when the
 *   file does not exist or cannot be read/parsed (a warning is printed in
 *   the latter case).
 */
function loadConfig() {
  if (!fs.existsSync(CONFIG_PATH)) {
    return {};
  }
  try {
    return JSON.parse(fs.readFileSync(CONFIG_PATH, "utf8"));
  } catch (error) {
    console.warn(
      `Impossible de parser ${path.relative(SITE_ROOT, CONFIG_PATH)} (${error.message}).`
    );
    return {};
  }
}

const rawConfig = loadConfig();
const settings = {
  ...DEFAULT_CONFIG,
  ...(rawConfig.externalLinks || {}),
};

// A relative cache directory is resolved against the site root; a relative
// cache file name is resolved against the cache directory.
const CACHE_DIR = path.isAbsolute(settings.cacheDir)
  ? settings.cacheDir
  : path.resolve(SITE_ROOT, settings.cacheDir);
const REPORT_PATH = path.isAbsolute(settings.cacheFile)
  ? settings.cacheFile
  : path.join(CACHE_DIR, settings.cacheFile);

// Delays are clamped to be non-negative; unparsable values become 0.
const HOST_DELAY_MS = Math.max(0, Number(settings.hostDelayMs) || 0);
const RETRY_DELAY_MS = Math.max(0, Number(settings.retryDelayMs) || 0);
// The request timeout is at least one second; unparsable or zero values
// fall back to 5 seconds.
const REQUEST_TIMEOUT_SECONDS = Math.max(1, Number(settings.requestTimeoutSeconds) || 5);
const REQUEST_TIMEOUT_MS = REQUEST_TIMEOUT_SECONDS * 1000;
const MAX_CONCURRENT_HOSTS = Math.max(
  1,
  Number.isFinite(Number(settings.maxConcurrentHosts))
    ? Number(settings.maxConcurrentHosts)
    : DEFAULT_CONFIG.maxConcurrentHosts
);
const MAX_REDIRECTS = Math.max(
  0,
  Number.isFinite(Number(settings.maxRedirects))
    ? Number(settings.maxRedirects)
    : DEFAULT_CONFIG.maxRedirects
);
const DEFAULT_USER_AGENT = buildUserAgent(settings.userAgent);

// `pickNumber` and `daysToMs` are function declarations defined further
// down; hoisting makes them callable here.
const CACHE_TTL_SUCCESS_MS = daysToMs(
  pickNumber(settings.cacheTtlSuccessDays, DEFAULT_CONFIG.cacheTtlSuccessDays)
);
const CACHE_TTL_CLIENT_ERROR_MS = daysToMs(
  pickNumber(settings.cacheTtlClientErrorDays, DEFAULT_CONFIG.cacheTtlClientErrorDays)
);
const CACHE_TTL_SERVER_ERROR_MS = daysToMs(
  pickNumber(settings.cacheTtlServerErrorDays, DEFAULT_CONFIG.cacheTtlServerErrorDays)
);
const CACHE_TTL_TIMEOUT_MS = daysToMs(
  pickNumber(settings.cacheTtlTimeoutDays, DEFAULT_CONFIG.cacheTtlTimeoutDays)
);

// Module-level side effect: the cache directory is created on load.
fs.mkdirSync(CACHE_DIR, { recursive: true });

// Options shared by every probe; callers add the HTTP method.
const BASE_HTTP_OPTIONS = {
  userAgent: DEFAULT_USER_AGENT,
  timeoutMs: REQUEST_TIMEOUT_MS,
  maxRedirects: MAX_REDIRECTS,
};

/**
 * Converts a value to a number, falling back when the result is not finite.
 *
 * @param {*} value - Candidate value (number, numeric string, ...).
 * @param {number} fallback - Returned when `Number(value)` is not finite.
 * @returns {number}
 */
function pickNumber(value, fallback) {
  const parsed = Number(value);
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return fallback;
}

/**
 * Converts a number of days to milliseconds.
 *
 * @param {number} days
 * @returns {number} `days * DAY_MS`, or 0 for non-finite or non-positive input.
 */
function daysToMs(days) {
  if (!Number.isFinite(days) || days <= 0) {
    return 0;
  }
  return days * DAY_MS;
}

/**
 * Creates the parent directory of `targetFile` (recursively) if needed.
 *
 * @param {string} targetFile - Path of the file about to be written.
 */
function ensureDirectoryExists(targetFile) {
  fs.mkdirSync(path.dirname(targetFile), { recursive: true });
}

/**
 * Replaces the platform path separator with `/`.
 *
 * @param {*} relativePath
 * @returns {*} The converted string; non-string input is returned unchanged.
 */
function toPosix(relativePath) {
  return typeof relativePath === "string"
    ? relativePath.split(path.sep).join("/")
    : relativePath;
}

/**
 * Expresses a file path relative to the site root, with `/` separators.
 *
 * @param {string} filePath
 * @returns {string}
 */
function relativeToSite(filePath) {
  return toPosix(path.relative(SITE_ROOT, filePath));
}

/**
 * Maps a content-relative Markdown path to a page path.
 *
 * A leading `content/` prefix, a trailing `index.md` / `_index.md` segment
 * (or a bare `.md` extension) and redundant slashes are removed; the result
 * always starts with `/`.
 *
 * @param {string} relativeContentPath
 * @returns {string|null} The page path, `/` for the root, or `null` for
 *   empty input.
 */
function toPagePath(relativeContentPath) {
  if (!relativeContentPath) return null;
  let normalized = toPosix(relativeContentPath);
  if (!normalized) return null;
  normalized = normalized.replace(/^content\//, "");
  if (!normalized) {
    return "/";
  }
  // `(^|\/)` also matches an index file sitting directly at the content
  // root, so that `_index.md` maps to "/" rather than "/_index".
  normalized = normalized.replace(/(^|\/)index\.md$/i, "");
  normalized = normalized.replace(/(^|\/)_index\.md$/i, "");
  normalized = normalized.replace(/\.md$/i, "");
  normalized = normalized.replace(/\/+/g, "/");
  normalized = normalized.replace(/\/+$/, "");
  normalized = normalized.replace(/^\/+/, "");
  if (!normalized) {
    return "/";
  }
  return `/${normalized}`;
}

/**
 * Resolves the page path for a file located under a `/data/` directory.
 *
 * Everything from `/data/` onward is dropped to obtain the owning directory;
 * if that directory contains `index.md` or `_index.md`, the page path is
 * derived from that file, otherwise from the directory itself.
 *
 * @param {string} contentRelative - Path relative to the content directory.
 * @returns {string|null}
 */
function deriveBundlePagePath(contentRelative) {
  if (!contentRelative) return null;
  const bundleRoot = contentRelative.replace(/\/data\/.*$/, "");
  const candidates = [`${bundleRoot}/index.md`, `${bundleRoot}/_index.md`];
  for (const candidate of candidates) {
    const absolute = path.join(CONTENT_DIR, candidate);
    if (fs.existsSync(absolute)) {
      return toPagePath(candidate);
    }
  }
  return toPagePath(bundleRoot);
}

/**
 * Derives the page path for a site-relative file path.
 *
 * @param {*} relativeFile - Path relative to the site root.
 * @returns {string|null} `null` unless the input is a string that starts
 *   with `content/`.
 */
function derivePagePath(relativeFile) {
  if (typeof relativeFile !== "string") return null;
  const normalized = toPosix(relativeFile);
  if (!normalized || !normalized.startsWith("content/")) return null;
  const contentRelative = normalized.slice("content/".length);
  if (contentRelative.includes("/data/")) {
    return deriveBundlePagePath(contentRelative);
  }
  return toPagePath(contentRelative);
}

/**
 * Loads the previously saved report/cache.
 *
 * Two layouts are accepted: an object with an `entries` map, or a bare map
 * keyed by URL (the whole payload is then treated as the entries).
 *
 * @returns {{generatedAt: (string|null), links: Array, entries: object}}
 *   An empty state when the file is missing or unreadable (a warning is
 *   printed in the latter case).
 */
function loadState() {
  const emptyState = () => ({ generatedAt: null, links: [], entries: {} });
  if (!fs.existsSync(REPORT_PATH)) {
    return emptyState();
  }
  try {
    // NOTE(review): with js-yaml < 4, `yaml.load` can construct JS-specific
    // types; the report is assumed to be written by this tool only.
    const payload = yaml.load(fs.readFileSync(REPORT_PATH, "utf8")) || {};
    const hasEntriesMap = payload.entries && typeof payload.entries === "object";
    return {
      generatedAt: payload.generatedAt || null,
      links: Array.isArray(payload.links) ? payload.links : [],
      entries: normalizeEntries(hasEntriesMap ? payload.entries : payload),
    };
  } catch (error) {
    console.warn(
      `Impossible de lire ${path.relative(SITE_ROOT, REPORT_PATH)} (${error.message}).`
    );
    return emptyState();
  }
}

/**
 * Normalizes a raw URL-keyed map of entries.
 *
 * @param {*} rawEntries
 * @returns {object} Map of URL to normalized entry; keys without `://` are
 *   dropped, and non-object input yields an empty map.
 */
function normalizeEntries(rawEntries) {
  const normalized = {};
  if (!rawEntries || typeof rawEntries !== "object") {
    return normalized;
  }
  for (const [url, data] of Object.entries(rawEntries)) {
    // Skips non-URL keys such as `generatedAt` / `links` when the whole
    // payload is passed in.
    if (!url.includes("://")) {
      continue;
    }
    normalized[url] = normalizeEntryShape(url, data);
  }
  return normalized;
}

/**
 * Coerces one stored entry into the current entry shape.
 *
 * @param {string} url
 * @param {*} raw - Stored data; `checked` and `files` are accepted as
 *   alternatives to `checkedAt` and `locations`.
 * @returns {{url: string, status: (number|null), errorType: *, method: *,
 *   checkedAt: *, locations: Array}}
 */
function normalizeEntryShape(url, raw) {
  const checkedAt = raw?.checkedAt || raw?.checked || null;
  const locations = normalizeLocations(raw?.locations, raw?.files);
  return {
    url,
    status: typeof raw?.status === "number" ? raw.status : null,
    errorType: raw?.errorType || null,
    method: raw?.method || null,
    checkedAt,
    locations,
  };
}

/**
 * Normalizes a list of locations into `{file, line, page}` objects.
 *
 * @param {*} locations - Array whose items are either `"file:line"` strings
 *   or objects with `file`, optional numeric `line` and optional `page`.
 * @param {*} fallbackFiles - Array of file paths used only when `locations`
 *   produced nothing.
 * @returns {Array<{file: string, line: (number|null), page: (string|null)}>}
 *   Deduplicated and sorted locations.
 */
function normalizeLocations(locations, fallbackFiles) {
  const items = [];
  if (Array.isArray(locations)) {
    for (const entry of locations) {
      if (!entry) continue;
      if (typeof entry === "string") {
        const [filePart, linePart] = entry.split(":");
        const filePath = toPosix(filePart.trim());
        items.push({
          file: filePath,
          line: linePart ? Number.parseInt(linePart, 10) || null : null,
          page: derivePagePath(filePath),
        });
      } else if (typeof entry === "object") {
        const file = sizeof(entry.file) ? entry.file : null;
        if (file) {
          const normalizedFile = toPosix(file);
          items.push({
            file: normalizedFile,
            line: typeof entry.line === "number" ? entry.line : null,
            page:
              typeof entry.page === "string" && entry.page.trim()
                ? toPosix(entry.page.trim())
                : derivePagePath(normalizedFile),
          });
        }
      }
    }
  }
  if (items.length === 0 && Array.isArray(fallbackFiles)) {
    for (const file of fallbackFiles) {
      if (!file) continue;
      const normalizedFile = toPosix(file);
      items.push({
        file: normalizedFile,
        line: null,
        page: derivePagePath(normalizedFile),
      });
    }
  }
  return dedupeAndSortLocations(items);
}

/**
 * Tells whether a value is a string containing non-whitespace characters.
 *
 * @param {*} value
 * @returns {boolean}
 */
function sizeof(value) {
  return typeof value === "string" && value.trim().length > 0;
}

/**
 * Removes duplicate locations and sorts them by file, then by line.
 *
 * Two locations are duplicates when they share the same normalized file and
 * line; the first one wins. Locations without a line sort last within a file.
 *
 * @param {*} list
 * @returns {Array<{file: string, line: (number|null), page: (string|null)}>}
 *   A new array; empty for non-array or empty input.
 */
function dedupeAndSortLocations(list) {
  if (!Array.isArray(list) || list.length === 0) {
    return [];
  }
  const map = new Map();
  for (const item of list) {
    if (!item?.file) continue;
    // Key on the normalized path so that the same file written with
    // different separators is deduplicated.
    const normalizedFile = toPosix(item.file);
    const key = `${normalizedFile}::${item.line ?? ""}`;
    if (map.has(key)) continue;
    map.set(key, {
      file: normalizedFile,
      line: typeof item.line === "number" ? item.line : null,
      page:
        typeof item.page === "string" && item.page.trim()
          ? toPosix(item.page.trim())
          : derivePagePath(normalizedFile),
    });
  }
  return Array.from(map.values()).sort((a, b) => {
    const fileDiff = a.file.localeCompare(b.file);
    if (fileDiff !== 0) return fileDiff;
    const lineA = a.line ?? Number.POSITIVE_INFINITY;
    const lineB = b.line ?? Number.POSITIVE_INFINITY;
    return lineA - lineB;
  });
}

/**
 * Serializes the state as YAML and writes it to the report path, creating
 * the parent directory when needed.
 *
 * @param {object} state
 */
function saveState(state) {
  ensureDirectoryExists(REPORT_PATH);
  fs.writeFileSync(REPORT_PATH, yaml.dump(state), "utf8");
}

/**
 * Builds an entry for `url`, optionally seeded from an existing one.
 *
 * @param {string} url
 * @param {object} [existing={}] - Source of `status`, `errorType`, `method`,
 *   `checkedAt` and `locations`.
 * @returns {object} A new entry with missing fields set to `null` / `[]`.
 */
function createEntry(url, existing = {}) {
  return {
    url,
    status: typeof existing.status === "number" ? existing.status : null,
    errorType: existing.errorType || null,
    method: existing.method || null,
    checkedAt: existing.checkedAt || null,
    locations: Array.isArray(existing.locations)
      ? dedupeAndSortLocations(existing.locations)
      : [],
  };
}

/**
 * Combines cached entries with the occurrences found in the current scan.
 *
 * Only URLs present in `occurrences` are kept; each keeps its cached check
 * result (if any) and gets its locations replaced by the current ones.
 *
 * @param {object} entries - Cached entries keyed by URL.
 * @param {Map<string, Array>} occurrences - URL to list of locations.
 * @returns {object} New map of entries keyed by URL.
 */
function mergeOccurrences(entries, occurrences) {
  const merged = {};
  for (const [url, urlOccurrences] of occurrences.entries()) {
    const existing = entries[url] || createEntry(url);
    merged[url] = {
      ...existing,
      url,
      locations: dedupeAndSortLocations(urlOccurrences),
    };
  }
  return merged;
}

/**
 * Records that `url` appears in `filePath` at `line`, ignoring repeats of
 * the same file/line pair.
 *
 * @param {Map<string, Array>} map - Mutated in place.
 * @param {string} filePath - Path of the file containing the link.
 * @param {*} line - Line number; anything but a finite number is stored as
 *   `null`.
 * @param {string} url
 */
function recordOccurrence(map, filePath, line, url) {
  if (!map.has(url)) {
    map.set(url, []);
  }
  const relativeFile = relativeToSite(filePath);
  const normalizedLine = typeof line === "number" && Number.isFinite(line) ? line : null;
  const pagePath = derivePagePath(relativeFile);
  const list = map.get(url);
  const key = `${relativeFile}:${normalizedLine ?? ""}`;
  if (!list.some((item) => `${item.file}:${item.line ?? ""}` === key)) {
    list.push({ file: relativeFile, line: normalizedLine, page: pagePath });
  }
}

/**
 * Removes a trailing `# comment` from a single line of YAML.
 *
 * Quoted scalars are tracked so that a `#` inside quotes is kept: single
 * quotes escape themselves by doubling (`''`), double quotes use backslash
 * escapes. Outside quotes, `#` starts a comment only at the beginning of the
 * line or right after whitespace, so URL fragments such as
 * `https://host/page#anchor` are left intact.
 *
 * @param {string} line
 * @returns {string} The text before the comment (trailing whitespace is
 *   kept), or the whole line when there is no comment.
 */
function stripYamlInlineComment(line) {
  let inSingle = false;
  let inDouble = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inDouble) {
      if (ch === "\\") {
        // Skip the escaped character; this is what makes a following
        // unescaped `"` unambiguous, including after `\\`.
        i++;
      } else if (ch === '"') {
        inDouble = false;
      }
      continue;
    }
    if (inSingle) {
      if (ch === "'") {
        if (line[i + 1] === "'") {
          i++; // `''` is an escaped quote, not a terminator.
        } else {
          inSingle = false;
        }
      }
      continue;
    }
    if (ch === '"') {
      inDouble = true;
    } else if (ch === "'") {
      inSingle = true;
    } else if (ch === "#" && (i === 0 || /\s/.test(line[i - 1]))) {
      return line.slice(0, i);
    }
  }
  return line;
}

/**
 * Tells whether a line, once trimmed, starts with `#`.
 *
 * @param {string} line
 * @returns {boolean}
 */
function isYamlCommentLine(line) {
  return line.trim().startsWith("#");
}

/**
 * Tells whether a line ends with a block scalar header (`key: |`, `key: >`,
 * optionally followed by digits / `+` / `-`), ignoring any inline comment.
 *
 * @param {string} line
 * @returns {boolean}
 */
function isBlockScalarIndicator(line) {
  const cleaned = stripYamlInlineComment(line).trim();
  return /:\s*[>|][0-9+-]*\s*$/.test(cleaned);
}

/**
 * Walks a parsed YAML value and collects the links found in every string.
 *
 * @param {*} obj - String, array, object or any other value (ignored).
 * @param {Set<string>} [links] - Accumulator; a new set by default.
 * @returns {Set<string>} The accumulator.
 */
function processYamlRecursively(obj, links = new Set()) {
  if (typeof obj === "string") {
    for (const link of extractLinksFromText(obj)) {
      links.add(link);
    }
  } else if (Array.isArray(obj)) {
    for (const item of obj) {
      processYamlRecursively(item, links);
    }
  } else if (obj && typeof obj === "object") {
    for (const value of Object.values(obj)) {
      processYamlRecursively(value, links);
    }
  }
  return links;
}
/**
 * Records every link found in a Markdown file.
 *
 * @param {string} filePath - Path of the Markdown file.
 * @param {Map<string, Array>} occurrences - URL to locations; mutated.
 * @returns {Promise<void>}
 */
async function collectMarkdownLinks(filePath, occurrences) {
  const entries = await collectMarkdownLinksFromFile(filePath);
  for (const { url, line } of entries) {
    recordOccurrence(occurrences, filePath, line, url);
  }
}

/**
 * Records every link found in the string values of a YAML file.
 *
 * The file is parsed to learn which links are real values, then scanned line
 * by line to attach line numbers to them. Links that cannot be located on
 * any line are recorded with a `null` line.
 *
 * @param {string} filePath - Path of the YAML file.
 * @param {Map<string, Array>} occurrences - URL to locations; mutated.
 * @returns {Promise<void>} Resolves without recording anything when the file
 *   cannot be read or parsed (a warning is printed).
 */
async function collectYamlLinks(filePath, occurrences) {
  let source;
  let linkSet = new Set();
  try {
    // Read once and reuse the text for both parsing and the line scan.
    source = fs.readFileSync(filePath, "utf8");
    // NOTE(review): with js-yaml < 4, `yaml.load` can construct JS-specific
    // types; content files are assumed to be trusted.
    const doc = yaml.load(source);
    if (doc) {
      linkSet = processYamlRecursively(doc);
    }
  } catch (error) {
    console.warn(`Impossible de parser ${relativeToSite(filePath)} (${error.message}).`);
    return;
  }
  if (linkSet.size === 0) {
    return;
  }

  const recorded = new Map();
  const lines = source.split(/\r?\n/);
  let inBlockScalar = false;
  let blockIndent = 0;

  // Records (url, line) at most once.
  const mark = (url, lineNumber) => {
    if (!recorded.has(url)) {
      recorded.set(url, new Set());
    }
    const seenLines = recorded.get(url);
    if (!seenLines.has(lineNumber)) {
      seenLines.add(lineNumber);
      recordOccurrence(occurrences, filePath, lineNumber, url);
    }
  };

  // Marks the links of `text` that are known values of the parsed document.
  const markLinksIn = (text, lineNumber) => {
    for (const link of extractLinksFromText(text)) {
      if (linkSet.has(link)) {
        mark(link, lineNumber);
      }
    }
  };

  for (let index = 0; index < lines.length; index++) {
    const lineNumber = index + 1;
    const line = lines[index];
    const indent = line.match(/^\s*/)?.[0].length ?? 0;
    const trimmed = line.trim();

    if (inBlockScalar) {
      if (trimmed === "") {
        // Blank lines are part of a block scalar and never end it.
        continue;
      }
      if (indent >= blockIndent) {
        // Inside a block scalar everything is content, including lines
        // that start with `#`.
        markLinksIn(line, lineNumber);
        continue;
      }
      // A less-indented, non-blank line ends the scalar; handle it below.
      inBlockScalar = false;
    }

    const withoutComment = stripYamlInlineComment(line);
    const trimmedWithoutComment = withoutComment.trim();
    if (isBlockScalarIndicator(line)) {
      inBlockScalar = true;
      // Content must be indented deeper than the line carrying the header.
      blockIndent = indent + 1;
    }
    if (isYamlCommentLine(line) || !trimmedWithoutComment) {
      continue;
    }
    markLinksIn(withoutComment, lineNumber);
  }

  // Links present in the parsed values but not found on any scanned line.
  for (const link of linkSet) {
    if (!recorded.has(link) || recorded.get(link).size === 0) {
      recordOccurrence(occurrences, filePath, null, link);
    }
  }
}

/**
 * Recursively lists the files under `dir` whose extension is in `exts`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} exts - Extensions including the dot, e.g. `".md"`.
 * @returns {string[]} Matching file paths.
 */
function walk(dir, exts) {
  const results = [];
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of walk(fullPath, exts)) {
        results.push(nested);
      }
    } else if (exts.includes(path.extname(entry.name))) {
      results.push(fullPath);
    }
  }
  return results;
}

/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// Host name -> timestamp (ms) of the last request sent to that host.
const lastHostChecks = new Map();

/**
 * Waits until at least `HOST_DELAY_MS` has elapsed since the last recorded
 * request to `host`. Does nothing for an empty host or a zero delay.
 *
 * @param {string|null} host
 * @returns {Promise<void>}
 */
async function applyHostDelay(host) {
  if (!host || HOST_DELAY_MS <= 0) {
    return;
  }
  const last = lastHostChecks.get(host);
  if (last) {
    const elapsed = Date.now() - last;
    const wait = HOST_DELAY_MS - elapsed;
    if (wait > 0) {
      await delay(wait);
    }
  }
}

/**
 * Remembers the current time as the last request time for `host`.
 *
 * @param {string|null} host - Ignored when falsy.
 */
function recordHostCheck(host) {
  if (host) {
    lastHostChecks.set(host, Date.now());
  }
}

/**
 * Extracts the host name of a URL.
 *
 * @param {string} url
 * @returns {string|null} `null` when the URL cannot be parsed.
 */
function extractHost(url) {
  try {
    return new URL(url).hostname;
  } catch (_) {
    return null;
  }
}

/**
 * Selects the cache lifetime that applies to an entry's last result.
 *
 * @param {object|null|undefined} entry
 * @returns {number} Milliseconds: the timeout TTL for timeouts, a null / 0 /
 *   NaN status or a status below 200; the server-error TTL for >= 500; the
 *   client-error TTL for 400-499; the success TTL for 200-399; 0 when there
 *   is no entry.
 */
function getTtlMs(entry) {
  if (!entry) return 0;
  if (entry.errorType === "timeout" || entry.status === 0 || entry.status === null) {
    return CACHE_TTL_TIMEOUT_MS;
  }
  const status = Number(entry.status);
  if (Number.isNaN(status)) {
    return CACHE_TTL_TIMEOUT_MS;
  }
  if (status >= 500) {
    return CACHE_TTL_SERVER_ERROR_MS;
  }
  if (status >= 400) {
    return CACHE_TTL_CLIENT_ERROR_MS;
  }
  if (status >= 200 && status < 400) {
    return CACHE_TTL_SUCCESS_MS;
  }
  return CACHE_TTL_TIMEOUT_MS;
}

/**
 * Tells whether an entry must be (re)checked.
 *
 * @param {object|null|undefined} entry
 * @returns {boolean} `true` when the entry was never checked, its timestamp
 *   is unparsable, its TTL is zero, or the TTL has elapsed.
 */
function needsCheck(entry) {
  if (!entry?.checkedAt) {
    return true;
  }
  const checked = Date.parse(entry.checkedAt);
  if (Number.isNaN(checked)) {
    return true;
  }
  const ttl = getTtlMs(entry);
  if (ttl <= 0) {
    return true;
  }
  return Date.now() - checked >= ttl;
}

/**
 * Groups entries by host name so that each host can be processed serially.
 *
 * @param {Iterable<object>} entries - Entries with a `url` property.
 * @returns {Array<{host: (string|null), entries: object[]}>} One group per
 *   host; each entry with an unparsable URL gets its own group with a
 *   `null` host.
 */
function groupEntriesByHost(entries) {
  const groups = new Map();
  for (const entry of entries) {
    const host = extractHost(entry.url);
    const key = host || `__invalid__:${entry.url}`;
    if (!groups.has(key)) {
      groups.set(key, { host, entries: [] });
    }
    groups.get(key).entries.push(entry);
  }
  return Array.from(groups.values());
}

/**
 * Runs `worker` over `items` with at most `concurrency` calls in flight.
 *
 * After a worker fails, no further items are started; the function waits
 * for the calls already in flight and then rejects with the first failure,
 * so errors are never silently dropped.
 *
 * @param {Iterable<*>} items
 * @param {function(*): (Promise<*>|*)} worker
 * @param {number} concurrency
 * @returns {Promise<void>}
 */
async function runWithConcurrency(items, worker, concurrency) {
  const inFlight = new Set();
  const failures = [];
  for (const item of items) {
    if (failures.length > 0) {
      break;
    }
    // Each task records its own failure and removes itself from the set,
    // so the tracked promises themselves never reject.
    const task = Promise.resolve()
      .then(() => worker(item))
      .catch((error) => {
        failures.push(error);
      })
      .finally(() => {
        inFlight.delete(task);
      });
    inFlight.add(task);
    if (inFlight.size >= concurrency) {
      await Promise.race(inFlight);
    }
  }
  await Promise.all(inFlight);
  if (failures.length > 0) {
    throw failures[0];
  }
}

/**
 * Copies a probe result onto an entry and stamps it with the current time.
 *
 * @param {object} entry - Mutated in place.
 * @param {object} result - Probe result; `status`, `errorType` and `method`
 *   are read.
 */
function updateEntryWithResult(entry, result) {
  const now = new Date().toISOString();
  entry.status = typeof result.status === "number" ? result.status : null;
  entry.errorType = result.errorType || null;
  // Normalized to null like everywhere else an entry is built, so that no
  // `undefined` ends up in the saved state.
  entry.method = result.method || null;
  entry.checkedAt = now;
}

/**
 * Produces the status shown in the report for an entry.
 *
 * @param {object|null|undefined} entry
 * @returns {number|string} `"timeout"`, the numeric status, or `"error"`.
 */
function formatStatusForReport(entry) {
  if (!entry) return "error";
  if (entry.errorType === "timeout") return "timeout";
  if (typeof entry.status === "number") return entry.status;
  return "error";
}

/**
 * Tells whether an entry counts as a dead link.
 *
 * @param {object|null|undefined} entry
 * @returns {boolean} `true` for timeouts, non-numeric statuses and statuses
 *   >= 400; `false` when there is no entry.
 */
function isDead(entry) {
  if (!entry) return false;
  if (entry.errorType === "timeout") return true;
  if (typeof entry.status !== "number") return true;
  // NOTE(review): a numeric status of 0 is not dead here although getTtlMs
  // gives it the timeout TTL — confirm this is intended.
  return entry.status >= 400;
}

/**
 * Gives the sort rank of a report status.
 *
 * @param {number|string} value
 * @returns {number} The status itself for finite numbers, 10000 for
 *   `"timeout"` (case-insensitive), 10001 for anything else.
 */
function getStatusOrder(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }
  const label = typeof value === "string" ? value.toLowerCase() : "";
  if (label === "timeout") {
    return 10000;
  }
  return 10001;
}

/**
 * Builds the sorted list of dead links for the report.
 *
 * @param {object} entries - Entries keyed by URL.
 * @returns {Array<{url: string, status: (number|string), locations: Array}>}
 *   Sorted by status rank, then status label, then URL.
 */
function buildDeadLinks(entries) {
  const list = [];
  for (const entry of Object.values(entries)) {
    if (!isDead(entry)) continue;
    list.push({
      url: entry.url,
      status: formatStatusForReport(entry),
      locations: entry.locations || [],
    });
  }
  return list.sort((a, b) => {
    const orderDiff = getStatusOrder(a.status) - getStatusOrder(b.status);
    if (orderDiff !== 0) return orderDiff;
    // Equal ranks (e.g. two 404s) must fall through to the URL tie-break.
    const labelDiff = String(a.status).localeCompare(String(b.status));
    if (labelDiff !== 0) return labelDiff;
    return a.url.localeCompare(b.url);
  });
}

/**
 * Rewrites the current terminal line with the progress counter.
 *
 * @param {number} processed
 * @param {number} total
 */
function logProgress(processed, total) {
  process.stdout.write(`\rURLs vérifiées ${processed}/${total}`);
}

/**
 * Scans the content directory for links in Markdown and YAML files.
 *
 * @returns {Promise<Map<string, Array>>} URL to list of locations.
 */
async function collectOccurrences() {
  const occurrences = new Map();
  const mdFiles = walk(CONTENT_DIR, [".md", ".markdown"]);
  for (const file of mdFiles) {
    await collectMarkdownLinks(file, occurrences);
  }
  const yamlFiles = walk(CONTENT_DIR, [".yaml", ".yml"]);
  for (const file of yamlFiles) {
    await collectYamlLinks(file, occurrences);
  }
  return occurrences;
}

/**
 * Saves the current entries together with the metadata of the previous
 * report, so that progress is kept while checks are still running.
 *
 * @param {object} entries - Entries keyed by URL.
 * @param {object} [snapshotMeta] - Supplies `generatedAt` and `links`.
 */
function persistEntriesSnapshot(entries, snapshotMeta) {
  const payload = {
    generatedAt: snapshotMeta?.generatedAt || null,
    links: Array.isArray(snapshotMeta?.links) ? snapshotMeta.links : [],
    entries,
  };
  saveState(payload);
}

/**
 * Probes the given entries and stores the results.
 *
 * Entries are grouped by host; groups run concurrently (bounded by
 * `MAX_CONCURRENT_HOSTS`) while URLs of one host are probed one after the
 * other with the per-host delay. Each URL is tried with HEAD first and, when
 * `shouldRetry` says so, once more with GET after the retry delay. The state
 * is saved after every URL.
 *
 * @param {object[]} entriesToCheck - Entries that need a check.
 * @param {object} entries - All entries keyed by URL; updated in place.
 * @param {object} snapshotMeta - Metadata passed to the snapshot writer.
 * @returns {Promise<void>}
 */
async function checkEntries(entriesToCheck, entries, snapshotMeta) {
  if (entriesToCheck.length === 0) {
    return;
  }
  const hostGroups = groupEntriesByHost(entriesToCheck);
  const concurrency = Math.max(1, Math.min(MAX_CONCURRENT_HOSTS, hostGroups.length));
  let processed = 0;
  process.stdout.write(`Vérification de ${entriesToCheck.length} URL...\n`);

  await runWithConcurrency(
    hostGroups,
    async ({ host, entries: groupEntries }) => {
      for (const entry of groupEntries) {
        if (host) {
          await applyHostDelay(host);
        }
        let result = await probeUrl(entry.url, { ...BASE_HTTP_OPTIONS, method: "HEAD" });
        recordHostCheck(host);

        if (shouldRetry(result)) {
          if (RETRY_DELAY_MS > 0) {
            await delay(RETRY_DELAY_MS);
          }
          if (host) {
            await applyHostDelay(host);
          }
          result = await probeUrl(entry.url, { ...BASE_HTTP_OPTIONS, method: "GET" });
          recordHostCheck(host);
        }

        updateEntryWithResult(entries[entry.url], result);
        persistEntriesSnapshot(entries, snapshotMeta);
        processed += 1;
        logProgress(processed, entriesToCheck.length);
      }
    },
    concurrency
  );
  process.stdout.write("\n");
}

/**
 * Entry point: collects links, checks the ones whose cached result is
 * missing or expired, and writes the report.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const occurrences = await collectOccurrences();
  if (occurrences.size === 0) {
    const emptyState = { generatedAt: new Date().toISOString(), links: [], entries: {} };
    saveState(emptyState);
    console.log("Aucun lien externe détecté.");
    return;
  }

  const state = loadState();
  const mergedEntries = mergeOccurrences(state.entries, occurrences);
  const entriesArray = Object.values(mergedEntries);
  const pending = entriesArray.filter((entry) => needsCheck(entry));
  const snapshotMeta = {
    generatedAt: state.generatedAt || null,
    links: Array.isArray(state.links) ? state.links : [],
  };

  await checkEntries(pending, mergedEntries, snapshotMeta);

  const deadLinks = buildDeadLinks(mergedEntries);
  const nextState = {
    generatedAt: new Date().toISOString(),
    links: deadLinks,
    entries: mergedEntries,
  };
  saveState(nextState);
  console.log(
    `Liens externes analysés: ${entriesArray.length} URL (${deadLinks.length} mort(s)). Données écrites dans ${path.relative(
      SITE_ROOT,
      REPORT_PATH
    )}`
  );
}

// Any failure is reported and turned into a non-zero exit code.
main().catch((error) => {
  console.error("Erreur lors de la vérification des liens:", error);
  process.exitCode = 1;
});