Ajoute un outil de gestion interactive des liens morts
This commit is contained in:
@@ -1,37 +1,167 @@
|
||||
const { fetch } = require("undici");

// Wayback Machine endpoints used by this module.
const ARCHIVE_API_URL = "https://archive.org/wayback/available?url=";
const ARCHIVE_CDX_URL = "https://web.archive.org/cdx/search/cdx";
const ARCHIVE_SAVE_URL = "https://web.archive.org/save/";
// Hard cap applied to every outbound Archive.org request (milliseconds).
const ARCHIVE_REQUEST_TIMEOUT_MS = 15000;
/**
 * Builds the public URL of a Wayback Machine capture.
 * @param {string} originalUrl - The original (archived) URL.
 * @param {string} timestamp - Wayback timestamp (yyyyMMddHHmmss).
 * @returns {string} A directly usable archive.org URL.
 */
function buildArchiveCaptureUrl(originalUrl, timestamp) {
  return `https://web.archive.org/web/${timestamp}/${originalUrl}`;
}
/**
 * Clamps an arbitrary value to a strictly positive integer.
 * @param {unknown} value - Value to validate.
 * @param {number} fallback - Default returned when the value is unusable.
 * @returns {number} A strictly positive integer.
 */
function normalizePositiveInteger(value, fallback) {
  const parsed = Number.parseInt(String(value), 10);
  // NaN (unparsable) and non-positive values both fall back.
  return Number.isNaN(parsed) || parsed <= 0 ? fallback : parsed;
}
/**
 * Fetches a JSON document from Archive.org, aborting after the module timeout.
 * @param {string|URL} url - Endpoint to call.
 * @returns {Promise<unknown>} Decoded JSON document.
 * @throws {Error} When the HTTP response is not OK or the request times out.
 */
async function fetchArchiveJson(url) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS);
  let response;
  try {
    response = await fetch(url, { signal: controller.signal });
  } finally {
    // Always release the timer, whether the request resolved or aborted.
    clearTimeout(timer);
  }
  if (!response.ok) {
    throw new Error(`Erreur de l'API Archive.org (${response.status})`);
  }
  return response.json();
}
/**
 * Lists the most recent Wayback captures available for a URL.
 * @param {string} url - Original URL to look up.
 * @param {{ limit?: number }} options - Query options.
 * @returns {Promise<Array<{ timestamp: string, originalUrl: string, statusCode: number|null, mimetype: string|null, url: string }>>}
 */
async function listArchiveCaptures(url, options = {}) {
  const limit = normalizePositiveInteger(options.limit, 10);

  const requestUrl = new URL(ARCHIVE_CDX_URL);
  const queryParams = {
    url,
    output: "json",
    fl: "timestamp,original,statuscode,mimetype,digest",
    filter: "statuscode:200",
    collapse: "digest",
    fastLatest: "true",
    // A negative limit asks the CDX API for the most recent captures.
    limit: `-${limit}`,
  };
  for (const [key, value] of Object.entries(queryParams)) {
    requestUrl.searchParams.set(key, value);
  }

  const rows = await fetchArchiveJson(requestUrl);
  // The CDX JSON output is a header row followed by data rows.
  if (!Array.isArray(rows) || rows.length <= 1 || !Array.isArray(rows[0])) {
    return [];
  }

  const header = rows[0];
  const column = {
    timestamp: header.indexOf("timestamp"),
    original: header.indexOf("original"),
    statuscode: header.indexOf("statuscode"),
    mimetype: header.indexOf("mimetype"),
  };

  const captures = [];
  for (const row of rows.slice(1)) {
    if (!Array.isArray(row)) {
      continue;
    }

    const timestamp = row[column.timestamp];
    const originalUrl = row[column.original];
    // Both fields are mandatory to build a capture URL.
    if (typeof timestamp !== "string" || typeof originalUrl !== "string") {
      continue;
    }

    let statusCode = null;
    if (column.statuscode > -1) {
      const parsedStatusCode = Number.parseInt(row[column.statuscode], 10);
      if (!Number.isNaN(parsedStatusCode)) {
        statusCode = parsedStatusCode;
      }
    }

    let mimetype = null;
    if (column.mimetype > -1) {
      const rawMimetype = row[column.mimetype];
      if (typeof rawMimetype === "string" && rawMimetype.trim()) {
        mimetype = rawMimetype.trim();
      }
    }

    captures.push({
      timestamp,
      originalUrl,
      statusCode,
      mimetype,
      url: buildArchiveCaptureUrl(originalUrl, timestamp),
    });
  }

  // Newest first, bounded by the requested limit.
  captures.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
  return captures.slice(0, limit);
}
/**
 * Returns the most recent Wayback capture available for a URL.
 *
 * Queries the Wayback availability API first, then falls back to the CDX
 * capture listing when that API fails.
 * @param {string} url - Original URL.
 * @returns {Promise<string|null>} Archive.org URL, or null when no capture exists.
 */
async function getArchiveUrl(url) {
  try {
    const response = await fetch(`${ARCHIVE_API_URL}${encodeURIComponent(url)}`);
    if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`);
    const data = await response.json();
    return data.archived_snapshots?.closest?.url || null;
  } catch (error) {
    console.error(`❌ Archive.org API error: ${error.message}`);
    // Fallback: the CDX API is slower but independent of the availability
    // endpoint, so a transient failure above does not lose the capture.
    const captures = await listArchiveCaptures(url, { limit: 1 });
    if (captures.length === 0) {
      return null;
    }
    return captures[0].url;
  }
}
/**
 * Asks Archive.org to archive a URL.
 * @param {string} url - URL to archive.
 * @returns {Promise<string|null>} Final capture URL when available, otherwise null.
 */
async function saveToArchive(url) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS);
  try {
    const response = await fetch(`${ARCHIVE_SAVE_URL}${encodeURIComponent(url)}`, {
      method: "POST",
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(`Erreur de sauvegarde Archive.org (${response.status})`);
    }
    // While the save job is still pending, Archive.org leaves us on the
    // /save/ endpoint instead of redirecting to the finished capture.
    if (response.url.includes("/save/")) {
      return null;
    }
    return response.url;
  } catch (error) {
    // Best-effort contract: report the failure and return null so callers
    // can continue processing other links.
    console.error(`❌ Failed to save URL to Archive.org: ${error.message}`);
    return null;
  } finally {
    clearTimeout(timer);
  }
}
module.exports = { getArchiveUrl, saveToArchive };
|
||||
module.exports = {
|
||||
buildArchiveCaptureUrl,
|
||||
listArchiveCaptures,
|
||||
getArchiveUrl,
|
||||
saveToArchive,
|
||||
};
|
||||
|
||||
@@ -92,6 +92,77 @@ async function resolveMarkdownTargets(inputs, { rootDir = process.cwd(), skipInd
|
||||
return Array.from(targets);
|
||||
}
|
||||
|
||||
/**
 * Collects every file under a root that matches a list of extensions.
 * @param {string} rootDir - Root directory to traverse.
 * @param {string[]} extensions - Expected extensions, dot included.
 * @param {{ skipDirs?: string[] }} options - Traversal options.
 * @returns {Promise<string[]>} Matching file paths, sorted.
 */
async function collectFilesByExtensions(rootDir, extensions, options = {}) {
  // Keep only non-blank string extensions, lowercased for comparison.
  const wantedExtensions = new Set();
  for (const rawExtension of extensions) {
    if (typeof rawExtension !== "string") {
      continue;
    }
    const extension = rawExtension.trim().toLowerCase();
    if (extension) {
      wantedExtensions.add(extension);
    }
  }
  if (wantedExtensions.size === 0) {
    return [];
  }

  // Directories never descended into; callers may add their own.
  const skippedDirectories = new Set([".git", "node_modules"]);
  if (Array.isArray(options.skipDirs)) {
    for (const rawName of options.skipDirs) {
      if (typeof rawName !== "string") {
        continue;
      }
      const name = rawName.trim();
      if (name) {
        skippedDirectories.add(name);
      }
    }
  }

  // Iterative depth traversal; final ordering comes from the sort below.
  const files = [];
  const pending = [rootDir];
  while (pending.length > 0) {
    const currentDir = pending.pop();
    const entries = await fs.readdir(currentDir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        if (!skippedDirectories.has(entry.name)) {
          pending.push(fullPath);
        }
        continue;
      }
      if (entry.isFile() && wantedExtensions.has(path.extname(entry.name).toLowerCase())) {
        files.push(fullPath);
      }
    }
  }

  return files.sort((a, b) => a.localeCompare(b));
}
async function collectBundles(rootDir) {
|
||||
const bundles = [];
|
||||
await walk(rootDir, rootDir, bundles);
|
||||
@@ -140,5 +211,6 @@ module.exports = {
|
||||
collectMarkdownFiles,
|
||||
collectSectionIndexDirs,
|
||||
resolveMarkdownTargets,
|
||||
collectFilesByExtensions,
|
||||
collectBundles,
|
||||
};
|
||||
|
||||
198
tools/lib/external_links_report.js
Normal file
198
tools/lib/external_links_report.js
Normal file
@@ -0,0 +1,198 @@
|
||||
const fs = require("node:fs");
const path = require("node:path");
const yaml = require("js-yaml");
const { loadToolsConfig } = require("./config");

// Default location of the external-links cache, relative to the project root.
const DEFAULT_CACHE_DIR = "tools/cache";
const DEFAULT_CACHE_FILE = "external_links.yaml";
/**
 * Resolves the external-links report path from the tools configuration.
 * @param {string} siteRoot - Project root.
 * @returns {Promise<string>} Absolute path of the YAML report.
 */
async function resolveExternalLinksReportPath(siteRoot) {
  const rootDir = path.resolve(siteRoot);
  const config = await loadToolsConfig(path.join(rootDir, "tools", "config", "config.json"));
  const externalLinks = config.externalLinks;

  // Use a configured value only when it is a non-blank string.
  const pickString = (value, fallback) =>
    typeof value === "string" && value.trim() ? value.trim() : fallback;

  const cacheDir = pickString(externalLinks?.cacheDir, DEFAULT_CACHE_DIR);
  const cacheFile = pickString(externalLinks?.cacheFile, DEFAULT_CACHE_FILE);

  // An absolute cache file wins outright; otherwise anchor a relative cache
  // directory to the project root before joining.
  if (path.isAbsolute(cacheFile)) {
    return cacheFile;
  }
  const resolvedCacheDir = path.isAbsolute(cacheDir) ? cacheDir : path.join(rootDir, cacheDir);
  return path.join(resolvedCacheDir, cacheFile);
}
/**
 * Normalizes the list of locations attached to a link.
 * @param {unknown[]} rawLocations - Raw locations.
 * @returns {Array<{ file: string, line: number|null, page: string|null }>}
 */
function normalizeLocations(rawLocations) {
  if (!Array.isArray(rawLocations)) {
    return [];
  }

  const locations = [];
  for (const candidate of rawLocations) {
    if (!candidate || typeof candidate !== "object") {
      continue;
    }

    // The file is mandatory; line and page default to null.
    const file =
      typeof candidate.file === "string" && candidate.file.trim() ? candidate.file.trim() : null;
    if (!file) {
      continue;
    }

    const line =
      typeof candidate.line === "number" && Number.isFinite(candidate.line) ? candidate.line : null;
    const page =
      typeof candidate.page === "string" && candidate.page.trim() ? candidate.page.trim() : null;

    locations.push({ file, line, page });
  }

  return locations;
}
/**
 * Normalizes one report entry.
 * @param {unknown} rawLink - Raw entry.
 * @returns {{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }|null}
 */
function normalizeLink(rawLink) {
  if (!rawLink || typeof rawLink !== "object") {
    return null;
  }
  const rawUrl = rawLink.url;
  if (typeof rawUrl !== "string" || !rawUrl.trim()) {
    return null;
  }

  // Accept the status either as a finite number or as a parsable string.
  let status = null;
  const rawStatus = rawLink.status;
  if (typeof rawStatus === "number" && Number.isFinite(rawStatus)) {
    status = rawStatus;
  } else if (typeof rawStatus === "string" && rawStatus.trim()) {
    const parsedStatus = Number.parseInt(rawStatus, 10);
    status = Number.isNaN(parsedStatus) ? null : parsedStatus;
  }

  return {
    url: rawUrl.trim(),
    status,
    locations: normalizeLocations(rawLink.locations),
  };
}
/**
 * Rebuilds a link list from the "entries" section of the cache.
 * @param {Record<string, unknown>} entries - Raw entries keyed by URL.
 * @returns {Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }>}
 */
function buildLinksFromEntries(entries) {
  const links = [];
  for (const [url, rawEntry] of Object.entries(entries)) {
    // Non-object entries still produce a link candidate with null fields.
    const isObject = Boolean(rawEntry) && typeof rawEntry === "object";
    const normalized = normalizeLink({
      url,
      status: isObject ? rawEntry.status : null,
      locations: isObject ? rawEntry.locations : null,
    });
    if (normalized) {
      links.push(normalized);
    }
  }
  return links;
}
/**
 * Loads the external-links report from disk.
 * @param {string} reportPath - Absolute or relative path of the YAML report.
 * @returns {{ generatedAt: string|null, links: Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }> }}
 */
function loadExternalLinksReport(reportPath) {
  const resolvedPath = path.resolve(reportPath);
  if (!fs.existsSync(resolvedPath)) {
    // Missing report: behave as an empty one rather than failing.
    return { generatedAt: null, links: [] };
  }

  const raw = yaml.load(fs.readFileSync(resolvedPath, "utf8")) || {};

  // Support both report shapes: a "links" array, or an "entries" map
  // keyed by URL (older cache format).
  let links = [];
  if (Array.isArray(raw.links)) {
    links = raw.links.map((rawLink) => normalizeLink(rawLink)).filter(Boolean);
  } else if (raw.entries && typeof raw.entries === "object") {
    links = buildLinksFromEntries(raw.entries);
  }

  return { generatedAt: raw.generatedAt || null, links };
}
/**
 * Filters the report's links by HTTP status code.
 * @param {{ links?: Array<{ status: number|null }> }} report - Loaded report.
 * @param {number} statusCode - Status code to keep.
 * @returns {Array<{ url: string, status: number|null, locations: Array<{ file: string, line: number|null, page: string|null }> }>}
 */
function getLinksByStatus(report, statusCode) {
  if (!Array.isArray(report?.links)) {
    return [];
  }
  // Keep only well-formed link objects whose status matches exactly.
  return report.links.filter(
    (link) => link && typeof link === "object" && link.status === statusCode
  );
}
// Public API of the external-links report helpers.
module.exports = { resolveExternalLinksReportPath, loadExternalLinksReport, getLinksByStatus };
92
tools/lib/url_replacements.js
Normal file
92
tools/lib/url_replacements.js
Normal file
@@ -0,0 +1,92 @@
|
||||
const fs = require("node:fs/promises");
const path = require("node:path");
const { collectFilesByExtensions } = require("./content");

// Text-file extensions scanned for URL occurrences by default.
const DEFAULT_URL_TEXT_EXTENSIONS = Object.freeze([
  ".json",
  ".markdown",
  ".md",
  ".yaml",
  ".yml",
]);
/**
 * Counts exact, non-overlapping occurrences of a substring in a text.
 * @param {string} text - Text to scan.
 * @param {string} needle - Substring to look for.
 * @returns {number} Number of occurrences found; 0 for invalid inputs.
 */
function countOccurrences(text, needle) {
  if (typeof text !== "string" || typeof needle !== "string" || !needle) {
    return 0;
  }
  let count = 0;
  let cursor = text.indexOf(needle);
  while (cursor !== -1) {
    count += 1;
    // Advance past the whole match: occurrences never overlap.
    cursor = text.indexOf(needle, cursor + needle.length);
  }
  return count;
}
/**
 * Returns the text files that contain a given URL.
 * @param {string} rootDir - Root directory to search.
 * @param {string} targetUrl - URL to look for.
 * @param {{ extensions?: string[] }} options - Search options.
 * @returns {Promise<Array<{ filePath: string, occurrences: number }>>}
 */
async function findUrlOccurrences(rootDir, targetUrl, options = {}) {
  const extensions = Array.isArray(options.extensions)
    ? options.extensions
    : DEFAULT_URL_TEXT_EXTENSIONS;
  const files = await collectFilesByExtensions(rootDir, extensions);

  const matches = [];
  for (const filePath of files) {
    const content = await fs.readFile(filePath, "utf8");
    const occurrences = countOccurrences(content, targetUrl);
    if (occurrences > 0) {
      matches.push({ filePath, occurrences });
    }
  }
  return matches;
}
/**
 * Replaces every exact occurrence of a URL in the matching files.
 * @param {string} rootDir - Search root.
 * @param {string} targetUrl - URL to replace.
 * @param {string} replacementUrl - Replacement URL.
 * @param {{ extensions?: string[], matches?: Array<{ filePath: string, occurrences: number }> }} options - Write options.
 * @returns {Promise<{ changedFiles: string[], totalOccurrences: number }>}
 */
async function replaceUrlInFiles(rootDir, targetUrl, replacementUrl, options = {}) {
  let matches;
  if (Array.isArray(options.matches)) {
    matches = options.matches;
  } else {
    matches = await findUrlOccurrences(rootDir, targetUrl, options);
  }

  const changedFiles = [];
  let totalOccurrences = 0;

  for (const match of matches) {
    const filePath = path.resolve(match.filePath);
    const content = await fs.readFile(filePath, "utf8");
    // Recount from the content actually on disk: caller-supplied matches may
    // be stale, and an unchanged file must not be rewritten nor reported.
    const occurrences = countOccurrences(content, targetUrl);
    if (occurrences === 0) {
      continue;
    }
    const updatedContent = content.split(targetUrl).join(replacementUrl);
    await fs.writeFile(filePath, updatedContent, "utf8");
    changedFiles.push(filePath);
    totalOccurrences += occurrences;
  }

  return { changedFiles, totalOccurrences };
}
// Public API of the URL replacement helpers.
module.exports = { DEFAULT_URL_TEXT_EXTENSIONS, countOccurrences, findUrlOccurrences, replaceUrlInFiles };
Reference in New Issue
Block a user