Semi-automatic correction of dead links
@@ -285,7 +285,10 @@ async function checkLink(url) {
  let info = runResults.get(url);
  if (!info) {
    const cachedInfo = cache[url];
    if (!isCacheValid(cachedInfo)) {
    if (cachedInfo?.manually_killed === true) {
      // Do not re-test manually killed links
      info = cachedInfo;
    } else if (!isCacheValid(cachedInfo)) {
      const host = extractHost(url);
      if (host) {
        await applyHostDelay(host);
@@ -304,12 +307,13 @@ async function checkLink(url) {
      }

      info = {
        ...(cachedInfo || {}),
        status: result.status ?? null,
        errorType: result.errorType || null,
        method: result.method,
        checked: new Date().toISOString(),
      };
      cache[url] = info;
      cache[url] = info; // preserves files, manual flags, etc.
      cacheDirty = true;
      persistCache();
    } else if (cachedInfo) {
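The spread of cachedInfo above is what lets a re-check refresh the status fields without losing manual flags or file lists already stored for the URL. A minimal standalone sketch of that merge, with invented values (not taken from the repository):

// Illustrative only: the cached entry and check result are made up.
const cachedInfo = {
  status: 404,
  manually_validated: true,
  files: ["articles/example.md"],
};
const result = { status: 200, errorType: null, method: "HEAD" };

const info = {
  ...(cachedInfo || {}),
  status: result.status ?? null,
  errorType: result.errorType || null,
  method: result.method,
  checked: new Date().toISOString(),
};

console.log(info.manually_validated); // true (kept from the cached entry)
console.log(info.files);              // ["articles/example.md"] (kept as well)
console.log(info.status);             // 200 (refreshed by the new check)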
@@ -540,10 +544,43 @@ function writeReport(entries) {
  if (cachePruned) {
    cacheDirty = true;
  }
  // Update file paths, line numbers and ensure manual flags exist
  for (const entry of uniqueEntries) {
    const files = Array.from(
      new Set(entry.occurrences.map((o) => path.relative(SITE_ROOT, o.file)))
    ).sort((a, b) => a.localeCompare(b));
    const locations = Array.from(
      new Set(
        entry.occurrences.map(
          (o) => `${path.relative(SITE_ROOT, o.file)}:${o.line}`
        )
      )
    ).sort((a, b) => a.localeCompare(b));
    const existing = cache[entry.url] || {};
    cache[entry.url] = {
      ...existing,
      manually_validated: existing.manually_validated === true,
      manually_killed: existing.manually_killed === true,
      files,
      locations,
    };
    cacheDirty = true;
  }
  if (cacheDirty) {
    ensureDirectoryExists(CACHE_PATH);
    fs.writeFileSync(CACHE_PATH, yaml.dump(cache));
    cacheDirty = false;
  }

  // Exclude manually killed from re-checking and reporting
  const entriesToCheck = uniqueEntries.filter(
    (e) => !(cache[e.url] && cache[e.url].manually_killed === true)
  );

  ensureDirectoryExists(PROGRESS_FILE);
  fs.writeFileSync(PROGRESS_FILE, `"url","locations","status"\n`, "utf8");

  const total = uniqueEntries.length;
  const total = entriesToCheck.length;
  if (total === 0) {
    process.stdout.write("No external links found.\n");
    ensureDirectoryExists(CACHE_PATH);
@@ -552,7 +589,7 @@ function writeReport(entries) {
    return;
  }

  const hostGroups = groupEntriesByHost(uniqueEntries);
  const hostGroups = groupEntriesByHost(entriesToCheck);
  const concurrency = Math.max(1, Math.min(MAX_CONCURRENT_HOSTS, hostGroups.length || 1));
  let processed = 0;
  await runWithConcurrency(
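After this pass, every scanned URL ends up with a cache entry carrying its files and locations plus explicit boolean manual flags. A rough sketch of the entry this loop would write, with an invented URL and paths, dumped through js-yaml as the script does:

const yaml = require("js-yaml");

// Invented entry, shaped like what the loop above stores in the cache.
const existing = { status: 404, checked: "2024-01-01T00:00:00.000Z" };
const entry = {
  ...existing,
  manually_validated: existing.manually_validated === true, // false when absent
  manually_killed: existing.manually_killed === true,       // false when absent
  files: ["articles/cdn.md"],
  locations: ["articles/cdn.md:42"],
};
console.log(yaml.dump({ "https://example.org/old-page": entry }));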
@@ -8,12 +8,12 @@
"hostDelayMs": 2000,
"retryDelayMs": 5000,
"requestTimeoutSeconds": 5,
"cacheTtlSuccessDays": 7,
"cacheTtlClientErrorDays": 0,
"cacheTtlSuccessDays": 30,
"cacheTtlClientErrorDays": 7,
"outputFormat": "markdown",
"outputFile": "tools/cache/external_links_report.md",
"userAgent": null,
"enableCookies": true,
"cookieJar": "tools/cache/curl_cookies.txt"
}
}
}
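The TTLs loosen here: cacheTtlSuccessDays goes from 7 to 30 and cacheTtlClientErrorDays from 0 (presumably re-checked on every run) to 7. isCacheValid itself is not shown in this diff; a hypothetical sketch of TTL logic consistent with these values, for orientation only:

// Hypothetical helpers, not the project's isCacheValid implementation.
const DAY_MS = 24 * 60 * 60 * 1000;

function ttlDaysFor(status) {
  if (status >= 200 && status < 400) return 30; // cacheTtlSuccessDays
  if (status >= 400 && status < 500) return 7;  // cacheTtlClientErrorDays
  return 0; // anything else would be re-checked on every run
}

function isFresh(info, now = Date.now()) {
  if (!info || !info.checked || typeof info.status !== "number") return false;
  const ageMs = now - new Date(info.checked).getTime();
  return ageMs < ttlDaysFor(info.status) * DAY_MS;
}

console.log(isFresh({ status: 200, checked: new Date().toISOString() })); // true
console.log(isFresh({ status: 500, checked: new Date().toISOString() })); // false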
@@ -15,6 +15,14 @@ function trimUnbalancedTrailing(value, openChar, closeChar) {
  return result;
}

function stripTrailingPunctuation(value) {
  let result = value;
  while (/[.,;:!?'"\u2018\u2019\u201C\u201D]+$/.test(result)) {
    result = result.slice(0, -1);
  }
  return result;
}

function sanitizeUrlCandidate(raw, options = {}) {
  if (typeof raw !== "string") return null;
  let candidate = raw.trim();
@@ -24,9 +32,7 @@ function sanitizeUrlCandidate(raw, options = {}) {
    candidate = candidate.slice(1, -1).trim();
  }

  while (/[.,;:!?'"\u2018\u2019\u201C\u201D]+$/.test(candidate)) {
    candidate = candidate.slice(0, -1);
  }
  candidate = stripTrailingPunctuation(candidate);

  if (!options.keepTrailingParens) {
    candidate = trimUnbalancedTrailing(candidate, "(", ")");
@@ -39,9 +45,15 @@ function sanitizeUrlCandidate(raw, options = {}) {
  }
  candidate = trimUnbalancedTrailing(candidate, "[", "]");
  candidate = trimUnbalancedTrailing(candidate, "{", "}");
  candidate = stripTrailingPunctuation(candidate);

  candidate = candidate.replace(/[)]+$/g, (suffix) => {
    const toTrim = !options.keepTrailingParens ? suffix.length : Math.max(0, suffix.length - 1);
    return ")".repeat(suffix.length - toTrim);
  });
  candidate = candidate.replace(/[*_]+$/, "");
  candidate = candidate.replace(/\[\^[^\]]*\]$/, "");
  candidate = stripTrailingPunctuation(candidate);
  if (!options.keepTrailingParens) {
    candidate = trimUnbalancedTrailing(candidate, "(", ")");
  }
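The repeated inline punctuation-trimming loops become a single stripTrailingPunctuation helper, now also applied after the bracket and footnote cleanups. The helper in isolation, with a couple of invented inputs:

// Copy of the helper above, runnable on its own.
function stripTrailingPunctuation(value) {
  let result = value;
  while (/[.,;:!?'"\u2018\u2019\u201C\u201D]+$/.test(result)) {
    result = result.slice(0, -1);
  }
  return result;
}

console.log(stripTrailingPunctuation('https://example.org/page".'));
// -> https://example.org/page
console.log(stripTrailingPunctuation("https://example.org/wiki/Foo_(bar)"));
// -> https://example.org/wiki/Foo_(bar)  (parentheses are left to trimUnbalancedTrailing)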
@@ -201,6 +213,7 @@ async function collectMarkdownLinksFromStream(stream) {
    lineNumber++;
    const trimmed = line.trim();

    // Skip YAML front matter entirely; only scan Markdown content
    if (lineNumber === 1 && trimmed === "---") {
      inFrontMatter = true;
      continue;
@@ -208,11 +221,8 @@ async function collectMarkdownLinksFromStream(stream) {
    if (inFrontMatter) {
      if (trimmed === "---") {
        inFrontMatter = false;
        continue;
      }
      if (trimmed.startsWith("#")) {
        continue;
      }
      continue;
    }

    for (const url of extractLinksFromText(line)) {
@@ -24,7 +24,8 @@ test("collectMarkdownLinksFromStream preserves line numbers", async () => {
    "Markdown [link](https://docs.example.org/page(with more valid content)).",
    "Le **[baume du Canada](https://fr.wikipedia.org/wiki/Baume_du_Canada)**",
    "(_Theropoda [incertae sedis](https://fr.wikipedia.org/wiki/Incertae_sedis)_)",
    "[CDN](https://fr.wikipedia.org/wiki/Réseau_de_diffusion_de_contenu)[^2]."
    "[CDN](https://fr.wikipedia.org/wiki/Réseau_de_diffusion_de_contenu)[^2].",
    "(heu... [oui](https://github.com/opencart/opencart/tree/master/upload/system/storage/vendor)...)"
  ].join("\n");
  const stream = Readable.from([content]);
  const links = await collectMarkdownLinksFromStream(stream);
@@ -36,10 +37,11 @@ test("collectMarkdownLinksFromStream preserves line numbers", async () => {
    { url: "https://fr.wikipedia.org/wiki/Baume_du_Canada", line: 6 },
    { url: "https://fr.wikipedia.org/wiki/Incertae_sedis", line: 7 },
    { url: "https://fr.wikipedia.org/wiki/Réseau_de_diffusion_de_contenu", line: 8 },
    { url: "https://github.com/opencart/opencart/tree/master/upload/system/storage/vendor", line: 9 },
  ]);
});

test("collectMarkdownLinksFromStream ignores URLs in front matter comments", async () => {
test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => {
  const content = [
    "---",
    "links:",
@@ -51,7 +53,6 @@ test("collectMarkdownLinksFromStream ignores URLs in front matter comments", asy
  const stream = Readable.from([content]);
  const links = await collectMarkdownLinksFromStream(stream);
  assert.deepStrictEqual(links, [
    { url: "https://included.example.com", line: 4 },
    { url: "https://body.example.com", line: 6 },
  ]);
});
tools/update_external_links.js  (new file, 254 lines)
@@ -0,0 +1,254 @@
const fs = require("fs");
const path = require("path");
const util = require("util");
const yaml = require("js-yaml");
const readline = require("readline");
const { execFile } = require("child_process");

const execFileAsync = util.promisify(execFile);

const SITE_ROOT = path.resolve(__dirname, "..");
const CONFIG_PATH = path.join(__dirname, "config.json");

let config = {};
if (fs.existsSync(CONFIG_PATH)) {
  try {
    config = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf8"));
  } catch (error) {
    console.warn(
      `Impossible de parser ${path.relative(
        SITE_ROOT,
        CONFIG_PATH
      )}. Valeurs par défaut utilisées. (${error.message})`
    );
  }
}

const externalConfig = {
  cacheDir: path.join(__dirname, "cache"),
  cacheFile: "external_links.yaml",
  ...(config.externalLinks || {}),
};

const CACHE_DIR = path.isAbsolute(externalConfig.cacheDir)
  ? externalConfig.cacheDir
  : path.resolve(SITE_ROOT, externalConfig.cacheDir);
const CACHE_PATH = path.isAbsolute(externalConfig.cacheFile)
  ? externalConfig.cacheFile
  : path.join(CACHE_DIR, externalConfig.cacheFile);

function ensureDirectoryExists(targetFile) {
  fs.mkdirSync(path.dirname(targetFile), { recursive: true });
}

function loadCache() {
  if (!fs.existsSync(CACHE_PATH)) return {};
  try {
    return yaml.load(fs.readFileSync(CACHE_PATH, "utf8")) || {};
  } catch (e) {
    console.error("Erreur de lecture du cache YAML:", e.message);
    return {};
  }
}

function saveCache(cache) {
  ensureDirectoryExists(CACHE_PATH);
  fs.writeFileSync(CACHE_PATH, yaml.dump(cache), "utf8");
}

function promptFactory() {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  const question = (q) =>
    new Promise((resolve) => rl.question(q, (ans) => resolve(ans.trim())));
  return {
    async ask(q) {
      return await question(q);
    },
    close() {
      rl.close();
    },
  };
}
async function ensureCheckRanIfNeeded() {
  if (fs.existsSync(CACHE_PATH)) return;
  console.log(
    "Cache introuvable. Exécution préalable de tools/check_external_links.js..."
  );
  await execFileAsync("node", [path.join(__dirname, "check_external_links.js")], {
    cwd: SITE_ROOT,
    env: process.env,
  });
}

function listBrokenUrls(cache) {
  const result = [];
  for (const [url, info] of Object.entries(cache)) {
    const status = info && typeof info.status === "number" ? info.status : null;
    const killed = info && info.manually_killed === true;
    const validated = info && info.manually_validated === true;
    if (killed) continue; // these URLs are never processed again
    if (validated) continue; // already validated manually
    if (status !== null && (status >= 400 || status === 0)) {
      result.push({ url, info });
    }
  }
  return result;
}

function getFilesForUrl(info) {
  let files = [];
  if (Array.isArray(info?.files) && info.files.length > 0) {
    files = info.files;
  } else if (Array.isArray(info?.locations) && info.locations.length > 0) {
    files = Array.from(new Set(info.locations.map((s) => String(s).split(":")[0])));
  }
  return files.map((p) => path.resolve(SITE_ROOT, p));
}

function replaceInFile(filePath, from, to) {
  if (!fs.existsSync(filePath)) return { changed: false };
  const original = fs.readFileSync(filePath, "utf8");
  if (!original.includes(from)) return { changed: false };
  const updated = original.split(from).join(to);
  if (updated !== original) {
    fs.writeFileSync(filePath, updated, "utf8");
    return { changed: true };
  }
  return { changed: false };
}
async function main() {
  await ensureCheckRanIfNeeded();
  let cache = loadCache();

  const broken = listBrokenUrls(cache);
  if (broken.length === 0) {
    console.log("Aucun lien en erreur (>= 400) à traiter.");
    return;
  }

  const p = promptFactory();
  try {
    for (const { url, info } of broken) {
      const statusLabel = typeof info.status === "number" ? String(info.status) : "inconnu";
      const locations = Array.isArray(info.locations) ? info.locations : [];
      const files = Array.isArray(info.files) ? info.files : Array.from(new Set(locations.map((s) => String(s).split(":")[0])));
      console.log("\nURL: ", url);
      console.log("Statut: ", statusLabel);
      if (locations.length > 0) {
        console.log("Emplacements:");
        for (const loc of locations) console.log(" - ", loc);
      } else if (files.length > 0) {
        console.log("Emplacements:");
        for (const f of files) console.log(" - ", `${f}:?`);
      } else {
        console.log("Fichiers: (aucun chemin enregistré)");
      }

      const choice = (
        await p.ask(
          "Action ? [i]gnorer, [c]onfirmer, [r]emplacer, [m]ort, [q]uitter (défaut: i) : "
        )
      ).toLowerCase() || "i";

      if (choice === "q") {
        console.log("Arrêt demandé.");
        break;
      }

      if (choice === "i") {
        // Ignore
        continue;
      }

      if (choice === "c") {
        const nowIso = new Date().toISOString();
        cache[url] = {
          ...(cache[url] || {}),
          manually_validated: true,
          manually_killed: cache[url]?.manually_killed === true,
          status: 200,
          errorType: null,
          method: "MANUAL",
          checked: nowIso,
        };
        saveCache(cache);
        console.log("Marqué comme validé manuellement.");
        continue;
      }

if (choice === "m") {
|
||||
cache[url] = {
|
||||
...(cache[url] || {}),
|
||||
manually_killed: true,
|
||||
manually_validated: cache[url]?.manually_validated === true,
|
||||
status: cache[url]?.status ?? null,
|
||||
errorType: cache[url]?.errorType ?? null,
|
||||
method: cache[url]?.method ?? null,
|
||||
};
|
||||
saveCache(cache);
|
||||
console.log("Marqué comme mort (plus jamais retesté).");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (choice === "r") {
|
||||
if (!(Array.isArray(files) && files.length > 0)) {
|
||||
console.log(
|
||||
"Impossible de remplacer: aucun fichier enregistré pour cet URL. Relancez d'abord tools/check_external_links.js."
|
||||
);
|
||||
continue;
|
||||
}
|
||||
const newUrl = await p.ask("Nouvel URL: ");
|
||||
if (!newUrl || !newUrl.includes("://")) {
|
||||
console.log("URL invalide, opération annulée.");
|
||||
continue;
|
||||
}
|
||||
// Remplacements dans les fichiers listés
|
||||
let changedFiles = 0;
|
||||
for (const rel of files) {
|
||||
const abs = path.resolve(SITE_ROOT, rel);
|
||||
const { changed } = replaceInFile(abs, url, newUrl);
|
||||
if (changed) changedFiles++;
|
||||
}
|
||||
console.log(`Remplacements effectués dans ${changedFiles} fichier(s).`);
|
||||
|
||||
// Mettre à jour la base: déplacer l'entrée vers la nouvelle clé
|
||||
const oldEntry = cache[url] || {};
|
||||
const newEntryExisting = cache[newUrl] || {};
|
||||
cache[newUrl] = {
|
||||
...newEntryExisting,
|
||||
files: Array.isArray(oldEntry.files) ? [...oldEntry.files] : files,
|
||||
locations: Array.isArray(oldEntry.locations)
|
||||
? [...oldEntry.locations]
|
||||
: Array.isArray(oldEntry.files)
|
||||
? oldEntry.files.map((f) => `${f}:?`)
|
||||
: Array.isArray(locations)
|
||||
? [...locations]
|
||||
: [],
|
||||
manually_validated: false,
|
||||
manually_killed: false,
|
||||
status: null,
|
||||
errorType: null,
|
||||
method: newEntryExisting.method || null,
|
||||
checked: null,
|
||||
};
|
||||
delete cache[url];
|
||||
saveCache(cache);
|
||||
console.log("Base mise à jour pour le nouvel URL.");
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log("Choix non reconnu. Ignoré.");
|
||||
}
|
||||
} finally {
|
||||
p.close();
|
||||
}
|
||||
|
||||
console.log("\nTerminé. Vous pouvez relancer 'node tools/check_external_links.js' pour mettre à jour les statuts.");
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error("Erreur:", err);
|
||||
process.exitCode = 1;
|
||||
});
|
||||
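Taken together with the checker changes above, the intended flow appears to be: check_external_links.js fills the YAML cache, update_external_links.js walks the entries still in error and records a decision, and the next checker run honours the flags. An invented example of what answering [m] leaves in the cache and how later runs treat it:

// Invented cache entry after answering [m] for a URL (values made up).
// Per the checkLink and writeReport changes above, the checker neither
// re-tests it (manually_killed short-circuits the check) nor counts it
// among the entries to check and report.
const killedEntry = {
  "https://example.org/gone-forever": {
    status: 410,
    errorType: null,
    method: "HEAD",
    manually_killed: true,
    manually_validated: false,
    files: ["articles/old-post.md"],
    locations: ["articles/old-post.md:12"],
  },
};
console.log(require("js-yaml").dump(killedEntry));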