1

Marquage des liens considérés comme définitivement morts

This commit is contained in:
2025-11-01 16:25:04 +01:00
parent 890c95a450
commit 0f272f94a1
9 changed files with 485 additions and 13 deletions

450
tools/mark_dead_links.js Normal file
View File

@@ -0,0 +1,450 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
// Site root: the parent of the directory that contains this script.
const SITE_ROOT = path.resolve(__dirname, "..");
// Optional JSON configuration file stored next to this script.
const CONFIG_PATH = path.join(__dirname, "config.json");
/**
 * Reads the JSON configuration stored at CONFIG_PATH.
 *
 * A missing file is not an error. A file that cannot be read or parsed is
 * reported with a warning and treated as an empty configuration.
 *
 * @returns {*} The parsed JSON value, or `{}` when nothing usable was found.
 */
function loadConfig() {
  let settings = {};
  if (fs.existsSync(CONFIG_PATH)) {
    try {
      const rawJson = fs.readFileSync(CONFIG_PATH, "utf8");
      settings = JSON.parse(rawJson);
    } catch (error) {
      const shownPath = path.relative(SITE_ROOT, CONFIG_PATH);
      console.warn(`Impossible de parser ${shownPath} (${error.message}).`);
    }
  }
  return settings;
}
// Tool configuration as returned by loadConfig().
const config = loadConfig();
// External-link settings: built-in defaults first, then any keys from the
// `externalLinks` section of the configuration, which override the defaults.
const externalConfig = {
cacheDir: path.join(__dirname, "cache"),
cacheFile: "external_links.yaml",
...(config.externalLinks || {}),
};
// A relative cacheDir is resolved against the site root.
const CACHE_DIR = path.isAbsolute(externalConfig.cacheDir)
? externalConfig.cacheDir
: path.resolve(SITE_ROOT, externalConfig.cacheDir);
// A relative cacheFile is located inside CACHE_DIR.
const CACHE_PATH = path.isAbsolute(externalConfig.cacheFile)
? externalConfig.cacheFile
: path.join(CACHE_DIR, externalConfig.cacheFile);
/**
 * Loads the YAML link cache.
 *
 * @param {string} cachePath - Location of the YAML cache file.
 * @returns {*} The parsed document, or `{}` when the file is missing, parses
 *   to a falsy value, or cannot be read/parsed (the error is logged).
 */
function loadCache(cachePath) {
  if (fs.existsSync(cachePath)) {
    try {
      const text = fs.readFileSync(cachePath, "utf8");
      return yaml.load(text) || {};
    } catch (error) {
      console.error(`Erreur lors de la lecture du cache YAML (${error.message}).`);
    }
  }
  return {};
}
/**
 * Returns the check date of a cache entry as an ISO-8601 string.
 *
 * @param {*} info - Cache entry; its `checked` field is used when it is a
 *   string that `Date` can parse.
 * @returns {string} The normalised `checked` date, or the current time when
 *   no usable date is available.
 */
function getCheckedDate(info) {
  const recorded =
    info && typeof info.checked === "string" ? new Date(info.checked) : null;
  const isUsable = recorded !== null && !Number.isNaN(recorded.valueOf());
  return (isUsable ? recorded : new Date()).toISOString();
}
/**
 * Extracts the HTTP status recorded for a cache entry.
 *
 * @param {*} info - Cache entry.
 * @returns {number|null} `info.status` when it is a number, otherwise `null`.
 */
function getStatusCode(info) {
  const code = info ? info.status : undefined;
  return typeof code === "number" ? code : null;
}
// Shared formatter for French (fr-FR) dates: numeric day, full month name,
// numeric year. No timeZone option is passed to the constructor.
const frenchDateFormatter = new Intl.DateTimeFormat("fr-FR", {
day: "numeric",
month: "long",
year: "numeric",
});
/**
 * Formats a date for display using the shared French formatter.
 *
 * @param {*} isoString - Date string to format.
 * @returns {string} The formatted date; the current date is formatted instead
 *   when the input is not a string or cannot be parsed by `Date`.
 */
function formatDisplayDate(isoString) {
  const candidate = typeof isoString === "string" ? new Date(isoString) : null;
  const isUsable = candidate !== null && !Number.isNaN(candidate.valueOf());
  return frenchDateFormatter.format(isUsable ? candidate : new Date());
}
/**
 * Lists the files that reference a cached URL.
 *
 * @param {*} info - Cache entry.
 * @returns {Array} `info.files` itself when it is a non-empty array;
 *   otherwise the distinct values obtained by taking the text before the
 *   first ":" of each `info.locations` entry; otherwise an empty array.
 */
function getFilesForUrl(info) {
  if (!info) return [];
  const { files, locations } = info;
  if (Array.isArray(files) && files.length > 0) {
    return files;
  }
  if (!Array.isArray(locations) || locations.length === 0) {
    return [];
  }
  const pathsOnly = locations.map((location) => {
    const [filePart] = String(location).split(":");
    return filePart;
  });
  return [...new Set(pathsOnly)];
}
/**
 * Splits a document into its leading YAML frontmatter and the remaining body.
 *
 * @param {string} content - Full file content.
 * @returns {{frontmatter: *, block: string, body: string}|null} The parsed
 *   frontmatter (`{}` when the YAML is empty), the raw delimited block, and
 *   the text after it. `null` when the content has no `---` delimited block
 *   at its start or when the YAML is invalid (the error is logged).
 */
function splitFrontmatter(content) {
  const delimited = content.startsWith("---")
    ? content.match(/^---\n([\s\S]*?)\n---\n?/)
    : null;
  if (delimited === null) {
    return null;
  }
  const [block, yamlText] = delimited;
  let frontmatter;
  try {
    frontmatter = yaml.load(yamlText) || {};
  } catch (error) {
    console.error(`Frontmatter YAML invalide (${error.message}).`);
    return null;
  }
  return { frontmatter, block, body: content.slice(block.length) };
}
/**
 * Escapes regular-expression metacharacters so the text can be embedded
 * literally in a `RegExp` source.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => "\\" + character);
}
/**
 * Guarantees that the text ends with a newline.
 *
 * @param {string} value - Text to check.
 * @returns {string} The text unchanged when it already ends with "\n",
 *   otherwise the text with one "\n" appended.
 */
function ensureTrailingNewline(value) {
  return value.endsWith("\n") ? value : `${value}\n`;
}
/**
 * Pads the text so that it ends with two newlines, i.e. whatever is appended
 * next starts after a blank line.
 *
 * @param {string} body - Text to pad.
 * @returns {string} The text followed by as many "\n" as are missing (0-2).
 */
function ensureBlankLineBeforeAppend(body) {
  let missingNewlines = 2;
  if (body.endsWith("\n\n")) {
    missingNewlines = 0;
  } else if (body.endsWith("\n")) {
    missingNewlines = 1;
  }
  return body + "\n".repeat(missingNewlines);
}
/**
 * Marks an "interesting link" page as pointing to a dead link.
 *
 * Three things are brought up to date:
 *   1. the frontmatter `title` gets a "[Lien mort] " prefix (string titles only);
 *   2. the frontmatter `status` list gets an entry `{ date, http_code }` for
 *      the check date (an existing entry for that date has its code refreshed);
 *   3. the body gets, or has refreshed, a "> Lien inaccessible depuis le …" note.
 * The file is rewritten (frontmatter re-serialised with `yaml.dump`) only when
 * something actually changed.
 *
 * @param {string} filePath - Absolute path of the page to update.
 * @param {string} url - The dead URL (not used by this function).
 * @param {*} info - Cache entry providing the check date and HTTP status.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function markInterestingLink(filePath, url, info) {
  const original = fs.readFileSync(filePath, "utf8");
  const parsed = splitFrontmatter(original);
  if (!parsed) {
    const shownPath = path.relative(SITE_ROOT, filePath);
    console.warn(`Frontmatter introuvable pour ${shownPath}, ignoré.`);
    return { changed: false };
  }

  const meta = parsed.frontmatter;
  const checkedDate = getCheckedDate(info);
  const displayDate = formatDisplayDate(checkedDate);
  const httpCode = getStatusCode(info);
  let dirty = false;

  // 1. Title prefix.
  const needsPrefix =
    typeof meta.title === "string" && !meta.title.startsWith("[Lien mort]");
  if (needsPrefix) {
    meta.title = `[Lien mort] ${meta.title}`;
    dirty = true;
  }

  // 2. Status history: one entry per check date.
  const history = Array.isArray(meta.status) ? [...meta.status] : [];
  const recorded = history.find(
    (entry) => entry && typeof entry === "object" && entry.date === checkedDate
  );
  if (!recorded) {
    history.push({ date: checkedDate, http_code: httpCode });
    dirty = true;
  } else if (recorded.http_code !== httpCode) {
    recorded.http_code = httpCode;
    dirty = true;
  }
  meta.status = history;

  // 3. Blockquote note in the body.
  const noteRegex = /(>\s*Lien inaccessible depuis le\s+)([^\n]+)/;
  const noteLine = `> Lien inaccessible depuis le ${displayDate}`;
  let body = parsed.body;
  const existingNote = body.match(noteRegex);
  if (!existingNote) {
    body = `${ensureBlankLineBeforeAppend(body)}${noteLine}\n`;
    dirty = true;
  } else if (existingNote[2].trim() !== displayDate) {
    body = body.replace(noteRegex, noteLine);
    dirty = true;
  }

  if (!dirty) {
    return { changed: false };
  }
  const updatedContent = `---\n${yaml.dump(meta)}---\n${body}`;
  if (updatedContent === original) {
    return { changed: false };
  }
  fs.writeFileSync(filePath, updatedContent, "utf8");
  return { changed: true };
}
/**
 * Finds the highest N among the `[^deadlink-N]` markers present in the text.
 *
 * @param {string} body - Markdown text to scan.
 * @returns {number} The largest marker number, or 0 when there is none.
 */
function collectDeadlinkMaxId(body) {
  const markerNumbers = Array.from(
    body.matchAll(/\[\^deadlink-(\d+)\]/g),
    (found) => parseInt(found[1], 10)
  );
  return markerNumbers
    .filter((value) => Number.isInteger(value))
    .reduce((highest, value) => (value > highest ? value : highest), 0);
}
/**
 * Looks for a `[^deadlink-N]` marker already attached to a link to `url`.
 *
 * The first inline link `[text](url)` is examined, then the first autolink
 * `<url>`. A marker counts as attached when only whitespace or the characters
 * `)`, `*`, `_`, `~` and backtick separate it from the end of the link.
 *
 * @param {string} line - A single Markdown line.
 * @param {string} url - The URL being searched for.
 * @returns {string|null} The footnote id ("deadlink-N"), or `null`.
 */
function findExistingDeadlinkReference(line, url) {
  if (!line.includes(url)) return null;
  const urlPattern = escapeRegExp(url);
  const linkPatterns = [
    new RegExp(`\\[[^\\]]*\\]\\(${urlPattern}\\)`),
    new RegExp(`<${urlPattern}>`),
  ];
  for (const pattern of linkPatterns) {
    const hit = pattern.exec(line);
    if (hit === null) continue;
    const afterLink = line.slice(hit.index + hit[0].length);
    const marker = /^([\s)*_~`]*?)\[\^deadlink-(\d+)\]/.exec(afterLink);
    if (marker !== null) {
      return `deadlink-${marker[2]}`;
    }
  }
  return null;
}
/**
 * Inserts a `[^deadlink-N]` marker after the first link to `url` in a line.
 *
 * For an inline link `[text](url)` the marker goes after any `*` / `_`
 * characters that immediately follow the link, preceded by a space. For an
 * autolink `<url>` the marker is placed directly after the closing `>`.
 *
 * @param {string} line - A single Markdown line.
 * @param {string} url - The dead URL.
 * @param {number} nextId - Number to use in the marker.
 * @returns {string|null} The updated line, or `null` when the line holds
 *   neither link form for this URL.
 */
function insertDeadlinkReference(line, url, nextId) {
  const urlPattern = escapeRegExp(url);
  const marker = `[^deadlink-${nextId}]`;

  const inlineLink = new RegExp(`\\[[^\\]]*\\]\\(${urlPattern}\\)`).exec(line);
  if (inlineLink !== null) {
    const linkEnd = inlineLink.index + inlineLink[0].length;
    // Step over emphasis closers so the marker lands outside "**…**".
    const closers = /^[*_]*/.exec(line.slice(linkEnd))[0];
    const insertAt = linkEnd + closers.length;
    return `${line.slice(0, insertAt)} ${marker}${line.slice(insertAt)}`;
  }

  const autolink = new RegExp(`<${urlPattern}>`).exec(line);
  if (autolink === null) {
    return null;
  }
  const autolinkEnd = autolink.index + autolink[0].length;
  return `${line.slice(0, autolinkEnd)}${marker}${line.slice(autolinkEnd)}`;
}
/**
 * Creates or refreshes the footnote definition for a dead-link marker.
 *
 * @param {string} body - Markdown body.
 * @param {string} footnoteId - Footnote identifier, e.g. "deadlink-1".
 * @param {string} isoDate - Check date, formatted for display in the note.
 * @returns {{body: string, changed: boolean}} The resulting body and whether
 *   it differs from the input. A missing definition is appended after a blank
 *   line; an existing one is replaced only when its text differs.
 */
function upsertFootnoteDefinition(body, footnoteId, isoDate) {
  const desired = `Lien inaccessible depuis le ${formatDisplayDate(isoDate)}`;
  const definitionLine = `[^${footnoteId}]: ${desired}`;
  const definitionRegex = new RegExp(`^\\[\\^${footnoteId}\\]:\\s*(.+)$`, "m");

  const found = definitionRegex.exec(body);
  if (found === null) {
    const padded = ensureBlankLineBeforeAppend(ensureTrailingNewline(body));
    return { body: `${padded}${definitionLine}\n`, changed: true };
  }
  if (found[1].trim() === desired) {
    return { body, changed: false };
  }
  return { body: body.replace(definitionRegex, definitionLine), changed: true };
}
/**
 * Adds a dead-link footnote to the first link to `url` in a Markdown file.
 *
 * The body (everything after the frontmatter, if any) is scanned line by
 * line, skipping fenced code blocks. On the first line that links to `url`,
 * an existing `[^deadlink-N]` marker is reused or a new one is inserted; the
 * matching footnote definition is then created or refreshed. The frontmatter
 * block is written back untouched. Nothing is written when the file would not
 * change.
 *
 * Fence tracking follows the CommonMark rules: a fence is closed only by a
 * run of the same character that is at least as long as the opening run and
 * is followed by nothing but whitespace, and a backtick run followed by text
 * containing another backtick is an inline code span, not a fence opener.
 *
 * @param {string} filePath - Absolute path of the Markdown file.
 * @param {string} url - The dead URL to annotate.
 * @param {*} info - Cache entry providing the check date.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function markMarkdownLink(filePath, url, info) {
  const original = fs.readFileSync(filePath, "utf8");
  const parsed = splitFrontmatter(original);
  const hasFrontmatter = Boolean(parsed);
  const block = parsed?.block ?? "";
  const bodyOriginal = parsed ? parsed.body : original;
  const lines = bodyOriginal.split("\n");
  // New markers continue the numbering already used in this file.
  const nextId = collectDeadlinkMaxId(bodyOriginal) + 1;
  // Marker run (e.g. "````") of the fence currently open, or null outside one.
  let openFence = null;
  let referenceId = null;
  let changed = false;

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];
    const trimmed = line.trimStart();
    const fenceMatch = trimmed.match(/^(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      const rest = trimmed.slice(marker.length);
      if (openFence === null) {
        // "```code``` text" is an inline code span, not a fence opener.
        const isInlineCode = marker[0] === "`" && rest.includes("`");
        if (!isInlineCode) {
          openFence = marker;
          continue;
        }
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ""
      ) {
        openFence = null;
        continue;
      }
      // Any other fence-looking line inside an open fence is plain code.
    }
    if (openFence !== null) {
      continue;
    }
    if (!line.includes(url)) {
      continue;
    }

    const existingRef = findExistingDeadlinkReference(line, url);
    if (existingRef) {
      referenceId = existingRef;
      break;
    }
    const updatedLine = insertDeadlinkReference(line, url, nextId);
    if (updatedLine) {
      lines[i] = updatedLine;
      referenceId = `deadlink-${nextId}`;
      changed = true;
      break;
    }
    // The URL appears in a form that cannot be annotated; keep looking.
  }

  if (!referenceId) {
    return { changed: false };
  }

  const definition = upsertFootnoteDefinition(
    lines.join("\n"),
    referenceId,
    getCheckedDate(info)
  );
  if (definition.changed) {
    changed = true;
  }
  if (!changed) {
    return { changed: false };
  }

  const updatedContent = hasFrontmatter ? `${block}${definition.body}` : definition.body;
  if (updatedContent === original) {
    return { changed: false };
  }
  fs.writeFileSync(filePath, updatedContent, "utf8");
  return { changed: true };
}
/**
 * Routes a file to the handler that knows how to annotate it.
 *
 * Pages under content/interets/liens-interessants/ are handled by
 * markInterestingLink(); any other ".md" file by markMarkdownLink(); every
 * other file is left alone. A missing file is reported with a warning.
 *
 * @param {string} absolutePath - Absolute path of the file to process.
 * @param {string} url - The dead URL.
 * @param {*} info - Cache entry for the URL.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function processFile(absolutePath, url, info) {
  if (!fs.existsSync(absolutePath)) {
    console.warn(`Fichier introuvable: ${absolutePath}`);
    return { changed: false };
  }
  // path.relative() uses the platform separator ("\" on Windows); normalise
  // to "/" so the prefix comparison below works on every platform.
  const relative = path
    .relative(SITE_ROOT, absolutePath)
    .split(path.sep)
    .join("/");
  if (relative.startsWith("content/interets/liens-interessants/")) {
    return markInterestingLink(absolutePath, url, info);
  }
  if (path.extname(relative).toLowerCase() === ".md") {
    return markMarkdownLink(absolutePath, url, info);
  }
  return { changed: false };
}
/**
 * Command-line entry point.
 *
 * Loads the link cache, keeps the entries flagged `manually_killed: true`,
 * and runs processFile() on every file associated with each of them. Progress
 * and a final summary are printed to the console. The process exits with
 * status 1 when the cache file does not exist.
 */
function main() {
  if (!fs.existsSync(CACHE_PATH)) {
    console.error("Cache introuvable. Exécutez d'abord tools/check_external_links.js.");
    process.exit(1);
  }

  const killedLinks = Object.entries(loadCache(CACHE_PATH)).filter(
    ([, details]) => details && details.manually_killed === true
  );
  if (killedLinks.length === 0) {
    console.log("Aucun lien marqué comme mort manuellement dans le cache.");
    return;
  }

  let updatedCount = 0;
  let problemCount = 0;
  for (const [url, info] of killedLinks) {
    const files = getFilesForUrl(info);
    if (files.length === 0) {
      console.warn(`Aucun fichier associé à ${url}.`);
      problemCount += 1;
      continue;
    }
    for (const relativePath of files) {
      const absolutePath = path.isAbsolute(relativePath)
        ? relativePath
        : path.resolve(SITE_ROOT, relativePath);
      try {
        const outcome = processFile(absolutePath, url, info);
        if (outcome.changed) {
          updatedCount += 1;
          console.log(
            `${path.relative(SITE_ROOT, absolutePath)} mis à jour pour ${url}`
          );
        }
      } catch (error) {
        // One failing file must not stop the remaining ones.
        problemCount += 1;
        console.error(
          `Erreur lors du traitement de ${path.relative(SITE_ROOT, absolutePath)} (${error.message}).`
        );
      }
    }
  }

  console.log(
    updatedCount === 0
      ? "Aucune modification nécessaire."
      : `${updatedCount} fichier(s) mis à jour.`
  );
  if (problemCount > 0) {
    console.warn(`${problemCount} fichier(s) n'ont pas pu être traités complètement.`);
  }
}
// Run main() only when this file is executed directly, not when it is
// loaded through require() by another module.
if (require.main === module) {
main();
}