Marquage des liens considérés comme définitivement morts
This commit is contained in:
450
tools/mark_dead_links.js
Normal file
450
tools/mark_dead_links.js
Normal file
@@ -0,0 +1,450 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const yaml = require("js-yaml");
|
||||
|
||||
// Site root: the parent of the directory that contains this script.
const SITE_ROOT = path.resolve(__dirname, "..");
// Optional JSON configuration file located next to this script.
const CONFIG_PATH = path.join(__dirname, "config.json");
|
||||
|
||||
/**
 * Read the optional JSON configuration stored at CONFIG_PATH.
 *
 * @returns {object} The parsed configuration, or an empty object when the file is
 *   missing, cannot be read or parsed, or does not contain a JSON object.
 */
function loadConfig() {
  if (!fs.existsSync(CONFIG_PATH)) {
    return {};
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf8"));
    // `null` is valid JSON but would make later property lookups such as
    // `config.externalLinks` throw; arrays and scalars carry no settings either.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      return parsed;
    }
    return {};
  } catch (error) {
    console.warn(
      `Impossible de parser ${path.relative(SITE_ROOT, CONFIG_PATH)} (${error.message}).`
    );
    return {};
  }
}
|
||||
|
||||
const config = loadConfig();

// Defaults come first so that keys present in `config.externalLinks` override them.
const externalConfig = {
  cacheDir: path.join(__dirname, "cache"),
  cacheFile: "external_links.yaml",
  ...(config.externalLinks || {}),
};

// A relative cache directory is resolved against the site root.
const CACHE_DIR = path.isAbsolute(externalConfig.cacheDir)
  ? externalConfig.cacheDir
  : path.resolve(SITE_ROOT, externalConfig.cacheDir);

// A relative cache file is looked up inside the cache directory.
const CACHE_PATH = path.isAbsolute(externalConfig.cacheFile)
  ? externalConfig.cacheFile
  : path.join(CACHE_DIR, externalConfig.cacheFile);
|
||||
|
||||
/**
 * Load the YAML link cache.
 *
 * @param {string} cachePath - Path of the YAML cache file.
 * @returns {object} The cache mapping (its entries are consumed as `[url, info]`
 *   pairs by `main`), or an empty object when the file is missing, empty,
 *   unreadable, or does not contain a mapping.
 */
function loadCache(cachePath) {
  if (!fs.existsSync(cachePath)) {
    return {};
  }
  try {
    // NOTE(review): with js-yaml < 4, `load` can instantiate JS-specific types and
    // `safeLoad` should be preferred — confirm the installed version.
    const data = yaml.load(fs.readFileSync(cachePath, "utf8")) || {};
    if (typeof data !== "object" || Array.isArray(data)) {
      // A list or scalar cannot be iterated as url -> info entries.
      console.error("Erreur lors de la lecture du cache YAML (format inattendu).");
      return {};
    }
    return data;
  } catch (error) {
    console.error(`Erreur lors de la lecture du cache YAML (${error.message}).`);
    return {};
  }
}
|
||||
|
||||
/**
 * Return the moment a link was last checked, as an ISO 8601 string.
 *
 * @param {object|null|undefined} info - Cache entry; its `checked` field may be a
 *   date string or a `Date` (js-yaml turns unquoted YAML timestamps into `Date`).
 * @returns {string} `info.checked` normalised with `toISOString()`, or the current
 *   time when the field is missing or not a valid date.
 */
function getCheckedDate(info) {
  const raw = info ? info.checked : undefined;
  if (typeof raw === "string" || raw instanceof Date) {
    const parsed = new Date(raw);
    if (!Number.isNaN(parsed.valueOf())) {
      return parsed.toISOString();
    }
  }
  // No usable date recorded: fall back to "now".
  return new Date().toISOString();
}
|
||||
|
||||
/**
 * Extract the numeric status stored on a cache entry.
 *
 * @param {object|null|undefined} info - Cache entry.
 * @returns {number|null} `info.status` when it is a finite number, otherwise `null`
 *   (strings are not converted; NaN and Infinity are rejected).
 */
function getStatusCode(info) {
  const status = info ? info.status : undefined;
  return Number.isFinite(status) ? status : null;
}
|
||||
|
||||
// Shared formatter producing long French dates (numeric day, full month name,
// numeric year). No `timeZone` option is passed here.
const frenchDateFormatter = new Intl.DateTimeFormat("fr-FR", {
  day: "numeric",
  month: "long",
  year: "numeric",
});
|
||||
|
||||
/**
 * Format a date string for display with `frenchDateFormatter`.
 *
 * @param {string} isoString - Date string to format.
 * @returns {string} The formatted date; today's date is formatted instead when the
 *   argument is not a string or cannot be parsed as a date.
 */
function formatDisplayDate(isoString) {
  const parsed = typeof isoString === "string" ? new Date(isoString) : null;
  const isValid = parsed !== null && !Number.isNaN(parsed.valueOf());
  return frenchDateFormatter.format(isValid ? parsed : new Date());
}
|
||||
|
||||
/**
 * List the files associated with a cache entry.
 *
 * @param {object|null|undefined} info - Cache entry.
 * @returns {string[]} `info.files` when it is a non-empty array; otherwise the
 *   de-duplicated part before the first ":" of each `info.locations` item;
 *   otherwise an empty array.
 */
function getFilesForUrl(info) {
  if (!info) return [];
  const { files, locations } = info;
  if (Array.isArray(files) && files.length > 0) {
    return files;
  }
  if (Array.isArray(locations) && locations.length > 0) {
    // Keep only what precedes the first colon of each location.
    return [...new Set(locations.map((entry) => String(entry).split(":")[0]))];
  }
  return [];
}
|
||||
|
||||
/**
 * Split a document into its leading YAML front matter and the remaining body.
 *
 * The front matter must start on the first line with `---` and end with a line
 * made only of `---` (optionally followed by spaces or tabs). LF and CRLF line
 * endings are both accepted.
 *
 * @param {string} content - Full file content.
 * @returns {{frontmatter: object, block: string, body: string}|null} The parsed
 *   front matter, the raw text of the delimited block (delimiters included) and
 *   everything after it; `null` when there is no front matter or when it is not
 *   a valid YAML mapping.
 */
function splitFrontmatter(content) {
  if (!content.startsWith("---")) {
    return null;
  }
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
  if (!match) {
    return null;
  }

  let frontmatter;
  try {
    frontmatter = yaml.load(match[1]) || {};
  } catch (error) {
    console.error(`Frontmatter YAML invalide (${error.message}).`);
    return null;
  }
  // Callers read and assign properties on the result, which only makes sense for
  // a mapping (not for a list or a scalar).
  if (typeof frontmatter !== "object" || Array.isArray(frontmatter)) {
    console.error("Frontmatter YAML invalide (un objet était attendu).");
    return null;
  }

  const block = match[0];
  return { frontmatter, block, body: content.slice(block.length) };
}
|
||||
|
||||
/**
 * Escape the characters that are special in a regular expression so that the
 * value can be embedded literally in a `RegExp` source.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with each special character prefixed by a backslash.
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||
|
||||
/**
 * Make sure a string ends with a line feed.
 *
 * @param {string} value - Text to check.
 * @returns {string} `value` unchanged when it already ends with "\n", otherwise
 *   `value` followed by "\n".
 */
function ensureTrailingNewline(value) {
  return value.endsWith("\n") ? value : `${value}\n`;
}
|
||||
|
||||
/**
 * Pad a body so that it ends with two line feeds, i.e. text appended afterwards
 * is separated from the existing content by a blank line.
 *
 * @param {string} body - Existing text.
 * @returns {string} `body` ending with "\n\n" (zero, one or two "\n" are added).
 */
function ensureBlankLineBeforeAppend(body) {
  if (body.endsWith("\n\n")) {
    return body;
  }
  return body.endsWith("\n") ? `${body}\n` : `${body}\n\n`;
}
|
||||
|
||||
/**
 * Flag an "interesting link" page as dead: prefix its title with "[Lien mort]",
 * record a `{ date, http_code }` entry in the `status` list of the front matter,
 * and add or refresh a "> Lien inaccessible depuis le …" note in the body.
 *
 * The front matter is re-serialised with `yaml.dump` when anything changed.
 *
 * @param {string} filePath - Path of the file to update.
 * @param {string} url - Dead URL (not used by this function; kept so that both
 *   marking functions share the same signature).
 * @param {object} info - Cache entry providing the check date and the status.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function markInterestingLink(filePath, url, info) {
  const original = fs.readFileSync(filePath, "utf8");
  const parsed = splitFrontmatter(original);
  if (!parsed) {
    console.warn(`Frontmatter introuvable pour ${path.relative(SITE_ROOT, filePath)}, ignoré.`);
    return { changed: false };
  }

  const { frontmatter } = parsed;
  let body = parsed.body;
  const checkedDate = getCheckedDate(info);
  const displayDate = formatDisplayDate(checkedDate);
  const httpCode = getStatusCode(info);
  let changed = false;

  if (typeof frontmatter.title === "string" && !frontmatter.title.startsWith("[Lien mort]")) {
    frontmatter.title = `[Lien mort] ${frontmatter.title}`;
    changed = true;
  }

  // Any non-array `status` value is replaced by a fresh list.
  const statusEntries = Array.isArray(frontmatter.status) ? [...frontmatter.status] : [];

  // js-yaml loads unquoted YAML timestamps as Date objects, and hand-written
  // strings may omit milliseconds; compare on the normalised ISO form so an
  // already recorded check is not added a second time.
  const normalizeDate = (value) => {
    if (typeof value !== "string" && !(value instanceof Date)) {
      return value;
    }
    const date = new Date(value);
    return Number.isNaN(date.valueOf()) ? value : date.toISOString();
  };

  const statusEntry = statusEntries.find(
    (entry) => entry && typeof entry === "object" && normalizeDate(entry.date) === checkedDate
  );
  if (!statusEntry) {
    statusEntries.push({ date: checkedDate, http_code: httpCode });
    changed = true;
  } else if (statusEntry.http_code !== httpCode) {
    statusEntry.http_code = httpCode;
    changed = true;
  }
  frontmatter.status = statusEntries;

  const noteLine = `> Lien inaccessible depuis le ${displayDate}`;
  const noteRegex = /(>\s*Lien inaccessible depuis le\s+)([^\n]+)/;
  const existing = body.match(noteRegex);
  if (existing) {
    if (existing[2].trim() !== displayDate) {
      // Function replacer: the note is inserted verbatim, without `$` expansion.
      body = body.replace(noteRegex, () => noteLine);
      changed = true;
    }
  } else {
    body = `${ensureBlankLineBeforeAppend(body)}${noteLine}\n`;
    changed = true;
  }

  if (!changed) {
    return { changed: false };
  }

  const updatedContent = `---\n${yaml.dump(frontmatter)}---\n${body}`;
  if (updatedContent === original) {
    return { changed: false };
  }
  fs.writeFileSync(filePath, updatedContent, "utf8");
  return { changed: true };
}
|
||||
|
||||
/**
 * Find the highest number used by `[^deadlink-N]` markers in a text.
 *
 * @param {string} body - Text to scan.
 * @returns {number} The largest N found, or 0 when there is no such marker.
 */
function collectDeadlinkMaxId(body) {
  let maxId = 0;
  for (const [, digits] of body.matchAll(/\[\^deadlink-(\d+)\]/g)) {
    const value = Number.parseInt(digits, 10);
    if (Number.isInteger(value) && value > maxId) {
      maxId = value;
    }
  }
  return maxId;
}
|
||||
|
||||
/**
 * Look for a `[^deadlink-N]` footnote reference placed right after a link to
 * `url` on the given line.
 *
 * Both `[text](url)` and `<url>` forms are checked (first occurrence of each).
 * Only whitespace and the characters `)`, `*`, `_`, `~` and backtick may stand
 * between the link and the reference.
 *
 * @param {string} line - Line of Markdown to inspect.
 * @param {string} url - URL to look for.
 * @returns {string|null} The footnote identifier (`deadlink-N`), or `null`.
 */
function findExistingDeadlinkReference(line, url) {
  if (!line.includes(url)) return null;

  const escapedUrl = escapeRegExp(url);
  const linkPatterns = [
    new RegExp(`\\[[^\\]]*\\]\\(${escapedUrl}\\)`),
    new RegExp(`<${escapedUrl}>`),
  ];

  for (const pattern of linkPatterns) {
    const match = pattern.exec(line);
    if (!match) continue;
    const tail = line.slice(match.index + match[0].length);
    const footnoteMatch = tail.match(/^[\s)*_~`]*\[\^deadlink-(\d+)\]/);
    if (footnoteMatch) {
      return `deadlink-${footnoteMatch[1]}`;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Insert a `[^deadlink-N]` footnote reference after the first link to `url`
 * found on the line.
 *
 * For a `[text](url)` link the reference is placed after any closing emphasis
 * markers (`*`, `_`, `~`) that directly follow the link, preceded by a space.
 * For an `<url>` autolink it is placed immediately after the `>`.
 *
 * @param {string} line - Line of Markdown to update.
 * @param {string} url - URL whose link must be annotated.
 * @param {number} nextId - Number to use in the footnote identifier.
 * @returns {string|null} The updated line, or `null` when the line holds neither
 *   link form for this URL.
 */
function insertDeadlinkReference(line, url, nextId) {
  const escapedUrl = escapeRegExp(url);
  const footnoteRef = `[^deadlink-${nextId}]`;

  const markdownMatch = new RegExp(`\\[[^\\]]*\\]\\(${escapedUrl}\\)`).exec(line);
  if (markdownMatch) {
    let insertPos = markdownMatch.index + markdownMatch[0].length;
    // Step over closing emphasis/strikethrough markers so the reference is not
    // wrapped by them; these are characters the lookup for an existing
    // reference also tolerates between the link and the footnote.
    while (insertPos < line.length && /[*_~]/.test(line[insertPos])) {
      insertPos += 1;
    }
    return `${line.slice(0, insertPos)} ${footnoteRef}${line.slice(insertPos)}`;
  }

  const angleMatch = new RegExp(`<${escapedUrl}>`).exec(line);
  if (angleMatch) {
    const end = angleMatch.index + angleMatch[0].length;
    return `${line.slice(0, end)}${footnoteRef}${line.slice(end)}`;
  }

  return null;
}
|
||||
|
||||
/**
 * Create or refresh the definition line of a dead-link footnote.
 *
 * @param {string} body - Markdown body.
 * @param {string} footnoteId - Footnote identifier, e.g. `deadlink-3`.
 * @param {string} isoDate - Date string passed to `formatDisplayDate`.
 * @returns {{body: string, changed: boolean}} The body (with the definition
 *   rewritten in place, or appended after a blank line when missing) and whether
 *   it differs from the input.
 */
function upsertFootnoteDefinition(body, footnoteId, isoDate) {
  const desired = `Lien inaccessible depuis le ${formatDisplayDate(isoDate)}`;
  const definitionLine = `[^${footnoteId}]: ${desired}`;
  // Only horizontal whitespace and same-line text are matched, so an empty
  // definition can never capture (and overwrite) the following line.
  const definitionRegex = new RegExp(
    `^\\[\\^${escapeRegExp(footnoteId)}\\]:[^\\S\\r\\n]*([^\\r\\n]*)`,
    "m"
  );

  const match = definitionRegex.exec(body);
  if (match) {
    if (match[1].trim() === desired) {
      return { body, changed: false };
    }
    // Function replacer: the definition is inserted verbatim.
    return { body: body.replace(definitionRegex, () => definitionLine), changed: true };
  }

  const prefix = ensureBlankLineBeforeAppend(ensureTrailingNewline(body));
  return { body: `${prefix}${definitionLine}\n`, changed: true };
}
|
||||
|
||||
/**
 * Annotate the first link to `url` found outside fenced code blocks with a
 * `[^deadlink-N]` footnote and create or refresh the matching footnote
 * definition. The front matter block, when present, is written back untouched.
 *
 * @param {string} filePath - Path of the Markdown file to update.
 * @param {string} url - Dead URL to look for.
 * @param {object} info - Cache entry providing the check date.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function markMarkdownLink(filePath, url, info) {
  const original = fs.readFileSync(filePath, "utf8");
  const parsed = splitFrontmatter(original);
  const block = parsed?.block ?? "";
  const bodyOriginal = parsed ? parsed.body : original;

  const lines = bodyOriginal.split("\n");
  const nextId = collectDeadlinkMaxId(bodyOriginal) + 1;
  // `{ char, length }` of the opening marker while inside a fenced code block.
  let openFence = null;
  let referenceId = null;
  let changed = false;

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];
    const trimmed = line.trimStart();
    const fenceMatch = trimmed.match(/^(`{3,}|~{3,})/);

    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (!openFence) {
        openFence = { char: marker[0], length: marker.length };
        continue;
      }
      // A fence is closed only by a marker of the same character, at least as
      // long as the opening one, with nothing but whitespace after it. Shorter
      // markers or markers carrying an info string are code block content.
      const closesFence =
        marker[0] === openFence.char &&
        marker.length >= openFence.length &&
        trimmed.slice(marker.length).trim() === "";
      if (closesFence) {
        openFence = null;
        continue;
      }
    }

    if (openFence || !line.includes(url)) {
      continue;
    }

    const existingRef = findExistingDeadlinkReference(line, url);
    if (existingRef) {
      referenceId = existingRef;
      break;
    }

    const updatedLine = insertDeadlinkReference(line, url, nextId);
    if (updatedLine) {
      lines[i] = updatedLine;
      referenceId = `deadlink-${nextId}`;
      changed = true;
      break;
    }
    // The URL is on this line but not as a supported link form: keep looking.
  }

  if (!referenceId) {
    return { changed: false };
  }

  const { body, changed: definitionChanged } = upsertFootnoteDefinition(
    lines.join("\n"),
    referenceId,
    getCheckedDate(info)
  );
  if (!changed && !definitionChanged) {
    return { changed: false };
  }

  const updatedContent = `${block}${body}`;
  if (updatedContent === original) {
    return { changed: false };
  }
  fs.writeFileSync(filePath, updatedContent, "utf8");
  return { changed: true };
}
|
||||
|
||||
/**
 * Apply the dead-link marking that matches the location of a file.
 *
 * Files under `content/interets/liens-interessants/` are handled by
 * `markInterestingLink`, other `.md` files by `markMarkdownLink`; anything else
 * is left alone.
 *
 * @param {string} absolutePath - Absolute path of the file.
 * @param {string} url - Dead URL.
 * @param {object} info - Cache entry for the URL.
 * @returns {{changed: boolean}} Whether the file was rewritten.
 */
function processFile(absolutePath, url, info) {
  if (!fs.existsSync(absolutePath)) {
    console.warn(`Fichier introuvable: ${absolutePath}`);
    return { changed: false };
  }

  // `path.relative` uses the platform separator; normalise it to "/" so that the
  // prefix test below also matches on Windows.
  const relative = path.relative(SITE_ROOT, absolutePath).split(path.sep).join("/");

  if (relative.startsWith("content/interets/liens-interessants/")) {
    return markInterestingLink(absolutePath, url, info);
  }
  if (path.extname(relative).toLowerCase() === ".md") {
    return markMarkdownLink(absolutePath, url, info);
  }
  return { changed: false };
}
|
||||
|
||||
/**
 * Entry point: read the link cache, keep the entries flagged with
 * `manually_killed: true`, and mark every file associated with them.
 *
 * Exits with status 1 when the cache file does not exist. Errors raised while
 * processing a file are logged and counted; the remaining files are still
 * processed.
 */
function main() {
  if (!fs.existsSync(CACHE_PATH)) {
    console.error("Cache introuvable. Exécutez d'abord tools/check_external_links.js.");
    process.exit(1);
  }

  const cache = loadCache(CACHE_PATH);
  const entries = Object.entries(cache).filter(
    ([, info]) => info && info.manually_killed === true
  );

  if (entries.length === 0) {
    console.log("Aucun lien marqué comme mort manuellement dans le cache.");
    return;
  }

  // A Set, so that a file updated for several URLs is counted once in the total.
  const updatedFiles = new Set();
  let warnings = 0;

  for (const [url, info] of entries) {
    const files = getFilesForUrl(info);
    if (files.length === 0) {
      console.warn(`Aucun fichier associé à ${url}.`);
      warnings += 1;
      continue;
    }

    for (const relativePath of files) {
      const absolutePath = path.isAbsolute(relativePath)
        ? relativePath
        : path.resolve(SITE_ROOT, relativePath);
      try {
        const { changed } = processFile(absolutePath, url, info);
        if (changed) {
          updatedFiles.add(absolutePath);
          console.log(
            `✅ ${path.relative(SITE_ROOT, absolutePath)} mis à jour pour ${url}`
          );
        }
      } catch (error) {
        warnings += 1;
        console.error(
          `Erreur lors du traitement de ${path.relative(SITE_ROOT, absolutePath)} (${error.message}).`
        );
      }
    }
  }

  if (updatedFiles.size === 0) {
    console.log("Aucune modification nécessaire.");
  } else {
    console.log(`${updatedFiles.size} fichier(s) mis à jour.`);
  }

  if (warnings > 0) {
    console.warn(`${warnings} fichier(s) n'ont pas pu être traités complètement.`);
  }
}
|
||||
|
||||
// Run only when this file is executed directly, not when it is loaded with require().
if (require.main === module) {
  main();
}
|
||||
Reference in New Issue
Block a user