816 lines
22 KiB
JavaScript
816 lines
22 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
const util = require("util");
|
|
const yaml = require("js-yaml");
|
|
const UserAgent = require("user-agents");
|
|
const { execFile } = require("child_process");
|
|
const {
|
|
collectMarkdownLinksFromFile,
|
|
extractLinksFromText,
|
|
} = require("./lib/markdown_links");
|
|
|
|
// Promise-returning flavour of child_process.execFile.
const execFileAsync = util.promisify(execFile);

// The site root is the parent of the directory that contains this script.
const SITE_ROOT = path.resolve(__dirname, "..");
const CONTENT_DIR = path.join(SITE_ROOT, "content");
const CONFIG_PATH = path.join(__dirname, "config.json");

// Number of milliseconds in one day.
const DAY_MS = 1000 * 60 * 60 * 24;

// Built-in settings; the `externalLinks` section of config.json overrides them.
const DEFAULT_CONFIG = {
  // Where the report/cache file lives and what it is called.
  cacheDir: path.join(__dirname, "cache"),
  cacheFile: "external_links.yaml",

  // Pacing (milliseconds) and per-request timeout (seconds).
  hostDelayMs: 2000,
  retryDelayMs: 5000,
  requestTimeoutSeconds: 5,

  // How long (in days) a cached result stays valid, per outcome family.
  cacheTtlSuccessDays: 30,
  cacheTtlClientErrorDays: 7,
  cacheTtlServerErrorDays: 1,
  cacheTtlTimeoutDays: 7,

  // Number of hosts checked in parallel.
  maxConcurrentHosts: 4,

  // null means "generate a user agent string at startup".
  userAgent: null,

  // Cookie handling for curl.
  enableCookies: true,
  cookieJar: path.join(__dirname, "cache", "curl_cookies.txt"),
};
|
|
|
|
/**
 * Reads the optional JSON configuration file located at CONFIG_PATH.
 *
 * A missing file, unparsable JSON, or JSON whose top level is not a plain
 * object (for example `null` or an array) all yield an empty object, so the
 * caller can safely read properties from the result.
 *
 * @returns {object} The parsed configuration, or `{}` when unavailable.
 */
function loadConfig() {
  if (!fs.existsSync(CONFIG_PATH)) {
    return {};
  }

  const displayPath = path.relative(SITE_ROOT, CONFIG_PATH);
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, "utf8"));
  } catch (error) {
    console.warn(`Impossible de parser ${displayPath} (${error.message}).`);
    return {};
  }

  // `null`, scalars and arrays are valid JSON but not a usable configuration;
  // returning them would make `rawConfig.externalLinks` throw or misbehave.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    console.warn(`Configuration ignorée: ${displayPath} ne contient pas un objet JSON.`);
    return {};
  }
  return parsed;
}
|
|
|
|
// Effective settings: built-in defaults overlaid with config.json's `externalLinks`.
const rawConfig = loadConfig();
const settings = Object.assign({}, DEFAULT_CONFIG, rawConfig.externalLinks || {});

// Relative locations from the configuration are anchored at the site root
// (cache directory, cookie jar) or at the cache directory (report file).
const CACHE_DIR = (() => {
  const configured = settings.cacheDir;
  if (path.isAbsolute(configured)) {
    return configured;
  }
  return path.resolve(SITE_ROOT, configured);
})();
const REPORT_PATH = (() => {
  const configured = settings.cacheFile;
  if (path.isAbsolute(configured)) {
    return configured;
  }
  return path.join(CACHE_DIR, configured);
})();
const COOKIE_JAR = (() => {
  const configured = settings.cookieJar;
  if (!configured) {
    return path.join(CACHE_DIR, "curl_cookies.txt");
  }
  if (path.isAbsolute(configured)) {
    return configured;
  }
  return path.resolve(SITE_ROOT, configured);
})();

// Numeric settings, clamped to sane minimums.
const HOST_DELAY_MS = Math.max(0, Number(settings.hostDelayMs) || 0);
const RETRY_DELAY_MS = Math.max(0, Number(settings.retryDelayMs) || 0);
const REQUEST_TIMEOUT_SECONDS = Math.max(1, Number(settings.requestTimeoutSeconds) || 5);
const MAX_CONCURRENT_HOSTS = Math.max(
  1,
  Number.isFinite(Number(settings.maxConcurrentHosts))
    ? Number(settings.maxConcurrentHosts)
    : DEFAULT_CONFIG.maxConcurrentHosts
);

// A non-blank configured user agent wins; otherwise one is generated.
const DEFAULT_USER_AGENT = (() => {
  const configured = typeof settings.userAgent === "string" ? settings.userAgent.trim() : "";
  return configured || new UserAgent().toString();
})();

// Cookies stay enabled unless the configuration says exactly `false`.
const ENABLE_COOKIES = settings.enableCookies !== false;

// Cache lifetimes in milliseconds, one per outcome family.
const [
  CACHE_TTL_SUCCESS_MS,
  CACHE_TTL_CLIENT_ERROR_MS,
  CACHE_TTL_SERVER_ERROR_MS,
  CACHE_TTL_TIMEOUT_MS,
] = [
  "cacheTtlSuccessDays",
  "cacheTtlClientErrorDays",
  "cacheTtlServerErrorDays",
  "cacheTtlTimeoutDays",
].map((key) => daysToMs(pickNumber(settings[key], DEFAULT_CONFIG[key])));

// Make sure the cache directory exists and, when cookies are on, that the
// cookie jar file exists too.
fs.mkdirSync(CACHE_DIR, { recursive: true });
if (ENABLE_COOKIES) {
  fs.mkdirSync(path.dirname(COOKIE_JAR), { recursive: true });
  if (!fs.existsSync(COOKIE_JAR)) {
    const handle = fs.openSync(COOKIE_JAR, "a");
    fs.closeSync(handle);
  }
}
|
|
|
|
/**
 * Converts a configuration value to a finite number, or returns the fallback.
 *
 * Only numbers and non-blank strings are considered. `null`, booleans, blank
 * strings and other types fall back, because `Number()` would otherwise turn
 * them into 0 or 1 and silently override the default.
 *
 * @param {*} value - Raw value read from the configuration.
 * @param {number} fallback - Value returned when `value` is unusable.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function pickNumber(value, fallback) {
  const isNumber = typeof value === "number";
  const isNonBlankString = typeof value === "string" && value.trim() !== "";
  if (!isNumber && !isNonBlankString) {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
|
|
/**
 * Converts a duration in days to milliseconds.
 *
 * @param {number} days - Duration in days.
 * @returns {number} Milliseconds, or 0 when `days` is not a finite positive number.
 */
function daysToMs(days) {
  const usable = Number.isFinite(days) && days > 0;
  return usable ? days * DAY_MS : 0;
}
|
|
|
|
/**
 * Creates the parent directory of a file, including missing ancestors.
 *
 * @param {string} targetFile - Path of the file whose directory must exist.
 */
function ensureDirectoryExists(targetFile) {
  const parentDir = path.dirname(targetFile);
  fs.mkdirSync(parentDir, { recursive: true });
}
|
|
|
|
/**
 * Rewrites the platform path separator to forward slashes.
 *
 * @param {*} relativePath - Path to convert; non-strings are returned untouched.
 * @returns {*} The path with "/" separators, or the original non-string value.
 */
function toPosix(relativePath) {
  if (typeof relativePath !== "string") {
    return relativePath;
  }
  const segments = relativePath.split(path.sep);
  return segments.join("/");
}
|
|
|
|
/**
 * Expresses a path relative to SITE_ROOT, using forward slashes.
 *
 * @param {string} filePath - Path to convert.
 * @returns {string} Site-relative path with "/" separators.
 */
function relativeToSite(filePath) {
  const fromRoot = path.relative(SITE_ROOT, filePath);
  return toPosix(fromRoot);
}
|
|
|
|
/**
 * Maps a content-relative file path to the path of the page it belongs to.
 *
 * A leading "content/" is dropped, a trailing `index`/`_index` file name is
 * removed (also at the content root, where no slash precedes it), the
 * Markdown extension (`.md` or `.markdown`) is stripped and slashes are
 * normalized.
 *
 * @param {string} relativeContentPath - Path relative to the content directory.
 * @returns {string|null} Page path starting with "/", or null for empty input.
 */
function toPagePath(relativeContentPath) {
  if (!relativeContentPath) return null;

  let normalized = toPosix(relativeContentPath);
  if (!normalized) return null;

  normalized = normalized.replace(/^content\//, "");
  if (!normalized) {
    return "/";
  }

  normalized = normalized
    // `(^|\/)` also matches an index file that sits directly in the content
    // root; requiring a leading slash turned "_index.md" into "/_index".
    .replace(/(^|\/)_?index\.(?:md|markdown)$/i, "")
    // Both Markdown extensions are stripped, not only ".md".
    .replace(/\.(?:md|markdown)$/i, "")
    .replace(/\/+/g, "/")
    .replace(/\/+$/, "")
    .replace(/^\/+/, "");

  return normalized ? `/${normalized}` : "/";
}
|
|
|
|
/**
 * Resolves the page path for a file stored under a bundle's `data/` folder.
 *
 * Everything from "/data/" onwards is dropped to obtain the bundle root. The
 * page path comes from the bundle's `index.md` or `_index.md` when one exists
 * under CONTENT_DIR, otherwise from the bundle root itself.
 *
 * @param {string} contentRelative - Path relative to the content directory.
 * @returns {string|null} Page path, or null for empty input.
 */
function deriveBundlePagePath(contentRelative) {
  if (!contentRelative) return null;

  const bundleRoot = contentRelative.replace(/\/data\/.*$/, "");
  const indexFile = ["index.md", "_index.md"]
    .map((name) => `${bundleRoot}/${name}`)
    .find((candidate) => fs.existsSync(path.join(CONTENT_DIR, candidate)));

  return toPagePath(indexFile === undefined ? bundleRoot : indexFile);
}
|
|
|
|
/**
 * Derives the page path for a site-relative file located under "content/".
 *
 * @param {*} relativeFile - Site-relative path (either separator style).
 * @returns {string|null} Page path, or null when the input is not a string or
 *   does not live under "content/".
 */
function derivePagePath(relativeFile) {
  if (typeof relativeFile !== "string") return null;

  const prefix = "content/";
  const posixPath = toPosix(relativeFile);
  if (!posixPath || !posixPath.startsWith(prefix)) return null;

  const inner = posixPath.slice(prefix.length);
  // Files below a "data" folder are attributed to the bundle that owns them.
  return inner.includes("/data/") ? deriveBundlePagePath(inner) : toPagePath(inner);
}
|
|
|
|
/**
 * Loads the previously saved report from REPORT_PATH.
 *
 * Two layouts are accepted: one where the URL map sits under an `entries`
 * key, and one where the document itself is the URL map. A missing or
 * unreadable file yields an empty state.
 *
 * @returns {{generatedAt: (string|null), links: Array, entries: object}}
 */
function loadState() {
  const emptyState = () => ({ generatedAt: null, links: [], entries: {} });

  if (!fs.existsSync(REPORT_PATH)) {
    return emptyState();
  }

  try {
    const payload = yaml.load(fs.readFileSync(REPORT_PATH, "utf8")) || {};
    const hasEntriesMap = Boolean(payload.entries) && typeof payload.entries === "object";
    return {
      generatedAt: payload.generatedAt || null,
      links: Array.isArray(payload.links) ? payload.links : [],
      entries: normalizeEntries(hasEntriesMap ? payload.entries : payload),
    };
  } catch (error) {
    console.warn(
      `Impossible de lire ${path.relative(SITE_ROOT, REPORT_PATH)} (${error.message}).`
    );
    return emptyState();
  }
}
|
|
|
|
/**
 * Builds a clean URL -> entry map from raw cached data.
 *
 * Keys that do not contain "://" are dropped; every remaining value is passed
 * through normalizeEntryShape.
 *
 * @param {*} rawEntries - Raw map read from the cache file.
 * @returns {object} Normalized entries keyed by URL.
 */
function normalizeEntries(rawEntries) {
  if (!rawEntries || typeof rawEntries !== "object") {
    return {};
  }
  return Object.keys(rawEntries)
    .filter((url) => url.includes("://"))
    .reduce((normalized, url) => {
      normalized[url] = normalizeEntryShape(url, rawEntries[url]);
      return normalized;
    }, {});
}
|
|
|
|
/**
 * Coerces one raw cache record into the canonical entry shape.
 *
 * `checked` is accepted as an alternative to `checkedAt`, and `files` as a
 * fallback for `locations`.
 *
 * @param {string} url - URL the record belongs to.
 * @param {*} raw - Raw record (may be null or undefined).
 * @returns {{url: string, status: (number|null), errorType: *, method: *, checkedAt: *, locations: Array}}
 */
function normalizeEntryShape(url, raw) {
  const source = raw ?? {};
  const checkedAt = source.checkedAt || source.checked || null;
  const locations = normalizeLocations(source.locations, source.files);
  const status = typeof source.status === "number" ? source.status : null;

  return {
    url,
    status,
    errorType: source.errorType || null,
    method: source.method || null,
    checkedAt,
    locations,
  };
}
|
|
|
|
/**
 * Normalizes the places where a URL was found.
 *
 * `locations` may hold "file:line" strings or `{ file, line, page }` objects.
 * When it yields nothing, `fallbackFiles` (a list of file paths) is used with
 * no line information.
 *
 * @param {*} locations - Raw location list.
 * @param {*} fallbackFiles - Raw file list used when no location is usable.
 * @returns {Array<{file: string, line: (number|null), page: (string|null)}>}
 *   Deduplicated, sorted locations.
 */
function normalizeLocations(locations, fallbackFiles) {
  const collected = [];

  // "path/to/file.md:12" -> location; anything after a second ":" is ignored.
  const fromString = (text) => {
    const [filePart, linePart] = text.split(":");
    const file = toPosix(filePart.trim());
    return {
      file,
      line: linePart ? Number.parseInt(linePart, 10) || null : null,
      page: derivePagePath(file),
    };
  };

  // { file, line, page } -> location, or null when `file` is blank.
  const fromObject = (record) => {
    if (!sizeof(record.file)) {
      return null;
    }
    const file = toPosix(record.file);
    const line = typeof record.line === "number" ? record.line : null;
    const hasPage = typeof record.page === "string" && record.page.trim();
    return {
      file,
      line,
      page: hasPage ? toPosix(record.page.trim()) : derivePagePath(file),
    };
  };

  for (const entry of Array.isArray(locations) ? locations : []) {
    if (!entry) continue;
    if (typeof entry === "string") {
      collected.push(fromString(entry));
    } else if (typeof entry === "object") {
      const location = fromObject(entry);
      if (location) {
        collected.push(location);
      }
    }
  }

  if (collected.length === 0 && Array.isArray(fallbackFiles)) {
    for (const file of fallbackFiles) {
      if (!file) continue;
      const normalizedFile = toPosix(file);
      collected.push({
        file: normalizedFile,
        line: null,
        page: derivePagePath(normalizedFile),
      });
    }
  }

  return dedupeAndSortLocations(collected);
}
|
|
|
|
/**
 * Tells whether a value is a string with at least one non-whitespace character.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for non-blank strings only.
 */
function sizeof(value) {
  if (typeof value !== "string") {
    return false;
  }
  return value.trim() !== "";
}
|
|
|
|
/**
 * Removes duplicate locations and sorts them by file, then by line.
 *
 * Each item is normalized first (forward-slash file path, numeric line or
 * null) and the dedupe key is built from the normalized values. Building the
 * key from the raw values let two items that end up identical after
 * normalization (different separators, a non-numeric line) both survive.
 * Items without a string `file` are skipped, since they cannot be sorted.
 *
 * @param {*} list - Candidate locations (`{ file, line, page }` objects).
 * @returns {Array<{file: string, line: (number|null), page: (string|null)}>}
 *   Unique locations; within a file, entries without a line come last.
 */
function dedupeAndSortLocations(list) {
  if (!Array.isArray(list) || list.length === 0) {
    return [];
  }

  const unique = new Map();
  for (const item of list) {
    if (typeof item?.file !== "string" || !item.file) continue;

    const file = toPosix(item.file);
    const line = typeof item.line === "number" ? item.line : null;
    const key = `${file}::${line ?? ""}`;
    if (unique.has(key)) continue;

    const hasPage = typeof item.page === "string" && item.page.trim();
    unique.set(key, {
      file,
      line,
      page: hasPage ? toPosix(item.page.trim()) : derivePagePath(file),
    });
  }

  return Array.from(unique.values()).sort((a, b) => {
    const fileDiff = a.file.localeCompare(b.file);
    if (fileDiff !== 0) return fileDiff;
    // A missing line sorts after every numbered line of the same file.
    const lineA = a.line ?? Number.POSITIVE_INFINITY;
    const lineB = b.line ?? Number.POSITIVE_INFINITY;
    return lineA - lineB;
  });
}
|
|
|
|
/**
 * Serializes the state as YAML and writes it to REPORT_PATH, creating the
 * parent directory first when needed.
 *
 * @param {object} state - Report payload to persist.
 */
function saveState(state) {
  ensureDirectoryExists(REPORT_PATH);
  const serialized = yaml.dump(state);
  fs.writeFileSync(REPORT_PATH, serialized, "utf8");
}
|
|
|
|
/**
 * Creates a canonical entry for a URL, optionally seeded from existing data.
 *
 * @param {string} url - URL the entry describes.
 * @param {object} [existing={}] - Previous data to carry over.
 * @returns {{url: string, status: (number|null), errorType: *, method: *, checkedAt: *, locations: Array}}
 */
function createEntry(url, existing = {}) {
  const { status, errorType, method, checkedAt, locations } = existing;
  return {
    url,
    status: typeof status === "number" ? status : null,
    errorType: errorType || null,
    method: method || null,
    checkedAt: checkedAt || null,
    locations: Array.isArray(locations) ? dedupeAndSortLocations(locations) : [],
  };
}
|
|
|
|
/**
 * Combines cached entries with the occurrences found during the current scan.
 *
 * Only URLs present in `occurrences` are kept. Each keeps its cached check
 * data (or starts blank) and gets its locations replaced by the fresh ones.
 *
 * @param {object} entries - Cached entries keyed by URL.
 * @param {Map<string, Array>} occurrences - URL -> locations found by the scan.
 * @returns {object} Entries keyed by URL for the current scan.
 */
function mergeOccurrences(entries, occurrences) {
  const merged = {};
  occurrences.forEach((urlOccurrences, url) => {
    const base = entries[url] || createEntry(url);
    merged[url] = Object.assign({}, base, {
      url,
      locations: dedupeAndSortLocations(urlOccurrences),
    });
  });
  return merged;
}
|
|
|
|
/**
 * Registers that `url` appears in `filePath` at `line`.
 *
 * The (file, line) pair is stored once per URL; a non-numeric or non-finite
 * line is recorded as null.
 *
 * @param {Map<string, Array>} map - URL -> occurrences accumulator (mutated).
 * @param {string} filePath - File containing the link.
 * @param {*} line - Line number, when known.
 * @param {string} url - The link that was found.
 */
function recordOccurrence(map, filePath, line, url) {
  if (!map.has(url)) {
    map.set(url, []);
  }

  const file = relativeToSite(filePath);
  const lineNumber = typeof line === "number" && Number.isFinite(line) ? line : null;
  const page = derivePagePath(file);

  const keyOf = (location) => `${location.file}:${location.line ?? ""}`;
  const wanted = keyOf({ file, line: lineNumber });
  const known = map.get(url);

  const alreadyKnown = known.some((location) => keyOf(location) === wanted);
  if (!alreadyKnown) {
    known.push({ file, line: lineNumber, page });
  }
}
|
|
|
|
/**
 * Removes a trailing YAML comment from a single line of text.
 *
 * Quoted scalars are respected: inside single quotes `''` is an escaped
 * quote, inside double quotes a backslash escapes the next character. A `#`
 * starts a comment only at the beginning of the line or after a space or
 * tab, so URL fragments such as `https://example.com/page#section` stay
 * intact.
 *
 * This is a line-level heuristic, not a YAML parser: quote state is not
 * carried across lines.
 *
 * @param {string} line - One line of YAML source.
 * @returns {string} The line up to (excluding) the comment marker, or the
 *   whole line when it has no comment.
 */
function stripYamlInlineComment(line) {
  let inSingle = false;
  let inDouble = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];

    if (inDouble) {
      if (ch === "\\") {
        // Skip the escaped character so `\"` and `\\` cannot be mistaken for
        // the closing quote (or for an escape of the closing quote).
        i++;
      } else if (ch === '"') {
        inDouble = false;
      }
      continue;
    }

    if (inSingle) {
      if (ch === "'") {
        if (line[i + 1] === "'") {
          i++; // `''` is a literal quote inside a single-quoted scalar.
        } else {
          inSingle = false;
        }
      }
      continue;
    }

    if (ch === "'") {
      inSingle = true;
    } else if (ch === '"') {
      inDouble = true;
    } else if (ch === "#") {
      const previous = line[i - 1];
      if (i === 0 || previous === " " || previous === "\t") {
        return line.slice(0, i);
      }
    }
  }
  return line;
}
|
|
|
|
/**
 * Tells whether a line consists only of a YAML comment.
 *
 * @param {string} line - One line of YAML source.
 * @returns {boolean} True when the first non-whitespace character is "#".
 */
function isYamlCommentLine(line) {
  const firstVisible = line.trim().charAt(0);
  return firstVisible === "#";
}
|
|
|
|
/**
 * Detects a mapping line that opens a YAML block scalar (`key: |`, `key: >`,
 * optionally followed by indentation/chomping indicators such as `|-` or `>2`).
 *
 * @param {string} line - One line of YAML source.
 * @returns {boolean} True when the line, comment removed, ends with a block
 *   scalar header after a colon.
 */
function isBlockScalarIndicator(line) {
  const blockHeader = /:\s*[>|][0-9+-]*\s*$/;
  const meaningful = stripYamlInlineComment(line).trim();
  return blockHeader.test(meaningful);
}
|
|
|
|
/**
 * Walks a parsed YAML value and gathers every link found in its strings.
 *
 * @param {*} obj - Parsed YAML node (string, array, object or other scalar).
 * @param {Set<string>} [links] - Accumulator; a new Set is created when omitted.
 * @returns {Set<string>} The accumulator, filled with the links found.
 */
function processYamlRecursively(obj, links = new Set()) {
  if (typeof obj === "string") {
    for (const link of extractLinksFromText(obj)) {
      links.add(link);
    }
    return links;
  }

  // Other scalars (numbers, booleans, null, ...) cannot hold links.
  if (obj === null || typeof obj !== "object") {
    return links;
  }

  const children = Array.isArray(obj) ? obj : Object.values(obj);
  for (const child of children) {
    processYamlRecursively(child, links);
  }
  return links;
}
|
|
|
|
/**
 * Records every link reported by collectMarkdownLinksFromFile for one file.
 *
 * @param {string} filePath - Markdown file to scan.
 * @param {Map<string, Array>} occurrences - URL -> occurrences accumulator (mutated).
 * @returns {Promise<void>}
 */
async function collectMarkdownLinks(filePath, occurrences) {
  const found = await collectMarkdownLinksFromFile(filePath);
  for (const link of found) {
    recordOccurrence(occurrences, filePath, link.line, link.url);
  }
}
|
|
|
|
/**
 * Records the links contained in a YAML file, with line numbers when possible.
 *
 * The file is parsed to learn which links are real string content, then its
 * text is scanned line by line to locate them. Links that the parser found
 * but the scan could not place are recorded without a line number.
 *
 * Differences from the previous implementation:
 * - the file is read once and the same text is used for parsing and scanning;
 * - a blank line no longer ends a block scalar. In YAML, blank lines belong
 *   to the scalar whatever their indentation; only a non-blank, less-indented
 *   line ends it.
 *
 * @param {string} filePath - YAML file to scan.
 * @param {Map<string, Array>} occurrences - URL -> occurrences accumulator (mutated).
 * @returns {Promise<void>}
 */
async function collectYamlLinks(filePath, occurrences) {
  let source;
  let linkSet = new Set();
  try {
    source = fs.readFileSync(filePath, "utf8");
    const doc = yaml.load(source);
    if (doc) {
      linkSet = processYamlRecursively(doc);
    }
  } catch (error) {
    console.warn(`Impossible de parser ${relativeToSite(filePath)} (${error.message}).`);
    return;
  }
  if (linkSet.size === 0) {
    return;
  }

  // url -> set of line numbers already recorded for this file.
  const recorded = new Map();
  const mark = (url, lineNumber) => {
    if (!recorded.has(url)) {
      recorded.set(url, new Set());
    }
    const seenLines = recorded.get(url);
    if (!seenLines.has(lineNumber)) {
      seenLines.add(lineNumber);
      recordOccurrence(occurrences, filePath, lineNumber, url);
    }
  };

  // Only links confirmed by the parser are recorded.
  const markLinksIn = (text, lineNumber) => {
    for (const link of extractLinksFromText(text)) {
      if (linkSet.has(link)) {
        mark(link, lineNumber);
      }
    }
  };

  const lines = source.split(/\r?\n/);
  let inBlockScalar = false;
  let blockIndent = 0;

  for (let index = 0; index < lines.length; index++) {
    const lineNumber = index + 1;
    const line = lines[index];
    const indent = line.match(/^\s*/)?.[0].length ?? 0;
    const trimmed = line.trim();

    if (inBlockScalar) {
      if (trimmed === "" || indent >= blockIndent) {
        // Still inside the scalar: scan the raw line, because "#" is ordinary
        // text there and must not be stripped as an inline comment.
        if (trimmed !== "" && !isYamlCommentLine(line)) {
          markLinksIn(line, lineNumber);
        }
        continue;
      }
      // A non-blank, less-indented line ends the scalar; treat it normally.
      inBlockScalar = false;
    }

    const withoutComment = stripYamlInlineComment(line);

    if (isBlockScalarIndicator(line)) {
      inBlockScalar = true;
      // Scalar content must be indented deeper than the line that opens it.
      blockIndent = indent + 1;
    }

    if (isYamlCommentLine(line) || !withoutComment.trim()) {
      continue;
    }
    markLinksIn(withoutComment, lineNumber);
  }

  // Links the parser found but the scan could not place (for example a value
  // folded over several lines) are still recorded, without a line number.
  for (const link of linkSet) {
    if (!recorded.has(link) || recorded.get(link).size === 0) {
      recordOccurrence(occurrences, filePath, null, link);
    }
  }
}
|
|
|
|
/**
 * Recursively lists the files under `dir` whose extension is in `exts`.
 *
 * @param {string} dir - Directory to traverse.
 * @param {string[]} exts - Accepted extensions, dot included (e.g. ".md").
 * @returns {string[]} Paths of the matching files, each prefixed with `dir`.
 */
function walk(dir, exts) {
  const matches = [];
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of walk(fullPath, exts)) {
        matches.push(nested);
      }
      continue;
    }
    if (exts.includes(path.extname(entry.name))) {
      matches.push(fullPath);
    }
  }
  return matches;
}
|
|
|
|
/**
 * Returns a promise that resolves after the given number of milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
// host -> timestamp (ms since epoch) of the most recent request to that host.
const lastHostChecks = new Map();

/**
 * Waits until at least HOST_DELAY_MS has passed since the last recorded
 * request to `host`. Returns immediately when there is no host, no configured
 * delay, or no earlier request.
 *
 * @param {string|null} host - Hostname about to be contacted.
 * @returns {Promise<void>}
 */
async function applyHostDelay(host) {
  if (!host || HOST_DELAY_MS <= 0) {
    return;
  }
  const previous = lastHostChecks.get(host);
  if (!previous) {
    return;
  }
  const remaining = HOST_DELAY_MS - (Date.now() - previous);
  if (remaining > 0) {
    await delay(remaining);
  }
}
|
|
|
|
/**
 * Stores the current time as the latest request time for `host`.
 *
 * @param {string|null} host - Hostname that was just contacted; ignored when falsy.
 */
function recordHostCheck(host) {
  if (!host) {
    return;
  }
  lastHostChecks.set(host, Date.now());
}
|
|
|
|
/**
 * Extracts the hostname of a URL.
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The hostname (without port), or null when the URL
 *   cannot be parsed.
 */
function extractHost(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (_) {
    return null;
  }
  return parsed.hostname;
}
|
|
|
|
/**
 * Requests a URL with curl and reports the resulting HTTP status.
 *
 * Redirects are followed. The promise never rejects: a curl failure is
 * reported through the returned object.
 *
 * Differences from the previous implementation:
 * - HEAD requests use `--head`. `--request HEAD` only changes the method word
 *   and leaves curl waiting for a response body that never comes.
 * - curl prints `000` when no HTTP response was received (DNS failure,
 *   refused connection, ...). That is now reported as `status: null` rather
 *   than the number 0, which looked like a real status code.
 *
 * @param {string} url - URL to request.
 * @param {string} method - HTTP method ("HEAD", "GET", ...).
 * @param {string|null} hostHeader - Value for an explicit `Host` header, if any.
 * @returns {Promise<{status: (number|null), errorType: (string|null), method: string}>}
 *   `errorType` is "timeout" when curl exits with code 28, otherwise null.
 */
async function curlRequest(url, method, hostHeader) {
  const verb = method.toUpperCase();

  // Turns curl's `%{http_code}` output into a status code, or null when it is
  // missing, unparsable or `000`.
  const parseStatus = (output) => {
    const text = output === undefined || output === null ? "" : String(output).trim();
    const code = Number.parseInt(text, 10);
    return Number.isInteger(code) && code > 0 ? code : null;
  };

  const args = [
    "--silent",
    "--location",
    "--fail",
    "--max-time",
    `${REQUEST_TIMEOUT_SECONDS}`,
    "--output",
    "/dev/null",
    "--write-out",
    "%{http_code}",
    "--user-agent",
    DEFAULT_USER_AGENT,
  ];
  if (verb === "HEAD") {
    args.push("--head");
  } else {
    args.push("--request", method);
  }

  if (ENABLE_COOKIES) {
    args.push("--cookie", COOKIE_JAR, "--cookie-jar", COOKIE_JAR);
  }
  if (hostHeader) {
    // NOTE(review): curl repeats custom headers on every request of a
    // redirect chain, so this Host value is also sent to a different host
    // after a cross-host redirect. Callers also pass a hostname without the
    // port. Confirm the explicit header is really wanted.
    args.push("-H", `Host: ${hostHeader}`);
  }
  args.push(url);

  try {
    const { stdout } = await execFileAsync("curl", args);
    return { status: parseStatus(stdout), errorType: null, method: verb };
  } catch (error) {
    // With --fail curl exits non-zero on HTTP errors but still prints the
    // status code, which execFile exposes on the error object.
    const timedOut = Number(error?.code) === 28; // curl exit code 28: operation timed out
    return {
      status: parseStatus(error?.stdout),
      errorType: timedOut ? "timeout" : null,
      method: verb,
    };
  }
}
|
|
|
|
/**
 * Decides whether a request outcome warrants a second attempt using GET.
 *
 * @param {*} result - Outcome of the first request.
 * @returns {boolean} True when there is no result, an error type, no numeric
 *   status, or a status of 400 and above.
 */
function shouldRetryWithGet(result) {
  const hasCleanStatus =
    Boolean(result) && !result.errorType && typeof result.status === "number";
  return hasCleanStatus ? result.status >= 400 : true;
}
|
|
|
|
/**
 * Picks the cache lifetime that applies to an entry's last outcome.
 *
 * @param {*} entry - Cache entry (`status`, `errorType`).
 * @returns {number} 0 without an entry; the timeout TTL for timeouts, for a
 *   missing/zero/non-numeric status and for statuses below 200; the server
 *   error TTL for 500+; the client error TTL for 400-499; the success TTL for
 *   200-399.
 */
function getTtlMs(entry) {
  if (!entry) return 0;

  const lacksResponse =
    entry.errorType === "timeout" || entry.status === 0 || entry.status === null;
  if (lacksResponse) {
    return CACHE_TTL_TIMEOUT_MS;
  }

  const code = Number(entry.status);
  if (Number.isNaN(code)) return CACHE_TTL_TIMEOUT_MS;
  if (code >= 500) return CACHE_TTL_SERVER_ERROR_MS;
  if (code >= 400) return CACHE_TTL_CLIENT_ERROR_MS;
  return code >= 200 ? CACHE_TTL_SUCCESS_MS : CACHE_TTL_TIMEOUT_MS;
}
|
|
|
|
/**
 * Tells whether an entry has to be (re)checked.
 *
 * @param {*} entry - Cache entry.
 * @returns {boolean} True when the entry was never checked, its timestamp is
 *   unparsable, its TTL is zero or negative, or the TTL has elapsed.
 */
function needsCheck(entry) {
  const stamp = entry?.checkedAt;
  if (!stamp) {
    return true;
  }

  const checkedMs = Date.parse(stamp);
  if (Number.isNaN(checkedMs)) {
    return true;
  }

  const ttl = getTtlMs(entry);
  return ttl <= 0 || Date.now() - checkedMs >= ttl;
}
|
|
|
|
/**
 * Groups entries by the hostname of their URL, preserving first-seen order.
 *
 * An entry whose URL has no usable host gets a group of its own, keyed by the
 * URL, with `host` set to whatever extractHost returned.
 *
 * @param {Iterable<{url: string}>} entries - Entries to group.
 * @returns {Array<{host: (string|null), entries: Array}>}
 */
function groupEntriesByHost(entries) {
  const buckets = new Map();
  for (const entry of entries) {
    const host = extractHost(entry.url);
    const bucketKey = host || `__invalid__:${entry.url}`;
    let bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      bucket = { host, entries: [] };
      buckets.set(bucketKey, bucket);
    }
    bucket.entries.push(entry);
  }
  return [...buckets.values()];
}
|
|
|
|
/**
 * Runs `worker` over `items`, with at most `concurrency` calls in flight.
 *
 * Error handling is now uniform. Previously a worker failure surfaced only
 * if it happened while the loop was waiting for a free slot; a failure in
 * the final batch was silently dropped. Now, as soon as a worker fails no
 * further items are started, the calls already running are allowed to
 * finish, and the first failure is rethrown.
 *
 * @param {Iterable<*>} items - Work items.
 * @param {function(*): (Promise<*>|*)} worker - Called once per item.
 * @param {number} concurrency - Maximum number of simultaneous calls.
 * @returns {Promise<void>} Resolves when every started call has settled.
 * @throws The first error thrown or rejected by `worker`.
 */
async function runWithConcurrency(items, worker, concurrency) {
  const inFlight = new Set();
  const failures = [];

  for (const item of items) {
    if (failures.length > 0) {
      break;
    }
    // Failures are captured so that no promise in the set can reject; each
    // task removes itself from the set once it has settled.
    const task = Promise.resolve()
      .then(() => worker(item))
      .catch((error) => {
        failures.push(error);
      })
      .finally(() => {
        inFlight.delete(task);
      });
    inFlight.add(task);

    if (inFlight.size >= concurrency) {
      await Promise.race(inFlight);
    }
  }

  await Promise.all(inFlight);
  if (failures.length > 0) {
    throw failures[0];
  }
}
|
|
|
|
/**
 * Copies a request outcome onto an entry and stamps it with the current time.
 *
 * @param {object} entry - Entry to update in place.
 * @param {{status: *, errorType: *, method: *}} result - Request outcome.
 */
function updateEntryWithResult(entry, result) {
  const { status, errorType, method } = result;
  const checkedAt = new Date().toISOString();
  Object.assign(entry, {
    status: typeof status === "number" ? status : null,
    errorType: errorType || null,
    method,
    checkedAt,
  });
}
|
|
|
|
/**
 * Produces the status value shown in the report for an entry.
 *
 * @param {*} entry - Cache entry.
 * @returns {number|string} "timeout" for timeouts, the numeric status when
 *   there is one, "error" otherwise.
 */
function formatStatusForReport(entry) {
  if (entry && entry.errorType === "timeout") {
    return "timeout";
  }
  if (entry && typeof entry.status === "number") {
    return entry.status;
  }
  return "error";
}
|
|
|
|
/**
 * Tells whether an entry describes a dead link.
 *
 * A link is dead when the request timed out, when no numeric status is
 * available, when the status is 400 or above, or when the status is below
 * 100. The last case covers the `000` code that curl prints when it gets no
 * HTTP response (DNS failure, refused connection); such entries were
 * previously treated as alive because 0 is a number below 400.
 *
 * @param {*} entry - Cache entry.
 * @returns {boolean} True for dead links; false for a missing entry.
 */
function isDead(entry) {
  if (!entry) return false;
  if (entry.errorType === "timeout") return true;
  if (typeof entry.status !== "number" || Number.isNaN(entry.status)) return true;
  // Valid HTTP status codes start at 100; anything lower means "no response".
  return entry.status < 100 || entry.status >= 400;
}
|
|
|
|
/**
 * Returns the sort rank of a report status.
 *
 * @param {*} value - Numeric status or a label such as "timeout" / "error".
 * @returns {number} The number itself for finite numbers, 10000 for
 *   "timeout" (case-insensitive), 10001 for anything else.
 */
function getStatusOrder(value) {
  if (Number.isFinite(value)) {
    return value;
  }
  const isTimeout = typeof value === "string" && value.toLowerCase() === "timeout";
  return isTimeout ? 10000 : 10001;
}
|
|
|
|
/**
 * Builds the sorted list of dead links for the report.
 *
 * Links are ordered by status rank (numeric statuses ascending, then
 * "timeout", then other labels), then by status label, then by URL.
 *
 * The comparator used to return `a.status - b.status` whenever both statuses
 * were numbers. At that point the two numbers are always equal, so it
 * returned 0 and links sharing a status code were never ordered by URL.
 *
 * @param {object} entries - Entries keyed by URL.
 * @returns {Array<{url: string, status: (number|string), locations: Array}>}
 */
function buildDeadLinks(entries) {
  const deadLinks = Object.values(entries)
    .filter((entry) => isDead(entry))
    .map((entry) => ({
      url: entry.url,
      status: formatStatusForReport(entry),
      locations: entry.locations || [],
    }));

  return deadLinks.sort((a, b) => {
    const orderDiff = getStatusOrder(a.status) - getStatusOrder(b.status);
    if (orderDiff !== 0) return orderDiff;

    // Same rank: separate labels of different kinds (for example the number
    // 10000 and "timeout"), then fall back to the URL.
    const labelDiff = String(a.status).localeCompare(String(b.status));
    if (labelDiff !== 0) return labelDiff;

    return a.url.localeCompare(b.url);
  });
}
|
|
|
|
/**
 * Rewrites the current terminal line with the number of URLs checked so far.
 *
 * @param {number} processed - URLs checked so far.
 * @param {number} total - URLs to check in total.
 */
function logProgress(processed, total) {
  // The leading carriage return makes each update overwrite the previous one.
  const message = `\rURLs vérifiées ${processed}/${total}`;
  process.stdout.write(message);
}
|
|
|
|
/**
 * Scans the content directory for links in Markdown and YAML files.
 *
 * Markdown files (.md, .markdown) are processed first, then YAML files
 * (.yaml, .yml), one at a time.
 *
 * @returns {Promise<Map<string, Array>>} URL -> list of occurrences.
 */
async function collectOccurrences() {
  const occurrences = new Map();

  for (const markdownFile of walk(CONTENT_DIR, [".md", ".markdown"])) {
    await collectMarkdownLinks(markdownFile, occurrences);
  }
  for (const yamlFile of walk(CONTENT_DIR, [".yaml", ".yml"])) {
    await collectYamlLinks(yamlFile, occurrences);
  }

  return occurrences;
}
|
|
|
|
/**
 * Saves the current entries together with the report metadata carried over
 * from the previous run.
 *
 * @param {object} entries - Entries keyed by URL.
 * @param {{generatedAt: *, links: *}} [snapshotMeta] - Metadata to keep as is.
 */
function persistEntriesSnapshot(entries, snapshotMeta) {
  const generatedAt = snapshotMeta?.generatedAt || null;
  const links = Array.isArray(snapshotMeta?.links) ? snapshotMeta.links : [];
  saveState({ generatedAt, links, entries });
}
|
|
|
|
/**
 * Checks the given entries, host by host, and stores each outcome.
 *
 * Entries are grouped by host; groups run in parallel (bounded by
 * MAX_CONCURRENT_HOSTS) while the URLs of one group run one after another,
 * spaced by the per-host delay. Each URL is tried with HEAD and, when that
 * outcome calls for it, again with GET after the retry delay. The state file
 * is rewritten after every URL.
 *
 * @param {Array<{url: string}>} entriesToCheck - Entries that need a check.
 * @param {object} entries - All entries keyed by URL (updated in place).
 * @param {{generatedAt: *, links: *}} snapshotMeta - Metadata written with
 *   every intermediate snapshot.
 * @returns {Promise<void>}
 */
async function checkEntries(entriesToCheck, entries, snapshotMeta) {
  const total = entriesToCheck.length;
  if (total === 0) {
    return;
  }

  const hostGroups = groupEntriesByHost(entriesToCheck);
  const concurrency = Math.max(1, Math.min(MAX_CONCURRENT_HOSTS, hostGroups.length));
  let processed = 0;

  // One throttled request: wait for the host's turn, call curl, note the time.
  const request = async (host, entry, method) => {
    if (host) {
      await applyHostDelay(host);
    }
    const hostHeader = host || extractHost(entry.url);
    const outcome = await curlRequest(entry.url, method, hostHeader);
    recordHostCheck(host);
    return outcome;
  };

  // Sequentially checks every URL that belongs to one host.
  const checkHostGroup = async ({ host, entries: groupEntries }) => {
    for (const entry of groupEntries) {
      let result = await request(host, entry, "HEAD");
      if (shouldRetryWithGet(result)) {
        if (RETRY_DELAY_MS > 0) {
          await delay(RETRY_DELAY_MS);
        }
        result = await request(host, entry, "GET");
      }

      updateEntryWithResult(entries[entry.url], result);
      persistEntriesSnapshot(entries, snapshotMeta);
      processed += 1;
      logProgress(processed, total);
    }
  };

  process.stdout.write(`Vérification de ${total} URL...\n`);
  await runWithConcurrency(hostGroups, checkHostGroup, concurrency);
  process.stdout.write("\n");
}
|
|
|
|
/**
 * Entry point: collects the links, checks the ones whose cached result is
 * missing or stale, then writes the report and prints a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const occurrences = await collectOccurrences();
  if (occurrences.size === 0) {
    saveState({ generatedAt: new Date().toISOString(), links: [], entries: {} });
    console.log("Aucun lien externe détecté.");
    return;
  }

  const state = loadState();
  const mergedEntries = mergeOccurrences(state.entries, occurrences);
  const entriesArray = Object.values(mergedEntries);
  const pending = entriesArray.filter((entry) => needsCheck(entry));

  // Intermediate snapshots keep the previous report's metadata until the run
  // has finished.
  await checkEntries(pending, mergedEntries, {
    generatedAt: state.generatedAt || null,
    links: Array.isArray(state.links) ? state.links : [],
  });

  const deadLinks = buildDeadLinks(mergedEntries);
  saveState({
    generatedAt: new Date().toISOString(),
    links: deadLinks,
    entries: mergedEntries,
  });

  const reportLocation = path.relative(SITE_ROOT, REPORT_PATH);
  console.log(
    `Liens externes analysés: ${entriesArray.length} URL (${deadLinks.length} mort(s)). Données écrites dans ${reportLocation}`
  );
}
|
|
|
|
// Run the checker; on failure, flag the process as failed and report the error.
main().catch((failure) => {
  process.exitCode = 1;
  console.error("Erreur lors de la vérification des liens:", failure);
});
|