1

Utilisation de Playwright pour la vérification des liens externes

This commit is contained in:
2025-12-08 01:15:28 +01:00
parent cb7cd917d7
commit 0beac1afb0
6 changed files with 183 additions and 36 deletions

View File

@@ -3,7 +3,7 @@
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { buildUserAgent, checkUrl } = require("./lib/http");
const { buildUserAgent, checkUrl, checkWithPlaywright } = require("./lib/http");
const {
collectMarkdownLinksFromFile,
extractLinksFromText,
@@ -27,6 +27,9 @@ const DEFAULT_CONFIG = {
maxRedirects: 5,
userAgent: null,
ignoreHosts: [],
usePlaywright: false,
playwrightTimeoutSeconds: 10,
playwrightExecutablePath: null,
};
function loadConfig() {
@@ -73,6 +76,19 @@ const MAX_REDIRECTS = Math.max(
);
const DEFAULT_USER_AGENT = buildUserAgent(settings.userAgent);
const IGNORE_HOSTS = parseIgnoreHosts(settings.ignoreHosts);
// Playwright fallback is opt-in: only a literal boolean `true` in the settings enables it.
const PLAYWRIGHT_ENABLED = settings.usePlaywright === true;

// Playwright timeout in milliseconds, never below 1000 ms.
// Only real numbers and non-blank numeric strings are accepted as overrides:
// `Number(null)`, `Number("")` and `Number(false)` all coerce to 0, which would
// otherwise turn an empty/unset config value into the 1 s minimum instead of
// falling back to the configured default.
const PLAYWRIGHT_TIMEOUT_MS = (() => {
  const raw = settings.playwrightTimeoutSeconds;
  const isNumericInput =
    typeof raw === "number" || (typeof raw === "string" && raw.trim() !== "");
  const seconds = isNumericInput ? Number(raw) : Number.NaN;
  return Math.max(
    1000,
    (Number.isFinite(seconds)
      ? seconds
      : DEFAULT_CONFIG.playwrightTimeoutSeconds) * 1000
  );
})();

// Trimmed executable path from the settings, or null when it is missing,
// not a string, or blank.
const PLAYWRIGHT_EXECUTABLE =
  typeof settings.playwrightExecutablePath === "string" &&
  settings.playwrightExecutablePath.trim().length > 0
    ? settings.playwrightExecutablePath.trim()
    : null;

// HTTP status codes for which shouldRecheckWithPlaywright() asks for a second
// attempt through Playwright.
const PLAYWRIGHT_RECHECK_STATUSES = new Set([403, 426, 429, 502]);
const CACHE_TTL_SUCCESS_MS = daysToMs(
pickNumber(settings.cacheTtlSuccessDays, DEFAULT_CONFIG.cacheTtlSuccessDays)
@@ -432,6 +448,23 @@ function filterIgnoredHosts(occurrences, ignoreHosts) {
return filtered;
}
/**
 * Decide whether a URL whose plain HTTP check produced `result` should be
 * checked again through Playwright.
 *
 * @param {object|null|undefined} result - Outcome of the HTTP check; the
 *   function reads its `errorType` and `status` properties.
 * @returns {boolean} `false` whenever Playwright is disabled. Otherwise `true`
 *   when there is no result, when the error type is "timeout" or "network",
 *   when no numeric status is available, or when the status belongs to
 *   PLAYWRIGHT_RECHECK_STATUSES.
 */
function shouldRecheckWithPlaywright(result) {
  // The feature flag wins over everything else.
  if (PLAYWRIGHT_ENABLED !== true && !PLAYWRIGHT_ENABLED) return false;

  // No outcome at all: let the browser have a try.
  if (!result) return true;

  // Transport-level failures carry no meaningful status to inspect.
  const kind = result.errorType;
  const isTransportFailure = kind === "timeout" || kind === "network";
  if (isTransportFailure) return true;

  // A missing or non-numeric status is treated like an unknown outcome.
  const httpStatus = result.status;
  if (typeof httpStatus !== "number") return true;

  return PLAYWRIGHT_RECHECK_STATUSES.has(httpStatus);
}
function recordOccurrence(map, filePath, line, url) {
if (!map.has(url)) {
map.set(url, []);
@@ -795,12 +828,24 @@ async function checkEntries(entriesToCheck, entries, snapshotMeta) {
if (host) {
await applyHostDelay(host);
}
const result = await checkUrl(entry.url, {
let result = await checkUrl(entry.url, {
...BASE_HTTP_OPTIONS,
firstMethod: "GET",
retryWithGet: false,
});
recordHostCheck(host);
if (shouldRecheckWithPlaywright(result)) {
if (host) {
await applyHostDelay(host);
}
const playResult = await checkWithPlaywright(entry.url, {
userAgent: DEFAULT_USER_AGENT,
timeoutMs: PLAYWRIGHT_TIMEOUT_MS,
executablePath: PLAYWRIGHT_EXECUTABLE,
});
recordHostCheck(host);
result = playResult;
}
updateEntryWithResult(entries[entry.url], result);
persistEntriesSnapshot(entries, snapshotMeta);
processed += 1;