1

Utilisation de Playwright pour la vérification des liens externes

This commit is contained in:
2025-12-08 01:15:28 +01:00
parent cb7cd917d7
commit 0beac1afb0
6 changed files with 183 additions and 36 deletions

View File

@@ -17,7 +17,7 @@ DEST_DIR="/var/lib/www/richard-dern.fr/"
HUGO_ENV="production"
TARGET_OWNER="caddy:caddy"
CHOWN_BIN="/run/current-system/sw/bin/chown"
SETFACL_BIN="/run/current-system/sw/bin/setfacl"
SETFACL_BIN="$(realpath /run/current-system/sw/bin/setfacl)"
is_local_host() {
local target="$1"

91
package-lock.json generated
View File

@@ -10,6 +10,7 @@
"chart.js": "^4.4.4",
"chartjs-node-canvas": "^5.0.0",
"luxon": "^3.7.2",
"playwright": "^1.49.0",
"postcss-import": "^16.1.0",
"postcss-nested": "^7.0.2",
"puppeteer": "^23.11.1",
@@ -950,7 +951,6 @@
"integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==",
"license": "Apache-2.0",
"optional": true,
"peer": true,
"dependencies": {
"debug": "^4.4.0",
"extract-zip": "^2.0.1",
@@ -1840,6 +1840,7 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001688",
"electron-to-chromium": "^1.5.73",
@@ -1958,6 +1959,7 @@
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz",
"integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@kurkle/color": "^0.3.0"
},
@@ -1984,6 +1986,7 @@
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@chevrotain/cst-dts-gen": "11.0.3",
"@chevrotain/gast": "11.0.3",
@@ -2037,7 +2040,6 @@
"integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==",
"license": "Apache-2.0",
"optional": true,
"peer": true,
"dependencies": {
"mitt": "^3.0.1",
"zod": "^3.24.1"
@@ -2217,6 +2219,7 @@
"integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10"
}
@@ -2661,6 +2664,7 @@
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
"dev": true,
"license": "ISC",
"peer": true,
"engines": {
"node": ">=12"
}
@@ -2871,14 +2875,6 @@
"node": ">=8"
}
},
"node_modules/devtools-protocol": {
"version": "0.0.1413902",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz",
"integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==",
"license": "BSD-3-Clause",
"optional": true,
"peer": true
},
"node_modules/didyoumean": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
@@ -3832,6 +3828,7 @@
"integrity": "sha512-UlIZrRariB11TY1RtTgUWp65tphtBv4CSq7vyS2ZZ2TgoMjs2nloq+wFqxiwcxlhHUvs7DPGgMjs2aeQxz5h9g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@braintree/sanitize-url": "^7.1.1",
"@iconify/utils": "^3.0.1",
@@ -4341,6 +4338,50 @@
"pathe": "^2.0.1"
}
},
"node_modules/playwright": {
"version": "1.57.0",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.57.0.tgz",
"integrity": "sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==",
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.57.0"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.57.0",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz",
"integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/playwright/node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/points-on-curve": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/points-on-curve/-/points-on-curve-0.2.0.tgz",
@@ -4378,6 +4419,7 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"nanoid": "^3.3.8",
"picocolors": "^1.1.1",
@@ -4689,6 +4731,7 @@
"deprecated": "< 24.15.0 is no longer supported",
"hasInstallScript": true,
"license": "Apache-2.0",
"peer": true,
"dependencies": {
"@puppeteer/browsers": "2.6.1",
"chromium-bidi": "0.11.0",
@@ -4704,30 +4747,12 @@
"node": ">=18"
}
},
"node_modules/puppeteer-core": {
"version": "24.4.0",
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz",
"integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==",
"license": "Apache-2.0",
"optional": true,
"peer": true,
"dependencies": {
"@puppeteer/browsers": "2.8.0",
"chromium-bidi": "2.1.2",
"debug": "^4.4.0",
"devtools-protocol": "0.0.1413902",
"typed-query-selector": "^2.12.0",
"ws": "^8.18.1"
},
"engines": {
"node": ">=18"
}
},
"node_modules/puppeteer-extra": {
"version": "3.3.6",
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
"integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/debug": "^4.1.0",
"debug": "^4.1.1",
@@ -4898,7 +4923,8 @@
"version": "0.0.1367902",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1367902.tgz",
"integrity": "sha512-XxtPuC3PGakY6PD7dG66/o8KwJ/LkH2/EKe19Dcw58w53dv4/vSQEkn/SzuyhHE2q4zPgCkxQBxus3VV4ql+Pg==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/puppeteer/node_modules/puppeteer-core": {
"version": "23.11.1",
@@ -4996,6 +5022,7 @@
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=0.10.0"
}
@@ -5006,6 +5033,7 @@
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"scheduler": "^0.27.0"
},
@@ -5582,6 +5610,7 @@
"integrity": "sha512-6A2rnmW5xZMdw11LYjhcI5846rt9pbLSabY5XPxo+XWdxwZaFEn47Go4NzFiHu9sNNmr/kXivP1vStfvMaK1GQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@alloc/quick-lru": "^5.2.0",
"arg": "^5.0.2",
@@ -5813,6 +5842,7 @@
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
@@ -6150,7 +6180,6 @@
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
"license": "MIT",
"optional": true,
"peer": true,
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}

View File

@@ -14,6 +14,7 @@
"puppeteer": "^23.11.1",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"playwright": "^1.49.0",
"sharp": "^0.33.5",
"undici": "^7.16.0",
"user-agents": "^1.1.480"

View File

@@ -3,7 +3,7 @@
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { buildUserAgent, checkUrl } = require("./lib/http");
const { buildUserAgent, checkUrl, checkWithPlaywright } = require("./lib/http");
const {
collectMarkdownLinksFromFile,
extractLinksFromText,
@@ -27,6 +27,9 @@ const DEFAULT_CONFIG = {
maxRedirects: 5,
userAgent: null,
ignoreHosts: [],
usePlaywright: false,
playwrightTimeoutSeconds: 10,
playwrightExecutablePath: null,
};
function loadConfig() {
@@ -73,6 +76,19 @@ const MAX_REDIRECTS = Math.max(
);
const DEFAULT_USER_AGENT = buildUserAgent(settings.userAgent);
const IGNORE_HOSTS = parseIgnoreHosts(settings.ignoreHosts);
// The Playwright re-check is opt-in: only a literal `true` in the settings enables it.
const PLAYWRIGHT_ENABLED = settings.usePlaywright === true;
// Playwright timeout in milliseconds, floored at 1000 ms. A setting that does not
// convert to a finite number falls back to DEFAULT_CONFIG.playwrightTimeoutSeconds.
// NOTE(review): Number(null) and Number("") are both 0 (finite), so such values end up
// at the 1000 ms floor rather than at the default — confirm this is intended.
const PLAYWRIGHT_TIMEOUT_MS = Math.max(
1000,
(Number.isFinite(Number(settings.playwrightTimeoutSeconds))
? Number(settings.playwrightTimeoutSeconds)
: DEFAULT_CONFIG.playwrightTimeoutSeconds) * 1000
);
// Trimmed path of the browser executable, or null when the setting is not a
// non-blank string.
const PLAYWRIGHT_EXECUTABLE =
typeof settings.playwrightExecutablePath === "string" &&
settings.playwrightExecutablePath.trim().length > 0
? settings.playwrightExecutablePath.trim()
: null;
// HTTP statuses for which shouldRecheckWithPlaywright() asks for a second check.
const PLAYWRIGHT_RECHECK_STATUSES = new Set([403, 426, 429, 502]);
const CACHE_TTL_SUCCESS_MS = daysToMs(
pickNumber(settings.cacheTtlSuccessDays, DEFAULT_CONFIG.cacheTtlSuccessDays)
@@ -432,6 +448,23 @@ function filterIgnoredHosts(occurrences, ignoreHosts) {
return filtered;
}
/**
 * Tells whether a link whose first check produced `result` should be
 * checked again through Playwright.
 *
 * Always false when PLAYWRIGHT_ENABLED is off. Otherwise true when there is
 * no result, when the result's errorType is "timeout" or "network", when its
 * status is not a number, or when the status belongs to
 * PLAYWRIGHT_RECHECK_STATUSES.
 *
 * @param {?{status: *, errorType: *}} result - Outcome of the first check.
 * @returns {boolean} Whether a Playwright re-check is wanted.
 */
function shouldRecheckWithPlaywright(result) {
  if (!PLAYWRIGHT_ENABLED) {
    return false;
  }

  // A missing result or a transport-level failure always deserves a retry.
  const transportFailure =
    !result || ["timeout", "network"].includes(result.errorType);
  if (transportFailure) {
    return true;
  }

  // Without a numeric status nothing conclusive came back.
  if (typeof result.status !== "number") {
    return true;
  }

  return PLAYWRIGHT_RECHECK_STATUSES.has(result.status);
}
function recordOccurrence(map, filePath, line, url) {
if (!map.has(url)) {
map.set(url, []);
@@ -795,12 +828,24 @@ async function checkEntries(entriesToCheck, entries, snapshotMeta) {
if (host) {
await applyHostDelay(host);
}
const result = await checkUrl(entry.url, {
let result = await checkUrl(entry.url, {
...BASE_HTTP_OPTIONS,
firstMethod: "GET",
retryWithGet: false,
});
recordHostCheck(host);
if (shouldRecheckWithPlaywright(result)) {
if (host) {
await applyHostDelay(host);
}
const playResult = await checkWithPlaywright(entry.url, {
userAgent: DEFAULT_USER_AGENT,
timeoutMs: PLAYWRIGHT_TIMEOUT_MS,
executablePath: PLAYWRIGHT_EXECUTABLE,
});
recordHostCheck(host);
result = playResult;
}
updateEntryWithResult(entries[entry.url], result);
persistEntriesSnapshot(entries, snapshotMeta);
processed += 1;

View File

@@ -6,7 +6,6 @@
"cacheDir": "tools/cache",
"cacheFile": "external_links.yaml",
"hostDelayMs": 2000,
"retryDelayMs": 5000,
"requestTimeoutSeconds": 5,
"cacheTtlSuccessDays": 30,
"cacheTtlClientErrorDays": 7,
@@ -15,6 +14,9 @@
"userAgent": null,
"enableCookies": true,
"cookieJar": "tools/cache/curl_cookies.txt",
"usePlaywright": true,
"playwrightTimeoutSeconds": 10,
"playwrightExecutablePath": "/nix/store/jaf9gnbln0cbs2vspfdblc4ff6vv1kk5-chromium-142.0.7444.175/bin/chromium",
"ignoreHosts": [
"10.0.2.1",
"web.archive.org",
@@ -107,4 +109,4 @@
"goaccess": {
"url": null
}
}
}

View File

@@ -12,6 +12,10 @@ const DEFAULT_USER_AGENTS = [
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
];
const DEFAULT_VIEWPORT = { width: 1366, height: 768 };
const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"];
let playwrightModule = null;
function buildUserAgent(preferred) {
if (typeof preferred === "string" && preferred.trim()) {
@@ -112,6 +116,71 @@ function buildNavigationHeaders(url, userAgent, extraHeaders = {}) {
return baseHeaders;
}
function loadPlaywright() {
if (playwrightModule) {
return playwrightModule;
}
playwrightModule = require("playwright");
return playwrightModule;
}
/**
 * Checks a URL through Playwright, getting close to a real browser navigation.
 *
 * A Chromium instance is launched, one context and one page are created, and
 * the page navigates to `url` waiting for "domcontentloaded".
 *
 * @param {string} url - Address to visit.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Preferred user agent, passed to buildUserAgent().
 * @param {number} [options.timeoutMs] - Navigation timeout; DEFAULT_TIMEOUT_MS is used
 *   when this is not a finite number.
 * @param {string} [options.executablePath] - Browser binary to launch; blank or
 *   non-string values let Playwright pick its own.
 * @returns {Promise<{status: ?number, finalUrl: string, method: string,
 *   errorType: ?string, message: (?string|undefined)}>} On a completed navigation,
 *   the response status (null when there is no response object) and the page's
 *   final URL. On a navigation failure, a null status with errorType "timeout"
 *   (error named "TimeoutError") or "network" (anything else) plus the error message.
 * @throws Any error raised while loading Playwright, launching the browser or
 *   creating the context/page: these are deliberately not caught so that a broken
 *   Playwright setup interrupts the script.
 */
async function checkWithPlaywright(url, options = {}) {
  const userAgent = buildUserAgent(options.userAgent);
  const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
  const executablePath =
    typeof options.executablePath === "string" && options.executablePath.trim()
      ? options.executablePath.trim()
      : null;

  const playwright = loadPlaywright();
  let browser = null;
  let context = null;

  try {
    browser = await playwright.chromium.launch({
      headless: true,
      executablePath: executablePath || undefined,
      args: DEFAULT_PLAYWRIGHT_ARGS,
    });
    context = await browser.newContext({
      viewport: { ...DEFAULT_VIEWPORT },
      userAgent,
      extraHTTPHeaders: buildNavigationHeaders(url, userAgent),
    });
    const page = await context.newPage();

    // Only navigation failures are turned into a result object; setup
    // failures above propagate to the caller.
    try {
      const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs });
      const status = response ? response.status() : null;
      const finalUrl = page.url() || url;
      return {
        status,
        finalUrl,
        method: "GET",
        errorType: null,
      };
    } catch (error) {
      return {
        status: null,
        finalUrl: url,
        method: "GET",
        errorType: error?.name === "TimeoutError" ? "timeout" : "network",
        message: error?.message || null,
      };
    }
  } finally {
    // Cleanup lives on the outer try so the browser is released even when
    // newContext()/newPage() fails, and the nested finally guarantees
    // browser.close() still runs if context.close() throws.
    try {
      if (context) {
        await context.close();
      }
    } finally {
      if (browser) {
        await browser.close();
      }
    }
  }
}
async function fetchWithRedirects(targetUrl, options, maxRedirects) {
let currentUrl = targetUrl;
let response = null;
@@ -226,4 +295,5 @@ module.exports = {
checkUrl,
probeUrl,
shouldRetry,
checkWithPlaywright,
};