Utilisation de Playwright pour la vérification des liens externes
This commit is contained in:
@@ -17,7 +17,7 @@ DEST_DIR="/var/lib/www/richard-dern.fr/"
|
||||
HUGO_ENV="production"
|
||||
TARGET_OWNER="caddy:caddy"
|
||||
CHOWN_BIN="/run/current-system/sw/bin/chown"
|
||||
SETFACL_BIN="/run/current-system/sw/bin/setfacl"
|
||||
SETFACL_BIN="$(realpath /run/current-system/sw/bin/setfacl)"
|
||||
|
||||
is_local_host() {
|
||||
local target="$1"
|
||||
|
||||
91
package-lock.json
generated
91
package-lock.json
generated
@@ -10,6 +10,7 @@
|
||||
"chart.js": "^4.4.4",
|
||||
"chartjs-node-canvas": "^5.0.0",
|
||||
"luxon": "^3.7.2",
|
||||
"playwright": "^1.49.0",
|
||||
"postcss-import": "^16.1.0",
|
||||
"postcss-nested": "^7.0.2",
|
||||
"puppeteer": "^23.11.1",
|
||||
@@ -950,7 +951,6 @@
|
||||
"integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"debug": "^4.4.0",
|
||||
"extract-zip": "^2.0.1",
|
||||
@@ -1840,6 +1840,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"caniuse-lite": "^1.0.30001688",
|
||||
"electron-to-chromium": "^1.5.73",
|
||||
@@ -1958,6 +1959,7 @@
|
||||
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz",
|
||||
"integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@kurkle/color": "^0.3.0"
|
||||
},
|
||||
@@ -1984,6 +1986,7 @@
|
||||
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@chevrotain/cst-dts-gen": "11.0.3",
|
||||
"@chevrotain/gast": "11.0.3",
|
||||
@@ -2037,7 +2040,6 @@
|
||||
"integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"mitt": "^3.0.1",
|
||||
"zod": "^3.24.1"
|
||||
@@ -2217,6 +2219,7 @@
|
||||
"integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10"
|
||||
}
|
||||
@@ -2661,6 +2664,7 @@
|
||||
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
@@ -2871,14 +2875,6 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/devtools-protocol": {
|
||||
"version": "0.0.1413902",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz",
|
||||
"integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/didyoumean": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz",
|
||||
@@ -3832,6 +3828,7 @@
|
||||
"integrity": "sha512-UlIZrRariB11TY1RtTgUWp65tphtBv4CSq7vyS2ZZ2TgoMjs2nloq+wFqxiwcxlhHUvs7DPGgMjs2aeQxz5h9g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@braintree/sanitize-url": "^7.1.1",
|
||||
"@iconify/utils": "^3.0.1",
|
||||
@@ -4341,6 +4338,50 @@
|
||||
"pathe": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright": {
|
||||
"version": "1.57.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.57.0.tgz",
|
||||
"integrity": "sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright-core": "1.57.0"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "2.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright-core": {
|
||||
"version": "1.57.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz",
|
||||
"integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"playwright-core": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright/node_modules/fsevents": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
|
||||
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/points-on-curve": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/points-on-curve/-/points-on-curve-0.2.0.tgz",
|
||||
@@ -4378,6 +4419,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"nanoid": "^3.3.8",
|
||||
"picocolors": "^1.1.1",
|
||||
@@ -4689,6 +4731,7 @@
|
||||
"deprecated": "< 24.15.0 is no longer supported",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@puppeteer/browsers": "2.6.1",
|
||||
"chromium-bidi": "0.11.0",
|
||||
@@ -4704,30 +4747,12 @@
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-core": {
|
||||
"version": "24.4.0",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz",
|
||||
"integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@puppeteer/browsers": "2.8.0",
|
||||
"chromium-bidi": "2.1.2",
|
||||
"debug": "^4.4.0",
|
||||
"devtools-protocol": "0.0.1413902",
|
||||
"typed-query-selector": "^2.12.0",
|
||||
"ws": "^8.18.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra": {
|
||||
"version": "3.3.6",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
|
||||
"integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/debug": "^4.1.0",
|
||||
"debug": "^4.1.1",
|
||||
@@ -4898,7 +4923,8 @@
|
||||
"version": "0.0.1367902",
|
||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1367902.tgz",
|
||||
"integrity": "sha512-XxtPuC3PGakY6PD7dG66/o8KwJ/LkH2/EKe19Dcw58w53dv4/vSQEkn/SzuyhHE2q4zPgCkxQBxus3VV4ql+Pg==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/puppeteer/node_modules/puppeteer-core": {
|
||||
"version": "23.11.1",
|
||||
@@ -4996,6 +5022,7 @@
|
||||
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
@@ -5006,6 +5033,7 @@
|
||||
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"scheduler": "^0.27.0"
|
||||
},
|
||||
@@ -5582,6 +5610,7 @@
|
||||
"integrity": "sha512-6A2rnmW5xZMdw11LYjhcI5846rt9pbLSabY5XPxo+XWdxwZaFEn47Go4NzFiHu9sNNmr/kXivP1vStfvMaK1GQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@alloc/quick-lru": "^5.2.0",
|
||||
"arg": "^5.0.2",
|
||||
@@ -5813,6 +5842,7 @@
|
||||
"integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -6150,7 +6180,6 @@
|
||||
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
"puppeteer": "^23.11.1",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"playwright": "^1.49.0",
|
||||
"sharp": "^0.33.5",
|
||||
"undici": "^7.16.0",
|
||||
"user-agents": "^1.1.480"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const yaml = require("js-yaml");
|
||||
const { buildUserAgent, checkUrl } = require("./lib/http");
|
||||
const { buildUserAgent, checkUrl, checkWithPlaywright } = require("./lib/http");
|
||||
const {
|
||||
collectMarkdownLinksFromFile,
|
||||
extractLinksFromText,
|
||||
@@ -27,6 +27,9 @@ const DEFAULT_CONFIG = {
|
||||
maxRedirects: 5,
|
||||
userAgent: null,
|
||||
ignoreHosts: [],
|
||||
usePlaywright: false,
|
||||
playwrightTimeoutSeconds: 10,
|
||||
playwrightExecutablePath: null,
|
||||
};
|
||||
|
||||
function loadConfig() {
|
||||
@@ -73,6 +76,19 @@ const MAX_REDIRECTS = Math.max(
|
||||
);
|
||||
const DEFAULT_USER_AGENT = buildUserAgent(settings.userAgent);
|
||||
const IGNORE_HOSTS = parseIgnoreHosts(settings.ignoreHosts);
|
||||
// Playwright rechecks are opt-in: only an explicit boolean `true` enables them.
const PLAYWRIGHT_ENABLED = settings.usePlaywright === true;

// Number(null) and Number("") both evaluate to 0, which is finite; requiring a
// strictly positive value makes a null/blank/zero setting fall back to the
// default instead of silently collapsing to the 1-second floor below.
const configuredPlaywrightTimeoutSeconds = Number(settings.playwrightTimeoutSeconds);
const PLAYWRIGHT_TIMEOUT_MS = Math.max(
  1000,
  (Number.isFinite(configuredPlaywrightTimeoutSeconds) && configuredPlaywrightTimeoutSeconds > 0
    ? configuredPlaywrightTimeoutSeconds
    : DEFAULT_CONFIG.playwrightTimeoutSeconds) * 1000
);

// Trimmed path of the browser binary, or null when the setting is not a
// non-blank string.
const PLAYWRIGHT_EXECUTABLE =
  typeof settings.playwrightExecutablePath === "string" &&
  settings.playwrightExecutablePath.trim().length > 0
    ? settings.playwrightExecutablePath.trim()
    : null;

// HTTP statuses for which a result is checked again through Playwright.
const PLAYWRIGHT_RECHECK_STATUSES = new Set([403, 426, 429, 502]);
|
||||
|
||||
const CACHE_TTL_SUCCESS_MS = daysToMs(
|
||||
pickNumber(settings.cacheTtlSuccessDays, DEFAULT_CONFIG.cacheTtlSuccessDays)
|
||||
@@ -432,6 +448,23 @@ function filterIgnoredHosts(occurrences, ignoreHosts) {
|
||||
return filtered;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a link-check result should be verified again with Playwright.
 *
 * @param {object|null|undefined} result - Outcome of the first check; its
 *   `errorType` and `status` fields are inspected.
 * @returns {boolean} Always false when Playwright is disabled. Otherwise true
 *   for a missing result, a "timeout" or "network" error, a non-numeric
 *   status, or a status listed in PLAYWRIGHT_RECHECK_STATUSES.
 */
function shouldRecheckWithPlaywright(result) {
  if (!PLAYWRIGHT_ENABLED) {
    return false;
  }

  // Nothing usable came back, or the request failed before any HTTP status.
  const failedWithoutAnswer =
    !result || result.errorType === "timeout" || result.errorType === "network";
  if (failedWithoutAnswer) {
    return true;
  }

  // A result without a numeric status is inconclusive.
  if (typeof result.status !== "number") {
    return true;
  }

  return PLAYWRIGHT_RECHECK_STATUSES.has(result.status);
}
|
||||
|
||||
function recordOccurrence(map, filePath, line, url) {
|
||||
if (!map.has(url)) {
|
||||
map.set(url, []);
|
||||
@@ -795,12 +828,24 @@ async function checkEntries(entriesToCheck, entries, snapshotMeta) {
|
||||
if (host) {
|
||||
await applyHostDelay(host);
|
||||
}
|
||||
const result = await checkUrl(entry.url, {
|
||||
let result = await checkUrl(entry.url, {
|
||||
...BASE_HTTP_OPTIONS,
|
||||
firstMethod: "GET",
|
||||
retryWithGet: false,
|
||||
});
|
||||
recordHostCheck(host);
|
||||
if (shouldRecheckWithPlaywright(result)) {
|
||||
if (host) {
|
||||
await applyHostDelay(host);
|
||||
}
|
||||
const playResult = await checkWithPlaywright(entry.url, {
|
||||
userAgent: DEFAULT_USER_AGENT,
|
||||
timeoutMs: PLAYWRIGHT_TIMEOUT_MS,
|
||||
executablePath: PLAYWRIGHT_EXECUTABLE,
|
||||
});
|
||||
recordHostCheck(host);
|
||||
result = playResult;
|
||||
}
|
||||
updateEntryWithResult(entries[entry.url], result);
|
||||
persistEntriesSnapshot(entries, snapshotMeta);
|
||||
processed += 1;
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
"cacheDir": "tools/cache",
|
||||
"cacheFile": "external_links.yaml",
|
||||
"hostDelayMs": 2000,
|
||||
"retryDelayMs": 5000,
|
||||
"requestTimeoutSeconds": 5,
|
||||
"cacheTtlSuccessDays": 30,
|
||||
"cacheTtlClientErrorDays": 7,
|
||||
@@ -15,6 +14,9 @@
|
||||
"userAgent": null,
|
||||
"enableCookies": true,
|
||||
"cookieJar": "tools/cache/curl_cookies.txt",
|
||||
"usePlaywright": true,
|
||||
"playwrightTimeoutSeconds": 10,
|
||||
"playwrightExecutablePath": "/nix/store/jaf9gnbln0cbs2vspfdblc4ff6vv1kk5-chromium-142.0.7444.175/bin/chromium",
|
||||
"ignoreHosts": [
|
||||
"10.0.2.1",
|
||||
"web.archive.org",
|
||||
@@ -107,4 +109,4 @@
|
||||
"goaccess": {
|
||||
"url": null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,10 @@ const DEFAULT_USER_AGENTS = [
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
||||
];
|
||||
const DEFAULT_VIEWPORT = { width: 1366, height: 768 };
|
||||
const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"];
|
||||
|
||||
let playwrightModule = null;
|
||||
|
||||
function buildUserAgent(preferred) {
|
||||
if (typeof preferred === "string" && preferred.trim()) {
|
||||
@@ -112,6 +116,71 @@ function buildNavigationHeaders(url, userAgent, extraHeaders = {}) {
|
||||
return baseHeaders;
|
||||
}
|
||||
|
||||
/**
 * Returns the Playwright module, requiring it on first use and reusing the
 * cached reference on later calls.
 *
 * @returns {object} Whatever `require("playwright")` exports.
 * @throws Any error raised by `require` when the package cannot be loaded.
 */
function loadPlaywright() {
  if (!playwrightModule) {
    playwrightModule = require("playwright");
  }
  return playwrightModule;
}
|
||||
|
||||
/**
 * Checks a URL by navigating to it with a headless Chromium driven by
 * Playwright, which is closer to a real browser visit than a plain request.
 *
 * @param {string} url - Address to open.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Preferred user agent, passed to buildUserAgent.
 * @param {number} [options.timeoutMs] - Navigation timeout in milliseconds;
 *   DEFAULT_TIMEOUT_MS is used when this is not a finite number.
 * @param {string} [options.executablePath] - Browser binary to launch; when
 *   blank or not a string, no explicit path is given to Playwright.
 * @returns {Promise<{status: (number|null), finalUrl: string, method: string,
 *   errorType: (string|null), message: (string|null|undefined)}>} Navigation
 *   outcome. A failed navigation is reported through `errorType`
 *   ("timeout" or "network") rather than thrown.
 * @throws Any error raised while loading Playwright, launching the browser,
 *   creating the context or page, or closing them. These are not navigation
 *   results and are meant to interrupt the script.
 */
async function checkWithPlaywright(url, options = {}) {
  const userAgent = buildUserAgent(options.userAgent);
  const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
  const executablePath =
    typeof options.executablePath === "string" && options.executablePath.trim()
      ? options.executablePath.trim()
      : null;
  const playwright = loadPlaywright();

  const browser = await playwright.chromium.launch({
    headless: true,
    executablePath: executablePath || undefined,
    args: DEFAULT_PLAYWRIGHT_ARGS,
  });

  let context = null;
  try {
    context = await browser.newContext({
      viewport: { ...DEFAULT_VIEWPORT },
      userAgent,
      extraHTTPHeaders: buildNavigationHeaders(url, userAgent),
    });
    const page = await context.newPage();

    try {
      const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs });
      return {
        status: response ? response.status() : null,
        finalUrl: page.url() || url,
        method: "GET",
        errorType: null,
      };
    } catch (error) {
      // Only navigation failures are turned into a result object.
      return {
        status: null,
        finalUrl: url,
        method: "GET",
        errorType: error?.name === "TimeoutError" ? "timeout" : "network",
        message: error?.message || null,
      };
    }
  } finally {
    // This cleanup covers context/page creation failures too, so a launched
    // browser process is never left running. The nested finally guarantees the
    // browser is closed even if closing the context throws.
    try {
      if (context) {
        await context.close();
      }
    } finally {
      await browser.close();
    }
  }
}
|
||||
|
||||
async function fetchWithRedirects(targetUrl, options, maxRedirects) {
|
||||
let currentUrl = targetUrl;
|
||||
let response = null;
|
||||
@@ -226,4 +295,5 @@ module.exports = {
|
||||
checkUrl,
|
||||
probeUrl,
|
||||
shouldRetry,
|
||||
checkWithPlaywright,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user