diff --git a/deploy.sh b/deploy.sh index 7d895b0c..df946842 100755 --- a/deploy.sh +++ b/deploy.sh @@ -17,7 +17,7 @@ DEST_DIR="/var/lib/www/richard-dern.fr/" HUGO_ENV="production" TARGET_OWNER="caddy:caddy" CHOWN_BIN="/run/current-system/sw/bin/chown" -SETFACL_BIN="/run/current-system/sw/bin/setfacl" +SETFACL_BIN="$(realpath /run/current-system/sw/bin/setfacl)" is_local_host() { local target="$1" diff --git a/package-lock.json b/package-lock.json index ac59865b..1e2b5fce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "chart.js": "^4.4.4", "chartjs-node-canvas": "^5.0.0", "luxon": "^3.7.2", + "playwright": "^1.49.0", "postcss-import": "^16.1.0", "postcss-nested": "^7.0.2", "puppeteer": "^23.11.1", @@ -950,7 +951,6 @@ "integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==", "license": "Apache-2.0", "optional": true, - "peer": true, "dependencies": { "debug": "^4.4.0", "extract-zip": "^2.0.1", @@ -1840,6 +1840,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001688", "electron-to-chromium": "^1.5.73", @@ -1958,6 +1959,7 @@ "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz", "integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==", "license": "MIT", + "peer": true, "dependencies": { "@kurkle/color": "^0.3.0" }, @@ -1984,6 +1986,7 @@ "integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==", "dev": true, "license": "Apache-2.0", + "peer": true, "dependencies": { "@chevrotain/cst-dts-gen": "11.0.3", "@chevrotain/gast": "11.0.3", @@ -2037,7 +2040,6 @@ "integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==", "license": "Apache-2.0", "optional": true, - "peer": true, "dependencies": { "mitt": "^3.0.1", "zod": "^3.24.1" @@ -2217,6 +2219,7 @@ "integrity": 
"sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=0.10" } @@ -2661,6 +2664,7 @@ "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "dev": true, "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -2871,14 +2875,6 @@ "node": ">=8" } }, - "node_modules/devtools-protocol": { - "version": "0.0.1413902", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz", - "integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==", - "license": "BSD-3-Clause", - "optional": true, - "peer": true - }, "node_modules/didyoumean": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", @@ -3832,6 +3828,7 @@ "integrity": "sha512-UlIZrRariB11TY1RtTgUWp65tphtBv4CSq7vyS2ZZ2TgoMjs2nloq+wFqxiwcxlhHUvs7DPGgMjs2aeQxz5h9g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@braintree/sanitize-url": "^7.1.1", "@iconify/utils": "^3.0.1", @@ -4341,6 +4338,50 @@ "pathe": "^2.0.1" } }, + "node_modules/playwright": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.57.0.tgz", + "integrity": "sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.57.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz", + "integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==", + "license": "Apache-2.0", + "bin": { + "playwright-core": 
"cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright/node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/points-on-curve": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/points-on-curve/-/points-on-curve-0.2.0.tgz", @@ -4378,6 +4419,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "nanoid": "^3.3.8", "picocolors": "^1.1.1", @@ -4689,6 +4731,7 @@ "deprecated": "< 24.15.0 is no longer supported", "hasInstallScript": true, "license": "Apache-2.0", + "peer": true, "dependencies": { "@puppeteer/browsers": "2.6.1", "chromium-bidi": "0.11.0", @@ -4704,30 +4747,12 @@ "node": ">=18" } }, - "node_modules/puppeteer-core": { - "version": "24.4.0", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz", - "integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==", - "license": "Apache-2.0", - "optional": true, - "peer": true, - "dependencies": { - "@puppeteer/browsers": "2.8.0", - "chromium-bidi": "2.1.2", - "debug": "^4.4.0", - "devtools-protocol": "0.0.1413902", - "typed-query-selector": "^2.12.0", - "ws": "^8.18.1" - }, - "engines": { - "node": ">=18" - } - }, "node_modules/puppeteer-extra": { "version": "3.3.6", "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz", "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==", "license": "MIT", + "peer": true, "dependencies": { "@types/debug": "^4.1.0", "debug": "^4.1.1", @@ -4898,7 +4923,8 @@ "version": "0.0.1367902", "resolved": 
"https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1367902.tgz", "integrity": "sha512-XxtPuC3PGakY6PD7dG66/o8KwJ/LkH2/EKe19Dcw58w53dv4/vSQEkn/SzuyhHE2q4zPgCkxQBxus3VV4ql+Pg==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/puppeteer/node_modules/puppeteer-core": { "version": "23.11.1", @@ -4996,6 +5022,7 @@ "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -5006,6 +5033,7 @@ "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -5582,6 +5610,7 @@ "integrity": "sha512-6A2rnmW5xZMdw11LYjhcI5846rt9pbLSabY5XPxo+XWdxwZaFEn47Go4NzFiHu9sNNmr/kXivP1vStfvMaK1GQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", @@ -5813,6 +5842,7 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -6150,7 +6180,6 @@ "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==", "license": "MIT", "optional": true, - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json index fce9ec7c..cabba0b6 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "puppeteer": "^23.11.1", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", + "playwright": "^1.49.0", "sharp": "^0.33.5", "undici": "^7.16.0", "user-agents": "^1.1.480" diff --git a/tools/check_external_links.js b/tools/check_external_links.js index 0ec88bf4..a3bff599 100644 --- a/tools/check_external_links.js +++ b/tools/check_external_links.js @@ 
/**
 * Tells whether a URL should be checked a second time through Playwright,
 * based on the outcome of the regular HTTP check.
 *
 * A re-check is requested when Playwright is enabled and the first result is
 * inconclusive (missing result, timeout/network error, no numeric status) or
 * when its status code is listed in PLAYWRIGHT_RECHECK_STATUSES.
 *
 * @param {object|null|undefined} result - Result of the first HTTP check;
 *   only its `errorType` and `status` properties are read.
 * @returns {boolean} true when the URL should be re-checked with Playwright.
 */
function shouldRecheckWithPlaywright(result) {
  if (!PLAYWRIGHT_ENABLED) {
    return false;
  }
  // Anything that did not yield a usable numeric status is inconclusive.
  const inconclusive =
    !result ||
    result.errorType === "timeout" ||
    result.errorType === "network" ||
    typeof result.status !== "number";
  if (inconclusive) {
    return true;
  }
  return PLAYWRIGHT_RECHECK_STATUSES.has(result.status);
}
Chrome/122.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", ]; +const DEFAULT_VIEWPORT = { width: 1366, height: 768 }; +const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"]; + +let playwrightModule = null; function buildUserAgent(preferred) { if (typeof preferred === "string" && preferred.trim()) { @@ -112,6 +116,71 @@ function buildNavigationHeaders(url, userAgent, extraHeaders = {}) { return baseHeaders; } +function loadPlaywright() { + if (playwrightModule) { + return playwrightModule; + } + playwrightModule = require("playwright"); + return playwrightModule; +} + +// Vérifie une URL via Playwright, en se rapprochant d'une navigation réelle. +async function checkWithPlaywright(url, options = {}) { + const userAgent = buildUserAgent(options.userAgent); + const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS; + const executablePath = + typeof options.executablePath === "string" && options.executablePath.trim() + ? options.executablePath.trim() + : null; + const playwright = loadPlaywright(); + + let browser = null; + let context = null; + try { + browser = await playwright.chromium.launch({ + headless: true, + executablePath: executablePath || undefined, + args: DEFAULT_PLAYWRIGHT_ARGS, + }); + context = await browser.newContext({ + viewport: { ...DEFAULT_VIEWPORT }, + userAgent, + extraHTTPHeaders: buildNavigationHeaders(url, userAgent), + }); + const page = await context.newPage(); + + try { + const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs }); + const status = response ? response.status() : null; + const finalUrl = page.url() || url; + return { + status, + finalUrl, + method: "GET", + errorType: null, + }; + } catch (error) { + return { + status: null, + finalUrl: url, + method: "GET", + errorType: error?.name === "TimeoutError" ? 
"timeout" : "network", + message: error?.message || null, + }; + } finally { + if (context) { + await context.close(); + } + if (browser) { + await browser.close(); + } + } + } catch (error) { + // Toute erreur de chargement/initialisation Playwright doit interrompre le script. + throw error; + } +} + async function fetchWithRedirects(targetUrl, options, maxRedirects) { let currentUrl = targetUrl; let response = null; @@ -226,4 +295,5 @@ module.exports = { checkUrl, probeUrl, shouldRetry, + checkWithPlaywright, };