1

Utilisation de Playwright pour la vérification des liens externes

This commit is contained in:
2025-12-08 01:15:28 +01:00
parent cb7cd917d7
commit 0beac1afb0
6 changed files with 183 additions and 36 deletions

View File

@@ -12,6 +12,10 @@ const DEFAULT_USER_AGENTS = [
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
];
// Viewport dimensions used for Playwright browser contexts; checkWithPlaywright
// spreads a copy of this object into each new context.
const DEFAULT_VIEWPORT = { width: 1366, height: 768 };
// Command-line switches passed as `args` to chromium.launch().
// NOTE(review): presumably meant to make the automated browser look more like a
// regular one to the sites being checked — confirm the intent of this flag.
const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"];
// Cache for the "playwright" module; stays null until loadPlaywright() requires it.
let playwrightModule = null;
function buildUserAgent(preferred) {
if (typeof preferred === "string" && preferred.trim()) {
@@ -112,6 +116,71 @@ function buildNavigationHeaders(url, userAgent, extraHeaders = {}) {
return baseHeaders;
}
/**
 * Returns the Playwright module, requiring it on first use and handing back
 * the cached reference on every later call.
 *
 * @returns {object} Whatever `require("playwright")` resolved to.
 * @throws Propagates the error raised by `require` when the module cannot be loaded.
 */
function loadPlaywright() {
  // Only hit the module loader while the cache is still empty.
  if (!playwrightModule) {
    playwrightModule = require("playwright");
  }
  return playwrightModule;
}
/**
 * Checks a URL with Playwright, approximating a real browser navigation.
 *
 * A headless Chromium instance is launched, a context is created with the
 * default viewport, the selected user agent and the navigation headers, and
 * the page is loaded until `domcontentloaded`.
 *
 * @param {string} url - URL to navigate to.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Preferred user agent, forwarded to buildUserAgent.
 * @param {number} [options.timeoutMs] - Navigation timeout; DEFAULT_TIMEOUT_MS is used
 *   when this is not a finite number.
 * @param {string} [options.executablePath] - Browser executable to launch; ignored when
 *   it is not a non-blank string.
 * @returns {Promise<{status: (number|null), finalUrl: string, method: string,
 *   errorType: (string|null), message: (string|null|undefined)}>}
 *   On success: the response status (null when Playwright reports no response), the
 *   page URL after navigation, and `errorType: null`. On navigation failure:
 *   `status: null`, the original URL, `errorType` "timeout" or "network", and the
 *   error message.
 * @throws Any error raised while loading Playwright, launching the browser, or
 *   creating the context/page: those must abort the script rather than be reported
 *   as a broken link. Errors raised while closing the context or browser also propagate.
 */
async function checkWithPlaywright(url, options = {}) {
  const userAgent = buildUserAgent(options.userAgent);
  const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
  const executablePath =
    typeof options.executablePath === "string" && options.executablePath.trim()
      ? options.executablePath.trim()
      : undefined;

  const playwright = loadPlaywright();
  const browser = await playwright.chromium.launch({
    headless: true,
    executablePath,
    args: DEFAULT_PLAYWRIGHT_ARGS,
  });

  // Each resource is released by a `finally` that covers its whole lifetime, so a
  // failure in newContext()/newPage() no longer leaves the browser process running.
  try {
    const context = await browser.newContext({
      viewport: { ...DEFAULT_VIEWPORT },
      userAgent,
      extraHTTPHeaders: buildNavigationHeaders(url, userAgent),
    });

    try {
      const page = await context.newPage();

      // Only navigation failures are turned into a result object; everything
      // outside this inner try is an initialisation error and propagates.
      try {
        const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs });
        return {
          status: response ? response.status() : null,
          finalUrl: page.url() || url,
          method: "GET",
          errorType: null,
        };
      } catch (error) {
        return {
          status: null,
          finalUrl: url,
          method: "GET",
          errorType: error?.name === "TimeoutError" ? "timeout" : "network",
          message: error?.message || null,
        };
      }
    } finally {
      await context.close();
    }
  } finally {
    // Runs even when context.close() throws, so the browser is always shut down.
    await browser.close();
  }
}
async function fetchWithRedirects(targetUrl, options, maxRedirects) {
let currentUrl = targetUrl;
let response = null;
@@ -226,4 +295,5 @@ module.exports = {
checkUrl,
probeUrl,
shouldRetry,
checkWithPlaywright,
};