const { fetch } = require("undici"); const DEFAULT_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; const DEFAULT_ACCEPT_LANGUAGE = "fr-FR,fr;q=0.9,en;q=0.7"; const DEFAULT_ACCEPT_ENCODING = "gzip, deflate, br"; const DEFAULT_CACHE_CONTROL = "no-cache"; const DEFAULT_PRAGMA = "no-cache"; const DEFAULT_TIMEOUT_MS = 5000; const DEFAULT_MAX_REDIRECTS = 5; const DEFAULT_USER_AGENTS = [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", ]; const DEFAULT_VIEWPORT = { width: 1366, height: 768 }; const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"]; let playwrightModule = null; function buildUserAgent(preferred) { if (typeof preferred === "string" && preferred.trim()) { return preferred.trim(); } const index = Math.floor(Math.random() * DEFAULT_USER_AGENTS.length); return DEFAULT_USER_AGENTS[index]; } function extractChromeVersion(userAgent) { if (typeof userAgent !== "string") { return null; } const match = userAgent.match(/Chrome\/(\d+)/i); if (match && match[1]) { return match[1]; } return null; } function isChromeLike(userAgent) { if (typeof userAgent !== "string") { return false; } return /Chrome\/\d+/i.test(userAgent); } function derivePlatform(userAgent) { if (typeof userAgent !== "string") { return null; } if (/Windows NT/i.test(userAgent)) { return "Windows"; } if (/Mac OS X/i.test(userAgent)) { return "macOS"; } if (/Android/i.test(userAgent)) { return "Android"; } if (/iPhone|iPad|iPod/i.test(userAgent)) { return "iOS"; } if (/Linux/i.test(userAgent)) { return "Linux"; } return null; } function isMobileUserAgent(userAgent) { if (typeof userAgent !== "string") { return false; } return /Mobile|Android|iPhone|iPad|iPod/i.test(userAgent); } function buildSecChUa(userAgent) { if (!isChromeLike(userAgent)) { return null; } const version = extractChromeVersion(userAgent) || "122"; return `"Chromium";v="${version}", "Not A(Brand";v="24", "Google Chrome";v="${version}"`; } function buildNavigationHeaders(url, userAgent, extraHeaders = {}) { const platform = derivePlatform(userAgent); const secChUa = buildSecChUa(userAgent); const secChUaMobile = isMobileUserAgent(userAgent) ? "?1" : "?0"; const secChUaPlatform = platform ? `"${platform}"` : null; const baseHeaders = { "user-agent": userAgent, accept: DEFAULT_ACCEPT, "accept-language": DEFAULT_ACCEPT_LANGUAGE, "accept-encoding": DEFAULT_ACCEPT_ENCODING, "cache-control": DEFAULT_CACHE_CONTROL, pragma: DEFAULT_PRAGMA, dnt: "1", connection: "keep-alive", "upgrade-insecure-requests": "1", "sec-fetch-site": "none", "sec-fetch-mode": "navigate", "sec-fetch-user": "?1", "sec-fetch-dest": "document", ...extraHeaders, }; if (secChUa) { baseHeaders["sec-ch-ua"] = secChUa; } if (secChUaMobile) { baseHeaders["sec-ch-ua-mobile"] = secChUaMobile; } if (secChUaPlatform) { baseHeaders["sec-ch-ua-platform"] = secChUaPlatform; } return baseHeaders; } function loadPlaywright() { if (playwrightModule) { return playwrightModule; } playwrightModule = require("playwright"); return playwrightModule; } // Vérifie une URL via Playwright, en se rapprochant d'une navigation réelle. async function checkWithPlaywright(url, options = {}) { const userAgent = buildUserAgent(options.userAgent); const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS; const executablePath = typeof options.executablePath === "string" && options.executablePath.trim() ? options.executablePath.trim() : null; const playwright = loadPlaywright(); let browser = null; let context = null; try { browser = await playwright.chromium.launch({ headless: true, executablePath: executablePath || undefined, args: DEFAULT_PLAYWRIGHT_ARGS, }); context = await browser.newContext({ viewport: { ...DEFAULT_VIEWPORT }, userAgent, extraHTTPHeaders: buildNavigationHeaders(url, userAgent), }); const page = await context.newPage(); try { const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs }); const status = response ? response.status() : null; const finalUrl = page.url() || url; return { status, finalUrl, method: "GET", errorType: null, }; } catch (error) { return { status: null, finalUrl: url, method: "GET", errorType: error?.name === "TimeoutError" ? "timeout" : "network", message: error?.message || null, }; } finally { if (context) { await context.close(); } if (browser) { await browser.close(); } } } catch (error) { // Toute erreur de chargement/initialisation Playwright doit interrompre le script. throw error; } } async function fetchWithRedirects(targetUrl, options, maxRedirects) { let currentUrl = targetUrl; let response = null; let redirects = 0; while (redirects <= maxRedirects) { response = await fetch(currentUrl, { ...options, redirect: "manual" }); const location = response.headers.get("location"); if ( response.status >= 300 && response.status < 400 && location && redirects < maxRedirects ) { if (response.body && typeof response.body.cancel === "function") { try { await response.body.cancel(); } catch (_) { // Ignore cancellation errors; we're moving to the next hop. } } currentUrl = new URL(location, currentUrl).toString(); redirects += 1; continue; } break; } return response; } async function probeUrl(url, options = {}) { const method = typeof options.method === "string" ? options.method.toUpperCase() : "GET"; const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS; const maxRedirects = Number.isFinite(options.maxRedirects) ? options.maxRedirects : DEFAULT_MAX_REDIRECTS; const userAgent = buildUserAgent(options.userAgent); const headers = buildNavigationHeaders(url, userAgent, options.headers || {}); const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), timeoutMs); try { const response = await fetchWithRedirects( url, { method, headers, signal: controller.signal, }, maxRedirects ); const status = response ? response.status : null; const finalUrl = response?.url || url; if (response?.body && typeof response.body.cancel === "function") { try { await response.body.cancel(); } catch (_) { // Ignore cancellation errors; the status is all we needed. } } return { status, finalUrl, method, errorType: null, }; } catch (error) { if (error.name === "AbortError") { return { status: null, finalUrl: url, method, errorType: "timeout", }; } return { status: null, finalUrl: url, method, errorType: "network", message: error.message, }; } finally { clearTimeout(timer); } } function shouldRetry(result) { if (!result) return true; if (result.errorType) return true; if (typeof result.status !== "number") return true; return result.status >= 400; } async function checkUrl(url, options = {}) { const firstMethod = options.firstMethod || "GET"; const retryWithGet = typeof options.retryWithGet === "boolean" ? options.retryWithGet : firstMethod === "HEAD"; let result = await probeUrl(url, { ...options, method: firstMethod }); if (retryWithGet && shouldRetry(result)) { result = await probeUrl(url, { ...options, method: "GET" }); } return result; } module.exports = { buildUserAgent, checkUrl, probeUrl, shouldRetry, checkWithPlaywright, };