// Listing metadata (from the file viewer): 300 lines, 8.1 KiB, JavaScript.
const { fetch } = require("undici");
|
|
|
|
// Default request header values sent with every navigation-style probe.
const DEFAULT_ACCEPT =
  "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
const DEFAULT_ACCEPT_LANGUAGE = "fr-FR,fr;q=0.9,en;q=0.7";
const DEFAULT_ACCEPT_ENCODING = "gzip, deflate, br";
const DEFAULT_CACHE_CONTROL = "no-cache";
const DEFAULT_PRAGMA = "no-cache";

// Fallbacks used when the caller does not pass a finite timeoutMs / maxRedirects.
const DEFAULT_TIMEOUT_MS = 5000;
const DEFAULT_MAX_REDIRECTS = 5;

// Pool of User-Agent strings; one is picked at random when the caller supplies none.
const DEFAULT_USER_AGENTS = [
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
];

// Browser context viewport and Chromium launch arguments used by the Playwright check.
const DEFAULT_VIEWPORT = { width: 1366, height: 768 };
const DEFAULT_PLAYWRIGHT_ARGS = ["--disable-blink-features=AutomationControlled"];

// Cache for the lazily required "playwright" module (filled by loadPlaywright).
let playwrightModule = null;
|
|
|
|
/**
 * Chooses the User-Agent string to send.
 *
 * @param {*} preferred - Caller-supplied User-Agent; honoured only when it is
 *   a string that is non-empty after trimming.
 * @returns {string} The trimmed preferred value, otherwise a randomly chosen
 *   entry of DEFAULT_USER_AGENTS.
 */
function buildUserAgent(preferred) {
  const trimmed = typeof preferred === "string" ? preferred.trim() : "";
  if (trimmed) {
    return trimmed;
  }
  const pick = Math.floor(Math.random() * DEFAULT_USER_AGENTS.length);
  return DEFAULT_USER_AGENTS[pick];
}
|
|
|
|
/**
 * Extracts the major Chrome version from a User-Agent string.
 *
 * @param {*} userAgent - Value to inspect; non-strings yield null.
 * @returns {string|null} The digits following "Chrome/" (matched
 *   case-insensitively), or null when there is no such token.
 */
function extractChromeVersion(userAgent) {
  const found = typeof userAgent === "string" ? userAgent.match(/Chrome\/(\d+)/i) : null;
  return found && found[1] ? found[1] : null;
}
|
|
|
|
/**
 * Tells whether a User-Agent string carries a "Chrome/<digits>" token.
 *
 * @param {*} userAgent - Value to inspect; non-strings are never Chrome-like.
 * @returns {boolean} True when the token is present (case-insensitive).
 */
function isChromeLike(userAgent) {
  return typeof userAgent === "string" && /Chrome\/\d+/i.test(userAgent);
}
|
|
|
|
/**
 * Derives the platform name (as used in the `sec-ch-ua-platform` header)
 * from a User-Agent string.
 *
 * Rules are evaluated in order and the first match wins, so the more specific
 * tokens must come first:
 *  - iOS user agents contain "like Mac OS X", so iPhone/iPad/iPod is tested
 *    before the macOS rule;
 *  - Android user agents contain "Linux", so Android is tested before Linux.
 *
 * @param {*} userAgent - Value to inspect; non-strings yield null.
 * @returns {"Windows"|"iOS"|"macOS"|"Android"|"Linux"|null} The detected
 *   platform, or null when nothing matches.
 */
function derivePlatform(userAgent) {
  if (typeof userAgent !== "string") {
    return null;
  }
  const rules = [
    [/Windows NT/i, "Windows"],
    [/iPhone|iPad|iPod/i, "iOS"],
    [/Mac OS X/i, "macOS"],
    [/Android/i, "Android"],
    [/Linux/i, "Linux"],
  ];
  for (const [pattern, platform] of rules) {
    if (pattern.test(userAgent)) {
      return platform;
    }
  }
  return null;
}
|
|
|
|
/**
 * Tells whether a User-Agent string looks like a mobile device.
 *
 * @param {*} userAgent - Value to inspect; non-strings are never mobile.
 * @returns {boolean} True when the string contains Mobile, Android, iPhone,
 *   iPad or iPod (case-insensitive).
 */
function isMobileUserAgent(userAgent) {
  return typeof userAgent === "string" && /Mobile|Android|iPhone|iPad|iPod/i.test(userAgent);
}
|
|
|
|
/**
 * Builds the `sec-ch-ua` brand list matching a Chrome-like User-Agent.
 *
 * @param {*} userAgent - User-Agent the brand list must agree with.
 * @returns {string|null} The header value, using the major version found in
 *   the User-Agent (or "122" when none can be extracted); null when the
 *   User-Agent is not Chrome-like.
 */
function buildSecChUa(userAgent) {
  if (isChromeLike(userAgent)) {
    const major = extractChromeVersion(userAgent) || "122";
    return `"Chromium";v="${major}", "Not A(Brand";v="24", "Google Chrome";v="${major}"`;
  }
  return null;
}
|
|
|
|
/**
 * Builds the header set of a top-level browser navigation for the given
 * User-Agent.
 *
 * Defaults are laid down first, then the client hints derived from the
 * User-Agent (`sec-ch-ua` only for Chrome-like agents, `sec-ch-ua-platform`
 * only when a platform is recognised, `sec-ch-ua-mobile` always). Caller
 * headers are applied last so they override any default, client hints
 * included. Their names are lower-cased first, so that e.g. "User-Agent"
 * replaces the default "user-agent" entry instead of being sent alongside it.
 *
 * @param {string} url - Target URL. Currently unused; kept for interface
 *   compatibility.
 * @param {string} userAgent - User-Agent to advertise and derive hints from.
 * @param {Object<string, string>} [extraHeaders={}] - Caller overrides or
 *   additions, matched case-insensitively by name.
 * @returns {Object<string, string>} A new plain object keyed by lower-case
 *   header name.
 */
function buildNavigationHeaders(url, userAgent, extraHeaders = {}) {
  const headers = {
    "user-agent": userAgent,
    accept: DEFAULT_ACCEPT,
    "accept-language": DEFAULT_ACCEPT_LANGUAGE,
    "accept-encoding": DEFAULT_ACCEPT_ENCODING,
    "cache-control": DEFAULT_CACHE_CONTROL,
    pragma: DEFAULT_PRAGMA,
    dnt: "1",
    connection: "keep-alive",
    "upgrade-insecure-requests": "1",
    "sec-fetch-site": "none",
    "sec-fetch-mode": "navigate",
    "sec-fetch-user": "?1",
    "sec-fetch-dest": "document",
  };

  const secChUa = buildSecChUa(userAgent);
  if (secChUa) {
    headers["sec-ch-ua"] = secChUa;
  }
  headers["sec-ch-ua-mobile"] = isMobileUserAgent(userAgent) ? "?1" : "?0";
  const platform = derivePlatform(userAgent);
  if (platform) {
    headers["sec-ch-ua-platform"] = `"${platform}"`;
  }

  // Header names are case-insensitive on the wire; normalising here keeps a
  // single entry per header so the caller's value cleanly wins.
  for (const [name, value] of Object.entries(extraHeaders || {})) {
    headers[name.toLowerCase()] = value;
  }

  return headers;
}
|
|
|
|
/**
 * Returns the "playwright" module, requiring it on first use and caching it
 * in `playwrightModule` for later calls.
 *
 * @returns {object} The loaded playwright module.
 */
function loadPlaywright() {
  if (!playwrightModule) {
    playwrightModule = require("playwright");
  }
  return playwrightModule;
}
|
|
|
|
/**
 * Checks a URL through Playwright, staying close to a real browser navigation.
 *
 * Launches headless Chromium, opens a context with the chosen User-Agent and
 * navigation headers, and navigates to `url` until "domcontentloaded".
 *
 * @param {string} url - URL to navigate to.
 * @param {object} [options={}]
 * @param {string} [options.userAgent] - Preferred User-Agent (see buildUserAgent).
 * @param {number} [options.timeoutMs] - Navigation timeout; DEFAULT_TIMEOUT_MS
 *   when not a finite number.
 * @param {string} [options.executablePath] - Browser binary to launch; ignored
 *   when blank or not a string.
 * @returns {Promise<{status: (number|null), finalUrl: string, method: string,
 *   errorType: (string|null), message: (string|null|undefined)}>}
 *   On success `errorType` is null. When the navigation itself fails,
 *   `status` is null, `errorType` is "timeout" (error named "TimeoutError") or
 *   "network", and `message` carries the error message.
 * @throws Any error raised while loading Playwright, launching the browser,
 *   creating the context or page, or closing them: set-up failures must
 *   abort the caller rather than be reported as a probe result.
 */
async function checkWithPlaywright(url, options = {}) {
  const userAgent = buildUserAgent(options.userAgent);
  const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
  const executablePath =
    typeof options.executablePath === "string" && options.executablePath.trim()
      ? options.executablePath.trim()
      : null;
  const playwright = loadPlaywright();

  let browser = null;
  let context = null;
  try {
    browser = await playwright.chromium.launch({
      headless: true,
      executablePath: executablePath || undefined,
      args: DEFAULT_PLAYWRIGHT_ARGS,
    });
    context = await browser.newContext({
      viewport: { ...DEFAULT_VIEWPORT },
      userAgent,
      extraHTTPHeaders: buildNavigationHeaders(url, userAgent),
    });
    const page = await context.newPage();

    // Only the navigation is turned into a result object; everything else
    // propagates to the caller.
    try {
      const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: timeoutMs });
      return {
        status: response ? response.status() : null,
        finalUrl: page.url() || url,
        method: "GET",
        errorType: null,
      };
    } catch (error) {
      return {
        status: null,
        finalUrl: url,
        method: "GET",
        errorType: error?.name === "TimeoutError" ? "timeout" : "network",
        message: error?.message || null,
      };
    }
  } finally {
    // Runs for set-up failures too, so a browser that was launched is never
    // left behind; the nested finally closes it even if closing the context
    // throws.
    try {
      if (context) {
        await context.close();
      }
    } finally {
      if (browser) {
        await browser.close();
      }
    }
  }
}
|
|
|
|
/**
 * Fetches a URL while following redirects by hand, one hop at a time.
 *
 * Every request is issued with `redirect: "manual"`. A 3xx response carrying
 * a `location` header is followed (its body is cancelled first, and the
 * location is resolved against the URL just requested) as long as fewer than
 * `maxRedirects` hops have been taken; otherwise the response is returned
 * as is.
 *
 * @param {string} targetUrl - First URL to request.
 * @param {object} options - Options forwarded to every `fetch` call.
 * @param {number} maxRedirects - Maximum number of redirects to follow.
 * @returns {Promise<object|null>} The last response received, or null when
 *   no request was made (e.g. negative `maxRedirects`).
 */
async function fetchWithRedirects(targetUrl, options, maxRedirects) {
  let nextUrl = targetUrl;
  let lastResponse = null;

  for (let hops = 0; hops <= maxRedirects; hops += 1) {
    lastResponse = await fetch(nextUrl, { ...options, redirect: "manual" });
    const target = lastResponse.headers.get("location");
    const isRedirect = lastResponse.status >= 300 && lastResponse.status < 400;
    const mayFollow = isRedirect && target && hops < maxRedirects;
    if (!mayFollow) {
      return lastResponse;
    }

    const { body } = lastResponse;
    if (body && typeof body.cancel === "function") {
      try {
        await body.cancel();
      } catch (_) {
        // Ignore cancellation errors; we're moving to the next hop.
      }
    }
    nextUrl = new URL(target, nextUrl).toString();
  }

  return lastResponse;
}
|
|
|
|
/**
 * Probes a URL with a single HTTP method and reports the outcome as a plain
 * result object instead of throwing.
 *
 * @param {string} url - URL to probe.
 * @param {object} [options={}]
 * @param {string} [options.method] - HTTP method, upper-cased before use;
 *   "GET" when not a string.
 * @param {number} [options.timeoutMs] - Delay before the request is aborted;
 *   DEFAULT_TIMEOUT_MS when not a finite number.
 * @param {number} [options.maxRedirects] - Redirect limit;
 *   DEFAULT_MAX_REDIRECTS when not a finite number.
 * @param {string} [options.userAgent] - Preferred User-Agent.
 * @param {object} [options.headers] - Extra request headers.
 * @returns {Promise<{status: (number|null), finalUrl: string, method: string,
 *   errorType: (string|null), message: (string|undefined)}>}
 *   `errorType` is null on completion, "timeout" when the failure is named
 *   "AbortError", and "network" (with `message`) for any other failure.
 */
async function probeUrl(url, options = {}) {
  const {
    method: requestedMethod,
    timeoutMs: requestedTimeout,
    maxRedirects: requestedRedirects,
  } = options;
  const method = typeof requestedMethod === "string" ? requestedMethod.toUpperCase() : "GET";
  const timeoutMs = Number.isFinite(requestedTimeout) ? requestedTimeout : DEFAULT_TIMEOUT_MS;
  const maxRedirects = Number.isFinite(requestedRedirects)
    ? requestedRedirects
    : DEFAULT_MAX_REDIRECTS;
  const userAgent = buildUserAgent(options.userAgent);
  const headers = buildNavigationHeaders(url, userAgent, options.headers || {});

  // The abort timer spans the whole redirect chain, not each hop.
  const abortController = new AbortController();
  const timeoutHandle = setTimeout(() => abortController.abort(), timeoutMs);

  try {
    const request = { method, headers, signal: abortController.signal };
    const response = await fetchWithRedirects(url, request, maxRedirects);
    const outcome = {
      status: response ? response.status : null,
      finalUrl: response?.url || url,
      method,
      errorType: null,
    };
    if (response?.body && typeof response.body.cancel === "function") {
      try {
        await response.body.cancel();
      } catch (_) {
        // Ignore cancellation errors; the status is all we needed.
      }
    }
    return outcome;
  } catch (error) {
    const failure = { status: null, finalUrl: url, method };
    if (error.name === "AbortError") {
      return { ...failure, errorType: "timeout" };
    }
    return { ...failure, errorType: "network", message: error.message };
  } finally {
    clearTimeout(timeoutHandle);
  }
}
|
|
|
|
/**
 * Decides whether a probe result warrants another attempt.
 *
 * @param {object|null|undefined} result - Result object of a probe.
 * @returns {boolean} True when the result is missing, carries an `errorType`,
 *   has a non-numeric `status`, or has a status of 400 or above.
 */
function shouldRetry(result) {
  return (
    !result ||
    Boolean(result.errorType) ||
    typeof result.status !== "number" ||
    result.status >= 400
  );
}
|
|
|
|
/**
 * Checks a URL with a first probe and, optionally, a GET fallback.
 *
 * The first method is normalised to upper case before any comparison, the
 * same way probeUrl normalises it, so "head" and "HEAD" behave identically.
 *
 * @param {string} url - URL to check.
 * @param {object} [options={}] - Forwarded to probeUrl (with `method` set),
 *   plus:
 * @param {string} [options.firstMethod="GET"] - Method of the first probe;
 *   non-string or empty values fall back to "GET".
 * @param {boolean} [options.retryWithGet] - Whether to re-probe with GET when
 *   shouldRetry() flags the first result. Defaults to true only when the
 *   first method is HEAD.
 * @returns {Promise<object>} The result of the last probe performed.
 */
async function checkUrl(url, options = {}) {
  const firstMethod =
    typeof options.firstMethod === "string" && options.firstMethod
      ? options.firstMethod.toUpperCase()
      : "GET";
  const retryWithGet =
    typeof options.retryWithGet === "boolean" ? options.retryWithGet : firstMethod === "HEAD";

  const firstResult = await probeUrl(url, { ...options, method: firstMethod });
  if (!retryWithGet || !shouldRetry(firstResult)) {
    return firstResult;
  }
  return probeUrl(url, { ...options, method: "GET" });
}
|
|
|
|
module.exports = {
|
|
buildUserAgent,
|
|
checkUrl,
|
|
probeUrl,
|
|
shouldRetry,
|
|
checkWithPlaywright,
|
|
};
|