Amélioration de la détection de liens morts
This commit is contained in:
@@ -1,18 +1,115 @@
|
||||
const { fetch } = require("undici");
|
||||
const UserAgent = require("user-agents");
|
||||
|
||||
// Default header values sent with every probe request (see buildNavigationHeaders).
const DEFAULT_ACCEPT =
  "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
const DEFAULT_ACCEPT_LANGUAGE = "fr-FR,fr;q=0.9,en;q=0.7";
const DEFAULT_ACCEPT_ENCODING = "gzip, deflate, br";
const DEFAULT_CACHE_CONTROL = "no-cache";
const DEFAULT_PRAGMA = "no-cache";

// Fallbacks used by probeUrl when the caller does not supply finite numbers.
const DEFAULT_TIMEOUT_MS = 5000;
const DEFAULT_MAX_REDIRECTS = 5;

// User-Agent strings picked at random by buildUserAgent when the caller gives none.
// Frozen so that no consumer can accidentally mutate the shared list.
const DEFAULT_USER_AGENTS = Object.freeze([
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
]);
|
||||
|
||||
/**
 * Chooses the User-Agent string to send with a probe request.
 *
 * @param {string} [preferred] - Caller-supplied User-Agent. Used (trimmed)
 *   when it is a string containing at least one non-whitespace character.
 * @returns {string} The trimmed preferred value, otherwise one entry of
 *   DEFAULT_USER_AGENTS chosen at random.
 */
function buildUserAgent(preferred) {
  if (typeof preferred === "string") {
    const trimmed = preferred.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  // No usable preference: pick one of the built-in browser User-Agents.
  // Math.random() is sufficient here; the choice is not security-sensitive.
  const index = Math.floor(Math.random() * DEFAULT_USER_AGENTS.length);
  return DEFAULT_USER_AGENTS[index];
}
|
||||
|
||||
/**
 * Extracts the Chrome major version from a User-Agent string.
 *
 * @param {*} userAgent - Candidate User-Agent; non-strings yield null.
 * @returns {string|null} The digits following "Chrome/" (case-insensitive),
 *   or null when the token is absent.
 */
function extractChromeVersion(userAgent) {
  if (typeof userAgent !== "string") {
    return null;
  }
  // Only the leading run of digits (the major version) is captured.
  const match = /Chrome\/(\d+)/i.exec(userAgent);
  return match ? match[1] : null;
}
|
||||
|
||||
/**
 * Tells whether a User-Agent string carries a "Chrome/<digits>" token.
 *
 * @param {*} userAgent - Candidate User-Agent; non-strings yield false.
 * @returns {boolean} true when the token is present (case-insensitive).
 */
function isChromeLike(userAgent) {
  return typeof userAgent === "string" && /Chrome\/\d+/i.test(userAgent);
}
|
||||
|
||||
/**
 * Derives the platform name advertised in `sec-ch-ua-platform` from a
 * User-Agent string.
 *
 * @param {*} userAgent - Candidate User-Agent; non-strings yield null.
 * @returns {"Windows"|"iOS"|"macOS"|"Android"|"Linux"|null} The first
 *   matching platform, or null when none of the known markers is present.
 */
function derivePlatform(userAgent) {
  if (typeof userAgent !== "string") {
    return null;
  }
  if (/Windows NT/i.test(userAgent)) {
    return "Windows";
  }
  // iOS must be tested before macOS: iPhone/iPad/iPod User-Agents contain
  // "like Mac OS X" and would otherwise be reported as macOS.
  if (/iPhone|iPad|iPod/i.test(userAgent)) {
    return "iOS";
  }
  if (/Mac OS X/i.test(userAgent)) {
    return "macOS";
  }
  // Android must be tested before Linux: Android User-Agents contain "Linux".
  if (/Android/i.test(userAgent)) {
    return "Android";
  }
  if (/Linux/i.test(userAgent)) {
    return "Linux";
  }
  return null;
}
|
||||
|
||||
/**
 * Tells whether a User-Agent string looks like a mobile browser.
 *
 * @param {*} userAgent - Candidate User-Agent; non-strings yield false.
 * @returns {boolean} true when the string contains "Mobile", "Android",
 *   "iPhone", "iPad" or "iPod" (case-insensitive).
 */
function isMobileUserAgent(userAgent) {
  return (
    typeof userAgent === "string" &&
    /Mobile|Android|iPhone|iPad|iPod/i.test(userAgent)
  );
}
|
||||
|
||||
/**
 * Builds the `sec-ch-ua` header value matching a Chrome-like User-Agent.
 *
 * @param {*} userAgent - User-Agent the request will be sent with.
 * @returns {string|null} The brand list carrying the User-Agent's Chrome
 *   major version, or null when the User-Agent is not Chrome-like (in which
 *   case the caller omits the header).
 */
function buildSecChUa(userAgent) {
  if (!isChromeLike(userAgent)) {
    return null;
  }
  // Fall back to the version used by the built-in User-Agents if none is found.
  const version = extractChromeVersion(userAgent) || "122";
  return `"Chromium";v="${version}", "Not A(Brand";v="24", "Google Chrome";v="${version}"`;
}
|
||||
|
||||
/**
 * Builds the request headers for a probe so that it resembles a top-level
 * browser navigation.
 *
 * @param {string} url - Target URL. Currently unused; kept so the signature
 *   stays stable for callers.
 * @param {string} userAgent - User-Agent to send; also drives the
 *   `sec-ch-ua*` client-hint headers.
 * @param {Object<string, string>} [extraHeaders={}] - Caller-supplied headers.
 *   They are applied last, so they override every default and computed
 *   header with the same (identically cased) name.
 * @returns {Object<string, string>} A new header map with lower-case names
 *   for all built-in entries.
 */
function buildNavigationHeaders(url, userAgent, extraHeaders = {}) {
  const headers = {
    "user-agent": userAgent,
    accept: DEFAULT_ACCEPT,
    "accept-language": DEFAULT_ACCEPT_LANGUAGE,
    "accept-encoding": DEFAULT_ACCEPT_ENCODING,
    "cache-control": DEFAULT_CACHE_CONTROL,
    pragma: DEFAULT_PRAGMA,
    dnt: "1",
    connection: "keep-alive",
    "upgrade-insecure-requests": "1",
    "sec-fetch-site": "none",
    "sec-fetch-mode": "navigate",
    "sec-fetch-user": "?1",
    "sec-fetch-dest": "document",
  };

  // Client hints are derived from the User-Agent so both stay consistent.
  const secChUa = buildSecChUa(userAgent);
  if (secChUa) {
    headers["sec-ch-ua"] = secChUa;
  }
  headers["sec-ch-ua-mobile"] = isMobileUserAgent(userAgent) ? "?1" : "?0";
  const platform = derivePlatform(userAgent);
  if (platform) {
    headers["sec-ch-ua-platform"] = `"${platform}"`;
  }

  // Caller-supplied headers go last so an explicit override is never
  // silently replaced by a computed value.
  return { ...headers, ...extraHeaders };
}
|
||||
|
||||
async function fetchWithRedirects(targetUrl, options, maxRedirects) {
|
||||
@@ -47,18 +144,13 @@ async function fetchWithRedirects(targetUrl, options, maxRedirects) {
|
||||
}
|
||||
|
||||
async function probeUrl(url, options = {}) {
|
||||
const method = typeof options.method === "string" ? options.method.toUpperCase() : "HEAD";
|
||||
const method = typeof options.method === "string" ? options.method.toUpperCase() : "GET";
|
||||
const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : DEFAULT_TIMEOUT_MS;
|
||||
const maxRedirects = Number.isFinite(options.maxRedirects)
|
||||
? options.maxRedirects
|
||||
: DEFAULT_MAX_REDIRECTS;
|
||||
const userAgent = buildUserAgent(options.userAgent);
|
||||
const headers = {
|
||||
"user-agent": userAgent,
|
||||
"accept-language": DEFAULT_ACCEPT_LANGUAGE,
|
||||
"accept": DEFAULT_ACCEPT,
|
||||
...(options.headers || {}),
|
||||
};
|
||||
const headers = buildNavigationHeaders(url, userAgent, options.headers || {});
|
||||
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
||||
@@ -117,9 +209,13 @@ function shouldRetry(result) {
|
||||
}
|
||||
|
||||
async function checkUrl(url, options = {}) {
|
||||
const firstMethod = options.firstMethod || "HEAD";
|
||||
const firstMethod = options.firstMethod || "GET";
|
||||
const retryWithGet =
|
||||
typeof options.retryWithGet === "boolean"
|
||||
? options.retryWithGet
|
||||
: firstMethod === "HEAD";
|
||||
let result = await probeUrl(url, { ...options, method: firstMethod });
|
||||
if (options.retryWithGet !== false && shouldRetry(result)) {
|
||||
if (retryWithGet && shouldRetry(result)) {
|
||||
result = await probeUrl(url, { ...options, method: "GET" });
|
||||
}
|
||||
return result;
|
||||
|
||||
Reference in New Issue
Block a user