1

Uniformisation de la vérification des liens

This commit is contained in:
2025-11-28 23:43:07 +01:00
parent 5e846aa4b4
commit 0260c1ab4e
7 changed files with 2145 additions and 1157 deletions

View File

@@ -1,6 +1,6 @@
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const UserAgent = require("user-agents");
const { buildUserAgent } = require("./http");
puppeteer.use(StealthPlugin());
@@ -8,9 +8,11 @@ puppeteer.use(StealthPlugin());
* Scrape a webpage to extract metadata and take a screenshot.
* @param {string} url - The URL of the page to scrape.
* @param {string} screenshotPath - Path where the screenshot should be saved.
* @param {object} [options] - Optional scraping settings (defaults to an empty object).
* @param {string} [options.userAgent] - Optional user agent to use for the session.
* @returns {Promise<object>} - Metadata including title, description, keywords, language, and HTTP status.
*/
async function scrapePage(url, screenshotPath) {
async function scrapePage(url, screenshotPath, options = {}) {
console.log(`🔍 Scraping: ${url}`);
const browser = await puppeteer.launch({
@@ -35,9 +37,8 @@ async function scrapePage(url, screenshotPath) {
const page = await browser.newPage();
// Generate a fresh, realistic user-agent
const userAgent = new UserAgent();
await page.setUserAgent(userAgent.toString());
const userAgent = buildUserAgent(options.userAgent);
await page.setUserAgent(userAgent);
// Add headers to simulate a real browser
await page.setExtraHTTPHeaders({