Uniformisation de la vérification des liens
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
const puppeteer = require("puppeteer-extra");
|
||||
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
|
||||
const UserAgent = require("user-agents");
|
||||
const { buildUserAgent } = require("./http");
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
@@ -8,9 +8,11 @@ puppeteer.use(StealthPlugin());
|
||||
* Scrape a webpage to extract metadata and take a screenshot.
|
||||
* @param {string} url - The URL of the page to scrape.
|
||||
* @param {string} screenshotPath - Path where the screenshot should be saved.
|
||||
* @param {object} [options={}] - Optional settings for the scraping session.
|
||||
* @param {string} [options.userAgent] - Optional user agent to use for the session.
|
||||
* @returns {Promise<object>} - Metadata including title, description, keywords, language, and HTTP status.
|
||||
*/
|
||||
async function scrapePage(url, screenshotPath) {
|
||||
async function scrapePage(url, screenshotPath, options = {}) {
|
||||
console.log(`🔍 Scraping: ${url}`);
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
@@ -35,9 +37,8 @@ async function scrapePage(url, screenshotPath) {
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// Generate a fresh, realistic user-agent
|
||||
const userAgent = new UserAgent();
|
||||
await page.setUserAgent(userAgent.toString());
|
||||
const userAgent = buildUserAgent(options.userAgent);
|
||||
await page.setUserAgent(userAgent);
|
||||
|
||||
// Add headers to simulate a real browser
|
||||
await page.setExtraHTTPHeaders({
|
||||
|
||||
Reference in New Issue
Block a user