Uniformisation de la vérification des liens
This commit is contained in:
@@ -6,6 +6,7 @@ const crypto = require("crypto");
|
||||
const path = require("path");
|
||||
const os = require("os");
|
||||
const YAML = require("yaml");
|
||||
const { buildUserAgent, checkUrl } = require("./lib/http");
|
||||
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
|
||||
const { scrapePage } = require("./lib/puppeteer");
|
||||
|
||||
@@ -59,6 +60,14 @@ if (duplicateBundlePath) {
|
||||
|
||||
// Check URL accessibility and Archive.org availability
|
||||
(async () => {
|
||||
const userAgent = buildUserAgent();
|
||||
const initialCheck = await checkUrl(url, { userAgent, timeoutMs: 8000 });
|
||||
if (initialCheck.errorType || (typeof initialCheck.status === "number" && initialCheck.status >= 400)) {
|
||||
console.warn(`⚠ Vérification HTTP avant scraping: ${initialCheck.errorType || initialCheck.status || "indéterminé"}`);
|
||||
} else {
|
||||
console.log(`🌐 Vérification HTTP avant scraping: ${initialCheck.status ?? "inconnue"}`);
|
||||
}
|
||||
|
||||
let archiveUrl = await getArchiveUrl(url);
|
||||
|
||||
// If the URL is not archived, attempt to save it
|
||||
@@ -106,13 +115,16 @@ if (duplicateBundlePath) {
|
||||
|
||||
// Scrape the page and capture a screenshot
|
||||
console.log(`🔍 Scraping page and capturing screenshot...`);
|
||||
- const metadata = await scrapePage(url, tempScreenshotPath);
|
||||
+ const metadata = await scrapePage(url, tempScreenshotPath, { userAgent });
|
||||
|
||||
// If Puppeteer failed, do not proceed
|
||||
if (!metadata || !fs.existsSync(tempScreenshotPath)) {
|
||||
console.error(`❌ Scraping failed. No bundle will be created.`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (!metadata.httpStatus && typeof initialCheck.status === "number") {
|
||||
metadata.httpStatus = initialCheck.status;
|
||||
}
|
||||
|
||||
// Create Hugo bundle only if scraping succeeded
|
||||
console.log(`📦 Creating Hugo bundle for ${url}...`);
|
||||
|
||||
Reference in New Issue
Block a user