1

Uniformisation de la vérification des liens

This commit is contained in:
2025-11-28 23:43:07 +01:00
parent 5e846aa4b4
commit 0260c1ab4e
7 changed files with 2145 additions and 1157 deletions

View File

@@ -6,6 +6,7 @@ const crypto = require("crypto");
const path = require("path");
const os = require("os");
const YAML = require("yaml");
const { buildUserAgent, checkUrl } = require("./lib/http");
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
const { scrapePage } = require("./lib/puppeteer");
@@ -59,6 +60,14 @@ if (duplicateBundlePath) {
// Check URL accessibility and Archive.org availability
(async () => {
const userAgent = buildUserAgent();
const initialCheck = await checkUrl(url, { userAgent, timeoutMs: 8000 });
if (initialCheck.errorType || (typeof initialCheck.status === "number" && initialCheck.status >= 400)) {
console.warn(`⚠ Vérification HTTP avant scraping: ${initialCheck.errorType || initialCheck.status || "indéterminé"}`);
} else {
console.log(`🌐 Vérification HTTP avant scraping: ${initialCheck.status ?? "inconnue"}`);
}
let archiveUrl = await getArchiveUrl(url);
// If the URL is not archived, attempt to save it
@@ -106,13 +115,16 @@ if (duplicateBundlePath) {
// Scrape the page and capture a screenshot
console.log(`🔍 Scraping page and capturing screenshot...`);
const metadata = await scrapePage(url, tempScreenshotPath);
const metadata = await scrapePage(url, tempScreenshotPath, { userAgent });
// If Puppeteer failed, do not proceed
if (!metadata || !fs.existsSync(tempScreenshotPath)) {
console.error(`❌ Scraping failed. No bundle will be created.`);
process.exit(1);
}
if (!metadata.httpStatus && typeof initialCheck.status === "number") {
metadata.httpStatus = initialCheck.status;
}
// Create Hugo bundle only if scraping succeeded
console.log(`📦 Creating Hugo bundle for ${url}...`);