#!/usr/bin/env node

/**
 * Adds an "interesting link" page bundle to the Hugo site rooted at the
 * current working directory.
 *
 * Usage: add_link.js URL [optional: YYYY-MM-DD]
 *
 * Steps, in order:
 *   1. Abort early if a bundle named after the URL hash already exists.
 *   2. Check the URL over HTTP and look up (or request) an archive URL.
 *   3. Scrape the page and capture a screenshot into the temp directory.
 *   4. Create the bundle with `hugo new`, move the screenshot into it and
 *      fill in the generated front matter.
 *   5. Write the screenshot's metadata file.
 */

const { execSync } = require("child_process");
const fs = require("fs");
const crypto = require("crypto");
const path = require("path");
const os = require("os");
const YAML = require("yaml");
const { buildUserAgent, checkUrl } = require("./lib/http");
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
const { scrapePage } = require("./lib/puppeteer");
const { formatDateTime } = require("./lib/datetime");

const LINKS_ROOT = path.join("content", "interets", "liens-interessants");

if (process.argv.length < 3) {
  console.error("Usage: add_link.js URL [optional: YYYY-MM-DD]");
  process.exit(1);
}

const url = process.argv[2];
const customDate = process.argv[3] || null;

// First 8 hex characters of the URL's MD5 digest; used as the bundle
// directory name and as the duplicate-detection key.
const urlHash = crypto.createHash("md5").update(url).digest("hex").slice(0, 8);

const hugoRoot = path.resolve(process.cwd());
const interestingLinksRoot = path.join(hugoRoot, LINKS_ROOT);

/**
 * Searches the links tree (depth-first, iteratively) for a directory whose
 * name equals `hash`.
 *
 * @param {string} hash - Directory name to look for.
 * @returns {string|null} Absolute path of the first match, or null when the
 *   links root does not exist or nothing matches.
 */
function findExistingLinkBundle(hash) {
  if (!fs.existsSync(interestingLinksRoot)) {
    return null;
  }

  const stack = [interestingLinksRoot];
  while (stack.length > 0) {
    const current = stack.pop();
    if (path.basename(current) === hash) {
      return current;
    }

    let entries = [];
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch (error) {
      // Deliberate best-effort: an unreadable directory is skipped rather
      // than aborting the whole search.
      continue;
    }

    for (const entry of entries) {
      if (entry.isDirectory()) {
        stack.push(path.join(current, entry.name));
      }
    }
  }

  return null;
}

/**
 * Builds the entry date from the optional command-line argument.
 *
 * A bare `YYYY-MM-DD` is interpreted as local midnight. Passing it straight
 * to `new Date()` would parse it as UTC midnight, and the local getters used
 * for the bundle path would then yield the previous day in time zones west
 * of UTC.
 *
 * @param {string|null} value - Raw argument, or null for "now".
 * @returns {Date} The parsed date; an invalid Date when `value` is unusable.
 */
function parseEntryDate(value) {
  if (!value) {
    return new Date();
  }

  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
  if (!match) {
    return new Date(value);
  }

  const [year, month, day] = match.slice(1).map(Number);
  const date = new Date(year, month - 1, day);
  // The Date constructor silently rolls over impossible days (e.g. Feb 31);
  // reject them instead of filing the link under another day.
  const isRealDay =
    date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day;
  return isRealDay ? date : new Date(NaN);
}

/**
 * Renders a value as a double-quoted scalar. JSON string syntax is valid
 * YAML, and escapes quotes, backslashes and control characters.
 *
 * @param {*} value - Value to quote (converted with String()).
 * @returns {string} The quoted, escaped scalar.
 */
function yamlQuote(value) {
  return JSON.stringify(String(value));
}

/**
 * Moves a file, falling back to copy + delete when source and destination
 * are on different filesystems (rename fails with EXDEV in that case, which
 * is common when the temp directory is a separate mount).
 *
 * @param {string} source - Existing file path.
 * @param {string} destination - Target file path.
 */
function moveFile(source, destination) {
  try {
    fs.renameSync(source, destination);
  } catch (error) {
    if (error.code !== "EXDEV") {
      throw error;
    }
    fs.copyFileSync(source, destination);
    fs.unlinkSync(source);
  }
}

const duplicateBundlePath = findExistingLinkBundle(urlHash);
if (duplicateBundlePath) {
  const relative = path.relative(hugoRoot, duplicateBundlePath);
  console.log(`⚠ Link already exists at ${relative}: ${url}`);
  process.exit(0);
}

/**
 * Runs the network checks, the scraping and the bundle creation.
 * Exits the process with status 1 on an invalid date, a failed scrape or a
 * missing bundle directory.
 */
async function main() {
  // Check URL accessibility and Archive.org availability
  const userAgent = buildUserAgent();
  const initialCheck = await checkUrl(url, { userAgent, timeoutMs: 8000 });
  if (initialCheck.errorType || (typeof initialCheck.status === "number" && initialCheck.status >= 400)) {
    console.warn(`⚠ Vérification HTTP avant scraping: ${initialCheck.errorType || initialCheck.status || "indéterminé"}`);
  } else {
    console.log(`🌐 Vérification HTTP avant scraping: ${initialCheck.status ?? "inconnue"}`);
  }

  let archiveUrl = await getArchiveUrl(url);

  // If the URL is not archived, attempt to save it
  if (!archiveUrl) {
    console.log(`📂 No archive found. Attempting to save ${url}...`);
    archiveUrl = await saveToArchive(url);
    if (!archiveUrl) {
      console.log(`⚠ Warning: Unable to archive ${url}. Continuing without archive.`);
    }
  }

  // Only announce an archive URL when there is one to show.
  if (archiveUrl) {
    console.log(`📂 Archive URL ${archiveUrl}...`);
  }

  // Determine the entry date and time
  const entryDate = parseEntryDate(customDate);
  if (Number.isNaN(entryDate.getTime())) {
    console.error("❌ Invalid date format. Use YYYY-MM-DD.");
    process.exit(1);
  }

  const now = new Date(); // Current date for status
  const formattedEntryDate = formatDateTime(entryDate); // Formatted by the project's datetime helper
  const formattedStatusDate = now.toISOString(); // ISO format
  const formattedDateFrench = entryDate.toLocaleDateString("fr-FR", {
    year: "numeric",
    month: "long",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });

  const year = entryDate.getFullYear();
  const month = String(entryDate.getMonth() + 1).padStart(2, "0");
  const day = String(entryDate.getDate()).padStart(2, "0");

  // Define paths. The trailing separator is kept so the printed bundle path
  // still ends with a slash.
  const bundlePath = path.join(interestingLinksRoot, String(year), month, day, urlHash, path.sep);
  const imagesPath = path.join(bundlePath, "images");
  const dataPath = path.join(bundlePath, "data");
  const finalScreenshotPath = path.join(imagesPath, "screenshot.png");
  const metadataPath = path.join(dataPath, "screenshot.yaml");

  // Store screenshot in a temporary location first, so that nothing is
  // written into the site tree unless scraping succeeds.
  const tempScreenshotPath = path.join(os.tmpdir(), `screenshot_${urlHash}.png`);

  // Scrape the page and capture a screenshot
  console.log(`🔍 Scraping page and capturing screenshot...`);
  const metadata = await scrapePage(url, tempScreenshotPath, { userAgent });

  // If Puppeteer failed, do not proceed
  if (!metadata || !fs.existsSync(tempScreenshotPath)) {
    console.error(`❌ Scraping failed. No bundle will be created.`);
    process.exit(1);
  }

  // Fall back to the status seen during the initial HTTP check.
  if (!metadata.httpStatus && typeof initialCheck.status === "number") {
    metadata.httpStatus = initialCheck.status;
  }

  // Create Hugo bundle only if scraping succeeded
  console.log(`📦 Creating Hugo bundle for ${url}...`);
  execSync(`hugo new --kind liens-interessants interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`, { stdio: "inherit" });

  if (!fs.existsSync(bundlePath)) {
    console.error("❌ Failed to create the bundle.");
    process.exit(1);
  }

  // Move the screenshot to the final destination
  if (!fs.existsSync(imagesPath)) {
    fs.mkdirSync(imagesPath, { recursive: true });
  }
  moveFile(tempScreenshotPath, finalScreenshotPath);

  // Modify the frontmatter.
  // Every replacement below uses a replacer function: with a replacement
  // *string*, sequences such as "$&" or "$1" inside scraped text or URLs
  // would be expanded by String.prototype.replace and corrupt the output.
  const indexPath = path.join(bundlePath, "index.md");
  let content = fs.readFileSync(indexPath, "utf8");

  // Inject date
  content = content.replace(/^date: .*/m, () => `date: ${formattedEntryDate}`);

  // Inject status
  const statusEntry = `{"date": "${formattedStatusDate}", "http_code": ${metadata.httpStatus || "null"}}`;
  content = content.replace("status: []", () => `status: [${statusEntry}]`);

  // Inject title and description
  if (metadata.title) {
    content = content.replace(/title: ".*?"/, () => `title: ${yamlQuote(metadata.title)}`);
  }

  if (metadata.description) {
    const description = metadata.description.replace(/"/g, '\\"');
    content = content.replace("> [description]", () => `> ${description}`);
  } else {
    content = content.replace("> [description]\n\n", ""); // Remove placeholder if no description
  }

  // Inject keywords (the scraper may not provide any)
  const keywords = Array.isArray(metadata.keywords) ? metadata.keywords : [];
  if (keywords.length > 0) {
    const keywordList = keywords.map(yamlQuote).join(", ");
    content = content.replace("keywords: []", () => `keywords: [${keywordList}]`);
  }

  // Inject cover
  content = content.replace('cover: ""', `cover: "images/screenshot.png"`);

  // Inject links (and remove any urls/links placeholders already present)
  const links = [];

  links.push({
    name: "Page d'origine",
    url: url,
    lang: metadata.lang || "unknown",
  });

  if (archiveUrl) {
    links.push({
      name: "Archive",
      url: archiveUrl,
      archive: true,
    });
  }

  const linksYaml = YAML.stringify({ links }).trim();
  content = content.replace(/^urls: \[\]\n?/m, "");
  content = content.replace(/^links: \[\]\n?/m, "");
  // The links block goes right after the opening front matter delimiter.
  content = content.replace(/^---/, () => `---\n${linksYaml}`);

  fs.writeFileSync(indexPath, content);

  // Create metadata folder if necessary
  if (!fs.existsSync(dataPath)) {
    fs.mkdirSync(dataPath, { recursive: true });
  }

  // Write metadata for the screenshot
  console.log("📝 Writing metadata...");
  const metadataContent = [
    `title: ${yamlQuote(`Capture d'écran de ${url}`)}`,
    `description: ${yamlQuote(`Capture effectuée le ${formattedDateFrench}`)}`,
    'attribution: "Richard Dern"',
    'file: "images/screenshot.png"',
    "",
  ].join("\n");

  fs.writeFileSync(metadataPath, metadataContent);
  console.log(`✔ Metadata saved: ${metadataPath}`);

  console.log(`🎉 Link successfully added! Bundle path: ${bundlePath}`);
  console.log(bundlePath);
}

main().catch((error) => {
  // Report unexpected failures explicitly instead of leaving the promise
  // rejection unhandled.
  console.error(`❌ ${error?.stack ?? error}`);
  process.exit(1);
});