1
Files
2025/tools/add_link.js

199 lines
6.8 KiB
JavaScript

#!/usr/bin/env node
const { execSync } = require("child_process");
const fs = require("fs");
const crypto = require("crypto");
const path = require("path");
const os = require("os");
const YAML = require("yaml");
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
const { scrapePage } = require("./lib/puppeteer");
// Location of the "interesting links" section, relative to the site root.
const LINKS_ROOT = path.join("content", "interets", "liens-interessants");

// The URL is mandatory; without it there is nothing to do.
if (process.argv.length <= 2) {
  console.error("Usage: add_link.js <URL> [optional: YYYY-MM-DD]");
  process.exit(1);
}

// Positional arguments: the link itself, then an optional entry date.
const url = process.argv[2];
const customDate = process.argv[3] ? process.argv[3] : null;

// Bundle identifier: the first 8 hex characters of the URL's MD5 digest.
const urlHash = crypto
  .createHash("md5")
  .update(url)
  .digest("hex")
  .substring(0, 8);

// The script works relative to the directory it is launched from.
const hugoRoot = path.resolve(process.cwd());
const interestingLinksRoot = path.join(hugoRoot, LINKS_ROOT);
/**
 * Search the links tree for a directory whose name equals `hash`.
 *
 * The walk starts at `interestingLinksRoot` (the root itself is also a
 * candidate) and descends into real sub-directories only. Directories that
 * cannot be listed are skipped.
 *
 * @param {string} hash - Directory name to look for.
 * @returns {string|null} Path of the first matching directory, or null when
 *   the root does not exist or nothing matches.
 */
function findExistingLinkBundle(hash) {
  if (!fs.existsSync(interestingLinksRoot)) {
    return null;
  }

  // Depth-first, pre-order search of one directory and everything below it.
  const searchFrom = (directory) => {
    if (path.basename(directory) === hash) {
      return directory;
    }

    let listing;
    try {
      listing = fs.readdirSync(directory, { withFileTypes: true });
    } catch (error) {
      // Unreadable directory: ignore it and let the caller move on.
      return null;
    }

    const subdirectories = listing
      .filter((item) => item.isDirectory())
      .map((item) => path.join(directory, item.name));

    // Children are explored from the last listed to the first listed.
    for (let index = subdirectories.length - 1; index >= 0; index -= 1) {
      const match = searchFrom(subdirectories[index]);
      if (match !== null) {
        return match;
      }
    }
    return null;
  };

  return searchFrom(interestingLinksRoot);
}
// Stop early, with a success exit code, when a bundle named after this
// URL's hash is already present somewhere in the links tree.
const duplicateBundlePath = findExistingLinkBundle(urlHash);
if (duplicateBundlePath) {
  console.log(
    `⚠ Link already exists at ${path.relative(hugoRoot, duplicateBundlePath)}: ${url}`
  );
  process.exit(0);
}
/**
 * Move a file, falling back to copy + delete when a plain rename is refused
 * because source and destination are on different filesystems (EXDEV), which
 * can happen when the OS temp directory is a separate mount.
 *
 * @param {string} source - Existing file to move.
 * @param {string} destination - Target path.
 */
function moveFileAcrossDevices(source, destination) {
  try {
    fs.renameSync(source, destination);
  } catch (error) {
    if (error.code !== "EXDEV") {
      throw error;
    }
    fs.copyFileSync(source, destination);
    fs.unlinkSync(source);
  }
}

/**
 * Render a value as a double-quoted scalar for the YAML front matter.
 * JSON string syntax is used so that quotes, backslashes and control
 * characters are escaped; for ordinary text the output is simply `"text"`.
 *
 * @param {*} value - Value to quote (converted with String()).
 * @returns {string} The quoted scalar, including the surrounding quotes.
 */
function toQuotedYaml(value) {
  return JSON.stringify(String(value));
}

// Look up (or request) an archive of the URL, scrape the page, then create
// and fill the Hugo bundle for it.
(async () => {
  // Validate the date first so a typo fails before any network request.
  const entryDate = customDate ? new Date(customDate) : new Date();
  if (Number.isNaN(entryDate.getTime())) {
    console.error("❌ Invalid date format. Use YYYY-MM-DD.");
    process.exit(1);
  }

  let archiveUrl = await getArchiveUrl(url);
  // If the URL is not archived, attempt to save it
  if (!archiveUrl) {
    console.log(`📂 No archive found. Attempting to save ${url}...`);
    archiveUrl = await saveToArchive(url);
    if (!archiveUrl) {
      console.log(`⚠ Warning: Unable to archive ${url}. Continuing without archive.`);
    }
  }
  if (archiveUrl) {
    console.log(`📂 Archive URL ${archiveUrl}...`);
  }

  const now = new Date(); // Current date for status
  const formattedEntryDate = entryDate.toISOString().split("T")[0]; // YYYY-MM-DD
  const formattedStatusDate = now.toISOString(); // ISO format
  const formattedDateFrench = entryDate.toLocaleDateString("fr-FR", {
    year: "numeric",
    month: "long",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });

  // Use UTC parts so the folder path always matches the front-matter date
  // (which comes from toISOString). A date-only argument is parsed as UTC
  // midnight, so local-time getters could land on the previous day.
  const year = entryDate.getUTCFullYear();
  const month = String(entryDate.getUTCMonth() + 1).padStart(2, "0");
  const day = String(entryDate.getUTCDate()).padStart(2, "0");

  // Define paths
  const bundlePath = path.join(hugoRoot, `content/interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`);
  const imagesPath = path.join(bundlePath, "images");
  const dataPath = path.join(bundlePath, "data");
  const finalScreenshotPath = path.join(imagesPath, "screenshot.png");
  const metadataPath = path.join(dataPath, "screenshot.yaml");
  // Store screenshot in a temporary location first
  const tempScreenshotPath = path.join(os.tmpdir(), `screenshot_${urlHash}.png`);

  // Scrape the page and capture a screenshot
  console.log(`🔍 Scraping page and capturing screenshot...`);
  const metadata = await scrapePage(url, tempScreenshotPath);
  // If scraping failed, do not proceed
  if (!metadata || !fs.existsSync(tempScreenshotPath)) {
    console.error(`❌ Scraping failed. No bundle will be created.`);
    process.exit(1);
  }

  // Create Hugo bundle only if scraping succeeded
  console.log(`📦 Creating Hugo bundle for ${url}...`);
  execSync(`hugo new --kind liens-interessants interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`, { stdio: "inherit" });
  if (!fs.existsSync(bundlePath)) {
    console.error("❌ Failed to create the bundle.");
    process.exit(1);
  }

  // Move the screenshot to the final destination
  if (!fs.existsSync(imagesPath)) {
    fs.mkdirSync(imagesPath, { recursive: true });
  }
  moveFileAcrossDevices(tempScreenshotPath, finalScreenshotPath);

  // Modify the frontmatter.
  // Scraped text is injected through replacer functions: with a plain
  // replacement string, sequences such as "$&" or "$1" inside a title or URL
  // would be interpreted by String.prototype.replace.
  const indexPath = path.join(bundlePath, "index.md");
  let content = fs.readFileSync(indexPath, "utf8");

  // Inject date
  content = content.replace(/^date: .*/m, () => `date: ${formattedEntryDate}`);

  // Inject status
  const statusEntry = `{"date": "${formattedStatusDate}", "http_code": ${metadata.httpStatus || "null"}}`;
  content = content.replace("status: []", () => `status: [${statusEntry}]`);

  // Inject title and description
  if (metadata.title) {
    const quotedTitle = toQuotedYaml(metadata.title);
    content = content.replace(/title: ".*?"/, () => `title: ${quotedTitle}`);
  }
  if (metadata.description) {
    const description = metadata.description.replace(/"/g, '\\"');
    content = content.replace("> [description]", () => `> ${description}`);
  } else {
    content = content.replace("> [description]\n\n", ""); // Remove placeholder if no description
  }

  // Inject keywords (tolerate a scraper result without a keywords array)
  const keywords = Array.isArray(metadata.keywords) ? metadata.keywords : [];
  if (keywords.length > 0) {
    const quotedKeywords = keywords.map((keyword) => toQuotedYaml(keyword)).join(", ");
    content = content.replace("keywords: []", () => `keywords: [${quotedKeywords}]`);
  }

  // Inject cover
  content = content.replace('cover: ""', `cover: "images/screenshot.png"`);

  // Inject links (and remove any empty urls/links placeholders already present)
  const links = [
    {
      name: "Page d'origine",
      url: url,
      lang: metadata.lang || "unknown",
    },
  ];
  if (archiveUrl) {
    links.push({
      name: "Archive",
      url: archiveUrl,
      archive: true,
    });
  }
  const linksYaml = YAML.stringify({ links }).trim();
  content = content.replace(/^urls: \[\]\n?/m, "");
  content = content.replace(/^links: \[\]\n?/m, "");
  // Insert the links right after the opening front-matter delimiter
  content = content.replace(/^---/, () => `---\n${linksYaml}`);
  fs.writeFileSync(indexPath, content);

  // Create metadata folder if necessary
  if (!fs.existsSync(dataPath)) {
    fs.mkdirSync(dataPath, { recursive: true });
  }

  // Write metadata for the screenshot
  console.log("📝 Writing metadata...");
  const metadataContent = [
    `title: "Capture d'écran de ${url}"`,
    `description: "Capture effectuée le ${formattedDateFrench}"`,
    `attribution: "Richard Dern"`,
    `file: "images/screenshot.png"`,
    "",
  ].join("\n");
  fs.writeFileSync(metadataPath, metadataContent);
  console.log(`✔ Metadata saved: ${metadataPath}`);
  console.log(`🎉 Link successfully added! Bundle path: ${bundlePath}`);
  console.log(bundlePath);
})().catch((error) => {
  // Report any failure (network helper, hugo command, filesystem) and exit
  // with an error code instead of leaving an unhandled rejection.
  const reason = error && error.message ? error.message : error;
  console.error(`❌ Failed to add link: ${reason}`);
  process.exit(1);
});