1
Files
2025/tools/add_link.js

200 lines
7.2 KiB
JavaScript

#!/usr/bin/env node
const { execSync } = require("child_process");
const fs = require("fs");
const crypto = require("crypto");
const path = require("path");
const os = require("os");
const YAML = require("yaml");
const { buildUserAgent, checkUrl } = require("./lib/http");
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
const { scrapePage } = require("./lib/puppeteer");
const { formatDateTime, toHugoDateTime } = require("./lib/datetime");
// Location of the "interesting links" section, relative to the Hugo site root.
const LINKS_ROOT = path.join("content", "interets", "liens-interessants");

// Positional CLI arguments: the URL (required) and an optional date.
const cliArguments = process.argv.slice(2);
if (cliArguments.length === 0) {
  console.error("Usage: add_link.js <URL> [optional: YYYY-MM-DD]");
  process.exit(1);
}
const [url] = cliArguments;
const customDate = cliArguments[1] || null;

// The bundle directory name is the first 8 hex characters of the URL's MD5 digest.
const urlDigest = crypto.createHash("md5").update(url).digest("hex");
const urlHash = urlDigest.substring(0, 8);

// The script treats the current working directory as the Hugo site root.
const hugoRoot = path.resolve(process.cwd());
const interestingLinksRoot = path.join(hugoRoot, LINKS_ROOT);
/**
 * Searches a directory tree for a bundle directory named after a URL hash.
 *
 * The tree is walked depth-first with an explicit stack; the first directory
 * whose base name equals `hash` is returned.
 *
 * @param {string} hash - Directory name to look for (the short URL hash).
 * @param {string} [root=interestingLinksRoot] - Directory to search. Defaults
 *   to the interesting-links section of the current site.
 * @returns {string|null} Path of the matching directory, or `null` when the
 *   root does not exist or no directory matches.
 */
function findExistingLinkBundle(hash, root = interestingLinksRoot) {
  if (!fs.existsSync(root)) {
    return null;
  }

  const pending = [root];
  while (pending.length > 0) {
    const current = pending.pop();
    if (path.basename(current) === hash) {
      return current;
    }

    let entries;
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch (error) {
      // Best-effort search: a directory that cannot be listed is skipped
      // rather than aborting the whole lookup.
      continue;
    }

    for (const entry of entries) {
      if (entry.isDirectory()) {
        pending.push(path.join(current, entry.name));
      }
    }
  }

  return null;
}
// Bundle directories are named after the URL hash, so finding one with this
// name means the link was already added: report it and stop successfully.
const duplicateBundlePath = findExistingLinkBundle(urlHash);
if (duplicateBundlePath) {
  const bundleLocation = path.relative(hugoRoot, duplicateBundlePath);
  console.log(`⚠ Link already exists at ${bundleLocation}: ${url}`);
  process.exit(0);
}
/**
 * Renders a value as a double-quoted YAML scalar.
 *
 * JSON string syntax is used because it escapes quotes, backslashes and
 * control characters, and a JSON string is also a valid YAML double-quoted
 * scalar.
 *
 * @param {*} value - Value to render; it is converted to a string first.
 * @returns {string} The quoted, escaped scalar (including the quotes).
 */
function quoteYamlString(value) {
  return JSON.stringify(String(value));
}

/**
 * Moves a file, including across filesystems.
 *
 * `fs.renameSync` fails with EXDEV when source and destination live on
 * different devices (common when the temp directory is a separate mount), so
 * that case falls back to copy + delete.
 *
 * @param {string} source - Existing file path.
 * @param {string} destination - Target file path.
 * @throws Any filesystem error other than EXDEV from the rename, or any error
 *   from the fallback copy/delete.
 */
function moveFileSync(source, destination) {
  try {
    fs.renameSync(source, destination);
  } catch (error) {
    if (error.code !== "EXDEV") {
      throw error;
    }
    fs.copyFileSync(source, destination);
    fs.unlinkSync(source);
  }
}

// Main flow: check the URL, resolve an archive copy, scrape the page, then
// create and fill the Hugo bundle. Nothing is created if scraping fails.
(async () => {
  const userAgent = buildUserAgent();
  const initialCheck = await checkUrl(url, { userAgent, timeoutMs: 8000 });
  if (initialCheck.errorType || (typeof initialCheck.status === "number" && initialCheck.status >= 400)) {
    console.warn(`⚠ Vérification HTTP avant scraping: ${initialCheck.errorType || initialCheck.status || "indéterminé"}`);
  } else {
    console.log(`🌐 Vérification HTTP avant scraping: ${initialCheck.status ?? "inconnue"}`);
  }

  let archiveUrl = await getArchiveUrl(url);
  // If the URL is not archived yet, attempt to save it.
  if (!archiveUrl) {
    console.log(`📂 No archive found. Attempting to save ${url}...`);
    archiveUrl = await saveToArchive(url);
    if (!archiveUrl) {
      console.log(`⚠ Warning: Unable to archive ${url}. Continuing without archive.`);
    }
  }
  // Only report an archive URL when there is one (avoids printing "null").
  if (archiveUrl) {
    console.log(`📂 Archive URL ${archiveUrl}...`);
  }

  // Determine the entry date/time (custom date if given, otherwise now).
  // NOTE(review): entryDate is used like a Luxon DateTime (setLocale/toFormat,
  // .year/.month/.day) — confirm against ./lib/datetime.
  const entryDate = customDate ? toHugoDateTime(customDate) : toHugoDateTime();
  const formattedEntryDate = formatDateTime(entryDate);
  const formattedStatusDate = formatDateTime();
  const formattedDateFrench = entryDate.setLocale("fr").toFormat("d LLLL yyyy 'à' HH:mm");
  const year = entryDate.year;
  const month = String(entryDate.month).padStart(2, "0");
  const day = String(entryDate.day).padStart(2, "0");

  // Define paths
  const bundlePath = path.join(hugoRoot, `content/interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`);
  const imagesPath = path.join(bundlePath, "images");
  const dataPath = path.join(bundlePath, "data");
  const finalScreenshotPath = path.join(imagesPath, "screenshot.png");
  const metadataPath = path.join(dataPath, "screenshot.yaml");
  // The screenshot is stored in a temporary location first so that no bundle
  // is left behind when scraping fails.
  const tempScreenshotPath = path.join(os.tmpdir(), `screenshot_${urlHash}.png`);

  // Scrape the page and capture a screenshot
  console.log(`🔍 Scraping page and capturing screenshot...`);
  const metadata = await scrapePage(url, tempScreenshotPath, { userAgent });

  // If scraping failed, do not proceed
  if (!metadata || !fs.existsSync(tempScreenshotPath)) {
    fs.rmSync(tempScreenshotPath, { force: true });
    console.error(`❌ Scraping failed. No bundle will be created.`);
    process.exit(1);
  }
  // Fall back to the status from the preliminary check when the scraper did
  // not report one.
  if (!metadata.httpStatus && typeof initialCheck.status === "number") {
    metadata.httpStatus = initialCheck.status;
  }

  // Create Hugo bundle only if scraping succeeded
  console.log(`📦 Creating Hugo bundle for ${url}...`);
  execSync(`hugo new --kind liens-interessants interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`, { stdio: "inherit" });
  if (!fs.existsSync(bundlePath)) {
    fs.rmSync(tempScreenshotPath, { force: true });
    console.error("❌ Failed to create the bundle.");
    process.exit(1);
  }

  // Move the screenshot to its final destination
  fs.mkdirSync(imagesPath, { recursive: true });
  moveFileSync(tempScreenshotPath, finalScreenshotPath);

  // Modify the front matter. Replacements are passed as functions so that
  // "$&", "$1", "$'"... sequences in scraped text or in the URL are inserted
  // literally instead of being interpreted as replacement patterns.
  const indexPath = path.join(bundlePath, "index.md");
  let content = fs.readFileSync(indexPath, "utf8");

  // Inject date
  content = content.replace(/^date: .*/m, () => `date: ${formattedEntryDate}`);

  // Inject status
  const statusEntry = `{"date": "${formattedStatusDate}", "http_code": ${metadata.httpStatus || "null"}}`;
  content = content.replace("status: []", () => `status: [${statusEntry}]`);

  // Inject title and description
  if (metadata.title) {
    content = content.replace(/title: ".*?"/, () => `title: ${quoteYamlString(metadata.title)}`);
  }
  if (metadata.description) {
    const description = metadata.description.replace(/"/g, '\\"');
    content = content.replace("> [description]", () => `> ${description}`);
  } else {
    content = content.replace("> [description]\n\n", ""); // Remove placeholder if no description
  }

  // Inject keywords (the scraper may not return any)
  if (Array.isArray(metadata.keywords) && metadata.keywords.length > 0) {
    const keywordList = metadata.keywords.map(quoteYamlString).join(", ");
    content = content.replace("keywords: []", () => `keywords: [${keywordList}]`);
  }

  // Inject cover
  content = content.replace('cover: ""', `cover: "images/screenshot.png"`);

  // Inject links (and remove any empty urls/links placeholders already present)
  const links = [];
  links.push({
    name: "Page d'origine",
    url: url,
    lang: metadata.lang || "unknown",
  });
  if (archiveUrl) {
    links.push({
      name: "Archive",
      url: archiveUrl,
      archive: true,
    });
  }
  const linksYaml = YAML.stringify({ links }).trim();
  content = content.replace(/^urls: \[\]\n?/m, "");
  content = content.replace(/^links: \[\]\n?/m, "");
  content = content.replace(/^---/, () => `---\n${linksYaml}`);
  fs.writeFileSync(indexPath, content);

  // Create the data folder if necessary
  fs.mkdirSync(dataPath, { recursive: true });

  // Write metadata for the screenshot; values are escaped because the URL may
  // contain quotes or backslashes.
  console.log("📝 Writing metadata...");
  const metadataContent = [
    `title: ${quoteYamlString(`Capture d'écran de ${url}`)}`,
    `description: ${quoteYamlString(`Capture effectuée le ${formattedDateFrench}`)}`,
    'attribution: "Richard Dern"',
    'file: "images/screenshot.png"',
    "",
  ].join("\n");
  fs.writeFileSync(metadataPath, metadataContent);
  console.log(`✔ Metadata saved: ${metadataPath}`);
  console.log(`🎉 Link successfully added! Bundle path: ${bundlePath}`);
  console.log(bundlePath);
})().catch((error) => {
  // Never leave the promise floating: report the failure and exit non-zero.
  console.error("❌ Failed to add link:", error);
  process.exit(1);
});