211 lines
7.5 KiB
JavaScript
211 lines
7.5 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
const { execSync } = require("child_process");
|
|
const fs = require("fs");
|
|
const crypto = require("crypto");
|
|
const path = require("path");
|
|
const os = require("os");
|
|
const YAML = require("yaml");
|
|
const { buildUserAgent, checkUrl } = require("./lib/http");
|
|
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
|
|
const { scrapePage } = require("./lib/puppeteer");
|
|
|
|
// Location of the "interesting links" section, relative to the site root.
const LINKS_ROOT = path.join("content", "interets", "liens-interessants");

// The URL argument is mandatory: print the usage line and abort without it.
const hasUrlArgument = process.argv.length >= 3;
if (!hasUrlArgument) {
  console.error("Usage: add_link.js <URL> [optional: YYYY-MM-DD]");
  process.exit(1);
}

// Positional arguments: the URL first, then an optional entry date.
const [, , url, rawDateArgument] = process.argv;
const customDate = rawDateArgument || null;

// Bundle identifier: the first 8 hex characters of the URL's MD5 digest.
const urlDigest = crypto.createHash("md5").update(url).digest("hex");
const urlHash = urlDigest.slice(0, 8);

// Every path is resolved against the directory the script is started from.
const hugoRoot = path.resolve(process.cwd());
const interestingLinksRoot = path.join(hugoRoot, LINKS_ROOT);
|
|
|
|
/**
 * Look for a directory named `hash` at or below `interestingLinksRoot`.
 *
 * The tree is walked depth-first with an explicit stack (last pushed
 * directory is visited first). Directories that cannot be listed are
 * skipped silently, so the search is best-effort.
 *
 * @param {string} hash - Directory name to look for.
 * @returns {string|null} Path of the first matching directory, or null when
 *   the root does not exist or nothing matches.
 */
function findExistingLinkBundle(hash) {
  if (!fs.existsSync(interestingLinksRoot)) {
    return null;
  }

  const pending = [interestingLinksRoot];
  for (let dir = pending.pop(); dir !== undefined; dir = pending.pop()) {
    if (path.basename(dir) === hash) {
      return dir;
    }

    let children;
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: ignore it and keep searching elsewhere.
      continue;
    }

    const subdirectories = children
      .filter((child) => child.isDirectory())
      .map((child) => path.join(dir, child.name));
    for (const subdirectory of subdirectories) {
      pending.push(subdirectory);
    }
  }

  return null;
}
|
|
|
|
// Stop right away (exit code 0) when a bundle named after this URL's hash
// already exists somewhere under the links section.
const duplicateBundlePath = findExistingLinkBundle(urlHash);
if (duplicateBundlePath) {
  console.log(`⚠ Link already exists at ${path.relative(hugoRoot, duplicateBundlePath)}: ${url}`);
  process.exit(0);
}
|
|
|
|
/**
 * Render a value as a YAML double-quoted scalar.
 *
 * JSON string syntax is used because its escapes (`\"`, `\\`, `\n`, ...) are
 * also valid inside YAML double-quoted scalars, so quotes, backslashes and
 * control characters found in scraped text cannot break the document.
 *
 * @param {unknown} value - Value to render; coerced with String().
 * @returns {string} The escaped scalar, surrounding double quotes included.
 */
function yamlDoubleQuoted(value) {
  return JSON.stringify(String(value));
}

/**
 * Move a file, falling back to copy + delete across filesystems.
 *
 * A plain rename fails with EXDEV when source and destination live on
 * different devices, which happens when the OS temp directory is a separate
 * mount from the site tree.
 *
 * @param {string} source - Existing file to move.
 * @param {string} destination - Target path; its directory must exist.
 * @throws {Error} Any filesystem error other than EXDEV on the rename, or
 *   any error raised by the copy/delete fallback.
 */
function moveFile(source, destination) {
  try {
    fs.renameSync(source, destination);
  } catch (error) {
    if (error.code !== "EXDEV") {
      throw error;
    }
    fs.copyFileSync(source, destination);
    fs.unlinkSync(source);
  }
}

// Main workflow: validate the date, check and archive the URL, scrape it,
// create the Hugo bundle, then fill in its front matter and screenshot
// metadata. Any unexpected failure is reported and ends with exit code 1.
(async () => {
  // Reject an unparsable date before any network request or archive
  // submission is made.
  if (customDate && Number.isNaN(new Date(customDate).getTime())) {
    console.error("❌ Invalid date format. Use YYYY-MM-DD.");
    process.exit(1);
  }

  // Preliminary HTTP check; a failure is only reported, never fatal.
  const userAgent = buildUserAgent();
  const initialCheck = await checkUrl(url, { userAgent, timeoutMs: 8000 });
  const checkFailed =
    initialCheck.errorType || (typeof initialCheck.status === "number" && initialCheck.status >= 400);
  if (checkFailed) {
    console.warn(`⚠ Vérification HTTP avant scraping: ${initialCheck.errorType || initialCheck.status || "indéterminé"}`);
  } else {
    console.log(`🌐 Vérification HTTP avant scraping: ${initialCheck.status ?? "inconnue"}`);
  }

  // Reuse an existing archive URL when there is one; otherwise try to create
  // it. A falsy result means "no archive" and the script carries on.
  let archiveUrl = await getArchiveUrl(url);
  if (!archiveUrl) {
    console.log(`📂 No archive found. Attempting to save ${url}...`);
    archiveUrl = await saveToArchive(url);
    if (!archiveUrl) {
      console.log(`⚠ Warning: Unable to archive ${url}. Continuing without archive.`);
    }
  }

  console.log(`📂 Archive URL ${archiveUrl}...`);

  // Entry date: the validated CLI argument when given, otherwise "now".
  const entryDate = customDate ? new Date(customDate) : new Date();
  const now = new Date(); // timestamp recorded in the status entry
  const formattedEntryDate = entryDate.toISOString().split("T")[0]; // YYYY-MM-DD
  const formattedStatusDate = now.toISOString();
  const formattedDateFrench = entryDate.toLocaleDateString("fr-FR", {
    year: "numeric",
    month: "long",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });

  // NOTE(review): the path components below use local time while
  // formattedEntryDate is derived from UTC, so the two can disagree by one
  // day near midnight — confirm which timezone is intended.
  const year = entryDate.getFullYear();
  const month = String(entryDate.getMonth() + 1).padStart(2, "0");
  const day = String(entryDate.getDate()).padStart(2, "0");

  // Bundle layout: <root>/<year>/<month>/<day>/<hash>/{index.md,images,data}.
  const bundlePath = path.join(hugoRoot, `content/interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`);
  const imagesPath = path.join(bundlePath, "images");
  const dataPath = path.join(bundlePath, "data");
  const finalScreenshotPath = path.join(imagesPath, "screenshot.png");
  const metadataPath = path.join(dataPath, "screenshot.yaml");

  // The screenshot is written to the temp directory first so that no bundle
  // is created when scraping fails.
  const tempScreenshotPath = path.join(os.tmpdir(), `screenshot_${urlHash}.png`);

  console.log(`🔍 Scraping page and capturing screenshot...`);
  const metadata = await scrapePage(url, tempScreenshotPath, { userAgent });

  // Scraping is mandatory: without metadata and a screenshot, stop here.
  if (!metadata || !fs.existsSync(tempScreenshotPath)) {
    console.error(`❌ Scraping failed. No bundle will be created.`);
    fs.rmSync(tempScreenshotPath, { force: true }); // drop any partial capture
    process.exit(1);
  }

  // Fall back to the preliminary check's status when scraping gave none.
  if (!metadata.httpStatus && typeof initialCheck.status === "number") {
    metadata.httpStatus = initialCheck.status;
  }

  // Every interpolated value is derived from a Date or a hex digest, so the
  // command line cannot carry shell metacharacters.
  console.log(`📦 Creating Hugo bundle for ${url}...`);
  execSync(`hugo new --kind liens-interessants interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`, { stdio: "inherit" });

  if (!fs.existsSync(bundlePath)) {
    console.error("❌ Failed to create the bundle.");
    fs.rmSync(tempScreenshotPath, { force: true });
    process.exit(1);
  }

  // Move the screenshot into the bundle (recursive mkdir is a no-op when the
  // directory already exists).
  fs.mkdirSync(imagesPath, { recursive: true });
  moveFile(tempScreenshotPath, finalScreenshotPath);

  // Fill in the front matter generated by the archetype.
  const indexPath = path.join(bundlePath, "index.md");
  let content = fs.readFileSync(indexPath, "utf8");

  content = content.replace(/^date: .*/m, `date: ${formattedEntryDate}`);

  const statusEntry = `{"date": "${formattedStatusDate}", "http_code": ${metadata.httpStatus || "null"}}`;
  content = content.replace("status: []", `status: [${statusEntry}]`);

  // Scraped text is injected through replacer functions: a replacement
  // *string* would expand `$&`, `$1`, `$$`... found in the page's own text.
  if (metadata.title) {
    const titleLine = `title: ${yamlDoubleQuoted(metadata.title)}`;
    content = content.replace(/title: ".*?"/, () => titleLine);
  }
  if (metadata.description) {
    const descriptionLine = `> ${metadata.description.replace(/"/g, '\\"')}`;
    content = content.replace("> [description]", () => descriptionLine);
  } else {
    content = content.replace("> [description]\n\n", ""); // no description: drop the placeholder
  }

  // The scraper may not return a keywords array at all.
  const keywords = Array.isArray(metadata.keywords) ? metadata.keywords : [];
  if (keywords.length > 0) {
    const keywordList = keywords.map((keyword) => yamlDoubleQuoted(keyword)).join(", ");
    content = content.replace("keywords: []", () => `keywords: [${keywordList}]`);
  }

  content = content.replace('cover: ""', `cover: "images/screenshot.png"`);

  // Build the links list: the original page, plus the archive when available.
  const links = [
    {
      name: "Page d'origine",
      url: url,
      lang: metadata.lang || "unknown",
    },
  ];
  if (archiveUrl) {
    links.push({
      name: "Archive",
      url: archiveUrl,
      archive: true,
    });
  }

  // Remove empty urls/links placeholders, then insert the generated list
  // right after the opening front matter delimiter.
  const linksYaml = YAML.stringify({ links }).trim();
  content = content.replace(/^urls: \[\]\n?/m, "");
  content = content.replace(/^links: \[\]\n?/m, "");
  content = content.replace(/^---/, () => `---\n${linksYaml}`);

  fs.writeFileSync(indexPath, content);

  // Describe the screenshot in data/screenshot.yaml.
  fs.mkdirSync(dataPath, { recursive: true });
  console.log("📝 Writing metadata...");
  const metadataContent = [
    `title: ${yamlDoubleQuoted(`Capture d'écran de ${url}`)}`,
    `description: "Capture effectuée le ${formattedDateFrench}"`,
    'attribution: "Richard Dern"',
    'file: "images/screenshot.png"',
    "",
  ].join("\n");
  fs.writeFileSync(metadataPath, metadataContent);
  console.log(`✔ Metadata saved: ${metadataPath}`);

  console.log(`🎉 Link successfully added! Bundle path: ${bundlePath}`);
  // The bare path is printed last, on its own line.
  console.log(bundlePath);
})().catch((error) => {
  // Without this handler a rejected step would surface as an unhandled
  // promise rejection.
  console.error("❌ Unexpected error:", error);
  process.exit(1);
});
|