187 lines
6.6 KiB
JavaScript
187 lines
6.6 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
const { execSync } = require("child_process");
|
|
const fs = require("fs");
|
|
const crypto = require("crypto");
|
|
const path = require("path");
|
|
const os = require("os");
|
|
const YAML = require("yaml");
|
|
const { getArchiveUrl, saveToArchive } = require("./lib/archive");
|
|
const { scrapePage } = require("./lib/puppeteer");
|
|
|
|
// YAML registry of already-added links, stored as { hash: path }.
const KNOWN_LINKS_FILE = "data/known_links.yaml";

// Positional arguments that follow the interpreter and script path:
// a mandatory URL, then an optional entry date.
const cliArgs = process.argv.slice(2);

// Without a URL there is nothing to do: show the usage line and fail.
if (cliArgs.length === 0) {
  console.error("Usage: add_link.js <URL> [optional: YYYY-MM-DD]");
  process.exit(1);
}

const [url, requestedDate] = cliArgs;

// A missing (or empty) date argument is normalised to null.
const customDate = requestedDate ? requestedDate : null;
|
|
|
|
// Short identifier for the URL: the first 8 hex characters of its MD5 digest.
const urlHash = crypto
  .createHash("md5")
  .update(url)
  .digest("hex")
  .slice(0, 8);

// The script treats the current working directory as the Hugo root and
// resolves the registry file relative to it.
const hugoRoot = path.resolve(process.cwd());
const knownLinksPath = path.join(hugoRoot, KNOWN_LINKS_FILE);

/**
 * Reads the registry of known links from a YAML file.
 *
 * @param {string} registryFile - Absolute path of the YAML registry.
 * @returns {Object} The parsed registry, or an empty object when the file is
 *   absent or parses to a falsy value. Terminates the process with exit
 *   code 1 when the file cannot be read or parsed.
 */
function readKnownLinks(registryFile) {
  if (!fs.existsSync(registryFile)) {
    return {};
  }

  try {
    const parsed = YAML.parse(fs.readFileSync(registryFile, "utf8"));
    return parsed || {};
  } catch (err) {
    console.error(`❌ Unable to parse ${KNOWN_LINKS_FILE}: ${err.message}`);
    process.exit(1);
  }
}

let knownLinks = readKnownLinks(knownLinksPath);

// A hash that is already registered means the link was added before:
// report it and stop successfully.
const isAlreadyKnown = Boolean(knownLinks[urlHash]);
if (isAlreadyKnown) {
  console.log(`⚠ Link already exists: ${url}`);
  process.exit(0);
}
|
|
|
|
/**
 * Resolves the entry date and its calendar components.
 *
 * A bare `YYYY-MM-DD` string is parsed by `Date` as midnight UTC, so its
 * calendar fields are read with the UTC getters; any other input (including
 * "now") is read in local time. This keeps the bundle path and the
 * front-matter date on the same calendar day regardless of the machine's
 * time zone.
 *
 * @param {?string} rawDate - Date given on the command line, or null for "now".
 * @returns {?{entryDate: Date, year: number, month: string, day: string}}
 *   The resolved date, or null when `rawDate` cannot be parsed.
 */
function resolveEntryDate(rawDate) {
  const entryDate = rawDate ? new Date(rawDate) : new Date();
  if (Number.isNaN(entryDate.getTime())) {
    return null;
  }

  const isDateOnly = typeof rawDate === "string" && /^\d{4}-\d{2}-\d{2}$/.test(rawDate);
  const year = isDateOnly ? entryDate.getUTCFullYear() : entryDate.getFullYear();
  const monthIndex = isDateOnly ? entryDate.getUTCMonth() : entryDate.getMonth();
  const dayOfMonth = isDateOnly ? entryDate.getUTCDate() : entryDate.getDate();

  return {
    entryDate,
    year,
    month: String(monthIndex + 1).padStart(2, "0"),
    day: String(dayOfMonth).padStart(2, "0"),
  };
}

/**
 * Moves a file, falling back to copy + delete when source and destination
 * are on different filesystems (where a plain rename fails with EXDEV).
 *
 * @param {string} source - Existing file to move.
 * @param {string} destination - Target path; its directory must exist.
 * @throws {Error} Any filesystem error other than EXDEV on the rename, or any
 *   error raised by the copy/delete fallback.
 */
function moveFile(source, destination) {
  try {
    fs.renameSync(source, destination);
  } catch (err) {
    if (err.code !== "EXDEV") {
      throw err;
    }
    fs.copyFileSync(source, destination);
    fs.unlinkSync(source);
  }
}

// Main workflow: look up / create an archive of the URL, scrape the page,
// create the Hugo bundle, fill in its front matter, write the screenshot
// metadata and register the link.
(async () => {
  // Validate the date before doing anything remote, so a typo in the date
  // cannot leave behind an archive request or a scraped screenshot.
  const resolvedDate = resolveEntryDate(customDate);
  if (!resolvedDate) {
    console.error("❌ Invalid date format. Use YYYY-MM-DD.");
    process.exit(1);
  }
  const { entryDate, year, month, day } = resolvedDate;

  // Check URL accessibility and Archive.org availability
  let archiveUrl = await getArchiveUrl(url);

  // If the URL is not archived, attempt to save it
  if (!archiveUrl) {
    console.log(`📂 No archive found. Attempting to save ${url}...`);
    archiveUrl = await saveToArchive(url);
    if (!archiveUrl) {
      console.log(`⚠ Warning: Unable to archive ${url}. Continuing without archive.`);
    }
  }

  // Only announce an archive URL when there actually is one.
  if (archiveUrl) {
    console.log(`📂 Archive URL ${archiveUrl}...`);
  }

  const formattedEntryDate = `${year}-${month}-${day}`; // YYYY-MM-DD, same day as the bundle path
  const formattedStatusDate = new Date().toISOString(); // Current instant for the status entry
  const formattedDateFrench = entryDate.toLocaleDateString("fr-FR", {
    year: "numeric",
    month: "long",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });

  // Define paths
  const bundleSection = `interets/liens-interessants/${year}/${month}/${day}/${urlHash}/`;
  const bundlePath = path.join(hugoRoot, "content", bundleSection);
  const imagesPath = path.join(bundlePath, "images");
  const dataPath = path.join(bundlePath, "data");
  const finalScreenshotPath = path.join(imagesPath, "screenshot.png");
  const metadataPath = path.join(dataPath, "screenshot.yaml");

  // Store screenshot in a temporary location first
  const tempScreenshotPath = path.join(os.tmpdir(), `screenshot_${urlHash}.png`);

  // Scrape the page and capture a screenshot
  console.log(`🔍 Scraping page and capturing screenshot...`);
  const metadata = await scrapePage(url, tempScreenshotPath);

  // If Puppeteer failed, do not proceed
  if (!metadata || !fs.existsSync(tempScreenshotPath)) {
    console.error(`❌ Scraping failed. No bundle will be created.`);
    process.exit(1);
  }

  // Create Hugo bundle only if scraping succeeded.
  // The interpolated values are digits and a hex hash, so the shell command
  // contains no user-controlled text.
  console.log(`📦 Creating Hugo bundle for ${url}...`);
  execSync(`hugo new --kind liens-interessants ${bundleSection}`, { stdio: "inherit" });

  if (!fs.existsSync(bundlePath)) {
    console.error("❌ Failed to create the bundle.");
    process.exit(1);
  }

  // Move the screenshot to the final destination. The temporary directory may
  // live on another filesystem, hence moveFile() instead of a bare rename.
  fs.mkdirSync(imagesPath, { recursive: true });
  moveFile(tempScreenshotPath, finalScreenshotPath);

  // Modify the frontmatter.
  // Every replacement below uses a function replacer so that `$` sequences in
  // scraped text or URLs are inserted literally instead of being interpreted
  // as replacement patterns.
  const indexPath = path.join(bundlePath, "index.md");
  let content = fs.readFileSync(indexPath, "utf8");

  // Inject date
  content = content.replace(/^date: .*/m, () => `date: ${formattedEntryDate}`);

  // Inject status
  const statusEntry = `{"date": "${formattedStatusDate}", "http_code": ${metadata.httpStatus || "null"}}`;
  content = content.replace("status: []", () => `status: [${statusEntry}]`);

  // Inject title and description.
  // JSON.stringify yields a double-quoted scalar with quotes, backslashes and
  // control characters escaped, which is also a valid YAML double-quoted string.
  if (metadata.title) {
    const quotedTitle = JSON.stringify(String(metadata.title));
    content = content.replace(/title: ".*?"/, () => `title: ${quotedTitle}`);
  }
  if (metadata.description) {
    const description = String(metadata.description).replace(/"/g, '\\"');
    content = content.replace("> [description]", () => `> ${description}`);
  } else {
    content = content.replace("> [description]\n\n", ""); // Remove placeholder if no description
  }

  // Inject keywords (tolerating a scrape result without a keywords array)
  const keywords = Array.isArray(metadata.keywords) ? metadata.keywords : [];
  if (keywords.length > 0) {
    const keywordList = keywords.map((keyword) => JSON.stringify(String(keyword))).join(", ");
    content = content.replace("keywords: []", () => `keywords: [${keywordList}]`);
  }

  // Inject cover
  content = content.replace('cover: ""', `cover: "images/screenshot.png"`);

  // Inject links (and remove any empty urls/links placeholders already present)
  const links = [
    {
      name: "Page d'origine",
      url: url,
      lang: metadata.lang || "unknown",
    },
  ];

  if (archiveUrl) {
    links.push({
      name: "Archive",
      url: archiveUrl,
      archive: true,
    });
  }

  const linksYaml = YAML.stringify({ links }).trim();
  content = content.replace(/^urls: \[\]\n?/m, "");
  content = content.replace(/^links: \[\]\n?/m, "");
  content = content.replace(/^---/, () => `---\n${linksYaml}`);

  fs.writeFileSync(indexPath, content);

  // Create metadata folder if necessary
  fs.mkdirSync(dataPath, { recursive: true });

  // Write metadata for the screenshot
  console.log("📝 Writing metadata...");
  const screenshotTitle = JSON.stringify(`Capture d'écran de ${url}`);
  const metadataContent = `title: ${screenshotTitle}
description: "Capture effectuée le ${formattedDateFrench}"
attribution: "Richard Dern"
file: "images/screenshot.png"
`;
  fs.writeFileSync(metadataPath, metadataContent);
  console.log(`✔ Metadata saved: ${metadataPath}`);

  // Append the hash to known_links.yaml
  knownLinks[urlHash] = path.relative(hugoRoot, bundlePath);
  fs.writeFileSync(knownLinksPath, YAML.stringify(knownLinks));

  console.log(`🎉 Link successfully added! Bundle path: ${bundlePath}`);
  console.log(bundlePath);
})().catch((err) => {
  // Any failure in the workflow (network, hugo, filesystem) ends up here
  // instead of surfacing as an unhandled promise rejection.
  console.error(`❌ Unexpected error: ${err && err.message ? err.message : err}`);
  process.exit(1);
});
|