1

Initial commit

This commit is contained in:
2025-03-28 12:57:37 +01:00
commit ed9ddcfdc8
1841 changed files with 42303 additions and 0 deletions

View File

@@ -0,0 +1,126 @@
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { scrapePage } = require("./lib/puppeteer");
const readline = require("readline");
// Root of the site: the parent of the directory holding this script.
const SITE_ROOT = path.resolve(__dirname, "..");
// Markdown is scanned under content/, YAML under data/.
const CONTENT_DIR = path.join(SITE_ROOT, "content");
const DATA_DIR = path.join(SITE_ROOT, "data");
// Results of earlier link checks are persisted here between runs.
const CACHE_PATH = path.join(DATA_DIR, "external_links.yaml");
// Age, in days, below which a cached check is still trusted.
const CACHE_TTL_DAYS = 7;
// Start from the persisted cache when the file exists; an empty or missing
// file yields an empty object.
let cache = fs.existsSync(CACHE_PATH)
  ? yaml.load(fs.readFileSync(CACHE_PATH, "utf8")) || {}
  : {};
// Reference instant used for every cache-age comparison in this run.
const now = new Date();
// Accumulates one record per broken link for the final report.
const BAD_LINKS = [];
/**
 * Tells whether a value looks like an external link.
 *
 * @param {*} link - Any value; non-strings are never links.
 * @returns {boolean} True when `link` is a string containing "://".
 */
function isExternalLink(link) {
  if (typeof link !== "string") {
    return false;
  }
  return link.indexOf("://") !== -1;
}
/**
 * Decides whether a cache entry is recent enough to be reused.
 *
 * @param {{checked?: string}|undefined|null} entry - Cache record for a URL.
 * @returns {boolean} True when the entry has a `checked` timestamp that is
 *   less than CACHE_TTL_DAYS days older than the module-level `now`.
 */
function isCacheValid(entry) {
  const checkedAt = entry?.checked;
  if (!checkedAt) {
    return false;
  }
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const ageInDays = (now - new Date(checkedAt)) / MS_PER_DAY;
  return ageInDays < CACHE_TTL_DAYS;
}
/**
 * Finds every http(s) URL in a piece of text.
 *
 * A URL runs until whitespace or one of `)`, `"`, `'`, `>`. Sentence
 * punctuation directly after a URL ("see https://example.com/page.") is not
 * part of the link, so trailing `.,;:!?` characters are stripped; otherwise
 * such links would be requested with the punctuation attached and be
 * reported as broken.
 *
 * @param {string} text - Text to scan (typically one line of Markdown).
 * @returns {string[]} URLs in order of appearance; empty when none found.
 */
function extractLinksFromText(text) {
  const urlPattern = /\bhttps?:\/\/[^\s)"'>]+/g;
  const trailingPunctuation = /[.,;:!?]+$/;
  const matches = text.match(urlPattern) || [];
  return matches.map((url) => url.replace(trailingPunctuation, ""));
}
/**
 * Checks one external URL and records the outcome.
 *
 * A fresh cache entry is reused instead of fetching again, but its status is
 * still evaluated, so a link known to be broken is reported for every file
 * and on every run rather than only the first time it is seen. A fetch that
 * throws is treated as a broken link with no status instead of aborting the
 * whole scan.
 *
 * Side effects: may update `cache[url]`, may append to `BAD_LINKS`, and
 * writes one progress mark to stdout.
 *
 * @param {string} file - Path of the file containing the link.
 * @param {number|string} line - Line number of the link, or a placeholder.
 * @param {string} url - The URL to check.
 * @returns {Promise<void>}
 */
async function checkLink(file, line, url) {
  let status;
  if (isCacheValid(cache[url])) {
    status = cache[url].status ?? null;
  } else {
    try {
      // Assumes scrapePage resolves to an object exposing `httpStatus`.
      const meta = await scrapePage(url, null, { screenshot: false });
      status = meta?.httpStatus || null;
    } catch (err) {
      console.error(`\nFailed to fetch ${url}: ${err?.message ?? err}`);
      status = null;
    }
    cache[url] = {
      status,
      checked: new Date().toISOString(),
    };
  }

  const bundle = path.relative(SITE_ROOT, file);
  // A missing status means the request produced no HTTP response at all.
  if (!status || status >= 400) {
    BAD_LINKS.push({ bundle, url, line, status });
    process.stdout.write("❌");
  } else {
    process.stdout.write("✔");
  }
}
/**
 * Streams a Markdown file line by line and checks every link found.
 *
 * Links are checked one at a time, in order of appearance, and each is
 * reported with its 1-based line number.
 *
 * @param {string} filePath - Path of the Markdown file to scan.
 * @returns {Promise<void>}
 */
async function processMarkdown(filePath) {
  const lineReader = readline.createInterface({
    input: fs.createReadStream(filePath),
    crlfDelay: Infinity, // treat \r\n as a single line break
  });

  let currentLine = 1;
  for await (const text of lineReader) {
    for (const url of extractLinksFromText(text)) {
      await checkLink(filePath, currentLine, url);
    }
    currentLine += 1;
  }
}
/**
 * Walks a parsed YAML value depth-first and gathers its external links.
 *
 * Strings accepted by `isExternalLink` are collected; arrays and objects are
 * descended into; every other value is ignored.
 *
 * @param {*} obj - Parsed YAML value (scalar, array, or mapping).
 * @param {string[]} [links=[]] - Accumulator, mutated in place.
 * @returns {string[]} The same `links` array, in traversal order.
 */
function processYamlRecursively(obj, links = []) {
  if (typeof obj === "string") {
    if (isExternalLink(obj)) {
      links.push(obj);
    }
    return links;
  }

  if (Array.isArray(obj)) {
    obj.forEach((element) => {
      processYamlRecursively(element, links);
    });
    return links;
  }

  if (obj !== null && typeof obj === "object") {
    for (const field in obj) {
      processYamlRecursively(obj[field], links);
    }
  }
  return links;
}
/**
 * Parses a YAML file and checks every external link found in it.
 *
 * Read/parse failures and link-check failures are handled separately: a file
 * that cannot be loaded is logged and skipped, while a failure on one link is
 * logged against that link and the remaining links are still checked. Errors
 * never propagate to the caller.
 *
 * @param {string} filePath - Path of the YAML file to scan.
 * @returns {Promise<void>}
 */
async function processYaml(filePath) {
  let doc;
  try {
    doc = yaml.load(fs.readFileSync(filePath, "utf8"));
  } catch (e) {
    console.error(`Failed to parse YAML file: ${filePath} (${e?.message ?? e})`);
    return;
  }

  for (const link of processYamlRecursively(doc)) {
    try {
      // YAML values carry no line information, hence the "?" placeholder.
      await checkLink(filePath, "?", link);
    } catch (e) {
      console.error(`Failed to check link ${link} in ${filePath}: ${e?.message ?? e}`);
    }
  }
}
/**
 * Recursively lists the files under a directory that have a given extension.
 *
 * @param {string} dir - Directory to search.
 * @param {string[]} exts - Accepted extensions, including the leading dot.
 * @returns {string[]} Resolved paths of matching files, in directory-listing
 *   order with each subdirectory's files placed where it appears.
 */
function walk(dir, exts) {
  return fs.readdirSync(dir).flatMap((entry) => {
    const entryPath = path.resolve(dir, entry);
    if (fs.statSync(entryPath).isDirectory()) {
      return walk(entryPath, exts);
    }
    return exts.includes(path.extname(entryPath)) ? [entryPath] : [];
  });
}
// Entry point: scan all Markdown and YAML files, persist the link cache, and
// print a report of broken external links.
(async () => {
  const mdFiles = walk(CONTENT_DIR, [".md"]);
  const yamlFiles = walk(DATA_DIR, [".yaml", ".yml"]);
  console.log(`Scanning ${mdFiles.length} Markdown and ${yamlFiles.length} YAML files...`);

  try {
    for (const file of mdFiles) {
      await processMarkdown(file);
    }
    for (const file of yamlFiles) {
      await processYaml(file);
    }
  } finally {
    // Persist whatever was checked even if the scan aborts part-way, so the
    // completed checks are not repeated on the next run.
    fs.writeFileSync(CACHE_PATH, yaml.dump(cache));
  }

  console.log("\n\n=== Broken External Links Report ===");
  if (BAD_LINKS.length === 0) {
    console.log("✅ No broken external links found.");
  } else {
    console.table(BAD_LINKS);
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving a floating rejection.
  console.error("\nLink check aborted:", err);
  process.exitCode = 1;
});