Initial commit
This commit is contained in:
126
tools/check_external_links.js
Normal file
126
tools/check_external_links.js
Normal file
@@ -0,0 +1,126 @@
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const yaml = require("js-yaml");
|
||||
const { scrapePage } = require("./lib/puppeteer");
|
||||
const readline = require("readline");
|
||||
|
||||
// Repository root (the parent of this tools/ directory); every other path hangs off it.
const SITE_ROOT = path.resolve(__dirname, "..");
// Markdown sources that are scanned for links.
const CONTENT_DIR = path.join(SITE_ROOT, "content");
// YAML data files that are scanned for links; the cache file lives here as well.
const DATA_DIR = path.join(SITE_ROOT, "data");
// On-disk cache of previously checked URLs, keyed by URL.
const CACHE_PATH = path.join(DATA_DIR, "external_links.yaml");
// Maximum age, in days, for a cache entry to still count as valid.
const CACHE_TTL_DAYS = 7;

// Start from an empty cache and adopt the stored one only when the file
// exists and parses to something truthy (an empty file yields nothing).
let cache = {};
if (fs.existsSync(CACHE_PATH)) {
  const stored = yaml.load(fs.readFileSync(CACHE_PATH, "utf8"));
  if (stored) {
    cache = stored;
  }
}

// Reference time for cache-age calculations, fixed once per run.
const now = new Date();
// Accumulates one record per broken link occurrence for the final report.
const BAD_LINKS = [];
|
||||
|
||||
/**
 * Tells whether a value looks like an external link.
 *
 * @param {*} link - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} True when `link` is a string containing "://".
 */
function isExternalLink(link) {
  if (typeof link !== "string") {
    return false;
  }
  return link.includes("://");
}
|
||||
|
||||
/**
 * Decides whether a cache entry is recent enough to be trusted.
 *
 * @param {{checked?: string}|null|undefined} entry - Cache record for one URL.
 * @returns {boolean} True when the entry has a `checked` timestamp that is
 *   less than CACHE_TTL_DAYS days older than the run's reference time `now`.
 */
function isCacheValid(entry) {
  const checkedAt = entry?.checked;
  if (!checkedAt) {
    return false;
  }

  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  // An unparseable timestamp yields NaN here, which fails the comparison below.
  const ageInDays = (now - new Date(checkedAt)) / MS_PER_DAY;
  return ageInDays < CACHE_TTL_DAYS;
}
|
||||
|
||||
/**
 * Finds every http(s) URL in a piece of text.
 *
 * A URL ends at whitespace or at one of `)`, `"`, `'`, `<`, `>` or a backtick,
 * which covers Markdown links, autolinks, quoted attributes and inline code.
 * Sentence punctuation directly after a URL ("see http://example.com.") is
 * stripped so that it is not checked as part of the address.
 *
 * @param {string} text - Text to scan, typically one line of a file.
 * @returns {string[]} URLs in order of appearance; empty when there are none
 *   or when `text` is not a string.
 */
function extractLinksFromText(text) {
  if (typeof text !== "string") {
    return [];
  }

  const TRAILING_PUNCTUATION = ".,;:!?";
  const candidates = text.match(/\bhttps?:\/\/[^\s)"'<>`]+/g) ?? [];

  const links = [];
  for (const candidate of candidates) {
    let end = candidate.length;
    while (end > 0 && TRAILING_PUNCTUATION.includes(candidate[end - 1])) {
      end -= 1;
    }
    const url = candidate.slice(0, end);
    // Drop degenerate matches that have nothing after the scheme separator.
    if (/^https?:\/\/./.test(url)) {
      links.push(url);
    }
  }
  return links;
}
|
||||
|
||||
/**
 * Checks one occurrence of an external URL and records it when it is broken.
 *
 * A valid cache entry is reused instead of fetching the page again, but its
 * recorded status is still evaluated: a link known to be broken is reported
 * for every file and on every run while the entry is valid, rather than
 * silently dropping out of the report. Cached healthy links stay silent.
 *
 * @param {string} file - Absolute path of the file containing the link.
 * @param {number|string} line - Line number of the occurrence, or a placeholder.
 * @param {string} url - URL to check.
 * @returns {Promise<void>} Resolves once the occurrence has been evaluated.
 */
async function checkLink(file, line, url) {
  let status;
  let fromCache = false;

  if (isCacheValid(cache[url])) {
    status = cache[url].status ?? null;
    fromCache = true;
  } else {
    try {
      // NOTE(review): scrapePage is assumed to resolve to an object carrying
      // `httpStatus`; confirm against ./lib/puppeteer.
      const meta = await scrapePage(url, null, { screenshot: false });
      status = meta?.httpStatus || null;
      cache[url] = {
        status,
        checked: new Date().toISOString(),
      };
    } catch (err) {
      // A failed fetch counts as a broken link for this run but is not
      // cached, so the next run retries it instead of trusting this failure.
      status = null;
    }
  }

  const isBroken = !status || status >= 400;
  if (isBroken) {
    const bundle = path.relative(SITE_ROOT, file);
    BAD_LINKS.push({ bundle, url, line, status });
    process.stdout.write("❌");
  } else if (!fromCache) {
    process.stdout.write("✔");
  }
}
|
||||
|
||||
/**
 * Streams a Markdown file line by line and checks every URL found on each line.
 *
 * @param {string} filePath - Path of the Markdown file to scan.
 * @returns {Promise<void>} Resolves after every link in the file was checked.
 */
async function processMarkdown(filePath) {
  const reader = readline.createInterface({
    input: fs.createReadStream(filePath),
    // Treat "\r\n" as a single line break regardless of the delay between the two.
    crlfDelay: Infinity,
  });

  let lineNumber = 0;
  for await (const text of reader) {
    lineNumber += 1;
    // Links are checked one at a time, in the order they appear on the line.
    for (const url of extractLinksFromText(text)) {
      await checkLink(filePath, lineNumber, url);
    }
  }
}
|
||||
|
||||
/**
 * Collects every external-link string reachable from a parsed YAML value.
 *
 * Arrays and objects are traversed depth-first in element/key order; other
 * values (numbers, booleans, null, non-link strings) contribute nothing.
 *
 * @param {*} obj - Parsed YAML value to inspect.
 * @param {string[]} [links=[]] - Accumulator that found links are appended to.
 * @returns {string[]} The same `links` array that was passed in (or created).
 */
function processYamlRecursively(obj, links = []) {
  if (typeof obj === "string") {
    if (isExternalLink(obj)) {
      links.push(obj);
    }
    return links;
  }

  // Everything that is neither a string nor a container is ignored.
  if (obj === null || typeof obj !== "object") {
    return links;
  }

  if (Array.isArray(obj)) {
    obj.forEach((element) => {
      processYamlRecursively(element, links);
    });
    return links;
  }

  for (const key in obj) {
    processYamlRecursively(obj[key], links);
  }
  return links;
}
|
||||
|
||||
/**
 * Parses a YAML file and checks every external link found in its values.
 *
 * Read/parse failures and link-check failures are reported separately, so a
 * failing link check is no longer mislabelled as a YAML parse error and no
 * longer stops the remaining links of the file from being checked. Neither
 * kind of failure is rethrown; the scan continues with the next link or file.
 *
 * @param {string} filePath - Path of the YAML file to scan.
 * @returns {Promise<void>} Resolves once all links of the file were attempted.
 */
async function processYaml(filePath) {
  let doc;
  try {
    doc = yaml.load(fs.readFileSync(filePath, "utf8"));
  } catch (e) {
    console.error(`Failed to parse YAML file: ${filePath} (${e?.message ?? e})`);
    return;
  }

  const links = processYamlRecursively(doc);
  for (const link of links) {
    try {
      // YAML values carry no position information, hence the "?" line marker.
      await checkLink(filePath, "?", link);
    } catch (e) {
      console.error(`Failed to check link ${link} in ${filePath}: ${e?.message ?? e}`);
    }
  }
}
|
||||
|
||||
/**
 * Recursively lists the files below a directory that have one of the given
 * extensions.
 *
 * @param {string} dir - Directory to descend into.
 * @param {string[]} exts - Accepted extensions including the dot, e.g. ".md".
 * @returns {string[]} Absolute paths of matching files, in directory-listing
 *   order with each subdirectory's matches placed where that subdirectory
 *   appears.
 */
function walk(dir, exts) {
  return fs.readdirSync(dir).flatMap((name) => {
    const entryPath = path.resolve(dir, name);

    if (fs.statSync(entryPath).isDirectory()) {
      return walk(entryPath, exts);
    }

    const hasWantedExtension = exts.includes(path.extname(entryPath));
    return hasWantedExtension ? [entryPath] : [];
  });
}
|
||||
|
||||
/**
 * Entry point: scans all Markdown files under CONTENT_DIR and all YAML files
 * under DATA_DIR, persists the link cache, and prints the broken-link report.
 *
 * The cache is written in a `finally` block so that results gathered before
 * an unexpected failure are kept for the next run instead of being lost.
 *
 * @returns {Promise<void>} Resolves after the report has been printed.
 */
async function main() {
  const mdFiles = walk(CONTENT_DIR, [".md"]);
  const yamlFiles = walk(DATA_DIR, [".yaml", ".yml"]);
  console.log(`Scanning ${mdFiles.length} Markdown and ${yamlFiles.length} YAML files...`);

  try {
    for (const file of mdFiles) {
      await processMarkdown(file);
    }
    for (const file of yamlFiles) {
      await processYaml(file);
    }
  } finally {
    fs.writeFileSync(CACHE_PATH, yaml.dump(cache));
  }

  console.log("\n\n=== Broken External Links Report ===");
  if (BAD_LINKS.length === 0) {
    console.log("✅ No broken external links found.");
  } else {
    console.table(BAD_LINKS);
  }
}

// Never leave the promise floating: report a fatal error and signal failure
// through the exit code instead of relying on the unhandled-rejection default.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user