Amélioration de la détection de liens externes morts
This commit is contained in:
21
tools/tests/archive.test.js
Normal file
21
tools/tests/archive.test.js
Normal file
@@ -0,0 +1,21 @@
|
||||
const { getArchiveUrl, saveToArchive } = require("../lib/archive");

/**
 * Manual smoke test for the archive module.
 *
 * Looks up an existing Archive.org snapshot for a known URL and, when no
 * snapshot exists, requests that one be created. Run directly with
 * `node archive.test.js`; results are reported on stdout.
 */
(async () => {
  const testUrl = "https://richard-dern.fr";

  console.log(`🔍 Checking Archive.org for: ${testUrl}`);
  let archiveUrl = await getArchiveUrl(testUrl);

  if (archiveUrl) {
    console.log(`✔ Archive found: ${archiveUrl}`);
  } else {
    console.log(`❌ No archive found, requesting a new one...`);
    archiveUrl = await saveToArchive(testUrl);

    if (archiveUrl) {
      console.log(`✔ URL successfully archived: ${archiveUrl}`);
    } else {
      console.log(`❌ Failed to archive the URL.`);
    }
  }
})().catch((err) => {
  // Without this handler a rejected promise (network error, API failure)
  // would end the script with an unhandled-rejection crash and no useful
  // exit status for CI or shell callers.
  console.error("❌ Archive test failed:", err);
  process.exitCode = 1;
});
|
||||
68
tools/tests/markdown_links.test.js
Normal file
68
tools/tests/markdown_links.test.js
Normal file
@@ -0,0 +1,68 @@
|
||||
const test = require("node:test");
|
||||
const assert = require("node:assert/strict");
|
||||
const { Readable } = require("node:stream");
|
||||
const {
|
||||
collectMarkdownLinksFromStream,
|
||||
extractLinksFromText,
|
||||
sanitizeUrlCandidate,
|
||||
} = require("../lib/markdown_links");
|
||||
|
||||
// extractLinksFromText must strip markdown/autolink wrappers and trailing
// punctuation, and report each external URL a single time.
test("extractLinksFromText returns sanitized external URLs only once", () => {
  const sample = [
    "See [example](https://example.com) and <https://foo.com>. ",
    "Autolink https://bar.com/path).\nDuplicate https://example.com!",
  ].join("");

  const expected = ["https://example.com", "https://foo.com", "https://bar.com/path"];
  assert.deepStrictEqual(extractLinksFromText(sample), expected);
});
|
||||
|
||||
// Each fixture line below exercises a different link syntax (plain text,
// markdown link, bare URL, autolink, nested parentheses, emphasis,
// non-ASCII path, footnote); the collector must report every URL together
// with its 1-based source line number.
test("collectMarkdownLinksFromStream preserves line numbers", async () => {
  const fixtureLines = [
    "Intro line with no link",
    "Markdown [link](https://docs.example.org/page).",
    "Plain link https://news.example.net/article.",
    "Trailing <https://portal.example.com/path> punctuation.",
    "Markdown [link](https://docs.example.org/page(with more valid content)).",
    "Le **[baume du Canada](https://fr.wikipedia.org/wiki/Baume_du_Canada)**",
    "(_Theropoda [incertae sedis](https://fr.wikipedia.org/wiki/Incertae_sedis)_)",
    "[CDN](https://fr.wikipedia.org/wiki/Réseau_de_diffusion_de_contenu)[^2]."
  ];

  const source = Readable.from([fixtureLines.join("\n")]);
  const found = await collectMarkdownLinksFromStream(source);

  const expected = [
    { url: "https://docs.example.org/page", line: 2 },
    { url: "https://news.example.net/article", line: 3 },
    { url: "https://portal.example.com/path", line: 4 },
    { url: "https://docs.example.org/page(with more valid content)", line: 5 },
    { url: "https://fr.wikipedia.org/wiki/Baume_du_Canada", line: 6 },
    { url: "https://fr.wikipedia.org/wiki/Incertae_sedis", line: 7 },
    { url: "https://fr.wikipedia.org/wiki/Réseau_de_diffusion_de_contenu", line: 8 },
  ];
  assert.deepStrictEqual(found, expected);
});
|
||||
|
||||
// A commented-out `url:` inside the YAML front matter must be skipped,
// while an active front-matter entry and body links are still collected
// with correct line numbers.
test("collectMarkdownLinksFromStream ignores URLs in front matter comments", async () => {
  const document =
    "---\n" +
    "links:\n" +
    "  # url: https://ignored.example.com\n" +
    "  - url: https://included.example.com\n" +
    "---\n" +
    "Body with https://body.example.com link.";

  const stream = Readable.from([document]);

  assert.deepStrictEqual(await collectMarkdownLinksFromStream(stream), [
    { url: "https://included.example.com", line: 4 },
    { url: "https://body.example.com", line: 6 },
  ]);
});
|
||||
|
||||
// Table-driven check: each entry maps a raw URL candidate (with wrapper
// characters or trailing punctuation) to its expected cleaned form.
test("sanitizeUrlCandidate removes spurious trailing punctuation", () => {
  const table = new Map([
    ["https://example.com).", "https://example.com"],
    ["https://example.com!\"", "https://example.com"],
    ["<https://example.com>", "https://example.com"],
  ]);

  for (const [raw, cleaned] of table) {
    assert.equal(sanitizeUrlCandidate(raw), cleaned);
  }
});
|
||||
13
tools/tests/puppeteer.test.js
Normal file
13
tools/tests/puppeteer.test.js
Normal file
@@ -0,0 +1,13 @@
|
||||
const { scrapePage } = require("../lib/puppeteer");
const path = require("path");

/**
 * Manual smoke test for the Puppeteer module.
 *
 * Scrapes a known page, writes a screenshot next to this file, and prints
 * the extracted metadata. Run directly with `node puppeteer.test.js`.
 */
(async () => {
  const testUrl = "https://richard-dern.fr";
  const screenshotPath = path.join(__dirname, "test_screenshot.png");

  console.log(`🔍 Testing Puppeteer module on: ${testUrl}`);
  const metadata = await scrapePage(testUrl, screenshotPath);

  console.log("📄 Page metadata:");
  console.log(metadata);
})().catch((err) => {
  // Surface failures (browser launch problems, navigation timeouts)
  // instead of dying with an unhandled promise rejection, and signal the
  // failure to the shell via the exit code.
  console.error("❌ Puppeteer test failed:", err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user