1

Corrige les faux positifs dans l'analyse des liens Markdown

This commit is contained in:
2026-03-25 23:06:33 +01:00
parent 3cb735333a
commit a2cbaeacd1
2 changed files with 179 additions and 8 deletions

View File

@@ -129,8 +129,8 @@ function parseLinkDestination(raw) {
return result; return result;
} }
function extractMarkdownDestinations(text) { function extractMarkdownLinkTokens(text) {
const urls = []; const tokens = [];
for (let i = 0; i < text.length; i++) { for (let i = 0; i < text.length; i++) {
if (text[i] === "!") { if (text[i] === "!") {
if (text[i + 1] !== "[") continue; if (text[i + 1] !== "[") continue;
@@ -157,24 +157,120 @@ function extractMarkdownDestinations(text) {
const rawDestination = text.slice(openParen + 1, closeParen); const rawDestination = text.slice(openParen + 1, closeParen);
const candidate = parseLinkDestination(rawDestination); const candidate = parseLinkDestination(rawDestination);
if (candidate) { if (candidate) {
urls.push(candidate); const startOffset = rawDestination.indexOf(candidate);
if (startOffset > -1) {
tokens.push({
url: candidate,
start: openParen + 1 + startOffset,
end: openParen + 1 + startOffset + candidate.length,
});
} else {
tokens.push({
url: candidate,
start: openParen + 1,
end: closeParen,
});
}
} }
i = closeParen; i = closeParen;
} }
return urls; return tokens;
}
function extractMarkdownDestinations(text) {
return extractMarkdownLinkTokens(text).map((token) => token.url);
} }
function isExternalLink(link) { function isExternalLink(link) {
return typeof link === "string" && link.includes("://"); return typeof link === "string" && link.includes("://");
} }
/**
 * Blanks out Markdown inline code spans so that later scans do not pick up
 * URLs written inside them.
 *
 * Every character of a code span (delimiters included) is replaced by a
 * space, so the returned string has the same length as the input and
 * character offsets computed on it remain valid for the original text.
 *
 * A code span opened by a run of N backticks is only closed by a later run of
 * exactly N backticks. A longer or shorter run is not a closing delimiter, so
 * an opening run without an exact match is left in the output as literal text.
 *
 * @param {*} text - Line or fragment of Markdown. Non-string values and
 *   strings without any backtick are returned unchanged.
 * @returns {*} The text with inline code spans replaced by spaces.
 */
function stripMarkdownInlineCode(text) {
  if (typeof text !== "string" || !text.includes("`")) {
    return text;
  }

  let result = "";
  let index = 0;

  while (index < text.length) {
    if (text[index] !== "`") {
      result += text[index];
      index += 1;
      continue;
    }

    // Measure the whole opening run of backticks.
    let fenceLength = 1;
    while (text[index + fenceLength] === "`") {
      fenceLength += 1;
    }

    // Look for a later run of exactly the same length. A plain indexOf on
    // the fence string would also match the beginning of a longer run and
    // blank out text that is not inside a code span.
    let closingIndex = -1;
    let cursor = index + fenceLength;
    while (cursor < text.length) {
      const runStart = text.indexOf("`", cursor);
      if (runStart === -1) {
        break;
      }
      let runEnd = runStart + 1;
      while (text[runEnd] === "`") {
        runEnd += 1;
      }
      if (runEnd - runStart === fenceLength) {
        closingIndex = runStart;
        break;
      }
      cursor = runEnd;
    }

    if (closingIndex === -1) {
      // Unmatched opening run: keep the backticks as literal characters.
      result += text.slice(index, index + fenceLength);
      index += fenceLength;
      continue;
    }

    const spanLength = closingIndex + fenceLength - index;
    result += " ".repeat(spanLength);
    index = closingIndex + fenceLength;
  }

  return result;
}
/**
 * Detects whether a line opens a Markdown fenced code block.
 *
 * A fence is a run of at least three identical characters (all backticks or
 * all tildes) preceded by at most three spaces. For a backtick fence, the
 * rest of the line (the info string) must not contain a backtick; otherwise
 * the line is an inline code span such as "```code``` text", not a fence.
 *
 * @param {*} line - A single line of Markdown.
 * @returns {{marker: string, length: number} | null} The fence character and
 *   the length of the fence run, or null when the line does not open a fence
 *   (including when `line` is not a string).
 */
function parseMarkdownFence(line) {
  if (typeof line !== "string") {
    return null;
  }

  // Group 1: backtick fence whose info string has no backtick.
  // Group 2: tilde fence (any info string is allowed).
  const match = line.match(/^ {0,3}(?:(`{3,})[^`]*$|(~{3,}))/);
  if (!match) {
    return null;
  }

  const run = match[1] || match[2];
  return {
    marker: run[0],
    length: run.length,
  };
}
/**
 * Tells whether a line closes the currently open fenced code block.
 *
 * The line must consist of at most three leading spaces, a run of at least
 * three identical fence characters, and optional trailing spaces or tabs.
 * The run must use the same character as the opening fence and be at least
 * as long.
 *
 * @param {*} line - A single line of Markdown.
 * @param {{marker: string, length: number} | null | undefined} activeFence -
 *   Description of the open fence (character and run length).
 * @returns {boolean} True when the line closes `activeFence`; false when no
 *   fence is active or `line` is not a string.
 */
function isFenceClosingLine(line, activeFence) {
  if (!activeFence || typeof line !== "string") {
    return false;
  }

  // The back-reference forces the whole run to repeat one single character,
  // so a mixed run such as "```~" is not accepted as a closing fence.
  const match = line.match(/^ {0,3}(([`~])\2{2,})[ \t]*$/);
  if (!match) {
    return false;
  }

  return match[2] === activeFence.marker && match[1].length >= activeFence.length;
}
/**
 * Reports whether a line begins with indentation that marks it as part of an
 * indented code block.
 *
 * NOTE(review): CommonMark requires four spaces (or a tab) for an indented
 * code block; the space literal below may need to be wider. Confirm against
 * the intended behavior before changing it.
 *
 * @param {*} line - A single line of Markdown.
 * @returns {boolean} True for a non-empty string whose first character is the
 *   space literal or a tab; false otherwise.
 */
function isIndentedCodeLine(line) {
  const isNonEmptyString = typeof line === "string" && line.length > 0;
  if (!isNonEmptyString) {
    return false;
  }

  const firstChar = line[0];
  return firstChar === " " || firstChar === "\t";
}
function extractLinksFromText(text) { function extractLinksFromText(text) {
if (typeof text !== "string" || !text.includes("http")) { if (typeof text !== "string" || !text.includes("http")) {
return []; return [];
} }
const strippedText = stripMarkdownInlineCode(text);
if (typeof strippedText !== "string" || !strippedText.includes("http")) {
return [];
}
const results = []; const results = [];
const seen = new Set(); const seen = new Set();
const markdownLinkTokens = extractMarkdownLinkTokens(strippedText);
function addCandidate(candidate, options = {}) { function addCandidate(candidate, options = {}) {
const sanitized = sanitizeUrlCandidate(candidate, options); const sanitized = sanitizeUrlCandidate(candidate, options);
@@ -185,18 +281,28 @@ function extractLinksFromText(text) {
results.push(sanitized); results.push(sanitized);
} }
for (const url of extractMarkdownDestinations(text)) { for (const token of markdownLinkTokens) {
addCandidate(url, { keepTrailingParens: true }); addCandidate(token.url, { keepTrailingParens: true });
} }
const angleRegex = /<\s*(https?:\/\/[^>\s]+)\s*>/gi; const angleRegex = /<\s*(https?:\/\/[^>\s]+)\s*>/gi;
let match; let match;
while ((match = angleRegex.exec(text)) !== null) { while ((match = angleRegex.exec(strippedText)) !== null) {
addCandidate(match[1]); addCandidate(match[1]);
} }
const autoRegex = /https?:\/\/[^\s<>"`]+/gi; const autoRegex = /https?:\/\/[^\s<>"`]+/gi;
while ((match = autoRegex.exec(text)) !== null) { while ((match = autoRegex.exec(strippedText)) !== null) {
let overlapsMarkdownDestination = false;
for (const token of markdownLinkTokens) {
if (match.index >= token.start && match.index < token.end) {
overlapsMarkdownDestination = true;
break;
}
}
if (overlapsMarkdownDestination) {
continue;
}
addCandidate(match[0]); addCandidate(match[0]);
} }
@@ -208,6 +314,9 @@ async function collectMarkdownLinksFromStream(stream) {
const results = []; const results = [];
let lineNumber = 0; let lineNumber = 0;
let inFrontMatter = false; let inFrontMatter = false;
let activeFence = null;
let inIndentedCodeBlock = false;
let previousLineBlank = true;
try { try {
for await (const line of rl) { for await (const line of rl) {
lineNumber++; lineNumber++;
@@ -225,9 +334,44 @@ async function collectMarkdownLinksFromStream(stream) {
continue; continue;
} }
if (activeFence) {
if (isFenceClosingLine(line, activeFence)) {
activeFence = null;
}
previousLineBlank = trimmed === "";
continue;
}
const openingFence = parseMarkdownFence(line);
if (openingFence) {
activeFence = openingFence;
previousLineBlank = trimmed === "";
continue;
}
if (inIndentedCodeBlock) {
if (trimmed === "") {
previousLineBlank = true;
continue;
}
if (isIndentedCodeLine(line)) {
previousLineBlank = false;
continue;
}
inIndentedCodeBlock = false;
}
if (previousLineBlank && isIndentedCodeLine(line)) {
inIndentedCodeBlock = true;
previousLineBlank = false;
continue;
}
for (const url of extractLinksFromText(line)) { for (const url of extractLinksFromText(line)) {
results.push({ url, line: lineNumber }); results.push({ url, line: lineNumber });
} }
previousLineBlank = trimmed === "";
} }
} finally { } finally {
rl.close(); rl.close();

View File

@@ -15,6 +15,12 @@ test("extractLinksFromText returns sanitized external URLs only once", () => {
assert.deepStrictEqual(urls, ["https://example.com", "https://foo.com", "https://bar.com/path"]); assert.deepStrictEqual(urls, ["https://example.com", "https://foo.com", "https://bar.com/path"]);
}); });
// Regression test: the Markdown link is immediately followed by "/epona.",
// and that trailing text must not be reported as part of the URL nor as a
// second, longer URL. Only the destination between the parentheses is expected.
test("extractLinksFromText does not extend a markdown destination past the closing parenthesis", () => {
const input = "J'ai eu mon lot d'installations du couple [anope](https://www.anope.org/)/epona.";
const urls = extractLinksFromText(input);
assert.deepStrictEqual(urls, ["https://www.anope.org/"]);
});
test("collectMarkdownLinksFromStream preserves line numbers", async () => { test("collectMarkdownLinksFromStream preserves line numbers", async () => {
const content = [ const content = [
"Intro line with no link", "Intro line with no link",
@@ -41,6 +47,27 @@ test("collectMarkdownLinksFromStream preserves line numbers", async () => {
]); ]);
}); });
// Checks that URLs appearing in code are not collected: the inline code span
// on line 2, the fenced block on lines 4-6 and the indented line 8 must all
// be absent from the result, while the plain-text URLs on lines 1, 2 and 9
// are reported with their 1-based line numbers.
test("collectMarkdownLinksFromStream ignores inline code, fenced code blocks and indented code blocks", async () => {
const content = [
"Visible https://visible.example.com.",
"Inline code `https://inline.example.com` and normal https://normal.example.com.",
"",
"```yaml",
"uses: https://github.com/easingthemes/ssh-deploy@main",
"```",
"",
// Line 8: starts with whitespace and follows a blank line (indented code).
" https://indented.example.com",
// Line 9: not indented, so it is ordinary text again.
"After code https://after.example.com.",
].join("\n");
const stream = Readable.from([content]);
const links = await collectMarkdownLinksFromStream(stream);
// The expected URLs carry no trailing "." from the end of each sentence.
assert.deepStrictEqual(links, [
{ url: "https://visible.example.com", line: 1 },
{ url: "https://normal.example.com", line: 2 },
{ url: "https://after.example.com", line: 9 },
]);
});
test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => { test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => {
const content = [ const content = [
"---", "---",