From a2cbaeacd1966e2f14fc4d7f793a4d7e7663fbe5 Mon Sep 17 00:00:00 2001 From: Richard Dern Date: Wed, 25 Mar 2026 23:06:33 +0100 Subject: [PATCH] Corrige les faux positifs dans l'analyse des liens Markdown --- tools/lib/markdown_links.js | 160 +++++++++++++++++++++++++++-- tools/tests/markdown_links.test.js | 27 +++++ 2 files changed, 179 insertions(+), 8 deletions(-) diff --git a/tools/lib/markdown_links.js b/tools/lib/markdown_links.js index 9eb3bfe9..197beeb3 100644 --- a/tools/lib/markdown_links.js +++ b/tools/lib/markdown_links.js @@ -129,8 +129,8 @@ function parseLinkDestination(raw) { return result; } -function extractMarkdownDestinations(text) { - const urls = []; +function extractMarkdownLinkTokens(text) { + const tokens = []; for (let i = 0; i < text.length; i++) { if (text[i] === "!") { if (text[i + 1] !== "[") continue; @@ -157,24 +157,120 @@ function extractMarkdownDestinations(text) { const rawDestination = text.slice(openParen + 1, closeParen); const candidate = parseLinkDestination(rawDestination); if (candidate) { - urls.push(candidate); + const startOffset = rawDestination.indexOf(candidate); + if (startOffset > -1) { + tokens.push({ + url: candidate, + start: openParen + 1 + startOffset, + end: openParen + 1 + startOffset + candidate.length, + }); + } else { + tokens.push({ + url: candidate, + start: openParen + 1, + end: closeParen, + }); + } } i = closeParen; } - return urls; + return tokens; +} + +function extractMarkdownDestinations(text) { + return extractMarkdownLinkTokens(text).map((token) => token.url); } function isExternalLink(link) { return typeof link === "string" && link.includes("://"); } +function stripMarkdownInlineCode(text) { + if (typeof text !== "string" || !text.includes("`")) { + return text; + } + + let result = ""; + let index = 0; + + while (index < text.length) { + if (text[index] !== "`") { + result += text[index]; + index += 1; + continue; + } + + let fenceLength = 1; + while (index + fenceLength < text.length && text[index + fenceLength] === "`") { + fenceLength += 1; + } + + const fence = "`".repeat(fenceLength); + const closingIndex = text.indexOf(fence, index + fenceLength); + if (closingIndex === -1) { + result += text.slice(index, index + fenceLength); + index += fenceLength; + continue; + } + + const spanLength = closingIndex + fenceLength - index; + result += " ".repeat(spanLength); + index = closingIndex + fenceLength; + } + + return result; +} + +function parseMarkdownFence(line) { + if (typeof line !== "string") { + return null; + } + const match = line.match(/^[ ]{0,3}([`~]{3,})/); + if (!match) { + return null; + } + + return { + marker: match[1][0], + length: match[1].length, + }; +} + +function isFenceClosingLine(line, activeFence) { + if (!activeFence || typeof line !== "string") { + return false; + } + + const match = line.match(/^[ ]{0,3}([`~]{3,})[ \t]*$/); + if (!match) { + return false; + } + if (match[1][0] !== activeFence.marker) { + return false; + } + return match[1].length >= activeFence.length; +} + +function isIndentedCodeLine(line) { + if (typeof line !== "string" || !line) { + return false; + } + return line.startsWith(" ") || line.startsWith("\t"); +} + function extractLinksFromText(text) { if (typeof text !== "string" || !text.includes("http")) { return []; } + const strippedText = stripMarkdownInlineCode(text); + if (typeof strippedText !== "string" || !strippedText.includes("http")) { + return []; + } + const results = []; const seen = new Set(); + const markdownLinkTokens = extractMarkdownLinkTokens(strippedText); function addCandidate(candidate, options = {}) { const sanitized = sanitizeUrlCandidate(candidate, options); @@ -185,18 +281,28 @@ function extractLinksFromText(text) { results.push(sanitized); } - for (const url of extractMarkdownDestinations(text)) { - addCandidate(url, { keepTrailingParens: true }); + for (const token of markdownLinkTokens) { + addCandidate(token.url, { keepTrailingParens: true }); } const angleRegex = /<\s*(https?:\/\/[^>\s]+)\s*>/gi; let match; - while ((match = angleRegex.exec(text)) !== null) { + while ((match = angleRegex.exec(strippedText)) !== null) { addCandidate(match[1]); } const autoRegex = /https?:\/\/[^\s<>"`]+/gi; - while ((match = autoRegex.exec(text)) !== null) { + while ((match = autoRegex.exec(strippedText)) !== null) { + let overlapsMarkdownDestination = false; + for (const token of markdownLinkTokens) { + if (match.index >= token.start && match.index < token.end) { + overlapsMarkdownDestination = true; + break; + } + } + if (overlapsMarkdownDestination) { + continue; + } addCandidate(match[0]); } @@ -208,6 +314,9 @@ async function collectMarkdownLinksFromStream(stream) { const results = []; let lineNumber = 0; let inFrontMatter = false; + let activeFence = null; + let inIndentedCodeBlock = false; + let previousLineBlank = true; try { for await (const line of rl) { lineNumber++; @@ -225,9 +334,44 @@ async function collectMarkdownLinksFromStream(stream) { continue; } + if (activeFence) { + if (isFenceClosingLine(line, activeFence)) { + activeFence = null; + } + previousLineBlank = trimmed === ""; + continue; + } + + const openingFence = parseMarkdownFence(line); + if (openingFence) { + activeFence = openingFence; + previousLineBlank = trimmed === ""; + continue; + } + + if (inIndentedCodeBlock) { + if (trimmed === "") { + previousLineBlank = true; + continue; + } + if (isIndentedCodeLine(line)) { + previousLineBlank = false; + continue; + } + inIndentedCodeBlock = false; + } + + if (previousLineBlank && isIndentedCodeLine(line)) { + inIndentedCodeBlock = true; + previousLineBlank = false; + continue; + } + for (const url of extractLinksFromText(line)) { results.push({ url, line: lineNumber }); } + + previousLineBlank = trimmed === ""; } } finally { rl.close(); diff --git a/tools/tests/markdown_links.test.js b/tools/tests/markdown_links.test.js index 68913ea8..45b9bb60 100644 --- a/tools/tests/markdown_links.test.js +++ b/tools/tests/markdown_links.test.js @@ -15,6 +15,12 @@ test("extractLinksFromText returns sanitized external URLs only once", () => { assert.deepStrictEqual(urls, ["https://example.com", "https://foo.com", "https://bar.com/path"]); }); +test("extractLinksFromText does not extend a markdown destination past the closing parenthesis", () => { + const input = "J'ai eu mon lot d'installations du couple [anope](https://www.anope.org/)/epona."; + const urls = extractLinksFromText(input); + assert.deepStrictEqual(urls, ["https://www.anope.org/"]); +}); + test("collectMarkdownLinksFromStream preserves line numbers", async () => { const content = [ "Intro line with no link", @@ -41,6 +47,27 @@ test("collectMarkdownLinksFromStream preserves line numbers", async () => { ]); }); +test("collectMarkdownLinksFromStream ignores inline code, fenced code blocks and indented code blocks", async () => { + const content = [ + "Visible https://visible.example.com.", + "Inline code `https://inline.example.com` and normal https://normal.example.com.", + "", + "```yaml", + "uses: https://github.com/easingthemes/ssh-deploy@main", + "```", + "", + " https://indented.example.com", + "After code https://after.example.com.", + ].join("\n"); + const stream = Readable.from([content]); + const links = await collectMarkdownLinksFromStream(stream); + assert.deepStrictEqual(links, [ + { url: "https://visible.example.com", line: 1 }, + { url: "https://normal.example.com", line: 2 }, + { url: "https://after.example.com", line: 9 }, + ]); +}); + test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => { const content = [ "---",