Corrige les faux positifs dans l'analyse des liens Markdown
This commit is contained in:
@@ -129,8 +129,8 @@ function parseLinkDestination(raw) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractMarkdownDestinations(text) {
|
function extractMarkdownLinkTokens(text) {
|
||||||
const urls = [];
|
const tokens = [];
|
||||||
for (let i = 0; i < text.length; i++) {
|
for (let i = 0; i < text.length; i++) {
|
||||||
if (text[i] === "!") {
|
if (text[i] === "!") {
|
||||||
if (text[i + 1] !== "[") continue;
|
if (text[i + 1] !== "[") continue;
|
||||||
@@ -157,24 +157,120 @@ function extractMarkdownDestinations(text) {
|
|||||||
const rawDestination = text.slice(openParen + 1, closeParen);
|
const rawDestination = text.slice(openParen + 1, closeParen);
|
||||||
const candidate = parseLinkDestination(rawDestination);
|
const candidate = parseLinkDestination(rawDestination);
|
||||||
if (candidate) {
|
if (candidate) {
|
||||||
urls.push(candidate);
|
const startOffset = rawDestination.indexOf(candidate);
|
||||||
|
if (startOffset > -1) {
|
||||||
|
tokens.push({
|
||||||
|
url: candidate,
|
||||||
|
start: openParen + 1 + startOffset,
|
||||||
|
end: openParen + 1 + startOffset + candidate.length,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
tokens.push({
|
||||||
|
url: candidate,
|
||||||
|
start: openParen + 1,
|
||||||
|
end: closeParen,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
i = closeParen;
|
i = closeParen;
|
||||||
}
|
}
|
||||||
return urls;
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Lists the destination URL of every Markdown link token found in `text`.
 *
 * Thin wrapper over `extractMarkdownLinkTokens` that discards each token's
 * `start`/`end` offsets and keeps only its `url`.
 *
 * @param {string} text - Text handed unchanged to `extractMarkdownLinkTokens`.
 * @returns {string[]} The `url` of each token, in token order.
 */
function extractMarkdownDestinations(text) {
  return extractMarkdownLinkTokens(text).map(({ url }) => url);
}
|
||||||
|
|
||||||
/**
 * Tells whether `link` is a string that contains a "://" separator.
 *
 * @param {*} link - Value to test; anything that is not a string yields `false`.
 * @returns {boolean} `true` when `link` is a string containing "://".
 */
function isExternalLink(link) {
  return typeof link === "string" && link.includes("://");
}
|
||||||
|
|
||||||
|
/**
 * Blanks out Markdown inline code spans so that later scans ignore their content.
 *
 * Every character of a code span (delimiters included) is replaced by a single
 * space, so the returned string has the same length as the input and character
 * offsets computed on it remain valid for the original text.
 *
 * A span opened by a run of N backticks is closed only by the next run of
 * exactly N backticks. Matching a closing delimiter inside a longer run would
 * let a lone "`" pair up with the first character of a later "``" and wipe the
 * ordinary text (and URLs) between them. A run without a matching closer is
 * kept as literal text.
 *
 * @param {*} text - Text to process. Non-string values, and strings without
 *   any backtick, are returned unchanged.
 * @returns {*} The text with code spans replaced by spaces, or the input
 *   itself when there was nothing to strip.
 */
function stripMarkdownInlineCode(text) {
  if (typeof text !== "string" || !text.includes("`")) {
    return text;
  }

  // Length of the backtick run that starts at `from`.
  const runLengthAt = (from) => {
    let end = from;
    while (end < text.length && text[end] === "`") {
      end += 1;
    }
    return end - from;
  };

  // Index of the next backtick run whose length is exactly `length`, or -1.
  const findClosingRun = (from, length) => {
    let cursor = text.indexOf("`", from);
    while (cursor !== -1) {
      const candidateLength = runLengthAt(cursor);
      if (candidateLength === length) {
        return cursor;
      }
      // Skip the whole mismatched run so its tail is not re-read as a shorter run.
      cursor = text.indexOf("`", cursor + candidateLength);
    }
    return -1;
  };

  let result = "";
  let index = 0;

  while (index < text.length) {
    const openingIndex = text.indexOf("`", index);
    if (openingIndex === -1) {
      result += text.slice(index);
      break;
    }

    // Copy the plain text that precedes the backtick run verbatim.
    result += text.slice(index, openingIndex);

    const fenceLength = runLengthAt(openingIndex);
    const closingIndex = findClosingRun(openingIndex + fenceLength, fenceLength);

    if (closingIndex === -1) {
      // Unmatched run: keep the backticks as literal characters.
      result += text.slice(openingIndex, openingIndex + fenceLength);
      index = openingIndex + fenceLength;
      continue;
    }

    const spanEnd = closingIndex + fenceLength;
    // One space per removed character keeps offsets aligned with the input.
    result += " ".repeat(spanEnd - openingIndex);
    index = spanEnd;
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Recognizes the opening line of a fenced code block.
 *
 * A fence is a run of at least three identical characters, either all
 * backticks or all tildes, preceded by at most three spaces. Mixed runs such
 * as "`~`" are not fences. For a backtick fence the rest of the line must not
 * contain another backtick; otherwise the line is an inline code span such as
 * "```x``` text" and treating it as a fence would hide every following line.
 *
 * @param {*} line - Line to inspect; non-string values yield `null`.
 * @returns {{marker: string, length: number} | null} The fence character and
 *   the length of the run, or `null` when the line does not open a fence.
 */
function parseMarkdownFence(line) {
  if (typeof line !== "string") {
    return null;
  }

  const match = line.match(/^ {0,3}(`{3,}|~{3,})/);
  if (!match) {
    return null;
  }

  const run = match[1];
  const marker = run[0];

  // Backtick fences may not carry a backtick in the remainder of the line.
  if (marker === "`" && line.includes("`", match[0].length)) {
    return null;
  }

  return {
    marker,
    length: run.length,
  };
}
|
||||||
|
|
||||||
|
/**
 * Tells whether `line` closes the fenced code block described by `activeFence`.
 *
 * A closing line consists of at most three leading spaces, a run of at least
 * three identical fence characters, and optional trailing spaces or tabs. The
 * run must use the same character as the opening fence and be at least as
 * long. Mixed runs such as "``~" never close a fence.
 *
 * @param {*} line - Line to inspect; non-string values yield `false`.
 * @param {{marker: string, length: number} | null | undefined} activeFence -
 *   Descriptor of the currently open fence; a falsy value yields `false`.
 * @returns {boolean} `true` when the line closes the active fence.
 */
function isFenceClosingLine(line, activeFence) {
  if (!activeFence || typeof line !== "string") {
    return false;
  }

  const match = line.match(/^ {0,3}(`{3,}|~{3,})[ \t]*$/);
  if (!match) {
    return false;
  }

  const run = match[1];
  return run[0] === activeFence.marker && run.length >= activeFence.length;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether `line` is indented deeply enough to belong to an indented
 * code block.
 *
 * The line must start with four spaces, or with a tab preceded by at most
 * three spaces. A shallower indent (for instance a single leading space) is
 * ordinary paragraph text and must not be treated as code.
 *
 * @param {*} line - Line to inspect; non-string and empty values yield `false`.
 * @returns {boolean} `true` when the line carries a code-block indent.
 */
function isIndentedCodeLine(line) {
  if (typeof line !== "string" || !line) {
    return false;
  }
  return /^(?: {4}| {0,3}\t)/.test(line);
}
|
||||||
|
|
||||||
function extractLinksFromText(text) {
|
function extractLinksFromText(text) {
|
||||||
if (typeof text !== "string" || !text.includes("http")) {
|
if (typeof text !== "string" || !text.includes("http")) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const strippedText = stripMarkdownInlineCode(text);
|
||||||
|
if (typeof strippedText !== "string" || !strippedText.includes("http")) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
const seen = new Set();
|
const seen = new Set();
|
||||||
|
const markdownLinkTokens = extractMarkdownLinkTokens(strippedText);
|
||||||
|
|
||||||
function addCandidate(candidate, options = {}) {
|
function addCandidate(candidate, options = {}) {
|
||||||
const sanitized = sanitizeUrlCandidate(candidate, options);
|
const sanitized = sanitizeUrlCandidate(candidate, options);
|
||||||
@@ -185,18 +281,28 @@ function extractLinksFromText(text) {
|
|||||||
results.push(sanitized);
|
results.push(sanitized);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const url of extractMarkdownDestinations(text)) {
|
for (const token of markdownLinkTokens) {
|
||||||
addCandidate(url, { keepTrailingParens: true });
|
addCandidate(token.url, { keepTrailingParens: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
const angleRegex = /<\s*(https?:\/\/[^>\s]+)\s*>/gi;
|
const angleRegex = /<\s*(https?:\/\/[^>\s]+)\s*>/gi;
|
||||||
let match;
|
let match;
|
||||||
while ((match = angleRegex.exec(text)) !== null) {
|
while ((match = angleRegex.exec(strippedText)) !== null) {
|
||||||
addCandidate(match[1]);
|
addCandidate(match[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const autoRegex = /https?:\/\/[^\s<>"`]+/gi;
|
const autoRegex = /https?:\/\/[^\s<>"`]+/gi;
|
||||||
while ((match = autoRegex.exec(text)) !== null) {
|
while ((match = autoRegex.exec(strippedText)) !== null) {
|
||||||
|
let overlapsMarkdownDestination = false;
|
||||||
|
for (const token of markdownLinkTokens) {
|
||||||
|
if (match.index >= token.start && match.index < token.end) {
|
||||||
|
overlapsMarkdownDestination = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (overlapsMarkdownDestination) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
addCandidate(match[0]);
|
addCandidate(match[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -208,6 +314,9 @@ async function collectMarkdownLinksFromStream(stream) {
|
|||||||
const results = [];
|
const results = [];
|
||||||
let lineNumber = 0;
|
let lineNumber = 0;
|
||||||
let inFrontMatter = false;
|
let inFrontMatter = false;
|
||||||
|
let activeFence = null;
|
||||||
|
let inIndentedCodeBlock = false;
|
||||||
|
let previousLineBlank = true;
|
||||||
try {
|
try {
|
||||||
for await (const line of rl) {
|
for await (const line of rl) {
|
||||||
lineNumber++;
|
lineNumber++;
|
||||||
@@ -225,9 +334,44 @@ async function collectMarkdownLinksFromStream(stream) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (activeFence) {
|
||||||
|
if (isFenceClosingLine(line, activeFence)) {
|
||||||
|
activeFence = null;
|
||||||
|
}
|
||||||
|
previousLineBlank = trimmed === "";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const openingFence = parseMarkdownFence(line);
|
||||||
|
if (openingFence) {
|
||||||
|
activeFence = openingFence;
|
||||||
|
previousLineBlank = trimmed === "";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inIndentedCodeBlock) {
|
||||||
|
if (trimmed === "") {
|
||||||
|
previousLineBlank = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (isIndentedCodeLine(line)) {
|
||||||
|
previousLineBlank = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
inIndentedCodeBlock = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (previousLineBlank && isIndentedCodeLine(line)) {
|
||||||
|
inIndentedCodeBlock = true;
|
||||||
|
previousLineBlank = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (const url of extractLinksFromText(line)) {
|
for (const url of extractLinksFromText(line)) {
|
||||||
results.push({ url, line: lineNumber });
|
results.push({ url, line: lineNumber });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
previousLineBlank = trimmed === "";
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
rl.close();
|
rl.close();
|
||||||
|
|||||||
@@ -15,6 +15,12 @@ test("extractLinksFromText returns sanitized external URLs only once", () => {
|
|||||||
assert.deepStrictEqual(urls, ["https://example.com", "https://foo.com", "https://bar.com/path"]);
|
assert.deepStrictEqual(urls, ["https://example.com", "https://foo.com", "https://bar.com/path"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// The "/epona." text that follows the closing parenthesis must not be glued
// onto the link destination by the bare-URL scan.
test("extractLinksFromText does not extend a markdown destination past the closing parenthesis", () => {
  const input = "J'ai eu mon lot d'installations du couple [anope](https://www.anope.org/)/epona.";
  const urls = extractLinksFromText(input);
  assert.deepStrictEqual(urls, ["https://www.anope.org/"]);
});
|
||||||
|
|
||||||
test("collectMarkdownLinksFromStream preserves line numbers", async () => {
|
test("collectMarkdownLinksFromStream preserves line numbers", async () => {
|
||||||
const content = [
|
const content = [
|
||||||
"Intro line with no link",
|
"Intro line with no link",
|
||||||
@@ -41,6 +47,27 @@ test("collectMarkdownLinksFromStream preserves line numbers", async () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Links inside inline code, fenced code blocks and indented code blocks are
// skipped, while line numbers of the remaining links stay aligned with the input.
test("collectMarkdownLinksFromStream ignores inline code, fenced code blocks and indented code blocks", async () => {
  const content = [
    "Visible https://visible.example.com.",
    "Inline code `https://inline.example.com` and normal https://normal.example.com.",
    "",
    "```yaml",
    "uses: https://github.com/easingthemes/ssh-deploy@main",
    "```",
    "",
    // Four leading spaces after a blank line: a real indented code block.
    "    https://indented.example.com",
    "After code https://after.example.com.",
  ].join("\n");
  const stream = Readable.from([content]);
  const links = await collectMarkdownLinksFromStream(stream);
  assert.deepStrictEqual(links, [
    { url: "https://visible.example.com", line: 1 },
    { url: "https://normal.example.com", line: 2 },
    { url: "https://after.example.com", line: 9 },
  ]);
});
|
||||||
|
|
||||||
test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => {
|
test("collectMarkdownLinksFromStream ignores URLs in front matter entirely", async () => {
|
||||||
const content = [
|
const content = [
|
||||||
"---",
|
"---",
|
||||||
|
|||||||
Reference in New Issue
Block a user