#!/usr/bin/env node
/**
 * Internal-link checker.
 *
 * Walks the site's `content/` tree, extracts every internal (root-relative)
 * link found in Markdown and YAML files, and verifies that each one points
 * to an existing content directory or to a taxonomy-term page derived from
 * frontmatter keywords. Broken links are reported on stderr and the process
 * exits with a non-zero code.
 */
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { sanitizeUrlCandidate } = require("./lib/markdown_links");

const SITE_ROOT = path.resolve(__dirname, "..");
const CONTENT_DIR = path.join(SITE_ROOT, "content");
const TAXONOMIES_FILE = path.join(SITE_ROOT, "config", "_default", "taxonomies.yaml");

// File extensions scanned for links, and the subset treated as Markdown.
const TARGET_EXTENSIONS = new Set([".md", ".markdown", ".mdx", ".yaml", ".yml"]);
const MARKDOWN_EXTENSIONS = new Set([".md", ".markdown", ".mdx"]);

// Bare "/something" tokens in non-Markdown files (the char class excludes
// quotes, brackets, pipes and whitespace so the match stops at delimiters).
const INTERNAL_LINK_REGEX = /\/[^\s"'`<>\\\[\]{}|]+/g;
// Characters allowed immediately before a bare link candidate.
const VALID_PREFIX_REGEX = /[\s"'`([<{=:]/;
// YAML "path:" keys are filesystem paths, not site links — skip those lines.
const PATH_KEY_REGEX = /^\s*(?:"path"|'path'|path)\s*:/i;
// Leading `--- ... ---` frontmatter block at the very start of a file.
const FRONTMATTER_PATTERN = /^---\r?\n([\s\S]+?)\r?\n---\r?\n?/;

/** Converts an OS-specific path to forward-slash form. */
function toPosix(value) {
  return value.split(path.sep).join("/");
}

/** Returns `filePath` relative to the site root, in POSIX form (for messages). */
function relativeToSite(filePath) {
  return toPosix(path.relative(SITE_ROOT, filePath));
}

/** True when the file's extension is one we scan for links. */
function isTargetFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return TARGET_EXTENSIONS.has(ext);
}

/** True when the file should be parsed as Markdown. */
function isMarkdownFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return MARKDOWN_EXTENSIONS.has(ext);
}

/** True for plain YAML data files (.yaml / .yml). */
function isYamlFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return ext === ".yaml" || ext === ".yml";
}

/**
 * Recursively walks `rootDir` and collects:
 *  - `files`: absolute paths of every scannable file;
 *  - `directories`: the set of site-relative directory URLs ("/" for the root),
 *    which are the valid targets for internal links.
 *
 * @param {string} rootDir - Absolute path of the content root.
 * @returns {{files: string[], directories: Set<string>}}
 */
function collectContentEntries(rootDir) {
  const files = [];
  const directories = new Set(["/"]);
  function walk(currentDir) {
    const entries = fs.readdirSync(currentDir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        const relative = path.relative(rootDir, fullPath);
        const normalized = relative ? `/${toPosix(relative)}` : "/";
        directories.add(normalized);
        walk(fullPath);
      } else if (entry.isFile() && isTargetFile(fullPath)) {
        files.push(fullPath);
      }
    }
  }
  walk(rootDir);
  return { files, directories };
}

/**
 * Scans Markdown frontmatter across `files` and returns the set of normalized
 * taxonomy-term URLs (e.g. "/tags/foo") that Hugo will generate. These count
 * as valid link targets even though no matching directory exists on disk.
 *
 * @param {string[]} files - Absolute paths of candidate files.
 * @returns {Set<string>} Normalized taxonomy URLs; empty when no taxonomy
 *   config could be loaded.
 */
function collectTaxonomyKeywordPaths(files) {
  const mapping = loadTaxonomyMapping(TAXONOMIES_FILE);
  if (!mapping) {
    return new Set();
  }
  const keywordPaths = new Set();
  for (const filePath of files) {
    if (!isMarkdownFile(filePath)) {
      continue;
    }
    let raw;
    try {
      raw = fs.readFileSync(filePath, "utf8");
    } catch (error) {
      console.warn(
        `Impossible de lire ${relativeToSite(filePath)} pour extraire les taxonomies (${error.message}).`,
      );
      continue;
    }
    const frontmatterMatch = raw.match(FRONTMATTER_PATTERN);
    if (!frontmatterMatch) {
      continue;
    }
    let frontmatter = {};
    try {
      frontmatter = yaml.load(frontmatterMatch[1]) || {};
    } catch (error) {
      console.warn(`Frontmatter invalide dans ${relativeToSite(filePath)} (${error.message}).`);
      continue;
    }
    const keywords = extractTaxonomyKeywords(
      frontmatter,
      frontmatterMatch[1],
      mapping.fieldToCanonical,
    );
    for (const keyword of keywords) {
      const normalized = normalizeInternalLink(keyword.url);
      if (normalized) {
        keywordPaths.add(normalized);
      }
    }
  }
  return keywordPaths;
}

/**
 * Loads the taxonomy configuration (a singular → plural mapping) and builds a
 * Map from every accepted frontmatter field name (singular or plural) to its
 * canonical (plural) taxonomy name.
 *
 * @param {string} configPath - Absolute path of taxonomies.yaml.
 * @returns {{fieldToCanonical: Map<string, string>} | null} null when the
 *   file is unreadable, invalid, or declares no usable taxonomy.
 */
function loadTaxonomyMapping(configPath) {
  let raw;
  try {
    raw = fs.readFileSync(configPath, "utf8");
  } catch (error) {
    console.warn(`Impossible de lire ${relativeToSite(configPath)} (${error.message}).`);
    return null;
  }
  let data;
  try {
    data = yaml.load(raw) || {};
  } catch (error) {
    console.warn(`YAML invalide dans ${relativeToSite(configPath)} (${error.message}).`);
    return null;
  }
  if (typeof data !== "object" || data === null) {
    console.warn(`Format inattendu dans ${relativeToSite(configPath)}.`);
    return null;
  }
  const fieldToCanonical = new Map();
  for (const [singular, plural] of Object.entries(data)) {
    // Prefer the plural value when present; fall back to the singular key.
    const canonical =
      typeof plural === "string" && plural.trim().length > 0 ? plural.trim() : singular.trim();
    if (!canonical) continue;
    const candidates = new Set([singular, canonical].filter(Boolean));
    for (const candidate of candidates) {
      fieldToCanonical.set(candidate, canonical);
    }
  }
  if (fieldToCanonical.size === 0) {
    console.warn("Aucune taxonomie valide n'a été trouvée.");
    return null;
  }
  return { fieldToCanonical };
}

/**
 * Extracts taxonomy keywords from parsed frontmatter AND from commented-out
 * list items in the raw frontmatter text (terms kept as `# - term`).
 * Deduplicates per taxonomy, case-insensitively.
 *
 * @param {object} frontmatter - Parsed YAML frontmatter.
 * @param {string} frontmatterRaw - Raw frontmatter text (for commented terms).
 * @param {Map<string, string>} fieldToCanonical - Field → canonical taxonomy.
 * @returns {{taxonomy: string, term: string, url: string}[]}
 */
function extractTaxonomyKeywords(frontmatter, frontmatterRaw, fieldToCanonical) {
  const keywords = [];
  const seen = new Set();
  function addKeyword(taxonomy, term) {
    if (!taxonomy || typeof term !== "string") return;
    const normalized = term.trim();
    if (!normalized) return;
    const slug = slugify(normalized);
    if (!slug) return;
    const key = `${taxonomy}::${normalized.toLowerCase()}`;
    if (seen.has(key)) return;
    seen.add(key);
    keywords.push({
      taxonomy,
      term: normalized,
      url: `/${taxonomy}/${slug}/`,
    });
  }
  if (typeof frontmatter === "object" && frontmatter !== null) {
    for (const [field, value] of Object.entries(frontmatter)) {
      const canonical = fieldToCanonical.get(field);
      if (!canonical) continue;
      const terms = normalizeTerms(value);
      for (const term of terms) {
        addKeyword(canonical, term);
      }
    }
  }
  for (const entry of extractCommentedTerms(frontmatterRaw, fieldToCanonical)) {
    addKeyword(entry.taxonomy, entry.term);
  }
  return keywords;
}

/** Coerces a frontmatter value (scalar or array) into a list of clean terms. */
function normalizeTerms(value) {
  if (Array.isArray(value)) {
    return value.map((item) => normalizeTerm(item)).filter(Boolean);
  }
  const single = normalizeTerm(value);
  return single ? [single] : [];
}

/** Trims a single term; returns null for non-strings and empty strings. */
function normalizeTerm(value) {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Finds commented-out list items (`# - term`) indented under a taxonomy field
 * in the raw frontmatter text. Only lines indented MORE than the field line
 * are accepted, so top-level comments are ignored.
 *
 * @param {string} frontmatterRaw - Raw frontmatter text.
 * @param {Map<string, string>} fieldToCanonical - Field → canonical taxonomy.
 * @returns {{taxonomy: string, term: string}[]}
 */
function extractCommentedTerms(frontmatterRaw, fieldToCanonical) {
  if (typeof frontmatterRaw !== "string" || frontmatterRaw.length === 0) {
    return [];
  }
  const results = [];
  const lines = frontmatterRaw.split(/\r?\n/);
  let currentCanonical = null;
  let currentIndent = 0;
  for (const line of lines) {
    const indent = getIndentation(line);
    // A "field:" line (possibly followed by a comment) opens a new context.
    const fieldMatch = line.match(/^\s*([A-Za-z0-9_]+):\s*(?:#.*)?$/);
    if (fieldMatch) {
      const fieldName = fieldMatch[1];
      currentCanonical = fieldToCanonical.get(fieldName) || null;
      currentIndent = indent;
      continue;
    }
    if (!currentCanonical) continue;
    const commentMatch = line.match(/^\s*#\s*-\s+(.*)$/);
    if (!commentMatch) continue;
    if (indent <= currentIndent) continue;
    const term = commentMatch[1].trim();
    if (!term) continue;
    results.push({ taxonomy: currentCanonical, term });
  }
  return results;
}

/** Returns the number of leading whitespace characters of `line`. */
function getIndentation(line) {
  if (typeof line !== "string" || line.length === 0) return 0;
  const match = line.match(/^\s*/);
  return match ? match[0].length : 0;
}

/**
 * Builds a URL slug: strips diacritics (NFD + Unicode Diacritic class),
 * lowercases, collapses non-alphanumeric runs to "-", trims edge dashes.
 */
function slugify(value) {
  return value
    .normalize("NFD")
    .replace(/\p{Diacritic}/gu, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .replace(/-{2,}/g, "-");
}

/**
 * Accepts a raw link candidate only when it is a root-relative internal URL:
 * starts with a single "/", is not protocol-relative ("//…") and carries no
 * scheme. Returns the sanitized candidate or null.
 */
function sanitizeInternalLink(raw) {
  const candidate = sanitizeUrlCandidate(raw);
  if (!candidate) return null;
  if (!candidate.startsWith("/")) return null;
  if (candidate.startsWith("//")) return null;
  if (candidate.includes("://")) return null;
  return candidate;
}

/**
 * Canonicalizes an internal link: drops query string and fragment, collapses
 * repeated slashes, removes trailing slashes ("/" stays "/").
 *
 * @param {string} link - A root-relative link.
 * @returns {string | null} Normalized path, or null for non-internal input.
 */
function normalizeInternalLink(link) {
  if (typeof link !== "string" || !link.startsWith("/")) {
    return null;
  }
  let normalized = link.split("?")[0];
  normalized = normalized.split("#")[0];
  normalized = normalized.replace(/\/+/g, "/");
  normalized = normalized.replace(/\/+$/, "");
  if (!normalized) {
    normalized = "/";
  }
  return normalized;
}

/**
 * Sanitizes and normalizes a raw link candidate in one step.
 * Private helper shared by both extraction paths in extractInternalLinks.
 *
 * @param {string} raw - Raw link text as found in the file.
 * @returns {string | null} Canonical internal link, or null when rejected.
 */
function resolveInternalLink(raw) {
  const sanitized = sanitizeInternalLink(raw);
  if (!sanitized) return null;
  const normalized = normalizeInternalLink(sanitized);
  // Defensive: keep rejecting a literal "//" result, as the original did.
  if (!normalized || normalized === "//") return null;
  return normalized;
}

/** Maps a normalized internal link to the content directory it should match. */
function expectedDirForLink(link) {
  if (link === "/") {
    return CONTENT_DIR;
  }
  const relative = link.slice(1);
  const segments = relative.split("/").filter(Boolean);
  return path.join(CONTENT_DIR, ...segments);
}

/** Counts how many times `char` repeats in `text` starting at `startIndex`. */
function countRepeatedChar(text, startIndex, char) {
  let count = 0;
  while (text[startIndex + count] === char) {
    count++;
  }
  return count;
}

/**
 * Finds the index of the `closeChar` that balances the `openChar` at
 * `startIndex`, honoring backslash escapes. Returns -1 when unbalanced.
 */
function findMatchingPair(text, startIndex, openChar, closeChar) {
  let depth = 0;
  for (let i = startIndex; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\") {
      i++; // skip the escaped character
      continue;
    }
    if (ch === openChar) {
      depth++;
    } else if (ch === closeChar) {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Extracts `[text](destination)` links from one line of Markdown, skipping
 * anything inside inline code spans (a span closes only on a backtick run of
 * the same length it was opened with, per CommonMark).
 *
 * @param {string} line - One line of Markdown text.
 * @returns {{destination: string}[]}
 */
function extractMarkdownLinksFromLine(line) {
  const results = [];
  let inlineFence = null; // run length of the currently open code span
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === "`") {
      const runLength = countRepeatedChar(line, i, "`");
      if (!inlineFence) {
        inlineFence = runLength;
      } else if (inlineFence === runLength) {
        inlineFence = null;
      }
      i += runLength - 1;
      continue;
    }
    if (inlineFence) {
      continue;
    }
    if (ch !== "[") {
      continue;
    }
    const closeBracket = findMatchingPair(line, i, "[", "]");
    if (closeBracket === -1) {
      break;
    }
    let pointer = closeBracket + 1;
    while (pointer < line.length && /\s/.test(line[pointer])) {
      pointer++;
    }
    if (pointer >= line.length || line[pointer] !== "(") {
      // Not an inline link (e.g. reference style) — resume after the "]".
      i = closeBracket;
      continue;
    }
    const closeParen = findMatchingPair(line, pointer, "(", ")");
    if (closeParen === -1) {
      break;
    }
    const destination = line.slice(pointer + 1, closeParen);
    results.push({ destination });
    i = closeParen;
  }
  return results;
}

/**
 * Extracts every internal link from one file.
 *
 * Markdown files: frontmatter and fenced code blocks are skipped, and only
 * real `[text](url)` destinations are considered. Other files: bare "/…"
 * tokens are matched, except on YAML "path:" lines (filesystem paths).
 *
 * @param {string} filePath - Absolute path of the file to scan.
 * @returns {{link: string, line: number}[]} Normalized links with 1-based lines.
 * @throws When the file cannot be read.
 */
function extractInternalLinks(filePath) {
  const content = fs.readFileSync(filePath, "utf8");
  const lines = content.split(/\r?\n/);
  const entries = [];
  const skipPathKey = isYamlFile(filePath);
  const treatAsMarkdown = isMarkdownFile(filePath);
  let fenceDelimiter = null; // "`" or "~" while inside a fenced code block
  let inFrontMatter = false;
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    const trimmed = line.trim();
    if (treatAsMarkdown) {
      if (index === 0 && trimmed === "---") {
        inFrontMatter = true;
        continue;
      }
      if (inFrontMatter) {
        if (trimmed === "---") {
          inFrontMatter = false;
        }
        continue;
      }
      const fenceMatch = trimmed.match(/^(```+|~~~+)/);
      if (fenceMatch) {
        const delimiterChar = fenceMatch[1][0];
        if (!fenceDelimiter) {
          fenceDelimiter = delimiterChar;
        } else if (delimiterChar === fenceDelimiter) {
          fenceDelimiter = null;
        }
        continue;
      }
      if (fenceDelimiter) {
        continue;
      }
      const markdownLinks = extractMarkdownLinksFromLine(line);
      for (const { destination } of markdownLinks) {
        const normalized = resolveInternalLink(destination);
        if (!normalized) continue;
        entries.push({ link: normalized, line: index + 1 });
      }
      continue;
    }
    if (skipPathKey && PATH_KEY_REGEX.test(line)) {
      continue;
    }
    for (const match of line.matchAll(INTERNAL_LINK_REGEX)) {
      const raw = match[0];
      const startIndex = match.index ?? line.indexOf(raw);
      if (startIndex > 0) {
        // Require a delimiter before the candidate so "a/b" is not a link.
        const prevChar = line[startIndex - 1];
        if (!VALID_PREFIX_REGEX.test(prevChar)) {
          continue;
        }
      }
      const normalized = resolveInternalLink(raw);
      if (!normalized) continue;
      entries.push({ link: normalized, line: index + 1 });
    }
  }
  return entries;
}

/**
 * Records one broken-link occurrence, deduplicating identical file:line
 * references for the same link.
 *
 * @param {Map<string, object>} missingMap - Accumulator keyed by link.
 * @param {string} link - Normalized broken link.
 * @param {string} filePath - Absolute path of the referencing file.
 * @param {number} line - 1-based line number of the reference.
 */
function addMissingLink(missingMap, link, filePath, line) {
  let entry = missingMap.get(link);
  if (!entry) {
    entry = {
      expectedPath: expectedDirForLink(link),
      references: [],
      referenceKeys: new Set(),
    };
    missingMap.set(link, entry);
  }
  const referenceKey = `${filePath}:${line}`;
  if (entry.referenceKeys.has(referenceKey)) {
    return;
  }
  entry.referenceKeys.add(referenceKey);
  entry.references.push({
    file: relativeToSite(filePath),
    line,
  });
}

/**
 * Entry point: walks the content tree, validates every internal link against
 * existing directories and taxonomy-term URLs, and reports broken links on
 * stderr (sorted with the French collation). Sets exit code 1 on failure.
 */
function main() {
  if (!fs.existsSync(CONTENT_DIR)) {
    console.error(`Le dossier content est introuvable (${CONTENT_DIR}).`);
    process.exit(1);
  }
  const { files, directories } = collectContentEntries(CONTENT_DIR);
  const taxonomyPaths = collectTaxonomyKeywordPaths(files);
  for (const keywordPath of taxonomyPaths) {
    directories.add(keywordPath);
  }
  const missingLinks = new Map();
  for (const filePath of files) {
    let entries;
    try {
      entries = extractInternalLinks(filePath);
    } catch (error) {
      console.warn(`Impossible de lire ${relativeToSite(filePath)} (${error.message}).`);
      continue;
    }
    for (const { link, line } of entries) {
      if (directories.has(link)) {
        continue;
      }
      addMissingLink(missingLinks, link, filePath, line);
    }
  }
  if (missingLinks.size === 0) {
    console.log("Tous les liens internes pointent vers un dossier existant.");
    return;
  }
  console.error(`Liens internes cassés détectés: ${missingLinks.size}`);
  const sorted = Array.from(missingLinks.entries()).sort((a, b) => a[0].localeCompare(b[0], "fr"));
  for (const [link, data] of sorted) {
    const expectedRelative = relativeToSite(data.expectedPath);
    console.error(`- ${link} (attendu: ${expectedRelative})`);
    for (const reference of data.references) {
      console.error(` • ${reference.file}:${reference.line}`);
    }
  }
  process.exitCode = 1;
}

if (require.main === module) {
  try {
    main();
  } catch (error) {
    console.error(`Erreur lors de la vérification des liens internes: ${error.message}`);
    process.exit(1);
  }
}