#!/usr/bin/env node
/**
 * Walks every Markdown article under content/ and automatically links the
 * first occurrence of each term declared in the frontmatter taxonomies to
 * that term's page. Occurrences that are already linked are ignored.
 *
 * Exits with a non-zero code when at least one file was modified.
 */

const fs = require("node:fs");
const path = require("node:path");
const yaml = require("js-yaml");

const PROJECT_ROOT = path.resolve(__dirname, "..");
const CONTENT_ROOT = path.join(PROJECT_ROOT, "content");
const TAXONOMIES_FILE = path.join(PROJECT_ROOT, "config", "_default", "taxonomies.yaml");
// Captures (1) the raw frontmatter and (2) the body. Both LF and CRLF line
// endings are accepted so that articles saved on Windows are not skipped.
const FRONTMATTER_PATTERN = /^---\r?\n([\s\S]+?)\r?\n---(?:\r?\n)?([\s\S]*)$/;
const WORD_CHAR = /[\p{L}\p{N}]/u;
const INLINE_FORMATTING_CHARS = ["*", "_"];

main();

/**
 * Script entry point: loads the taxonomy mapping, processes every Markdown
 * file under CONTENT_ROOT and reports the files that were rewritten.
 *
 * Sets the process exit code to 2 when at least one file was modified.
 */
function main() {
  const taxonomyMapping = loadTaxonomyMapping(TAXONOMIES_FILE);
  const files = collectMarkdownFiles(CONTENT_ROOT);
  if (files.length === 0) {
    console.log("Aucun article Markdown trouvé sous content/.");
    return;
  }

  const changed = [];
  for (const filePath of files) {
    if (processFile(filePath, taxonomyMapping)) {
      changed.push(filePath);
    }
  }

  if (changed.length > 0) {
    for (const filePath of changed) {
      const rel = path.relative(PROJECT_ROOT, filePath);
      console.log(`✏️ ${rel}`);
    }
    console.log("Des modifications ont été effectuées. Merci de les revoir.");
    // Use exitCode rather than process.exit(): exiting immediately can drop
    // stdout output that has not been flushed yet (e.g. when piped).
    process.exitCode = 2;
  } else {
    console.log("Tous les articles sont déjà correctement liés.");
  }
}

/**
 * Links taxonomy terms in a single Markdown file and writes it back when the
 * body changed.
 *
 * @param {string} filePath - Absolute path of the Markdown file.
 * @param {{fieldToCanonical: Map<string, string>}} taxonomyMapping - Mapping
 *   returned by loadTaxonomyMapping.
 * @returns {boolean} true when the file was rewritten, false otherwise
 *   (unreadable file, no or invalid frontmatter, no keyword, nothing to link).
 */
function processFile(filePath, taxonomyMapping) {
  let raw;
  try {
    raw = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    console.warn(`⚠️ Impossible de lire ${filePath}: ${error.message}`);
    return false;
  }

  const match = raw.match(FRONTMATTER_PATTERN);
  if (!match) {
    return false;
  }

  let frontmatter;
  try {
    // NOTE(review): yaml.load is only safe by default with js-yaml >= 4;
    // confirm the installed version, or switch to safeLoad on js-yaml 3.
    frontmatter = yaml.load(match[1]) || {};
  } catch (error) {
    console.warn(`⚠️ Frontmatter invalide dans ${filePath}: ${error.message}`);
    return false;
  }

  const keywords = extractKeywords(frontmatter, match[1], taxonomyMapping.fieldToCanonical);
  if (keywords.length === 0) {
    return false;
  }

  const { body, changed } = linkKeywordsInBody(match[2], keywords);
  if (!changed) {
    return false;
  }

  // Keep everything before the body (frontmatter and delimiters) byte-identical.
  const prefixLength = raw.length - match[2].length;
  const updated = raw.slice(0, prefixLength) + body;
  fs.writeFileSync(filePath, updated, "utf8");
  return true;
}

/**
 * Reads the taxonomies configuration and builds a lookup from every accepted
 * frontmatter field name (config key and config value) to the canonical
 * taxonomy name used in URLs. The canonical name is the config value when it
 * is a non-empty string, otherwise the config key.
 *
 * Terminates the process with exit code 1 when the file cannot be read, is
 * not valid YAML, is not an object, or defines no taxonomy.
 *
 * @param {string} configPath - Path of the taxonomies YAML file.
 * @returns {{fieldToCanonical: Map<string, string>}}
 */
function loadTaxonomyMapping(configPath) {
  let raw;
  try {
    raw = fs.readFileSync(configPath, "utf8");
  } catch (error) {
    console.error(`Impossible de lire ${configPath}: ${error.message}`);
    process.exit(1);
  }

  let data;
  try {
    data = yaml.load(raw) || {};
  } catch (error) {
    console.error(`YAML invalide dans ${configPath}: ${error.message}`);
    process.exit(1);
  }

  if (typeof data !== "object" || data === null) {
    console.error(`Format inattendu dans ${configPath}`);
    process.exit(1);
  }

  const fieldToCanonical = new Map();
  for (const [singular, plural] of Object.entries(data)) {
    const canonicalName =
      typeof plural === "string" && plural.trim().length > 0
        ? plural.trim()
        : singular.trim();
    if (!canonicalName) continue;

    // Both spellings are accepted as frontmatter field names.
    const candidates = new Set([singular, canonicalName].filter(Boolean));
    for (const name of candidates) {
      fieldToCanonical.set(name, canonicalName);
    }
  }

  if (fieldToCanonical.size === 0) {
    console.error("Aucune taxonomie n'est définie dans la configuration.");
    process.exit(1);
  }

  return { fieldToCanonical };
}

/**
 * Recursively collects the Markdown files found under a directory.
 *
 * @param {string} root - Directory to scan.
 * @returns {string[]} Absolute file paths sorted with localeCompare.
 */
function collectMarkdownFiles(root) {
  const files = [];
  walk(root, files);
  return files.sort((a, b) => a.localeCompare(b));
}

/**
 * Depth-first directory traversal that appends every `.md` file (extension
 * compared case-insensitively) to `bucket`. `.git` and `node_modules` entries
 * are skipped; unreadable directories are reported with a warning and ignored.
 *
 * @param {string} dir - Directory to read.
 * @param {string[]} bucket - Accumulator receiving the file paths (mutated).
 */
function walk(dir, bucket) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    console.warn(`⚠️ Impossible de parcourir ${dir}: ${error.message}`);
    return;
  }

  for (const entry of entries) {
    if (entry.name === ".git" || entry.name === "node_modules") {
      continue;
    }
    const absolute = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      walk(absolute, bucket);
    } else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
      bucket.push(absolute);
    }
  }
}

/**
 * Builds the list of keywords to link for one article, from the parsed
 * frontmatter first and then from list items commented out in the raw
 * frontmatter. Terms are de-duplicated per taxonomy, case-insensitively.
 *
 * @param {object} frontmatter - Parsed frontmatter.
 * @param {string} frontmatterRaw - Raw frontmatter text.
 * @param {Map<string, string>} fieldToCanonical - Field name to canonical
 *   taxonomy name.
 * @returns {{taxonomy: string, term: string, url: string}[]}
 */
function extractKeywords(frontmatter, frontmatterRaw, fieldToCanonical) {
  const keywords = [];
  const seen = new Set();

  function addKeyword(taxonomy, term) {
    if (!taxonomy || typeof term !== "string") return;
    const normalized = term.trim();
    if (!normalized) return;
    const key = `${taxonomy}::${normalized.toLowerCase()}`;
    if (seen.has(key)) return;
    // A term whose slug is empty cannot be addressed by a URL.
    const slug = slugify(normalized);
    if (!slug) return;
    seen.add(key);
    keywords.push({
      taxonomy,
      term: normalized,
      url: `/${taxonomy}/${slug}/`,
    });
  }

  if (typeof frontmatter === "object" && frontmatter !== null) {
    for (const [field, value] of Object.entries(frontmatter)) {
      const canonical = fieldToCanonical.get(field);
      if (!canonical) continue;
      const terms = normalizeTerms(value);
      for (const term of terms) {
        addKeyword(canonical, term);
      }
    }
  }

  for (const entry of extractCommentedTerms(frontmatterRaw, fieldToCanonical)) {
    addKeyword(entry.taxonomy, entry.term);
  }

  return keywords;
}

/**
 * Normalizes a frontmatter value (single string or array) into a list of
 * non-empty trimmed strings; non-string items are dropped.
 *
 * @param {*} value - Raw frontmatter value.
 * @returns {string[]}
 */
function normalizeTerms(value) {
  if (Array.isArray(value)) {
    return value.map((item) => normalizeTerm(item)).filter(Boolean);
  }
  const single = normalizeTerm(value);
  return single ? [single] : [];
}

/**
 * Trims a single term.
 *
 * @param {*} value - Candidate term.
 * @returns {string|null} The trimmed string, or null when the value is not a
 *   string or is blank.
 */
function normalizeTerm(value) {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Finds list items that are commented out (`# - term`) below a taxonomy field
 * in the raw frontmatter. Only comment lines indented deeper than the field
 * line are taken into account.
 *
 * @param {string} frontmatterRaw - Raw frontmatter text.
 * @param {Map<string, string>} fieldToCanonical - Field name to canonical
 *   taxonomy name.
 * @returns {{taxonomy: string, term: string}[]}
 */
function extractCommentedTerms(frontmatterRaw, fieldToCanonical) {
  if (typeof frontmatterRaw !== "string" || frontmatterRaw.length === 0) {
    return [];
  }

  const results = [];
  const lines = frontmatterRaw.split(/\r?\n/);
  let currentCanonical = null;
  let currentIndent = 0;

  for (const line of lines) {
    const indent = getIndentation(line);

    // A bare "field:" line (optionally followed by a comment) opens a block.
    const fieldMatch = line.match(/^\s*([A-Za-z0-9_]+):\s*(?:#.*)?$/);
    if (fieldMatch) {
      const fieldName = fieldMatch[1];
      currentCanonical = fieldToCanonical.get(fieldName) || null;
      currentIndent = indent;
      continue;
    }

    if (!currentCanonical) continue;

    const commentMatch = line.match(/^\s*#\s*-\s+(.*)$/);
    if (!commentMatch) continue;
    if (indent <= currentIndent) continue;

    const term = commentMatch[1].trim();
    if (!term) continue;
    results.push({ taxonomy: currentCanonical, term });
  }

  return results;
}

/**
 * Wraps the first linkable occurrence of each keyword in a Markdown link.
 * Link ranges are recomputed after every insertion because offsets shift.
 *
 * @param {string} body - Markdown body of the article.
 * @param {{term: string, url: string}[]} keywords - Keywords to link.
 * @returns {{body: string, changed: boolean}}
 */
function linkKeywordsInBody(body, keywords) {
  if (typeof body !== "string" || body.length === 0 || keywords.length === 0) {
    return { body, changed: false };
  }

  let updated = body;
  let changed = false;
  let linkRanges = computeLinkRanges(updated);

  for (const keyword of keywords) {
    const occurrence = findKeywordOccurrence(updated, keyword.term, linkRanges);
    if (!occurrence) continue;

    // Pull surrounding emphasis markers inside the link label.
    const expanded = includeFormattingCharacters(updated, occurrence.start, occurrence.end);
    const before = updated.slice(0, expanded.start);
    const label = updated.slice(expanded.start, expanded.end);
    const after = updated.slice(expanded.end);
    updated = `${before}[${label}](${keyword.url})${after}`;
    changed = true;
    linkRanges = computeLinkRanges(updated);
  }

  return { body: updated, changed };
}

/**
 * Finds the first case-insensitive occurrence of a keyword that is neither
 * inside an existing link nor glued to adjacent letters or digits.
 *
 * @param {string} text - Text to search.
 * @param {string} keyword - Term to look for (matched literally).
 * @param {object[]} linkRanges - Ranges returned by computeLinkRanges.
 * @returns {{start: number, end: number, text: string}|null}
 */
function findKeywordOccurrence(text, keyword, linkRanges) {
  if (!keyword) return null;
  const escaped = escapeRegExp(keyword);
  if (!escaped) return null;

  const regex = new RegExp(escaped, "giu");
  let match;
  while ((match = regex.exec(text)) !== null) {
    const start = match.index;
    const end = start + match[0].length;
    if (isInsideExistingLink(start, end, linkRanges)) {
      continue;
    }
    if (!hasWordBoundaries(text, start, end)) {
      continue;
    }
    return { start, end, text: match[0] };
  }
  return null;
}

/**
 * Locates inline Markdown links and images (`[label](dest)` / `![alt](dest)`)
 * and returns the offsets of their label and destination. Whitespace between
 * `]` and `(` is tolerated.
 *
 * @param {string} text - Markdown text.
 * @returns {{textStart: number, textEnd: number, destStart: number,
 *   destEnd: number, isImage: boolean}[]} End offsets are exclusive.
 */
function computeLinkRanges(text) {
  const ranges = [];
  if (typeof text !== "string" || text.length === 0) {
    return ranges;
  }

  for (let i = 0; i < text.length; i++) {
    let isImage = false;
    if (text[i] === "!" && text[i + 1] === "[") {
      isImage = true;
      i += 1;
    }
    if (text[i] !== "[") continue;

    const openBracket = i;
    const closeBracket = findMatchingPair(text, openBracket, "[", "]");
    if (closeBracket === -1) continue;

    let pointer = closeBracket + 1;
    while (pointer < text.length && /\s/.test(text[pointer])) pointer++;
    if (pointer >= text.length || text[pointer] !== "(") {
      i = closeBracket;
      continue;
    }

    const openParen = pointer;
    const closeParen = findMatchingPair(text, openParen, "(", ")");
    if (closeParen === -1) {
      // Unbalanced destination: this is not a link. Resume after the label
      // instead of stopping, so that links further down are still detected.
      i = closeBracket;
      continue;
    }

    ranges.push({
      textStart: openBracket + 1,
      textEnd: closeBracket,
      destStart: openParen + 1,
      destEnd: closeParen,
      isImage,
    });
    i = closeParen;
  }

  return ranges;
}

/**
 * Returns the index of the delimiter closing the one found at `startIndex`,
 * honouring nesting and skipping backslash-escaped characters.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index of the opening delimiter.
 * @param {string} openChar - Opening delimiter.
 * @param {string} closeChar - Closing delimiter.
 * @returns {number} Index of the matching closing delimiter, or -1.
 */
function findMatchingPair(text, startIndex, openChar, closeChar) {
  let depth = 0;
  for (let i = startIndex; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\") {
      // Skip the escaped character.
      i++;
      continue;
    }
    if (ch === openChar) {
      depth++;
    } else if (ch === closeChar) {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Tells whether [start, end) overlaps the label or the destination of any
 * known link.
 *
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @param {object[]} ranges - Ranges returned by computeLinkRanges.
 * @returns {boolean}
 */
function isInsideExistingLink(start, end, ranges) {
  return ranges.some((range) => {
    const overlapsText = start < range.textEnd && end > range.textStart;
    const overlapsDest =
      typeof range.destStart === "number" &&
      typeof range.destEnd === "number" &&
      start < range.destEnd &&
      end > range.destStart;
    return overlapsText || overlapsDest;
  });
}

/**
 * Checks that the span [start, end) is not part of a longer word: a letter or
 * digit at either edge of the span must not touch a letter or digit outside.
 *
 * @param {string} text - Full text.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {boolean}
 */
function hasWordBoundaries(text, start, end) {
  const before = start > 0 ? text[start - 1] : "";
  const after = end < text.length ? text[end] : "";
  const startChar = text[start];
  const endChar = text[end - 1];

  if (isWordChar(startChar) && isWordChar(before)) {
    return false;
  }
  if (isWordChar(endChar) && isWordChar(after)) {
    return false;
  }
  return true;
}

/**
 * Tells whether a character is a Unicode letter or number.
 *
 * @param {string} ch - Single character (may be empty or undefined).
 * @returns {boolean}
 */
function isWordChar(ch) {
  return Boolean(ch && WORD_CHAR.test(ch));
}

/**
 * Converts a term into a URL slug: diacritics removed, lower-cased, runs of
 * characters outside [a-z0-9] replaced by a single hyphen, edges trimmed.
 *
 * @param {string} value - Term to convert.
 * @returns {string} The slug, possibly empty.
 */
function slugify(value) {
  return value
    .normalize("NFD")
    .replace(/\p{Diacritic}/gu, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .replace(/-{2,}/g, "-");
}

/**
 * Escapes regular-expression metacharacters so that a string matches
 * literally.
 *
 * @param {string} value - Raw string.
 * @returns {string}
 */
function escapeRegExp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Widens a span so that balanced emphasis markers (`*`, `_`) directly around
 * it are included. For each marker, the span grows by the smaller of the
 * number of markers found before and after it.
 *
 * @param {string} text - Full text.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {{start: number, end: number}}
 */
function includeFormattingCharacters(text, start, end) {
  let newStart = start;
  let newEnd = end;

  for (const marker of INLINE_FORMATTING_CHARS) {
    let prefixCount = 0;
    while (newStart - prefixCount - 1 >= 0 && text[newStart - prefixCount - 1] === marker) {
      prefixCount++;
    }
    let suffixCount = 0;
    while (newEnd + suffixCount < text.length && text[newEnd + suffixCount] === marker) {
      suffixCount++;
    }
    const count = Math.min(prefixCount, suffixCount);
    if (count > 0) {
      newStart -= count;
      newEnd += count;
    }
  }

  return { start: newStart, end: newEnd };
}

/**
 * Returns the number of leading whitespace characters of a line.
 *
 * @param {string} line - Line to inspect.
 * @returns {number}
 */
function getIndentation(line) {
  if (typeof line !== "string" || line.length === 0) return 0;
  const match = line.match(/^\s*/);
  return match ? match[0].length : 0;
}