#!/usr/bin/env node

/**
 * Parcourt tous les articles Markdown du dossier content/ et
 * crée automatiquement un lien vers la page du mot-clé pour la
 * première occurrence de chaque terme défini dans les taxonomies
 * du frontmatter. Les occurrences déjà liées sont ignorées.
 *
 * Sort avec un code différent de 0 lorsqu'au moins un fichier est modifié.
 */
const fs = require("node:fs");
const path = require("node:path");
const yaml = require("js-yaml");

// Project layout: this script lives in a subdirectory of the repository root.
const PROJECT_ROOT = path.resolve(__dirname, "..");
const CONTENT_ROOT = path.join(PROJECT_ROOT, "content");
const TAXONOMIES_FILE = path.join(PROJECT_ROOT, "config", "_default", "taxonomies.yaml");

// Splits a Markdown file into its YAML frontmatter (group 1) and body
// (group 2). `\r?` tolerates CRLF line endings: Windows-authored files
// previously failed to match and were silently skipped, even though the
// rest of the script (extractCommentedTerms) already splits on /\r?\n/.
const FRONTMATTER_PATTERN = /^---\r?\n([\s\S]+?)\r?\n---\r?\n?([\s\S]*)$/;

// A single "word" character: any Unicode letter or digit.
const WORD_CHAR = /[\p{L}\p{N}]/u;

// Markdown inline-emphasis markers pulled inside generated link labels.
const INLINE_FORMATTING_CHARS = ["*", "_"];
// Kick things off immediately; function declarations are hoisted.
main();

/**
 * Entry point: links taxonomy keywords in every Markdown article and
 * reports which files were rewritten. Exits with code 2 when at least
 * one file was modified, so automation can flag the changes for review.
 */
function main() {
  const taxonomyMapping = loadTaxonomyMapping(TAXONOMIES_FILE);
  const files = collectMarkdownFiles(CONTENT_ROOT);

  if (files.length === 0) {
    console.log("Aucun article Markdown trouvé sous content/.");
    return;
  }

  const changed = files.filter((filePath) => processFile(filePath, taxonomyMapping));

  if (changed.length === 0) {
    console.log("Tous les articles sont déjà correctement liés.");
    return;
  }

  for (const filePath of changed) {
    console.log(`✏️ ${path.relative(PROJECT_ROOT, filePath)}`);
  }
  console.log("Des modifications ont été effectuées. Merci de les revoir.");
  process.exit(2);
}
/**
 * Rewrites one article so the first occurrence of each taxonomy keyword
 * becomes a Markdown link.
 *
 * @param {string} filePath - Absolute path of the Markdown file.
 * @param {{fieldToCanonical: Map<string, string>}} taxonomyMapping
 * @returns {boolean} true when the file was modified on disk.
 */
function processFile(filePath, taxonomyMapping) {
  let raw;
  try {
    raw = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    console.warn(`⚠️ Impossible de lire ${filePath}: ${error.message}`);
    return false;
  }

  const match = raw.match(FRONTMATTER_PATTERN);
  if (!match) return false;

  let frontmatter;
  try {
    frontmatter = yaml.load(match[1]) || {};
  } catch (error) {
    console.warn(`⚠️ Frontmatter invalide dans ${filePath}: ${error.message}`);
    return false;
  }

  const keywords = extractKeywords(frontmatter, match[1], taxonomyMapping.fieldToCanonical);
  if (keywords.length === 0) return false;

  const { body, changed } = linkKeywordsInBody(match[2], keywords);
  if (!changed) return false;

  // Everything before the body (frontmatter + delimiters) is kept verbatim.
  const prefixLength = raw.length - match[2].length;
  fs.writeFileSync(filePath, raw.slice(0, prefixLength) + body, "utf8");
  return true;
}
/**
 * Reads the Hugo taxonomy configuration and builds a lookup table that
 * maps every frontmatter field name (singular key or plural value) to
 * its canonical taxonomy name.
 *
 * Exits the process with code 1 when the file is unreadable, contains
 * invalid YAML, or defines no taxonomy at all.
 *
 * @param {string} configPath - Path to taxonomies.yaml.
 * @returns {{fieldToCanonical: Map<string, string>}}
 */
function loadTaxonomyMapping(configPath) {
  let raw;
  try {
    raw = fs.readFileSync(configPath, "utf8");
  } catch (error) {
    console.error(`Impossible de lire ${configPath}: ${error.message}`);
    process.exit(1);
  }

  let data;
  try {
    data = yaml.load(raw) || {};
  } catch (error) {
    console.error(`YAML invalide dans ${configPath}: ${error.message}`);
    process.exit(1);
  }

  if (typeof data !== "object" || data === null) {
    console.error(`Format inattendu dans ${configPath}`);
    process.exit(1);
  }

  const fieldToCanonical = new Map();
  for (const [singular, plural] of Object.entries(data)) {
    // Prefer the plural value as the canonical name; fall back to the key.
    const hasPlural = typeof plural === "string" && plural.trim().length > 0;
    const canonicalName = hasPlural ? plural.trim() : singular.trim();
    if (!canonicalName) continue;

    // Both spellings of the field resolve to the same canonical name.
    for (const name of new Set([singular, canonicalName].filter(Boolean))) {
      fieldToCanonical.set(name, canonicalName);
    }
  }

  if (fieldToCanonical.size === 0) {
    console.error("Aucune taxonomie n'est définie dans la configuration.");
    process.exit(1);
  }

  return { fieldToCanonical };
}
/**
 * Returns every Markdown file under `root`, sorted for deterministic output.
 *
 * @param {string} root - Directory to scan recursively.
 * @returns {string[]} Absolute file paths.
 */
function collectMarkdownFiles(root) {
  const bucket = [];
  walk(root, bucket);
  bucket.sort((a, b) => a.localeCompare(b));
  return bucket;
}
/**
 * Depth-first directory traversal that appends every Markdown file
 * (case-insensitive `.md`) to `bucket`. Skips `.git` and `node_modules`
 * at any depth; unreadable directories are reported but do not abort
 * the scan.
 *
 * @param {string} dir - Directory to inspect.
 * @param {string[]} bucket - Accumulator receiving absolute file paths.
 */
function walk(dir, bucket) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    console.warn(`⚠️ Impossible de parcourir ${dir}: ${error.message}`);
    return;
  }

  for (const entry of entries) {
    const { name } = entry;
    if (name === ".git" || name === "node_modules") continue;

    const absolute = path.join(dir, name);
    if (entry.isDirectory()) {
      walk(absolute, bucket);
    } else if (entry.isFile() && name.toLowerCase().endsWith(".md")) {
      bucket.push(absolute);
    }
  }
}
/**
 * Builds the list of unique keywords declared in an article's
 * frontmatter, from both active taxonomy fields and commented-out
 * list items.
 *
 * @param {object} frontmatter - Parsed YAML frontmatter.
 * @param {string} frontmatterRaw - Raw frontmatter text (for comments).
 * @param {Map<string, string>} fieldToCanonical - Field → taxonomy name.
 * @returns {{taxonomy: string, term: string, url: string}[]}
 */
function extractKeywords(frontmatter, frontmatterRaw, fieldToCanonical) {
  const keywords = [];
  const seen = new Set();

  // Registers one term, ignoring blanks, duplicates (case-insensitive,
  // per taxonomy) and terms whose slug would be empty.
  const addKeyword = (taxonomy, term) => {
    if (!taxonomy || typeof term !== "string") return;
    const normalized = term.trim();
    if (!normalized) return;
    const dedupeKey = `${taxonomy}::${normalized.toLowerCase()}`;
    if (seen.has(dedupeKey)) return;
    const slug = slugify(normalized);
    if (!slug) return;
    seen.add(dedupeKey);
    keywords.push({ taxonomy, term: normalized, url: `/${taxonomy}/${slug}/` });
  };

  if (typeof frontmatter === "object" && frontmatter !== null) {
    for (const [field, value] of Object.entries(frontmatter)) {
      const canonical = fieldToCanonical.get(field);
      if (!canonical) continue;
      for (const term of normalizeTerms(value)) {
        addKeyword(canonical, term);
      }
    }
  }

  for (const { taxonomy, term } of extractCommentedTerms(frontmatterRaw, fieldToCanonical)) {
    addKeyword(taxonomy, term);
  }

  return keywords;
}
/**
 * Coerces a frontmatter value (scalar or list) into an array of
 * cleaned, non-empty term strings.
 *
 * @param {*} value - Raw frontmatter field value.
 * @returns {string[]}
 */
function normalizeTerms(value) {
  if (!Array.isArray(value)) {
    const single = normalizeTerm(value);
    return single ? [single] : [];
  }
  return value.map((item) => normalizeTerm(item)).filter(Boolean);
}
/**
 * Trims a single term. Returns null for non-strings and blank strings.
 *
 * @param {*} value
 * @returns {string|null}
 */
function normalizeTerm(value) {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed || null;
}
/**
 * Scans the raw frontmatter for commented-out list items nested under a
 * known taxonomy field, e.g.:
 *
 *   tags:
 *     # - archived-tag
 *
 * A commented item only counts when it is indented deeper than the
 * field line it follows.
 *
 * @param {string} frontmatterRaw - Raw frontmatter text.
 * @param {Map<string, string>} fieldToCanonical - Field → taxonomy name.
 * @returns {{taxonomy: string, term: string}[]}
 */
function extractCommentedTerms(frontmatterRaw, fieldToCanonical) {
  if (typeof frontmatterRaw !== "string" || frontmatterRaw.length === 0) {
    return [];
  }

  const found = [];
  // Context set by the most recent field line; null disables collection.
  let activeTaxonomy = null;
  let activeIndent = 0;

  for (const line of frontmatterRaw.split(/\r?\n/)) {
    const indent = getIndentation(line);

    // A field line ("name:" followed by nothing or only a comment)
    // switches context; an unknown field deactivates collection.
    const fieldMatch = line.match(/^\s*([A-Za-z0-9_]+):\s*(?:#.*)?$/);
    if (fieldMatch) {
      activeTaxonomy = fieldToCanonical.get(fieldMatch[1]) || null;
      activeIndent = indent;
      continue;
    }

    if (!activeTaxonomy) continue;

    const commentMatch = line.match(/^\s*#\s*-\s+(.*)$/);
    if (!commentMatch || indent <= activeIndent) continue;

    const term = commentMatch[1].trim();
    if (term) {
      found.push({ taxonomy: activeTaxonomy, term });
    }
  }

  return found;
}
/**
 * Turns the first eligible occurrence of each keyword into a Markdown
 * link to its taxonomy page. Occurrences already inside a link (label
 * or destination) are left untouched, and emphasis markers (* / _)
 * hugging the match are pulled inside the link label.
 *
 * @param {string} body - Markdown body of the article.
 * @param {{term: string, url: string}[]} keywords
 * @returns {{body: string, changed: boolean}}
 */
function linkKeywordsInBody(body, keywords) {
  if (typeof body !== "string" || body.length === 0 || keywords.length === 0) {
    return { body, changed: false };
  }

  let text = body;
  let changed = false;
  let linkRanges = computeLinkRanges(text);

  for (const { term, url } of keywords) {
    const occurrence = findKeywordOccurrence(text, term, linkRanges);
    if (!occurrence) continue;

    const { start, end } = includeFormattingCharacters(text, occurrence.start, occurrence.end);
    text = `${text.slice(0, start)}[${text.slice(start, end)}](${url})${text.slice(end)}`;
    changed = true;
    // Offsets shifted after the insertion; rescan so later keywords see
    // the freshly inserted link.
    linkRanges = computeLinkRanges(text);
  }

  return { body: text, changed };
}
/**
 * Finds the first case-insensitive occurrence of `keyword` in `text`
 * that lies outside every existing link and sits on word boundaries.
 *
 * @param {string} text - Markdown body.
 * @param {string} keyword - Literal term to search for.
 * @param {object[]} linkRanges - Ranges from computeLinkRanges().
 * @returns {{start: number, end: number, text: string}|null}
 */
function findKeywordOccurrence(text, keyword, linkRanges) {
  if (!keyword) return null;
  const escaped = escapeRegExp(keyword);
  if (!escaped) return null;

  const pattern = new RegExp(escaped, "giu");
  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    const start = hit.index;
    const end = start + hit[0].length;
    if (isInsideExistingLink(start, end, linkRanges)) continue;
    if (!hasWordBoundaries(text, start, end)) continue;
    return { start, end, text: hit[0] };
  }

  return null;
}
/**
 * Locates every Markdown link / image in `text` and returns the index
 * ranges of their label ("[...]") and destination ("(...)") parts.
 * These ranges let the linker avoid touching keywords that are already
 * part of a link.
 *
 * @param {string} text - Markdown source.
 * @returns {{textStart: number, textEnd: number, destStart: number,
 *            destEnd: number, isImage: boolean}[]}
 */
function computeLinkRanges(text) {
  const ranges = [];
  if (typeof text !== "string" || text.length === 0) {
    return ranges;
  }

  for (let i = 0; i < text.length; i++) {
    let isImage = false;
    if (text[i] === "!" && text[i + 1] === "[") {
      isImage = true;
      i += 1;
    }
    if (text[i] !== "[") continue;

    const openBracket = i;
    const closeBracket = findMatchingPair(text, openBracket, "[", "]");
    if (closeBracket === -1) continue;

    // The destination must follow the label, optionally after whitespace.
    let pointer = closeBracket + 1;
    while (pointer < text.length && /\s/.test(text[pointer])) pointer++;
    if (pointer >= text.length || text[pointer] !== "(") {
      i = closeBracket;
      continue;
    }

    const openParen = pointer;
    const closeParen = findMatchingPair(text, openParen, "(", ")");
    if (closeParen === -1) {
      // Unterminated destination: skip this candidate but keep scanning.
      // (A `break` here would silently drop every later link in the
      // document, inconsistent with the unmatched-bracket case above.)
      i = closeBracket;
      continue;
    }

    ranges.push({
      textStart: openBracket + 1,
      textEnd: closeBracket,
      destStart: openParen + 1,
      destEnd: closeParen,
      isImage,
    });

    i = closeParen;
  }

  return ranges;
}
/**
 * Given `startIndex` pointing at `openChar`, returns the index of the
 * balancing `closeChar`, honouring backslash escapes and nesting.
 * Returns -1 when the pair never closes before the end of the string.
 *
 * @param {string} text
 * @param {number} startIndex - Index of the opening character.
 * @param {string} openChar
 * @param {string} closeChar
 * @returns {number}
 */
function findMatchingPair(text, startIndex, openChar, closeChar) {
  let depth = 0;
  let i = startIndex;
  while (i < text.length) {
    const ch = text[i];
    if (ch === "\\") {
      // Escaped character: skip it entirely.
      i += 2;
      continue;
    }
    if (ch === openChar) {
      depth += 1;
    } else if (ch === closeChar) {
      depth -= 1;
      if (depth === 0) return i;
    }
    i += 1;
  }
  return -1;
}
/**
 * Reports whether [start, end) intersects the label or destination of
 * any previously detected link range.
 *
 * @param {number} start - Inclusive start offset of the candidate match.
 * @param {number} end - Exclusive end offset of the candidate match.
 * @param {object[]} ranges - Ranges from computeLinkRanges().
 * @returns {boolean}
 */
function isInsideExistingLink(start, end, ranges) {
  for (const range of ranges) {
    if (start < range.textEnd && end > range.textStart) return true;
    const hasDest = typeof range.destStart === "number" && typeof range.destEnd === "number";
    if (hasDest && start < range.destEnd && end > range.destStart) return true;
  }
  return false;
}
/**
 * Checks that the match [start, end) does not sit in the middle of a
 * larger word: a word character at either edge of the match must not be
 * flanked by another word character immediately outside it.
 *
 * @param {string} text
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {boolean}
 */
function hasWordBoundaries(text, start, end) {
  const charBefore = start > 0 ? text[start - 1] : "";
  const charAfter = end < text.length ? text[end] : "";

  const brokenAtStart = isWordChar(text[start]) && isWordChar(charBefore);
  const brokenAtEnd = isWordChar(text[end - 1]) && isWordChar(charAfter);
  return !brokenAtStart && !brokenAtEnd;
}
/**
 * True when `ch` is a Unicode letter or digit; empty or undefined
 * input yields false.
 */
function isWordChar(ch) {
  if (!ch) return false;
  return WORD_CHAR.test(ch);
}
/**
 * Converts a term into its URL slug: diacritics stripped, lowercased,
 * runs of non-alphanumeric characters collapsed to single hyphens, and
 * no leading or trailing hyphen.
 *
 * @param {string} value - Human-readable term.
 * @returns {string}
 */
function slugify(value) {
  // NFD decomposition separates base letters from combining accents so
  // the accents can be removed in one pass.
  const ascii = value.normalize("NFD").replace(/\p{Diacritic}/gu, "");
  return ascii
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .replace(/-{2,}/g, "-");
}
/**
 * Escapes regex metacharacters so `value` can be embedded literally in
 * a RegExp pattern.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const specials = /[.*+?^${}()|[\]\\]/g;
  return value.replace(specials, (ch) => `\\${ch}`);
}
/**
 * Widens [start, end) so that balanced emphasis markers (* or _)
 * hugging the match are swallowed into the range. Only as many markers
 * as appear on BOTH sides are absorbed, which keeps the emphasis
 * balanced once the range becomes a link label.
 *
 * @param {string} text
 * @param {number} start - Inclusive start offset of the match.
 * @param {number} end - Exclusive end offset of the match.
 * @returns {{start: number, end: number}}
 */
function includeFormattingCharacters(text, start, end) {
  let rangeStart = start;
  let rangeEnd = end;

  for (const marker of INLINE_FORMATTING_CHARS) {
    let leading = 0;
    while (rangeStart - leading - 1 >= 0 && text[rangeStart - leading - 1] === marker) {
      leading += 1;
    }

    let trailing = 0;
    while (rangeEnd + trailing < text.length && text[rangeEnd + trailing] === marker) {
      trailing += 1;
    }

    const balanced = Math.min(leading, trailing);
    rangeStart -= balanced;
    rangeEnd += balanced;
  }

  return { start: rangeStart, end: rangeEnd };
}
/**
 * Number of leading whitespace characters on `line`; non-strings and
 * empty strings count as 0.
 *
 * @param {*} line
 * @returns {number}
 */
function getIndentation(line) {
  if (typeof line !== "string" || line.length === 0) return 0;
  const leading = line.match(/^\s*/);
  return leading === null ? 0 : leading[0].length;
}