#!/usr/bin/env node
/**
 * Internal-link checker.
 *
 * Walks the site's `content/` tree, extracts every internal (root-relative)
 * link found in Markdown and YAML files, and verifies that each one points
 * to an existing content directory or to a taxonomy-term page derived from
 * frontmatter keywords. Broken links are reported on stderr and the process
 * exits with a non-zero code.
 */
const fs = require("fs");
const path = require("path");
const yaml = require("js-yaml");
const { sanitizeUrlCandidate } = require("./lib/markdown_links");

const SITE_ROOT = path.resolve(__dirname, "..");
const CONTENT_DIR = path.join(SITE_ROOT, "content");
const TAXONOMIES_FILE = path.join(SITE_ROOT, "config", "_default", "taxonomies.yaml");

// File extensions scanned for links, and the subset treated as Markdown.
const TARGET_EXTENSIONS = new Set([".md", ".markdown", ".mdx", ".yaml", ".yml"]);
const MARKDOWN_EXTENSIONS = new Set([".md", ".markdown", ".mdx"]);

// Bare "/something" tokens in non-Markdown files (the char class excludes
// quotes, brackets, pipes and whitespace so the match stops at delimiters).
const INTERNAL_LINK_REGEX = /\/[^\s"'`<>\\\[\]{}|]+/g;
// Characters allowed immediately before a bare link candidate.
const VALID_PREFIX_REGEX = /[\s"'`([<{=:]/;
// YAML "path:" keys are filesystem paths, not site links — skip those lines.
const PATH_KEY_REGEX = /^\s*(?:"path"|'path'|path)\s*:/i;
// Leading `--- ... ---` frontmatter block at the very start of a file.
const FRONTMATTER_PATTERN = /^---\r?\n([\s\S]+?)\r?\n---\r?\n?/;

/** Converts an OS-specific path to forward-slash form. */
function toPosix(value) {
  return value.split(path.sep).join("/");
}

/** Returns `filePath` relative to the site root, in POSIX form (for messages). */
function relativeToSite(filePath) {
  return toPosix(path.relative(SITE_ROOT, filePath));
}

/** True when the file's extension is one we scan for links. */
function isTargetFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return TARGET_EXTENSIONS.has(ext);
}

/** True when the file should be parsed as Markdown. */
function isMarkdownFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return MARKDOWN_EXTENSIONS.has(ext);
}

/** True for plain YAML data files (.yaml / .yml). */
function isYamlFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  return ext === ".yaml" || ext === ".yml";
}

/**
 * Recursively walks `rootDir` and collects:
 *  - `files`: absolute paths of every scannable file;
 *  - `directories`: the set of site-relative directory URLs ("/" for the root),
 *    which are the valid targets for internal links.
 *
 * @param {string} rootDir - Absolute path of the content root.
 * @returns {{files: string[], directories: Set<string>}}
 */
function collectContentEntries(rootDir) {
  const files = [];
  const directories = new Set(["/"]);
  function walk(currentDir) {
    const entries = fs.readdirSync(currentDir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        const relative = path.relative(rootDir, fullPath);
        const normalized = relative ? `/${toPosix(relative)}` : "/";
        directories.add(normalized);
        walk(fullPath);
      } else if (entry.isFile() && isTargetFile(fullPath)) {
        files.push(fullPath);
      }
    }
  }
  walk(rootDir);
  return { files, directories };
}

/**
 * Scans Markdown frontmatter across `files` and returns the set of normalized
 * taxonomy-term URLs (e.g. "/tags/foo") that Hugo will generate. These count
 * as valid link targets even though no matching directory exists on disk.
 *
 * @param {string[]} files - Absolute paths of candidate files.
 * @returns {Set<string>} Normalized taxonomy URLs; empty when no taxonomy
 *   config could be loaded.
 */
function collectTaxonomyKeywordPaths(files) {
  const mapping = loadTaxonomyMapping(TAXONOMIES_FILE);
  if (!mapping) {
    return new Set();
  }
  const keywordPaths = new Set();
  for (const filePath of files) {
    if (!isMarkdownFile(filePath)) {
      continue;
    }
    let raw;
    try {
      raw = fs.readFileSync(filePath, "utf8");
    } catch (error) {
      console.warn(
        `Impossible de lire ${relativeToSite(filePath)} pour extraire les taxonomies (${error.message}).`,
      );
      continue;
    }
    const frontmatterMatch = raw.match(FRONTMATTER_PATTERN);
    if (!frontmatterMatch) {
      continue;
    }
    let frontmatter = {};
    try {
      frontmatter = yaml.load(frontmatterMatch[1]) || {};
    } catch (error) {
      console.warn(`Frontmatter invalide dans ${relativeToSite(filePath)} (${error.message}).`);
      continue;
    }
    const keywords = extractTaxonomyKeywords(
      frontmatter,
      frontmatterMatch[1],
      mapping.fieldToCanonical,
    );
    for (const keyword of keywords) {
      const normalized = normalizeInternalLink(keyword.url);
      if (normalized) {
        keywordPaths.add(normalized);
      }
    }
  }
  return keywordPaths;
}

/**
 * Loads the taxonomy configuration (a singular → plural mapping) and builds a
 * Map from every accepted frontmatter field name (singular or plural) to its
 * canonical (plural) taxonomy name.
 *
 * @param {string} configPath - Absolute path of taxonomies.yaml.
 * @returns {{fieldToCanonical: Map<string, string>} | null} null when the
 *   file is unreadable, invalid, or declares no usable taxonomy.
 */
function loadTaxonomyMapping(configPath) {
  let raw;
  try {
    raw = fs.readFileSync(configPath, "utf8");
  } catch (error) {
    console.warn(`Impossible de lire ${relativeToSite(configPath)} (${error.message}).`);
    return null;
  }
  let data;
  try {
    data = yaml.load(raw) || {};
  } catch (error) {
    console.warn(`YAML invalide dans ${relativeToSite(configPath)} (${error.message}).`);
    return null;
  }
  if (typeof data !== "object" || data === null) {
    console.warn(`Format inattendu dans ${relativeToSite(configPath)}.`);
    return null;
  }
  const fieldToCanonical = new Map();
  for (const [singular, plural] of Object.entries(data)) {
    // Prefer the plural value when present; fall back to the singular key.
    const canonical =
      typeof plural === "string" && plural.trim().length > 0 ? plural.trim() : singular.trim();
    if (!canonical) continue;
    const candidates = new Set([singular, canonical].filter(Boolean));
    for (const candidate of candidates) {
      fieldToCanonical.set(candidate, canonical);
    }
  }
  if (fieldToCanonical.size === 0) {
    console.warn("Aucune taxonomie valide n'a été trouvée.");
    return null;
  }
  return { fieldToCanonical };
}

/**
 * Extracts taxonomy keywords from parsed frontmatter AND from commented-out
 * list items in the raw frontmatter text (terms kept as `# - term`).
 * Deduplicates per taxonomy, case-insensitively.
 *
 * @param {object} frontmatter - Parsed YAML frontmatter.
 * @param {string} frontmatterRaw - Raw frontmatter text (for commented terms).
 * @param {Map<string, string>} fieldToCanonical - Field → canonical taxonomy.
 * @returns {{taxonomy: string, term: string, url: string}[]}
 */
function extractTaxonomyKeywords(frontmatter, frontmatterRaw, fieldToCanonical) {
  const keywords = [];
  const seen = new Set();
  function addKeyword(taxonomy, term) {
    if (!taxonomy || typeof term !== "string") return;
    const normalized = term.trim();
    if (!normalized) return;
    const slug = slugify(normalized);
    if (!slug) return;
    const key = `${taxonomy}::${normalized.toLowerCase()}`;
    if (seen.has(key)) return;
    seen.add(key);
    keywords.push({
      taxonomy,
      term: normalized,
      url: `/${taxonomy}/${slug}/`,
    });
  }
  if (typeof frontmatter === "object" && frontmatter !== null) {
    for (const [field, value] of Object.entries(frontmatter)) {
      const canonical = fieldToCanonical.get(field);
      if (!canonical) continue;
      const terms = normalizeTerms(value);
      for (const term of terms) {
        addKeyword(canonical, term);
      }
    }
  }
  for (const entry of extractCommentedTerms(frontmatterRaw, fieldToCanonical)) {
    addKeyword(entry.taxonomy, entry.term);
  }
  return keywords;
}

/** Coerces a frontmatter value (scalar or array) into a list of clean terms. */
function normalizeTerms(value) {
  if (Array.isArray(value)) {
    return value.map((item) => normalizeTerm(item)).filter(Boolean);
  }
  const single = normalizeTerm(value);
  return single ? [single] : [];
}

/** Trims a single term; returns null for non-strings and empty strings. */
function normalizeTerm(value) {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Finds commented-out list items (`# - term`) indented under a taxonomy field
 * in the raw frontmatter text. Only lines indented MORE than the field line
 * are accepted, so top-level comments are ignored.
 *
 * @param {string} frontmatterRaw - Raw frontmatter text.
 * @param {Map<string, string>} fieldToCanonical - Field → canonical taxonomy.
 * @returns {{taxonomy: string, term: string}[]}
 */
function extractCommentedTerms(frontmatterRaw, fieldToCanonical) {
  if (typeof frontmatterRaw !== "string" || frontmatterRaw.length === 0) {
    return [];
  }
  const results = [];
  const lines = frontmatterRaw.split(/\r?\n/);
  let currentCanonical = null;
  let currentIndent = 0;
  for (const line of lines) {
    const indent = getIndentation(line);
    // A "field:" line (possibly followed by a comment) opens a new context.
    const fieldMatch = line.match(/^\s*([A-Za-z0-9_]+):\s*(?:#.*)?$/);
    if (fieldMatch) {
      const fieldName = fieldMatch[1];
      currentCanonical = fieldToCanonical.get(fieldName) || null;
      currentIndent = indent;
      continue;
    }
    if (!currentCanonical) continue;
    const commentMatch = line.match(/^\s*#\s*-\s+(.*)$/);
    if (!commentMatch) continue;
    if (indent <= currentIndent) continue;
    const term = commentMatch[1].trim();
    if (!term) continue;
    results.push({ taxonomy: currentCanonical, term });
  }
  return results;
}

/** Returns the number of leading whitespace characters of `line`. */
function getIndentation(line) {
  if (typeof line !== "string" || line.length === 0) return 0;
  const match = line.match(/^\s*/);
  return match ? match[0].length : 0;
}

/**
 * Builds a URL slug: strips diacritics (NFD + Unicode Diacritic class),
 * lowercases, collapses non-alphanumeric runs to "-", trims edge dashes.
 */
function slugify(value) {
  return value
    .normalize("NFD")
    .replace(/\p{Diacritic}/gu, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .replace(/-{2,}/g, "-");
}

/**
 * Accepts a raw link candidate only when it is a root-relative internal URL:
 * starts with a single "/", is not protocol-relative ("//…") and carries no
 * scheme. Returns the sanitized candidate or null.
 */
function sanitizeInternalLink(raw) {
  const candidate = sanitizeUrlCandidate(raw);
  if (!candidate) return null;
  if (!candidate.startsWith("/")) return null;
  if (candidate.startsWith("//")) return null;
  if (candidate.includes("://")) return null;
  return candidate;
}

/**
 * Canonicalizes an internal link: drops query string and fragment, collapses
 * repeated slashes, removes trailing slashes ("/" stays "/").
 *
 * @param {string} link - A root-relative link.
 * @returns {string | null} Normalized path, or null for non-internal input.
 */
function normalizeInternalLink(link) {
  if (typeof link !== "string" || !link.startsWith("/")) {
    return null;
  }
  let normalized = link.split("?")[0];
  normalized = normalized.split("#")[0];
  normalized = normalized.replace(/\/+/g, "/");
  normalized = normalized.replace(/\/+$/, "");
  if (!normalized) {
    normalized = "/";
  }
  return normalized;
}

/**
 * Sanitizes and normalizes a raw link candidate in one step.
 * Private helper shared by both extraction paths in extractInternalLinks.
 *
 * @param {string} raw - Raw link text as found in the file.
 * @returns {string | null} Canonical internal link, or null when rejected.
 */
function resolveInternalLink(raw) {
  const sanitized = sanitizeInternalLink(raw);
  if (!sanitized) return null;
  const normalized = normalizeInternalLink(sanitized);
  // Defensive: keep rejecting a literal "//" result, as the original did.
  if (!normalized || normalized === "//") return null;
  return normalized;
}

/** Maps a normalized internal link to the content directory it should match. */
function expectedDirForLink(link) {
  if (link === "/") {
    return CONTENT_DIR;
  }
  const relative = link.slice(1);
  const segments = relative.split("/").filter(Boolean);
  return path.join(CONTENT_DIR, ...segments);
}

/** Counts how many times `char` repeats in `text` starting at `startIndex`. */
function countRepeatedChar(text, startIndex, char) {
  let count = 0;
  while (text[startIndex + count] === char) {
    count++;
  }
  return count;
}

/**
 * Finds the index of the `closeChar` that balances the `openChar` at
 * `startIndex`, honoring backslash escapes. Returns -1 when unbalanced.
 */
function findMatchingPair(text, startIndex, openChar, closeChar) {
  let depth = 0;
  for (let i = startIndex; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\") {
      i++; // skip the escaped character
      continue;
    }
    if (ch === openChar) {
      depth++;
    } else if (ch === closeChar) {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Extracts `[text](destination)` links from one line of Markdown, skipping
 * anything inside inline code spans (a span closes only on a backtick run of
 * the same length it was opened with, per CommonMark).
 *
 * @param {string} line - One line of Markdown text.
 * @returns {{destination: string}[]}
 */
function extractMarkdownLinksFromLine(line) {
  const results = [];
  let inlineFence = null; // run length of the currently open code span
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (ch === "`") {
      const runLength = countRepeatedChar(line, i, "`");
      if (!inlineFence) {
        inlineFence = runLength;
      } else if (inlineFence === runLength) {
        inlineFence = null;
      }
      i += runLength - 1;
      continue;
    }
    if (inlineFence) {
      continue;
    }
    if (ch !== "[") {
      continue;
    }
    const closeBracket = findMatchingPair(line, i, "[", "]");
    if (closeBracket === -1) {
      break;
    }
    let pointer = closeBracket + 1;
    while (pointer < line.length && /\s/.test(line[pointer])) {
      pointer++;
    }
    if (pointer >= line.length || line[pointer] !== "(") {
      // Not an inline link (e.g. reference style) — resume after the "]".
      i = closeBracket;
      continue;
    }
    const closeParen = findMatchingPair(line, pointer, "(", ")");
    if (closeParen === -1) {
      break;
    }
    const destination = line.slice(pointer + 1, closeParen);
    results.push({ destination });
    i = closeParen;
  }
  return results;
}

/**
 * Extracts every internal link from one file.
 *
 * Markdown files: frontmatter and fenced code blocks are skipped, and only
 * real `[text](url)` destinations are considered. Other files: bare "/…"
 * tokens are matched, except on YAML "path:" lines (filesystem paths).
 *
 * @param {string} filePath - Absolute path of the file to scan.
 * @returns {{link: string, line: number}[]} Normalized links with 1-based lines.
 * @throws When the file cannot be read.
 */
function extractInternalLinks(filePath) {
  const content = fs.readFileSync(filePath, "utf8");
  const lines = content.split(/\r?\n/);
  const entries = [];
  const skipPathKey = isYamlFile(filePath);
  const treatAsMarkdown = isMarkdownFile(filePath);
  let fenceDelimiter = null; // "`" or "~" while inside a fenced code block
  let inFrontMatter = false;
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    const trimmed = line.trim();
    if (treatAsMarkdown) {
      if (index === 0 && trimmed === "---") {
        inFrontMatter = true;
        continue;
      }
      if (inFrontMatter) {
        if (trimmed === "---") {
          inFrontMatter = false;
        }
        continue;
      }
      const fenceMatch = trimmed.match(/^(```+|~~~+)/);
      if (fenceMatch) {
        const delimiterChar = fenceMatch[1][0];
        if (!fenceDelimiter) {
          fenceDelimiter = delimiterChar;
        } else if (delimiterChar === fenceDelimiter) {
          fenceDelimiter = null;
        }
        continue;
      }
      if (fenceDelimiter) {
        continue;
      }
      const markdownLinks = extractMarkdownLinksFromLine(line);
      for (const { destination } of markdownLinks) {
        const normalized = resolveInternalLink(destination);
        if (!normalized) continue;
        entries.push({ link: normalized, line: index + 1 });
      }
      continue;
    }
    if (skipPathKey && PATH_KEY_REGEX.test(line)) {
      continue;
    }
    for (const match of line.matchAll(INTERNAL_LINK_REGEX)) {
      const raw = match[0];
      const startIndex = match.index ?? line.indexOf(raw);
      if (startIndex > 0) {
        // Require a delimiter before the candidate so "a/b" is not a link.
        const prevChar = line[startIndex - 1];
        if (!VALID_PREFIX_REGEX.test(prevChar)) {
          continue;
        }
      }
      const normalized = resolveInternalLink(raw);
      if (!normalized) continue;
      entries.push({ link: normalized, line: index + 1 });
    }
  }
  return entries;
}

/**
 * Records one broken-link occurrence, deduplicating identical file:line
 * references for the same link.
 *
 * @param {Map<string, object>} missingMap - Accumulator keyed by link.
 * @param {string} link - Normalized broken link.
 * @param {string} filePath - Absolute path of the referencing file.
 * @param {number} line - 1-based line number of the reference.
 */
function addMissingLink(missingMap, link, filePath, line) {
  let entry = missingMap.get(link);
  if (!entry) {
    entry = {
      expectedPath: expectedDirForLink(link),
      references: [],
      referenceKeys: new Set(),
    };
    missingMap.set(link, entry);
  }
  const referenceKey = `${filePath}:${line}`;
  if (entry.referenceKeys.has(referenceKey)) {
    return;
  }
  entry.referenceKeys.add(referenceKey);
  entry.references.push({
    file: relativeToSite(filePath),
    line,
  });
}

/**
 * Entry point: walks the content tree, validates every internal link against
 * existing directories and taxonomy-term URLs, and reports broken links on
 * stderr (sorted with the French collation). Sets exit code 1 on failure.
 */
function main() {
  if (!fs.existsSync(CONTENT_DIR)) {
    console.error(`Le dossier content est introuvable (${CONTENT_DIR}).`);
    process.exit(1);
  }
  const { files, directories } = collectContentEntries(CONTENT_DIR);
  const taxonomyPaths = collectTaxonomyKeywordPaths(files);
  for (const keywordPath of taxonomyPaths) {
    directories.add(keywordPath);
  }
  const missingLinks = new Map();
  for (const filePath of files) {
    let entries;
    try {
      entries = extractInternalLinks(filePath);
    } catch (error) {
      console.warn(`Impossible de lire ${relativeToSite(filePath)} (${error.message}).`);
      continue;
    }
    for (const { link, line } of entries) {
      if (directories.has(link)) {
        continue;
      }
      addMissingLink(missingLinks, link, filePath, line);
    }
  }
  if (missingLinks.size === 0) {
    console.log("Tous les liens internes pointent vers un dossier existant.");
    return;
  }
  console.error(`Liens internes cassés détectés: ${missingLinks.size}`);
  const sorted = Array.from(missingLinks.entries()).sort((a, b) => a[0].localeCompare(b[0], "fr"));
  for (const [link, data] of sorted) {
    const expectedRelative = relativeToSite(data.expectedPath);
    console.error(`- ${link} (attendu: ${expectedRelative})`);
    for (const reference of data.references) {
      console.error(` • ${reference.file}:${reference.line}`);
    }
  }
  process.exitCode = 1;
}

if (require.main === module) {
  try {
    main();
  } catch (error) {
    console.error(`Erreur lors de la vérification des liens internes: ${error.message}`);
    process.exit(1);
  }
}