Ajoute un outil de gestion interactive des liens morts
This commit is contained in:
@@ -1,37 +1,167 @@
|
||||
// Wayback Machine availability endpoint; the URL-encoded target is appended after `url=`.
// NOTE(review): this looks like the pre-change side of the diff - confirm it is still needed.
const ARCHIVE_API_URL = "https://archive.org/wayback/available?url=";
|
||||
const { fetch } = require("undici");
|
||||
|
||||
// Wayback CDX search endpoint queried by listArchiveCaptures.
const ARCHIVE_CDX_URL = "https://web.archive.org/cdx/search/cdx";
// Prefix of the "save page now" endpoint; the URL-encoded target is appended to it.
const ARCHIVE_SAVE_URL = "https://web.archive.org/save/";
// Delay, in milliseconds, after which a pending Archive.org request is aborted.
const ARCHIVE_REQUEST_TIMEOUT_MS = 15000;
|
||||
|
||||
/**
 * Build the public URL of a Wayback Machine capture.
 *
 * The result is a plain concatenation: neither argument is validated or
 * escaped here.
 *
 * @param {string} originalUrl Original URL of the captured page.
 * @param {string} timestamp Wayback capture timestamp.
 * @returns {string} Directly usable web.archive.org URL.
 */
function buildArchiveCaptureUrl(originalUrl, timestamp) {
  return `https://web.archive.org/web/${timestamp}/${originalUrl}`;
}
|
||||
|
||||
/**
 * Coerce a value to a strictly positive integer.
 *
 * The value is stringified and parsed in base 10, so only its leading integer
 * part is kept (`3.9` gives 3, `"12px"` gives 12).
 *
 * @param {unknown} value Value to check.
 * @param {number} fallback Value returned when `value` does not parse to an
 *   integer greater than zero.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function normalizePositiveInteger(value, fallback) {
  const parsed = Number.parseInt(String(value), 10);
  if (Number.isNaN(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Fetch a JSON document from Archive.org with an upper bound on the wait.
 *
 * The request is aborted once ARCHIVE_REQUEST_TIMEOUT_MS milliseconds have
 * elapsed. The deadline covers the whole exchange, including the download and
 * decoding of the response body.
 *
 * @param {string|URL} url URL to call.
 * @returns {Promise<unknown>} Decoded JSON document.
 * @throws {Error} When the response status is not OK, when the request is
 *   aborted by the timeout, or when the body is not valid JSON.
 */
async function fetchArchiveJson(url) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS);

  try {
    const response = await fetch(url, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(`Erreur de l'API Archive.org (${response.status})`);
    }
    // Awaited inside the try block so the timer stays armed while the body is
    // read; clearing it right after the headers would let a stalled body hang.
    return await response.json();
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * List the recent Wayback Machine captures available for a URL.
 *
 * Queries the CDX endpoint for captures recorded with status code 200 and
 * returns them newest first. Rows that are not arrays, or whose timestamp or
 * original URL is not a string, are ignored.
 *
 * @param {string} url Original URL to look up.
 * @param {{ limit?: number }} [options] Query options. `limit` is the maximum
 *   number of captures returned; it falls back to 10 when missing or invalid.
 * @returns {Promise<Array<{ timestamp: string, originalUrl: string, statusCode: number|null, mimetype: string|null, url: string }>>}
 *   Captures sorted by descending timestamp; empty when the response has no
 *   data rows.
 * @throws {Error} Propagates any failure raised by `fetchArchiveJson`.
 */
async function listArchiveCaptures(url, options = {}) {
  // `?.` tolerates an explicit null, which a default parameter does not replace.
  const limit = normalizePositiveInteger(options?.limit, 10);

  const requestUrl = new URL(ARCHIVE_CDX_URL);
  const query = requestUrl.searchParams;
  query.set("url", url);
  query.set("output", "json");
  query.set("fl", "timestamp,original,statuscode,mimetype,digest");
  query.set("filter", "statuscode:200");
  // Per the CDX server documentation, collapsing on the digest drops adjacent
  // captures with identical content, and a negative limit selects the last N
  // rows instead of the first N.
  query.set("collapse", "digest");
  query.set("fastLatest", "true");
  query.set("limit", `-${limit}`);

  const rows = await fetchArchiveJson(requestUrl);
  if (!Array.isArray(rows) || rows.length <= 1) {
    return [];
  }

  // The first row names the columns; the remaining rows are captures.
  const [header, ...dataRows] = rows;
  if (!Array.isArray(header)) {
    return [];
  }

  const timestampIndex = header.indexOf("timestamp");
  const originalIndex = header.indexOf("original");
  const statusCodeIndex = header.indexOf("statuscode");
  const mimetypeIndex = header.indexOf("mimetype");

  const captures = [];
  for (const row of dataRows) {
    if (!Array.isArray(row)) {
      continue;
    }

    const timestamp = row[timestampIndex];
    const originalUrl = row[originalIndex];
    if (typeof timestamp !== "string" || typeof originalUrl !== "string") {
      continue;
    }

    let statusCode = null;
    if (statusCodeIndex > -1) {
      const parsedStatusCode = Number.parseInt(row[statusCodeIndex], 10);
      if (!Number.isNaN(parsedStatusCode)) {
        statusCode = parsedStatusCode;
      }
    }

    let mimetype = null;
    if (mimetypeIndex > -1) {
      const rawMimetype = row[mimetypeIndex];
      if (typeof rawMimetype === "string" && rawMimetype.trim()) {
        mimetype = rawMimetype.trim();
      }
    }

    captures.push({
      timestamp,
      originalUrl,
      statusCode,
      mimetype,
      url: buildArchiveCaptureUrl(originalUrl, timestamp),
    });
  }

  // Newest first, then enforce the limit locally as well.
  captures.sort((left, right) => right.timestamp.localeCompare(left.timestamp));
  return captures.slice(0, limit);
}
|
||||
|
||||
/**
 * Return the most recent capture available for a URL.
 *
 * Delegates to `listArchiveCaptures` with a limit of one and keeps the first
 * entry of the result.
 *
 * @param {string} url Original URL.
 * @returns {Promise<string|null>} The archive.org URL of the capture, or null
 *   when no capture exists.
 * @throws {Error} Propagates any failure raised by `listArchiveCaptures`.
 */
async function getArchiveUrl(url) {
  const captures = await listArchiveCaptures(url, { limit: 1 });
  if (captures.length === 0) {
    return null;
  }
  return captures[0].url;
}
|
||||
|
||||
/**
 * Ask Archive.org to archive a URL.
 *
 * Sends a POST to the save endpoint with the URL-encoded target appended, and
 * aborts the request once ARCHIVE_REQUEST_TIMEOUT_MS milliseconds have
 * elapsed.
 *
 * @param {string} url URL to archive.
 * @returns {Promise<string|null>} Final URL of the response, or null when that
 *   URL still contains `/save/` (no capture location was reached).
 * @throws {Error} When the response status is not OK or the request is
 *   aborted by the timeout.
 */
async function saveToArchive(url) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ARCHIVE_REQUEST_TIMEOUT_MS);

  let response;
  try {
    response = await fetch(`${ARCHIVE_SAVE_URL}${encodeURIComponent(url)}`, {
      method: "POST",
      signal: controller.signal,
    });
  } finally {
    clearTimeout(timer);
  }

  if (!response.ok) {
    throw new Error(`Erreur de sauvegarde Archive.org (${response.status})`);
  }
  if (response.url.includes("/save/")) {
    return null;
  }
  return response.url;
}
|
||||
|
||||
module.exports = { getArchiveUrl, saveToArchive };
|
||||
module.exports = {
|
||||
buildArchiveCaptureUrl,
|
||||
listArchiveCaptures,
|
||||
getArchiveUrl,
|
||||
saveToArchive,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user