From c17f07d2f9a7d7a58ffb7da75a9134fa1c457bcc Mon Sep 17 00:00:00 2001
From: Richard Dern
Date: Thu, 11 Sep 2025 00:04:43 +0200
Subject: [PATCH] tools/add_lego.js: do not overwrite index.md; add image refs with alt/title; skip redundant downloads; log actions; report unreferenced images; add 1-week JSON cache with --no-cache option

---
Review notes (hand-revised patch; this section is not part of the commit message):
- cachedApiJson: single fetch/write path for both modes; cache stat errors
  other than ENOENT are logged instead of being silently ignored.
- escapeMarkdownAlt: lone CR line breaks are replaced as well.
- readYamlTitle: quoted titles containing escaped quotes are no longer
  truncated at the first inner quote.
- Unreferenced-image report: the optional "title" after the image target is
  ignored, so references generated by this tool are recognised.
- Hunk offsets and the diffstat were recomputed by hand; the post-image blob
  id on the "index" line predates this revision.

 tools/add_lego.js | 266 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 240 insertions(+), 26 deletions(-)

diff --git a/tools/add_lego.js b/tools/add_lego.js
index 5d860a48..d1c9e202 100644
--- a/tools/add_lego.js
+++ b/tools/add_lego.js
@@ -12,7 +12,8 @@
   - Create data/images/
   - Download all available set images from Rebrickable
   - Create data/images/.yaml with attribution
-  - Create index.md with title and date
+  - Create index.md with title and date (only if not existing),
+    including markdown references to images found
 */
 
 const fs = require('fs/promises');
@@ -25,6 +26,7 @@ const ROOT = process.cwd();
 const LEGO_ROOT = path.join(ROOT, 'content', 'collections', 'lego');
 const CONFIG_PATH = path.join(ROOT, 'tools', 'config.json');
 const CACHE_ROOT = path.join(ROOT, 'tools', 'cache', 'rebrickable');
+const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
 
 function loadConfig() {
   const raw = fsSync.readFileSync(CONFIG_PATH, 'utf8');
@@ -86,10 +88,121 @@ async function saveJson(relPath, data) {
   await fs.writeFile(out, JSON.stringify(data, null, 2), 'utf8');
 }
 
+/**
+ * Return a Rebrickable API response as JSON, going through the on-disk cache.
+ *
+ * A cache file younger than ONE_WEEK_MS is parsed and returned. A missing,
+ * stale or unreadable file triggers a fetch whose result overwrites the file.
+ *
+ * @param {string} relPath - Cache file path, relative to CACHE_ROOT.
+ * @param {string} host - Host passed through to apiGetJson.
+ * @param {string} apiPath - Request path passed through to apiGetJson.
+ * @param {string} apiKey - API key passed through to apiGetJson.
+ * @param {string} [label] - Tag used in log lines; falsy silences them.
+ * @param {{noCache?: boolean}} [options] - `noCache` skips the cache read;
+ *   the fetched result is still written to the cache file.
+ * @returns {Promise} Resolves to the cached or freshly fetched value.
+ */
+async function cachedApiJson(relPath, host, apiPath, apiKey, label, options = {}) {
+  const full = path.join(CACHE_ROOT, relPath);
+  if (!options.noCache) {
+    try {
+      const st = await fs.stat(full);
+      const ageMs = Date.now() - st.mtimeMs;
+      if (ageMs < ONE_WEEK_MS) {
+        try {
+          const raw = await fs.readFile(full, 'utf8');
+          const json = JSON.parse(raw);
+          if (label) console.log(`[cache] ${label}`);
+          return json;
+        } catch (_) {
+          // Corrupt cache, fall through to refetch
+          if (label) console.log(`[stale] ${label} (cache unreadable) -> refetch`);
+        }
+      } else {
+        if (label) console.log(`[stale] ${label} (age ${(ageMs / ONE_WEEK_MS).toFixed(2)} weeks) -> refetch`);
+      }
+    } catch (err) {
+      // A missing cache file is the normal first-run case; anything else
+      // (permissions, I/O) is worth surfacing but must not block the fetch.
+      if (err.code !== 'ENOENT') {
+        console.log(`[warn] cache unavailable for ${relPath}: ${err.message}`);
+      }
+    }
+  }
+
+  const json = await apiGetJson(host, apiPath, apiKey);
+  await ensureDir(path.dirname(full));
+  await fs.writeFile(full, JSON.stringify(json, null, 2), 'utf8');
+  if (label) console.log(`[fetch] ${label} <- ${apiPath}${options.noCache ? ' [nocache]' : ''}`);
+  return json;
+}
+
+/**
+ * Escape text for use as the alt part of a markdown image (`![alt](url)`).
+ *
+ * Line breaks (CRLF, LF or lone CR) become a single space; backslashes and
+ * square brackets are backslash-escaped.
+ *
+ * @param {*} text - Value to escape; falsy values yield an empty string.
+ * @returns {string} The escaped, single-line text.
+ */
+function escapeMarkdownAlt(text) {
+  return String(text || '')
+    .replace(/\r\n?|\n/g, ' ')
+    .replace(/\\/g, '\\\\')
+    .replace(/\[/g, '\\[')
+    .replace(/\]/g, '\\]');
+}
+
+/**
+ * Extract the first `title:` value from a YAML file with a regex (no YAML
+ * parser is used).
+ *
+ * Handles double-quoted values (unescaping `\"` and `\\`), single-quoted
+ * values (unescaping `''`) and plain values, which end at the first `#`.
+ *
+ * @param {string} yamlPath - Path of the YAML file to read.
+ * @returns {Promise} Resolves to the trimmed title, or to null when the
+ *   file cannot be read or contains no `title:` line.
+ */
+async function readYamlTitle(yamlPath) {
+  try {
+    const content = await fs.readFile(yamlPath, 'utf8');
+    // Quoted alternatives accept escaped quotes so that a title such as
+    // "He said \"hi\"" is not cut at the first inner quote.
+    const m = content.match(/^\s*title\s*:\s*(?:"((?:[^"\\]|\\.)*)"|'((?:[^']|'')*)'|([^\n\r#]+))/m);
+    if (!m) return null;
+    if (m[1] !== undefined) return m[1].replace(/\\(["\\])/g, '$1').trim();
+    if (m[2] !== undefined) return m[2].replace(/''/g, "'").trim();
+    return m[3].trim();
+  } catch (_) {
+    return null;
+  }
+}
+
 async function main() {
-  const query = process.argv[2];
+  // Args parsing: support --no-cache and a free-form query
+  const argv = process.argv.slice(2);
+  let noCache = false;
+  const parts = [];
+  for (const a of argv) {
+    if (a === '--no-cache' || a === '--nocache' || a === '--fresh') {
+      noCache = true;
+    } else if (a === '--') {
+      // everything after is query literal
+      const idx = argv.indexOf('--');
+      parts.push(argv.slice(idx + 1).join(' '));
+      break;
+    } else if (a.startsWith('--')) {
+      // ignore unknown flags for now
+    } else {
+      parts.push(a);
+    }
+  }
+  const query = parts.join(' ').trim();
   if (!query) {
-    console.log('Usage: node tools/add_lego.js ');
+    console.log('Usage: node tools/add_lego.js [--no-cache] ');
     process.exit(1);
   }
 
@@ -97,11 +210,17 @@ async function main() {
   const apiKey = config.rebrickable.apiKey;
   const host = 'rebrickable.com';
 
-  // 1) Search
+  // 1) Search (cached ≤ 1 week)
   const searchPath = `/api/v3/lego/sets/?search=${encodeURIComponent(query)}&page_size=25`;
-  const search = await apiGetJson(host, searchPath, apiKey);
+  const search = await cachedApiJson(
+    `search/${encodeURIComponent(query)}.json`,
+    host,
+    searchPath,
+    apiKey,
+    `search:${query}`,
+    { noCache }
+  );
   const results = search.results || [];
-  await saveJson(`search/${encodeURIComponent(query)}.json`, search);
 
   if (results.length === 0) {
     console.log('No sets found for query:', query);
@@ -124,12 +243,24 @@ async function main() {
   const setNum = selected.set_num; // e.g., 10283-1
   const setId = (setNum || '').split('-')[0];
 
-  // 3) Fetch exact set and theme
-  const setDetails = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/`, apiKey);
-  await saveJson(`sets/${setNum}.json`, setDetails);
+  // 3) Fetch exact set and theme (cached ≤ 1 week)
+  const setDetails = await cachedApiJson(
+    `sets/${setNum}.json`,
+    host,
+    `/api/v3/lego/sets/${encodeURIComponent(setNum)}/`,
+    apiKey,
+    `set:${setNum}`,
+    { noCache }
+  );
   const themeId = setDetails.theme_id || selected.theme_id;
-  const theme = await apiGetJson(host, `/api/v3/lego/themes/${themeId}/`, apiKey);
-  await saveJson(`themes/${themeId}.json`, theme);
+  const theme = await cachedApiJson(
+    `themes/${themeId}.json`,
+    host,
+    `/api/v3/lego/themes/${themeId}/`,
+    apiKey,
+    `theme:${themeId}`,
+    { noCache }
+  );
   const themeName = theme.name;
   const themeSlug = slugify(themeName);
 
@@ -142,6 +273,7 @@ async function main() {
   await ensureDir(dataImagesDir);
 
   // Images provided by Rebrickable (set images)
+  const downloadedImages = new Set();
   const imageUrls = [];
   if (setDetails.set_img_url) imageUrls.push(setDetails.set_img_url);
   if (setDetails.set_img_url_2) imageUrls.push(setDetails.set_img_url_2);
@@ -150,7 +282,13 @@ async function main() {
     const urlObj = new URL(url);
     const base = path.basename(urlObj.pathname);
     const dest = path.join(imagesDir, base);
-    await downloadFile(url, dest);
+    if (fsSync.existsSync(dest)) {
+      console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
+    } else {
+      await downloadFile(url, dest);
+      downloadedImages.add(base);
+      console.log(`[download] ${url} -> ${path.relative(ROOT, dest)}`);
+    }
 
     const nameNoExt = base.replace(/\.[^.]+$/, '');
     const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
@@ -161,17 +299,29 @@ async function main() {
   }
 
   // Minifigs images (stored directly in images/)
-  const figsResp = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`, apiKey);
+  const figsResp = await cachedApiJson(
+    `sets/${setNum}_minifigs.json`,
+    host,
+    `/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`,
+    apiKey,
+    `minifigs:${setNum}`,
+    { noCache }
+  );
   const figs = figsResp.results || [];
-  await saveJson(`sets/${setNum}_minifigs.json`, figsResp);
   if (figs.length > 0) {
     for (const fig of figs) {
       const figNum = fig.fig_num || fig.set_num;
       let figImg = fig.set_img_url || fig.fig_img_url;
       let figTitle = fig.name || '';
       if (!figImg && figNum) {
-        const figDetails = await apiGetJson(host, `/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`, apiKey);
-        await saveJson(`minifigs/${figNum}.json`, figDetails);
+        const figDetails = await cachedApiJson(
+          `minifigs/${figNum}.json`,
+          host,
+          `/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`,
+          apiKey,
+          `minifig:${figNum}`,
+          { noCache }
+        );
         figImg = figDetails.set_img_url || figDetails.fig_img_url;
         figTitle = figTitle || figDetails.name || '';
       }
@@ -180,7 +330,13 @@ async function main() {
       const urlObj = new URL(figImg);
       const base = path.basename(urlObj.pathname);
       const dest = path.join(imagesDir, base);
-      await downloadFile(figImg, dest);
+      if (fsSync.existsSync(dest)) {
+        console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
+      } else {
+        await downloadFile(figImg, dest);
+        downloadedImages.add(base);
+        console.log(`[download] ${figImg} -> ${path.relative(ROOT, dest)}`);
+      }
 
       const nameNoExt = base.replace(/\.[^.]+$/, '');
       const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
@@ -190,15 +346,73 @@ async function main() {
     }
   }
 
-  // index.md
-  const title = setDetails.name || selected.name;
-  const today = new Date();
-  const date = today.toISOString().slice(0, 10);
-  const md = `---\n` +
-    `title: "${title.replace(/"/g, '\\"')}"\n` +
-    `date: ${date}\n` +
-    `---\n`;
-  await fs.writeFile(path.join(setDir, 'index.md'), md, 'utf8');
+  // index.md (do not overwrite if exists)
+  const indexPath = path.join(setDir, 'index.md');
+  const indexExists = fsSync.existsSync(indexPath);
+  if (!indexExists) {
+    const pageTitle = setDetails.name || selected.name;
+    const today = new Date();
+    const date = today.toISOString().slice(0, 10);
+
+    // Collect images present in imagesDir (existing or just downloaded)
+    let imageFiles = [];
+    try {
+      const allFiles = await fs.readdir(imagesDir);
+      imageFiles = allFiles.filter((f) => /\.(png|jpe?g|gif|webp)$/i.test(f));
+    } catch (_) {
+      imageFiles = Array.from(downloadedImages);
+    }
+
+    let body = '';
+    if (imageFiles.length > 0) {
+      const ordered = [
+        ...Array.from(downloadedImages).filter((b) => imageFiles.includes(b)),
+        ...imageFiles.filter((b) => !downloadedImages.has(b)),
+      ];
+      const lines = [];
+      for (const base of ordered) {
+        const nameNoExt = base.replace(/\.[^.]+$/, '');
+        const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
+        const altRaw = (await readYamlTitle(yamlPath)) || `${pageTitle}`;
+        const altEsc = escapeMarkdownAlt(altRaw);
+        const titleAttr = altRaw.replace(/\"/g, '\\"');
+        lines.push(`\n![${altEsc}](images/${base} \"${titleAttr}\")`);
+      }
+      body = lines.join('\n') + '\n';
+    }
+
+    const md = `---\n` +
+      `title: "${pageTitle.replace(/"/g, '\\"')}"\n` +
+      `date: ${date}\n` +
+      `---\n\n` +
+      body;
+    await fs.writeFile(indexPath, md, 'utf8');
+    console.log(`[create] ${path.relative(ROOT, indexPath)} with ${imageFiles.length} image reference(s)`);
+  }
+
+  // Report downloaded images that are not referenced in an existing index.md
+  if (indexExists && downloadedImages.size > 0) {
+    try {
+      const mdContent = await fs.readFile(indexPath, 'utf8');
+      const refSet = new Set();
+      const imgRegex = /!\[(?:\\.|[^\]\\])*\]\(([^)]+)\)/g; // capture target inside ()
+      let m;
+      while ((m = imgRegex.exec(mdContent)) !== null) {
+        // The target may be followed by a quoted title (the form written when
+        // index.md is created): keep only the URL before the first whitespace.
+        const p = m[1].trim().split(/\s+/)[0];
+        const base = path.basename(p.split('?')[0].split('#')[0]);
+        if (base) refSet.add(base);
+      }
+      const notRef = Array.from(downloadedImages).filter((b) => !refSet.has(b));
+      if (notRef.length > 0) {
+        console.log(`\n[note] ${notRef.length} downloaded image(s) not referenced in index.md:`);
+        notRef.forEach((b) => console.log(` - images/${b}`));
+      }
+    } catch (e) {
+      console.log(`[warn] could not analyze index.md references: ${e.message}`);
+    }
+  }
 
   console.log(`\nAdded set ${setNum} → ${path.relative(ROOT, setDir)}`);
 }