1

tools/add_lego.js: do not overwrite index.md; add image refs with alt/title; skip redundant downloads; log actions; report unreferenced images; add 1-week JSON cache with --no-cache option

This commit is contained in:
2025-09-11 00:04:43 +02:00
parent 9fc8493ee7
commit c17f07d2f9

View File

@@ -12,7 +12,8 @@
- Create data/images/
- Download all available set images from Rebrickable
- Create data/images/<image-name>.yaml with attribution
- Create index.md with title and date
- Create index.md with title and date (only if not existing),
including markdown references to images found
*/
const fs = require('fs/promises');
@@ -25,6 +26,7 @@ const ROOT = process.cwd();
const LEGO_ROOT = path.join(ROOT, 'content', 'collections', 'lego');
const CONFIG_PATH = path.join(ROOT, 'tools', 'config.json');
const CACHE_ROOT = path.join(ROOT, 'tools', 'cache', 'rebrickable');
const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
function loadConfig() {
const raw = fsSync.readFileSync(CONFIG_PATH, 'utf8');
@@ -86,10 +88,85 @@ async function saveJson(relPath, data) {
await fs.writeFile(out, JSON.stringify(data, null, 2), 'utf8');
}
/**
 * Fetch a JSON API resource through a file cache stored under CACHE_ROOT.
 *
 * A cache file younger than ONE_WEEK_MS (judged by its mtime) is returned
 * without touching the network. A missing, stale or unparsable cache file
 * triggers a call to apiGetJson, whose result is written back to the cache
 * (pretty-printed) before being returned.
 *
 * @param {string} relPath - Cache file path, relative to CACHE_ROOT.
 * @param {string} host - Host passed through to apiGetJson.
 * @param {string} apiPath - Request path passed through to apiGetJson.
 * @param {string} apiKey - API key passed through to apiGetJson.
 * @param {string} [label] - Short name used in log lines; when falsy the
 *   cache/fetch/stale log lines are suppressed.
 * @param {{noCache?: boolean}} [options] - `noCache: true` skips the cache
 *   lookup and always refetches (the cache file is still rewritten).
 * @returns {Promise<*>} The parsed JSON value, from cache or from the API.
 */
async function cachedApiJson(relPath, host, apiPath, apiKey, label, options = {}) {
  const full = path.join(CACHE_ROOT, relPath);
  const noCache = Boolean(options && options.noCache);

  if (!noCache) {
    let st = null;
    try {
      st = await fs.stat(full);
    } catch (err) {
      // ENOENT simply means "not cached yet". Anything else (permissions,
      // I/O error, ...) is unexpected: report it instead of hiding it, then
      // fall back to a fresh fetch.
      if (!err || err.code !== 'ENOENT') {
        const reason = (err && (err.code || err.message)) || 'unknown error';
        console.log(`[warn] ${label || relPath} (cache stat failed: ${reason}) -> refetch`);
      }
    }

    if (st) {
      const ageMs = Date.now() - st.mtimeMs;
      if (ageMs < ONE_WEEK_MS) {
        try {
          const raw = await fs.readFile(full, 'utf8');
          const cached = JSON.parse(raw);
          if (label) console.log(`[cache] ${label}`);
          return cached;
        } catch (_) {
          // Corrupt or unreadable cache file: fall through to refetch.
          if (label) console.log(`[stale] ${label} (cache unreadable) -> refetch`);
        }
      } else if (label) {
        console.log(`[stale] ${label} (age ${(ageMs / ONE_WEEK_MS).toFixed(2)} weeks) -> refetch`);
      }
    }
  }

  // Single fetch-and-store path shared by --no-cache, cache miss and stale cache.
  const json = await apiGetJson(host, apiPath, apiKey);
  await ensureDir(path.dirname(full));
  await fs.writeFile(full, JSON.stringify(json, null, 2), 'utf8');
  if (label) console.log(`[fetch] ${label} <- ${apiPath}${noCache ? ' [nocache]' : ''}`);
  return json;
}
/**
 * Make a string safe to place between the square brackets of a Markdown
 * image/link (`![alt](...)`).
 *
 * Falsy input becomes the empty string. LF and CRLF line breaks are replaced
 * by a single space, and every backslash, `[` and `]` is prefixed with a
 * backslash.
 *
 * @param {*} text - Value to escape; coerced with String().
 * @returns {string} The escaped, single-line text.
 */
function escapeMarkdownAlt(text) {
  const flattened = String(text || '').replace(/\r?\n/g, ' ');
  // One pass over the three special characters; the backslashes inserted here
  // are never re-escaped, matching a backslash-first sequential replacement.
  return flattened.replace(/[\\[\]]/g, (ch) => `\\${ch}`);
}
/**
 * Extract the `title` value from a YAML file using a lightweight regex
 * (no YAML parser involved).
 *
 * Recognised forms, on a single line:
 *   title: "double quoted"
 *   title: 'single quoted'
 *   title: plain text   # trailing comment is dropped
 *
 * @param {string} yamlPath - Path of the YAML file to read.
 * @returns {Promise<string|null>} The trimmed title, or null when the file
 *   cannot be read or contains no title value on the `title:` line.
 */
async function readYamlTitle(yamlPath) {
  try {
    const content = await fs.readFile(yamlPath, 'utf8');
    // Only spaces/tabs are allowed around the colon: using \s here would let
    // the match run past an empty "title:" line and capture the NEXT line
    // (e.g. "attribution: ...") as the title.
    const m = content.match(/^[ \t]*title[ \t]*:[ \t]*(?:"([^"]*)"|'([^']*)'|([^\n\r#]+))/m);
    if (!m) return null;
    const val = (m[1] || m[2] || m[3] || '').trim();
    return val;
  } catch (_) {
    // Best effort: a missing or unreadable YAML file simply means "no title".
    return null;
  }
}
async function main() {
const query = process.argv[2];
// Args parsing: support --no-cache and a free-form query
const argv = process.argv.slice(2);
let noCache = false;
const parts = [];
for (const a of argv) {
if (a === '--no-cache' || a === '--nocache' || a === '--fresh') {
noCache = true;
} else if (a === '--') {
// everything after is query literal
const idx = argv.indexOf('--');
parts.push(argv.slice(idx + 1).join(' '));
break;
} else if (a.startsWith('--')) {
// ignore unknown flags for now
} else {
parts.push(a);
}
}
const query = parts.join(' ').trim();
if (!query) {
console.log('Usage: node tools/add_lego.js <query|set-id>');
console.log('Usage: node tools/add_lego.js [--no-cache] <query|set-id>');
process.exit(1);
}
@@ -97,11 +174,17 @@ async function main() {
const apiKey = config.rebrickable.apiKey;
const host = 'rebrickable.com';
// 1) Search
// 1) Search (cached ≤ 1 week)
const searchPath = `/api/v3/lego/sets/?search=${encodeURIComponent(query)}&page_size=25`;
const search = await apiGetJson(host, searchPath, apiKey);
const search = await cachedApiJson(
`search/${encodeURIComponent(query)}.json`,
host,
searchPath,
apiKey,
`search:${query}`,
{ noCache }
);
const results = search.results || [];
await saveJson(`search/${encodeURIComponent(query)}.json`, search);
if (results.length === 0) {
console.log('No sets found for query:', query);
@@ -124,12 +207,24 @@ async function main() {
const setNum = selected.set_num; // e.g., 10283-1
const setId = (setNum || '').split('-')[0];
// 3) Fetch exact set and theme
const setDetails = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/`, apiKey);
await saveJson(`sets/${setNum}.json`, setDetails);
// 3) Fetch exact set and theme (cached ≤ 1 week)
const setDetails = await cachedApiJson(
`sets/${setNum}.json`,
host,
`/api/v3/lego/sets/${encodeURIComponent(setNum)}/`,
apiKey,
`set:${setNum}`,
{ noCache }
);
const themeId = setDetails.theme_id || selected.theme_id;
const theme = await apiGetJson(host, `/api/v3/lego/themes/${themeId}/`, apiKey);
await saveJson(`themes/${themeId}.json`, theme);
const theme = await cachedApiJson(
`themes/${themeId}.json`,
host,
`/api/v3/lego/themes/${themeId}/`,
apiKey,
`theme:${themeId}`,
{ noCache }
);
const themeName = theme.name;
const themeSlug = slugify(themeName);
@@ -142,6 +237,7 @@ async function main() {
await ensureDir(dataImagesDir);
// Images provided by Rebrickable (set images)
const downloadedImages = new Set();
const imageUrls = [];
if (setDetails.set_img_url) imageUrls.push(setDetails.set_img_url);
if (setDetails.set_img_url_2) imageUrls.push(setDetails.set_img_url_2);
@@ -150,7 +246,13 @@ async function main() {
const urlObj = new URL(url);
const base = path.basename(urlObj.pathname);
const dest = path.join(imagesDir, base);
if (fsSync.existsSync(dest)) {
console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
} else {
await downloadFile(url, dest);
downloadedImages.add(base);
console.log(`[download] ${url} -> ${path.relative(ROOT, dest)}`);
}
const nameNoExt = base.replace(/\.[^.]+$/, '');
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
@@ -161,17 +263,29 @@ async function main() {
}
// Minifigs images (stored directly in images/)
const figsResp = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`, apiKey);
const figsResp = await cachedApiJson(
`sets/${setNum}_minifigs.json`,
host,
`/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`,
apiKey,
`minifigs:${setNum}`,
{ noCache }
);
const figs = figsResp.results || [];
await saveJson(`sets/${setNum}_minifigs.json`, figsResp);
if (figs.length > 0) {
for (const fig of figs) {
const figNum = fig.fig_num || fig.set_num;
let figImg = fig.set_img_url || fig.fig_img_url;
let figTitle = fig.name || '';
if (!figImg && figNum) {
const figDetails = await apiGetJson(host, `/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`, apiKey);
await saveJson(`minifigs/${figNum}.json`, figDetails);
const figDetails = await cachedApiJson(
`minifigs/${figNum}.json`,
host,
`/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`,
apiKey,
`minifig:${figNum}`,
{ noCache }
);
figImg = figDetails.set_img_url || figDetails.fig_img_url;
figTitle = figTitle || figDetails.name || '';
}
@@ -180,7 +294,13 @@ async function main() {
const urlObj = new URL(figImg);
const base = path.basename(urlObj.pathname);
const dest = path.join(imagesDir, base);
if (fsSync.existsSync(dest)) {
console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
} else {
await downloadFile(figImg, dest);
downloadedImages.add(base);
console.log(`[download] ${figImg} -> ${path.relative(ROOT, dest)}`);
}
const nameNoExt = base.replace(/\.[^.]+$/, '');
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
@@ -190,15 +310,71 @@ async function main() {
}
}
// index.md
const title = setDetails.name || selected.name;
// index.md (do not overwrite if exists)
const indexPath = path.join(setDir, 'index.md');
const indexExists = fsSync.existsSync(indexPath);
if (!indexExists) {
const pageTitle = setDetails.name || selected.name;
const today = new Date();
const date = today.toISOString().slice(0, 10);
// Collect images present in imagesDir (existing or just downloaded)
let imageFiles = [];
try {
const allFiles = await fs.readdir(imagesDir);
imageFiles = allFiles.filter((f) => /\.(png|jpe?g|gif|webp)$/i.test(f));
} catch (_) {
imageFiles = Array.from(downloadedImages);
}
let body = '';
if (imageFiles.length > 0) {
const ordered = [
...Array.from(downloadedImages).filter((b) => imageFiles.includes(b)),
...imageFiles.filter((b) => !downloadedImages.has(b)),
];
const lines = [];
for (const base of ordered) {
const nameNoExt = base.replace(/\.[^.]+$/, '');
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
const altRaw = (await readYamlTitle(yamlPath)) || `${pageTitle}`;
const altEsc = escapeMarkdownAlt(altRaw);
const titleAttr = altRaw.replace(/\"/g, '\\"');
lines.push(`\n![${altEsc}](images/${base} \"${titleAttr}\")`);
}
body = lines.join('\n') + '\n';
}
const md = `---\n` +
`title: "${title.replace(/"/g, '\\"')}"\n` +
`title: "${pageTitle.replace(/"/g, '\\"')}"\n` +
`date: ${date}\n` +
`---\n`;
await fs.writeFile(path.join(setDir, 'index.md'), md, 'utf8');
`---\n\n` +
body;
await fs.writeFile(indexPath, md, 'utf8');
console.log(`[create] ${path.relative(ROOT, indexPath)} with ${imageFiles.length} image reference(s)`);
}
// Report downloaded images that are not referenced in an existing index.md
if (indexExists && downloadedImages.size > 0) {
try {
const mdContent = await fs.readFile(indexPath, 'utf8');
const refSet = new Set();
const imgRegex = /!\[[^\]]*\]\(([^)]+)\)/g; // capture URL inside ()
let m;
while ((m = imgRegex.exec(mdContent)) !== null) {
const p = m[1].trim();
const base = path.basename(p.split('?')[0].split('#')[0]);
if (base) refSet.add(base);
}
const notRef = Array.from(downloadedImages).filter((b) => !refSet.has(b));
if (notRef.length > 0) {
console.log(`\n[note] ${notRef.length} downloaded image(s) not referenced in index.md:`);
notRef.forEach((b) => console.log(` - images/${b}`));
}
} catch (e) {
console.log(`[warn] could not analyze index.md references: ${e.message}`);
}
}
console.log(`\nAdded set ${setNum}${path.relative(ROOT, setDir)}`);
}