tools/add_lego.js: do not overwrite index.md; add image refs with alt/title; skip redundant downloads; log actions; report unreferenced images; add 1-week JSON cache with --no-cache option
This commit is contained in:
@@ -12,7 +12,8 @@
|
||||
- Create data/images/
|
||||
- Download all available set images from Rebrickable
|
||||
- Create data/images/<image-name>.yaml with attribution
|
||||
- Create index.md with title and date
|
||||
- Create index.md with title and date (only if not existing),
|
||||
including markdown references to images found
|
||||
*/
|
||||
|
||||
const fs = require('fs/promises');
|
||||
@@ -25,6 +26,7 @@ const ROOT = process.cwd();
|
||||
const LEGO_ROOT = path.join(ROOT, 'content', 'collections', 'lego');
|
||||
const CONFIG_PATH = path.join(ROOT, 'tools', 'config.json');
|
||||
const CACHE_ROOT = path.join(ROOT, 'tools', 'cache', 'rebrickable');
|
||||
const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
|
||||
|
||||
function loadConfig() {
|
||||
const raw = fsSync.readFileSync(CONFIG_PATH, 'utf8');
|
||||
@@ -86,10 +88,85 @@ async function saveJson(relPath, data) {
|
||||
await fs.writeFile(out, JSON.stringify(data, null, 2), 'utf8');
|
||||
}
|
||||
|
||||
/**
 * Fetch a JSON resource through `apiGetJson`, backed by an on-disk cache
 * stored under CACHE_ROOT.
 *
 * A cache file is served when its modification time is less than
 * ONE_WEEK_MS old and it parses as JSON. Otherwise (missing, too old,
 * unreadable, or caching disabled) the resource is fetched and the cache
 * file is (re)written.
 *
 * @param {string} relPath - Cache file path, relative to CACHE_ROOT.
 * @param {string} host - Host passed through to `apiGetJson`.
 * @param {string} apiPath - Request path passed through to `apiGetJson`.
 * @param {string} apiKey - API key passed through to `apiGetJson`.
 * @param {string} [label] - When truthy, a one-line action log is printed
 *   (`[cache]`, `[stale]` or `[fetch]`).
 * @param {{noCache?: boolean}} [options] - `noCache: true` skips reading the
 *   cache; the fresh response is still written back to it.
 * @returns {Promise<any>} The parsed JSON payload.
 */
async function cachedApiJson(relPath, host, apiPath, apiKey, label, options = {}) {
  const full = path.join(CACHE_ROOT, relPath);
  // Tolerate an explicit `null` options argument (the default only covers undefined).
  const noCache = Boolean(options && options.noCache);

  if (!noCache) {
    let ageMs = null;
    try {
      const st = await fs.stat(full);
      ageMs = Date.now() - st.mtimeMs;
    } catch (_) {
      // Best effort: no cache file (or it cannot be stat'ed) -> fetch below.
    }

    if (ageMs !== null) {
      if (ageMs < ONE_WEEK_MS) {
        try {
          const raw = await fs.readFile(full, 'utf8');
          const cached = JSON.parse(raw);
          if (label) console.log(`[cache] ${label}`);
          return cached;
        } catch (_) {
          // Corrupt or unreadable cache file: fall through to refetch.
          if (label) console.log(`[stale] ${label} (cache unreadable) -> refetch`);
        }
      } else if (label) {
        console.log(`[stale] ${label} (age ${(ageMs / ONE_WEEK_MS).toFixed(2)} weeks) -> refetch`);
      }
    }
  }

  // Single fetch-and-store path shared by cache misses and --no-cache runs.
  const json = await apiGetJson(host, apiPath, apiKey);
  await ensureDir(path.dirname(full));
  await fs.writeFile(full, JSON.stringify(json, null, 2), 'utf8');
  if (label) console.log(`[fetch] ${label} <- ${apiPath}${noCache ? ' [nocache]' : ''}`);
  return json;
}
|
||||
|
||||
/**
 * Make a string safe to use as the alt text of a Markdown image (`![alt](…)`).
 *
 * Line breaks are collapsed to a single space, and backslashes and square
 * brackets are backslash-escaped so they cannot terminate or nest the
 * `[...]` part.
 *
 * @param {*} text - Value to escape; falsy values yield an empty string.
 * @returns {string} The escaped, single-line alt text.
 */
function escapeMarkdownAlt(text) {
  return String(text || '')
    // Covers \r\n, \n and a lone \r so no raw line break survives.
    .replace(/\r\n?|\n/g, ' ')
    // One pass: prefix each backslash or bracket with a backslash.
    .replace(/[\\[\]]/g, '\\$&');
}
|
||||
|
||||
/**
 * Read the value of the first `title:` key found in a YAML file, using a
 * lightweight regex rather than a YAML parser.
 *
 * Supported forms: `title: "double quoted"`, `title: 'single quoted'` and
 * `title: plain text` (a plain value stops at `#` or end of line).
 *
 * @param {string} yamlPath - Path of the YAML file to read.
 * @returns {Promise<string|null>} The trimmed title (possibly an empty
 *   string), or `null` when the file cannot be read or no title value is found.
 */
async function readYamlTitle(yamlPath) {
  let content;
  try {
    content = await fs.readFile(yamlPath, 'utf8');
  } catch (_) {
    // Best effort: a missing or unreadable file simply has no title.
    return null;
  }

  // Only horizontal whitespace is allowed around the colon, so an empty
  // `title:` cannot swallow the following line as its value. The
  // double-quoted branch accepts backslash escapes such as \" inside the value.
  const m = content.match(
    /^[ \t]*title[ \t]*:[ \t]*(?:"((?:[^"\\]|\\[\s\S])*)"|'([^']*)'|([^\n\r#]+))/m
  );
  if (!m) return null;

  if (m[1] !== undefined) {
    // Undo the \" and \\ escapes of a double-quoted value.
    return m[1].replace(/\\(["\\])/g, '$1').trim();
  }
  return (m[2] || m[3] || '').trim();
}
|
||||
|
||||
async function main() {
|
||||
const query = process.argv[2];
|
||||
// Args parsing: support --no-cache and a free-form query
|
||||
const argv = process.argv.slice(2);
|
||||
let noCache = false;
|
||||
const parts = [];
|
||||
for (const a of argv) {
|
||||
if (a === '--no-cache' || a === '--nocache' || a === '--fresh') {
|
||||
noCache = true;
|
||||
} else if (a === '--') {
|
||||
// everything after is query literal
|
||||
const idx = argv.indexOf('--');
|
||||
parts.push(argv.slice(idx + 1).join(' '));
|
||||
break;
|
||||
} else if (a.startsWith('--')) {
|
||||
// ignore unknown flags for now
|
||||
} else {
|
||||
parts.push(a);
|
||||
}
|
||||
}
|
||||
const query = parts.join(' ').trim();
|
||||
if (!query) {
|
||||
console.log('Usage: node tools/add_lego.js <query|set-id>');
|
||||
console.log('Usage: node tools/add_lego.js [--no-cache] <query|set-id>');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
@@ -97,11 +174,17 @@ async function main() {
|
||||
const apiKey = config.rebrickable.apiKey;
|
||||
const host = 'rebrickable.com';
|
||||
|
||||
// 1) Search
|
||||
// 1) Search (cached ≤ 1 week)
|
||||
const searchPath = `/api/v3/lego/sets/?search=${encodeURIComponent(query)}&page_size=25`;
|
||||
const search = await apiGetJson(host, searchPath, apiKey);
|
||||
const search = await cachedApiJson(
|
||||
`search/${encodeURIComponent(query)}.json`,
|
||||
host,
|
||||
searchPath,
|
||||
apiKey,
|
||||
`search:${query}`,
|
||||
{ noCache }
|
||||
);
|
||||
const results = search.results || [];
|
||||
await saveJson(`search/${encodeURIComponent(query)}.json`, search);
|
||||
|
||||
if (results.length === 0) {
|
||||
console.log('No sets found for query:', query);
|
||||
@@ -124,12 +207,24 @@ async function main() {
|
||||
const setNum = selected.set_num; // e.g., 10283-1
|
||||
const setId = (setNum || '').split('-')[0];
|
||||
|
||||
// 3) Fetch exact set and theme
|
||||
const setDetails = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/`, apiKey);
|
||||
await saveJson(`sets/${setNum}.json`, setDetails);
|
||||
// 3) Fetch exact set and theme (cached ≤ 1 week)
|
||||
const setDetails = await cachedApiJson(
|
||||
`sets/${setNum}.json`,
|
||||
host,
|
||||
`/api/v3/lego/sets/${encodeURIComponent(setNum)}/`,
|
||||
apiKey,
|
||||
`set:${setNum}`,
|
||||
{ noCache }
|
||||
);
|
||||
const themeId = setDetails.theme_id || selected.theme_id;
|
||||
const theme = await apiGetJson(host, `/api/v3/lego/themes/${themeId}/`, apiKey);
|
||||
await saveJson(`themes/${themeId}.json`, theme);
|
||||
const theme = await cachedApiJson(
|
||||
`themes/${themeId}.json`,
|
||||
host,
|
||||
`/api/v3/lego/themes/${themeId}/`,
|
||||
apiKey,
|
||||
`theme:${themeId}`,
|
||||
{ noCache }
|
||||
);
|
||||
const themeName = theme.name;
|
||||
const themeSlug = slugify(themeName);
|
||||
|
||||
@@ -142,6 +237,7 @@ async function main() {
|
||||
await ensureDir(dataImagesDir);
|
||||
|
||||
// Images provided by Rebrickable (set images)
|
||||
const downloadedImages = new Set();
|
||||
const imageUrls = [];
|
||||
if (setDetails.set_img_url) imageUrls.push(setDetails.set_img_url);
|
||||
if (setDetails.set_img_url_2) imageUrls.push(setDetails.set_img_url_2);
|
||||
@@ -150,7 +246,13 @@ async function main() {
|
||||
const urlObj = new URL(url);
|
||||
const base = path.basename(urlObj.pathname);
|
||||
const dest = path.join(imagesDir, base);
|
||||
if (fsSync.existsSync(dest)) {
|
||||
console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
|
||||
} else {
|
||||
await downloadFile(url, dest);
|
||||
downloadedImages.add(base);
|
||||
console.log(`[download] ${url} -> ${path.relative(ROOT, dest)}`);
|
||||
}
|
||||
|
||||
const nameNoExt = base.replace(/\.[^.]+$/, '');
|
||||
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
|
||||
@@ -161,17 +263,29 @@ async function main() {
|
||||
}
|
||||
|
||||
// Minifigs images (stored directly in images/)
|
||||
const figsResp = await apiGetJson(host, `/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`, apiKey);
|
||||
const figsResp = await cachedApiJson(
|
||||
`sets/${setNum}_minifigs.json`,
|
||||
host,
|
||||
`/api/v3/lego/sets/${encodeURIComponent(setNum)}/minifigs/?page_size=1000`,
|
||||
apiKey,
|
||||
`minifigs:${setNum}`,
|
||||
{ noCache }
|
||||
);
|
||||
const figs = figsResp.results || [];
|
||||
await saveJson(`sets/${setNum}_minifigs.json`, figsResp);
|
||||
if (figs.length > 0) {
|
||||
for (const fig of figs) {
|
||||
const figNum = fig.fig_num || fig.set_num;
|
||||
let figImg = fig.set_img_url || fig.fig_img_url;
|
||||
let figTitle = fig.name || '';
|
||||
if (!figImg && figNum) {
|
||||
const figDetails = await apiGetJson(host, `/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`, apiKey);
|
||||
await saveJson(`minifigs/${figNum}.json`, figDetails);
|
||||
const figDetails = await cachedApiJson(
|
||||
`minifigs/${figNum}.json`,
|
||||
host,
|
||||
`/api/v3/lego/minifigs/${encodeURIComponent(figNum)}/`,
|
||||
apiKey,
|
||||
`minifig:${figNum}`,
|
||||
{ noCache }
|
||||
);
|
||||
figImg = figDetails.set_img_url || figDetails.fig_img_url;
|
||||
figTitle = figTitle || figDetails.name || '';
|
||||
}
|
||||
@@ -180,7 +294,13 @@ async function main() {
|
||||
const urlObj = new URL(figImg);
|
||||
const base = path.basename(urlObj.pathname);
|
||||
const dest = path.join(imagesDir, base);
|
||||
if (fsSync.existsSync(dest)) {
|
||||
console.log(`[skip] image exists: ${path.relative(ROOT, dest)}`);
|
||||
} else {
|
||||
await downloadFile(figImg, dest);
|
||||
downloadedImages.add(base);
|
||||
console.log(`[download] ${figImg} -> ${path.relative(ROOT, dest)}`);
|
||||
}
|
||||
|
||||
const nameNoExt = base.replace(/\.[^.]+$/, '');
|
||||
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
|
||||
@@ -190,15 +310,71 @@ async function main() {
|
||||
}
|
||||
}
|
||||
|
||||
// index.md
|
||||
const title = setDetails.name || selected.name;
|
||||
// index.md (do not overwrite if exists)
|
||||
const indexPath = path.join(setDir, 'index.md');
|
||||
const indexExists = fsSync.existsSync(indexPath);
|
||||
if (!indexExists) {
|
||||
const pageTitle = setDetails.name || selected.name;
|
||||
const today = new Date();
|
||||
const date = today.toISOString().slice(0, 10);
|
||||
|
||||
// Collect images present in imagesDir (existing or just downloaded)
|
||||
let imageFiles = [];
|
||||
try {
|
||||
const allFiles = await fs.readdir(imagesDir);
|
||||
imageFiles = allFiles.filter((f) => /\.(png|jpe?g|gif|webp)$/i.test(f));
|
||||
} catch (_) {
|
||||
imageFiles = Array.from(downloadedImages);
|
||||
}
|
||||
|
||||
let body = '';
|
||||
if (imageFiles.length > 0) {
|
||||
const ordered = [
|
||||
...Array.from(downloadedImages).filter((b) => imageFiles.includes(b)),
|
||||
...imageFiles.filter((b) => !downloadedImages.has(b)),
|
||||
];
|
||||
const lines = [];
|
||||
for (const base of ordered) {
|
||||
const nameNoExt = base.replace(/\.[^.]+$/, '');
|
||||
const yamlPath = path.join(dataImagesDir, `${nameNoExt}.yaml`);
|
||||
const altRaw = (await readYamlTitle(yamlPath)) || `${pageTitle}`;
|
||||
const altEsc = escapeMarkdownAlt(altRaw);
|
||||
const titleAttr = altRaw.replace(/\"/g, '\\"');
|
||||
lines.push(`\n`);
|
||||
}
|
||||
body = lines.join('\n') + '\n';
|
||||
}
|
||||
|
||||
const md = `---\n` +
|
||||
`title: "${title.replace(/"/g, '\\"')}"\n` +
|
||||
`title: "${pageTitle.replace(/"/g, '\\"')}"\n` +
|
||||
`date: ${date}\n` +
|
||||
`---\n`;
|
||||
await fs.writeFile(path.join(setDir, 'index.md'), md, 'utf8');
|
||||
`---\n\n` +
|
||||
body;
|
||||
await fs.writeFile(indexPath, md, 'utf8');
|
||||
console.log(`[create] ${path.relative(ROOT, indexPath)} with ${imageFiles.length} image reference(s)`);
|
||||
}
|
||||
|
||||
// Report downloaded images that are not referenced in an existing index.md
|
||||
if (indexExists && downloadedImages.size > 0) {
|
||||
try {
|
||||
const mdContent = await fs.readFile(indexPath, 'utf8');
|
||||
const refSet = new Set();
|
||||
const imgRegex = /!\[[^\]]*\]\(([^)]+)\)/g; // capture URL inside ()
|
||||
let m;
|
||||
while ((m = imgRegex.exec(mdContent)) !== null) {
|
||||
const p = m[1].trim();
|
||||
const base = path.basename(p.split('?')[0].split('#')[0]);
|
||||
if (base) refSet.add(base);
|
||||
}
|
||||
const notRef = Array.from(downloadedImages).filter((b) => !refSet.has(b));
|
||||
if (notRef.length > 0) {
|
||||
console.log(`\n[note] ${notRef.length} downloaded image(s) not referenced in index.md:`);
|
||||
notRef.forEach((b) => console.log(` - images/${b}`));
|
||||
}
|
||||
} catch (e) {
|
||||
console.log(`[warn] could not analyze index.md references: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nAdded set ${setNum} → ${path.relative(ROOT, setDir)}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user