webamp/experiments/skin-database/tasks/fetchInternetArchiveMetadata.js
2019-06-06 07:11:03 -07:00

49 lines
1.2 KiB
JavaScript

const fetch = require("node-fetch");
const db = require("./db");
const iaItems = db.get("internetArchiveItems");
async function fetchMetadata(identifier) {
const resp = await fetch(`http://archive.org/metadata/${identifier}`);
const metadata = await resp.json();
const metadataFetchDate = Date.now();
await iaItems.findOneAndUpdate(
{ identifier },
{ $set: { metadata, metadataFetchDate } }
);
}
async function fetchAllMetadata(limit) {
const items = await iaItems.find(
{ metadata: { $eq: null } },
{
limit,
fields: {
identifier: 1,
},
}
);
await Promise.all(
items.map(item => {
return fetchMetadata(item.identifier);
})
);
}
// TODO: Refetch collections from:
// https://archive.org/advancedsearch.php?q=collection%3Awinampskins&fl%5B%5D=identifier&rows=100000&page=1&output=json
module.exports = async function main() {
let delay = 60000;
let timeout = null;
async function go() {
console.log("Gonna fetch some more");
try {
await fetchAllMetadata(500);
} catch (e) {
console.error(e);
delay += 60000;
}
console.log("Scheduling another", delay / 1000);
timeout = setTimeout(go, delay);
}
go();
};