diff --git a/packages/skin-database/CloudFlare.ts b/packages/skin-database/CloudFlare.ts
new file mode 100644
index 00000000..6ce9d62c
--- /dev/null
+++ b/packages/skin-database/CloudFlare.ts
@@ -0,0 +1,40 @@
+import fetch from "node-fetch";
+import { CLOUDFLARE_PURGE_AUTH_KEY } from "./config";
+
+// ID of the Cloudflare zone whose cache gets purged.
+const cdnZone = "b2c0ca43723f95f9d317710ff2ce86a3";
+
+/**
+ * Ask Cloudflare to purge the given URLs from the cache of `cdnZone`.
+ *
+ * @param files URLs to purge; sent as the `files` array of the request body.
+ * @throws Error if the HTTP response is not OK, or if the response body does
+ *   not report `success`.
+ */
+export async function purgeFiles(files: string[]): Promise<void> {
+  // Nothing to purge, so skip the network round trip entirely.
+  if (files.length === 0) {
+    return;
+  }
+  const response = await fetch(
+    `https://api.cloudflare.com/client/v4/zones/${cdnZone}/purge_cache`,
+    {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${CLOUDFLARE_PURGE_AUTH_KEY}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ files }),
+    }
+  );
+
+  const body = await response.json();
+  if (!response.ok) {
+    // Log the full payload; the thrown error only carries the status code.
+    console.error(body);
+    throw new Error(`Got error response: ${response.status}`);
+  }
+  if (!body.success) {
+    throw new Error(`Could not purge URLs: '${JSON.stringify(body)}'`);
+  }
+}
diff --git a/packages/skin-database/api/__tests__/router.test.ts b/packages/skin-database/api/__tests__/router.test.ts
index 0a4c2a9e..18e3cf2b 100644
--- a/packages/skin-database/api/__tests__/router.test.ts
+++ b/packages/skin-database/api/__tests__/router.test.ts
@@ -319,7 +319,7 @@ test("An Upload Flow", async () => {
     filename,
     id,
     skin_md5: md5,
-    status: "UPLOAD_REPORTED",
+    status: "UPLOAD_REPRTED",
   });
 });
 
diff --git a/packages/skin-database/cli.ts b/packages/skin-database/cli.ts
index 78902913..aaea5fe8 100755
--- a/packages/skin-database/cli.ts
+++ b/packages/skin-database/cli.ts
@@ -5,7 +5,7 @@ import { argv } from "yargs";
 import logger from "./logger";
 import DiscordWinstonTransport from "./DiscordWinstonTransport";
 import * as Skins from "./data/skins";
-import Discord from "discord.js";
+import Discord, { RichEmbed, TextChannel } from "discord.js";
 import { tweet } from "./tasks/tweet";
 import { addSkinFromBuffer } from "./addSkin";
 import * as SkinHash from "./skinHash";
@@ -16,7 +16,8 @@ import { screenshot } from "./tasks/screenshotSkin";
 import Shooter from "./shooter";
 import UserContext from "./data/UserContext";
 import { integrityCheck } from "./tasks/integrityCheck";
-import { syncWithArchive } from "./tasks/syncWithArchive";
+import { ensureWebampLinks, syncWithArchive } from "./tasks/syncWithArchive";
+import { syncFromArchive } from "./tasks/syncFromArchive";
 
 async function main() {
   const client = new Discord.Client();
@@ -25,6 +26,12 @@ async function main() {
 
   try {
     switch (argv._[0]) {
+      case "ensure-webamp-links":
+        await ensureWebampLinks();
+        break;
+      case "sync-from-ia":
+        await syncFromArchive();
+        break;
       case "sync-ia":
         await syncWithArchive();
         break;
@@ -118,12 +125,6 @@ async function main() {
         await tweet(client, null);
         break;
       }
-      case "skin": {
-        const hash = argv._[1];
-        console.log(await Skins.getSkinDebugData(hash));
-        break;
-      }
-
       case "stats": {
         console.log(await Skins.getStats());
         break;
diff --git a/packages/skin-database/tasks/syncFromArchive.ts b/packages/skin-database/tasks/syncFromArchive.ts
new file mode 100644
index 00000000..bf52edfb
--- /dev/null
+++ b/packages/skin-database/tasks/syncFromArchive.ts
@@ -0,0 +1,179 @@
+import { knex } from "../db";
+import fetch from "node-fetch";
+import UserContext from "../data/UserContext";
+import SkinModel from "../data/SkinModel";
+import child_process from "child_process";
+import * as Parallel from "async-parallel";
+import util from "util";
+// Currently only referenced by the disabled delete step in `ensureIaRecord`.
+const exec = util.promisify(child_process.exec);
+
+/**
+ * Split `items` into consecutive arrays of at most `chunkSize` elements.
+ *
+ * @throws RangeError if `chunkSize` is less than 1 (the loop would never end).
+ */
+function chunk<T>(items: T[], chunkSize: number): T[][] {
+  if (chunkSize < 1) {
+    throw new RangeError(`chunkSize must be at least 1, got ${chunkSize}`);
+  }
+  const chunks: T[][] = [];
+  for (let i = 0; i < items.length; i += chunkSize) {
+    chunks.push(items.slice(i, i + chunkSize));
+  }
+  return chunks;
+}
+
+/** Concatenate the rows of `matrix` into a single array, preserving order. */
+function flatten<T>(matrix: T[][]): T[] {
+  const flat: T[] = [];
+  matrix.forEach((arr) => {
+    flat.push(...arr);
+  });
+  return flat;
+}
+
+/**
+ * Return the subset of `identifiers` that has no row in `ia_items`, using a
+ * single `whereIn` query.
+ */
+async function _filterOutKnownIdentifiers(
+  identifiers: string[]
+): Promise<string[]> {
+  const found = await knex("ia_items")
+    .whereIn("identifier", identifiers)
+    .select(["identifier"]);
+  const foundSet = new Set(found.map((row) => row.identifier));
+  return identifiers.filter((id) => !foundSet.has(id));
+}
+
+/**
+ * Return the subset of `identifiers` that has no row in `ia_items`.
+ *
+ * The lookup is split into chunks of 800 identifiers, with up to 10 chunk
+ * queries in flight at once.
+ */
+async function filterOutKnownIdentifiers(
+  identifiers: string[]
+): Promise<string[]> {
+  const matrix = await Parallel.map(
+    chunk(identifiers, 800),
+    (ids) => _filterOutKnownIdentifiers(ids),
+    10
+  );
+  return flatten(matrix);
+}
+
+const CONCURRENT = 5;
+// `rows` value sent to the search API; also used to detect truncation.
+const MAX_ROWS = 100000;
+
+// Build the URL to get all wsz files
+function getSearchUrl(): string {
+  const url = new URL("https://archive.org/advancedsearch.php");
+  const queryString =
+    "(collection:winampskins OR collection:winampskinsmature) skintype:wsz";
+  url.searchParams.set("q", queryString);
+  url.searchParams.append("fl[]", "identifier");
+  url.searchParams.set("rows", String(MAX_ROWS));
+  url.searchParams.set("page", "1");
+  url.searchParams.set("output", "json");
+  return url.toString();
+}
+
+/**
+ * Fetch the identifiers of every item returned by the search built in
+ * `getSearchUrl`.
+ *
+ * Logs an error (but still returns what it got) if the result looks truncated.
+ */
+async function allItems(): Promise<string[]> {
+  const r = await fetch(getSearchUrl());
+  const result = await r.json();
+  const response = result.response;
+  const numFound = response.numFound;
+  const items = response.docs;
+  if (items.length !== numFound) {
+    console.error(`Expected to find ${numFound} items but saw ${items.length}`);
+  }
+  if (items.length === MAX_ROWS) {
+    console.error(
+      `We've hit the max number of items. We are likely missing some.`
+    );
+  }
+  return items.map((item: { identifier: string }) => item.identifier);
+}
+
+/**
+ * Record the archive.org item `identifier` in `ia_items` if it holds exactly
+ * one `.wsz` file whose md5 matches a skin we already know about.
+ *
+ * Every other case is logged and skipped. Items without any `.wsz` file are
+ * candidates for deletion, but the delete command is currently disabled.
+ */
+async function ensureIaRecord(
+  ctx: UserContext,
+  identifier: string
+): Promise<void> {
+  const r = await fetch(`https://archive.org/metadata/${identifier}`);
+  const response = await r.json();
+  const files = response.files;
+  if (!Array.isArray(files)) {
+    // Without a file list we cannot tell whether the item holds a skin, so do
+    // not let it fall through to the "no skins" (delete) branch.
+    console.error(`Could not read the file list for "${identifier}"`);
+    return;
+  }
+  const skins = files.filter((file) => file.name.endsWith(".wsz"));
+  if (skins.length === 0) {
+    // TODO TODO TODO TODO
+    // TODO TODO TODO TODO
+    //
+    // What if the skin ends in .zip?
+    //
+    // TODO TODO TODO TODO
+    // TODO TODO TODO TODO
+    const command = `ia delete ${identifier} --all`;
+    // The delete is disabled, so report what would run instead of claiming
+    // the item was deleted.
+    // await exec(command, { encoding: "utf8" });
+    console.log(`No skins found in ${identifier}. Would run: ${command}`);
+    return;
+  }
+  if (skins.length !== 1) {
+    console.error(
+      `Expected to find one skin file for "${identifier}", found ${skins.length}`
+    );
+    return;
+  }
+  const md5 = skins[0].md5;
+  const skin = await SkinModel.fromMd5(ctx, md5);
+  if (skin == null) {
+    console.error(
+      `We don't have a record for the skin found in "${identifier}"`
+    );
+    return;
+  }
+
+  await knex("ia_items").insert({ skin_md5: md5, identifier });
+  console.log(`Inserted "${identifier}".`);
+}
+
+/**
+ * Find archive.org items that are missing from `ia_items` and try to record
+ * each one, processing `CONCURRENT` items at a time.
+ */
+export async function syncFromArchive() {
+  const ctx = new UserContext();
+  // Ensure we know about all items in the `winampskins` collection.
+  console.log("Going to ensure we know about all archive items");
+  const items = await allItems();
+  const unknownItems = await filterOutKnownIdentifiers(items);
+  await Parallel.each(
+    unknownItems,
+    async (identifier) => {
+      await ensureIaRecord(ctx, identifier);
+    },
+    CONCURRENT
+  );
+}
diff --git a/packages/skin-database/tasks/syncWithArchive.ts b/packages/skin-database/tasks/syncWithArchive.ts
index 86b365b9..6bb3f799 100644
--- a/packages/skin-database/tasks/syncWithArchive.ts
+++ b/packages/skin-database/tasks/syncWithArchive.ts
@@ -8,74 +8,13 @@ import UserContext from "../data/UserContext";
 import SkinModel from "../data/SkinModel";
 import util from "util";
 import * as Parallel from "async-parallel";
+import IaItemModel from "../data/IaItemModel";
 const exec = util.promisify(child_process.exec);
 
 const CONCURRENT = 5;
 
 const temp = _temp.track();
 
-async function allItems(): Promise {
-  const r = await fetch(
-    "https://archive.org/advancedsearch.php?q=collection%3Awinampskins+skintype%3Awsz&fl%5B%5D=identifier&fl%5B%5D=skintype&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=100000&page=1&output=json&save=yes"
-  );
-  const result = await r.json();
-  const response = result.response;
-  const numFound = response.numFound;
-  const items = response.docs;
-  if (items.length !== numFound) {
-    console.error(`Expected to find ${numFound} items but saw ${items.length}`);
-  }
-  items.forEach((item) => {
-    if (item.skintype !== "wsz") {
-      throw new Error(`${item.identifier} has skintype of ${item.skintype}`);
-    }
-  });
-  return items.map((item: { identifier: string }) => item.identifier);
-}
-
-async function ensureIaRecord(
-  ctx: UserContext,
-  identifier: string
-): Promise {
-  const dbItem = await knex("ia_items").where({ identifier }).first();
-  if (dbItem) {
-    return;
-  }
-  const r = await fetch(`https://archive.org/metadata/${identifier}`);
-  const response = await r.json();
-  const files = response.files;
-  const skins = files.filter((file) => file.name.endsWith(".wsz"));
-  if (skins.length !== 1) {
-    console.error(
-      `Expected to find one skin file for "${identifier}", found ${skins.length}`
-    );
-    return;
-  }
-  const md5 = skins[0].md5;
-  const skin = await SkinModel.fromMd5(ctx, md5);
-  if (skin == null) {
-    console.error(
-      `We don't have a record for the skin found in "${identifier}"`
-    );
-    return;
-  }
-
-  await knex("ia_items").insert({ skin_md5: md5, identifier });
-  console.log(`Inserted "${identifier}".`);
-}
-
-// eslint-disable-next-line @typescript-eslint/no-unused-vars
-async function collectExistingItems(ctx: UserContext) {
-  const items = await allItems();
-  await Parallel.each(
-    items,
-    async (identifier) => {
-      await ensureIaRecord(ctx, identifier);
-    },
-    CONCURRENT
-  );
-}
-
 function sanitize(name: string): string {
   return name.replace(/[^A-Za-z0-9_\-.]/g, "_").replace(/^\d*/, "");
 }
@@ -92,21 +31,31 @@ async function downloadToTemp(url: string, filename: string): Promise {
   return tempFile;
 }
 
+/**
+ * Check whether `identifier` is already in use.
+ *
+ * Looks for a case-insensitive match in `ia_items` first, then falls back to
+ * `ia metadata`, treating any non-empty metadata object as "exists".
+ */
+export async function identifierExists(identifier: string): Promise<boolean> {
+  const existing = await knex("ia_items")
+    .whereRaw("LOWER(identifier) = LOWER(?)", identifier)
+    .select([]);
+  if (existing.length > 0) {
+    return true;
+  }
+  const result = await exec(`ia metadata ${identifier}`);
+  const data = JSON.parse(result.stdout);
+  return Object.keys(data).length > 0;
+}
+
 async function getNewIdentifier(filename: string): Promise {
   const identifierBase = `winampskins_${sanitize(path.parse(filename).name)}`;
   let counter = 0;
   function getIdentifier() {
     return identifierBase + (counter === 0 ? "" : `_${counter}`);
   }
-  // eslint-disable-next-line no-constant-condition
-  while (true) {
-    const existing = await knex("ia_items").whereRaw(
-      "LOWER(identifier) = LOWER(?)",
-      getIdentifier()
-    );
-    if (existing.length === 0) {
-      break;
-    }
+  while (await identifierExists(getIdentifier())) {
     counter++;
   }
   return getIdentifier();
@@ -115,7 +64,9 @@ async function getNewIdentifier(filename: string): Promise {
 async function archive(skin: SkinModel): Promise {
   const filename = await skin.getFileName();
   if (filename == null) {
-    throw new Error(`Could archive skin. Filename not found. ${skin.getMd5()}`);
+    throw new Error(
+      `Couldn't archive skin. Filename not found. ${skin.getMd5()}`
+    );
   }
 
   if (
@@ -189,13 +140,11 @@ const CORRUPT = new Set([
   "04d172dc3f08d7fc1c9a047db956ea5d",
   "515941f5dee8ab399bd0e58d0a116274",
   "6b00596f4519fcc9d8bff7a69194333a",
+  "0f2cd2d789d9194e3ef6525a8f00f5fd",
 ]);
 
 export async function syncWithArchive() {
   const ctx = new UserContext();
-  // Ensure we know about all items in the `winampskins` collection.
-  // console.log("Going to ensure we know about all archive items");
-  // await collectExistingItems(ctx);
   console.log("Checking which new skins we have...");
   const unarchived = await knex("skins")
     .leftJoin("ia_items", "ia_items.skin_md5", "=", "skins.md5")
@@ -235,3 +184,74 @@ export async function syncWithArchive() {
     CONCURRENT
   );
 }
+
+// Build the URL to get all wsz files
+function getSearchUrl(): string {
+  const url = new URL("https://archive.org/advancedsearch.php");
+  // https://stackoverflow.com/a/11890368/1263117
+  const queryString =
+    "(collection:winampskins OR collection:winampskinsmature) skintype:wsz -webamp:[* TO *]";
+  url.searchParams.set("q", queryString);
+  url.searchParams.append("fl[]", "identifier");
+  url.searchParams.append("fl[]", "webamp");
+  url.searchParams.set("rows", "100000");
+  url.searchParams.set("page", "1");
+  url.searchParams.set("output", "json");
+  return url.toString();
+}
+
+/**
+ * Print a webamp.org link for each archive.org item returned by
+ * `getSearchUrl`.
+ *
+ * Items are skipped (with a log message) when we have no `IaItemModel` for
+ * them, when they do not hold exactly one `.wsz` file, or when that file's
+ * md5 differs from the one we have on record.
+ */
+export async function ensureWebampLinks() {
+  const ctx = new UserContext();
+  const r = await fetch(getSearchUrl());
+  const result = await r.json();
+  const response = result.response;
+  const items: { identifier: string }[] = response.docs;
+  await Parallel.each(
+    items,
+    async ({ identifier }) => {
+      const iaItem = await IaItemModel.fromIdentifier(ctx, identifier);
+      if (iaItem == null) {
+        console.log(`Found an IA item we are missing: "${identifier}"`);
+        return;
+      }
+      const r = await fetch(`https://archive.org/metadata/${identifier}`);
+      const response = await r.json();
+      const files = response.files;
+      if (!Array.isArray(files)) {
+        console.warn(`Could not read the file list for ${identifier}`);
+        return;
+      }
+      const skins = files.filter((file) => file.name.endsWith(".wsz"));
+      if (skins.length === 0) {
+        console.warn(`Could not find any skin file for ${identifier}`);
+        return;
+      }
+      if (skins.length > 1) {
+        console.warn(`Too many skin files for ${identifier}`);
+        return;
+      }
+
+      const skin = skins[0];
+      if (skin.md5 !== iaItem.getMd5()) {
+        console.error(`Md5 mismatch for ${identifier}`);
+        return;
+      }
+      const skinUrl = `https://archive.org/cors/${identifier}/${encodeURIComponent(
+        skin.name
+      )}`;
+
+      const webampLink = new URL("https://webamp.org");
+      webampLink.searchParams.set("skinUrl", skinUrl);
+      console.log(webampLink.toString());
+    },
+    CONCURRENT
+  );
+}
diff --git a/packages/skin-database/tasks/tweet.ts b/packages/skin-database/tasks/tweet.ts
index 546acf15..f40d6dfd 100644
--- a/packages/skin-database/tasks/tweet.ts
+++ b/packages/skin-database/tasks/tweet.ts
@@ -8,6 +8,7 @@ import sharp from "sharp";
 import { getTwitterClient } from "../twitter";
 import SkinModel from "../data/SkinModel";
 import UserContext from "../data/UserContext";
+import TweetModel from "../data/TweetModel";
 const temp = _temp.track();
 
 export async function tweet(discordClient: Client, anything: string | null) {
@@ -68,8 +69,13 @@ export async function tweet(discordClient: Client, anything: string | null) {
     return;
   }
   await Skins.markAsTweeted(tweetableSkin.getMd5(), tweetId);
+
+  const postedTweet = await TweetModel.fromTweetId(ctx, tweetId);
+  if (postedTweet == null) {
+    throw new Error(`Could not locate tweet with ID "${tweetId}"`);
+  }
   // @ts-ignore
-  await tweetBotChannel.send(output.trim());
+  await tweetBotChannel.send(postedTweet.getUrl());
   const remainingSkinCount = await Skins.getTweetableSkinCount();
   if (remainingSkinCount < 10) {
     // @ts-ignore