diff --git a/packages/skin-database/__mocks__/s3.js b/packages/skin-database/__mocks__/s3.js index 76f1062b..fdd7c057 100644 --- a/packages/skin-database/__mocks__/s3.js +++ b/packages/skin-database/__mocks__/s3.js @@ -2,4 +2,11 @@ function getSkinUploadUrl(_md5, _id) { return ""; } -module.exports = { getSkinUploadUrl: jest.fn(getSkinUploadUrl) }; +function getUploadedSkin(_md5) { + return null; +} + +module.exports = { + getSkinUploadUrl: jest.fn(getSkinUploadUrl), + getUploadedSkin: jest.fn(getUploadedSkin), +}; diff --git a/packages/skin-database/addSkin.ts b/packages/skin-database/addSkin.ts index 2c3ee38d..7ff6dd42 100644 --- a/packages/skin-database/addSkin.ts +++ b/packages/skin-database/addSkin.ts @@ -8,6 +8,7 @@ import * as Analyser from "./analyser"; import { SkinType } from "./types"; import SkinModel from "./data/SkinModel"; import UserContext from "./data/UserContext"; +import JSZip from "jszip"; // TODO Move this into the function so that we clean up on each run? const temp = _temp.track(); @@ -34,7 +35,8 @@ export async function addSkinFromBuffer( } // Note: This will thrown on invalid skins. 
- const skinType = await Analyser.getSkinType(buffer); + const zip = await JSZip.loadAsync(buffer); + const skinType = await Analyser.getSkinType(zip); switch (skinType) { case "CLASSIC": @@ -84,7 +86,8 @@ async function addClassicSkinFromBuffer( await S3.putScreenshot(md5, fs.readFileSync(tempScreenshotPath)); await S3.putSkin(md5, buffer, "wsz"); - const readmeText = await Analyser.getReadme(buffer); + const zip = await JSZip.loadAsync(buffer); + const readmeText = await Analyser.getReadme(zip); await Skins.addSkin({ md5, filePath, diff --git a/packages/skin-database/analyser.ts b/packages/skin-database/analyser.ts index 2774ebf7..d1ae3f93 100644 --- a/packages/skin-database/analyser.ts +++ b/packages/skin-database/analyser.ts @@ -4,33 +4,30 @@ import { knex } from "./db"; import JSZip from "jszip"; import { SkinType } from "./types"; import * as Skins from "./data/skins"; -import fetch from "node-fetch"; +import SkinModel from "./data/SkinModel"; -export async function setReadmeForSkin(skinMd5: string): Promise { - const url = Skins.getSkinUrl(skinMd5); - const response = await fetch(url); - if (!response.ok) { - console.error(`Could not fetch skin at "${url}"`); - return; - } - const body = await response.buffer(); +export async function setReadmeForSkin(skin: SkinModel): Promise { let text: string | null; try { - text = await getReadme(body); + text = await getReadme(await skin.getZip()); } catch (e) { console.error(e.message); return; } - await knex("skins").where({ md5: skinMd5 }).update({ readme_text: text }); - await Skins.updateSearchIndex(skinMd5); + + if (skin.getReadme() !== text) { + await knex("skins") + .where({ md5: skin.getMd5() }) + .update({ readme_text: text }); + await Skins.updateSearchIndex(skin.getMd5()); + } } const IS_README = /(file_id\.diz)|(\.txt)$/i; // Skinning Updates.txt ? 
const IS_NOT_README = /(genex\.txt)|(genexinfo\.txt)|(gen_gslyrics\.txt)|(region\.txt)|(pledit\.txt)|(viscolor\.txt)|(winampmb\.txt)|("gen_ex help\.txt)|(mbinner\.txt)$/i; -export async function getReadme(buffer: Buffer): Promise { - const zip = await JSZip.loadAsync(buffer); +export async function getReadme(zip: JSZip): Promise { const readmeFiles = zip.filter((filePath) => { return IS_README.test(filePath) && !IS_NOT_README.test(filePath); }); @@ -45,8 +42,7 @@ export async function getReadme(buffer: Buffer): Promise { return readme.async("text"); } -export async function getSkinType(buffer: Buffer): Promise { - const zip = await JSZip.loadAsync(buffer); +export async function getSkinType(zip: JSZip): Promise { if (zip.file(/main\.bmp$/i).length > 0) { return "CLASSIC"; } diff --git a/packages/skin-database/api/__tests__/__snapshots__/router.test.ts.snap b/packages/skin-database/api/__tests__/__snapshots__/router.test.ts.snap new file mode 100644 index 00000000..9c217c67 --- /dev/null +++ b/packages/skin-database/api/__tests__/__snapshots__/router.test.ts.snap @@ -0,0 +1,45 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`/skins/a_fake_md5/debug 1`] = ` +Object { + "archiveFiles": Array [ + Object { + "row": Object { + "file_date": 819188640000, + "file_md5": "a_fake_file_md5", + "file_name": null, + "id": 1, + "skin_md5": "a_fake_md5", + }, + }, + ], + "files": Array [ + Object { + "row": Object { + "file_path": "/a/fake/path.wsz", + "id": 50, + "skin_md5": "a_fake_md5", + "source_attribution": null, + }, + }, + ], + "iaItem": Object { + "row": Object { + "id": 8, + "identifier": "a_fake_ia_identifier", + "skin_md5": "a_fake_md5", + }, + }, + "reviews": Array [], + "row": Object { + "content_hash": null, + "emails": "", + "id": 50, + "md5": "a_fake_md5", + "readme_text": null, + "skin_type": 1, + }, + "tweets": Array [], + "uploadStatuses": Array [], +} +`; diff --git a/packages/skin-database/api/__tests__/router.test.ts 
b/packages/skin-database/api/__tests__/router.test.ts index 0a4c2a9e..87caa513 100644 --- a/packages/skin-database/api/__tests__/router.test.ts +++ b/packages/skin-database/api/__tests__/router.test.ts @@ -142,6 +142,13 @@ describe("/skins/", () => { }); }); +test("/skins/a_fake_md5/debug", async () => { + const { body } = await request(app) + .get("/skins/a_fake_md5/debug") + .expect(200); + expect(body).toMatchSnapshot(); +}); + test("/skins/a_fake_md5/report", async () => { const { body } = await request(app) .post("/skins/a_fake_md5/report") diff --git a/packages/skin-database/api/router.ts b/packages/skin-database/api/router.ts index aa54a296..c15b72b1 100644 --- a/packages/skin-database/api/router.ts +++ b/packages/skin-database/api/router.ts @@ -139,6 +139,20 @@ router.get( }) ); +router.get( + "/skins/:md5/debug", + asyncHandler(async (req, res) => { + const { md5 } = req.params; + const skin = await SkinModel.fromMd5(req.ctx, md5); + if (skin == null) { + req.log(`Details for hash "${md5}" NOT FOUND`); + res.status(404).json(); + return; + } + res.json(await skin.debug()); + }) +); + function requireAuthed(req, res, next) { if (!req.ctx.authed()) { res.status(403); diff --git a/packages/skin-database/cli.ts b/packages/skin-database/cli.ts index a683d396..23c44742 100755 --- a/packages/skin-database/cli.ts +++ b/packages/skin-database/cli.ts @@ -8,23 +8,25 @@ import * as Skins from "./data/skins"; import Discord from "discord.js"; import { tweet } from "./tasks/tweet"; import { addSkinFromBuffer } from "./addSkin"; -import * as SkinHash from "./skinHash"; -import * as Analyser from "./analyser"; import { searchIndex } from "./algolia"; import { scrapeLikeData } from "./tasks/scrapeLikes"; -import { screenshot } from "./tasks/screenshotSkin"; -import Shooter from "./shooter"; import UserContext from "./data/UserContext"; import { integrityCheck } from "./tasks/integrityCheck"; import { ensureWebampLinks, syncWithArchive } from "./tasks/syncWithArchive"; 
import { syncFromArchive } from "./tasks/syncFromArchive"; +import { getSkinsToRefresh, refreshSkins } from "./tasks/refresh"; import { processUserUploads } from "./api/processUserUploads"; import DiscordEventHandler from "./api/DiscordEventHandler"; +import SkinModel from "./data/SkinModel"; +import { chunk } from "./utils"; +import rl from "readline"; async function main() { const client = new Discord.Client(); // The Winston transport logs in the client. await DiscordWinstonTransport.addToLogger(client, logger); + const ctx = new UserContext("CLI"); + const handler = new DiscordEventHandler(); try { switch (argv._[0]) { @@ -48,66 +50,69 @@ async function main() { await Skins.reject(new UserContext("CLI"), md5); break; } - case "screenshots": { - const stdinBuffer = fs.readFileSync(0); // STDIN_FILENO = 0 - let md5s = stdinBuffer.toString().trim().split("\n"); - if (md5s.length === 0) { - md5s = await Skins.getSkinsToShoot(1000); - } - await Shooter.withShooter(async (shooter: Shooter) => { - for (const md5 of md5s) { - await screenshot(md5, shooter); + case "refresh": { + const md5 = argv._[1]; + if (md5 != null) { + const skin = await SkinModel.fromMd5(ctx, md5); + if (skin == null) { + throw new Error(`Could not find skin ${md5}`); } - }); - break; - } - case "readme": { - const rows = await knex.raw( - 'SELECT md5 FROM files LEFT JOIN skins on skins.md5 = files.skin_md5 WHERE source_attribution = "Web API" AND readme_text IS NULL;' - ); + refreshSkins([skin]); + } else { + const toRefresh = await getSkinsToRefresh(ctx, 100); - const hashes = rows.map(({ md5 }) => md5); - for (const hash of hashes) { - console.log(`Setting readme for ${hash}`); - await Analyser.setReadmeForSkin(hash); + const chunks = chunk(toRefresh, toRefresh.length / 3); + + await Promise.all(chunks.map(refreshSkins)); } break; } - case "content-hash": { - const rows = await knex("skins") - .leftJoin("archive_files", "archive_files.skin_md5", "=", "skins.md5") - .whereNot("skin_md5", null) 
- .where("content_hash", null) - // This is just because our URL schema sucks - .where("skin_type", Skins.SKIN_TYPE.CLASSIC) - .groupBy("md5") - .select("md5"); + case "nested": { + const nested = await knex("archive_files") + .select("archive_files.skin_md5", "file_name") + .leftJoin("skins", "skins.md5", "=", "file_md5") + .where(function () { + //this.where("file_name", "like", "%.wsz"); + this.orWhere("file_name", "like", "%.zip"); + }) + .where("skins.md5", "IS", null); - console.log(`Found ${rows.length} rows`); - - for (const { md5 } of rows) { - const hash = await Skins.setContentHash(md5); - console.log(hash); + for (const row of nested) { + const url = `https://zip-worker.jordan1320.workers.dev/zip/${ + row.skin_md5 + }/${encodeURI(row.file_name)}`; + console.log(url); } - break; - } - case "hash": { - const rows = await knex("skins") - .leftJoin("archive_files", "archive_files.skin_md5", "=", "skins.md5") - .where("skin_md5", null) - // This is just because our URL schema sucks - .where("skin_type", Skins.SKIN_TYPE.CLASSIC) - .select("md5"); - - for (const { md5 } of rows) { - console.log(md5); - try { - await SkinHash.setHashesForSkin(md5); - await Skins.setContentHash(md5); - } catch (e) { - console.error(e); + /* + const query = `SELECT skin_md5, error + FROM refreshes + WHERE + error LIKE "Not a skin%";`; + const rows = await knex.raw(query); + for (const row of rows) { + const files = await knex("archive_files") + .where("skin_md5", row.skin_md5) + .select(); + console.log("Download:", Skins.getSkinUrl(row.skin_md5)); + // const url = `; + console.table( + files.map((f) => ({ + file_name: f.file_name, + url: `https://zip-worker.jordan1320.workers.dev/zip/${ + row.skin_md5 + }/${encodeURI(f.file_name)}`, + })), + ["file_name", "url"] + ); + const answer = await ask("skip (s), delete (d)"); + switch (answer) { + case "s": + break; + case "d": + await Skins.deleteSkin(row.skin_md5); } } + */ break; } case "tweet": { @@ -179,7 +184,21 @@ async function 
main() { knex.destroy(); logger.close(); client.destroy(); + await handler.dispose(); } } +function ask(question): Promise { + return new Promise((resolve) => { + const r = rl.createInterface({ + input: process.stdin, + output: process.stdout, + }); + r.question(question + "\n", function (answer) { + r.close(); + resolve(answer); + }); + }); +} + main(); diff --git a/packages/skin-database/data/SkinModel.ts b/packages/skin-database/data/SkinModel.ts index 1d6e5c7b..1734958b 100644 --- a/packages/skin-database/data/SkinModel.ts +++ b/packages/skin-database/data/SkinModel.ts @@ -1,5 +1,11 @@ import { getScreenshotUrl, getSkinUrl } from "./skins"; -import { TweetStatus, SkinRow, ReviewRow, UploadStatus } from "../types"; +import { + TweetStatus, + SkinRow, + ReviewRow, + UploadStatus, + SkinType, +} from "../types"; import UserContext, { ctxWeakMapMemoize } from "./UserContext"; import TweetModel, { TweetDebugData } from "./TweetModel"; import IaItemModel, { IaItemDebugData } from "./IaItemModel"; @@ -10,6 +16,8 @@ import { knex } from "../db"; import UploadModel, { UploadDebugData } from "./UploadModel"; import ArchiveFileModel, { ArchiveFileDebugData } from "./ArchiveFileModel"; import * as Skins from "./skins"; +import fetch from "node-fetch"; +import JSZip from "jszip"; export default class SkinModel { constructor(readonly ctx: UserContext, readonly row: SkinRow) {} @@ -50,6 +58,16 @@ export default class SkinModel { return row != null; } + getSkinType(): SkinType { + switch (this.row.skin_type) { + case 1: + return "CLASSIC"; + case 2: + return "MODERN"; + } + throw new Error(`Unknown skin_type ${this.row.skin_type}`); + } + async tweeted(): Promise { return (await this.getTweet()) != null; } @@ -147,6 +165,23 @@ export default class SkinModel { return getSkinUrl(this.row.md5); } + getBuffer = mem( + async (): Promise => { + const response = await fetch(this.getSkinUrl()); + if (!response.ok) { + throw new Error(`Could not fetch skin at "${this.getSkinUrl()}"`); 
+ } + return response.buffer(); + } + ); + + getZip = mem( + async (): Promise => { + const buffer = await this.getBuffer(); + return JSZip.loadAsync(buffer); + } + ); + async debug(): Promise<{ row: SkinRow; reviews: ReviewRow[]; @@ -192,3 +227,13 @@ const getReviewsLoader = ctxWeakMapMemoize>( return md5s.map((md5) => rows.filter((x) => x.skin_md5 === md5)); }) ); + +function mem(fn: () => T): () => T { + let cached: T | null = null; + return () => { + if (cached == null) { + cached = fn(); + } + return cached; + }; +} diff --git a/packages/skin-database/data/__tests__/skins.test.ts b/packages/skin-database/data/__tests__/skins.test.ts index 154e6d33..ad17d4fd 100644 --- a/packages/skin-database/data/__tests__/skins.test.ts +++ b/packages/skin-database/data/__tests__/skins.test.ts @@ -140,6 +140,17 @@ describe("seeded", () => { ] `); }); + test("getMuseumPage does not include skins with errors", async () => { + await knex("refreshes").insert({ + skin_md5: "48bbdbbeb03d347e59b1eebda4d352d0", + error: "Whoops", + }); + const page = await Skins.getMuseumPage({ offset: 0, first: 10 }); + const hasZelda = page.some( + (skin) => skin.md5 === "48bbdbbeb03d347e59b1eebda4d352d0" + ); + expect(hasZelda).toBe(false); + }); test("getStats", async () => { expect(await Skins.getStats()).toMatchInlineSnapshot(` Object { @@ -160,6 +171,13 @@ describe("seeded", () => { md5: "an_approved_md5", }); }); + test("getSkinToTweet does not include skins with errors", async () => { + await knex("refreshes").insert({ + skin_md5: "an_approved_md5", + error: "Whoops", + }); + expect(await Skins.getSkinToTweet()).toBe(null); + }); test("getSkinToReview", async () => { expect(Skins.getSkinToReview()).resolves.toEqual({ filename: expect.any(String), diff --git a/packages/skin-database/data/skins.ts b/packages/skin-database/data/skins.ts index b785a6da..ba570f28 100644 --- a/packages/skin-database/data/skins.ts +++ b/packages/skin-database/data/skins.ts @@ -196,6 +196,8 @@ export async function 
deleteSkin(md5: string): Promise { console.log(`Deleting skin ${md5}...`); console.log(`... sqlite "skins"`); await knex("skins").where({ md5 }).limit(1).delete(); + console.log(`... sqlite "refreshes"`); + await knex("refreshes").where({ skin_md5: md5 }).delete(); console.log(`... sqlite "files"`); await knex("files").where({ skin_md5: md5 }).delete(); console.log(`... sqlite "skin_reviews"`); @@ -363,10 +365,12 @@ export async function getSkinToTweet(): Promise<{ .leftJoin("skin_reviews", "skin_reviews.skin_md5", "=", "skins.md5") .leftJoin("tweets", "tweets.skin_md5", "=", "skins.md5") .leftJoin("files", "files.skin_md5", "=", "skins.md5") + .leftJoin("refreshes", "refreshes.skin_md5", "=", "skins.md5") .where({ "tweets.id": null, skin_type: 1, "skin_reviews.review": "APPROVED", + "refreshes.error": null, }) .groupBy("skins.md5") .select(["skins.md5", "files.file_path"]) @@ -501,30 +505,32 @@ export async function getMuseumPage({ const skins = await knex.raw( ` SELECT skins.md5, - skin_reviews.review = 'NSFW' AS nsfw, - skin_reviews.review = 'APPROVED' AS approved, - skin_reviews.review = 'REJECTED' AS rejected, - (IFNULL(tweets.likes, 0) + (IFNULL(tweets.retweets,0) * 1.5)) AS tweet_score, + skin_reviews.review = 'NSFW' AS nsfw, + skin_reviews.review = 'APPROVED' AS approved, + skin_reviews.review = 'REJECTED' AS rejected, + (IFNULL(tweets.likes, 0) + (IFNULL(tweets.retweets,0) * 1.5)) AS tweet_score, files.file_path, CASE skins.md5 - WHEN "5e4f10275dcb1fb211d4a8b4f1bda236" THEN 0 -- Base - WHEN "cd251187a5e6ff54ce938d26f1f2de02" THEN 1 -- Winamp3 Classified - WHEN "b0fb83cc20af3abe264291bb17fb2a13" THEN 2 -- Winamp5 Classified - WHEN "d6010aa35bed659bc1311820daa4b341" THEN 3 -- Bento Classified - ELSE 1000 - END priority -FROM skins - LEFT JOIN tweets ON tweets.skin_md5 = skins.md5 - LEFT JOIN skin_reviews ON skin_reviews.skin_md5 = skins.md5 + WHEN "5e4f10275dcb1fb211d4a8b4f1bda236" THEN 0 -- Base + WHEN "cd251187a5e6ff54ce938d26f1f2de02" THEN 1 -- Winamp3 
Classified + WHEN "b0fb83cc20af3abe264291bb17fb2a13" THEN 2 -- Winamp5 Classified + WHEN "d6010aa35bed659bc1311820daa4b341" THEN 3 -- Bento Classified + ELSE 1000 + END + priority +FROM skins + LEFT JOIN tweets ON tweets.skin_md5 = skins.md5 + LEFT JOIN skin_reviews ON skin_reviews.skin_md5 = skins.md5 LEFT JOIN files ON files.skin_md5 = skins.md5 -WHERE skin_type = 1 + LEFT JOIN refreshes ON refreshes.skin_md5 = skins.md5 +WHERE skin_type = 1 AND refreshes.error IS NULL GROUP BY skins.md5 -ORDER BY - priority ASC, - tweet_score DESC, - nsfw ASC, - approved DESC, - rejected ASC +ORDER BY + priority ASC, + tweet_score DESC, + nsfw ASC, + approved DESC, + rejected ASC LIMIT ? offset ?`, [first, offset] ); diff --git a/packages/skin-database/migrations/20210118140710_support_refresh.ts b/packages/skin-database/migrations/20210118140710_support_refresh.ts new file mode 100644 index 00000000..b357b5a3 --- /dev/null +++ b/packages/skin-database/migrations/20210118140710_support_refresh.ts @@ -0,0 +1,16 @@ +import * as Knex from "knex"; + +export async function up(knex: Knex): Promise { + await knex.schema.createTable("refreshes", function (table) { + table.increments(); + table.string("skin_md5").notNullable(); + table.string("error"); + table.timestamp("timestamp").defaultTo(knex.fn.now()); + table.foreign("skin_md5").references("skins.skin_md5"); + table.index("skin_md5", "idx_refreshes_skin_md5"); + }); +} + +export async function down(knex: Knex): Promise { + await knex.schema.dropTable("refreshes"); +} diff --git a/packages/skin-database/s3.js b/packages/skin-database/s3.js index fddf1d58..0ee8c3bb 100644 --- a/packages/skin-database/s3.js +++ b/packages/skin-database/s3.js @@ -23,7 +23,6 @@ function putSkin(md5, buffer, ext = "wsz") { rejectPromise(err); return; } - console.log(`Uploaded skin to ${bucketName} ${key}`); resolve(); } ); @@ -122,7 +121,6 @@ function putScreenshot(md5, buffer) { rejectPromise(err); return; } - console.log(`Uploaded screenshot to 
${bucketName} ${key}`); resolve(); } ); diff --git a/packages/skin-database/seeds/test_data.ts b/packages/skin-database/seeds/test_data.ts index c49af7cd..6da1bd6a 100644 --- a/packages/skin-database/seeds/test_data.ts +++ b/packages/skin-database/seeds/test_data.ts @@ -8,6 +8,7 @@ export async function seed(knex: Knex): Promise { await knex("ia_items").del(); await knex("tweets").del(); await knex("archive_files").del(); + await knex("refreshes").del(); // Inserts seed entries await knex("skins").insert([ { md5: "a_fake_md5", skin_type: 1, emails: "" }, diff --git a/packages/skin-database/shooter.js b/packages/skin-database/shooter.js index 0f132ab4..54db0104 100644 --- a/packages/skin-database/shooter.js +++ b/packages/skin-database/shooter.js @@ -12,13 +12,14 @@ function min(imgPath) { } export default class Shooter { - constructor(url) { + constructor(url, logger) { this._initialized = false; this._url = url; + this._log = logger ?? ((str) => console.log(str)); } - static async withShooter(cb) { - const shooter = new Shooter("https://webamp.org"); + static async withShooter(cb, logger) { + const shooter = new Shooter("https://webamp.org", logger); try { return await cb(shooter); } finally { @@ -38,10 +39,10 @@ export default class Shooter { return; } - console.log("page log:", consoleMessage.text()); + this._log("page log:", consoleMessage.text()); }); this._page.on("error", (e) => { - console.log(`Page error: ${e.toString()}`); + this._log(`Page error: ${e.toString()}`); }); const url = `${this._url}/?screenshot=1`; @@ -79,7 +80,7 @@ export default class Shooter { try { const handle = await this._page.$("#webamp-file-input"); - console.log("Goinng to try to screenshot"); + this._log("Goinng to try to screenshot"); // eslint-disable-next-line no-async-promise-executor await new Promise(async (resolve, reject) => { try { @@ -105,7 +106,7 @@ export default class Shooter { } }); - console.log("Wrote screenshot to", screenshotPath); + this._log("Wrote screenshot 
to", screenshotPath); if (minify) { min(screenshotPath); } diff --git a/packages/skin-database/skinHash.ts b/packages/skin-database/skinHash.ts index f7f39f10..1e41dace 100644 --- a/packages/skin-database/skinHash.ts +++ b/packages/skin-database/skinHash.ts @@ -1,8 +1,7 @@ import JSZip from "jszip"; import md5 from "md5"; import { knex } from "./db"; -import * as Skins from "./data/skins"; -import fetch from "node-fetch"; +import SkinModel from "./data/SkinModel"; type FileData = { fileName: string; @@ -10,32 +9,38 @@ type FileData = { date: Date; }; -async function getFileData(file: JSZip.JSZipObject): Promise { - const blob = await file.async("nodebuffer"); - return { fileName: file.name, md5: md5(blob), date: file.date }; -} - -export async function getSkinFileData(skinData: Buffer): Promise { - const zip = await JSZip.loadAsync(skinData); - return Promise.all(Object.values(zip.files).map(getFileData)); -} - -export async function setHashesForSkin(skinMd5: string): Promise { - const url = Skins.getSkinUrl(skinMd5); - const response = await fetch(url); - if (!response.ok) { - throw new Error(`Could not fetch skin at "${url}"`); +async function getFileData(file: JSZip.JSZipObject): Promise { + try { + const blob = await file.async("nodebuffer"); + return { fileName: file.name, md5: md5(blob), date: file.date }; + } catch (e) { + // TODO: We could flag these. 
+ return null; } - const body = await response.buffer(); - const hashes = await getSkinFileData(body); - const rows = hashes.map(({ fileName, md5, date }) => { +} + +export async function getSkinFileData( + skin: SkinModel +): Promise<(FileData | null)[]> { + const zip = await skin.getZip(); + return await Promise.all(Object.values(zip.files).map(getFileData)); +} + +// https://stackoverflow.com/a/46700791/1263117 +function notEmpty(value: TValue | null | undefined): value is TValue { + return value !== null && value !== undefined; +} + +export async function setHashesForSkin(skin: SkinModel): Promise { + const hashes = await getSkinFileData(skin); + const rows = hashes.filter(notEmpty).map(({ fileName, md5, date }) => { return { - skin_md5: skinMd5, + skin_md5: skin.getMd5(), file_name: fileName, file_md5: md5, file_date: date, }; }); - await knex("archive_files").where("skin_md5", skinMd5).delete(); + await knex("archive_files").where("skin_md5", skin.getMd5()).delete(); await knex("archive_files").insert(rows); } diff --git a/packages/skin-database/tasks/__tests__/__snapshots__/refresh.test.ts.snap b/packages/skin-database/tasks/__tests__/__snapshots__/refresh.test.ts.snap new file mode 100644 index 00000000..082a79a7 --- /dev/null +++ b/packages/skin-database/tasks/__tests__/__snapshots__/refresh.test.ts.snap @@ -0,0 +1,203 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`valid skin (TopazAmp) 1`] = ` +Array [ + Object { + "row": Object { + "file_date": 932523808000, + "file_md5": "fbee37aa8e23945860c1e58cd6b8a80c", + "file_name": "Balance.bmp", + "id": 7, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246040000, + "file_md5": "c6005d7d77f3ff78b57749f569582161", + "file_name": "Cbuttons.bmp", + "id": 12, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 933074154000, + "file_md5": "fdb3aee2834b78d87f760962d969779b", + "file_name": "Eqmain.bmp", + "id": 3, + "skin_md5": "a_fake_md5", 
+ }, + }, + Object { + "row": Object { + "file_date": 933073556000, + "file_md5": "c0d761d8f3363b13f99b4e691787a8eb", + "file_name": "Main.bmp", + "id": 5, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246058000, + "file_md5": "96040c73c8f4a0a8bda59be8f5cf0bef", + "file_name": "Monoster.bmp", + "id": 9, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246052000, + "file_md5": "0ad5b85bb51123c70e13b227eb689d3c", + "file_name": "Playpaus.bmp", + "id": 11, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932830402000, + "file_md5": "44faaa170461ca54203124892d697654", + "file_name": "Pledit.bmp", + "id": 6, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246036000, + "file_md5": "d7e938bdec0aa7490977450d7162446e", + "file_name": "Posbar.bmp", + "id": 13, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246026000, + "file_md5": "9d12fcaf2bad4fdf9f75c00554a051c2", + "file_name": "Shufrep.bmp", + "id": 15, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246032000, + "file_md5": "7a08e36a3f7607dff03fb54b9cc95621", + "file_name": "Text.bmp", + "id": 14, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 933074338000, + "file_md5": "2d58324dece68707e973ee5f4f072f7a", + "file_name": "Titlebar.bmp", + "id": 2, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 933073820000, + "file_md5": "890df1888deb6e37ad34077d6e433a1e", + "file_name": "eq_ex.bmp", + "id": 4, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246008000, + "file_md5": "c3f72ef137c58ae52e825e41a2129ca8", + "file_name": "mb.bmp", + "id": 16, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246060000, + "file_md5": "c9ef2c8a717f354c6dccf839de4237d2", + "file_name": 
"numbers.bmp", + "id": 8, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246054000, + "file_md5": "654653f2114144a2c079f4edfd58b5e1", + "file_name": "nums_ex.bmp", + "id": 10, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932230724000, + "file_md5": "c378b0acc505489f07adeabe9e6f3cff", + "file_name": "pledit.txt", + "id": 18, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 933074876000, + "file_md5": "baf5a61397990b5fbf5cafeadf5e5acb", + "file_name": "readme.txt", + "id": 1, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932230356000, + "file_md5": "458b893975e52f6cb35d9d7b8d9b2bb3", + "file_name": "viscolor.txt", + "id": 19, + "skin_md5": "a_fake_md5", + }, + }, + Object { + "row": Object { + "file_date": 932246004000, + "file_md5": "dc770d401d56e758ba64b57dcf999186", + "file_name": "volume.bmp", + "id": 17, + "skin_md5": "a_fake_md5", + }, + }, +] +`; + +exports[`valid skin (TopazAmp) 2`] = ` +"--[ TopazAmp v1.2 ]-- + + +about this skin +========================== + +(for v2.x of Winamp) +This is my first attempt at a Winamp skin, based on the design my upcoming web site. + + -- some bugs from v1.0 fixed + -- thanks to Wolf [http://surf.to/guf, hhc977@edu.ghs.dk] for fixing some v1.1 bugs + +--[ distribute freely, but please don't modify the images. 
thanks ]-- + +========================== + +[ (c) 1999 Kelly McLarnon ] +[ website: http://topazdesigns.com ] +[ (personal: http://topazdesigns.com/~klm) ] +[ email: klm@topazdesigns.com ] + + +" +`; diff --git a/packages/skin-database/tasks/__tests__/refresh.test.ts b/packages/skin-database/tasks/__tests__/refresh.test.ts new file mode 100644 index 00000000..6dd4961d --- /dev/null +++ b/packages/skin-database/tasks/__tests__/refresh.test.ts @@ -0,0 +1,94 @@ +import UserContext from "../../data/UserContext"; +import { knex } from "../../db"; +import { getSkinsToRefresh, refresh } from "../refresh"; +import SkinModel from "../../data/SkinModel"; +import fs from "fs"; +import Shooter from "../../shooter"; + +jest.mock("../../algolia"); +jest.mock("../../s3"); +jest.mock("../screenshotSkin"); +jest.mock("node-fetch", () => jest.fn()); + +// @ts-ignore +const shooter: Shooter = {}; + +beforeEach(async () => { + await knex.migrate.latest(); + await knex.seed.run(); +}); + +test("refresh", async () => { + const ctx = new UserContext(); + const [skin] = await getSkinsToRefresh(ctx, 1); + expect(skin.getMd5()).toEqual("a_fake_md5"); + + skin.getBuffer = async () => Buffer.from(""); + + await refresh(skin, shooter); + + const [nextToRefresh] = await getSkinsToRefresh(ctx, 1); + expect(nextToRefresh.getMd5()).not.toEqual("a_fake_md5"); +}); + +test("can't extract", async () => { + const ctx = new UserContext(); + const skin = await SkinModel.fromMd5(ctx, "a_fake_md5"); + if (skin == null) { + throw new Error("Could not find skin"); + } + skin.getBuffer = async () => Buffer.from(""); + + await refresh(skin, shooter); + const refreshRow = await knex("refreshes") + .where("skin_md5", skin.getMd5()) + .first(); + expect(refreshRow).toEqual({ + error: + "End of data reached (data length = 0, asked index = 4). 
Corrupted zip ?", + skin_md5: "a_fake_md5", + id: expect.any(Number), + timestamp: expect.stringMatching(/^[0-9]{4}-/), + }); +}); + +test("valid skin (TopazAmp)", async () => { + const ctx = new UserContext(); + const skin = await SkinModel.fromMd5(ctx, "a_fake_md5"); + if (skin == null) { + throw new Error("Could not find skin"); + } + + skin.getBuffer = async () => { + return fs.readFileSync( + "/home/captbaritone/projects/webamp/packages/webamp/demo/skins/TopazAmp1-2.wsz" + ); + }; + + await refresh(skin, shooter); + + // Check that it set the archive files + const archiveFiles = await skin.getArchiveFiles(); + + // Check archive file + expect( + await Promise.all(archiveFiles.map((file) => file.debug())) + ).toMatchSnapshot(); + + // Check readme + const skinRow = await knex("skins").where("md5", skin.getMd5()).first(); + expect(skinRow.readme_text).toMatchSnapshot(); + expect(skinRow.skin_type).toBe(1); + expect(skinRow.content_hash).toBe("512e79c5de299a6a13ee42e1bad9ac12"); + + // Check Refresh + const refreshRow = await knex("refreshes") + .where("skin_md5", skin.getMd5()) + .first(); + expect(refreshRow).toEqual({ + error: null, + skin_md5: "a_fake_md5", + id: expect.any(Number), + timestamp: expect.stringMatching(/^[0-9]{4}-/), + }); +}); diff --git a/packages/skin-database/tasks/refresh.ts b/packages/skin-database/tasks/refresh.ts new file mode 100644 index 00000000..9f7670e6 --- /dev/null +++ b/packages/skin-database/tasks/refresh.ts @@ -0,0 +1,117 @@ +import UserContext from "../data/UserContext"; +import SkinModel from "../data/SkinModel"; +import { knex } from "../db"; +import { setHashesForSkin } from "../skinHash"; +import * as Analyser from "../analyser"; +import Shooter from "../shooter"; +import { screenshot } from "./screenshotSkin"; +import * as Skins from "../data/skins"; + +// TODO Move this into the function so that we clean up on each run? 
+
+/**
+ * Check whether the skin's archive can be opened.
+ *
+ * @param skin The skin whose zip should be loaded.
+ * @returns The message of whatever `skin.getZip()` threw, or `null` if the
+ * archive loaded without throwing.
+ */
+async function getExtractionError(skin: SkinModel): Promise<string | null> {
+  try {
+    await skin.getZip();
+  } catch (e) {
+    // A thrown value is not guaranteed to be an Error. Never return
+    // `undefined` here: the caller treats a nullish result as "no error".
+    return e instanceof Error ? e.message : String(e);
+  }
+
+  return null;
+}
+
+// A task to download a skin and refresh all the data we scrape from it
+
+/**
+ * Select up to `n` skins with `skin_type` 1, ordered by
+ * `refreshes.timestamp` ascending.
+ *
+ * @param ctx Context handed to every `SkinModel` that is created.
+ * @param n Maximum number of rows to select.
+ * @param neverDone When true, only rows without a matching `refreshes` row
+ * are selected.
+ * @returns One `SkinModel` per selected row.
+ *
+ * NOTE(review): because of the left join, a skin with several `refreshes`
+ * rows presumably comes back once per row -- confirm that callers are fine
+ * with duplicates.
+ */
+export async function getSkinsToRefresh(
+  ctx: UserContext,
+  n: number,
+  neverDone = false
+): Promise<SkinModel[]> {
+  let query = knex("skins")
+    .leftJoin("refreshes", "skins.md5", "refreshes.skin_md5")
+    .where("skin_type", 1)
+    .orderBy("refreshes.timestamp", "asc")
+    .limit(n);
+
+  if (neverDone) {
+    query = query.where("refreshes.skin_md5", null);
+  }
+  const skins = await query.select();
+
+  return skins.map((row) => new SkinModel(ctx, row));
+}
+
+/**
+ * Refresh the given skins one after another, sharing a single `Shooter`.
+ *
+ * NOTE: the `skins` array passed in is mutated; each slot is deleted once its
+ * skin has been processed.
+ *
+ * @param skins The skins to refresh, in order.
+ */
+export async function refreshSkins(skins: SkinModel[]): Promise<void> {
+  const shooterLogger = () => {
+    // Don't log
+  };
+  await Shooter.withShooter(async (shooter: Shooter) => {
+    for (const [i, skin] of skins.entries()) {
+      console.log(`${i + 1}/${skins.length}: ${skin.getMd5()}`);
+      await refresh(skin, shooter);
+      console.log(`COMPLETE: ${i + 1}/${skins.length}: ${skin.getMd5()}`);
+      // We end up caching a lot of stuff (the whole skin/zip) on the model, so we can't just leave these around for the whole process.
+      delete skins[i];
+    }
+  }, shooterLogger);
+}
+
+/**
+ * Run every refresh step for one skin and record the successful run.
+ *
+ * Steps, in order: verify the archive opens, recompute hashes and the content
+ * hash, update the readme, verify the archive is still a classic skin, retake
+ * the screenshot, then insert a row into `refreshes`.
+ *
+ * @param skin The skin to refresh.
+ * @param shooter The screenshot helper passed through to `screenshot`.
+ * @throws Error if the archive cannot be opened, is not a skin, is not a
+ * classic skin, or if any step throws. No `refreshes` row is written then.
+ */
+export async function _refresh(
+  skin: SkinModel,
+  shooter: Shooter
+): Promise<void> {
+  const extractionError = await getExtractionError(skin);
+  if (extractionError != null) {
+    throw new Error(extractionError);
+  }
+  await setHashesForSkin(skin);
+  await Skins.setContentHash(skin.getMd5());
+
+  await Analyser.setReadmeForSkin(skin);
+
+  let skinType;
+  try {
+    skinType = await Analyser.getSkinType(await skin.getZip());
+  } catch {
+    throw new Error("Not a skin (no main.bmp/skin.xml)");
+  }
+  if (skinType !== "CLASSIC") {
+    // TODO:
+    // Reviews are invalid
+    // Delete from algolia
+    // Delete screenshot
+    // Tweets are invalid
+    throw new Error("Skin type mismatch");
+  }
+
+  // Retake screenshot
+  await screenshot(skin, shooter);
+
+  await knex("refreshes").insert({
+    skin_md5: skin.getMd5(),
+  });
+}
+
+/**
+ * Refresh one classic skin and record the outcome in `refreshes`.
+ *
+ * Exactly one row is written per call: the success row comes from `_refresh`,
+ * and a failure row (with the error message) is written here.
+ *
+ * @param skin The skin to refresh.
+ * @param shooter The screenshot helper passed through to `_refresh`.
+ * @throws Error if `skin.getSkinType()` is not "CLASSIC". Failures inside
+ * `_refresh` are recorded rather than rethrown.
+ */
+export async function refresh(
+  skin: SkinModel,
+  shooter: Shooter
+): Promise<void> {
+  if (skin.getSkinType() !== "CLASSIC") {
+    console.log("Not classic");
+    throw new Error("Can't refresh non-classic skins");
+  }
+  try {
+    // `_refresh` already inserts the success row; inserting again here would
+    // leave two rows for a single run.
+    await _refresh(skin, shooter);
+    console.log("Done!");
+  } catch (e) {
+    console.log("Caught error!!");
+    await knex("refreshes").insert({
+      skin_md5: skin.getMd5(),
+      error: e instanceof Error ? e.message : String(e),
+    });
+  }
+}
diff --git a/packages/skin-database/tasks/screenshotSkin.ts b/packages/skin-database/tasks/screenshotSkin.ts
index 256b8fb3..acac0177 100644
--- a/packages/skin-database/tasks/screenshotSkin.ts
+++ b/packages/skin-database/tasks/screenshotSkin.ts
@@ -1,26 +1,27 @@
 // eslint-disable-next-line
-import _temp from "temp";
+import temp from "temp";
 import fs from "fs";
-import fetch from "node-fetch";
 import md5Buffer from "md5";
 import * as S3 from "../s3";
 import * as Skins from "../data/skins";
 import * as CloudFlare from "../CloudFlare";
+import SkinModel from "../data/SkinModel";
 const Shooter
= require("../shooter"); -const temp = _temp.track(); -export async function screenshot(md5: string, shooter: typeof Shooter) { - const url = Skins.getSkinUrl(md5); - const response = await fetch(url); - if (!response.ok) { - await Skins.recordScreenshotUpdate(md5, `Failed to download from ${url}.`); - console.error(`Failed to download skin from "${url}".`); - return; +export async function screenshot(skin: SkinModel, shooter: typeof Shooter) { + let buffer: Buffer; + try { + buffer = await skin.getBuffer(); + } catch { + await Skins.recordScreenshotUpdate( + skin.getMd5(), + `Failed to get skin buffer.` + ); + throw new Error("Failed to get skin buffer"); } - const buffer = await response.buffer(); const actualMd5 = md5Buffer(buffer); - if (md5 !== actualMd5) { + if (skin.getMd5() !== actualMd5) { throw new Error("Downloaded skin had a different md5."); } @@ -29,16 +30,19 @@ export async function screenshot(md5: string, shooter: typeof Shooter) { fs.writeFileSync(tempFile, buffer); - console.log("Starting screenshot"); const success = await shooter.takeScreenshot(tempFile, tempScreenshotPath, { minify: true, - md5, + md5: skin.getMd5(), }); - if (success) { - console.log("Completed screenshot"); - await S3.putScreenshot(md5, fs.readFileSync(tempScreenshotPath)); - await CloudFlare.purgeFiles([Skins.getScreenshotUrl(actualMd5)]); - } else { - console.log(`Screenshot failed ${md5}`); + + if (!success) { + fs.unlinkSync(tempFile); + fs.unlinkSync(tempScreenshotPath); + temp.cleanupSync(); + throw new Error("Screenshot failed."); } + await S3.putScreenshot(skin.getMd5(), fs.readFileSync(tempScreenshotPath)); + await CloudFlare.purgeFiles([Skins.getScreenshotUrl(actualMd5)]); + fs.unlinkSync(tempFile); + fs.unlinkSync(tempScreenshotPath); } diff --git a/packages/skin-database/tasks/syncFromArchive.ts b/packages/skin-database/tasks/syncFromArchive.ts index bf52edfb..00cbbb56 100644 --- a/packages/skin-database/tasks/syncFromArchive.ts +++ 
b/packages/skin-database/tasks/syncFromArchive.ts
@@ -4,17 +4,10 @@ import UserContext from "../data/UserContext";
 import SkinModel from "../data/SkinModel";
 import child_process from "child_process";
 import * as Parallel from "async-parallel";
+import { chunk } from "../utils";
 import util from "util";
 const exec = util.promisify(child_process.exec);
 
-function chunk<T>(items: T[], chunkSize: number): T[][] {
-  const chunks: T[][] = [];
-  for (let i = 0; i < items.length; i += chunkSize) {
-    chunks.push(items.slice(i, i + chunkSize));
-  }
-  return chunks;
-}
-
 function flatten<T>(matrix: T[][]): T[] {
   const flat: T[] = [];
   matrix.forEach((arr) => {
diff --git a/packages/skin-database/utils.ts b/packages/skin-database/utils.ts
index b8d37b2b..721db239 100644
--- a/packages/skin-database/utils.ts
+++ b/packages/skin-database/utils.ts
@@ -11,5 +11,27 @@ export function truncate(str: string, len: number): string {
   return `${start} ########### ${end}`;
 }
 
+/**
+ * Split `items` into consecutive slices of at most `chunkSize` elements.
+ *
+ * The last slice may be shorter; an empty input yields an empty array.
+ *
+ * @param items The array to split. It is not modified.
+ * @param chunkSize Maximum slice length; must be a positive integer.
+ * @returns The slices, in input order.
+ * @throws RangeError if `chunkSize` is not a positive integer.
+ */
+export function chunk<T>(items: T[], chunkSize: number): T[][] {
+  // A step of zero (or less) would never advance `i` and loop forever.
+  if (!Number.isInteger(chunkSize) || chunkSize < 1) {
+    throw new RangeError(`chunkSize must be a positive integer: ${chunkSize}`);
+  }
+  const chunks: T[][] = [];
+  for (let i = 0; i < items.length; i += chunkSize) {
+    chunks.push(items.slice(i, i + chunkSize));
+  }
+  return chunks;
+}
+
 export const MD5_REGEX = /([a-fA-F0-9]{32})/;
 export const TWEET_SNOWFLAKE_REGEX = /([0-9]{19})/;