diff --git a/packages/skin-database/addSkin.ts b/packages/skin-database/addSkin.ts index ce14bf19..a6f07def 100644 --- a/packages/skin-database/addSkin.ts +++ b/packages/skin-database/addSkin.ts @@ -117,7 +117,8 @@ async function addClassicSkinFromBuffer( await setHashesForSkin(skin); - await Skins.updateSearchIndex(ctx, md5); + // Disable while we figure out our quota + // await Skins.updateSearchIndex(ctx, md5); return { md5, status: "ADDED", skinType: "CLASSIC" }; } diff --git a/packages/skin-database/api/DiscordEventHandler.ts b/packages/skin-database/api/DiscordEventHandler.ts index 6a02df29..06368a53 100644 --- a/packages/skin-database/api/DiscordEventHandler.ts +++ b/packages/skin-database/api/DiscordEventHandler.ts @@ -105,7 +105,7 @@ export default class DiscordEventHandler { case "SYNCED_TO_ARCHIVE": { const dest = await this.getChannel(Config.SKIN_UPLOADS_CHANNEL_ID); - const message = `Synced skins to archive.org. Success: ${action.successes.toLocaleString()} Errors: ${action.errors.toLocaleString()}.`; + const message = `Synced skins to archive.org. Success: ${action.successes.toLocaleString()} Errors: ${action.errors.toLocaleString()} Skipped: ${action.skips.toLocaleString()}.`; await dest.send(message); break; @@ -172,7 +172,8 @@ export default class DiscordEventHandler { dest, }); } else { - await DiscordUtils.sendAlreadyReviewed({ md5, dest }); + // Too much nosie + // await DiscordUtils.sendAlreadyReviewed({ md5, dest }); } } } diff --git a/packages/skin-database/api/app.ts b/packages/skin-database/api/app.ts index 80c63a10..3e98b6c2 100644 --- a/packages/skin-database/api/app.ts +++ b/packages/skin-database/api/app.ts @@ -1,6 +1,6 @@ import graphql from "./graphql"; import cors, { CorsOptions } from "cors"; -import Sentry from "@sentry/node"; +import * as Sentry from "@sentry/node"; import expressSitemapXml from "express-sitemap-xml"; import * as Skins from "../data/skins"; import express, { RequestHandler, ErrorRequestHandler, Handler } from "express"; @@ -18,13 +18,13 @@ export type ApiAction = | { type: "CLASSIC_SKIN_UPLOADED"; md5: string } | { type: "MODERN_SKIN_UPLOADED"; md5: string } | { type: "SKIN_UPLOAD_ERROR"; uploadId: string; message: string } + | { type: "GOT_FEEDBACK"; message: string; email?: string; url?: string } | { - type: "GOT_FEEDBACK"; - message: string; - email?: string | null; - url?: string | null; + type: "SYNCED_TO_ARCHIVE"; + successes: number; + errors: number; + skips: number; } - | { type: "SYNCED_TO_ARCHIVE"; successes: number; errors: number } | { type: "STARTED_SYNC_TO_ARCHIVE"; count: number } | { type: "POPULAR_TWEET"; @@ -65,7 +65,9 @@ type Options = { export function createApp({ eventHandler, logger, extraMiddleware }: Options) { const app = express(); if (Sentry) { - app.use(Sentry.Handlers.requestHandler() as RequestHandler); + Sentry.init({ + dsn: "https://0e6bc841b4f744b2953a1fe5981effe6@o68382.ingest.us.sentry.io/5508241", + }); } // https://expressjs.com/en/guide/behind-proxies.html diff --git a/packages/skin-database/api/server.ts b/packages/skin-database/api/server.ts index a1f3100c..9a79eacd 100644 --- a/packages/skin-database/api/server.ts +++ b/packages/skin-database/api/server.ts @@ -1,4 +1,3 @@ -// import Sentry from "@sentry/node"; import dotenv from "dotenv"; dotenv.config(); @@ -25,14 +24,3 @@ app.listen(port, () => { console.log(`Explore: http://localhost:${port}/graphql`); }); -// Initialize Sentry after we start listening. Any crash at start time will appear in the console and we'll notice. -/* -Sentry.init({ - dsn: - "https://0e6bc841b4f744b2953a1fe5981effe6@o68382.ingest.sentry.io/5508241", - - // We recommend adjusting this value in production, or using tracesSampler - // for finer control - tracesSampleRate: 1.0, -}); -*/ diff --git a/packages/skin-database/cli.ts b/packages/skin-database/cli.ts index 9810fbb9..dd32d9ac 100755 --- a/packages/skin-database/cli.ts +++ b/packages/skin-database/cli.ts @@ -34,6 +34,8 @@ import * as config from "./config"; import { setHashesForSkin } from "./skinHash"; import * as S3 from "./s3"; import { generateDescription } from "./services/openAi"; +import KeyValue from "./data/KeyValue"; +import detectRepacks from "./tasks/detectRepacks"; async function withHandler( cb: (handler: DiscordEventHandler) => Promise @@ -110,6 +112,12 @@ program "Delete a skin from the database, including its S3 files " + "CloudFlare cache and seach index entries." ) + .option( + "--purge", + "Purge a skin from the database, including its S3 files " + + "CloudFlare cache and seach index entries. " + + "Also prevents it from being uploaded again." + ) .option( "--hide", "Hide a skin from the museum main page. Useful for removing aparent dupes." @@ -124,13 +132,25 @@ program "--refresh", "Retake the screenshot of a skin and update the database." ) + .option("--refresh-archive-files") .option("--reject", 'Give a skin a "rejected" review.') .option("--metadata", "Push metadata to the archive.") .option("--ai", "Use AI to generate a text description of the skin.") .action( async ( md5, - { delete: del, deleteLocal, index, refresh, reject, metadata, hide, ai } + { + delete: del, + deleteLocal, + index, + refresh, + reject, + metadata, + hide, + purge, + refreshArchiveFiles, + ai, + } ) => { const ctx = new UserContext("CLI"); if (ai) { @@ -141,6 +161,15 @@ program console.log(description); console.log("===================================="); } + if (purge) { + // cat purge | xargs -I {} yarn cli skin --purge {} + await Skins.deleteSkin(md5); + const purgedArr: string[] = (await KeyValue.get("purged")) || []; + const purged = new Set(purgedArr); + purged.add(md5); + + await KeyValue.set("purged", Array.from(purged)); + } if (del) { await Skins.deleteSkin(md5); } @@ -165,6 +194,13 @@ program await SyncToArchive.updateMetadata(skin); console.log("Updated Metadata"); } + if (refreshArchiveFiles) { + const skin = await SkinModel.fromMd5Assert(ctx, md5); + if (skin == null) { + throw new Error("Can't find skin"); + } + await setHashesForSkin(skin); + } } ); @@ -352,6 +388,12 @@ program "--compute-museum-order", "Compute the order in which skins should be displayed in the museum" ) + .option("--foo", "Learn about missing skins") + .option( + "--winampskinsinfo", + "Detect skins that were broken by winampskins.info injecting an ad file" + ) + .option("--test-cloudflare", "Try to upload to cloudflare") .action(async (arg) => { const { uploadIaScreenshot, @@ -361,7 +403,17 @@ program updateSearchIndex, configureR2Cors, computeMuseumOrder, + foo, + winampskinsinfo, + testCloudflare, } = arg; + if (testCloudflare) { + const buffer = new Buffer("testing", "utf8"); + await S3.putTemp("hello", buffer); + } + if (winampskinsinfo) { + await detectRepacks(); + } if (computeMuseumOrder) { await Skins.computeMuseumOrder(); console.log("Museum order updated."); @@ -406,6 +458,23 @@ program console.log("Did not upload screenshot"); } } + if (foo) { + const ctx = new UserContext(); + const missingModernSkins = await KeyValue.get("missingModernSkins"); + const missingModernSkinsSet = new Set(missingModernSkins); + const skins = {}; + for (const md5 of missingModernSkins) { + const skin = await SkinModel.fromMd5(ctx, md5); + if (skin == null) { + continue; + } + missingModernSkinsSet.delete(md5); + } + await KeyValue.set( + "missingModernSkins", + Array.from(missingModernSkinsSet) + ); + } if (refreshArchiveFiles) { const ctx = new UserContext(); const skinRows = await knex("skins") @@ -415,13 +484,21 @@ program .where((builder) => { return builder.where("file_info.file_md5", null); }) - .limit(1000) + .limit(2000) .groupBy("skins.md5") .select(); console.log(`Found ${skinRows.length} skins to update`); + const missingModernSkins = new Set( + await KeyValue.get("missingModernSkins") + ); const skins = skinRows.map((row) => new SkinModel(ctx, row)); for (const skin of skins) { console.log("Working on", skin.getMd5(), await skin.getFileName()); + if (missingModernSkins.has(skin.getMd5())) { + console.log("NOT skipping since this one is a missingModernSkin"); + // continue + } + try { await setHashesForSkin(skin); } catch (e) { diff --git a/packages/skin-database/data/ArchiveFileModel.ts b/packages/skin-database/data/ArchiveFileModel.ts index 1a75b722..831591f5 100644 --- a/packages/skin-database/data/ArchiveFileModel.ts +++ b/packages/skin-database/data/ArchiveFileModel.ts @@ -115,9 +115,26 @@ export default class ArchiveFileModel { * It may not work for all files. * @gqlField url */ - getUrl(): string { + async getUrl(): Promise { + if (this.getIsDirectory()) { + return null; + } + const ext = await this.skinExt(); const filename = encodeURIComponent(this.getFileName()); - return `https://zip-worker.jordan1320.workers.dev/zip/${this.getMd5()}/${filename}`; + return `https://zip-worker.jordan1320.workers.dev/zip/${this.getMd5()}.${ext}/${filename}`; + } + + async skinExt(): Promise { + const skin = await this.getSkin(); + const type = skin.getSkinType(); + switch (type) { + case "CLASSIC": + return "wsz"; + case "MODERN": + return "wal"; + default: + throw new Error(`Unexpected skin type: "${type}".`); + } } async getSkin(): Promise { diff --git a/packages/skin-database/data/KeyValue.ts b/packages/skin-database/data/KeyValue.ts index c53beef0..0645555c 100644 --- a/packages/skin-database/data/KeyValue.ts +++ b/packages/skin-database/data/KeyValue.ts @@ -1,7 +1,7 @@ import { knex } from "../db"; export default class KeyValue { - static async get(key: string): Promise { + static async get(key: string): Promise { const result = await knex("key_value").where({ key }).first("value"); if (result == null) { return null; diff --git a/packages/skin-database/data/SkinModel.ts b/packages/skin-database/data/SkinModel.ts index b76d500e..c728da0a 100644 --- a/packages/skin-database/data/SkinModel.ts +++ b/packages/skin-database/data/SkinModel.ts @@ -20,11 +20,12 @@ import JSZip from "jszip"; import fs from "fs/promises"; import path from "path"; import { getTransparentAreaSize } from "../transparency"; +import KeyValue from "./KeyValue"; export const IS_README = /(file_id\.diz)|(\.txt)$/i; // Skinning Updates.txt ? export const IS_NOT_README = - /(dialogs\.txt)|(genex\.txt)|(genexinfo\.txt)|(gen_gslyrics\.txt)|(region\.txt)|(pledit\.txt)|(viscolor\.txt)|(winampmb\.txt)|("gen_ex help\.txt)|(mbinner\.txt)$/i; + /(dialogs\.txt)|(genex\.txt)|(genexinfo\.txt)|(gen_gslyrics\.txt)|(region\.txt)|(pledit\.txt)|(viscolor\.txt)|(winampmb\.txt)|("gen_ex help\.txt)|(mbinner\.txt)|(winampskins\.info\.txt)|(albumlist\.txt)|(covertag\.txt)|(1001winampskins\.com\.txt)$/i; export default class SkinModel { constructor(readonly ctx: UserContext, readonly row: SkinRow) {} @@ -190,7 +191,12 @@ export default class SkinModel { const files = await this.getArchiveFiles(); const readme = files.find((file) => { const filename = file.getFileName(); - return IS_README.test(filename) && !IS_NOT_README.test(filename); + const isReadme = IS_README.test(filename); + const isNotReadme = IS_NOT_README.test(filename); + + console.log({ filename, isReadme, isNotReadme, md5: file.getFileMd5() }); + + return isReadme && !isNotReadme; }); if (readme == null) { @@ -242,7 +248,17 @@ export default class SkinModel { } else { const response = await fetch(this.getSkinUrl()); if (!response.ok) { - throw new Error(`Could not fetch skin at "${this.getSkinUrl()}"`); + const missingModernSkins = + (await KeyValue.get("missingModernSkins")) ?? []; + const missingModernSkinsSet = new Set(missingModernSkins); + missingModernSkinsSet.add(this.getMd5()); + await KeyValue.set( + "missingModernSkins", + Array.from(missingModernSkinsSet) + ); + throw new Error( + `Could not fetch skin at "${this.getSkinUrl()}" (Marked in missingModernSkins in the KeyValue store)` + ); } return response.buffer(); } diff --git a/packages/skin-database/discord-bot/utils.ts b/packages/skin-database/discord-bot/utils.ts index a15ee6c4..2aa090fb 100644 --- a/packages/skin-database/discord-bot/utils.ts +++ b/packages/skin-database/discord-bot/utils.ts @@ -44,7 +44,6 @@ export async function postSkin({ return; } const readmeText = await skin.getReadme(); - console.log("readmeText", readmeText); const tweet = await skin.getTweet(); const tweetStatus = await skin.getTweetStatus(); const iaItem = await skin.getIaItem(); diff --git a/packages/skin-database/tasks/DeleteSkin.ts b/packages/skin-database/tasks/DeleteSkin.ts new file mode 100644 index 00000000..c612693c --- /dev/null +++ b/packages/skin-database/tasks/DeleteSkin.ts @@ -0,0 +1,8 @@ +import Task from "./ITask"; +import * as Skins from "../data/skins"; + +export default class DeleteSkin extends Task { + async run(): Promise { + await Skins.deleteSkin(md5); + } +} diff --git a/packages/skin-database/tasks/ITask.ts b/packages/skin-database/tasks/ITask.ts new file mode 100644 index 00000000..5b9cb584 --- /dev/null +++ b/packages/skin-database/tasks/ITask.ts @@ -0,0 +1,11 @@ +import UserContext from "../data/UserContext"; + +export default abstract class Task { + ctx: UserContext; + constructor(ctx: UserContext) { + this.ctx = ctx; + } + name: string; + description: string; + abstract run(): Promise; +} diff --git a/packages/skin-database/tasks/detectRepacks.ts b/packages/skin-database/tasks/detectRepacks.ts new file mode 100644 index 00000000..e5eba16f --- /dev/null +++ b/packages/skin-database/tasks/detectRepacks.ts @@ -0,0 +1,168 @@ +import KeyValue from "../data/KeyValue"; +import SkinModel from "../data/SkinModel"; +import UserContext from "../data/UserContext"; +import { knex } from "../db"; + +export default async function detectRepacks() { + const ctx = new UserContext(); + const stored = await KeyValue.get<{ [key: string]: string[] }>( + "winampskins.info" + ); + if (stored == null) { + throw new Error("Expected kv"); + } + + // console.log(stored); + + let found = 0; + let corrupt = 0; + let identical = 0; + + for (const [key, orignalCandidates] of Object.entries(stored)) { + const repack = await SkinModel.fromMd5Assert(ctx, key); + + for (const originalHash of orignalCandidates) { + const original = await SkinModel.fromMd5Assert(ctx, originalHash); + if (await isRepackOf(repack, original)) { + console.log(`${key} is a repack of ${originalHash}`); + found++; + } else if (await areIdentical(repack, original)) { + console.log(`${key} is an identical skin to ${originalHash}`); + identical++; + } else if (await isCorruptRepack(repack, original)) { + console.log(`${key} is a CORRUPT repack of ${originalHash}`); + corrupt++; + } + } + } + + console.log( + `Found ${found} originals out of ${Object.keys(stored).length} repacks` + ); + console.log( + `Found ${identical} twins out of ${Object.keys(stored).length} repacks` + ); + console.log( + `Found ${corrupt} corrupt repacks out of ${ + Object.keys(stored).length + } repacks` + ); +} + +// A skin is a repack if it contains exactly the same files except for the addition of advertizing files +async function isRepackOf( + repack: SkinModel, + original: SkinModel +): Promise { + const nonAdHashesSet = await getSkinArchiveFileHashesSansAds(repack); + const hashesSet = await getSkinArchiveFileHashes(original); + return setsAreEqual(nonAdHashesSet, hashesSet); +} + +async function isCorruptRepack( + repack: SkinModel, + original: SkinModel +): Promise { + const repackHashesSet = await getSkinArchiveFileHashes(repack); + const hashesSet = await getSkinArchiveFileHashes(original); + return setContains(repackHashesSet, hashesSet); +} + +async function areIdentical( + repack: SkinModel, + original: SkinModel +): Promise { + const repackHashesSet = await getSkinArchiveFileHashes(repack); + const hashesSet = await getSkinArchiveFileHashes(original); + return setsAreEqual(repackHashesSet, hashesSet); +} + +async function getSkinArchiveFileHashes(skin: SkinModel): Promise> { + const archiveFiles = await skin.getArchiveFiles(); + const nonAdHashes = archiveFiles.map((f) => f.getFileMd5()); + + return new Set(nonAdHashes); +} + +async function getSkinArchiveFileHashesSansAds( + skin: SkinModel +): Promise> { + const archiveFiles = await skin.getArchiveFiles(); + const nonAdFiles = archiveFiles.filter( + (f) => !f.getFileName().match(/winampskins\.info/) + ); + + const nonAdHashes = nonAdFiles.map((f) => f.getFileMd5()); + + return new Set(nonAdHashes); +} + +// Get all the skins that have ad files in them +async function getRepackSkins(ctx: UserContext) { + const rows = await knex.raw( + `SELECT DISTINCT(skin_md5) FROM archive_files WHERE file_name = "winampskins.info.txt";` + ); + return Promise.all( + rows.map((row) => { + return SkinModel.fromMd5Assert(ctx, row.skin_md5); + }) + ); +} + +function setsAreEqual(a: Set, b: Set): boolean { + if (a === b) return true; + if (a.size !== b.size) { + return false; + } + for (const value of a) if (!b.has(value)) return false; + return true; +} + +// Does a contain b? +function setContains(a: Set, b: Set): boolean { + if (a === b) return true; + if (a.size < b.size) { + return false; + } + for (const value of b) if (!a.has(value)) return false; + return true; +} + +async function foo() { + const repacks = await getRepackSkins(ctx); + for (const skin of repacks) { + const md5 = skin.getMd5(); + if (stored[md5] != null) { + console.log(`Already computed ${md5}`); + continue; + } + const archiveFiles = await skin.getArchiveFiles(); + const archiveFilesMd5 = archiveFiles.map((af) => af.getFileMd5()); + + const matches = await knex.raw(` + SELECT + skin_md5 + FROM + ( + SELECT + skin_md5, + COUNT(*) as total_files, + SUM(CASE WHEN file_md5 IN (${archiveFilesMd5 + .map((m) => `"${m}"`) + .join(",")}) THEN 1 ELSE 0 END) as matching_files + FROM + archive_files + GROUP BY + skin_md5 + ) AS t + WHERE + total_files = matching_files; + `); + const filtered = matches + .map((r) => r.skin_md5) + .filter((m) => m != skin_md5); + stored[skin_md5] = filtered; + await KeyValue.update("winampskins.info", stored); + console.log(`Stored that ${skin_md5} matches ${JSON.stringify(filtered)}`); + } +} diff --git a/packages/skin-database/tasks/syncToArchive.ts b/packages/skin-database/tasks/syncToArchive.ts index 384f892b..3b1860af 100644 --- a/packages/skin-database/tasks/syncToArchive.ts +++ b/packages/skin-database/tasks/syncToArchive.ts @@ -153,6 +153,14 @@ const INVALID_IDENTIFIERS = new Set([ "winampskins_Sakura", "winampskins_Sakura3", "winampskins_Izumi2", + "winampskins_beasley_skin", // Case alias? + "winampskins_Episode1_1", + "winampskins_ORTV1", + "winampskins_bluemetal", + "winampskins_Episode1_2", + "winampskins_Episode1_3", + "winampskins_Episode1_4", + "winampskins_Episode1_5" ]); export async function identifierExists(identifier: string): Promise { @@ -218,6 +226,7 @@ export async function syncToArchive(handler: DiscordEventHandler) { let successCount = 0; let errorCount = 0; + let skipCount = 0; await Parallel.map( unarchived, @@ -227,6 +236,7 @@ export async function syncToArchive(handler: DiscordEventHandler) { md5 === "91477bec2b599bc5085f87f0fca3a4d5" ) { // The internet archive claims this one is corrupt for some reason. + skipCount++; console.warn(`Skipping this skin. It's known to not upload correctly.`); return null; } @@ -262,6 +272,7 @@ export async function syncToArchive(handler: DiscordEventHandler) { type: "SYNCED_TO_ARCHIVE", successes: successCount, errors: errorCount, + skips: skipCount }); console.log(`Job complete: ${successCount} success, ${errorCount} errors`); } diff --git a/yarn.lock b/yarn.lock index 3d12d94f..269d949d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -15492,9 +15492,15 @@ graphql@^16.8.1: integrity sha512-59LZHPdGZVh695Ud9lRzPBVTtlX9ZCV150Er2W43ro37wVof0ctenSaskPPjN7lVTIN8mSZt8PHUNKZuNQUuxw== graphql@^16.9.0: +<<<<<<< HEAD version "16.11.0" resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.11.0.tgz#96d17f66370678027fdf59b2d4c20b4efaa8a633" integrity sha512-mS1lbMsxgQj6hge1XZ6p7GPhbrtFwUFYi3wRzXAC/FmYnyXMTvvI3td3rjmQ2u8ewXueaSvRPWaEcgVVOT9Jnw== +======= + version "16.10.0" + resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.10.0.tgz#24c01ae0af6b11ea87bf55694429198aaa8e220c" + integrity sha512-AjqGKbDGUFRKIRCP9tCKiIGHyriz2oHEbPIbEtcSLSs4YjReZOIPQQWek4+6hjw62H9QShXHyaGivGiYVLeYFQ== +>>>>>>> 285979f6 (More stuff) grats@^0.0.31: version "0.0.31"