Too many updates

This commit is contained in:
Jordan Eldredge 2020-09-07 17:08:28 -04:00
parent a50ec10bde
commit 55e9bd102e
13 changed files with 276 additions and 93 deletions

View file

@ -25,8 +25,8 @@ export async function addSkinFromBuffer(
uploader: string
): Promise<Result> {
const md5 = md5Buffer(buffer);
const skin = await Skins.getSkinByMd5(md5);
if (skin != null) {
const exists = await Skins.skinExists(md5);
if (exists) {
return { md5, status: "FOUND" };
}
const tempFile = temp.path({ suffix: ".wsz" });

View file

@ -18,9 +18,6 @@ async function main() {
try {
switch (argv._[0]) {
case "test":
console.log(await Skins.test());
break;
case "tweet":
await tweet(client, null);
break;
@ -32,7 +29,7 @@ async function main() {
case "skin": {
const hash = argv._[1];
logger.info({ hash });
console.log(await Skins.getSkinByMd5(hash));
console.log(await Skins.getSkinByMd5_DEPRECATED(hash));
break;
}
@ -46,10 +43,23 @@ async function main() {
console.log(await addSkinFromBuffer(buffer, filePath, "cli-user"));
break;
}
case "sql": {
const filePath = argv._[1];
const skins = await Skins.getMuseumPageSql({ offset: 0, first: 70000 });
const firstNSFW = skins.findIndex((item) => item.rejected);
console.log(firstNSFW);
// console.log(await Skins.getMuseumPage({ offset: 100, first: 100 }));
break;
}
case "nsfw": {
console.log(await Skins.getSkinToReviewForNsfw());
break;
}
case "index": {
console.log(await Skins.updateSearchIndex(argv._[1]));
break;
}
case "confirm-nsfw-predictions": {
const md5s = await Skins.getMissingNsfwPredictions();
console.log(`Found ${md5s.length} to predict`);

View file

@ -9,8 +9,6 @@ test("getSkinToReview", async () => {
const { md5, filename } = await Skins.getSkinToReview();
expect(md5.length).toBe(32);
expect(typeof filename).toBe("string");
const skin = await Skins.getSkinByMd5(md5);
expect(skin?.tweetStatus).toBe("UNREVIEWED");
});
test("getSkinToReviewForNsfw", async () => {
@ -29,8 +27,8 @@ test("getStats", async () => {
expect(stats).toMatchObject({
approved: 1969,
rejected: 4194,
tweetable: 16,
tweeted: 4292,
tweetable: expect.anything(),
tweeted: expect.anything(),
});
});
@ -61,8 +59,16 @@ test.skip("getMissingNsfwPredictions", async () => {
expect(page).toBe(8);
});
// Expects Skins.skinExists to resolve to true for the first hash and to false
// for the second one.
test("skinExists", async () => {
  const expectedPresent = "6a2843f40058f86406630671b454d66b";
  const expectedAbsent = "8a2843f40058f86406630671b454d66b";

  const presentResult = await Skins.skinExists(expectedPresent);
  expect(presentResult).toBe(true);

  const absentResult = await Skins.skinExists(expectedAbsent);
  expect(absentResult).toBe(false);
});
test("getSkinByMd5", async () => {
const skin = await Skins.getSkinByMd5("6a2843f40058f86406630671b454d66b");
const skin = await Skins.getSkinByMd5_DEPRECATED(
"6a2843f40058f86406630671b454d66b"
);
expect(skin).toMatchInlineSnapshot(`
Object {
"approved": true,
@ -182,15 +188,15 @@ test("getSkinByMd5", async () => {
Leonard A Gray, produced
by Impact Software, Inc
",
"screenshotUrl": "https://s3.amazonaws.com/webamp-uploaded-skins/screenshots/6a2843f40058f86406630671b454d66b.png",
"skinUrl": "https://s3.amazonaws.com/webamp-uploaded-skins/skins/6a2843f40058f86406630671b454d66b.wsz",
"screenshotUrl": "https://cdn.webampskins.org/screenshots/6a2843f40058f86406630671b454d66b.png",
"skinUrl": "https://cdn.webampskins.org/skins/6a2843f40058f86406630671b454d66b.wsz",
"tweetId": "1077896846117285890",
"tweetStatus": "TWEETED",
"tweetUrl": "https://twitter.com/statuses/1077896846117285890",
"tweeted": true,
"twitterLikes": 23,
"type": "CLASSIC",
"webampUrl": "https://webamp.org?skinUrl=https://s3.amazonaws.com/webamp-uploaded-skins/skins/6a2843f40058f86406630671b454d66b.wsz",
"webampUrl": "https://webamp.org?skinUrl=https://cdn.webampskins.org/skins/6a2843f40058f86406630671b454d66b.wsz",
}
`);
});

View file

@ -2,6 +2,7 @@ import { db, knex } from "../db";
import path from "path";
import logger from "../logger";
import { searchIndex } from "../algolia";
import { truncate } from "../utils";
import { DBSkinRecord, SkinRecord, DBIARecord, TweetStatus } from "../types";
import fetch from "node-fetch";
import { analyseBuffer, NsfwPrediction } from "../nsfwImage";
@ -44,15 +45,15 @@ function getCanonicalFilename(skin: DBSkinRecord): string | null {
return fileNames[0] || null;
}
function getSkinUrl(skin: DBSkinRecord): string {
return `https://s3.amazonaws.com/webamp-uploaded-skins/skins/${skin.md5}.wsz`;
function getSkinUrl(md5: string): string {
return `https://cdn.webampskins.org/skins/${md5}.wsz`;
}
function getScreenshotUrl(skin: DBSkinRecord): string {
return `https://s3.amazonaws.com/webamp-uploaded-skins/screenshots/${skin.md5}.png`;
function getScreenshotUrl(md5: string): string {
return `https://cdn.webampskins.org/screenshots/${md5}.png`;
}
function getWebampUrl(skin: DBSkinRecord): string {
return `https://webamp.org?skinUrl=${getSkinUrl(skin)}`;
function getWebampUrl(md5: string): string {
return `https://webamp.org?skinUrl=${getSkinUrl(md5)}`;
}
export async function addSkin({ md5, filePath, uploader, averageColor }) {
@ -81,14 +82,6 @@ export async function addSkin({ md5, filePath, uploader, averageColor }) {
);
}
/**
 * Debugging helper: selects from `skins` left-joined to `ia_items` (on
 * `ia_items.skin_md5 = skins.md5`), keeping rows where `ia_items.id` is null
 * and `skins.skin_type` is 1, then prints the result to the console.
 *
 * Answers the question "do we know about all IA items?".
 */
export async function test() {
  const joined = knex("skins").leftJoin(
    "ia_items",
    "ia_items.skin_md5",
    "=",
    "skins.md5"
  );
  const withoutIaItem = await joined.where({
    "ia_items.id": null,
    "skins.skin_type": 1,
  });
  console.log(withoutIaItem);
}
const IA_URL = /^(https:\/\/)?archive.org\/details\/([^\/]+)\/?/;
const MD5 = /([a-fA-F0-9]{32})/;
@ -115,8 +108,15 @@ export async function getMd5ByAnything(
return getMd5FromInternetArchvieItemName(anything);
}
/**
 * Checks whether the `skins` table contains a row with the given md5.
 *
 * @param md5 - Hash to look up in the `md5` column.
 * @returns `true` when a matching row was found, `false` otherwise.
 */
export async function skinExists(md5: string): Promise<boolean> {
  const match = await knex("skins").where({ md5 }).first();
  if (match == null) {
    return false;
  }
  return true;
}
// TODO: SQLITE
export async function getSkinByMd5(md5: string): Promise<SkinRecord | null> {
export async function getSkinByMd5_DEPRECATED(
md5: string
): Promise<SkinRecord | null> {
const _skin: DBSkinRecord | null = await skins_DEPRECATED.findOne({
md5,
type: "CLASSIC",
@ -150,11 +150,11 @@ export async function getSkinByMd5(md5: string): Promise<SkinRecord | null> {
averageColor: _skin.averageColor,
emails: _skin.emails,
tweetStatus,
skinUrl: getSkinUrl(_skin),
screenshotUrl: getScreenshotUrl(_skin),
skinUrl: getSkinUrl(_skin.md5),
screenshotUrl: getScreenshotUrl(_skin.md5),
fileNames: getFilenames(_skin),
canonicalFilename: getCanonicalFilename(_skin),
webampUrl: getWebampUrl(_skin),
webampUrl: getWebampUrl(_skin.md5),
internetArchiveItemName,
internetArchiveUrl,
};
@ -221,14 +221,10 @@ export async function markAsTweeted(md5: string, url: string): Promise<void> {
// TODO: Also path actor
export async function markAsNSFW(md5: string): Promise<void> {
await skins_CONVERTED.findOneAndUpdate({ md5 }, { $set: { nsfw: true } });
const indexes = [{ objectID: md5, nsfw: true }];
const index = { objectID: md5, nsfw: true };
// TODO: Await here, but for some reason this never completes
new Promise((resolve, reject) => {
searchIndex.partialUpdateObjects(indexes, function (err, content) {
if (err != null) reject(err);
resolve(content);
});
});
await searchIndex.partialUpdateObjects([index]);
await recordSearchIndexUpdates(md5, Object.keys(index));
await knex("skin_reviews").insert({ skin_md5: md5, review: "NSFW" }, []);
}
@ -251,6 +247,67 @@ export async function getStatus(md5: string): Promise<TweetStatus> {
return "UNREVIEWED";
}
/**
 * Builds the search-index record for one skin from the database and pushes
 * it to the search index.
 *
 * The row is read from `skins` left-joined to `tweets`, `skin_reviews` and
 * `files`, restricted to `skin_type = 1` and the given md5. The record is
 * sent with `partialUpdateObjects` using `createIfNotExists: true`, and the
 * set of written fields is then passed to `recordSearchIndexUpdates`.
 *
 * @param md5 - Hash of the skin to index.
 * @returns Whatever `partialUpdateObjects` resolved to, or `null` when the
 *   query returned no row for this md5.
 */
export async function updateSearchIndex(md5: string): Promise<{} | null> {
  // NOTE(review): LIMIT 1 keeps a single joined row; if a skin can have
  // several reviews or files, which one is used depends on the database.
  const skins = await knex.raw(
    `
SELECT skins.md5,
skins.average_color,
skin_reviews.review = 'NSFW' AS nsfw,
skins.readme_text,
skins.emails,
tweets.likes,
files.file_path
FROM skins
LEFT JOIN tweets
ON tweets.skin_md5 = skins.md5
LEFT JOIN skin_reviews
ON skin_reviews.skin_md5 = skins.md5
LEFT JOIN files
ON files.skin_md5 = skins.md5
WHERE skin_type = 1 AND md5 = ? LIMIT 1;`,
    [md5]
  );

  const skin = skins[0] || null;
  if (skin == null) {
    console.log("No skin");
    return null;
  }

  const index = {
    objectID: skin.md5,
    md5: skin.md5,
    nsfw: skin.nsfw,
    readmeText: skin.readme_text ? truncate(skin.readme_text, 4800) : null,
    // NOTE(review): `split()` without a separator returns a one-element array
    // holding the whole string. If `emails` can hold several addresses a
    // delimiter is probably intended — confirm the stored format.
    emails: skin.emails ? skin.emails.split() : [],
    color: skin.average_color,
    // `files` is LEFT JOINed, so `file_path` is null when the skin has no
    // file row; `path.basename(null)` would throw, so fall back to null the
    // same way `readmeText` does.
    fileName: skin.file_path ? path.basename(skin.file_path) : null,
    twitterLikes: Number(skin.likes || 0),
  };

  const results = await searchIndex.partialUpdateObjects([index], {
    createIfNotExists: true,
  });

  await recordSearchIndexUpdates(md5, Object.keys(index));

  return results;
}
/**
 * Records which search-index fields were just written for a skin by
 * inserting one row per field into `algolia_field_updates`.
 *
 * Every row carries the skin's md5, the field name, and the same
 * `update_timestamp` (current time in whole seconds since the Unix epoch).
 *
 * @param md5 - Hash of the skin whose index record was updated.
 * @param fields - Names of the index fields that were written.
 */
export async function recordSearchIndexUpdates(
  md5: string,
  fields: string[]
): Promise<void> {
  // Nothing was written, so there is nothing to record.
  if (fields.length === 0) {
    return;
  }
  const update_timestamp = Math.floor(Date.now() / 1000);
  // Map over the field names themselves. `Object.keys(fields)` on an array
  // yields its indices ("0", "1", ...), which would be stored instead.
  await knex("algolia_field_updates").insert(
    fields.map((field) => ({
      skin_md5: md5,
      update_timestamp,
      field,
    }))
  );
}
// TODO: Also path actor
export async function approve(md5: string): Promise<void> {
await skins_CONVERTED.findOneAndUpdate({ md5 }, { $set: { approved: true } });
@ -273,6 +330,8 @@ export async function getSkinToReview(): Promise<{
.innerJoin("files", "files.skin_md5", "=", "skins.md5")
.select("skins.md5", "files.file_path")
.where({ "skin_reviews.id": null, "tweets.id": null, "skins.skin_type": 1 })
// TODO: Remove this once we run out of skins that don't have it.
.whereNot("emails", "like", "%Dr.Algebra@gmx.de%")
.orderByRaw("random()")
.limit(1);
if (!skins.length) {
@ -319,7 +378,7 @@ export async function getSkinToTweet(): Promise<SkinRecord | null> {
if (skin == null) {
return null;
}
return getSkinByMd5(skin.md5);
return getSkinByMd5_DEPRECATED(skin.md5);
}
export async function getStats(): Promise<{
@ -349,13 +408,15 @@ export async function getRandomClassicSkinMd5(): Promise<string> {
}
export async function getScreenshotBuffer(md5: string): Promise<Buffer> {
const skin = await getSkinByMd5(md5);
if (skin == null) {
const exists = await skinExists(md5);
if (!exists) {
throw new Error(`Could not find skin with hash ${md5}`);
}
const screenshotResponse = await fetch(skin?.screenshotUrl);
const screenshotUrl = getScreenshotUrl(md5);
const screenshotResponse = await fetch(screenshotUrl);
if (!screenshotResponse.ok) {
throw new Error(`Could not get screenshot at ${skin?.screenshotUrl}`);
throw new Error(`Could not get screenshot at ${screenshotUrl}`);
}
return screenshotResponse.buffer();
}
@ -422,3 +483,44 @@ export async function getMuseumPage({
};
});
}
/**
 * Fetches one page of skins for the museum view with a single raw SQL query.
 *
 * The query reads `skins` left-joined to `tweets` and `skin_reviews`,
 * restricted to `skin_type = 1`, ordered by tweet likes (descending), then
 * nsfw, approved and rejected flags, and paged with `LIMIT first OFFSET offset`.
 *
 * Each returned item carries the row's `md5` and `nsfw` (`true` when the row
 * is flagged, otherwise `undefined`). `color` and `filename` are always
 * `undefined`.
 * NOTE(review): the declared return type names `fileName` and string-typed
 * fields, which the produced objects do not match — confirm what callers expect.
 *
 * @param offset - Number of rows to skip.
 * @param first - Maximum number of rows to return.
 */
export async function getMuseumPageSql({
  offset,
  first,
}: {
  offset: number;
  first: number;
}): Promise<
  Array<{ color: string; fileName: string; md5: string; nsfw: boolean }>
> {
  const query = `
SELECT skins.md5,
skins.average_color,
skin_reviews.review = 'NSFW' AS nsfw,
skin_reviews.review = 'APPROVED' AS approved,
skin_reviews.review = 'REJECTED' AS rejected,
tweets.likes
FROM skins
LEFT JOIN tweets
ON tweets.skin_md5 = skins.md5
LEFT JOIN skin_reviews
ON skin_reviews.skin_md5 = skins.md5
WHERE skin_type = 1
ORDER BY tweets.likes DESC,
nsfw ASC,
approved DESC,
rejected ASC
LIMIT ? offset ?`;
  const bindings = [first, offset];

  const rows = await knex.raw(query, bindings);

  return rows.map((row) => {
    const { md5, nsfw } = row;
    const flagged = nsfw ? true : undefined;
    return { color: undefined, filename: undefined, md5, nsfw: flagged };
  });
}

View file

@ -34,7 +34,7 @@ export async function postSkin({
dest: TextChannel | DMChannel | GroupDMChannel;
}) {
console.log("postSkin...");
const skin = await Skins.getSkinByMd5(md5);
const skin = await Skins.getSkinByMd5_DEPRECATED(md5);
if (skin == null) {
console.warn("Could not find skin for md5", { md5, alert: true });
logger.warn("Could not find skin for md5", { md5, alert: true });

View file

@ -2,24 +2,35 @@ const express = require("express");
const app = express();
const config = require("./config");
const Skins = require("./data/skins");
const port = 3001;
const port = process.env.PORT ? Number(process.env.PORT) : 3001;
const fileUpload = require("express-fileupload");
const { addSkinFromBuffer } = require("./addSkin");
const Discord = require("discord.js");
const Utils = require("./discord-bot/utils");
const cors = require("cors");
var bodyParser = require("body-parser");
var LRU = require("lru-cache");
const whitelist = ["https://skins.webamp.org", "http://localhost:3000"];
const allowList = [
/https:\/\/skins\.webamp\.org/,
/http:\/\/localhost:3000/,
/netlify.app/,
];
const corsOptions = {
origin: function (origin, callback) {
if (whitelist.indexOf(origin) !== -1 || !origin) {
if (allowList.some((regex) => regex.test(origin)) || !origin) {
callback(null, true);
} else {
callback(new Error("Not allowed by CORS"));
callback(
new Error(`Request from origin "${origin}" not allowed by CORS.`)
);
}
},
};
// parse application/json
app.use(bodyParser.json());
app.use(cors(corsOptions));
app.options("*", cors(corsOptions));
@ -34,22 +45,53 @@ app.use(
})
);
app.get("/", async (req, res) => {
res.send("Hello World!");
});
let skinCount = null;
const options = {
max: 100,
maxAge: 1000 * 60 * 60,
};
const cache = new LRU(options);
app.get("/skins/", async (req, res) => {
if (skinCount == null) {
skinCount = await Skins.getClassicSkinCount();
}
const { offset = 0, first = 100 } = req.query;
const [skins, skinCount] = await Promise.all([
Skins.getMuseumPage({
offset: Number(offset),
first: Number(first),
}),
Skins.getClassicSkinCount(),
]);
const key = req.originalUrl;
const cached = cache.get(key);
if (cached != null) {
console.log(`Cache hit for ${key}`);
res.json({ skinCount, skins: cached });
return;
}
console.log(`Getting offset: ${offset}, first: ${first}`);
const start = Date.now();
const skins = await Skins.getMuseumPageSql({
offset: Number(offset),
first: Number(first),
});
console.log(`Query took ${(Date.now() - start) / 1000}`);
console.log(`Cache set for ${key}`);
cache.set(key, skins);
res.json({ skinCount, skins });
});
// POST /skins/missing
// Expects a JSON body of the form { hashes: [...] } and answers with
// { missing, found }, splitting the hashes by whether Skins.skinExists
// reports them as present. Lookups run one at a time, in request order.
app.post("/skins/missing", async (req, res, next) => {
  const hashes = req.body == null ? undefined : req.body.hashes;
  // The body comes straight from the client; reject anything that is not a
  // list instead of failing inside the loop.
  if (!Array.isArray(hashes)) {
    res
      .status(400)
      .json({ error: 'Expected a JSON body with a "hashes" array.' });
    return;
  }
  try {
    const missing = [];
    const found = [];
    for (const md5 of hashes) {
      if (!(await Skins.skinExists(md5))) {
        missing.push(md5);
      } else {
        found.push(md5);
      }
    }
    res.json({ missing, found });
  } catch (err) {
    // A rejected async handler is not routed to the error middleware
    // automatically, so forward the failure explicitly.
    next(err);
  }
});
app.post("/skins/", async (req, res) => {
const files = req.files;
if (files == null) {
@ -62,12 +104,19 @@ app.post("/skins/", async (req, res) => {
return;
}
const result = await addSkinFromBuffer(upload.data, upload.name, "Web API");
if (result.status === "ADDED") {
console.log(`Updating index for ${result.md5}.`);
await Skins.updateSearchIndex(result.md5);
}
res.json({ ...result, filename: upload.name });
});
app.get("/skins/:md5", async (req, res) => {
const { md5 } = req.params;
const skin = await Skins.getSkinByMd5(md5);
console.log(`Details for hash "${md5}"`);
const skin = await Skins.getSkinByMd5_DEPRECATED(md5);
if (skin == null) {
res.status(404).json();
return;
@ -75,9 +124,17 @@ app.get("/skins/:md5", async (req, res) => {
res.json(skin);
});
// POST /skins/:md5/index
// Rebuilds the search-index record for the given hash and responds with
// whatever Skins.updateSearchIndex resolved to.
app.post("/skins/:md5/index", async (req, res, next) => {
  const { md5 } = req.params;
  console.log(`Going to index hash "${md5}"`);
  try {
    const result = await Skins.updateSearchIndex(md5);
    res.json(result);
  } catch (err) {
    // A rejected async handler is not routed to the error middleware
    // automatically, so forward the failure explicitly.
    next(err);
  }
});
// TODO: Make this POST
app.post("/skins/:md5/report", async (req, res) => {
const { md5 } = req.params;
console.log(`Reporting skin with hash "${md5}"`);
const client = new Discord.Client();
await client.login(config.discordToken);
const dest = client.channels.get(config.NSFW_SKIN_CHANNEL_ID);
@ -93,7 +150,8 @@ app.post("/skins/:md5/report", async (req, res) => {
app.get("/skins/:md5/screenshot.png", async (req, res) => {
const { md5 } = req.params;
const { screenshotUrl } = await Skins.getSkinByMd5(md5);
console.log(`Getting screenshot for hash "${md5}"`);
const { screenshotUrl } = await Skins.getSkinByMd5_DEPRECATED(md5);
if (screenshotUrl == null) {
res.status(404).send();
return;
@ -103,7 +161,8 @@ app.get("/skins/:md5/screenshot.png", async (req, res) => {
app.get("/skins/:md5/download", async (req, res) => {
const { md5 } = req.params;
const { skinUrl } = await Skins.getSkinByMd5(md5);
console.log(`Downloading for hash "${md5}"`);
const { skinUrl } = await Skins.getSkinByMd5_DEPRECATED(md5);
if (skinUrl == null) {
res.status(404).send();
return;
@ -111,4 +170,9 @@ app.get("/skins/:md5/download", async (req, res) => {
res.redirect(301, skinUrl);
});
// Last-resort error middleware: logs the stack trace and answers with a
// generic 500. Express recognises error handlers by their four-argument
// signature, so `next` has to stay in the parameter list.
app.use(function (err, req, res, next) {
  console.error(err.stack);
  if (res.headersSent) {
    // Part of a response has already gone out, so a fresh status/body cannot
    // be sent; delegate to Express' default handler to close the connection.
    next(err);
    return;
  }
  res.status(500).send("Oops. Something went wrong. We're working on it.");
});
app.listen(port, () => console.log(`Example app listening on port ${port}!`));

View file

@ -14,6 +14,7 @@
"imagemin": "^7.0.0",
"imagemin-optipng": "^7.0.0",
"knex": "^0.21.1",
"lru-cache": "^6.0.0",
"md5": "^2.2.1",
"monk": "^7.0.0",
"node-fetch": "^2.6.0",

View file

@ -5,7 +5,7 @@ const s3 = new AWS.S3();
function putSkin(md5, buffer) {
return new Promise((resolve, rejectPromise) => {
const bucketName = "webamp-uploaded-skins";
const bucketName = "cdn.webampskins.org";
const key = `skins/${md5}.wsz`;
s3.putObject(
{ Bucket: bucketName, Key: key, Body: buffer, ACL: "public-read" },
@ -23,7 +23,7 @@ function putSkin(md5, buffer) {
function putScreenshot(md5, buffer) {
return new Promise((resolve, rejectPromise) => {
const bucketName = "webamp-uploaded-skins";
const bucketName = "cdn.webampskins.org";
const key = `screenshots/${md5}.png`;
s3.putObject(
{ Bucket: bucketName, Key: key, Body: buffer, ACL: "public-read" },

View file

@ -1,6 +1,7 @@
const { searchIndex } = require("../algolia");
const path = require("path");
const { db } = require("../db");
// The utils module exports `truncate`. Destructuring the misspelled `tuncate`
// would both yield undefined and redeclare the local `tuncate` function
// defined in this file.
const { truncate } = require("../utils");
function tuncate(str, len) {
const overflow = str.length - len;

View file

@ -94,7 +94,7 @@ async function getNewIdentifier(filename: string): Promise<string> {
}
async function archive(md5: string): Promise<string> {
const skin = await Skins.getSkinByMd5(md5);
const skin = await Skins.getSkinByMd5_DEPRECATED(md5);
if (skin == null) {
throw new Error(`Could not find skin with hash ${md5}`);
}

View file

@ -0,0 +1,12 @@
/**
 * Shortens `str` to at most `len` characters by cutting out its middle and
 * putting a ` ########### ` marker in place of the removed text.
 *
 * Strings that already fit (`str.length <= len`) are returned unchanged. When
 * `len` is too small to hold the marker plus at least one character on each
 * side, the string is simply cut to its first `len` characters.
 *
 * @param str - Text to shorten.
 * @param len - Maximum length of the returned string.
 * @returns `str` itself, or a shortened version no longer than `len`.
 */
export function truncate(str: string, len: number): string {
  if (str.length <= len) {
    return str;
  }
  const marker = " ########### ";
  // Characters of the original text that still fit once the marker is
  // accounted for.
  const keep = len - marker.length;
  if (keep < 2) {
    return str.slice(0, Math.max(0, len));
  }
  const tailLength = Math.floor(keep / 2);
  const headLength = keep - tailLength;
  const head = str.slice(0, headLength);
  // Index from the front: a negative-index slice with a zero length would
  // return the whole string.
  const tail = str.slice(str.length - tailLength);
  return `${head}${marker}${tail}`;
}

View file

@ -50,15 +50,6 @@ def find(dir):
yield os.path.join(root, file)
def url_is_good(url):
    """Return True when a HEAD request to ``url`` answers with status 200.

    Any exception raised while making the request is treated as a bad URL
    and results in False.
    """
    try:
        status = requests.head(url).status_code
    except Exception:
        return False
    return status == 200
def md5_file(path):
hash_md5 = hashlib.md5()
with open(path, "rb") as f:
@ -68,28 +59,19 @@ def md5_file(path):
def tweet_skin(md5, skin_name, dry):
skin_url = get_skin_url(md5)
screenshot_url = get_screenshot_url(md5)
screenshot_path = NamedTemporaryFile(suffix=".png").name
urllib.request.urlretrieve(screenshot_url, screenshot_path)
if not url_is_good(skin_url):
print("URL %s is no good. Aborting." % skin_url)
return
return tweet_image(skin_name, md5, skin_url, screenshot_path, dry)
def get_skin_url(md5):
return "https://s3.amazonaws.com/webamp-uploaded-skins/skins/%s.wsz" % md5
return tweet_image(skin_name, md5, screenshot_path, dry)
def get_screenshot_url(md5):
    """Return the public URL of the screenshot PNG for the skin with this md5.

    Uses the cdn.webampskins.org host, matching where screenshots are now
    uploaded and the URLs the rest of the project builds.
    """
    return "https://cdn.webampskins.org/screenshots/%s.png" % md5
def tweet_image(skin_name, md5, skin_url, screenshot_path, dry):
def tweet_image(skin_name, md5, screenshot_path, dry):
# Trick Twitter into keeping the skin a PNG
img = Image.open(screenshot_path)
img = img.convert("RGBA") # ensure 32-bit
@ -105,16 +87,11 @@ def tweet_image(skin_name, md5, skin_url, screenshot_path, dry):
img.save(screenshot_path)
escaped_skin_url = urllib.parse.quote(skin_url)
museum_url = "https://skins.webamp.org/skin/%s" % md5
winamp2_js_url = "https://webamp.org/?skinUrl=%s" % escaped_skin_url
status_message = """%s
Try Online: %s
Download: %s""" % (
status_message = "%s\n\n%s" % (
skin_name,
winamp2_js_url,
skin_url,
museum_url,
)
if not dry:
return tweet(status_message, screenshot_path)

View file

@ -7969,6 +7969,12 @@ lru-cache@^5.1.1:
dependencies:
yallist "^3.0.2"
lru-cache@^6.0.0:
version "6.0.0"
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-6.0.0.tgz#6d6fe6570ebd96aaf90fcad1dafa3b2566db3a94"
dependencies:
yallist "^4.0.0"
make-dir@^1.0.0, make-dir@^1.2.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-1.3.0.tgz#79c1033b80515bd6d24ec9933e860ca75ee27f0c"
@ -12449,6 +12455,10 @@ yallist@^3.0.0, yallist@^3.0.2, yallist@^3.0.3:
version "3.1.1"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
yallist@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
yargs-parser@^13.1.0, yargs-parser@^13.1.2:
version "13.1.2"
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-13.1.2.tgz#130f09702ebaeef2650d54ce6e3e5706f7a4fb38"