Progress (probably, who knows)

Jordan Eldredge 2020-07-05 16:12:30 -04:00
parent 202ca53a20
commit f40bee2be0
15 changed files with 1475 additions and 187 deletions

View file

@@ -0,0 +1,6 @@
module.exports = {
presets: [
["@babel/preset-env", { targets: { node: "current" } }],
"@babel/preset-typescript",
],
};
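
This appears to be a new babel.config.js wiring @babel/preset-env (targeting the current Node version) together with @babel/preset-typescript, which is what lets the new *.test.ts files below run under Jest via babel-jest without a separate compile step. A minimal hypothetical smoke test (the file name is an assumption, not part of this commit) illustrates the effect:

// smoke.test.ts (hypothetical): babel-jest picks up babel.config.js automatically
test("TypeScript syntax is transpiled by babel-jest", () => {
  const n: number = 1 + 1;
  expect(n).toBe(2);
});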

View file

@@ -3,8 +3,6 @@ import path from "path";
import fs from "fs";
import { db, knex } from "./db";
import { argv } from "yargs";
import fetchInternetArchiveMetadata from "./tasks/fetchInternetArchiveMetadata";
import ensureInternetArchiveItemsIndexByMd5 from "./tasks/ensureInternetArchiveItemsIndexByMd5";
import logger from "./logger";
import DiscordWinstonTransport from "./DiscordWinstonTransport";
import * as Skins from "./data/skins";
@@ -20,17 +18,12 @@ async function main() {
try {
switch (argv._[0]) {
case "test":
console.log(await Skins.test());
break;
case "tweet":
await tweet(client, null);
break;
case "fetch-metadata":
console.log("Going to download metadata from the Internet Archive");
await fetchInternetArchiveMetadata();
break;
case "ensure-md5s":
await ensureInternetArchiveItemsIndexByMd5();
break;
case "metadata": {
const hash = argv._[1];
console.log(Skins.getInternetArchiveUrl(hash));
@@ -42,6 +35,11 @@ async function main() {
console.log(await Skins.getSkinByMd5(hash));
break;
}
case "stats": {
console.log(await Skins.getStats());
break;
}
case "add": {
const filePath = argv._[1];
const buffer = fs.readFileSync(filePath);

View file

@@ -0,0 +1,25 @@
import * as Skins from "./skins";
import { db } from "../db";
afterAll(() => {
db.close();
});
test("getSkinToReview", async () => {
const { md5, filename } = await Skins.getSkinToReview();
expect(md5.length).toBe(32);
expect(typeof filename).toBe("string");
const skin = await Skins.getSkinByMd5(md5);
expect(skin?.tweetStatus).toBe("UNREVIEWED");
});
test("getSkinToReviewForNsfw", async () => {
const { md5, filename } = await Skins.getSkinToReviewForNsfw();
expect(md5.length).toBe(32);
expect(typeof filename).toBe("string");
});
test("getClassicSkinCount", async () => {
const count = await Skins.getClassicSkinCount();
expect(count > 60000).toBe(true);
});
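
These tests run against the live databases, and afterAll only closes the Mongo (monk) connection. If Jest complains about open handles once the knex-backed queries are exercised, a hedged variant of the teardown could also release the knex pool (knex is exported from ../db alongside db, as the CLI's imports show):

import { db, knex } from "../db";

afterAll(async () => {
  db.close();           // close the monk/Mongo connection, as in the original
  await knex.destroy(); // sketch: also tear down the knex connection pool so Jest can exit cleanly
});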

View file

@@ -163,8 +163,19 @@ export function getTweetableSkinCount(): Promise<number> {
return skins_DEPRECATED.count(TWEETABLE_QUERY);
}
export function getClassicSkinCount(): Promise<number> {
return skins_DEPRECATED.count(CLASSIC_QUERY);
export async function getClassicSkinCount(): Promise<number> {
const rows = await knex("skins")
.where({ skin_type: 1 })
.count({ count: "*" });
const row = rows[0];
if (row == null || row.count == null) {
throw new Error("Could not find count row");
}
const { count } = row;
if (count == null) {
throw new Error("Could not find count");
}
return Number(count);
}
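
The defensive checks above reflect that knex's count() row shape is driver dependent (the value can come back as a string on some drivers, hence the Number() coercion at the end). An equivalent, slightly terser sketch using first() instead of indexing rows[0], under the same assumptions about the skins table:

// Sketch only: same query, using .first() to get the single aggregate row.
export async function getClassicSkinCount(): Promise<number> {
  const row = await knex("skins").where({ skin_type: 1 }).count({ count: "*" }).first();
  if (row == null || row.count == null) {
    throw new Error("Could not count classic skins");
  }
  return Number(row.count);
}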
// TODO: Also pass id
@@ -189,7 +200,7 @@ export async function markAsNSFW(md5: string): Promise<void> {
export async function getStatus(md5: string): Promise<TweetStatus> {
const tweeted = await knex("tweets").where({ skin_md5: md5 }).limit(1);
if (tweeted) {
if (tweeted.length > 0) {
return "TWEETED";
}
const reviewRows = await knex("skin_reviews")
@@ -228,31 +239,54 @@ export async function getSkinToReview(): Promise<{
filename: string | null;
md5: string;
}> {
const reviewable = await skins_DEPRECATED.aggregate([
{ $match: REVIEWABLE_QUERY },
{ $sample: { size: 1 } },
]);
const skin = reviewable[0];
return { filename: getCanonicalFilename(skin), md5: skin.md5 };
const skins = await knex("skins")
.leftJoin("skin_reviews", "skin_reviews.skin_md5", "=", "skins.md5")
.leftJoin("tweets", "tweets.skin_md5", "=", "skins.md5")
.innerJoin("files", "files.skin_md5", "=", "skins.md5")
.select("skins.md5", "files.file_path")
.where({ "skin_reviews.id": null, "tweets.id": null, "skins.skin_type": 1 })
.orderByRaw("random()")
.limit(1);
if (!skins.length) {
throw new Error("Could not find any skins to review");
}
const skin = skins[0];
return { filename: path.basename(skin.file_path), md5: skin.md5 };
}
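
The replacement query is effectively an anti-join: the left joins against skin_reviews and tweets, combined with the null filters, keep only classic skins that have neither a review nor a tweet, and random() picks one along with its file path. For reference, roughly the same selection as a single raw statement (a sketch, not taken from the commit; the return shape of knex.raw varies by driver):

// Sketch: approximate raw equivalent of the query builder chain above.
const rows = await knex.raw(`
  SELECT skins.md5, files.file_path
  FROM skins
  LEFT JOIN skin_reviews ON skin_reviews.skin_md5 = skins.md5
  LEFT JOIN tweets ON tweets.skin_md5 = skins.md5
  INNER JOIN files ON files.skin_md5 = skins.md5
  WHERE skin_reviews.id IS NULL AND tweets.id IS NULL AND skins.skin_type = 1
  ORDER BY random()
  LIMIT 1
`);
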
export async function getSkinToReviewForNsfw(): Promise<{
filename: string | null;
md5: string;
}> {
const reviewable = await skins_DEPRECATED.find(REVIEWABLE_QUERY, {
limit: 1,
sort: { "nsfwPredictions.porn": -1 },
});
const skin = reviewable[0];
return { filename: getCanonicalFilename(skin), md5: skin.md5 };
// TODO: This will not surface skins which have already been reviewed for the bot but not for NSFW.
const skins = await knex("skins")
.leftJoin("nsfw_predictions", "nsfw_predictions.skin_md5", "=", "skins.md5")
.leftJoin("skin_reviews", "skin_reviews.skin_md5", "=", "skins.md5")
.innerJoin("files", "files.skin_md5", "=", "nsfw_predictions.skin_md5")
.select("nsfw_predictions.skin_md5", "files.file_path")
.where({ "skin_reviews.id": null, skin_type: 1 })
.orderBy("nsfw_predictions.porn", "desc")
.limit(1);
if (!skins.length) {
throw new Error("Could not find any skins to review");
}
const skin = skins[0];
return { filename: path.basename(skin.file_path), md5: skin.skin_md5 };
}
export async function getSkinToTweet(): Promise<SkinRecord | null> {
const tweetables = await skins_DEPRECATED.aggregate([
{ $match: TWEETABLE_QUERY },
{ $sample: { size: 1 } },
]);
// TODO: This does not account for skins that have been both approved and rejected
const tweetables = await knex("skins")
.leftJoin("skin_reviews", "skin_reviews.skin_md5", "=", "skins.md5")
.leftJoin("tweets", "tweets.skin_md5", "=", "skins.md5")
.where({
"tweets.id": null,
skin_type: 1,
"skin_reviews.review": "APPROVED",
})
.select("skins.md5")
.orderByRaw("random()")
.limit(1);
const skin = tweetables[0];
if (skin == null) {
return null;
@@ -266,6 +300,10 @@ export async function getStats(): Promise<{
tweeted: number;
tweetable: number;
}> {
const result = await knex.raw(
`SELECT COUNT(DISTINCT skin_md5) AS "approved_count" FROM skin_reviews WHERE review = "APPROVED";`
);
console.log(result);
const approved = await skins_DEPRECATED.count({ approved: true });
const rejected = await skins_DEPRECATED.count({ rejected: true });
const tweeted = await skins_DEPRECATED.count({ tweeted: true });
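
In the raw count near the top of this hunk, standard SQL would parse the double-quoted "APPROVED" as an identifier; SQLite happens to fall back to treating an unknown double-quoted name as a string literal, which is why the query works there. A slightly more portable sketch would bind the value instead:

// Sketch: same count with the review value passed as a binding rather than quoted inline.
const approvedResult = await knex.raw(
  `SELECT COUNT(DISTINCT skin_md5) AS approved_count FROM skin_reviews WHERE review = ?`,
  ["APPROVED"]
);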

View file

@@ -3,13 +3,16 @@ import * as Utils from "../utils";
import { Message } from "discord.js";
async function reviewSkin(message: Message, nsfw: boolean): Promise<void> {
console.log(1);
const skin = await (nsfw
? Skins.getSkinToReviewForNsfw()
: Skins.getSkinToReview());
if (skin == null) {
console.log(1.5);
throw new Error("No skins to review");
}
const { md5 } = skin;
console.log(2);
await Utils.postSkin({
md5,
title: (filename) => `Review: ${filename}`,
@@ -17,7 +20,10 @@ async function reviewSkin(message: Message, nsfw: boolean): Promise<void> {
});
}
console.log("Top scope");
async function handler(message: Message, args: [string, string]) {
console.log("Handler called");
let count = Number(args[0] || 1);
let nsfw = args[1] === "nsfw";
if (count > 50) {
@@ -26,6 +32,7 @@ async function handler(message: Message, args: [string, string]) {
);
count = 50;
}
console.log("About to send");
await message.channel.send(`Going to show ${count} skins to review.`);
let i = count;
while (i--) {

View file

@@ -33,6 +33,7 @@ export async function postSkin({
title?: (filename: string | null) => string;
dest: TextChannel | DMChannel | GroupDMChannel;
}) {
console.log("postSkin...");
const skin = await Skins.getSkinByMd5(md5);
if (skin == null) {
console.warn("Could not find skin for md5", { md5, alert: true });

Binary file not shown (new image; 160 KiB).

Binary file not shown.

View file

@@ -0,0 +1,18 @@
import fs from "fs";
import path from "path";
import { analyseBuffer } from "./nsfwImage";
jest.setTimeout(30000);
test("predict nsfw", async () => {
const skin = fs.readFileSync(path.join(__dirname, "./fixtures/60.png"));
const result = await analyseBuffer(Uint8Array.from(skin));
expect(result).toEqual({
drawing: 0.14270488917827606,
hentai: 0.006160343065857887,
neutral: 0.8509458303451538,
porn: 0.0001247897307621315,
sexy: 0.00006406463944585994,
});
});
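
Asserting exact floating point scores like this can be brittle across TensorFlow builds and hardware. If the snapshot-style comparison starts flaking, a looser hedged variant (reusing the imports above) could compare with a tolerance instead:

test("predict nsfw (tolerant sketch)", async () => {
  const skin = fs.readFileSync(path.join(__dirname, "./fixtures/60.png"));
  const result = await analyseBuffer(Uint8Array.from(skin));
  // Compare the dominant scores to a few decimal places instead of exact equality.
  expect(result.neutral).toBeCloseTo(0.851, 3);
  expect(result.porn).toBeCloseTo(0.0001, 3);
});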

View file

@@ -1,7 +1,5 @@
import * as tf from "@tensorflow/tfjs-node";
import { load as nsfwLoad } from "nsfwjs";
import path from "path";
import fs from "fs";
const modelPromise = nsfwLoad();
@@ -14,6 +12,7 @@ export type NsfwPrediction = {
};
export async function analyseBuffer(buffer: Buffer): Promise<NsfwPrediction> {
// const tf = await import("@tensorflow/tfjs-node");
const model = await modelPromise;
const image = await tf.node.decodePng(buffer, 3);
const predictions = await model.classify(image);

View file

@@ -38,6 +38,8 @@
},
"prettier": {},
"devDependencies": {
"@babel/preset-typescript": "^7.10.1",
"jest": "^26.1.0",
"prettier": "^2.0.5",
"typescript": "^3.8.3"
}

View file

@@ -1,32 +0,0 @@
const { db } = require("../db");
const iaItems = db.get("internetArchiveItems");
module.exports = async function main() {
const items = await iaItems.find(
{ "metadata.metadata.skintype": { $eq: "wsz" }, md5: { $eq: null } },
{
fields: {
identifier: 1,
metadata: 1,
},
}
);
for (const item of items) {
const skinFiles = item.metadata.files.filter((file) => {
return file.name.endsWith(".wsz");
});
if (skinFiles.length !== 1) {
console.warn(
`Found a skin item with ${skinFiles.length} skin files. Identifier: ${item.identifier}`
);
continue;
}
const { md5, name } = skinFiles[0];
await iaItems.update(
{ _id: { $eq: item._id } },
{ $set: { md5, skinFileName: name } }
);
}
};

View file

@@ -1,61 +0,0 @@
const fetch = require("node-fetch");
const { db } = require("../db");
const iaItems = db.get("internetArchiveItems");
async function fetchMetadata(identifier) {
const resp = await fetch(`http://archive.org/metadata/${identifier}`);
const metadata = await resp.json();
const metadataFetchDate = Date.now();
await iaItems.findOneAndUpdate(
{ identifier },
{ $set: { metadata, metadataFetchDate } }
);
}
async function fetchAllMetadata(limit) {
const items = await iaItems.find(
{ metadata: { $eq: null } },
{
limit,
fields: {
identifier: 1,
},
}
);
console.log(`Found ${items.length} missing metadata`);
await Promise.all(
items.map((item) => {
return fetchMetadata(item.identifier);
})
);
return items.length;
}
// TODO: Refetch collections from:
// https://archive.org/advancedsearch.php?q=collection%3Awinampskins&fl%5B%5D=identifier&rows=100000&page=1&output=json
module.exports = async function main() {
return new Promise((resolve) => {
let delay = 60000;
let timeout = null;
async function go() {
console.log("Gonna fetch some more");
try {
const count = await fetchAllMetadata(500);
if (count < 1) {
if (timeout != null) {
// I don't think this can ever happen
clearTimeout(timeout);
}
console.log("Done.");
resolve();
return;
}
} catch (e) {
console.error(e);
delay += 60000;
}
timeout = setTimeout(go, delay);
}
go();
});
};

View file

@@ -1,29 +0,0 @@
const fetch = require("node-fetch");
const { db } = require("../db");
const iaItems = db.get("internetArchiveItems");
module.exports = async function main() {
const resp = await fetch(
`https://archive.org/advancedsearch.php?q=collection%3Awinampskins&fl%5B%5D=identifier&rows=100000&page=1&output=json`
);
const collections = await resp.json();
const items = collections.response.docs;
const bulkUpdates = items
.map((item) => {
const { identifier } = item;
return {
updateOne: {
filter: { identifier },
update: {
$set: {
identifier,
},
},
upsert: true,
},
};
})
.filter(Boolean);
await iaItems.bulkWrite(bulkUpdates);
};

yarn.lock: 1384 changes (file diff suppressed because it is too large).