rewrite trendsworker with kysely and deno cron, format changes

This commit is contained in:
Siddharth Singh
2024-05-20 00:05:03 +05:30
parent 83a7b1f231
commit f19629600d
5 changed files with 90 additions and 104 deletions

View File

@@ -1,9 +1,6 @@
import { assertEquals } from '@std/assert';
import { TrendsWorker } from './trends.ts';
await TrendsWorker.open(':memory:');
const p8 = (pubkey8: string) => `${pubkey8}00000000000000000000000000000000000000000000000000000000`;
Deno.test('getTrendingTags', async () => {
@@ -19,9 +16,9 @@ Deno.test('getTrendingTags', async () => {
});
const expected = [
{ name: 'ditto', accounts: 3, uses: 3 },
{ name: 'hello', accounts: 2, uses: 3 },
{ name: 'yolo', accounts: 1, uses: 1 },
{ tag: 'ditto', accounts: 3, uses: 3 },
{ tag: 'hello', accounts: 2, uses: 3 },
{ tag: 'yolo', accounts: 1, uses: 1 },
];
assertEquals(result, expected);

View File

@@ -1,9 +1,10 @@
import { NSchema } from '@nostrify/nostrify';
import * as Comlink from 'comlink';
import { Sqlite } from '@/deps.ts';
import { hashtagSchema } from '@/schema.ts';
import { generateDateRange, Time } from '@/utils/time.ts';
import { DittoDB } from '@/db/DittoDB.ts';
import { sql } from 'kysely';
interface GetTrendingTagsOpts {
since: Date;
@@ -20,73 +21,57 @@ interface GetTagHistoryOpts {
offset?: number;
}
let db: Sqlite;
const kysely = await DittoDB.getInstance();
export const TrendsWorker = {
open(path: string) {
db = new Sqlite(path);
db.execute(`
CREATE TABLE IF NOT EXISTS tag_usages (
tag TEXT NOT NULL COLLATE NOCASE,
pubkey8 TEXT NOT NULL,
inserted_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_time_tag ON tag_usages(inserted_at, tag);
`);
const cleanup = () => {
console.info('Cleaning up old tag usages...');
setupCleanupJob() {
Deno.cron('cleanup tag usages older than a week', { hour: { every: 1 } }, async () => {
const lastWeek = new Date(new Date().getTime() - Time.days(7));
this.cleanupTagUsages(lastWeek);
};
setInterval(cleanup, Time.hours(1));
cleanup();
await this.cleanupTagUsages(lastWeek);
});
},
/** Gets the most used hashtags between the date range. */
getTrendingTags({ since, until, limit = 10, threshold = 3 }: GetTrendingTagsOpts) {
return db.query<string[]>(
`
SELECT tag, COUNT(DISTINCT pubkey8), COUNT(*)
FROM tag_usages
WHERE inserted_at >= ? AND inserted_at < ?
GROUP BY tag
HAVING COUNT(DISTINCT pubkey8) >= ?
ORDER BY COUNT(DISTINCT pubkey8)
DESC LIMIT ?;
`,
[since, until, threshold, limit],
).map((row) => ({
name: row[0],
accounts: Number(row[1]),
uses: Number(row[2]),
}));
async getTrendingTags({ since, until, limit = 10, threshold = 3 }: GetTrendingTagsOpts): Promise<{
tag: string;
accounts: number;
uses: number;
}[]> {
return await kysely.selectFrom('trends_tag_usages')
.select(({ fn }) => [
'tag',
fn.agg<number>('count', ['pubkey8']).distinct().as('accounts'),
fn.countAll<number>().as('uses'),
])
.where('inserted_at', '>=', since.valueOf())
.where('inserted_at', '<', until.valueOf())
.groupBy('tag')
.having((c) => c(c.fn.agg('count', ['pubkey8']).distinct(), '>=', threshold))
.orderBy((c) => c.fn.agg('count', ['pubkey8']).distinct(), 'desc')
.limit(limit)
.execute();
},
/**
* Gets the tag usage count for a specific tag.
* It returns an array with counts for each date between the range.
*/
getTagHistory({ tag, since, until, limit = 7, offset = 0 }: GetTagHistoryOpts) {
const result = db.query<string[]>(
`
SELECT date(inserted_at), COUNT(DISTINCT pubkey8), COUNT(*)
FROM tag_usages
WHERE tag = ? AND inserted_at >= ? AND inserted_at < ?
GROUP BY date(inserted_at)
ORDER BY date(inserted_at) DESC
LIMIT ?
OFFSET ?;
`,
[tag, since, until, limit, offset],
).map((row) => ({
day: new Date(row[0]),
accounts: Number(row[1]),
uses: Number(row[2]),
}));
async getTagHistory({ tag, since, until, limit = 7, offset = 0 }: GetTagHistoryOpts) {
const result = await kysely
.selectFrom('trends_tag_usages')
.select(({ fn }) => [
'inserted_at',
fn.agg<number>('count', ['pubkey8']).distinct().as('accounts'),
fn.countAll<number>().as('uses'),
])
.where('tag', '=', tag)
.where('inserted_at', '>=', since.valueOf())
.where('inserted_at', '<', until.valueOf())
.groupBy(sql`inserted_at`)
.orderBy(sql`inserted_at`, 'desc')
.limit(limit)
.offset(offset)
.execute();
/** Full date range between `since` and `until`. */
const dateRange = generateDateRange(
@@ -96,26 +81,29 @@ export const TrendsWorker = {
// Fill in missing dates with 0 usages.
return dateRange.map((day) => {
const data = result.find((item) => item.day.getTime() === day.getTime());
return data || { day, accounts: 0, uses: 0 };
const data = result.find((item) => new Date(item.inserted_at).getTime() === day.getTime());
if (data) {
return { ...data, day: new Date(data.inserted_at) };
}
return { day, accounts: 0, uses: 0 };
});
},
addTagUsages(pubkey: string, hashtags: string[], date = new Date()): void {
async addTagUsages(pubkey: string, hashtags: string[], inserted_at = new Date().valueOf()): Promise<void> {
const pubkey8 = NSchema.id().parse(pubkey).substring(0, 8);
const tags = hashtagSchema.array().min(1).parse(hashtags);
db.query(
'INSERT INTO tag_usages (tag, pubkey8, inserted_at) VALUES ' + tags.map(() => '(?, ?, ?)').join(', '),
tags.map((tag) => [tag, pubkey8, date]).flat(),
);
await kysely
.insertInto('trends_tag_usages')
.values(tags.map((tag) => ({ tag, pubkey8, inserted_at })))
.execute();
},
cleanupTagUsages(until: Date): void {
db.query(
'DELETE FROM tag_usages WHERE inserted_at < ?',
[until],
);
async cleanupTagUsages(until: Date): Promise<void> {
await kysely
.deleteFrom('trends_tag_usages')
.where('inserted_at', '<', until.valueOf())
.execute();
},
};