rewrite trendsworker with kysely and deno cron, format changes

2026-02-23 05:56:03 +01:00 · 2024-05-20 00:05:03 +05:30
parent 83a7b1f231
commit f19629600d
5 changed files with 90 additions and 104 deletions
--- a/src/workers/trends.test.ts
+++ b/src/workers/trends.test.ts
@@ -1,9 +1,6 @@
 import { assertEquals } from '@std/assert';
-
 import { TrendsWorker } from './trends.ts';

-await TrendsWorker.open(':memory:');
-
 const p8 = (pubkey8: string) => `${pubkey8}00000000000000000000000000000000000000000000000000000000`;

 Deno.test('getTrendingTags', async () => {
@@ -19,9 +16,9 @@ Deno.test('getTrendingTags', async () => {
  });

  const expected = [
-    { name: 'ditto', accounts: 3, uses: 3 },
-    { name: 'hello', accounts: 2, uses: 3 },
-    { name: 'yolo', accounts: 1, uses: 1 },
+    { tag: 'ditto', accounts: 3, uses: 3 },
+    { tag: 'hello', accounts: 2, uses: 3 },
+    { tag: 'yolo', accounts: 1, uses: 1 },
  ];

  assertEquals(result, expected);
--- a/src/workers/trends.worker.ts
+++ b/src/workers/trends.worker.ts
@@ -1,9 +1,10 @@
 import { NSchema } from '@nostrify/nostrify';
 import * as Comlink from 'comlink';

-import { Sqlite } from '@/deps.ts';
 import { hashtagSchema } from '@/schema.ts';
 import { generateDateRange, Time } from '@/utils/time.ts';
+import { DittoDB } from '@/db/DittoDB.ts';
+import { sql } from 'kysely';

 interface GetTrendingTagsOpts {
  since: Date;
@@ -20,73 +21,57 @@ interface GetTagHistoryOpts {
  offset?: number;
 }

-let db: Sqlite;
+const kysely = await DittoDB.getInstance();

 export const TrendsWorker = {
-  open(path: string) {
-    db = new Sqlite(path);
-
-    db.execute(`
-      CREATE TABLE IF NOT EXISTS tag_usages (
-        tag TEXT NOT NULL COLLATE NOCASE,
-        pubkey8 TEXT NOT NULL,
-        inserted_at INTEGER NOT NULL
-      );
-
-      CREATE INDEX IF NOT EXISTS idx_time_tag ON tag_usages(inserted_at, tag);
-    `);
-
-    const cleanup = () => {
-      console.info('Cleaning up old tag usages...');
+  setupCleanupJob() {
+    Deno.cron('cleanup tag usages older than a week', { hour: { every: 1 } }, async () => {
      const lastWeek = new Date(new Date().getTime() - Time.days(7));
-      this.cleanupTagUsages(lastWeek);
-    };
-
-    setInterval(cleanup, Time.hours(1));
-    cleanup();
+      await this.cleanupTagUsages(lastWeek);
+    });
  },

  /** Gets the most used hashtags between the date range. */
-  getTrendingTags({ since, until, limit = 10, threshold = 3 }: GetTrendingTagsOpts) {
-    return db.query<string[]>(
-      `
-      SELECT tag, COUNT(DISTINCT pubkey8), COUNT(*)
-        FROM tag_usages
-        WHERE inserted_at >= ? AND inserted_at < ?
-        GROUP BY tag
-        HAVING COUNT(DISTINCT pubkey8) >= ?
-        ORDER BY COUNT(DISTINCT pubkey8)
-        DESC LIMIT ?;
-    `,
-      [since, until, threshold, limit],
-    ).map((row) => ({
-      name: row[0],
-      accounts: Number(row[1]),
-      uses: Number(row[2]),
-    }));
+  async getTrendingTags({ since, until, limit = 10, threshold = 3 }: GetTrendingTagsOpts): Promise<{
+    tag: string;
+    accounts: number;
+    uses: number;
+  }[]> {
+    return await kysely.selectFrom('trends_tag_usages')
+      .select(({ fn }) => [
+        'tag',
+        fn.agg<number>('count', ['pubkey8']).distinct().as('accounts'),
+        fn.countAll<number>().as('uses'),
+      ])
+      .where('inserted_at', '>=', since.valueOf())
+      .where('inserted_at', '<', until.valueOf())
+      .groupBy('tag')
+      .having((c) => c(c.fn.agg('count', ['pubkey8']).distinct(), '>=', threshold))
+      .orderBy((c) => c.fn.agg('count', ['pubkey8']).distinct(), 'desc')
+      .limit(limit)
+      .execute();
  },

  /**
   * Gets the tag usage count for a specific tag.
   * It returns an array with counts for each date between the range.
   */
-  getTagHistory({ tag, since, until, limit = 7, offset = 0 }: GetTagHistoryOpts) {
-    const result = db.query<string[]>(
-      `
-      SELECT date(inserted_at), COUNT(DISTINCT pubkey8), COUNT(*)
-        FROM tag_usages
-        WHERE tag = ? AND inserted_at >= ? AND inserted_at < ?
-        GROUP BY date(inserted_at)
-        ORDER BY date(inserted_at) DESC
-        LIMIT ?
-        OFFSET ?;
-    `,
-      [tag, since, until, limit, offset],
-    ).map((row) => ({
-      day: new Date(row[0]),
-      accounts: Number(row[1]),
-      uses: Number(row[2]),
-    }));
+  async getTagHistory({ tag, since, until, limit = 7, offset = 0 }: GetTagHistoryOpts) {
+    const result = await kysely
+      .selectFrom('trends_tag_usages')
+      .select(({ fn }) => [
+        'inserted_at',
+        fn.agg<number>('count', ['pubkey8']).distinct().as('accounts'),
+        fn.countAll<number>().as('uses'),
+      ])
+      .where('tag', '=', tag)
+      .where('inserted_at', '>=', since.valueOf())
+      .where('inserted_at', '<', until.valueOf())
+      .groupBy(sql`inserted_at`)
+      .orderBy(sql`inserted_at`, 'desc')
+      .limit(limit)
+      .offset(offset)
+      .execute();

    /** Full date range between `since` and `until`. */
    const dateRange = generateDateRange(
@@ -96,26 +81,29 @@ export const TrendsWorker = {

    // Fill in missing dates with 0 usages.
    return dateRange.map((day) => {
-      const data = result.find((item) => item.day.getTime() === day.getTime());
-      return data || { day, accounts: 0, uses: 0 };
+      const data = result.find((item) => new Date(item.inserted_at).getTime() === day.getTime());
+      if (data) {
+        return { ...data, day: new Date(data.inserted_at) };
+      }
+      return { day, accounts: 0, uses: 0 };
    });
  },

-  addTagUsages(pubkey: string, hashtags: string[], date = new Date()): void {
+  async addTagUsages(pubkey: string, hashtags: string[], inserted_at = new Date().valueOf()): Promise<void> {
    const pubkey8 = NSchema.id().parse(pubkey).substring(0, 8);
    const tags = hashtagSchema.array().min(1).parse(hashtags);

-    db.query(
-      'INSERT INTO tag_usages (tag, pubkey8, inserted_at) VALUES ' + tags.map(() => '(?, ?, ?)').join(', '),
-      tags.map((tag) => [tag, pubkey8, date]).flat(),
-    );
+    await kysely
+      .insertInto('trends_tag_usages')
+      .values(tags.map((tag) => ({ tag, pubkey8, inserted_at })))
+      .execute();
  },

-  cleanupTagUsages(until: Date): void {
-    db.query(
-      'DELETE FROM tag_usages WHERE inserted_at < ?',
-      [until],
-    );
+  async cleanupTagUsages(until: Date): Promise<void> {
+    await kysely
+      .deleteFrom('trends_tag_usages')
+      .where('inserted_at', '<', until.valueOf())
+      .execute();
  },
 };