From 34449a76ab6d4895796d45a5575a39fcd65f6650 Mon Sep 17 00:00:00 2001 From: Cody Tseng Date: Thu, 21 Nov 2024 11:53:23 +0800 Subject: [PATCH] feat: fts (#30) --- README.md | 1 + .../__test__/event-repository-sqlite.spec.ts | 14 +- .../migrations/001-initial.sql | 41 ----- .../src/event-repository-sqlite.ts | 165 ++++++++++++------ .../event-repository-sqlite/src/migrations.ts | 95 ++++++++++ .../event-repository-sqlite/src/search.ts | 68 ++++++++ 6 files changed, 282 insertions(+), 102 deletions(-) delete mode 100644 packages/event-repository-sqlite/migrations/001-initial.sql create mode 100644 packages/event-repository-sqlite/src/migrations.ts create mode 100644 packages/event-repository-sqlite/src/search.ts diff --git a/README.md b/README.md index 965db981..7fea7f96 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ async function bootstrap() { // You can implement your own event repository. It just needs to implement a few methods. const eventRepository = new EventRepositorySqlite(); + await eventRepository.init(); const relay = new NostrRelay(eventRepository); const validator = new Validator(); diff --git a/packages/event-repository-sqlite/__test__/event-repository-sqlite.spec.ts b/packages/event-repository-sqlite/__test__/event-repository-sqlite.spec.ts index 94db67cf..77a857f4 100644 --- a/packages/event-repository-sqlite/__test__/event-repository-sqlite.spec.ts +++ b/packages/event-repository-sqlite/__test__/event-repository-sqlite.spec.ts @@ -9,6 +9,7 @@ describe('EventRepositorySqlite', () => { beforeEach(async () => { eventRepository = new EventRepositorySqlite(); + await eventRepository.init(); database = eventRepository.getDatabase(); }); @@ -20,6 +21,7 @@ describe('EventRepositorySqlite', () => { it('should support create by better-sqlite3.Database', async () => { const db = new BetterSqlite3(':memory:'); const newEventRepository = new EventRepositorySqlite(db); + await newEventRepository.init(); expect(newEventRepository.getDatabase()).toBe(db); expect(newEventRepository.getDefaultLimit()).toBe(100); await newEventRepository.destroy(); @@ -29,14 +31,15 @@ describe('EventRepositorySqlite', () => { const newEventRepository = new EventRepositorySqlite(':memory:', { defaultLimit: 10, }); + await newEventRepository.init(); expect(newEventRepository.getDefaultLimit()).toBe(10); await newEventRepository.destroy(); }); }); describe('isSearchSupported', () => { - it('should return false', () => { - expect(eventRepository.isSearchSupported()).toBe(false); + it('should return true', () => { + expect(eventRepository.isSearchSupported()).toBe(true); }); }); @@ -394,11 +397,4 @@ describe('EventRepositorySqlite', () => { expect(eventRepository.getDefaultLimit()).toBe(10); }); }); - - describe('migrate', () => { - it('should not run migration if already migrated', async () => { - const result = (eventRepository as any).migrate(); - expect(result.executedMigrations).toHaveLength(0); - }); - }); }); diff --git a/packages/event-repository-sqlite/migrations/001-initial.sql b/packages/event-repository-sqlite/migrations/001-initial.sql deleted file mode 100644 index df4d0b1a..00000000 --- a/packages/event-repository-sqlite/migrations/001-initial.sql +++ /dev/null @@ -1,41 +0,0 @@ -CREATE TABLE - events ( - id TEXT PRIMARY KEY NOT NULL, - pubkey TEXT NOT NULL, - author TEXT NOT NULL, - created_at INTEGER NOT NULL, - kind INTEGER NOT NULL, - tags TEXT NOT NULL DEFAULT '[]', - content TEXT NOT NULL DEFAULT '', - sig TEXT NOT NULL, - d_tag_value TEXT - ); - -CREATE UNIQUE INDEX e_author_kind_d_tag_value_idx ON events (author, kind, d_tag_value) -WHERE - d_tag_value IS NOT NULL; - -CREATE INDEX e_author_kind_created_at_idx ON events (author, kind, created_at); - -CREATE INDEX e_author_created_at_idx ON events (author, created_at); - -CREATE INDEX e_kind_created_at_idx ON events (kind, created_at); - -CREATE INDEX e_created_at_idx ON events (created_at); - -CREATE TABLE - generic_tags ( - tag TEXT NOT NULL, - event_id TEXT NOT NULL, - author TEXT NOT NULL, - kind INTEGER NOT NULL, - created_at INTEGER NOT NULL, - PRIMARY KEY (event_id, tag), - FOREIGN KEY (event_id) REFERENCES events (id) ON DELETE CASCADE ON UPDATE CASCADE - ); - -CREATE INDEX g_tag_created_at_desc_event_id_idx ON generic_tags (tag, created_at DESC, event_id); - -CREATE INDEX g_tag_kind_created_at_desc_event_id_idx ON generic_tags (tag, kind, created_at DESC, event_id); - -CREATE INDEX g_tag_author_kind_created_at_desc_event_id_idx ON generic_tags (tag, author, kind, created_at DESC, event_id); \ No newline at end of file diff --git a/packages/event-repository-sqlite/src/event-repository-sqlite.ts b/packages/event-repository-sqlite/src/event-repository-sqlite.ts index cb49e0cd..e5fa332d 100644 --- a/packages/event-repository-sqlite/src/event-repository-sqlite.ts +++ b/packages/event-repository-sqlite/src/event-repository-sqlite.ts @@ -6,14 +6,16 @@ import { Filter, } from '@nostr-relay/common'; import * as BetterSqlite3 from 'better-sqlite3'; -import { readFileSync, readdirSync } from 'fs'; import { JSONColumnType, Kysely, + Migrator, SelectQueryBuilder, + sql, SqliteDialect, } from 'kysely'; -import * as path from 'path'; +import { CustomMigrationProvider } from './migrations'; +import { extractSearchableContent } from './search'; const DEFAULT_LIMIT = 100; const MAX_LIMIT_MULTIPLIER = 10; @@ -24,6 +26,7 @@ export type EventRepositorySqliteOptions = { export interface Database { events: EventTable; + events_fts: EventFtsTable; generic_tags: GenericTagTable; } @@ -39,6 +42,11 @@ interface EventTable { d_tag_value: string | null; } +interface EventFtsTable { + id: string; + content: string; +} + interface GenericTagTable { tag: string; author: string; @@ -80,12 +88,15 @@ export class EventRepositorySqlite extends EventRepository { this.db = new Kysely({ dialect: new SqliteDialect({ database: this.betterSqlite3 }), }); - this.migrate(); this.defaultLimit = options?.defaultLimit ?? DEFAULT_LIMIT; this.maxLimit = this.defaultLimit * MAX_LIMIT_MULTIPLIER; } + async init(): Promise { + await this.migrate(); + } + getDatabase(): BetterSqlite3.Database { return this.betterSqlite3; } @@ -95,7 +106,7 @@ export class EventRepositorySqlite extends EventRepository { } isSearchSupported(): boolean { - return false; + return true; } async upsert(event: Event): Promise { @@ -106,6 +117,17 @@ export class EventRepositorySqlite extends EventRepository { const { numInsertedOrUpdatedRows } = await this.db .transaction() .execute(async trx => { + let oldEventId: string | undefined; + if (dTagValue !== null) { + const row = await trx + .selectFrom('events') + .select(['id']) + .where('author', '=', author) + .where('kind', '=', event.kind) + .where('d_tag_value', '=', dTagValue) + .executeTakeFirst(); + oldEventId = row ? row.id : undefined; + } const eventInsertResult = await trx .insertInto('events') .values({ @@ -171,6 +193,24 @@ export class EventRepositorySqlite extends EventRepository { .executeTakeFirst(); } + if (oldEventId) { + await trx + .deleteFrom('events_fts') + .where('id', '=', oldEventId) + .execute(); + } + + const searchableContent = extractSearchableContent(event); + if (searchableContent) { + await trx + .insertInto('events_fts') + .values({ + id: event.id, + content: searchableContent, + }) + .execute(); + } + return eventInsertResult; }); @@ -183,6 +223,21 @@ export class EventRepositorySqlite extends EventRepository { } } + async insertToSearch(event: Event): Promise { + const searchableContent = extractSearchableContent(event); + if (searchableContent) { + await this.db + .insertInto('events_fts') + .values({ + id: event.id, + content: searchableContent, + }) + .execute(); + return 1; + } + return 0; + } + async find(filter: Filter): Promise { const limit = this.getLimitFrom(filter); if (limit === 0) return []; @@ -240,6 +295,15 @@ export class EventRepositorySqlite extends EventRepository { private createSelectQuery(filter: Filter): eventSelectQueryBuilder { let query = this.db.selectFrom('events as e'); + const searchStr = filter.search?.trim(); + if (searchStr) { + query = query.innerJoin('events_fts as fts', join => + join + .onRef('fts.id', '=', 'e.id') + .on('fts.content', sql`match`, searchStr), + ); + } + const genericTagsCollection = this.extractGenericTagsCollectionFrom(filter); if (genericTagsCollection.length) { const [firstGenericTagsFilter, secondGenericTagsFilter] = @@ -360,59 +424,56 @@ export class EventRepositorySqlite extends EventRepository { : Math.min(filter.limit, this.maxLimit); } - private migrate(): { - lastMigration: string | undefined; - executedMigrations: string[]; - } { - this.betterSqlite3.exec(` - CREATE TABLE IF NOT EXISTS nostr_relay_migrations ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - created_at INTEGER NOT NULL - ) - `); + private async migrate(): Promise { + this.migrateOldMigrationTable(); - const lastMigration = this.betterSqlite3 - .prepare(`SELECT * FROM nostr_relay_migrations ORDER BY id DESC LIMIT 1`) - .get() as { name: string } | undefined; + const migrator = new Migrator({ + db: this.db, + provider: new CustomMigrationProvider(), + migrationTableName: 'nostr_relay_sqlite_migrations', + }); + + const { error } = await migrator.migrateToLatest(); - const migrationFileNames = readdirSync( - path.join(__dirname, '../migrations'), - ).filter(fileName => fileName.endsWith('.sql')); - - const migrationsToRun = ( - lastMigration - ? migrationFileNames.filter(fileName => fileName > lastMigration.name) - : migrationFileNames - ).sort(); - - if (migrationsToRun.length === 0) { - return { - lastMigration: lastMigration?.name, - executedMigrations: [], - }; + if (error) { + throw error; } + } - const runMigrations = this.betterSqlite3.transaction(() => { - migrationsToRun.forEach(fileName => { - const migration = readFileSync( - path.join(__dirname, '../migrations', fileName), - 'utf8', - ); - this.betterSqlite3.exec(migration); - this.betterSqlite3 - .prepare( - `INSERT INTO nostr_relay_migrations (name, created_at) VALUES (?, ?)`, - ) - .run(fileName, Date.now()); - }); - }); - runMigrations(); + private migrateOldMigrationTable(): void { + const oldMigrationsTable = this.betterSqlite3 + .prepare( + `SELECT name FROM sqlite_master WHERE type='table' AND name='nostr_relay_migrations'`, + ) + .get() as { name: string } | undefined; - return { - lastMigration: migrationsToRun[migrationsToRun.length - 1], - executedMigrations: migrationsToRun, - }; + if (oldMigrationsTable) { + this.betterSqlite3.exec(` + CREATE TABLE IF NOT EXISTS nostr_relay_sqlite_migrations ( + name TEXT NOT NULL PRIMARY KEY, + timestamp TEXT NOT NULL + ) + `); + + const oldMigrations = this.betterSqlite3 + .prepare(`SELECT * FROM nostr_relay_migrations`) + .all() as { name: string; created_at: number }[]; + + const runMigrations = this.betterSqlite3.transaction(() => { + oldMigrations.forEach(migration => { + this.betterSqlite3 + .prepare( + `INSERT INTO nostr_relay_sqlite_migrations (name, timestamp) VALUES (?, ?)`, + ) + .run( + migration.name.replace('.sql', ''), + new Date(migration.created_at).toISOString(), + ); + }); + this.betterSqlite3.exec(`DROP TABLE nostr_relay_migrations`); + }); + runMigrations(); + } } private toEvent(row: any): Event { diff --git a/packages/event-repository-sqlite/src/migrations.ts b/packages/event-repository-sqlite/src/migrations.ts new file mode 100644 index 00000000..16d5373f --- /dev/null +++ b/packages/event-repository-sqlite/src/migrations.ts @@ -0,0 +1,95 @@ +import { Kysely, Migration, MigrationProvider, sql } from 'kysely'; + +const migrations: Record = { + '001-initial': { + up: async (db: Kysely) => { + await sql`CREATE TABLE + events ( + id TEXT PRIMARY KEY NOT NULL, + pubkey TEXT NOT NULL, + author TEXT NOT NULL, + created_at INTEGER NOT NULL, + kind INTEGER NOT NULL, + tags TEXT NOT NULL DEFAULT '[]', + content TEXT NOT NULL DEFAULT '', + sig TEXT NOT NULL, + d_tag_value TEXT + );`.execute(db); + + await sql`CREATE UNIQUE INDEX e_author_kind_d_tag_value_idx ON events (author, kind, d_tag_value) + WHERE + d_tag_value IS NOT NULL;`.execute(db); + + await sql`CREATE INDEX e_author_kind_created_at_idx ON events (author, kind, created_at);`.execute( + db, + ); + + await sql`CREATE INDEX e_author_created_at_idx ON events (author, created_at);`.execute( + db, + ); + + await sql`CREATE INDEX e_kind_created_at_idx ON events (kind, created_at);`.execute( + db, + ); + + await sql`CREATE INDEX e_created_at_idx ON events (created_at);`.execute( + db, + ); + + await sql`CREATE TABLE + generic_tags ( + tag TEXT NOT NULL, + event_id TEXT NOT NULL, + author TEXT NOT NULL, + kind INTEGER NOT NULL, + created_at INTEGER NOT NULL, + PRIMARY KEY (event_id, tag), + FOREIGN KEY (event_id) REFERENCES events (id) ON DELETE CASCADE ON UPDATE CASCADE + );`.execute(db); + + await sql`CREATE INDEX g_tag_created_at_desc_event_id_idx ON generic_tags (tag, created_at DESC, event_id);`.execute( + db, + ); + + await sql`CREATE INDEX g_tag_kind_created_at_desc_event_id_idx ON generic_tags (tag, kind, created_at DESC, event_id);`.execute( + db, + ); + + await sql`CREATE INDEX g_tag_author_kind_created_at_desc_event_id_idx ON generic_tags (tag, author, kind, created_at DESC, event_id);`.execute( + db, + ); + }, + down: async db => { + await db.schema + .dropIndex('g_tag_author_kind_created_at_desc_event_id_idx') + .execute(); + await db.schema + .dropIndex('g_tag_kind_created_at_desc_event_id_idx') + .execute(); + await db.schema.dropIndex('g_tag_created_at_desc_event_id_idx').execute(); + await db.schema.dropTable('generic_tags').execute(); + await db.schema.dropIndex('e_created_at_idx').execute(); + await db.schema.dropIndex('e_kind_created_at_idx').execute(); + await db.schema.dropIndex('e_author_created_at_idx').execute(); + await db.schema.dropIndex('e_author_kind_created_at_idx').execute(); + await db.schema.dropIndex('e_author_kind_d_tag_value_idx').execute(); + await db.schema.dropTable('events').execute(); + }, + }, + '002-fts': { + up: async db => { + await sql`CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5(id UNINDEXED, content, tokenize='trigram')`.execute( + db, + ); + }, + down: async db => { + await db.schema.dropTable('events_fts').execute(); + }, + }, +}; + +export class CustomMigrationProvider implements MigrationProvider { + async getMigrations(): Promise> { + return migrations; + } +} diff --git a/packages/event-repository-sqlite/src/search.ts b/packages/event-repository-sqlite/src/search.ts new file mode 100644 index 00000000..840ebc41 --- /dev/null +++ b/packages/event-repository-sqlite/src/search.ts @@ -0,0 +1,68 @@ +import { Event } from '@nostr-relay/common'; + +const SEARCHABLE_TAGS = ['title', 'description', 'about', 'summary', 'alt']; +const SEARCHABLE_KIND_WHITELIST = [0, 1, 1111, 9802, 30023, 30024]; +const SEARCHABLE_CONTENT_FORMATTERS: Record< + number, + (content: string) => string +> = { + [0]: content => { + const SEARCHABLE_PROFILE_FIELDS = [ + 'name', + 'display_name', + 'about', + 'nip05', + 'lud16', + 'website', + // Deprecated fields + 'displayName', + 'username', + ]; + try { + const lines: string[] = []; + const json = JSON.parse(content); + + for (const field of SEARCHABLE_PROFILE_FIELDS) { + if (json[field]) lines.push(json[field]); + } + + return lines.join('\n'); + } catch { + return content; + } + }, +}; + +export function extractSearchableContent(event: Event): string { + if (!SEARCHABLE_KIND_WHITELIST.includes(event.kind)) return ''; + + const formattedContent = ( + SEARCHABLE_CONTENT_FORMATTERS[event.kind] + ? SEARCHABLE_CONTENT_FORMATTERS[event.kind](event.content) + : event.content + ).trim(); + + const formattedTags = event.tags + .filter(([tagName]) => SEARCHABLE_TAGS.includes(tagName)) + .map(([, tagValue]) => tagValue.trim()) + .filter(Boolean) + .join(' '); + + return cleanContent(`${formattedContent} ${formattedTags}`); +} + +function cleanContent(content: string): string { + // remove all image and video links + content = content.replace( + /https?:\/\/\S+\.(jpg|jpeg|png|gif|bmp|webp|tiff|tif|svg|ico|heic|mp4|mkv|mov|avi|flv|wmv|webm|m4v|3gp|ts)(\?\S*)?/gi, + '', + ); + + // remove all nostr:... + content = content.replace(/nostr:\S+/g, ''); + + // remove extra spaces + content = content.replace(/\s+/g, ' ').trim(); + + return content; +}