From 15e2d87caf622926db94bcd1195706a0bc5c1203 Mon Sep 17 00:00:00 2001 From: Paul Tran-Van Date: Thu, 7 Nov 2024 12:18:14 +0100 Subject: [PATCH] feat: Use local search only for long-term sessions We now rely on the checkbox on login to determine whether or not the local search should be used. If the user enables this checkbox, it means they trust this device and accept storing local data on it. Otherwise, we do not store any private data, and directly query the stack to build the search index. Note that the update mechanism is the same and relies on realtime. The difference is that we do not replicate the documents for non-local search. --- packages/cozy-dataproxy-lib/package.json | 3 +- .../src/search/SearchEngine.ts | 175 ++++++++++-------- .../cozy-dataproxy-lib/src/search/consts.ts | 2 +- .../src/search/queries/index.ts | 8 - 4 files changed, 103 insertions(+), 85 deletions(-) diff --git a/packages/cozy-dataproxy-lib/package.json b/packages/cozy-dataproxy-lib/package.json index ce8ea09a9a..27b4769716 100644 --- a/packages/cozy-dataproxy-lib/package.json +++ b/packages/cozy-dataproxy-lib/package.json @@ -63,7 +63,8 @@ "prepublishOnly": "yarn build", "test": "jest --config=./tests/jest.config.js", "test:watch": "yarn test --watchAll", - "start": "yarn build:watch" + "start": "yarn build:watch", + "lint": "cd ..
&& yarn eslint --ext js,jsx,ts packages/cozy-dataproxy-lib" }, "types": "dist/index.d.ts" } diff --git a/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts b/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts index 74a8d3abfa..215f132e3c 100644 --- a/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts +++ b/packages/cozy-dataproxy-lib/src/search/SearchEngine.ts @@ -11,13 +11,14 @@ import { CONTACTS_DOCTYPE, DOCTYPE_ORDER, LIMIT_DOCTYPE_SEARCH, - SearchedDoctype + SearchedDoctype, + SEARCHABLE_DOCTYPES } from './consts' import { getPouchLink } from './helpers/client' import { getSearchEncoder } from './helpers/getSearchEncoder' import { addFilePaths, shouldKeepFile } from './helpers/normalizeFile' import { normalizeSearchResult } from './helpers/normalizeSearchResult' -import { queryFilesForSearch, queryAllContacts, queryAllApps } from './queries' +import { queryFilesForSearch } from './queries' import { CozyDoc, RawSearchResult, @@ -30,6 +31,7 @@ import { isSearchedDoctype } from './types' + const log = Minilog('🗂️ [Indexing]') interface FlexSearchResultWithDoctype @@ -41,27 +43,42 @@ export class SearchEngine { client: CozyClient searchIndexes: SearchIndexes debouncedReplication: () => void + isLocalSearch: boolean constructor(client: CozyClient) { this.client = client this.searchIndexes = {} as SearchIndexes - this.indexOnChanges() + this.isLocalSearch = !!getPouchLink(this.client) + log.info('Use local data on trusted device : ', this.isLocalSearch) + this.debouncedReplication = (): void => { const pouchLink = getPouchLink(client) if (pouchLink) { pouchLink.startReplicationWithDebounce() } } + this.indexDocuments() } - indexOnChanges(): void { + indexDocuments(): void { if (!this.client) { return } + if (!this.isLocalSearch) { + // In case of non-local search, force the indexing for all doctypes + // For local search, this will be done automatically after initial replication + for (const doctype of SEARCHABLE_DOCTYPES) { + 
this.indexDocsForSearch(doctype as keyof typeof SEARCH_SCHEMA) + } + } + this.client.on('pouchlink:doctypesync:end', async (doctype: string) => { if (isSearchedDoctype(doctype)) { - await this.indexDocsForSearch(doctype as keyof typeof SEARCH_SCHEMA) + // Index doctype after initial replication + this.searchIndexes[doctype] = await this.indexDocsForSearch( + doctype as keyof typeof SEARCH_SCHEMA + ) } }) this.client.on('pouchlink:sync:start', () => { @@ -70,6 +87,7 @@ export class SearchEngine { this.client.on('pouchlink:sync:end', () => { log.debug('Ended pouch replication') }) + this.client.on('login', () => { // Ensure login is done before plugin register this.client.registerPlugin(RealtimePlugin, {}) @@ -105,7 +123,9 @@ export class SearchEngine { log.debug('[REALTIME] index doc after update : ', doc) this.addDocToIndex(searchIndex.index, doc) - this.debouncedReplication() + if (this.isLocalSearch) { + this.debouncedReplication() + } } handleDeletedDoc(doc: CozyDoc): void { @@ -121,13 +141,17 @@ export class SearchEngine { log.debug('[REALTIME] remove doc from index after update : ', doc) this.searchIndexes[doctype].index.remove(doc._id!) 
- this.debouncedReplication() + if (this.isLocalSearch) { + this.debouncedReplication() + } } buildSearchIndex( doctype: keyof typeof SEARCH_SCHEMA, docs: CozyDoc[] ): FlexSearch.Document { + const startTimeIndex = performance.now() + const fieldsToIndex = SEARCH_SCHEMA[doctype] const flexsearchIndex = new FlexSearch.Document({ @@ -146,6 +170,10 @@ export class SearchEngine { this.addDocToIndex(flexsearchIndex, doc) } + const endTimeIndex = performance.now() + log.debug( + `Create ${doctype} index took ${(endTimeIndex - startTimeIndex).toFixed(2)} ms` + ) return flexsearchIndex } @@ -165,51 +193,59 @@ export class SearchEngine { return true } - async indexDocsForSearch( - doctype: keyof typeof SEARCH_SCHEMA - ): Promise { - const searchIndex = this.searchIndexes[doctype] - const pouchLink = getPouchLink(this.client) + async getLocalLastSeq(doctype: keyof typeof SEARCH_SCHEMA): Promise { + if (this.isLocalSearch) { + const pouchLink = getPouchLink(this.client) + const info = pouchLink ? await pouchLink.getDbInfo(doctype) : null + return info?.update_seq || 0 + } + return -1 + } - if (!pouchLink) { - return null + async queryLocalOrRemoteDocs( + doctype: keyof typeof SEARCH_SCHEMA + ): Promise { + let docs = [] + const startTimeQ = performance.now() + + if (!this.isLocalSearch && doctype === FILES_DOCTYPE) { + // Special case for stack's files + docs = await queryFilesForSearch(this.client) + } else { + docs = await this.client.queryAll(Q(doctype).limitBy(null)) } - if (!searchIndex) { - // First creation of search index - const startTimeQ = performance.now() - const docs = await this.client.queryAll( - Q(doctype).limitBy(null) - ) - const endTimeQ = performance.now() - log.debug( - `Query ${docs.length} docs doctype ${doctype} took ${( - endTimeQ - startTimeQ - ).toFixed(2)} ms` - ) - - const startTimeIndex = performance.now() - const index = this.buildSearchIndex(doctype, docs) - const endTimeIndex = performance.now() - log.debug( - `Create ${doctype} index took 
${(endTimeIndex - startTimeIndex).toFixed( - 2 - )} ms` - ) - const info = await pouchLink.getDbInfo(doctype) - - this.searchIndexes[doctype] = { - index, - lastSeq: info?.update_seq, - lastUpdated: new Date().toISOString() - } - return this.searchIndexes[doctype] + const endTimeQ = performance.now() + log.debug( + `Query ${docs.length} docs doctype ${doctype} took ${(endTimeQ - startTimeQ).toFixed(2)} ms` + ) + return docs + } + + async initialIndexation( + doctype: keyof typeof SEARCH_SCHEMA + ): Promise { + const docs = await this.queryLocalOrRemoteDocs(doctype) + const index = this.buildSearchIndex(doctype, docs) + const lastSeq = await this.getLocalLastSeq(doctype) + + this.searchIndexes[doctype] = { + index, + lastSeq, + lastUpdated: new Date().toISOString() } + return this.searchIndexes[doctype] + } - // Incremental index update - // At this point, the search index are supposed to be already up-to-date, - // thanks to the realtime. - // However, we check it is actually the case for safety, and update the lastSeq + async incrementalIndexation( + doctype: keyof typeof SEARCH_SCHEMA, + searchIndex: SearchIndex + ): Promise { + const pouchLink = getPouchLink(this.client) + if (!this.isLocalSearch || !pouchLink) { + // No need to handle incremental indexation for non-local search: it is already done through realtime + return searchIndex + } const lastSeq = searchIndex.lastSeq || 0 const changes = await pouchLink.getChanges(doctype, { include_docs: true, @@ -230,34 +266,20 @@ export class SearchEngine { return searchIndex } - initIndexesFromStack = async (): Promise => { - log.debug('Initializing indexes') - - const files = await queryFilesForSearch(this.client) - const filesIndex = this.buildSearchIndex('io.cozy.files', files) - - const contacts = await queryAllContacts(this.client) - const contactsIndex = this.buildSearchIndex('io.cozy.contacts', contacts) - - const apps = await queryAllApps(this.client) - const appsIndex = this.buildSearchIndex('io.cozy.apps', 
apps) - - log.debug('Finished initializing indexes') - const currentDate = new Date().toISOString() - this.searchIndexes = { - [FILES_DOCTYPE]: { - index: filesIndex, - lastSeq: 0, - lastUpdated: currentDate - }, - [CONTACTS_DOCTYPE]: { - index: contactsIndex, - lastSeq: 0, - lastUpdated: currentDate - }, - [APPS_DOCTYPE]: { index: appsIndex, lastSeq: 0, lastUpdated: currentDate } + async indexDocsForSearch( + doctype: keyof typeof SEARCH_SCHEMA + ): Promise { + const searchIndex = this.searchIndexes[doctype] + + if (!searchIndex) { + // First creation of search index + return this.initialIndexation(doctype) } - return this.searchIndexes + + // At this point, the search index is supposed to be already up-to-date, + // thanks to the realtime. + // However, we check if it is actually the case for safety, and update the lastSeq + return this.incrementalIndexation(doctype, searchIndex) } search(query: string): SearchResult[] { @@ -273,7 +295,10 @@ export class SearchEngine { const results = this.limitSearchResults(sortedResults) const normResults: SearchResult[] = [] - const completedResults = addFilePaths(this.client, results) + // Special case for local files: the path is missing + const completedResults = this.isLocalSearch + ? 
addFilePaths(this.client, results) + : results for (const res of completedResults) { const normalizedRes = normalizeSearchResult(this.client, res, query) normResults.push(normalizedRes) diff --git a/packages/cozy-dataproxy-lib/src/search/consts.ts b/packages/cozy-dataproxy-lib/src/search/consts.ts index 2a84ec8b0b..bbb79ee8fa 100644 --- a/packages/cozy-dataproxy-lib/src/search/consts.ts +++ b/packages/cozy-dataproxy-lib/src/search/consts.ts @@ -1,4 +1,4 @@ -const SEARCHABLE_DOCTYPES = [ +export const SEARCHABLE_DOCTYPES = [ 'io.cozy.files', 'io.cozy.contacts', 'io.cozy.apps' diff --git a/packages/cozy-dataproxy-lib/src/search/queries/index.ts b/packages/cozy-dataproxy-lib/src/search/queries/index.ts index 8efa6f2d92..bae5a930e6 100644 --- a/packages/cozy-dataproxy-lib/src/search/queries/index.ts +++ b/packages/cozy-dataproxy-lib/src/search/queries/index.ts @@ -36,11 +36,3 @@ export const queryFilesForSearch = async ( return normalizedFiles } - -export const queryAllContacts = (client: CozyClient): Promise => { - return client.queryAll(Q(CONTACTS_DOCTYPE).limitBy(1000)) -} - -export const queryAllApps = (client: CozyClient): Promise => { - return client.queryAll(Q(APPS_DOCTYPE).limitBy(1000)) -}