From 859e3b23d18ba5203574cbcf210ebb36aa4e3cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Poizat?= Date: Thu, 14 Dec 2023 17:33:06 +0100 Subject: [PATCH] feat(backup): Improve dedup algorithm to work with old backup Dedup in photo backup means that we do not upload pictures if they already exist in the Cozy. To identify whether two pictures are identical, we compare their name and creation date. For files uploaded by the new backup, we compare with our own creation date stored in the io.cozy.files metadata. It just works. But for photos uploaded by the old backup, we compare with the creation date that was taken from EXIF, where the timezone could have been badly managed. So if we compare the dates strictly, it may not work. So here, in dedup mode, we compare only part of the date by ignoring the "hour" field: - it's almost impossible to get a false match by just ignoring the "hour" field => OK - we can miss some matches (e.g. when the timezone error crosses midnight or is not a whole number of hours) => but we accept that our dedup is not 100% accurate --- .../services/manageRemoteBackupConfig.spec.ts | 104 ++++++++++++++++++ .../services/manageRemoteBackupConfig.ts | 46 ++++++-- 2 files changed, 142 insertions(+), 8 deletions(-) diff --git a/src/app/domain/backup/services/manageRemoteBackupConfig.spec.ts b/src/app/domain/backup/services/manageRemoteBackupConfig.spec.ts index dfcfc94a1..d96e92ba1 100644 --- a/src/app/domain/backup/services/manageRemoteBackupConfig.spec.ts +++ b/src/app/domain/backup/services/manageRemoteBackupConfig.spec.ts @@ -1,6 +1,14 @@ import * as manageRemoteBackupConfig from '/app/domain/backup/services/manageRemoteBackupConfig' import type CozyClient from 'cozy-client' +import flag from 'cozy-flags' + +import { Media } from '/app/domain/backup/models' +import { File } from '/app/domain/backup/queries' + +jest.mock('cozy-flags') + +const mockedFlag = flag as jest.MockedFunction describe('fetchRemoteBackupConfigs', () => { const mockClientWithFindReferencedByResult = ( @@ -142,3 +150,99 @@ describe('fetchDeviceRemoteBackupConfig', () => { 
expect(remoteBackupConfig).toBeUndefined() }) }) + +describe('isFileCorrespondingToMedia', () => { + it('true when creationDateFromLibrary exists in file and corresponds to creationDate in media', () => { + const file = { + metadata: { + creationDateFromLibrary: new Date(2021, 0, 0, 10, 0, 0).getTime() // good date + }, + created_at: new Date(2021, 0, 0, 9, 0, 0).getTime() // bad date (bad EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 0, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(true) + }) + + it('false when creationDateFromLibrary exists in file and do not correspond to creationDate in media', () => { + const file = { + metadata: { + creationDateFromLibrary: new Date(2021, 0, 0, 10, 0, 0).getTime() // good date + }, + created_at: new Date(2021, 0, 0, 9, 0, 0).getTime() // bad date (bad EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 1, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(false) + }) + + it('true when only created_at exists in file and corresponds to creationDate in media', () => { + const file = { + created_at: new Date(2021, 0, 0, 10, 0, 0).getTime() // good date (lucky EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 0, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(true) + }) + + it('false when file only created_at exists and do not correspond to creationDate in media', () => { + const file = { + created_at: new Date(2021, 0, 0, 10, 0, 0).getTime() // good date (lucky EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 1, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(false) + 
}) + + it('true when only created_at exists in file and corresponds with day/minute to creationDate in media with dedup mode', () => { + mockedFlag.mockReturnValue(true) + + const file = { + created_at: new Date(2021, 0, 0, 9, 0, 0).getTime() // bad date (bad EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 0, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(true) + }) + + it('false when only created_at exists in file and corresponds with day/minute to creationDate in media without dedup mode', () => { + mockedFlag.mockReturnValue(false) + + const file = { + created_at: new Date(2021, 0, 0, 9, 0, 0).getTime() // bad date (bad EXIF) + } as unknown as File + + const media = { + creationDate: new Date(2021, 0, 0, 10, 0, 0).getTime() + } as unknown as Media + + expect( + manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media) + ).toBe(false) + }) +}) diff --git a/src/app/domain/backup/services/manageRemoteBackupConfig.ts b/src/app/domain/backup/services/manageRemoteBackupConfig.ts index 3e976dcf3..f69860bd4 100644 --- a/src/app/domain/backup/services/manageRemoteBackupConfig.ts +++ b/src/app/domain/backup/services/manageRemoteBackupConfig.ts @@ -244,15 +244,45 @@ export const createRemoteBackupFolder = async ( return remoteBackupConfig } -const isFileCorrespondingToMedia = (file: File, media: Media): boolean => { - const creationDate = new Date( - file?.metadata?.creationDateFromLibrary ?? 
file.created_at - ) - creationDate.setMilliseconds(0) +export const isFileCorrespondingToMedia = ( + file: File, + media: Media +): boolean => { + const creationDateFromLibrary = file.metadata?.creationDateFromLibrary - return ( - file.name === media.name && creationDate.getTime() === media.creationDate - ) + /* File come from the new backup */ + + if (creationDateFromLibrary) { + const creationDate = new Date(creationDateFromLibrary) + creationDate.setMilliseconds(0) + + return ( + file.name === media.name && creationDate.getTime() === media.creationDate + ) + } + + if (flag('flagship.backup.dedup')) { + const creationDate = new Date(file.created_at) + creationDate.setMilliseconds(0) + + const mediaCreationDate = new Date(media.creationDate) + + return ( + file.name === media.name && + creationDate.getFullYear() === mediaCreationDate.getFullYear() && + creationDate.getMonth() === mediaCreationDate.getMonth() && + creationDate.getDate() === mediaCreationDate.getDate() && + creationDate.getMinutes() === mediaCreationDate.getMinutes() && + creationDate.getSeconds() === mediaCreationDate.getSeconds() + ) + } else { + const creationDate = new Date(file.created_at) + creationDate.setMilliseconds(0) + + return ( + file.name === media.name && creationDate.getTime() === media.creationDate + ) + } } const formatBackupedMedia = (