Skip to content

Commit

Permalink
feat(backup): Improve dedup algorithm to work with old backup
Browse files Browse the repository at this point in the history
Dedup in photo backup means that we do not upload pictures if they already exist in the Cozy.

To identify if two pictures are identical, we compare name and creation date.

For new backup, we compare with our own creation date added in the io.cozy.files metadata. It just works.

But for photos uploaded by the old backup, we compare with the creation date that has been taken from EXIF, where the timezone could have been badly managed. So if we compare dates strictly, it may not work.

So here we compare only part of the date in dedup mode by ignoring the "hour" field :
- it's almost impossible to have a false identity by just ignoring the "hour" field => OK
- we can miss some identity => but we accept that our dedup is not 100% accurate
  • Loading branch information
zatteo committed Dec 15, 2023
1 parent a3ee7da commit 859e3b2
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 8 deletions.
104 changes: 104 additions & 0 deletions src/app/domain/backup/services/manageRemoteBackupConfig.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import * as manageRemoteBackupConfig from '/app/domain/backup/services/manageRemoteBackupConfig'

import type CozyClient from 'cozy-client'
import flag from 'cozy-flags'

import { Media } from '/app/domain/backup/models'
import { File } from '/app/domain/backup/queries'

// Replace the cozy-flags module with a Jest mock so that each test can decide
// what flag() returns.
jest.mock('cozy-flags')

// Same `flag` import, typed as a Jest mock function so that helpers such as
// mockReturnValue are available to the tests below.
const mockedFlag = flag as jest.MockedFunction<typeof flag>

describe('fetchRemoteBackupConfigs', () => {
const mockClientWithFindReferencedByResult = (
Expand Down Expand Up @@ -142,3 +150,99 @@ describe('fetchDeviceRemoteBackupConfig', () => {
expect(remoteBackupConfig).toBeUndefined()
})
})

describe('isFileCorrespondingToMedia', () => {
  // Shared name so that, unless a test says otherwise, only the dates decide
  // the outcome of the comparison.
  const name = 'IMG_0001.jpg'

  // Timestamp (ms) for the given day of January 2021 at `hour`:00:00, built in
  // local time like the dates compared by the function under test.
  const dateAt = (day: number, hour: number): number =>
    new Date(2021, 0, day, hour, 0, 0).getTime()

  beforeEach(() => {
    // Dedup mode is off unless a test enables it explicitly, so the mocked
    // flag value never leaks from one test to the next.
    mockedFlag.mockReturnValue(false)
  })

  it('true when creationDateFromLibrary exists in file and corresponds to creationDate in media', () => {
    const file = {
      name,
      metadata: {
        creationDateFromLibrary: dateAt(1, 10) // good date
      },
      created_at: dateAt(1, 9) // bad date (bad EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(1, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(true)
  })

  it('false when creationDateFromLibrary exists in file and do not correspond to creationDate in media', () => {
    const file = {
      name,
      metadata: {
        creationDateFromLibrary: dateAt(1, 10) // good date
      },
      created_at: dateAt(1, 9) // bad date (bad EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(2, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(false)
  })

  it('true when only created_at exists in file and corresponds to creationDate in media', () => {
    const file = {
      name,
      created_at: dateAt(1, 10) // good date (lucky EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(1, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(true)
  })

  it('false when file only created_at exists and do not correspond to creationDate in media', () => {
    const file = {
      name,
      created_at: dateAt(1, 10) // good date (lucky EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(2, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(false)
  })

  it('true when only created_at exists in file and corresponds with day/minute to creationDate in media with dedup mode', () => {
    mockedFlag.mockReturnValue(true)

    const file = {
      name,
      created_at: dateAt(1, 9) // bad date (bad EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(1, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(true)
  })

  it('false when only created_at exists in file and corresponds with day/minute to creationDate in media without dedup mode', () => {
    mockedFlag.mockReturnValue(false)

    const file = {
      name,
      created_at: dateAt(1, 9) // bad date (bad EXIF)
    } as unknown as File

    const media = {
      name,
      creationDate: dateAt(1, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(false)
  })

  it('false when dates correspond but names differ', () => {
    const file = {
      name,
      metadata: {
        creationDateFromLibrary: dateAt(1, 10)
      },
      created_at: dateAt(1, 10)
    } as unknown as File

    const media = {
      name: 'IMG_0002.jpg',
      creationDate: dateAt(1, 10)
    } as unknown as Media

    expect(
      manageRemoteBackupConfig.isFileCorrespondingToMedia(file, media)
    ).toBe(false)
  })
})
46 changes: 38 additions & 8 deletions src/app/domain/backup/services/manageRemoteBackupConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,45 @@ export const createRemoteBackupFolder = async (
return remoteBackupConfig
}

/**
 * Tells whether a remote file is the already-uploaded copy of a local media.
 *
 * Two pictures are considered identical when they share the same name and the
 * same creation date. Which date is used depends on what the file carries:
 * - `metadata.creationDateFromLibrary` (written by the new backup) is compared
 *   strictly, at second precision, with `media.creationDate`;
 * - otherwise `created_at` is used. With the `flagship.backup.dedup` flag on,
 *   only year, month, day, minutes and seconds are compared: the hour is
 *   ignored because the EXIF-based date of old backups may have a badly
 *   managed timezone. With the flag off, the comparison stays strict.
 *
 * @param file - Remote file to compare
 * @param media - Local media to compare
 * @returns true when the file is considered to be the same picture as the media
 */
export const isFileCorrespondingToMedia = (
  file: File,
  media: Media
): boolean => {
  // Whatever the date strategy, a different name is never the same picture.
  if (file.name !== media.name) return false

  const creationDateFromLibrary = file.metadata?.creationDateFromLibrary

  // Prefer the date stored by the new backup, fall back to created_at.
  const creationDate = new Date(creationDateFromLibrary || file.created_at)
  // Drop milliseconds before comparing with media.creationDate.
  creationDate.setMilliseconds(0)

  /* File come from the new backup, or dedup mode is disabled: strict match */
  if (creationDateFromLibrary || !flag('flagship.backup.dedup')) {
    return creationDate.getTime() === media.creationDate
  }

  /* Old backup in dedup mode: compare every date field except the hour */
  const mediaCreationDate = new Date(media.creationDate)

  return (
    creationDate.getFullYear() === mediaCreationDate.getFullYear() &&
    creationDate.getMonth() === mediaCreationDate.getMonth() &&
    creationDate.getDate() === mediaCreationDate.getDate() &&
    creationDate.getMinutes() === mediaCreationDate.getMinutes() &&
    creationDate.getSeconds() === mediaCreationDate.getSeconds()
  )
}

const formatBackupedMedia = (
Expand Down

0 comments on commit 859e3b2

Please sign in to comment.