From 0243c4e415cc4b0f8d9c6f01e77419e73122e3d7 Mon Sep 17 00:00:00 2001 From: mytlogos Date: Tue, 28 Dec 2021 15:08:20 +0100 Subject: [PATCH] feat(scraper): delete invalid releases. Releases are invalid if the ToC scraper no longer produces them when the ToC is scraped. --- .../src/database/contexts/episodeContext.ts | 37 ++++++++++++------- packages/scraper/src/jobHandler.ts | 37 +++++++++++++++++++ 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/packages/core/src/database/contexts/episodeContext.ts b/packages/core/src/database/contexts/episodeContext.ts index d960aba0..235ed7a5 100644 --- a/packages/core/src/database/contexts/episodeContext.ts +++ b/packages/core/src/database/contexts/episodeContext.ts @@ -666,19 +666,30 @@ export class EpisodeContext extends SubContext { ); } - public async deleteRelease(release: EpisodeRelease): EmptyPromise { - const result = await this.delete( - "episode_release", - { - column: "episode_id", - value: release.episodeId, - }, - { - column: "url", - value: release.url, - }, - ); - storeModifications("release", "delete", result); + public async deleteRelease(release: EpisodeRelease | EpisodeRelease[]): EmptyPromise { + if (Array.isArray(release)) { + await Promise.all( + batch(release, 100).map((releaseBatch) => { + return this.query( + `DELETE FROM episode_release WHERE (episode_id, url) in (${releaseBatch.map(() => "(?,?)").join(",")})`, + releaseBatch.flatMap((item) => [item.episodeId, item.url]), + ); + }), + ); + } else { + const result = await this.delete( + "episode_release", + { + column: "episode_id", + value: release.episodeId, + }, + { + column: "url", + value: release.url, + }, + ); + storeModifications("release", "delete", result); + } } public async getEpisodeContentData(chapterLink: string): Promise { diff --git a/packages/scraper/src/jobHandler.ts b/packages/scraper/src/jobHandler.ts index dba0a567..dbd07702 100644 --- a/packages/scraper/src/jobHandler.ts +++ b/packages/scraper/src/jobHandler.ts @@ -312,6 
+312,8 @@ async function addPartEpisodes( if (knownEpisodeIds.length) { const updateReleases: EpisodeRelease[] = []; + const episodeReleasesMap = new Map(); + let tocId = 0; const newReleases = nonNewIndices .map((index): Optional => { @@ -326,10 +328,20 @@ async function addPartEpisodes( throw Error("known episode has no episode from storage"); } const id = currentEpisode.id; + const foundRelease = storageReleases.find( (release) => release.url === episodeValue.tocEpisode.url && release.episodeId === id, ); + const episodeTocId = episodeValue.tocEpisode.tocId; + if (episodeTocId) { + if (!tocId) { + tocId = episodeTocId; + } else if (tocId !== episodeTocId) { + tocId = -1; // disable using the tocId + logger.warn(`Different TocIds on episodes, Expected ${tocId} but got ${episodeTocId} instead`); + } + } const tocRelease: EpisodeRelease = { episodeId: id, releaseDate: getLatestDate(episodeValue.tocEpisode.releaseDate || new Date()), @@ -338,6 +350,10 @@ async function addPartEpisodes( locked: episodeValue.tocEpisode.locked, tocId: episodeValue.tocEpisode.tocId, }; + + // map scraped toc + getElseSet(episodeReleasesMap, id, () => []).push(tocRelease); + if (foundRelease) { const date = foundRelease.releaseDate < tocRelease.releaseDate ? 
foundRelease.releaseDate : tocRelease.releaseDate; @@ -358,12 +374,33 @@ async function addPartEpisodes( }) .filter((v) => v) as EpisodeRelease[]; + const deleteReleases: EpisodeRelease[] = []; + + // only delete releases if the toc is not empty and all episodes have the same valid tocId + if (episodeReleasesMap.size && tocId > 0) { + for (const release of storageReleases) { + if (release.tocId !== tocId) { + continue; + } + const tocReleases = episodeReleasesMap.get(release.episodeId); + + // to delete the release either the episode of it should not be defined or the release + // (same url only, as same episodeId and tocId is already given) should not be available + if (!tocReleases || !tocReleases.find((other) => other.url === release.url)) { + deleteReleases.push(release); + } + } + } + if (newReleases.length) { await episodeStorage.addRelease(newReleases); } if (updateReleases.length) { await episodeStorage.updateRelease(updateReleases); } + if (deleteReleases.length) { + await episodeStorage.deleteRelease(deleteReleases); + } } if (allEpisodes.length) { await episodeStorage.addEpisode(allEpisodes);