Skip to content

Commit

Permalink
feat(scraper): delete invalid releases
Browse files Browse the repository at this point in the history
Releases are considered invalid if the ToC scraper no longer
produces them when the ToC is scraped again.
  • Loading branch information
mytlogos committed Dec 28, 2021
1 parent a1627c0 commit 0243c4e
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 13 deletions.
37 changes: 24 additions & 13 deletions packages/core/src/database/contexts/episodeContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -666,19 +666,30 @@ export class EpisodeContext extends SubContext {
);
}

public async deleteRelease(release: EpisodeRelease): EmptyPromise {
const result = await this.delete(
"episode_release",
{
column: "episode_id",
value: release.episodeId,
},
{
column: "url",
value: release.url,
},
);
storeModifications("release", "delete", result);
/**
 * Deletes one or more episode releases from the `episode_release` table.
 * A release is identified by its (episode_id, url) pair.
 *
 * An array is split with `batch(release, 100)` and one DELETE statement is
 * issued per batch; all statements are awaited together. A single release is
 * deleted through the generic `delete` helper. In both cases the results are
 * handed to `storeModifications`, so bulk deletions are recorded like single ones.
 *
 * @param release a single release or an array of releases to delete
 */
public async deleteRelease(release: EpisodeRelease | EpisodeRelease[]): EmptyPromise {
  if (Array.isArray(release)) {
    // Nothing to delete; this also ensures an empty `in ()` list is never built.
    if (!release.length) {
      return;
    }
    const results = await Promise.all(
      batch(release, 100).map((releaseBatch) => {
        // One "(?,?)" placeholder pair per release; the values are flattened in the same order.
        const placeholders = releaseBatch.map(() => "(?,?)").join(",");
        return this.query(
          `DELETE FROM episode_release WHERE (episode_id, url) in (${placeholders})`,
          releaseBatch.flatMap((item) => [item.episodeId, item.url]),
        );
      }),
    );
    // NOTE(review): assumes this.query resolves to the same result shape as this.delete - confirm.
    for (const result of results) {
      storeModifications("release", "delete", result);
    }
  } else {
    const result = await this.delete(
      "episode_release",
      {
        column: "episode_id",
        value: release.episodeId,
      },
      {
        column: "url",
        value: release.url,
      },
    );
    storeModifications("release", "delete", result);
  }
}

public async getEpisodeContentData(chapterLink: string): Promise<EpisodeContentData> {
Expand Down
37 changes: 37 additions & 0 deletions packages/scraper/src/jobHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ async function addPartEpisodes(

if (knownEpisodeIds.length) {
const updateReleases: EpisodeRelease[] = [];
const episodeReleasesMap = new Map<number, EpisodeRelease[]>();
let tocId = 0;

const newReleases = nonNewIndices
.map((index): Optional<EpisodeRelease> => {
Expand All @@ -326,10 +328,20 @@ async function addPartEpisodes(
throw Error("known episode has no episode from storage");
}
const id = currentEpisode.id;

const foundRelease = storageReleases.find(
(release) => release.url === episodeValue.tocEpisode.url && release.episodeId === id,
);

// Track whether all scraped episodes share one tocId:
// 0 = none seen yet, > 0 = the single id seen so far, -1 = conflicting ids (disabled).
const episodeTocId = episodeValue.tocEpisode.tocId;
if (episodeTocId) {
  if (!tocId) {
    tocId = episodeTocId;
  } else if (tocId > 0 && tocId !== episodeTocId) {
    // Log before overwriting tocId, otherwise the message would always report "Expected -1".
    // The `tocId > 0` guard keeps the warning from repeating once tocId is already disabled.
    logger.warn(`Different TocIds on episodes, Expected ${tocId} but got ${episodeTocId} instead`);
    tocId = -1; // disable using the tocId
  }
}
const tocRelease: EpisodeRelease = {
episodeId: id,
releaseDate: getLatestDate(episodeValue.tocEpisode.releaseDate || new Date()),
Expand All @@ -338,6 +350,10 @@ async function addPartEpisodes(
locked: episodeValue.tocEpisode.locked,
tocId: episodeValue.tocEpisode.tocId,
};

// map scraped toc
getElseSet(episodeReleasesMap, id, () => []).push(tocRelease);

if (foundRelease) {
const date =
foundRelease.releaseDate < tocRelease.releaseDate ? foundRelease.releaseDate : tocRelease.releaseDate;
Expand All @@ -358,12 +374,33 @@ async function addPartEpisodes(
})
.filter((v) => v) as EpisodeRelease[];

const deleteReleases: EpisodeRelease[] = [];

// only delete releases if the toc is not empty and all episodes have the same valid tocId
if (episodeReleasesMap.size && tocId > 0) {
for (const release of storageReleases) {
if (release.tocId !== tocId) {
continue;
}
const tocReleases = episodeReleasesMap.get(release.episodeId);

// to delete the release either the episode of it should not be defined or the release
// (same url only, as same episodeId and tocId is already given) should not be available
if (!tocReleases || !tocReleases.find((other) => other.url === release.url)) {
deleteReleases.push(release);
}
}
}

if (newReleases.length) {
await episodeStorage.addRelease(newReleases);
}
if (updateReleases.length) {
await episodeStorage.updateRelease(updateReleases);
}
if (deleteReleases.length) {
await episodeStorage.deleteRelease(deleteReleases);
}
}
if (allEpisodes.length) {
await episodeStorage.addEpisode(allEpisodes);
Expand Down

0 comments on commit 0243c4e

Please sign in to comment.