Deprecate downloader.setBaseUrls() in favor of downloader.setUrlsDirectors()
VadimKovalenkoSNF committed Nov 7, 2023
1 parent 2765055 commit 3ea2735
Showing 9 changed files with 41 additions and 92 deletions.
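At a glance: instead of probing MediaWiki capabilities inside `Downloader` (optionally forced by a render name), callers now pass in the renderer instances they have already chosen, and the downloader derives a URL director from each renderer's class. A minimal before/after sketch of a call site — `downloader`, `dump`, and the renderer variables are illustrative placeholders, not lines from this diff:

```ts
// Before: capability probing inside Downloader, optionally forced by a render name
await downloader.setBaseUrlsDirectors('WikimediaDesktop')
const url = downloader.getArticleUrl(dump, articleId)

// After: directors derive from renderer instances; the main-page check moves to the caller
downloader.setUrlsDirectors(mainPageRenderer, articlesRenderer)
const url = dump.isMainPage(articleId) ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId)
```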
82 changes: 22 additions & 60 deletions src/Downloader.ts
@@ -22,7 +22,6 @@ import * as logger from './Logger.js'
 import MediaWiki, { QueryOpts } from './MediaWiki.js'
 import { Dump } from './Dump.js'
 import ApiURLDirector from './util/builders/url/api.director.js'
-import basicURLDirector from './util/builders/url/basic.director.js'
 import urlHelper from './util/url.helper.js'
 
 import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js'
@@ -94,8 +93,6 @@ class Downloader {
   public streamRequestOptions: AxiosRequestConfig
   public wikimediaMobileJsDependenciesList: string[] = []
   public wikimediaMobileStyleDependenciesList: string[] = []
-  public articleUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
-  public mainPageUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
 
   private readonly uaString: string
   private activeRequests = 0
@@ -104,6 +101,8 @@ class Downloader {
   private readonly optimisationCacheUrl: string
   private s3: S3
   private apiUrlDirector: ApiURLDirector
+  private articleUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
+  private mainPageUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector
 
   constructor({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) {
     this.uaString = uaString
@@ -176,69 +175,32 @@ class Downloader {
     }
   }
 
-  private getUrlDirector(capabilitiesList): WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector {
-    for (const capabilityInfo of capabilitiesList) {
-      if (capabilityInfo.condition) {
-        return new capabilityInfo.Director(capabilityInfo.value)
-      }
+  private getUrlDirector(renderer: object) {
+    switch (renderer.constructor.name) {
+      case 'WikimediaDesktopRenderer':
+        return new WikimediaDesktopURLDirector(MediaWiki.WikimediaDesktopApiUrl.href)
+      case 'VisualEditorRenderer':
+        return new VisualEditorURLDirector(MediaWiki.VisualEditorApiUrl.href)
+      case 'WikimediaMobileRenderer':
+        return new WikimediaMobileURLDirector(MediaWiki.WikimediaMobileApiUrl.href)
     }
     throw new Error('No suitable URL director found.')
   }
 
-  public async setBaseUrlsDirectors(forceRender = null) {
-    if (!forceRender) {
-      //* Objects order in array matters!
-      const articlesCapabilitiesList = [
-        { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href, Director: WikimediaMobileURLDirector },
-        { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href, Director: WikimediaDesktopURLDirector },
-        { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.VisualEditorApiUrl.href, Director: VisualEditorURLDirector },
-      ]
-
-      this.baseUrl = basicURLDirector.buildDownloaderBaseUrl(articlesCapabilitiesList)
-      this.articleUrlDirector = this.getUrlDirector(articlesCapabilitiesList)
-
-      //* Objects order in array matters!
-      const mainPageCapabilitiesList = [
-        { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href, Director: WikimediaDesktopURLDirector },
-        { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.VisualEditorApiUrl.href, Director: VisualEditorURLDirector },
-        { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href, Director: WikimediaMobileURLDirector },
-      ]
-      this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl(mainPageCapabilitiesList)
-      this.mainPageUrlDirector = this.getUrlDirector(mainPageCapabilitiesList)
-    } else {
-      switch (forceRender) {
-        case 'WikimediaDesktop':
-          if (MediaWiki.hasWikimediaDesktopApi()) {
-            this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href
-            this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href
-            this.articleUrlDirector = this.mainPageUrlDirector = new WikimediaDesktopURLDirector(MediaWiki.WikimediaDesktopApiUrl.href)
-            break
-          }
-          break
-        case 'VisualEditor':
-          if (MediaWiki.hasVisualEditorApi()) {
-            this.baseUrl = MediaWiki.VisualEditorApiUrl.href
-            this.baseUrlForMainPage = MediaWiki.VisualEditorApiUrl.href
-            this.articleUrlDirector = this.mainPageUrlDirector = new VisualEditorURLDirector(MediaWiki.VisualEditorApiUrl.href)
-            break
-          }
-          break
-        case 'WikimediaMobile':
-          if (MediaWiki.hasWikimediaMobileApi()) {
-            this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href
-            this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href
-            this.articleUrlDirector = this.mainPageUrlDirector = new WikimediaMobileURLDirector(MediaWiki.WikimediaMobileApiUrl.href)
-            break
-          }
-          break
-        default:
-          throw new Error('Unable to find specific API end-point to retrieve article HTML')
-      }
-    }
-  }
+  public async setUrlsDirectors(mainPageRenderer, articlesRenderer): Promise<void> {
+    if (!this.articleUrlDirector) {
+      this.articleUrlDirector = this.getUrlDirector(articlesRenderer)
+    }
+    if (!this.mainPageUrlDirector) {
+      this.mainPageUrlDirector = this.getUrlDirector(mainPageRenderer)
+    }
+  }
 
-  public getArticleUrl(dump: Dump, articleId: string): string {
-    return `${dump.isMainPage(articleId) ? this.mainPageUrlDirector.buildArticleURL(articleId) : this.articleUrlDirector.buildArticleURL(articleId)}`
+  public getArticleUrl(articleId: string): string {
+    return this.articleUrlDirector.buildArticleURL(articleId)
   }
 
+  public getMainPageUrl(articleId: string): string {
+    return this.mainPageUrlDirector.buildArticleURL(articleId)
+  }
+
   public removeEtagWeakPrefix(etag: string): string {
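The new `getUrlDirector()` switches on `renderer.constructor.name`, and `setUrlsDirectors()` assigns each director only if it is not already set. Note the ordering constraint this introduces: `getArticleUrl()` and `getMainPageUrl()` dereference the directors directly, so they fail at runtime unless `setUrlsDirectors()` has been called first — the test changes below all follow that order. A usage sketch, assuming an already-constructed `downloader` and the renderer classes the tests import (paths may differ):

```ts
import { WikimediaDesktopRenderer } from './renderers/wikimedia-desktop.renderer.js'
import { WikimediaMobileRenderer } from './renderers/wikimedia-mobile.renderer.js'

const mainPageRenderer = new WikimediaDesktopRenderer()
const articlesRenderer = new WikimediaMobileRenderer()

// Pairs the main page with WikimediaDesktopURLDirector and articles with
// WikimediaMobileURLDirector via the constructor-name switch above.
downloader.setUrlsDirectors(mainPageRenderer, articlesRenderer)

const articleUrl = downloader.getArticleUrl('London') // mobile API URL
const mainPageUrl = downloader.getMainPageUrl('Main_Page') // desktop API URL
```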
1 change: 0 additions & 1 deletion src/mwoffliner.lib.ts
@@ -214,7 +214,6 @@ async function execute(argv: any) {
   await MediaWiki.hasWikimediaDesktopApi()
   const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi()
   await MediaWiki.hasVisualEditorApi()
-  await downloader.setBaseUrlsDirectors(forceRender)
 
   RedisStore.setOptions(argv.redis || config.defaults.redisPath)
   await RedisStore.connect()
6 changes: 3 additions & 3 deletions src/util/saveArticles.ts
@@ -133,7 +133,7 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK
     try {
       const isMainPage = dump.isMainPage(articleId)
       const renderer = isMainPage ? mainPageRenderer : articlesRenderer
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      const articleUrl = isMainPage ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId)
 
       rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)
       for (const { articleId, html } of rets) {
@@ -254,6 +254,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader
       renderType: hasWikimediaMobileApi ? 'mobile' : 'auto',
     })
   }
+  downloader.setUrlsDirectors(mainPageRenderer, articlesRenderer)
 
   if (dump.customProcessor?.shouldKeepArticle) {
     await getAllArticlesToKeep(downloader, articleDetailXId, dump, mainPageRenderer, articlesRenderer)
@@ -291,8 +292,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader
     try {
       const isMainPage = dump.isMainPage(articleId)
       const renderer = isMainPage ? mainPageRenderer : articlesRenderer
-
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      const articleUrl = isMainPage ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId)
 
       rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)
 
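With `Dump` gone from `getArticleUrl()`, the main-page decision lives at the call site next to the renderer choice. Since the same ternary now appears twice in this file, a helper could factor it out; a hypothetical sketch, not part of this commit (import paths assumed):

```ts
import Downloader from './Downloader.js'
import { Dump } from './Dump.js'

// Mirrors the inline ternary used at both call sites above.
function resolveArticleUrl(downloader: Downloader, dump: Dump, articleId: string): string {
  return dump.isMainPage(articleId) ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId)
}
```

Also worth noting: `setUrlsDirectors()` is declared `async` but contains no `await`, so the un-awaited call added above assigns both directors synchronously before the save loop starts.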
2 changes: 0 additions & 2 deletions test/e2e/extra.e2e.test.ts
@@ -6,7 +6,6 @@ import { execa } from 'execa'
 import 'dotenv/config.js'
 import { jest } from '@jest/globals'
 import { RENDERERS_LIST } from '../../src/util/const.js'
-import { sleep } from '../util.js'
 
 jest.setTimeout(20000)
 
@@ -68,7 +67,6 @@ describe('Extra', () => {
       const redisScan = await execa('redis-cli --scan', { shell: true })
       // Redis has been cleared
      expect(redisScan.stdout).toEqual('')
-      await sleep(1000)
    })
  }
 })
3 changes: 1 addition & 2 deletions test/testAllRenders.ts
@@ -2,7 +2,7 @@ import * as logger from '../src/Logger.js'
 import * as mwoffliner from '../src/mwoffliner.lib.js'
 import { execa } from 'execa'
 import { RENDERERS_LIST } from '../src/util/const.js'
-import { zimcheckAvailable, zimdumpAvailable, sleep } from './util.js'
+import { zimcheckAvailable, zimdumpAvailable } from './util.js'
 
 interface Parameters {
   mwUrl: string
@@ -56,6 +56,5 @@ export async function testAllRenders(parameters: Parameters, callback) {
     outFiles[0].testId = testId
     outFiles[0].renderer = renderer
     await callback(outFiles)
-    await sleep(1000)
   }
 }
11 changes: 5 additions & 6 deletions test/unit/downloader.test.ts
@@ -17,7 +17,6 @@ import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.
 import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js'
 import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js'
 import { RENDERERS_LIST } from '../../src/util/const.js'
-import { sleep } from '../util.js'
 
 jest.setTimeout(200000)
 
@@ -135,9 +134,10 @@ describe('Downloader class', () => {
     dump = new Dump('', {} as any, mwMetadata)
   })
 
-  test('getArticle of "London" returns one article for wikimediaMobileRenderer render', async () => {
+  test('getArticle of "London" returns one article for WikimediaMobileRenderer render', async () => {
     const articleId = 'London'
-    const articleUrl = downloader.getArticleUrl(dump, articleId)
+    downloader.setUrlsDirectors(wikimediaMobileRenderer, wikimediaMobileRenderer)
+    const articleUrl = downloader.getArticleUrl(articleId)
     const articleDetail = {
       title: articleId,
       thumbnail: {
@@ -192,7 +192,7 @@
 
   test('getArticle response status for non-existent article id is 404 for WikimediaDesktop render', async () => {
     const articleId = 'NeverExistingArticle'
-    const articleUrl = downloader.getArticleUrl(dump, articleId)
+    const articleUrl = downloader.getArticleUrl(articleId)
     const articleDetail = {
       title: articleId,
       missing: '',
@@ -238,7 +238,7 @@
 
     test(`getArticle response status for non-existent article id is 404 for ${renderer} render`, async () => {
       const articleId = 'NeverExistingArticle'
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      const articleUrl = downloader.getArticleUrl(articleId)
       const articleDetail = {
         title: articleId,
         missing: '',
@@ -257,7 +257,6 @@ describe('Downloader class', () => {
           dump.isMainPage(articleId),
         ),
       ).rejects.toThrowError(new Error('Request failed with status code 404'))
-      await sleep(1000)
    })
  }
 })
19 changes: 7 additions & 12 deletions test/unit/saveArticles.test.ts
@@ -2,7 +2,7 @@ import domino from 'domino'
 
 import RedisStore from '../../src/RedisStore.js'
 import { startRedis, stopRedis } from './bootstrap.js'
-import { setupScrapeClasses, sleep } from '../util.js'
+import { setupScrapeClasses } from '../util.js'
 import { saveArticles } from '../../src/util/saveArticles.js'
 import { ZimArticle } from '@openzim/libzim'
 import { mwRetToArticleDetail, DELETED_ARTICLE_ERROR } from '../../src/util/index.js'
@@ -71,7 +71,7 @@ describe('saveArticles', () => {
      expect(addedArticles[0].aid).toEqual('A/London')
 
      const articleId = 'non-existent-article'
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      const articleUrl = downloader.getArticleUrl(articleId)
      const articleDetail = { title: 'Non-existent-article', missing: '' }
      const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
 
@@ -87,14 +87,13 @@ describe('saveArticles', () => {
      expect(articleDoc.querySelector('meta[name="geo.position"]')?.getAttribute('content')).toEqual('51.50722222;-0.1275')
      // Check if header exists
      expect(articleDoc.querySelector('h1.article-header, h1.pcs-edit-section-title')).toBeTruthy()
-      await sleep(1000)
    })
 
    test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => {
      const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
-      await downloader.setBaseUrlsDirectors(renderer)
      const articleId = 'Canada'
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      downloader.setUrlsDirectors(rendererInstance, rendererInstance)
+      const articleUrl = downloader.getArticleUrl(articleId)
      const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
      const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
      const { articleDetailXId } = RedisStore
@@ -119,14 +118,13 @@ describe('saveArticles', () => {
      const leadSection = sections[0]
      expect(sections.length).toEqual(1)
      expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
-      await sleep(1000)
    })
 
    test(`Load main page and check that it is without header using ${renderer} renderer`, async () => {
      const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia
-      await downloader.setBaseUrlsDirectors(renderer)
      const articleId = 'Main_Page'
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      downloader.setUrlsDirectors(rendererInstance, rendererInstance)
+      const articleUrl = downloader.getArticleUrl(articleId)
      const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
      const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
      const { articleDetailXId } = RedisStore
@@ -146,12 +144,10 @@ describe('saveArticles', () => {
      )
      const articleDoc = domino.createDocument(result[0].html)
      expect(articleDoc.querySelector('h1.article-header')).toBeFalsy()
-      await sleep(1000)
    })
 
    test(`--customFlavour using ${renderer} renderer`, async () => {
      const { downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia
-      await downloader.setBaseUrlsDirectors(renderer)
      class CustomFlavour implements CustomProcessor {
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        public async shouldKeepArticle(articleId: string, doc: Document) {
@@ -210,7 +206,6 @@ describe('saveArticles', () => {
      expect(ParisDocument.querySelector('#PRE_PROCESSOR')).toBeDefined()
      // Prague was correctly post-processed
      expect(PragueDocument.querySelector('#POST_PROCESSOR')).toBeDefined()
-      await sleep(1000)
    })
  }
 
@@ -227,7 +222,7 @@ describe('saveArticles', () => {
    const downloader = classes.downloader
    await downloader.setCapabilities()
-    await downloader.setBaseUrlsDirectors()
+    await downloader.setUrlsDirectors()
    const _articleDetailsRet = await downloader.getArticleDetailsIds(['Western_Greenland'])
    const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
    const { articleDetailXId } = redisStore
8 changes: 3 additions & 5 deletions test/unit/treatments/article.treatment.test.ts
@@ -2,7 +2,7 @@ import domino from 'domino'
 import RedisStore from '../../../src/RedisStore.js'
 import { ZimArticle } from '@openzim/libzim'
 import { mwRetToArticleDetail } from '../../../src/util/mw-api.js'
-import { setupScrapeClasses, sleep } from '../../util.js'
+import { setupScrapeClasses } from '../../util.js'
 import { startRedis, stopRedis } from '../bootstrap.js'
 import { saveArticles } from '../../../src/util/saveArticles.js'
 import { jest } from '@jest/globals'
@@ -36,7 +36,6 @@ describe('ArticleTreatment', () => {
    test(`Article html processing for ${renderer} render`, async () => {
      const { downloader, dump } = await setupScrapeClasses() // en wikipedia
      const title = 'London'
-      await downloader.setBaseUrlsDirectors(renderer)
      const _articlesDetail = await downloader.getArticleDetailsIds([title])
      const articlesDetail = mwRetToArticleDetail(_articlesDetail)
      const { articleDetailXId } = RedisStore
@@ -46,7 +45,8 @@ describe('ArticleTreatment', () => {
      const addedArticles: (typeof ZimArticle)[] = []
 
      const articleId = 'non-existent-article'
-      const articleUrl = downloader.getArticleUrl(dump, articleId)
+      downloader.setUrlsDirectors(rendererInstance, rendererInstance)
+      const articleUrl = downloader.getArticleUrl(articleId)
 
      const _moduleDependencies = await downloader.getModuleDependencies(title)
      const articleDetail = {
@@ -91,8 +91,6 @@ describe('ArticleTreatment', () => {
      expect(articleDoc.querySelector('meta[name="geo.position"]')).toBeDefined()
      // Geo Position data is correct
      expect(articleDoc.querySelector('meta[name="geo.position"]')?.getAttribute('content')).toEqual('51.50722222;-0.1275')
-
-      await sleep(1000)
    })
  }
 })
1 change: 0 additions & 1 deletion test/unit/urlRewriting.test.ts
@@ -139,7 +139,6 @@ describe('Styles', () => {
    await articleDetailXId.flush()
    await RedisStore.redirectsXId.flush()
    const { downloader, dump } = await setupScrapeClasses() // en wikipedia
-    await downloader.setBaseUrlsDirectors('WikimediaDesktop')
 
    await getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area'])
 
