Deprecate setBaseUrl() and getArticleUrl() in favor of downloader.getArticleUrl()
VadimKovalenkoSNF committed Oct 17, 2023
1 parent 8ebd7d0 commit 3a5526e
Showing 11 changed files with 41 additions and 78 deletions.
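In short, per-article URL construction moves out of Downloader.setBaseUrls() and the standalone getArticleUrl() helper in saveArticles.ts: callers now pass the renderer they intend to use to a new Downloader.getArticleUrl(renderer, articleId), which dispatches to the matching URL director on MediaWiki. A rough before/after sketch of the call sites (illustrative only; it assumes the downloader, dump, and renderer instances that the surrounding mwoffliner code sets up and is not runnable in isolation):

```ts
// Before this commit: base URLs were resolved once per wiki...
await downloader.setBaseUrls(forceRender)
// ...and article URLs were built by plain string concatenation.
const oldUrl = getArticleUrl(downloader, dump, articleId) // baseUrl + encodeURIComponent(articleId)

// After this commit: the renderer chosen for the article also selects the URL builder.
const renderer = dump.isMainPage(articleId) ? mainPageRenderer : articlesRenderer
const newUrl = downloader.getArticleUrl(renderer, articleId)
```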
1 change: 1 addition & 0 deletions package.json
@@ -18,6 +18,7 @@
"test:e2e-coverage": "npm run test:pattern 'test/e2e' -- --coverage",
"test:unit-coverage": "npm run test:pattern 'test/unit' -- --coverage",
"test": "npm run test:unit-coverage -- --silent && npm run test:e2e-coverage -- --silent",
"test:geturl": "npm start -- --mwUrl=https://en.wikipedia.org [email protected] --articleList=BMW",
"test-verbose": "npm run test:unit-coverage && npm run test:e2e-coverage",
"test-without-coverage": "npm run test:unit -- -- --silent && npm run test:e2e -- -- --silent",
"codecov": "nyc --reporter=lcov npm t",
60 changes: 14 additions & 46 deletions src/Downloader.ts
@@ -21,7 +21,6 @@ import S3 from './S3.js'
import * as logger from './Logger.js'
import MediaWiki, { QueryOpts } from './MediaWiki.js'
import ApiURLDirector from './util/builders/url/api.director.js'
import basicURLDirector from './util/builders/url/basic.director.js'
import urlHelper from './util/url.helper.js'

const imageminOptions = new Map()
@@ -169,52 +168,21 @@ class Downloader {
}
}

public async setBaseUrls(forceRender = null) {
if (!forceRender) {
//* Objects order in array matters!
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href },
{ condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
])

//* Objects order in array matters!
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
{ condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href },
])
} else {
switch (forceRender) {
case 'WikimediaDesktop':
if (MediaWiki.hasWikimediaDesktopApi()) {
this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href
this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href
break
}
break
case 'VisualEditor':
if (MediaWiki.hasVisualEditorApi()) {
this.baseUrl = MediaWiki.visualEditorApiUrl.href
this.baseUrlForMainPage = MediaWiki.visualEditorApiUrl.href
break
}
break
case 'WikimediaMobile':
if (MediaWiki.hasWikimediaMobileApi()) {
this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href
this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href
break
}
break
default:
throw new Error('Unable to find specific API end-point to retrieve article HTML')
}
public getArticleUrl(renderer, articleId: string): string {
let articleUrl: string
// Renderers and URL builders are independent, so a switch over the renderer type is needed here
switch (renderer.constructor.name) {
case 'WikimediaDesktopRenderer':
articleUrl = MediaWiki.wikimediaDesktopUrlDirector.buildArticleURL(articleId)
break
case 'VisualEditorRenderer':
articleUrl = MediaWiki.visualEditorURLDirector.buildArticleURL(articleId)
break
case 'WikimediaMobileRenderer':
articleUrl = MediaWiki.wikimediaMobileUrlDirector.buildArticleURL(articleId)
break
}
logger.log('Base Url: ', this.baseUrl)
logger.log('Base Url for Main Page: ', this.baseUrlForMainPage)

if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML')
return articleUrl
}

public removeEtagWeakPrefix(etag: string): string {
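The new method keys its switch on renderer.constructor.name, so the renderer instance passed by the caller selects the matching URL director. A minimal, self-contained sketch of that dispatch pattern (class names mirror the diff; the base URLs are illustrative assumptions, and the real method has no default branch, the throw below just keeps the sketch total):

```ts
// Standalone model of the constructor-name dispatch used by the new Downloader.getArticleUrl().
class WikimediaDesktopRenderer {}
class VisualEditorRenderer {}
class WikimediaMobileRenderer {}

// Hypothetical stand-in for MediaWiki's URL directors; real base URLs are resolved at runtime.
const buildArticleURL = (base: string, articleId: string) => base + encodeURIComponent(articleId)

function getArticleUrl(renderer: object, articleId: string): string {
  switch (renderer.constructor.name) {
    case 'WikimediaDesktopRenderer':
      return buildArticleURL('https://en.wikipedia.org/api/rest_v1/page/html/', articleId)
    case 'WikimediaMobileRenderer':
      return buildArticleURL('https://en.wikipedia.org/api/rest_v1/page/mobile-html/', articleId)
    case 'VisualEditorRenderer':
      return buildArticleURL('https://en.wikipedia.org/w/api.php?action=visualeditor&page=', articleId)
    default:
      throw new Error(`Unknown renderer: ${renderer.constructor.name}`)
  }
}

console.log(getArticleUrl(new WikimediaMobileRenderer(), 'BMW'))
// e.g. https://en.wikipedia.org/api/rest_v1/page/mobile-html/BMW
```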
10 changes: 5 additions & 5 deletions src/MediaWiki.ts
@@ -50,9 +50,9 @@ class MediaWiki {
#apiActionPath: string
#domain: string
private apiUrlDirector: ApiURLDirector
private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector
private wikimediaMobileUrlDirector: WikimediaMobileURLDirector
private VisualEditorURLDirector: VisualEditorURLDirector
public wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector
public wikimediaMobileUrlDirector: WikimediaMobileURLDirector
public visualEditorURLDirector: VisualEditorURLDirector

public visualEditorApiUrl: URL
public apiUrl: URL
@@ -152,7 +152,7 @@ class MediaWiki {

public async hasVisualEditorApi(): Promise<boolean> {
if (this.#hasVisualEditorApi === null) {
this.#hasVisualEditorApi = await checkApiAvailability(this.VisualEditorURLDirector.buildArticleURL(this.apiCheckArticleId))
this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId))
return this.#hasVisualEditorApi
}
return this.#hasVisualEditorApi
@@ -189,7 +189,7 @@ class MediaWiki {
this.mobileModulePath = baseUrlDirector.buildMobileModuleURL()
this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href)
this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href)
this.VisualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
}

public async login(downloader: Downloader) {
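The visibility change above is what enables that dispatch: the three URL directors on MediaWiki switch from private to public so Downloader can reach them. A small sketch of what that allows (assuming MediaWiki has already been initialised against en.wikipedia.org in the method shown in the hunk above; the exact URL shape depends on the wiki's API configuration):

```ts
// Once the directors have been constructed, they are reachable from outside MediaWiki:
const url = MediaWiki.wikimediaDesktopUrlDirector.buildArticleURL('BMW')
// expected to resemble https://en.wikipedia.org/api/rest_v1/page/html/BMW
```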
1 change: 0 additions & 1 deletion src/mwoffliner.lib.ts
@@ -214,7 +214,6 @@ async function execute(argv: any) {
await MediaWiki.hasWikimediaDesktopApi()
const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls(forceRender)

RedisStore.setOptions(argv.redis || config.defaults.redisPath)
await RedisStore.connect()
9 changes: 3 additions & 6 deletions src/util/saveArticles.ts
@@ -131,9 +131,9 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
let rets: any
try {
const articleUrl = getArticleUrl(downloader, dump, articleId)
const isMainPage = dump.isMainPage(articleId)
const renderer = isMainPage ? mainPageRenderer : articlesRenderer
const articleUrl = downloader.getArticleUrl(renderer, articleId)

Codecov / codecov/patch check warning: added line src/util/saveArticles.ts#L136 was not covered by tests.

rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)
for (const { articleId, html } of rets) {
@@ -224,10 +224,6 @@ async function saveArticle(
}
}

export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: string): string {
return `${dump.isMainPage(articleId) ? downloader.baseUrlForMainPage : downloader.baseUrl}${encodeURIComponent(articleId)}`
}

/*
* Fetch Articles
*/
@@ -293,10 +289,11 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade

let rets: any
try {
const articleUrl = getArticleUrl(downloader, dump, articleId)
const isMainPage = dump.isMainPage(articleId)
const renderer = isMainPage ? mainPageRenderer : articlesRenderer

const articleUrl = downloader.getArticleUrl(renderer, articleId)

rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage)

for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, staticFiles, subtitles } of rets) {
12 changes: 7 additions & 5 deletions test/unit/downloader.test.ts
@@ -6,7 +6,6 @@ import Axios from 'axios'
import { mwRetToArticleDetail, stripHttpFromUrl, isImageUrl } from '../../src/util/index.js'
import S3 from '../../src/S3.js'
import { Dump } from '../../src/Dump.js'
import { getArticleUrl } from '../../src/util/saveArticles.js'
import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
import { config } from '../../src/config.js'
import 'dotenv/config.js'
@@ -15,6 +14,7 @@ import { jest } from '@jest/globals'
import urlParser from 'url'
import { setTimeout } from 'timers/promises'
import domino from 'domino'
import { setupScrapeClasses } from '../util.js'

jest.setTimeout(200000)

@@ -35,7 +35,6 @@ describe('Downloader class', () => {
await MediaWiki.hasWikimediaDesktopApi()
await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls()
})

test('Test Action API version 2 response in comparison with version 1', async () => {
@@ -126,16 +125,19 @@ describe('Downloader class', () => {

describe('getArticle method', () => {
let dump: Dump
let renderer
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()

beforeAll(async () => {
const mwMetadata = await MediaWiki.getMwMetaData(downloader)
dump = new Dump('', {} as any, mwMetadata)
const setupScrapeClass = await setupScrapeClasses() // en wikipedia
renderer = setupScrapeClass.renderer
})

test('getArticle of "London" returns one article', async () => {
const articleId = 'London'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(renderer, articleId)
const articleDetail = {
title: articleId,
thumbnail: {
@@ -171,7 +173,7 @@ describe('Downloader class', () => {
revisionId: 1168361498,
timestamp: '2023-08-02T09:57:11Z',
}
const articleUrl = getArticleUrl(downloader, dump, articleDetail.title)
const articleUrl = downloader.getArticleUrl(renderer, articleDetail.title)
const PaginatedArticle = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
@@ -188,7 +190,7 @@

test('getArticle response status for non-existent article id is 404', async () => {
const articleId = 'NeverExistingArticle'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(renderer, articleId)
const articleDetail = {
title: articleId,
missing: '',
1 change: 0 additions & 1 deletion test/unit/renderers/renderer.builder.test.ts
@@ -84,7 +84,6 @@ describe('RendererBuilder', () => {
await MediaWiki.hasWikimediaDesktopApi()
await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls()

const rendererBuilderOptions = {
MediaWiki,
13 changes: 4 additions & 9 deletions test/unit/saveArticles.test.ts
@@ -7,7 +7,6 @@ import { saveArticles } from '../../src/util/saveArticles.js'
import { ZimArticle } from '@openzim/libzim'
import { mwRetToArticleDetail, DELETED_ARTICLE_ERROR } from '../../src/util/index.js'
import { jest } from '@jest/globals'
import { getArticleUrl } from '../../src/util/saveArticles.js'
import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js'
import { RENDERERS_LIST } from '../../src/util/const.js'
@@ -19,12 +18,11 @@ describe('saveArticles', () => {
afterAll(stopRedis)

test('Article html processing', async () => {
const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia
const { renderer, MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia
await MediaWiki.hasCoordinates(downloader)
await MediaWiki.hasWikimediaDesktopApi()
await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls('WikimediaDesktop')
const _articlesDetail = await downloader.getArticleDetailsIds(['London'])
const articlesDetail = mwRetToArticleDetail(_articlesDetail)
const { articleDetailXId } = RedisStore
@@ -55,7 +53,7 @@

const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
const articleId = 'non-existent-article'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(renderer, articleId)
const articleDetail = { title: 'Non-existent-article', missing: '' }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)

@@ -100,9 +98,8 @@
throw new Error(`Unknown renderer: ${renderer}`)
}
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls(renderer)
const articleId = 'Canada'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(rendererInstance, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
@@ -133,9 +130,8 @@
test('Load main page and check that it is without header', async () => {
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia
await downloader.setBaseUrls('WikimediaDesktop')
const articleId = 'Main_Page'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(wikimediaDesktopRenderer, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
@@ -232,7 +228,6 @@
await MediaWiki.hasWikimediaDesktopApi()
await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls()
class CustomFlavour implements CustomProcessor {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
public async shouldKeepArticle(articleId: string, doc: Document) {
6 changes: 2 additions & 4 deletions test/unit/treatments/article.treatment.test.ts
@@ -6,7 +6,6 @@ import { setupScrapeClasses } from '../../util.js'
import { startRedis, stopRedis } from '../bootstrap.js'
import { saveArticles } from '../../../src/util/saveArticles.js'
import { jest } from '@jest/globals'
import { getArticleUrl } from '../../../src/util/saveArticles.js'
import { WikimediaDesktopRenderer } from '../../../src/renderers/wikimedia-desktop.renderer.js'

jest.setTimeout(10000)
@@ -16,8 +15,7 @@ describe('ArticleTreatment', () => {
afterAll(stopRedis)

test('Article html processing', async () => {
const { downloader, dump } = await setupScrapeClasses() // en wikipedia
await downloader.setBaseUrls()
const { downloader, dump, renderer } = await setupScrapeClasses() // en wikipedia
const title = 'London'
const _articlesDetail = await downloader.getArticleDetailsIds([title])
const articlesDetail = mwRetToArticleDetail(_articlesDetail)
Expand All @@ -29,7 +27,7 @@ describe('ArticleTreatment', () => {

const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
const articleId = 'non-existent-article'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const articleUrl = downloader.getArticleUrl(renderer, articleId)

const _moduleDependencies = await downloader.getModuleDependencies(title)
const articleDetail = {
1 change: 0 additions & 1 deletion test/unit/urlRewriting.test.ts
@@ -143,7 +143,6 @@ describe('Styles', () => {
await MediaWiki.hasWikimediaDesktopApi()
await MediaWiki.hasWikimediaMobileApi()
await MediaWiki.hasVisualEditorApi()
await downloader.setBaseUrls('WikimediaDesktop')

await getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area'])

5 changes: 5 additions & 0 deletions test/util.ts
@@ -33,6 +33,10 @@ export function makeLink($doc: Document, href: string, rel: string, title: strin
export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', format = '' } = {}) {
MediaWiki.base = mwUrl

const renderer = {}

Object.defineProperty(renderer.constructor, 'name', { value: 'WikimediaDesktopRenderer' })

const downloader = new Downloader({ uaString: `${config.userAgent} ([email protected])`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })

await MediaWiki.getMwMetaData(downloader)
@@ -47,6 +51,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f
MediaWiki,
downloader,
dump,
renderer,
}
}

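A note on the renderer stub added above: Downloader.getArticleUrl() switches on renderer.constructor.name, and for an object literal renderer.constructor is the global Object constructor, so the defineProperty call makes the switch take the WikimediaDesktop branch (at the cost of redefining Object.name for the whole test process). A hypothetical, more isolated stub, not part of this commit, could rely on an ordinary named class instead:

```ts
// Illustrative alternative only: a locally declared class already exposes the
// constructor name that Downloader.getArticleUrl() dispatches on.
class WikimediaDesktopRenderer {}
const renderer = new WikimediaDesktopRenderer()
console.log(renderer.constructor.name) // 'WikimediaDesktopRenderer', and Object.name stays untouched
```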
