From 3a5526e7006a43b7ee3dc965fe0a4cc82a393fde Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 17 Oct 2023 12:09:59 +0300 Subject: [PATCH] Deprecate setBaseUrl() and getArticleUrl() in favor of downloader.getArticleUrl() --- package.json | 1 + src/Downloader.ts | 60 +++++-------------- src/MediaWiki.ts | 10 ++-- src/mwoffliner.lib.ts | 1 - src/util/saveArticles.ts | 9 +-- test/unit/downloader.test.ts | 12 ++-- test/unit/renderers/renderer.builder.test.ts | 1 - test/unit/saveArticles.test.ts | 13 ++-- .../unit/treatments/article.treatment.test.ts | 6 +- test/unit/urlRewriting.test.ts | 1 - test/util.ts | 5 ++ 11 files changed, 41 insertions(+), 78 deletions(-) diff --git a/package.json b/package.json index 717617d9..c8d9be86 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "test:e2e-coverage": "npm run test:pattern 'test/e2e' -- --coverage", "test:unit-coverage": "npm run test:pattern 'test/unit' -- --coverage", "test": "npm run test:unit-coverage -- --silent && npm run test:e2e-coverage -- --silent", + "test:geturl": "npm start -- --mwUrl=https://en.wikipedia.org --adminEmail=mail@mail.com --articleList=BMW", "test-verbose": "npm run test:unit-coverage && npm run test:e2e-coverage", "test-without-coverage": "npm run test:unit -- -- --silent && npm run test:e2e -- -- --silent", "codecov": "nyc --reporter=lcov npm t", diff --git a/src/Downloader.ts b/src/Downloader.ts index afb05357..584635fa 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -21,7 +21,6 @@ import S3 from './S3.js' import * as logger from './Logger.js' import MediaWiki, { QueryOpts } from './MediaWiki.js' import ApiURLDirector from './util/builders/url/api.director.js' -import basicURLDirector from './util/builders/url/basic.director.js' import urlHelper from './util/url.helper.js' const imageminOptions = new Map() @@ -169,52 +168,21 @@ class Downloader { } } - public async setBaseUrls(forceRender = null) { - if (!forceRender) { - //* Objects order in array matters! - this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, - { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - ]) - - //* Objects order in array matters! - this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, - { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, - ]) - } else { - switch (forceRender) { - case 'WikimediaDesktop': - if (MediaWiki.hasWikimediaDesktopApi()) { - this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href - this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href - break - } - break - case 'VisualEditor': - if (MediaWiki.hasVisualEditorApi()) { - this.baseUrl = MediaWiki.visualEditorApiUrl.href - this.baseUrlForMainPage = MediaWiki.visualEditorApiUrl.href - break - } - break - case 'WikimediaMobile': - if (MediaWiki.hasWikimediaMobileApi()) { - this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href - this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href - break - } - break - default: - throw new Error('Unable to find specific API end-point to retrieve article HTML') - } + public getArticleUrl(renderer, articleId: string): string { + let articleUrl: string + // Renders and URL builders are independent so there should be a switch/case scenario here + switch (renderer.constructor.name) { + case 'WikimediaDesktopRenderer': + articleUrl = MediaWiki.wikimediaDesktopUrlDirector.buildArticleURL(articleId) + break + case 'VisualEditorRenderer': + articleUrl = MediaWiki.visualEditorURLDirector.buildArticleURL(articleId) + break + case 'WikimediaMobileRenderer': + articleUrl = MediaWiki.wikimediaMobileUrlDirector.buildArticleURL(articleId) + break } - logger.log('Base Url: ', this.baseUrl) - logger.log('Base Url for Main Page: ', this.baseUrlForMainPage) - - if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML') + return articleUrl } public removeEtagWeakPrefix(etag: string): string { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 60faa856..fe378523 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -50,9 +50,9 @@ class MediaWiki { #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector - private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector - private wikimediaMobileUrlDirector: WikimediaMobileURLDirector - private VisualEditorURLDirector: VisualEditorURLDirector + public wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + public wikimediaMobileUrlDirector: WikimediaMobileURLDirector + public visualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL public apiUrl: URL @@ -152,7 +152,7 @@ class MediaWiki { public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { - this.#hasVisualEditorApi = await checkApiAvailability(this.VisualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) + this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasVisualEditorApi } return this.#hasVisualEditorApi @@ -189,7 +189,7 @@ class MediaWiki { this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href) this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href) - this.VisualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } public async login(downloader: Downloader) { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 53f21463..dce0f265 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -214,7 +214,6 @@ async function execute(argv: any) { await MediaWiki.hasWikimediaDesktopApi() const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls(forceRender) RedisStore.setOptions(argv.redis || config.defaults.redisPath) await RedisStore.connect() diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 847ffae8..f715a526 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -131,9 +131,9 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) let rets: any try { - const articleUrl = getArticleUrl(downloader, dump, articleId) const isMainPage = dump.isMainPage(articleId) const renderer = isMainPage ? mainPageRenderer : articlesRenderer + const articleUrl = downloader.getArticleUrl(renderer, articleId) rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, html } of rets) { @@ -224,10 +224,6 @@ async function saveArticle( } } -export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: string): string { - return `${dump.isMainPage(articleId) ? downloader.baseUrlForMainPage : downloader.baseUrl}${encodeURIComponent(articleId)}` -} - /* * Fetch Articles */ @@ -293,10 +289,11 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let rets: any try { - const articleUrl = getArticleUrl(downloader, dump, articleId) const isMainPage = dump.isMainPage(articleId) const renderer = isMainPage ? mainPageRenderer : articlesRenderer + const articleUrl = downloader.getArticleUrl(renderer, articleId) + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, staticFiles, subtitles } of rets) { diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index f4a208f7..3f7caade 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -6,7 +6,6 @@ import Axios from 'axios' import { mwRetToArticleDetail, stripHttpFromUrl, isImageUrl } from '../../src/util/index.js' import S3 from '../../src/S3.js' import { Dump } from '../../src/Dump.js' -import { getArticleUrl } from '../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' import { config } from '../../src/config.js' import 'dotenv/config.js' @@ -15,6 +14,7 @@ import { jest } from '@jest/globals' import urlParser from 'url' import { setTimeout } from 'timers/promises' import domino from 'domino' +import { setupScrapeClasses } from '../util.js' jest.setTimeout(200000) @@ -35,7 +35,6 @@ describe('Downloader class', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() }) test('Test Action API version 2 response in comparison with version 1', async () => { @@ -126,16 +125,19 @@ describe('Downloader class', () => { describe('getArticle method', () => { let dump: Dump + let renderer const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() beforeAll(async () => { const mwMetadata = await MediaWiki.getMwMetaData(downloader) dump = new Dump('', {} as any, mwMetadata) + const setupScrapeClass = await setupScrapeClasses() // en wikipedia + renderer = setupScrapeClass.renderer }) test('getArticle of "London" returns one article', async () => { const articleId = 'London' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(renderer, articleId) const articleDetail = { title: articleId, thumbnail: { @@ -171,7 +173,7 @@ describe('Downloader class', () => { revisionId: 1168361498, timestamp: '2023-08-02T09:57:11Z', } - const articleUrl = getArticleUrl(downloader, dump, articleDetail.title) + const articleUrl = downloader.getArticleUrl(renderer, articleDetail.title) const PaginatedArticle = await downloader.getArticle( downloader.webp, _moduleDependencies, @@ -188,7 +190,7 @@ describe('Downloader class', () => { test('getArticle response status for non-existent article id is 404', async () => { const articleId = 'NeverExistingArticle' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(renderer, articleId) const articleDetail = { title: articleId, missing: '', diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index 9a6687ee..b7ab903c 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -84,7 +84,6 @@ describe('RendererBuilder', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() const rendererBuilderOptions = { MediaWiki, diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index e539f1fc..8e0cb7d8 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -7,7 +7,6 @@ import { saveArticles } from '../../src/util/saveArticles.js' import { ZimArticle } from '@openzim/libzim' import { mwRetToArticleDetail, DELETED_ARTICLE_ERROR } from '../../src/util/index.js' import { jest } from '@jest/globals' -import { getArticleUrl } from '../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js' import { RENDERERS_LIST } from '../../src/util/const.js' @@ -19,12 +18,11 @@ describe('saveArticles', () => { afterAll(stopRedis) test('Article html processing', async () => { - const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia + const { renderer, MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls('WikimediaDesktop') const _articlesDetail = await downloader.getArticleDetailsIds(['London']) const articlesDetail = mwRetToArticleDetail(_articlesDetail) const { articleDetailXId } = RedisStore @@ -55,7 +53,7 @@ describe('saveArticles', () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const articleId = 'non-existent-article' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(renderer, articleId) const articleDetail = { title: 'Non-existent-article', missing: '' } const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) @@ -100,9 +98,8 @@ describe('saveArticles', () => { throw new Error(`Unknown renderer: ${renderer}`) } const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia - await downloader.setBaseUrls(renderer) const articleId = 'Canada' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(rendererInstance, articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) const { articleDetailXId } = RedisStore @@ -133,9 +130,8 @@ describe('saveArticles', () => { test('Load main page and check that it is without header', async () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia - await downloader.setBaseUrls('WikimediaDesktop') const articleId = 'Main_Page' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(wikimediaDesktopRenderer, articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) const { articleDetailXId } = RedisStore @@ -232,7 +228,6 @@ describe('saveArticles', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { // eslint-disable-next-line @typescript-eslint/no-unused-vars public async shouldKeepArticle(articleId: string, doc: Document) { diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts index cbe4d150..a3dfba52 100644 --- a/test/unit/treatments/article.treatment.test.ts +++ b/test/unit/treatments/article.treatment.test.ts @@ -6,7 +6,6 @@ import { setupScrapeClasses } from '../../util.js' import { startRedis, stopRedis } from '../bootstrap.js' import { saveArticles } from '../../../src/util/saveArticles.js' import { jest } from '@jest/globals' -import { getArticleUrl } from '../../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../../src/renderers/wikimedia-desktop.renderer.js' jest.setTimeout(10000) @@ -16,8 +15,7 @@ describe('ArticleTreatment', () => { afterAll(stopRedis) test('Article html processing', async () => { - const { downloader, dump } = await setupScrapeClasses() // en wikipedia - await downloader.setBaseUrls() + const { downloader, dump, renderer } = await setupScrapeClasses() // en wikipedia const title = 'London' const _articlesDetail = await downloader.getArticleDetailsIds([title]) const articlesDetail = mwRetToArticleDetail(_articlesDetail) @@ -29,7 +27,7 @@ describe('ArticleTreatment', () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const articleId = 'non-existent-article' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(renderer, articleId) const _moduleDependencies = await downloader.getModuleDependencies(title) const articleDetail = { diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 03b17b5c..707ae9d5 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -143,7 +143,6 @@ describe('Styles', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls('WikimediaDesktop') await getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) diff --git a/test/util.ts b/test/util.ts index 7625cb78..28a030fc 100644 --- a/test/util.ts +++ b/test/util.ts @@ -33,6 +33,10 @@ export function makeLink($doc: Document, href: string, rel: string, title: strin export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', format = '' } = {}) { MediaWiki.base = mwUrl + const renderer = {} + + Object.defineProperty(renderer.constructor, 'name', { value: 'WikimediaDesktopRenderer' }) + const downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) @@ -47,6 +51,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f MediaWiki, downloader, dump, + renderer, } }