From 7c2bd3f7eec679dfa7b396f19075f1d516b8d331 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 29 Aug 2023 14:26:05 +0300 Subject: [PATCH] Move hasCoordinates to Mediawiki singleton --- src/Downloader.ts | 33 +++++----------- src/MediaWiki.ts | 40 ++++++++++++++++++++ src/mwoffliner.lib.ts | 1 + test/unit/downloader.test.ts | 1 + test/unit/mwApi.test.ts | 2 +- test/unit/renderers/renderer.builder.test.ts | 1 + test/unit/saveArticles.test.ts | 2 + test/unit/urlRewriting.test.ts | 2 +- test/util.ts | 2 +- 9 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 7b66d420..21bf36dd 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -16,7 +16,7 @@ import https from 'https' import { normalizeMwResponse, DB_ERROR, WEAK_ETAG_REGEX, stripHttpFromUrl, isBitmapImageMimeType, isImageUrl, getMimeType, isWebpCandidateImageMimeType } from './util/index.js' import S3 from './S3.js' import * as logger from './Logger.js' -import MediaWiki from './MediaWiki.js' +import MediaWiki, { QueryOpts } from './MediaWiki.js' import ApiURLDirector from './util/builders/url/api.director.js' import basicURLDirector from './util/builders/url/basic.director.js' import urlHelper from './util/url.helper.js' @@ -88,7 +88,6 @@ class Downloader { private readonly uaString: string private activeRequests = 0 private maxActiveRequests = 1 - private hasCoordinates = true private readonly backoffOptions: BackoffOptions private readonly optimisationCacheUrl: string private s3: S3 @@ -165,19 +164,6 @@ class Downloader { } } - public async checkCoordinatesAvailability(): Promise { - // Coordinate fetching - const reqOpts = this.getArticleQueryOpts() - - const resp = await this.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts)) - - const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates') - if (isCoordinateWarning) { - logger.info('Coordinates not available on this wiki') - this.hasCoordinates = false - } - } - public async setBaseUrls() { //* Objects order in array matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ @@ -211,9 +197,9 @@ class Downloader { while (true) { const queryOpts: KVS = { - ...this.getArticleQueryOpts(shouldGetThumbnail, true), + ...(await this.getArticleQueryOpts(shouldGetThumbnail, true)), titles: articleIds.join('|'), - ...(this.hasCoordinates ? { colimit: 'max' } : {}), + ...((await MediaWiki.hasCoordinates(this)) ? { colimit: 'max' } : {}), ...(MediaWiki.getCategories ? { cllimit: 'max', @@ -252,8 +238,8 @@ class Downloader { while (true) { const queryOpts: KVS = { - ...this.getArticleQueryOpts(), - ...(this.hasCoordinates ? { colimit: 'max' } : {}), + ...(await this.getArticleQueryOpts()), + ...((await MediaWiki.hasCoordinates(this)) ? { colimit: 'max' } : {}), ...(MediaWiki.getCategories ? { cllimit: 'max', @@ -394,13 +380,12 @@ class Downloader { if (resp.error) logger.log(`Got error from MW Query ${JSON.stringify(resp.warnings, null, '\t')}`) } - private getArticleQueryOpts(includePageimages = false, redirects = false) { + private async getArticleQueryOpts(includePageimages = false, redirects = false): Promise { const validNamespaceIds = MediaWiki.namespacesToMirror.map((ns) => MediaWiki.namespaces[ns].num) + const prop = `${includePageimages ? '|pageimages' : ''}${(await MediaWiki.hasCoordinates(this)) ? '|coordinates' : ''}${MediaWiki.getCategories ? '|categories' : ''}` return { - action: 'query', - format: 'json', - prop: `redirects|revisions${includePageimages ? '|pageimages' : ''}${this.hasCoordinates ? '|coordinates' : ''}${MediaWiki.getCategories ? '|categories' : ''}`, - rdlimit: 'max', + ...MediaWiki.queryOpts, + prop: MediaWiki.queryOpts.prop.concat(prop), rdnamespace: validNamespaceIds.join('|'), redirects: redirects ? true : undefined, } diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index d84a10dc..c3ddb107 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -13,6 +13,15 @@ import DesktopURLDirector from './util/builders/url/desktop.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' +export interface QueryOpts { + action: string + format: string + prop: string + rdlimit: string + rdnamespace: string | number + redirects?: boolean +} + class MediaWiki { private static instance: MediaWiki @@ -30,6 +39,7 @@ class MediaWiki { public namespaces: MWNamespaces = {} public namespacesToMirror: string[] = [] public apiCheckArticleId: string + public queryOpts: QueryOpts #wikiPath: string #restApiPath: string @@ -50,6 +60,7 @@ class MediaWiki { #hasWikimediaDesktopRestApi: boolean | null #hasVisualEditorApi: boolean | null + #hasCoordinates: boolean | null set username(value: string) { this.#username = value @@ -97,8 +108,18 @@ class MediaWiki { this.#wikiPath = 'wiki/' this.apiCheckArticleId = 'MediaWiki:Sidebar' + this.queryOpts = { + action: 'query', + format: 'json', + prop: 'redirects|revisions', + rdlimit: 'max', + rdnamespace: 0, + redirects: false, + } + this.#hasWikimediaDesktopRestApi = null this.#hasVisualEditorApi = null + this.#hasCoordinates = null } private constructor() { @@ -121,6 +142,25 @@ class MediaWiki { return this.#hasVisualEditorApi } + public async hasCoordinates(downloader: Downloader): Promise { + if (this.#hasCoordinates === null) { + const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num) + const reqOpts = { + ...this.queryOpts, + rdnamespace: validNamespaceIds, + } + + const resp = await downloader.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts)) + const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates') + if (isCoordinateWarning) { + logger.info('Coordinates not available on this wiki') + return (this.#hasCoordinates = false) + } + return (this.#hasCoordinates = true) + } + return this.#hasCoordinates + } + private initMWApis() { const baseUrlDirector = new BaseURLDirector(this.baseUrl.href) this.webUrl = baseUrlDirector.buildURL(this.#wikiPath) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 57467def..9e956304 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -209,6 +209,7 @@ async function execute(argv: any) { } MediaWiki.apiCheckArticleId = mwMetaData.mainPage + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index a0204446..203339ce 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -30,6 +30,7 @@ describe('Downloader class', () => { downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/mwApi.test.ts b/test/unit/mwApi.test.ts index 5bc05cd2..74dd5642 100644 --- a/test/unit/mwApi.test.ts +++ b/test/unit/mwApi.test.ts @@ -23,9 +23,9 @@ describe('mwApi', () => { downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() - await downloader.checkCoordinatesAvailability() await MediaWiki.getNamespaces([], downloader) }) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index 01606307..396c4a4f 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -80,6 +80,7 @@ describe('RendererBuilder', () => { it('should throw an error for unknown RendererAPI in specific mode', async () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 6e426ebe..9052e6ba 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -18,6 +18,7 @@ describe('saveArticles', () => { test('Article html processing', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() @@ -131,6 +132,7 @@ describe('saveArticles', () => { test('--customFlavour', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index e5c28219..b24c1368 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -138,9 +138,9 @@ describe('Styles', () => { await articleDetailXId.flush() await redisStore.redirectsXId.flush() const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() - await downloader.checkCoordinatesAvailability() await downloader.setBaseUrls() await getArticleIds(downloader, redisStore, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) diff --git a/test/util.ts b/test/util.ts index 6ba396a8..c4d6ebc4 100644 --- a/test/util.ts +++ b/test/util.ts @@ -36,9 +36,9 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f const downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) + await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() await MediaWiki.hasVisualEditorApi() - await downloader.checkCoordinatesAvailability() const dump = new Dump(format, {} as any, MediaWiki.metaData)