From 5d6b68c127a0258ef556c4c3c59081bc0adc2f2f Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 11 Sep 2023 17:07:45 +0300 Subject: [PATCH 01/58] Implement mobile renderer (partial impl) --- src/Downloader.ts | 2 + src/MediaWiki.ts | 15 ++++++ src/mwoffliner.lib.ts | 1 + src/renderers/renderer.builder.ts | 20 ++++++-- src/renderers/wikimedia-mobile.renderer.ts | 50 +++++++++++++++++++ src/util/builders/url/base.director.ts | 7 +++ src/util/builders/url/mobile.director.ts | 16 ++++++ src/util/saveArticles.ts | 16 +++--- test/unit/builders/url/base.director.test.ts | 14 ++++++ .../unit/builders/url/mobile.director.test.ts | 13 +++++ test/unit/downloader.test.ts | 1 + test/unit/renderers/renderer.builder.test.ts | 1 + test/unit/saveArticles.test.ts | 2 + test/unit/urlRewriting.test.ts | 1 + test/util.ts | 1 + 15 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 src/renderers/wikimedia-mobile.renderer.ts create mode 100644 src/util/builders/url/mobile.director.ts create mode 100644 test/unit/builders/url/mobile.director.test.ts diff --git a/src/Downloader.ts b/src/Downloader.ts index 8a8ca712..ac9d210b 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -173,12 +173,14 @@ class Downloader { this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) //* Objects order in array matters! 
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) } else { switch (forceRender) { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 65ca1055..434ae49d 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -10,6 +10,7 @@ import basicURLDirector from './util/builders/url/basic.director.js' import BaseURLDirector from './util/builders/url/base.director.js' import ApiURLDirector from './util/builders/url/api.director.js' import DesktopURLDirector from './util/builders/url/desktop.director.js' +import MobileURLDirector from './util/builders/url/mobile.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' import { BLACKLISTED_NS } from './util/const.js' @@ -50,6 +51,7 @@ class MediaWiki { #domain: string private apiUrlDirector: ApiURLDirector private wikimediaDesktopUrlDirector: DesktopURLDirector + private wikimediaMobileUrlDirector: MobileURLDirector private visualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL @@ -58,8 +60,10 @@ class MediaWiki { public _modulePathOpt: string // only for whiting to generate modulePath public webUrl: URL public desktopRestApiUrl: URL + public mobileRestApiUrl: URL #hasWikimediaDesktopRestApi: boolean | null + #hasWikimediaMobileRestApi: boolean | null #hasVisualEditorApi: boolean | null #hasCoordinates: boolean | null @@ -120,6 +124,7 @@ class MediaWiki { } this.#hasWikimediaDesktopRestApi = null + this.#hasWikimediaMobileRestApi = null this.#hasVisualEditorApi = null this.#hasCoordinates = null } @@ -136,6 +141,14 @@ class MediaWiki { return this.#hasWikimediaDesktopRestApi } + public async 
hasWikimediaMobileRestApi(): Promise { + if (this.#hasWikimediaMobileRestApi === null) { + this.#hasWikimediaMobileRestApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaMobileRestApi + } + return this.#hasWikimediaMobileRestApi + } + public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) @@ -170,8 +183,10 @@ class MediaWiki { this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath) + this.mobileRestApiUrl = baseUrlDirector.buildMobileRestApiURL(this.#restApiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href) + this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileRestApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 782f7a8d..bc807aab 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -212,6 +212,7 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index dc6ed04e..48418461 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -2,6 +2,7 @@ import MediaWiki from './../MediaWiki.js' import { Renderer } from './abstract.renderer.js' import { VisualEditorRenderer } from 
'./visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' +import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' import * as logger from './../Logger.js' @@ -9,7 +10,11 @@ export class RendererBuilder { public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options - const [hasVisualEditorApi, hasWikimediaDesktopRestApi] = await Promise.all([MediaWiki.hasVisualEditorApi(), MediaWiki.hasWikimediaDesktopRestApi()]) + const [hasVisualEditorApi, hasWikimediaDesktopRestApi, hasWikimediaMobileRestApi] = await Promise.all([ + MediaWiki.hasVisualEditorApi(), + MediaWiki.hasWikimediaDesktopRestApi(), + MediaWiki.hasWikimediaMobileRestApi(), + ]) switch (renderType) { case 'desktop': @@ -23,7 +28,11 @@ export class RendererBuilder { process.exit(1) } case 'mobile': - // TODO: return WikimediaMobile renderer + if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) break case 'auto': if (hasWikimediaDesktopRestApi) { @@ -51,8 +60,11 @@ export class RendererBuilder { logger.error('Cannot create an instance of VisualEditor renderer.') process.exit(1) case 'WikimediaMobile': - // TODO: return WikimediaMobile renderer - return + if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) default: throw new Error(`Unknown renderName for specific mode: ${renderName}`) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts new file mode 100644 index 00000000..f7bca864 --- /dev/null +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -0,0 +1,50 @@ +import * as logger from '../Logger.js' +import { Renderer } from './abstract.renderer.js' +import { getStrippedTitleFromHtml } from 
'../util/misc.js' +import { RenderOpts, RenderOutput } from './abstract.renderer.js' + +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' +export class WikimediaMobileRenderer extends Renderer { + constructor() { + super() + } + + private async retrieveHtml(renderOpts: RenderOpts): Promise { + const { data, articleId, articleDetail, isMainPage } = renderOpts + + const html = isMainPage ? data : super.injectH1TitleToHtml(data, articleDetail) + const strippedTitle = getStrippedTitleFromHtml(html) + const displayTitle = strippedTitle || articleId.replace('_', ' ') + + return { html, displayTitle } + } + + public async render(renderOpts: RenderOpts): Promise { + try { + const result: RenderOutput = [] + const { data, articleId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + const articleDetail = await renderOpts.articleDetailXId.get(articleId) + + const { html, displayTitle } = await this.retrieveHtml(renderOpts) + if (html) { + let dataWithHeader = '' + if (!isMainPage) { + dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) + } + const { finalHTML, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + + result.push({ + articleId, + displayTitle, + html: finalHTML, + mediaDependencies: null, + subtitles, + }) + return result + } + } catch (err) { + logger.error(err.message) + throw new Error(err.message) + } + } +} diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 3aa7ba3a..2f8f7949 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -28,6 +28,13 @@ export default class BaseURLDirector { .build(true, '/') } + buildMobileRestApiURL(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 
'api/rest_v1/page/mobile-html') + .build(true, '/') + } + buildModuleURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) diff --git a/src/util/builders/url/mobile.director.ts b/src/util/builders/url/mobile.director.ts new file mode 100644 index 00000000..258b389e --- /dev/null +++ b/src/util/builders/url/mobile.director.ts @@ -0,0 +1,16 @@ +import urlBuilder from './url.builder.js' + +/** + * Interface to build URLs based on MediaWiki mobile URL + */ +export default class MobileURLDirector { + baseDomain: string + + constructor(baseDomain: string) { + this.baseDomain = baseDomain + } + + buildArticleURL(articleId: string) { + return urlBuilder.setDomain(this.baseDomain).setPath(encodeURIComponent(articleId)).build() + } +} diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index cf60d2bb..bb092b35 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -206,7 +206,7 @@ async function saveArticle( filesToDownload[s.path] = { url: s.url, namespace: '-' } }) - if (mediaDependencies.length) { + if (mediaDependencies && mediaDependencies.length) { const existingVals = await RedisStore.filesToDownloadXPath.getMany(mediaDependencies.map((dep) => dep.path)) for (const dep of mediaDependencies) { @@ -260,21 +260,25 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade const rendererBuilder = new RendererBuilder() let rendererBuilderOptions: RendererBuilderOptions + + let mainPageRenderer + let articlesRenderer if (forceRender) { rendererBuilderOptions = { renderType: 'specific', renderName: forceRender, } + // All articles and main page will use the same renderer if 'forceRender' is specified + mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } else { rendererBuilderOptions = { - renderType: 'auto', + renderType: 'desktop', } + mainPageRenderer = await 
rendererBuilder.createRenderer(rendererBuilderOptions) + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) } - const mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - // TODO: article renderer will be switched to the mobile mode later - const articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - if (dump.customProcessor?.shouldKeepArticle) { await getAllArticlesToKeep(downloader, articleDetailXId, dump, mainPageRenderer, articlesRenderer) } diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 9282ff8c..5aadd9d4 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -25,6 +25,20 @@ describe('BaseURLDirector', () => { }) }) + describe('buildMobileRestApiURL', () => { + it('should return mobile rest URL with provided path and trailing char', () => { + const url = baseUrlDirector.buildMobileRestApiURL('api/rest_v2/page/mobile-html') + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') + }) + + it('should return mobile rest URL with default path and trailing char', () => { + const url = baseUrlDirector.buildMobileRestApiURL() + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') + }) + }) + describe('buildDesktopRestApiURL', () => { it('should return a desktop URL with provided path and trailing char', () => { const url = baseUrlDirector.buildDesktopRestApiURL('api/rest_v2/page/html') diff --git a/test/unit/builders/url/mobile.director.test.ts b/test/unit/builders/url/mobile.director.test.ts new file mode 100644 index 00000000..d5f94a78 --- /dev/null +++ b/test/unit/builders/url/mobile.director.test.ts @@ -0,0 +1,13 @@ +import MobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' + +describe('MobileURLDirector', () => { + const mobuleUrlDirector = new 
MobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') + + describe('buildArticleURL', () => { + it('should return a URL for retrieving mobile article', () => { + const url = mobuleUrlDirector.buildArticleURL('article-123') + + expect(url).toBe('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/article-123') + }) + }) +}) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 153666e0..e0dd8504 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -33,6 +33,7 @@ describe('Downloader class', () => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() }) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index c071eff0..8c92d42c 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -82,6 +82,7 @@ describe('RendererBuilder', () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 525f80b0..120a948b 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -21,6 +21,7 @@ describe('saveArticles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() const _articlesDetail = await downloader.getArticleDetailsIds(['London']) @@ 
-243,6 +244,7 @@ describe('saveArticles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 4e76d0c7..0f184007 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -141,6 +141,7 @@ describe('Styles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/util.ts b/test/util.ts index c4d6ebc4..2f01b581 100644 --- a/test/util.ts +++ b/test/util.ts @@ -38,6 +38,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() const dump = new Dump(format, {} as any, MediaWiki.metaData) From 09aa8384d2673c174d924a0daa886c85a38fcf55 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 15:23:07 +0300 Subject: [PATCH 02/58] Setup renderers for main page and articles --- src/renderers/renderer.builder.ts | 3 ++- src/renderers/wikimedia-mobile.renderer.ts | 4 ++-- src/util/saveArticles.ts | 15 +++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index 48418461..47223f3c 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -33,13 
+33,14 @@ export class RendererBuilder { } logger.error('No available mobile renderer.') process.exit(1) - break case 'auto': if (hasWikimediaDesktopRestApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { return new VisualEditorRenderer() + } else if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') process.exit(1) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index f7bca864..ce3fc930 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -31,13 +31,13 @@ export class WikimediaMobileRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } - const { finalHTML, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) result.push({ articleId, displayTitle, html: finalHTML, - mediaDependencies: null, + mediaDependencies, subtitles, }) return result diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index bb092b35..d8b0c765 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -14,6 +14,7 @@ import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } f import urlHelper from './url.helper.js' import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' +import MediaWiki from '../../src/MediaWiki.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { await 
retryStore.flush() @@ -202,9 +203,11 @@ async function saveArticle( try { const filesToDownload: KVS = {} - subtitles.forEach((s) => { - filesToDownload[s.path] = { url: s.url, namespace: '-' } - }) + if (subtitles?.length > 0) { + subtitles.forEach((s) => { + filesToDownload[s.path] = { url: s.url, namespace: '-' } + }) + } if (mediaDependencies && mediaDependencies.length) { const existingVals = await RedisStore.filesToDownloadXPath.getMany(mediaDependencies.map((dep) => dep.path)) @@ -276,7 +279,11 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade renderType: 'desktop', } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) + // If the mobile renderer API is not available, switch articles rendering to the auto mode instead + if (await MediaWiki.hasWikimediaMobileRestApi()) { + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) + } + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'auto' }) } if (dump.customProcessor?.shouldKeepArticle) { From 293caaec2dd7e7b0fb07868b857bb61d9ecd0061 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 16:53:22 +0300 Subject: [PATCH 03/58] Fix regressions in saveArticles --- src/util/saveArticles.ts | 64 +++++++--------------------------------- 1 file changed, 11 insertions(+), 53 deletions(-) diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index d8b0c765..e43d0fb6 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -130,33 +130,13 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK await articleDetailXId.iterateItems(downloader.speed, async (articleKeyValuePairs) => { for (const [articleId, articleDetail] of Object.entries(articleKeyValuePairs)) { const 
_moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - let rets: any - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, html } of rets) { if (!html) { continue @@ -280,10 +260,8 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - if (await MediaWiki.hasWikimediaMobileRestApi()) { - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) - } - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'auto' }) + rendererBuilderOptions.renderType = (await MediaWiki.hasWikimediaMobileRestApi()) ? 
'mobile' : 'auto' + articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } if (dump.customProcessor?.shouldKeepArticle) { @@ -321,30 +299,10 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, subtitles } of rets) { if (!finalHTML) { From fb6f78c78d6f59568b982cdb0d39541319cc4973 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 19:47:36 +0300 Subject: [PATCH 04/58] Pass hasWikimediaMobileRestApi to saveArticles as argument to prevent issue with no found module --- src/mwoffliner.lib.ts | 4 ++-- src/util/saveArticles.ts | 5 ++--- test/unit/saveArticles.test.ts | 2 ++ test/unit/treatments/article.treatment.test.ts | 1 + test/unit/urlRewriting.test.ts | 1 + 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index bc807aab..240b94bd 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -212,7 +212,7 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) await 
MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + const hasWikimediaMobileRestApi = await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) @@ -421,7 +421,7 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, forceRender) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileRestApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index e43d0fb6..224c80fc 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -14,7 +14,6 @@ import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } f import urlHelper from './url.helper.js' import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' -import MediaWiki from '../../src/MediaWiki.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { await retryStore.flush() @@ -232,7 +231,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str /* * Fetch Articles */ -export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, forceRender = null) { +export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileRestApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() let jsConfigVars = '' @@ -260,7 +259,7 @@ export async function 
saveArticles(zimCreator: ZimCreator, downloader: Downloade } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - rendererBuilderOptions.renderType = (await MediaWiki.hasWikimediaMobileRestApi()) ? 'mobile' : 'auto' + rendererBuilderOptions.renderType = hasWikimediaMobileRestApi ? 'mobile' : 'auto' articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 120a948b..7fe36b07 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -44,6 +44,7 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, ) // Successfully scrapped existent articles @@ -292,6 +293,7 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, ) const ParisDocument = domino.createDocument(writtenArticles.Paris.bufferData) diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts index ab9a129b..cbe4d150 100644 --- a/test/unit/treatments/article.treatment.test.ts +++ b/test/unit/treatments/article.treatment.test.ts @@ -56,6 +56,7 @@ describe('ArticleTreatment', () => { } as any, downloader, dump, + true, ) // Successfully scrapped existent articles diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 0f184007..49001797 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -160,6 +160,7 @@ describe('Styles', () => { } as any, downloader, dump, + true, ) const html = LondonArticle.bufferData.toString() From 0b9b688535de7afd8f5a04704a17304a5f28c03b Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 14 Sep 2023 17:23:45 +0300 Subject: [PATCH 05/58] Add treatSections method for mobile render (partial impl) --- src/renderers/wikimedia-mobile.renderer.ts | 151 +++++++++++++++++++++ 1 file changed, 
151 insertions(+) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index ce3fc930..776aebae 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,7 +1,9 @@ +import * as domino from 'domino' import * as logger from '../Logger.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' +import { categoriesTemplate, leadSectionTemplate, sectionTemplate, subCategoriesTemplate, subPagesTemplate, subSectionTemplate } from '../Templates.js' // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' export class WikimediaMobileRenderer extends Renderer { @@ -31,6 +33,7 @@ export class WikimediaMobileRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } + // TODO: do mobile page transformations before applying other treatments const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) result.push({ @@ -47,4 +50,152 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } + + // TODO: work in progress + private treatSections(data: any, dump: Dump, articleId: string, displayTitle: string, articleDetail: ArticleDetail): string { + const doc = domino.createDocument(data) + + let html = '' + + // set the first section (open by default) + html += leadSectionTemplate({ + lead_display_title: displayTitle, + lead_section_text: doc.querySelector('section[data-mw-section-id="0"]').innerHTML, + strings: dump.strings, + }) + + // Get only top remain setions except first one + // Calculate toclevel to handle nesting. 
To do so you need to traverse DOM recursevely + + // set all other section (closed by default) + if (!dump.nodet && json.remaining.sections.length > 0) { + const firstTocLevel = json.remaining.sections[0].toclevel + json.remaining.sections.forEach((oneSection: any, i: number) => { + if (oneSection.toclevel === firstTocLevel) { + html = html.replace(`__SUB_LEVEL_SECTION_${i}__`, '') // remove unused anchor for subsection + html += sectionTemplate({ + section_index: i + 1, + section_id: oneSection.id, // Get from data-mw-section-id attribute + section_anchor: oneSection.anchor, // Anchor of the heading, id attr of the heading in html + section_line: oneSection.line, // this is the textContent() from the title. Check mobileapps for reference (checked) + section_text: oneSection.text, // this is the innerHTML of the section, refer to mobileapps + strings: dump.strings, // TODO: investigate + }) + } else { + html = html.replace( + `__SUB_LEVEL_SECTION_${i}__`, + subSectionTemplate({ + section_index: i + 1, + section_toclevel: oneSection.toclevel + 1, + section_id: oneSection.id, + section_anchor: oneSection.anchor, + section_line: oneSection.line, + section_text: oneSection.text, + strings: dump.strings, + }), + ) + } + }) + } + + // For section index + /** + * Iterate over parent and nested sections separately and set section_index. For parent = 1, and nested 2..n respectively + */ + + // For id + /** + * const sectionNumberString = sectionElement && sectionElement.getAttribute('data-mw-section-id'); + return sectionNumberString ? 
parseInt(sectionNumberString, 10) : undefined; + */ + + // For line + /** + * node.innerHTML.trim() + */ + + // For section text + /** + if (node.nodeType === NodeType.TEXT_NODE) { + currentSection.text += node.textContent; + } else { + currentSection.text += node.outerHTML; + } + */ + + // For anchor + /** + * node.getAttribute('id'); + */ + + const articleResourceNamespace = 'A' + const categoryResourceNamespace = 'U' + const slashesInUrl = articleId.split('/').length - 1 + const upStr = '../'.repeat(slashesInUrl + 1) + if (articleDetail.subCategories && articleDetail.subCategories.length) { + const subCategories = articleDetail.subCategories.map((category) => { + return { + name: category.title.split(':').slice(1).join(':'), + url: `${upStr}${categoryResourceNamespace}/${category.title}`, + } + }) + + const groups = this.groupAlphabetical(subCategories) + + html += subCategoriesTemplate({ + strings: dump.strings, + groups, + prevArticleUrl: articleDetail.prevArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.prevArticleId}` : null, + nextArticleUrl: articleDetail.nextArticleId ? 
`${upStr}${categoryResourceNamespace}/${articleDetail.nextArticleId}` : null, + }) + } + + if (articleDetail.pages && articleDetail.pages.length) { + const pages = articleDetail.pages.map((page) => { + return { + name: page.title, + url: `${upStr}${articleResourceNamespace}/${page.title}`, + } + }) + + const groups = this.groupAlphabetical(pages) + + html += subPagesTemplate({ + strings: dump.strings, + groups, + }) + } + + if (articleDetail.categories && articleDetail.categories.length) { + const categories = articleDetail.categories.map((category) => { + return { + name: category.title.split(':').slice(1).join(':'), + url: `${upStr}${categoryResourceNamespace}/${category.title}`, + } + }) + html += categoriesTemplate({ + strings: dump.strings, + categories, + }) + } + html = html.replace(`__SUB_LEVEL_SECTION_${json.remaining.sections.length}__`, '') // remove the last subcestion anchor (all other anchor are removed in the forEach) + return html + } + + private groupAlphabetical(items: PageRef[]) { + const groupsAlphabetical = items.reduce((acc: any, item) => { + const groupId = item.name[0].toLocaleUpperCase() + acc[groupId] = (acc[groupId] || []).concat(item) + return acc + }, {}) + + return Object.keys(groupsAlphabetical) + .sort() + .map((letter) => { + return { + title: letter, + items: groupsAlphabetical[letter], + } + }) + } } From 3c5087dfcceaf5aafc5e8a476a8b153cdd03d3a7 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 15 Sep 2023 16:54:38 +0300 Subject: [PATCH 06/58] Pass explicit output of mobile-html to the zim creator --- src/Downloader.ts | 2 +- src/renderers/wikimedia-mobile.renderer.ts | 180 ++------------------- 2 files changed, 13 insertions(+), 169 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index ac9d210b..5c77ffa9 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -171,9 +171,9 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! 
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) //* Objects order in array matters! diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 776aebae..9089cab8 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,45 +1,37 @@ -import * as domino from 'domino' import * as logger from '../Logger.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' -import { categoriesTemplate, leadSectionTemplate, sectionTemplate, subCategoriesTemplate, subPagesTemplate, subSectionTemplate } from '../Templates.js' -// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { constructor() { super() } - private async retrieveHtml(renderOpts: RenderOpts): Promise { - const { data, articleId, articleDetail, isMainPage } = renderOpts + private getStrippedTitle(renderOpts: RenderOpts): string { + const { data, articleId } = renderOpts - const html = isMainPage ? 
data : super.injectH1TitleToHtml(data, articleDetail) - const strippedTitle = getStrippedTitleFromHtml(html) - const displayTitle = strippedTitle || articleId.replace('_', ' ') - - return { html, displayTitle } + const strippedTitle = getStrippedTitleFromHtml(data) + return strippedTitle || articleId.replace('_', ' ') } public async render(renderOpts: RenderOpts): Promise { try { const result: RenderOutput = [] - const { data, articleId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) - const { html, displayTitle } = await this.retrieveHtml(renderOpts) - if (html) { - let dataWithHeader = '' - if (!isMainPage) { - dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) - } - // TODO: do mobile page transformations before applying other treatments - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const displayTitle = this.getStrippedTitle(renderOpts) + if (data) { + // TODO: Apply mobile page transformations before applying other treatments + const { subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) + // TODO: styles, scripts and most of content are not visible in Kiwix app, but enabled when use Kiwix server result.push({ articleId, displayTitle, - html: finalHTML, + html: data, mediaDependencies, subtitles, }) @@ -50,152 +42,4 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } - - // TODO: work in progress - private treatSections(data: any, dump: Dump, articleId: string, displayTitle: string, articleDetail: ArticleDetail): string { - const doc = domino.createDocument(data) - - let html = '' - - // set the first section (open by default) - html += leadSectionTemplate({ - 
lead_display_title: displayTitle, - lead_section_text: doc.querySelector('section[data-mw-section-id="0"]').innerHTML, - strings: dump.strings, - }) - - // Get only top remain setions except first one - // Calculate toclevel to handle nesting. To do so you need to traverse DOM recursevely - - // set all other section (closed by default) - if (!dump.nodet && json.remaining.sections.length > 0) { - const firstTocLevel = json.remaining.sections[0].toclevel - json.remaining.sections.forEach((oneSection: any, i: number) => { - if (oneSection.toclevel === firstTocLevel) { - html = html.replace(`__SUB_LEVEL_SECTION_${i}__`, '') // remove unused anchor for subsection - html += sectionTemplate({ - section_index: i + 1, - section_id: oneSection.id, // Get from data-mw-section-id attribute - section_anchor: oneSection.anchor, // Anchor of the heading, id attr of the heading in html - section_line: oneSection.line, // this is the textContent() from the title. Check mobileapps for reference (checked) - section_text: oneSection.text, // this is the innerHTML of the section, refer to mobileapps - strings: dump.strings, // TODO: investigate - }) - } else { - html = html.replace( - `__SUB_LEVEL_SECTION_${i}__`, - subSectionTemplate({ - section_index: i + 1, - section_toclevel: oneSection.toclevel + 1, - section_id: oneSection.id, - section_anchor: oneSection.anchor, - section_line: oneSection.line, - section_text: oneSection.text, - strings: dump.strings, - }), - ) - } - }) - } - - // For section index - /** - * Iterate over parent and nested sections separately and set section_index. For parent = 1, and nested 2..n respectively - */ - - // For id - /** - * const sectionNumberString = sectionElement && sectionElement.getAttribute('data-mw-section-id'); - return sectionNumberString ? 
parseInt(sectionNumberString, 10) : undefined; - */ - - // For line - /** - * node.innerHTML.trim() - */ - - // For section text - /** - if (node.nodeType === NodeType.TEXT_NODE) { - currentSection.text += node.textContent; - } else { - currentSection.text += node.outerHTML; - } - */ - - // For anchor - /** - * node.getAttribute('id'); - */ - - const articleResourceNamespace = 'A' - const categoryResourceNamespace = 'U' - const slashesInUrl = articleId.split('/').length - 1 - const upStr = '../'.repeat(slashesInUrl + 1) - if (articleDetail.subCategories && articleDetail.subCategories.length) { - const subCategories = articleDetail.subCategories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - - const groups = this.groupAlphabetical(subCategories) - - html += subCategoriesTemplate({ - strings: dump.strings, - groups, - prevArticleUrl: articleDetail.prevArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.prevArticleId}` : null, - nextArticleUrl: articleDetail.nextArticleId ? 
`${upStr}${categoryResourceNamespace}/${articleDetail.nextArticleId}` : null, - }) - } - - if (articleDetail.pages && articleDetail.pages.length) { - const pages = articleDetail.pages.map((page) => { - return { - name: page.title, - url: `${upStr}${articleResourceNamespace}/${page.title}`, - } - }) - - const groups = this.groupAlphabetical(pages) - - html += subPagesTemplate({ - strings: dump.strings, - groups, - }) - } - - if (articleDetail.categories && articleDetail.categories.length) { - const categories = articleDetail.categories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - html += categoriesTemplate({ - strings: dump.strings, - categories, - }) - } - html = html.replace(`__SUB_LEVEL_SECTION_${json.remaining.sections.length}__`, '') // remove the last subcestion anchor (all other anchor are removed in the forEach) - return html - } - - private groupAlphabetical(items: PageRef[]) { - const groupsAlphabetical = items.reduce((acc: any, item) => { - const groupId = item.name[0].toLocaleUpperCase() - acc[groupId] = (acc[groupId] || []).concat(item) - return acc - }, {}) - - return Object.keys(groupsAlphabetical) - .sort() - .map((letter) => { - return { - title: letter, - items: groupsAlphabetical[letter], - } - }) - } } From f2b5c08f849936201a16dbd552ebf674be85d3a8 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 18 Sep 2023 17:50:58 +0300 Subject: [PATCH 07/58] Apply mobile css and js in mobile renderer --- src/renderers/wikimedia-mobile.renderer.ts | 37 +++++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 9089cab8..7666c436 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,3 +1,4 @@ +import * as domino from 'domino' import * as logger from '../Logger.js' 
import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' @@ -24,14 +25,14 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { - // TODO: Apply mobile page transformations before applying other treatments - const { subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - - // TODO: styles, scripts and most of content are not visible in Kiwix app, but enabled when use Kiwix server + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) + // TODO: Add mobile scripts after all treatments but this need to be refactored + // TODO: enable reference list + const dataWithMobileModules = this.addMobileModules(finalHTML) result.push({ articleId, displayTitle, - html: data, + html: dataWithMobileModules, mediaDependencies, subtitles, }) @@ -42,4 +43,30 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } + + private addMobileModules(data) { + const doc = domino.createDocument(data) + const protocol = 'https://' + // TODO: query this instead of hardcoding. 
+ const offlineResourcesCSSList = [ + 'meta.wikimedia.org/api/rest_v1/data/css/mobile/base', + 'meta.wikimedia.org/api/rest_v1/data/css/mobile/pcs', + 'en.wikipedia.org/api/rest_v1/data/css/mobile/site', + ] + const offlineResourcesJSList = ['meta.wikimedia.org/api/rest_v1/data/javascript/mobile/pcs'] + + offlineResourcesCSSList.forEach((cssUrl) => { + const linkEl = doc.createElement('link') as DominoElement + Object.assign(linkEl, { rel: 'stylesheet', href: `${protocol}${cssUrl}` }) + doc.head.appendChild(linkEl) + }) + + offlineResourcesJSList.forEach((jsUrl) => { + const scriptEl = doc.createElement('script') as DominoElement + scriptEl.setAttribute('src', `${protocol}${jsUrl}`) + doc.head.appendChild(scriptEl) + }) + + return doc.documentElement.outerHTML + } } From 96fb0f59012bdc6a0c7859730d1024344dbf52b3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 19 Sep 2023 16:41:11 +0300 Subject: [PATCH 08/58] Adapt reference links and minor treatments --- src/renderers/wikimedia-mobile.renderer.ts | 135 +++++++++++++++++++-- 1 file changed, 128 insertions(+), 7 deletions(-) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 7666c436..004013e4 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -4,6 +4,8 @@ import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' +type PipeFunction = (data: string) => string + // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { constructor() { @@ -26,13 +28,22 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - // 
TODO: Add mobile scripts after all treatments but this need to be refactored - // TODO: enable reference list - const dataWithMobileModules = this.addMobileModules(finalHTML) + const finalHTMLDoc = domino.createDocument(finalHTML) + const mobileHTML = this.pipeMobileTransformations( + finalHTMLDoc, + this.addMobileModules, + this.convertLazyLoadToImages, + this.removeEditContainer, + this.removeHiddenClass, + this.restoreLinkDefaults, + this.disableClientLinkListener, + this.overrideMobileStyles, + ) + result.push({ articleId, displayTitle, - html: dataWithMobileModules, + html: mobileHTML.documentElement.outerHTML, mediaDependencies, subtitles, }) @@ -44,8 +55,11 @@ export class WikimediaMobileRenderer extends Renderer { } } - private addMobileModules(data) { - const doc = domino.createDocument(data) + private pipeMobileTransformations(value, ...fns: PipeFunction[]) { + return fns.reduce((acc, fn) => fn(acc), value) + } + + private addMobileModules(doc: DominoElement) { const protocol = 'https://' // TODO: query this instead of hardcoding. 
const offlineResourcesCSSList = [ @@ -67,6 +81,113 @@ export class WikimediaMobileRenderer extends Renderer { doc.head.appendChild(scriptEl) }) - return doc.documentElement.outerHTML + return doc + } + + private removeEditContainer(doc: DominoElement) { + const editContainers = doc.querySelectorAll('.pcs-edit-section-link-container') + + editContainers.forEach((elem: DominoElement) => { + elem.remove() + }) + + return doc + } + + private convertLazyLoadToImages(doc: DominoElement) { + const protocol = 'https://' + const spans = doc.querySelectorAll('.pcs-lazy-load-placeholder') + + spans.forEach((span: DominoElement) => { + // Create a new img element + const img = doc.createElement('img') as DominoElement + + // Set the attributes for the img element based on the data attributes in the span + img.src = protocol + span.getAttribute('data-src') + img.setAttribute('decoding', 'async') + img.setAttribute('data-file-width', span.getAttribute('data-data-file-width')) + img.setAttribute('data-file-height', span.getAttribute('data-data-file-height')) + img.setAttribute('data-file-type', 'bitmap') + img.width = span.getAttribute('data-width') + img.height = span.getAttribute('data-height') + img.setAttribute('srcset', `${protocol}${span.getAttribute('data-srcset')}`) + img.className = span.getAttribute('data-class') + + // Replace the span with the img element + span.parentNode.replaceChild(img, span) + }) + + return doc + } + + private removeHiddenClass(doc: DominoElement) { + const pcsSectionHidden = 'pcs-section-hidden' + const hiddenSections = doc.querySelectorAll(`.${pcsSectionHidden}`) + hiddenSections.forEach((section) => { + section.classList.remove(pcsSectionHidden) + }) + return doc + } + + private restoreLinkDefaults(doc: DominoElement) { + const supElements = doc.querySelectorAll('sup') + + Array.from(supElements).forEach((sup: DominoElement) => { + const anchor = doc.createElement('a') + const mwRefLinkTextElement = sup.querySelector('.mw-reflink-text') as 
DominoElement + + let mwRefLinkText = '' + if (mwRefLinkTextElement) { + mwRefLinkText = mwRefLinkTextElement.textContent || '' + } + + const existedAnchor = sup.querySelector('.reference-link') + + if (existedAnchor?.getAttribute('href')) { + anchor.setAttribute('href', existedAnchor.getAttribute('href')) + } + anchor.className = 'reference-link' + anchor.textContent = mwRefLinkText + + sup.innerHTML = '' + sup.appendChild(anchor) + }) + + return doc + } + + private disableClientLinkListener(doc: DominoElement) { + const scriptEl = doc.createElement('script') + scriptEl.type = 'text/javascript' + scriptEl.text = ` + document.addEventListener("DOMContentLoaded", function() { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + }); + ` + doc.head.appendChild(scriptEl) + + return doc + } + + private overrideMobileStyles(doc: DominoElement) { + const styleEl = doc.createElement('style') + styleEl.innerHTML = ` + body { + margin: 0 auto; + } + .reference-link::after { + content: none !important; + } + ` + doc.head.appendChild(styleEl) + + return doc } } From c45d285b1993193b07d997b7fffbf5fe8c00175e Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 08:56:09 +0300 Subject: [PATCH 09/58] Refactor tests (partial impl) --- src/Downloader.ts | 14 +++---- src/MediaWiki.ts | 40 ++++++++++---------- src/mwoffliner.lib.ts | 6 +-- src/renderers/renderer.builder.ts | 18 ++++----- src/renderers/wikimedia-mobile.renderer.ts | 3 ++ src/util/builders/url/base.director.ts | 6 +-- src/util/saveArticles.ts | 27 ++++++------- test/e2e/articleLists.test.ts | 1 + test/e2e/bm.e2e.test.ts | 1 + test/e2e/downloadImage.e2e.test.ts | 1 + test/e2e/en10.e2e.test.ts | 1 + 
test/e2e/extra.e2e.test.ts | 1 + test/e2e/multimediaContent.test.ts | 1 + test/e2e/treatMedia.e2e.test.ts | 1 + test/e2e/vikidia.e2e.test.ts | 1 + test/e2e/wikisource.e2e.test.ts | 1 + test/e2e/zimMetadata.e2e.test.ts | 1 + test/unit/builders/url/base.director.test.ts | 18 ++++----- test/unit/downloader.test.ts | 4 +- test/unit/renderers/renderer.builder.test.ts | 6 +-- test/unit/saveArticles.test.ts | 14 ++++--- test/unit/urlRewriting.test.ts | 6 +-- test/unit/webpAndRedirection.test.ts | 1 + test/util.ts | 4 +- 24 files changed, 94 insertions(+), 83 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 5c77ffa9..12e48210 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -171,23 +171,23 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, ]) //* Objects order in array matters! 
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, ]) } else { switch (forceRender) { case 'WikimediaDesktop': - if (MediaWiki.hasWikimediaDesktopRestApi()) { - this.baseUrl = MediaWiki.desktopRestApiUrl.href - this.baseUrlForMainPage = MediaWiki.desktopRestApiUrl.href + if (MediaWiki.hasWikimediaDesktopApi()) { + this.baseUrl = MediaWiki.desktopApiUrl.href + this.baseUrlForMainPage = MediaWiki.desktopApiUrl.href break } break diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 434ae49d..dd26d16e 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -59,11 +59,11 @@ class MediaWiki { public modulePath: string // only for reading public _modulePathOpt: string // only for whiting to generate modulePath public webUrl: URL - public desktopRestApiUrl: URL - public mobileRestApiUrl: URL + public desktopApiUrl: URL + public mobileApiUrl: URL - #hasWikimediaDesktopRestApi: boolean | null - #hasWikimediaMobileRestApi: boolean | null + #hasWikimediaDesktopApi: boolean | null + #hasWikimediaMobileApi: boolean | null #hasVisualEditorApi: boolean | null #hasCoordinates: boolean | null @@ -123,8 +123,8 @@ class MediaWiki { formatversion: '2', } - this.#hasWikimediaDesktopRestApi = null - this.#hasWikimediaMobileRestApi = null + this.#hasWikimediaDesktopApi = null + this.#hasWikimediaMobileApi = null this.#hasVisualEditorApi = null this.#hasCoordinates = null } @@ -133,20 +133,20 @@ class MediaWiki { this.initializeMediaWikiDefaults() } - public async 
hasWikimediaDesktopRestApi(): Promise { - if (this.#hasWikimediaDesktopRestApi === null) { - this.#hasWikimediaDesktopRestApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) - return this.#hasWikimediaDesktopRestApi + public async hasWikimediaDesktopApi(): Promise { + if (this.#hasWikimediaDesktopApi === null) { + this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaDesktopApi } - return this.#hasWikimediaDesktopRestApi + return this.#hasWikimediaDesktopApi } - public async hasWikimediaMobileRestApi(): Promise { - if (this.#hasWikimediaMobileRestApi === null) { - this.#hasWikimediaMobileRestApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) - return this.#hasWikimediaMobileRestApi + public async hasWikimediaMobileApi(): Promise { + if (this.#hasWikimediaMobileApi === null) { + this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaMobileApi } - return this.#hasWikimediaMobileRestApi + return this.#hasWikimediaMobileApi } public async hasVisualEditorApi(): Promise { @@ -182,11 +182,11 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath) - this.mobileRestApiUrl = baseUrlDirector.buildMobileRestApiURL(this.#restApiPath) + this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#restApiPath) + this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#restApiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) - this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href) 
- this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileRestApiUrl.href) + this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) + this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 240b94bd..752ddbec 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -211,8 +211,8 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - const hasWikimediaMobileRestApi = await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) @@ -421,7 +421,7 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileRestApi, forceRender) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index 47223f3c..cffb1cab 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -10,15 +10,15 @@ export class RendererBuilder { public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options - const [hasVisualEditorApi, hasWikimediaDesktopRestApi, hasWikimediaMobileRestApi] = await Promise.all([ + const 
[hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ MediaWiki.hasVisualEditorApi(), - MediaWiki.hasWikimediaDesktopRestApi(), - MediaWiki.hasWikimediaMobileRestApi(), + MediaWiki.hasWikimediaDesktopApi(), + MediaWiki.hasWikimediaMobileApi(), ]) switch (renderType) { case 'desktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { @@ -28,18 +28,18 @@ export class RendererBuilder { process.exit(1) } case 'mobile': - if (hasWikimediaMobileRestApi) { + if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') process.exit(1) case 'auto': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { return new VisualEditorRenderer() - } else if (hasWikimediaMobileRestApi) { + } else if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -49,7 +49,7 @@ export class RendererBuilder { // renderName argument is required for 'specific' mode switch (renderName) { case 'WikimediaDesktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { return new WikimediaDesktopRenderer() } logger.error('Cannot create an instance of WikimediaDesktop renderer.') @@ -61,7 +61,7 @@ export class RendererBuilder { logger.error('Cannot create an instance of VisualEditor renderer.') process.exit(1) case 'WikimediaMobile': - if (hasWikimediaMobileRestApi) { + if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 
004013e4..7a749618 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -185,6 +185,9 @@ export class WikimediaMobileRenderer extends Renderer { .reference-link::after { content: none !important; } + .mw-body h3, .mw-body h2 { + width: auto; + } ` doc.head.appendChild(styleEl) diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 2f8f7949..922d2b9e 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -14,21 +14,21 @@ export default class BaseURLDirector { return urlBuilder.setDomain(this.baseDomain).setPath(path).build(true) } - buildRestApiURL(path?: string) { + buildWikimediaApiURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1') .build(true, '/') } - buildDesktopRestApiURL(path?: string) { + buildDesktopApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/html') .build(true, '/') } - buildMobileRestApiURL(path?: string) { + buildMobileApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 
'api/rest_v1/page/mobile-html') diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 224c80fc..a9c45b57 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -12,7 +12,7 @@ import { config } from '../config.js' import { getSizeFromUrl, cleanupAxiosError } from './misc.js' import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } from './const.js' import urlHelper from './url.helper.js' -import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' +import { Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { @@ -231,7 +231,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str /* * Fetch Articles */ -export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileRestApi: boolean, forceRender = null) { +export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() let jsConfigVars = '' @@ -241,26 +241,21 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade const rendererBuilder = new RendererBuilder() - let rendererBuilderOptions: RendererBuilderOptions - let mainPageRenderer let articlesRenderer if (forceRender) { - rendererBuilderOptions = { + // All articles and main page will use the same renderer if 'forceRender' is specified + const renderer = await rendererBuilder.createRenderer({ renderType: 'specific', renderName: forceRender, - } - // All articles and main page will use the same renderer if 'forceRender' is specified - mainPageRenderer = await 
rendererBuilder.createRenderer(rendererBuilderOptions) - articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + }) + mainPageRenderer = renderer + articlesRenderer = renderer } else { - rendererBuilderOptions = { - renderType: 'desktop', - } - mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - rendererBuilderOptions.renderType = hasWikimediaMobileRestApi ? 'mobile' : 'auto' - articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) + articlesRenderer = await rendererBuilder.createRenderer({ + renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', + }) } if (dump.customProcessor?.shouldKeepArticle) { diff --git a/test/e2e/articleLists.test.ts b/test/e2e/articleLists.test.ts index 84547681..ad85cc1c 100644 --- a/test/e2e/articleLists.test.ts +++ b/test/e2e/articleLists.test.ts @@ -22,6 +22,7 @@ describe('articleList', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('articleList and articleListIgnore check', async () => { diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index e957a333..e1198ec8 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -18,6 +18,7 @@ describe('bm', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/downloadImage.e2e.test.ts b/test/e2e/downloadImage.e2e.test.ts index cc3c9078..774d67b0 100644 --- a/test/e2e/downloadImage.e2e.test.ts +++ b/test/e2e/downloadImage.e2e.test.ts @@ -20,6 +20,7 @@ describeIf('Check image downloading from S3 using optimisationCacheUrl parameter articleList: 'Paris', format: ['nodet'], optimisationCacheUrl: process.env.S3_URL, + 
forceRender: 'WikimediaDesktop', } test('right scrapping from fr.wikipedia.org with optimisationCacheUrl parameter', async () => { diff --git a/test/e2e/en10.e2e.test.ts b/test/e2e/en10.e2e.test.ts index 543fe901..f62e6677 100644 --- a/test/e2e/en10.e2e.test.ts +++ b/test/e2e/en10.e2e.test.ts @@ -21,6 +21,7 @@ describe('en10', () => { redis: process.env.REDIS, // format: ['nopic', 'novid', 'nopdf', 'nodet'], format: ['nopic', 'nopdf'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/extra.e2e.test.ts b/test/e2e/extra.e2e.test.ts index 78562f80..6ab70a06 100644 --- a/test/e2e/extra.e2e.test.ts +++ b/test/e2e/extra.e2e.test.ts @@ -36,6 +36,7 @@ AC/DC` outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', }) // Created 1 outputs diff --git a/test/e2e/multimediaContent.test.ts b/test/e2e/multimediaContent.test.ts index 511a3280..f16d5808 100644 --- a/test/e2e/multimediaContent.test.ts +++ b/test/e2e/multimediaContent.test.ts @@ -18,6 +18,7 @@ describe('Multimedia', () => { outputDirectory: testId, redis: process.env.REDIS, customZimDescription: 'Example of the description', + forceRender: 'WikimediaDesktop', } test('check multimedia content from wikipedia test page', async () => { diff --git a/test/e2e/treatMedia.e2e.test.ts b/test/e2e/treatMedia.e2e.test.ts index 22e045ec..f5ac7a13 100644 --- a/test/e2e/treatMedia.e2e.test.ts +++ b/test/e2e/treatMedia.e2e.test.ts @@ -18,6 +18,7 @@ describe('treatment test', () => { articleList, outputDirectory: testId, redis: process.env.REDIS, + forceRender: 'WikimediaDesktop', } test('media file from hidden element should not be downloaded', async () => { diff --git a/test/e2e/vikidia.e2e.test.ts b/test/e2e/vikidia.e2e.test.ts index 694d15f3..30a7f684 100644 --- a/test/e2e/vikidia.e2e.test.ts +++ b/test/e2e/vikidia.e2e.test.ts @@ -18,6 +18,7 @@ describe('vikidia', () => { redis: process.env.REDIS, articleList: 'Alaska', 
customZimDescription: 'Alaska article', + forceRender: 'WikimediaDesktop', } test('right scrapping from vikidia.org', async () => { diff --git a/test/e2e/wikisource.e2e.test.ts b/test/e2e/wikisource.e2e.test.ts index 3c8def91..d8c4e11a 100644 --- a/test/e2e/wikisource.e2e.test.ts +++ b/test/e2e/wikisource.e2e.test.ts @@ -18,6 +18,7 @@ describe('wikisource', () => { redis: process.env.REDIS, format: ['nopic'], noLocalParserFallback: true, + forceRender: 'WikimediaDesktop', } test('Wikisource List', async () => { diff --git a/test/e2e/zimMetadata.e2e.test.ts b/test/e2e/zimMetadata.e2e.test.ts index cc60ee59..75161c15 100644 --- a/test/e2e/zimMetadata.e2e.test.ts +++ b/test/e2e/zimMetadata.e2e.test.ts @@ -23,6 +23,7 @@ describe('zimMetadata', () => { customZimLongDescription: 'Example of the long description', customZimTitle: 'Example of the title', publisher: 'Example of the publisher', + forceRender: 'WikimediaDesktop', } test('check all zim metadata using zimdump', async () => { diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 5aadd9d4..76a18810 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -11,43 +11,43 @@ describe('BaseURLDirector', () => { }) }) - describe('buildRestApiURL', () => { + describe('buildWikimediaApiURL', () => { it('should return rest URL with provided path and trailing char at the end', () => { - const url = baseUrlDirector.buildRestApiURL('api/rest_v2') + const url = baseUrlDirector.buildWikimediaApiURL('api/rest_v2') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/') }) it('should return rest URL with default path and trailing char at the end', () => { - const url = baseUrlDirector.buildRestApiURL() + const url = baseUrlDirector.buildWikimediaApiURL() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/') }) }) - describe('buildMobileRestApiURL', () => { + describe('buildMobileApiUrl', () => { 
it('should return mobile rest URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildMobileRestApiURL('api/rest_v2/page/mobile-html') + const url = baseUrlDirector.buildMobileApiUrl('api/rest_v2/page/mobile-html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') }) it('should return mobile rest URL with default path and trailing char', () => { - const url = baseUrlDirector.buildMobileRestApiURL() + const url = baseUrlDirector.buildMobileApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') }) }) - describe('buildDesktopRestApiURL', () => { + describe('buildDesktopApiUrl', () => { it('should return a desktop URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL('api/rest_v2/page/html') + const url = baseUrlDirector.buildDesktopApiUrl('api/rest_v2/page/html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/html/') }) it('should return a desktop URL with default path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL() + const url = baseUrlDirector.buildDesktopApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/') }) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index e0dd8504..f4a208f7 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -32,8 +32,8 @@ describe('Downloader class', () => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() }) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index 8c92d42c..9a6687ee 100644 --- 
a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -65,7 +65,7 @@ describe('RendererBuilder', () => { const { MediaWiki } = await setupScrapeClasses() // en wikipedia // Force MediaWiki to have capability for the WikimediaDesktop for test purpose - jest.spyOn(MediaWiki, 'hasWikimediaDesktopRestApi').mockResolvedValue(true) + jest.spyOn(MediaWiki, 'hasWikimediaDesktopApi').mockResolvedValue(true) const rendererBuilderOptions = { MediaWiki, @@ -81,8 +81,8 @@ describe('RendererBuilder', () => { it('should throw an error for unknown RendererAPI in specific mode', async () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 7fe36b07..05e7dec5 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -20,10 +20,10 @@ describe('saveArticles', () => { test('Article html processing', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const _articlesDetail = await downloader.getArticleDetailsIds(['London']) const articlesDetail = mwRetToArticleDetail(_articlesDetail) const { articleDetailXId } = RedisStore @@ -45,6 +45,7 @@ describe('saveArticles', () => { downloader, dump, true, + 'WikimediaDesktop', ) // Successfully 
scrapped existent articles @@ -148,7 +149,7 @@ describe('saveArticles', () => { test('Load main page and check that it is without header', async () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const articleId = 'Main_Page' const articleUrl = getArticleUrl(downloader, dump, articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) @@ -244,8 +245,8 @@ describe('saveArticles', () => { test('--customFlavour', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { @@ -294,6 +295,7 @@ describe('saveArticles', () => { downloader, dump, true, + 'WikimediaDesktop', ) const ParisDocument = domino.createDocument(writtenArticles.Paris.bufferData) diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 49001797..03b17b5c 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -140,10 +140,10 @@ describe('Styles', () => { await RedisStore.redirectsXId.flush() const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') await 
getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) diff --git a/test/unit/webpAndRedirection.test.ts b/test/unit/webpAndRedirection.test.ts index 4a094a76..33f93150 100644 --- a/test/unit/webpAndRedirection.test.ts +++ b/test/unit/webpAndRedirection.test.ts @@ -35,6 +35,7 @@ Real-time computer graphics` outputDirectory: testId, redis: process.env.REDIS, webp: true, + forceRender: 'WikimediaDesktop', }) const zimFile = new ZimReader(outFiles[0].outFile) diff --git a/test/util.ts b/test/util.ts index 2f01b581..7625cb78 100644 --- a/test/util.ts +++ b/test/util.ts @@ -37,8 +37,8 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() const dump = new Dump(format, {} as any, MediaWiki.metaData) From daa7d138ee967639f58a2ec450248bc24623d6ce Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 11:23:17 +0300 Subject: [PATCH 10/58] Update naming for mw api across mwoffliner --- src/MediaWiki.ts | 22 +++++++++++----------- src/mwoffliner.lib.ts | 4 ++-- src/parameterList.ts | 4 ++-- src/types.d.ts | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index dd26d16e..dfba0e71 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -44,10 +44,10 @@ class MediaWiki { public queryOpts: QueryOpts #wikiPath: string - #restApiPath: string + #apiPath: string #username: string #password: string - #apiPath: string + #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector private wikimediaDesktopUrlDirector: DesktopURLDirector @@ -75,12 +75,12 @@ class MediaWiki { this.#password = value } 
- set apiPath(value: string) { - this.#apiPath = value + set apiActionPath(value: string) { + this.#apiActionPath = value } - set restApiPath(value: string) { - this.#restApiPath = value + set apiPath(value: string) { + this.#apiPath = value } set domain(value: string) { @@ -109,7 +109,7 @@ class MediaWiki { this.namespaces = {} this.namespacesToMirror = [] - this.#apiPath = 'w/api.php' + this.#apiActionPath = 'w/api.php' this.#wikiPath = 'wiki/' this.apiCheckArticleId = 'MediaWiki:Sidebar' @@ -179,11 +179,11 @@ class MediaWiki { private initMWApis() { const baseUrlDirector = new BaseURLDirector(this.baseUrl.href) this.webUrl = baseUrlDirector.buildURL(this.#wikiPath) - this.apiUrl = baseUrlDirector.buildURL(this.#apiPath) + this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#restApiPath) - this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#restApiPath) + this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) + this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) @@ -416,7 +416,7 @@ class MediaWiki { webUrlPath: this.webUrl.pathname, wikiPath: this.#wikiPath, baseUrl: this.baseUrl.href, - apiPath: this.#apiPath, + apiActionPath: this.#apiActionPath, domain: this.#domain, textDir: textDir as TextDirection, diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 752ddbec..0fb4f223 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -75,8 +75,8 @@ async function execute(argv: any) { keepEmptyParagraphs, mwUrl, mwWikiPath, + mwActionApiPath, mwApiPath, - mwRestApiPath, 
mwModulePath, mwDomain, mwUsername, @@ -158,8 +158,8 @@ async function execute(argv: any) { /* Wikipedia/... URL; Normalize by adding trailing / as necessary */ MediaWiki.base = mwUrl MediaWiki.getCategories = !!argv.getCategories + MediaWiki.apiActionPath = mwActionApiPath MediaWiki.apiPath = mwApiPath - MediaWiki.restApiPath = mwRestApiPath MediaWiki.modulePathOpt = mwModulePath MediaWiki.domain = mwDomain MediaWiki.password = mwPassword diff --git a/src/parameterList.ts b/src/parameterList.ts index 6f31e797..eee869e4 100644 --- a/src/parameterList.ts +++ b/src/parameterList.ts @@ -17,8 +17,8 @@ export const parameterDescriptions = { 'Specify a flavour for the scraping. If missing, scrape all article contents. Each --format argument will cause a new local file to be created but options can be combined. Supported options are:\n * novid: no video & audio content\n * nopic: no pictures (implies "novid")\n * nopdf: no PDF files\n * nodet: only the first/head paragraph (implies "novid")\nFormat names can also be aliased using a ":"\nExample: "... 
--format=nopic:mini --format=novid,nopdf"', keepEmptyParagraphs: 'Keep all paragraphs, even empty ones.', mwWikiPath: 'Mediawiki wiki base path (per default "/wiki/")', - mwApiPath: 'Mediawiki API path (per default "/w/api.php")', - mwRestApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', + mwActionApiPath: 'Mediawiki action API path (per default "/w/api.php")', + mwApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', mwModulePath: 'Mediawiki module load path (per default "/w/load.php")', mwDomain: 'Mediawiki user domain (thought for private wikis)', mwUsername: 'Mediawiki username (thought for private wikis)', diff --git a/src/types.d.ts b/src/types.d.ts index 9e25f752..00856ca6 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -159,7 +159,7 @@ interface MWMetaData { baseUrl: string wikiPath: string - apiPath: string + apiActionPath: string domain: string webUrl: string apiUrl: string @@ -178,8 +178,8 @@ interface MWNamespaces { interface MWConfig { base: string wikiPath?: string + apiActionPath?: string apiPath?: string - restApiPath?: string domain?: string username?: string password?: string From 37a37c5c087c886f9053ac62145c93feaefdac18 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 25 Sep 2023 17:58:00 +0300 Subject: [PATCH 11/58] Split article treatment flow for mobile render to represent images inside figure tag --- res/mobile_article_page.css | 9 ++++ res/script.js | 15 ++++++ src/Downloader.ts | 7 +++ src/config.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 56 ++++++++++------------ 5 files changed, 56 insertions(+), 32 deletions(-) create mode 100644 res/mobile_article_page.css diff --git a/res/mobile_article_page.css b/res/mobile_article_page.css new file mode 100644 index 00000000..6de633f2 --- /dev/null +++ b/res/mobile_article_page.css @@ -0,0 +1,9 @@ +body { + margin: 0 auto; +} +.reference-link::after { + content: none !important; +} +.mw-body h3, .mw-body h2 { + width: auto; +} diff --git 
a/res/script.js b/res/script.js index 2e484cdb..cdb654c6 100644 --- a/res/script.js +++ b/res/script.js @@ -27,6 +27,21 @@ window.onload = function () { /* Add the user-agent to allow dedicated CSS rules (like for KaiOS) */ document.querySelector('body').setAttribute('data-useragent', navigator.userAgent); + + // Check if there is a PCS output page + if (document.querySelector('#pcs')) { + document.addEventListener("DOMContentLoaded", function() { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + }); + } + } /* WebP Polyfill */ diff --git a/src/Downloader.ts b/src/Downloader.ts index 12e48210..fd6f8034 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -198,6 +198,13 @@ class Downloader { break } break + case 'WikimediaMobile': + if (MediaWiki.hasWikimediaMobileApi()) { + this.baseUrl = MediaWiki.mobileApiUrl.href + this.baseUrlForMainPage = MediaWiki.mobileApiUrl.href + break + } + break default: throw new Error('Unable to find specific API end-point to retrieve article HTML') } diff --git a/src/config.ts b/src/config.ts index d30ae925..68d4338c 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,6 +56,7 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], + mobileArticleCssResources: ['mobile_article_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 7a749618..67cb0d02 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ 
b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,10 +1,11 @@ import * as domino from 'domino' import * as logger from '../Logger.js' +import { config } from '../config.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' -type PipeFunction = (data: string) => string +type PipeFunction = (value: DominoElement) => DominoElement | Promise // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { @@ -27,25 +28,32 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - const finalHTMLDoc = domino.createDocument(finalHTML) - const mobileHTML = this.pipeMobileTransformations( - finalHTMLDoc, - this.addMobileModules, + let mediaDependenciesVal + let subtitlesVal + const mobileHTML = domino.createDocument(data) + const finalHTMLMobile = await this.pipeMobileTransformations( + mobileHTML, this.convertLazyLoadToImages, this.removeEditContainer, this.removeHiddenClass, + async (doc) => { + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(doc.documentElement.outerHTML, dump, articleId, articleDetail, _moduleDependencies, webp) + + mediaDependenciesVal = mediaDependencies + subtitlesVal = subtitles + return domino.createDocument(finalHTML) + }, this.restoreLinkDefaults, - this.disableClientLinkListener, + this.addMobileModules, this.overrideMobileStyles, ) result.push({ articleId, displayTitle, - html: mobileHTML.documentElement.outerHTML, - mediaDependencies, - subtitles, + html: finalHTMLMobile.documentElement.outerHTML, + mediaDependencies: mediaDependenciesVal, + subtitles: subtitlesVal, }) return result } @@ -55,8 +63,12 @@ export class 
WikimediaMobileRenderer extends Renderer { } } - private pipeMobileTransformations(value, ...fns: PipeFunction[]) { - return fns.reduce((acc, fn) => fn(acc), value) + private async pipeMobileTransformations(value: DominoElement, ...fns: PipeFunction[]): Promise { + let result: DominoElement | Promise = value + for (const fn of fns) { + result = fn(await result) + } + return result } private addMobileModules(doc: DominoElement) { @@ -156,26 +168,6 @@ export class WikimediaMobileRenderer extends Renderer { return doc } - private disableClientLinkListener(doc: DominoElement) { - const scriptEl = doc.createElement('script') - scriptEl.type = 'text/javascript' - scriptEl.text = ` - document.addEventListener("DOMContentLoaded", function() { - const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) - disabledElems.forEach((elem) => { - elem.addEventListener('click', (event) => { - event.stopPropagation(); - }, true); - }); - }); - ` - doc.head.appendChild(scriptEl) - - return doc - } - private overrideMobileStyles(doc: DominoElement) { const styleEl = doc.createElement('style') styleEl.innerHTML = ` From 218e53271b08d91d438c3be161c70dba1598dffe Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 26 Sep 2023 17:15:53 +0300 Subject: [PATCH 12/58] Apply css and js module download for WikimediaMobile render --- res/script.js | 15 ------ res/templates/page.html | 41 +++++++--------- src/Downloader.ts | 21 +++++++- src/Dump.ts | 1 + src/MediaWiki.ts | 3 ++ src/config.ts | 1 - src/mwoffliner.lib.ts | 1 + src/renderers/abstract.renderer.ts | 54 ++++++++++++++------- src/renderers/renderer.builder.ts | 6 ++- src/renderers/wikimedia-mobile.renderer.ts | 27 ----------- src/types.d.ts | 1 + src/util/builders/url/base.director.ts | 7 +++ src/util/misc.ts | 6 +++ src/util/saveArticles.ts | 22 +++++---- 
test/e2e/mobileRenderIntegrity.test.ts | 56 ++++++++++++++++++++++ 15 files changed, 167 insertions(+), 95 deletions(-) create mode 100644 test/e2e/mobileRenderIntegrity.test.ts diff --git a/res/script.js b/res/script.js index cdb654c6..2e484cdb 100644 --- a/res/script.js +++ b/res/script.js @@ -27,21 +27,6 @@ window.onload = function () { /* Add the user-agent to allow dedicated CSS rules (like for KaiOS) */ document.querySelector('body').setAttribute('data-useragent', navigator.userAgent); - - // Check if there is a PCS output page - if (document.querySelector('#pcs')) { - document.addEventListener("DOMContentLoaded", function() { - const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) - disabledElems.forEach((elem) => { - elem.addEventListener('click', (event) => { - event.stopPropagation(); - }, true); - }); - }); - } - } /* WebP Polyfill */ diff --git a/res/templates/page.html b/res/templates/page.html index 3035a043..07dec78f 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -1,31 +1,24 @@ - - - - - - __ARTICLE_CANONICAL_LINK__ - __ARTICLE_CSS_LIST__ - __CSS_LINKS__ - __JS_SCRIPTS__ - - - -
-
-
- -
-

-
+ + + + __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ + + +
+
+
+ +
+

+
-
- __ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - - + __ARTICLE_CONFIGVARS_LIST__ + __ARTICLE_JS_LIST__ + diff --git a/src/Downloader.ts b/src/Downloader.ts index fd6f8034..a3066079 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -87,6 +87,8 @@ class Downloader { public arrayBufferRequestOptions: AxiosRequestConfig public jsonRequestOptions: AxiosRequestConfig public streamRequestOptions: AxiosRequestConfig + public mobileJsDependenciesList: string[] = [] + public mobileStyleDependenciesList: string[] = [] private readonly uaString: string private activeRequests = 0 @@ -694,7 +696,24 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view' - return { jsConfigVars, jsDependenciesList, styleDependenciesList } + // Download mobile page dependencies only once + if (this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}${title}`) + mobileModulesData.forEach((module: string) => { + if (module.includes('javascript')) { + this.mobileJsDependenciesList.push(module) + } else if (module.includes('css')) { + this.mobileStyleDependenciesList.push(module) + } + }) + } + return { + jsConfigVars, + jsDependenciesList, + styleDependenciesList, + mobileJsDependenciesList: this.mobileJsDependenciesList, + mobileStyleDependenciesList: this.mobileStyleDependenciesList, + } } // Solution to handle aws js sdk v3 from https://github.com/aws/aws-sdk-js-v3/issues/1877 diff --git a/src/Dump.ts b/src/Dump.ts index f467bbea..cbc1e278 100644 --- a/src/Dump.ts +++ b/src/Dump.ts @@ -29,6 +29,7 @@ interface DumpOpts { keepEmptyParagraphs: boolean tags?: string filenameDate: string + isMobileRenderer: boolean } export class Dump { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index dfba0e71..9780bd76 100644 --- a/src/MediaWiki.ts +++ 
b/src/MediaWiki.ts @@ -58,6 +58,7 @@ class MediaWiki { public apiUrl: URL public modulePath: string // only for reading public _modulePathOpt: string // only for whiting to generate modulePath + public mobileModulePath: string public webUrl: URL public desktopApiUrl: URL public mobileApiUrl: URL @@ -185,6 +186,7 @@ class MediaWiki { this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) + this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) @@ -413,6 +415,7 @@ class MediaWiki { webUrl: this.webUrl.href, apiUrl: this.apiUrl.href, modulePath: this.modulePath, + mobileModulePath: this.mobileModulePath, webUrlPath: this.webUrl.pathname, wikiPath: this.#wikiPath, baseUrl: this.baseUrl.href, diff --git a/src/config.ts b/src/config.ts index 68d4338c..d30ae925 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,7 +56,6 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], - mobileArticleCssResources: ['mobile_article_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 0fb4f223..a16f769d 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -338,6 +338,7 @@ async function execute(argv: any) { keepEmptyParagraphs, tags: customZimTags, filenameDate, + isMobileRenderer: false, }, { ...mwMetaData, mainPage }, customProcessor, diff --git a/src/renderers/abstract.renderer.ts 
b/src/renderers/abstract.renderer.ts index a6522ca0..0a986f31 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,6 +20,8 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, + genHeaderMobileScript, + genHeaderMobileCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -466,27 +468,43 @@ export abstract class Renderer { articleDetail: ArticleDetail, articleDetailXId: RKVS, ): Promise { - const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { + const { jsConfigVars, jsDependenciesList, styleDependenciesList, mobileJsDependenciesList, mobileStyleDependenciesList } = moduleDependencies as { jsConfigVars: string | RegExpExecArray jsDependenciesList: string[] styleDependenciesList: string[] - } - - const htmlTemplateDoc = domino.createDocument( - htmlTemplateCode(articleId) - .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') - .replace( - '__ARTICLE_JS_LIST__', - jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', - ) - .replace( - '__ARTICLE_CSS_LIST__', - styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ), - ) + mobileJsDependenciesList: string[] + mobileStyleDependenciesList: string[] + } + + const isMobileRenderer = dump.opts.isMobileRenderer + + // Conditional replacements based on mobile render enabling + const articleConfigVarsList = isMobileRenderer ? '' : jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '' + const articleJsList = isMobileRenderer + ? 
mobileJsDependenciesList.length !== 0 + ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderMobileScript(oneMobJsDep)).join('\n') + : '' + : jsDependenciesList.length !== 0 + ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '' + const articleCssList = isMobileRenderer + ? mobileStyleDependenciesList.length !== 0 + ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderMobileCSSLink(oneMobCssDep)).join('\n') + : '' + : styleDependenciesList.length !== 0 + ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '' + + // Perform replacements + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', articleConfigVarsList) + .replace('__ARTICLE_JS_LIST__', articleJsList) + .replace('__ARTICLE_CSS_LIST__', articleCssList) + .replace('__JS_SCRIPTS_MOBILE__', isMobileRenderer ? articleJsList : '') + .replace('__CSS_LINKS_MOBILE__', isMobileRenderer ? 
articleCssList : '') + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) /* Create final document by merging template and parsoid documents */ htmlTemplateDoc.getElementById('mw-content-text').style.setProperty('direction', dump.mwMetaData.textDir) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index cffb1cab..d0f17908 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -4,10 +4,11 @@ import { VisualEditorRenderer } from './visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' +import { Dump } from './../Dump.js' import * as logger from './../Logger.js' export class RendererBuilder { - public async createRenderer(options: RendererBuilderOptions): Promise { + public async createRenderer(options: RendererBuilderOptions, dump: Dump): Promise { const { renderType, renderName } = options const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ @@ -29,6 +30,7 @@ export class RendererBuilder { } case 'mobile': if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') @@ -40,6 +42,7 @@ export class RendererBuilder { } else if (hasVisualEditorApi) { return new VisualEditorRenderer() } else if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -62,6 +65,7 @@ export class RendererBuilder { process.exit(1) case 'WikimediaMobile': if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-mobile.renderer.ts 
b/src/renderers/wikimedia-mobile.renderer.ts index 67cb0d02..3b741f39 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,6 +1,5 @@ import * as domino from 'domino' import * as logger from '../Logger.js' -import { config } from '../config.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' @@ -44,7 +43,6 @@ export class WikimediaMobileRenderer extends Renderer { return domino.createDocument(finalHTML) }, this.restoreLinkDefaults, - this.addMobileModules, this.overrideMobileStyles, ) @@ -71,31 +69,6 @@ export class WikimediaMobileRenderer extends Renderer { return result } - private addMobileModules(doc: DominoElement) { - const protocol = 'https://' - // TODO: query this instead of hardcoding. - const offlineResourcesCSSList = [ - 'meta.wikimedia.org/api/rest_v1/data/css/mobile/base', - 'meta.wikimedia.org/api/rest_v1/data/css/mobile/pcs', - 'en.wikipedia.org/api/rest_v1/data/css/mobile/site', - ] - const offlineResourcesJSList = ['meta.wikimedia.org/api/rest_v1/data/javascript/mobile/pcs'] - - offlineResourcesCSSList.forEach((cssUrl) => { - const linkEl = doc.createElement('link') as DominoElement - Object.assign(linkEl, { rel: 'stylesheet', href: `${protocol}${cssUrl}` }) - doc.head.appendChild(linkEl) - }) - - offlineResourcesJSList.forEach((jsUrl) => { - const scriptEl = doc.createElement('script') as DominoElement - scriptEl.setAttribute('src', `${protocol}${jsUrl}`) - doc.head.appendChild(scriptEl) - }) - - return doc - } - private removeEditContainer(doc: DominoElement) { const editContainers = doc.querySelectorAll('.pcs-edit-section-link-container') diff --git a/src/types.d.ts b/src/types.d.ts index 00856ca6..1170a392 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -165,6 +165,7 @@ interface MWMetaData { apiUrl: string webUrlPath: string modulePath: string + 
mobileModulePath: string } interface MWNamespaces { diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 922d2b9e..d91c6f13 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -41,4 +41,11 @@ export default class BaseURLDirector { .setPath(path ?? 'w/load.php') .build(false, '?') } + + buildMobileModuleURL(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 'api/rest_v1/page/mobile-html-offline-resources') + .build(false, '/') + } } diff --git a/src/util/misc.ts b/src/util/misc.ts index ee31a5f8..6ca59343 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -198,6 +198,9 @@ export function genHeaderCSSLink(config: Config, css: string, articleId: string, const upStr = '../'.repeat(slashesInUrl + 1) return `` } +export function genHeaderMobileCSSLink(css: string) { + return `` +} export function genHeaderScript(config: Config, js: string, articleId: string, subDirectory = '', attributes = '') { const resourceNamespace = '-' const slashesInUrl = articleId.split('/').length - 1 @@ -205,6 +208,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } +export function genHeaderMobileScript(js: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index a9c45b57..43ed278b 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -245,17 +245,23 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let articlesRenderer if (forceRender) { // All articles and main page will use the same renderer if 'forceRender' is specified - const renderer = await rendererBuilder.createRenderer({ - renderType: 'specific', - renderName: forceRender, - }) + const renderer = await rendererBuilder.createRenderer( + { + renderType: 'specific', + renderName: forceRender, + }, + dump, + ) mainPageRenderer = renderer articlesRenderer = renderer } else { - mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) - articlesRenderer = await rendererBuilder.createRenderer({ - renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', - }) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }, dump) + articlesRenderer = await rendererBuilder.createRenderer( + { + renderType: hasWikimediaMobileApi ? 
'mobile' : 'auto', + }, + dump, + ) } if (dump.customProcessor?.shouldKeepArticle) { diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts new file mode 100644 index 00000000..c87e8700 --- /dev/null +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -0,0 +1,56 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { zimcheckAvailable, zimdumpAvailable, zimcheck } from '../util.js' + +jest.setTimeout(200000) + +let zimcheckIsAvailable +let zimdumpIsAvailable + +beforeAll(async () => { + zimcheckIsAvailable = await zimcheckAvailable() + zimdumpIsAvailable = await zimdumpAvailable() +}) + +async function getOutFiles(testId: string, articleList: string, mwUrl: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'mail@mail.com', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: 'WikimediaMobile', + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +const commonTreatmentTest = async (articleList: string, mwUrl: string) => { + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' + console.log(`${missingTool} not installed, skipping test`) + return + } + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl) + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + + rimraf.sync(`./${testId}`) +} + +describe('Mobile render zim file integrity', () => { + const mwUrl = 'https://en.wikipedia.org' + const articleList = 'Canada' + + test('Test WikimediaMobile with en.wikipedia.org', async () => { + await commonTreatmentTest(articleList, mwUrl) + }) +}) From baa55ae04ce1bf0e0b026d3edf93615756f3cde1 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 27 Sep 2023 20:54:07 +0300 Subject: [PATCH 13/58] Refactor modules for mobile renderer --- res/templates/page.html | 2 +- src/Downloader.ts | 23 +++++--- src/mwoffliner.lib.ts | 18 +++++- src/renderers/abstract.renderer.ts | 64 +++++++++++---------- src/renderers/renderer.builder.ts | 6 +- src/renderers/wikimedia-desktop.renderer.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 1 + src/util/dump.ts | 27 +++++++-- src/util/misc.ts | 6 -- src/util/saveArticles.ts | 32 ++++++----- test/e2e/mobileRenderIntegrity.test.ts | 3 +- 11 files changed, 111 insertions(+), 72 deletions(-) diff --git a/res/templates/page.html b/res/templates/page.html index 07dec78f..ac3fcd47 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -19,6 +19,6 @@

__ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ + __ARTICLE_JS_LIST__ diff --git a/src/Downloader.ts b/src/Downloader.ts index a3066079..dcc730c1 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -697,15 +697,20 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view' // Download mobile page dependencies only once - if (this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { - const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}${title}`) - mobileModulesData.forEach((module: string) => { - if (module.includes('javascript')) { - this.mobileJsDependenciesList.push(module) - } else if (module.includes('css')) { - this.mobileStyleDependenciesList.push(module) - } - }) + if ((await MediaWiki.hasWikimediaMobileApi()) && this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + try { + // TODO: An arbitrary title can be placed since all Wikimedia wikis have the same mobile offline resources + const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) + mobileModulesData.forEach((module: string) => { + if (module.includes('javascript')) { + this.mobileJsDependenciesList.push(module.replace('//', '')) + } else if (module.includes('css')) { + this.mobileStyleDependenciesList.push(module.replace('//', '')) + } + }) + } catch (err) { + throw new Error(`Error getting mobile modules ${err.message}`) + } } return { jsConfigVars, diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index a16f769d..edb8028a 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -422,17 +422,31 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) + const { 
jsModuleDependencies, cssModuleDependencies, jsMobileModuleDependencies, cssMobileModuleDependencies } = await saveArticles( + zimCreator, + downloader, + dump, + hasWikimediaMobileApi, + forceRender, + ) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) - const allDependenciesWithType = [ + let allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, ] + if (dump.opts.isMobileRenderer) { + allDependenciesWithType = [ + ...allDependenciesWithType, + { type: 'mobileJs', moduleList: Array.from(jsMobileModuleDependencies) }, + { type: 'mobileCss', moduleList: Array.from(cssMobileModuleDependencies) }, + ] + } + if (downloader.webp) { logger.log('Downloading polyfill module') importPolyfillModules(zimCreator) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 0a986f31..4a0938cd 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,8 +20,6 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, - genHeaderMobileScript, - genHeaderMobileCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -476,33 +474,41 @@ export abstract class Renderer { mobileStyleDependenciesList: string[] } - const isMobileRenderer = dump.opts.isMobileRenderer - - // Conditional replacements based on mobile render enabling - const articleConfigVarsList = isMobileRenderer ? '' : jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '' - const articleJsList = isMobileRenderer - ? mobileJsDependenciesList.length !== 0 - ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderMobileScript(oneMobJsDep)).join('\n') - : '' - : jsDependenciesList.length !== 0 - ? 
jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '' - const articleCssList = isMobileRenderer - ? mobileStyleDependenciesList.length !== 0 - ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderMobileCSSLink(oneMobCssDep)).join('\n') - : '' - : styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '' - - // Perform replacements - const htmlTemplateString = htmlTemplateCode(articleId) - .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - .replace('__ARTICLE_CONFIGVARS_LIST__', articleConfigVarsList) - .replace('__ARTICLE_JS_LIST__', articleJsList) - .replace('__ARTICLE_CSS_LIST__', articleCssList) - .replace('__JS_SCRIPTS_MOBILE__', isMobileRenderer ? articleJsList : '') - .replace('__CSS_LINKS_MOBILE__', isMobileRenderer ? articleCssList : '') + let htmlTemplateString = htmlTemplateCode(articleId).replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + + if (!dump.opts.isMobileRenderer) { + htmlTemplateString = htmlTemplateString + .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') + .replace( + '__ARTICLE_JS_LIST__', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + styleDependenciesList.length !== 0 + ? 
styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__JS_SCRIPTS_MOBILE__', '') + .replace('__CSS_LINKS_MOBILE__', '') + } else { + htmlTemplateString = htmlTemplateString + .replace('__ARTICLE_CONFIGVARS_LIST__', '') + .replace('__ARTICLE_JS_LIST__', '') + .replace('__ARTICLE_CSS_LIST__', '') + .replace( + '__JS_SCRIPTS_MOBILE__', + mobileJsDependenciesList.length !== 0 + ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__CSS_LINKS_MOBILE__', + mobileStyleDependenciesList.length !== 0 + ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + } const htmlTemplateDoc = domino.createDocument(htmlTemplateString) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index d0f17908..cffb1cab 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -4,11 +4,10 @@ import { VisualEditorRenderer } from './visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' -import { Dump } from './../Dump.js' import * as logger from './../Logger.js' export class RendererBuilder { - public async createRenderer(options: RendererBuilderOptions, dump: Dump): Promise { + public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ @@ -30,7 +29,6 @@ export class RendererBuilder { } case 'mobile': if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new 
WikimediaMobileRenderer() } logger.error('No available mobile renderer.') @@ -42,7 +40,6 @@ export class RendererBuilder { } else if (hasVisualEditorApi) { return new VisualEditorRenderer() } else if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -65,7 +62,6 @@ export class RendererBuilder { process.exit(1) case 'WikimediaMobile': if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 9bba2c3c..43fc23a3 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -35,6 +35,7 @@ export class WikimediaDesktopRenderer extends Renderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + dump.opts.isMobileRenderer = false const articleDetail = await renderOpts.articleDetailXId.get(articleId) // Paginate when there are more than 200 subCategories diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 3b741f39..cc3df0dc 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -23,6 +23,7 @@ export class WikimediaMobileRenderer extends Renderer { try { const result: RenderOutput = [] const { data, articleId, webp, _moduleDependencies, dump } = renderOpts + dump.opts.isMobileRenderer = true const articleDetail = await renderOpts.articleDetailXId.get(articleId) const displayTitle = this.getStrippedTitle(renderOpts) diff --git a/src/util/dump.ts b/src/util/dump.ts index 2d6e63c5..50ecaa83 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -90,7 +90,7 @@ export async function 
getAndProcessStylesheets(downloader: Downloader, links: Ar }) } -export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') { +export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css' | 'mobileJs' | 'mobileCss') { const replaceCodeByRegex = (sourceText, replaceMap: Map) => { let text: string replaceMap.forEach((textToReplace, regEx) => { @@ -117,13 +117,19 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } let apiParameterOnly - if (type === 'js') { + let moduleApiUrl: string + if (type === 'js' || type === 'mobileJs') { apiParameterOnly = 'scripts' - } else if (type === 'css') { + } else if (type === 'css' || type === 'mobileCss') { apiParameterOnly = 'styles' } - const moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + if (type === 'js' || type === 'css') { + moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + } else if (type === 'mobileJs' || type === 'mobileCss') { + moduleApiUrl = encodeURI(`https:${module}`) + } + logger.info(`Getting [${type}] module [${moduleApiUrl}]`) const { content } = await downloader.downloadContent(moduleApiUrl) @@ -141,7 +147,18 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } try { - const articleId = type === 'js' ? 
jsPath(module, config.output.dirs.mediawiki) : cssPath(module, config.output.dirs.mediawiki) + let articleId + const pathFunctions = { + js: jsPath, + css: cssPath, + mobileJs: jsPath, + mobileCss: cssPath, + } + + const pathFunction = pathFunctions[type] + if (pathFunction) { + articleId = pathFunction(module, config.output.dirs.mediawiki) + } const article = new ZimArticle({ url: articleId, data: text, ns: '-' }) zimCreator.addArticle(article) logger.info(`Saved module [${module}]`) diff --git a/src/util/misc.ts b/src/util/misc.ts index 6ca59343..ee31a5f8 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -198,9 +198,6 @@ export function genHeaderCSSLink(config: Config, css: string, articleId: string, const upStr = '../'.repeat(slashesInUrl + 1) return `` } -export function genHeaderMobileCSSLink(css: string) { - return `` -} export function genHeaderScript(config: Config, js: string, articleId: string, subDirectory = '', attributes = '') { const resourceNamespace = '-' const slashesInUrl = articleId.split('/').length - 1 @@ -208,9 +205,6 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } -export function genHeaderMobileScript(js: string) { - return `` -} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 43ed278b..0e41b40a 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -234,6 +234,8 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() + const jsMobileModuleDependencies = new Set() + const cssMobileModuleDependencies = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -245,23 +247,17 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let articlesRenderer if (forceRender) { // All articles and main page will use the same renderer if 'forceRender' is specified - const renderer = await rendererBuilder.createRenderer( - { - renderType: 'specific', - renderName: forceRender, - }, - dump, - ) + const renderer = await rendererBuilder.createRenderer({ + renderType: 'specific', + renderName: forceRender, + }) mainPageRenderer = renderer articlesRenderer = renderer } else { - mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }, dump) - articlesRenderer = await rendererBuilder.createRenderer( - { - renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', - }, - dump, - ) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) + articlesRenderer = await rendererBuilder.createRenderer({ + renderType: hasWikimediaMobileApi ? 
'mobile' : 'auto', + }) } if (dump.customProcessor?.shouldKeepArticle) { @@ -317,6 +313,12 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade for (const dep of _moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } + for (const dep of _moduleDependencies.mobileJsDependenciesList) { + jsMobileModuleDependencies.add(dep) + } + for (const dep of _moduleDependencies.mobileStyleDependenciesList) { + cssMobileModuleDependencies.add(dep) + } jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars /* @@ -398,5 +400,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade return { jsModuleDependencies, cssModuleDependencies, + jsMobileModuleDependencies, + cssMobileModuleDependencies, } } diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index c87e8700..0df9fc9d 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -48,7 +48,8 @@ const commonTreatmentTest = async (articleList: string, mwUrl: string) => { describe('Mobile render zim file integrity', () => { const mwUrl = 'https://en.wikipedia.org' - const articleList = 'Canada' + // TODO: some articles such as 'Canada' don't pass this test even with desktop renderer + const articleList = 'BMW' test('Test WikimediaMobile with en.wikipedia.org', async () => { await commonTreatmentTest(articleList, mwUrl) From d89bb92d40116012573346cf623a36ed5b509306 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 28 Sep 2023 10:24:13 +0300 Subject: [PATCH 14/58] Add PCS override script to enable links for mobile render --- res/pcs/pcs_override_script.js | 15 +++++++++++++++ res/templates/page.html | 1 + src/config.ts | 1 + src/mwoffliner.lib.ts | 4 ++++ src/renderers/abstract.renderer.ts | 3 +++ src/util/misc.ts | 16 ++++++++++++++++ 6 files changed, 40 insertions(+) create mode 100644 res/pcs/pcs_override_script.js diff --git 
a/res/pcs/pcs_override_script.js b/res/pcs/pcs_override_script.js new file mode 100644 index 00000000..240d5bb0 --- /dev/null +++ b/res/pcs/pcs_override_script.js @@ -0,0 +1,15 @@ +function importScript() { return 1 } // this is to avoid the error from site.js + +window.onload = function () { + // Check if there is a PCS output page + if (document.querySelector('#pcs')) { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + } +} diff --git a/res/templates/page.html b/res/templates/page.html index ac3fcd47..eb49043a 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -20,5 +20,6 @@

__ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__ + __PCS_JS_OVERRIDE__ diff --git a/src/config.ts b/src/config.ts index d30ae925..fb55aef3 100644 --- a/src/config.ts +++ b/src/config.ts @@ -58,6 +58,7 @@ const config = { mainPageCssResources: ['mobile_main_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], + pcsJsResources: ['pcs_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index edb8028a..42fefce3 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,6 +37,7 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, + saveStaticPCSFiles, importPolyfillModules, extractArticleList, getTmpDirectory, @@ -403,6 +404,9 @@ async function execute(argv: any) { logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) + logger.info('Copying Static PCS Files') + await saveStaticPCSFiles(config, zimCreator) + logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) logger.log(`Found [${stylesheetsToGet.length}] stylesheets to download`) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 4a0938cd..a4beb8ba 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,6 +20,7 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, + genPCSOverrideScript, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -491,6 +492,7 @@ export abstract class Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_JS_OVERRIDE__', '') } else { htmlTemplateString = htmlTemplateString .replace('__ARTICLE_CONFIGVARS_LIST__', '') @@ -508,6 +510,7 @@ export abstract class Renderer { ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) + .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) } const htmlTemplateDoc = domino.createDocument(htmlTemplateString) diff --git a/src/util/misc.ts b/src/util/misc.ts index ee31a5f8..3a85acf6 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -185,6 +185,19 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { return Promise.all([...cssPromises, ...jsPromises]) } +export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { + const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { + try { + const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) + const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) + zimCreator.addArticle(article) + } catch (error) { + logger.warn(`Could not create pcs override ${pcsJs} file : ${error}`) + } + }) + return pcsJsPromises +} + export function cssPath(css: string, subDirectory = '') { return `${subDirectory ? `${subDirectory}/` : ''}${css.replace(/(\.css)?$/, '')}.css` } @@ -205,6 +218,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } +export function genPCSOverrideScript(js: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } From 5a908658e3e93db8bc74f3794d95fefcd361829f Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 28 Sep 2023 12:34:46 +0300 Subject: [PATCH 15/58] Enable regular behaviour of links for mobile render --- res/pcs/pcs_override_script.js | 4 ++-- .../pcs_override_style.css} | 6 ++++++ res/templates/page.html | 2 +- src/config.ts | 1 + src/mwoffliner.lib.ts | 3 ++- src/renderers/abstract.renderer.ts | 3 +++ src/renderers/wikimedia-mobile.renderer.ts | 19 ------------------- src/util/misc.ts | 17 +++++++++++++++-- 8 files changed, 30 insertions(+), 25 deletions(-) rename res/{mobile_article_page.css => pcs/pcs_override_style.css} (51%) diff --git a/res/pcs/pcs_override_script.js b/res/pcs/pcs_override_script.js index 240d5bb0..d39c7b2d 100644 --- a/res/pcs/pcs_override_script.js +++ b/res/pcs/pcs_override_script.js @@ -4,8 +4,8 @@ window.onload = function () { // Check if there is a PCS output page if (document.querySelector('#pcs')) { const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + const linkElements = document.querySelectorAll('a'); + const disabledElems = Array.from(supElements).concat(Array.from(linkElements)) disabledElems.forEach((elem) => { elem.addEventListener('click', (event) => { event.stopPropagation(); diff --git a/res/mobile_article_page.css b/res/pcs/pcs_override_style.css similarity index 51% rename from res/mobile_article_page.css rename to res/pcs/pcs_override_style.css index 6de633f2..c4d61088 100644 --- a/res/mobile_article_page.css +++ b/res/pcs/pcs_override_style.css @@ -1,6 +1,12 @@ body { margin: 0 auto; } +p#pcs-edit-section-add-title-description { + display: none 
!important; +} +span.noviewer { + display: none !important; +} .reference-link::after { content: none !important; } diff --git a/res/templates/page.html b/res/templates/page.html index eb49043a..3e2574ed 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -4,7 +4,7 @@ __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ - __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __PCS_CSS_OVERRIDE__
diff --git a/src/config.ts b/src/config.ts index fb55aef3..92856fd1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,6 +56,7 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], + pcsCssResources: ['pcs_override_style'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], pcsJsResources: ['pcs_override_script'], diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 42fefce3..32894650 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -404,7 +404,8 @@ async function execute(argv: any) { logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) - logger.info('Copying Static PCS Files') + // TODO: refactor sequence, this only needed for mobile renderer + logger.info('Copying Static PCS Override Files') await saveStaticPCSFiles(config, zimCreator) logger.info('Finding stylesheets to download') diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index a4beb8ba..1d053e93 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -21,6 +21,7 @@ import { genHeaderScript, genHeaderCSSLink, genPCSOverrideScript, + genPCSCOverrideCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -492,6 +493,7 @@ export abstract class Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_CSS_OVERRIDE__', '') .replace('__PCS_JS_OVERRIDE__', '') } else { htmlTemplateString = htmlTemplateString @@ -510,6 +512,7 @@ export abstract class Renderer { ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) + .replace('__PCS_CSS_OVERRIDE__', genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index cc3df0dc..e315c97e 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -44,7 +44,6 @@ export class WikimediaMobileRenderer extends Renderer { return domino.createDocument(finalHTML) }, this.restoreLinkDefaults, - this.overrideMobileStyles, ) result.push({ @@ -141,22 +140,4 @@ export class WikimediaMobileRenderer extends Renderer { return doc } - - private overrideMobileStyles(doc: DominoElement) { - const styleEl = doc.createElement('style') - styleEl.innerHTML = ` - body { - margin: 0 auto; - } - .reference-link::after { - content: none !important; - } - .mw-body h3, .mw-body h2 { - width: auto; - } - ` - doc.head.appendChild(styleEl) - - return doc - } } diff --git a/src/util/misc.ts b/src/util/misc.ts index 3a85acf6..2216d42b 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -186,16 +186,26 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { } export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { + const pcsCssPromises = config.output.pcsCssResources.map(async (pcsCss) => { + try { + const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsCss}.css`)) + const article = new ZimArticle({ url: cssPath(pcsCss), data: cssCont, ns: '-' }) + zimCreator.addArticle(article) + } catch (error) { + logger.warn(`Could not create style PCS override ${pcsCss} file : ${error}`) + } + }) + const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { try { const jsCont = await 
readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) zimCreator.addArticle(article) } catch (error) { - logger.warn(`Could not create pcs override ${pcsJs} file : ${error}`) + logger.warn(`Could not create script PCS override ${pcsJs} file : ${error}`) } }) - return pcsJsPromises + return Promise.all([...pcsCssPromises, ...pcsJsPromises]) } export function cssPath(css: string, subDirectory = '') { @@ -221,6 +231,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s export function genPCSOverrideScript(js: string) { return `` } +export function genPCSCOverrideCSSLink(css: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } From 2b045599921ac336f3d1f587bdc54ba5b3ae107b Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 09:05:28 +0300 Subject: [PATCH 16/58] Merge mobile and desktop modules output, replace article templating to interim DesktopRenderer and MobileRenderer --- src/Downloader.ts | 6 +- src/mwoffliner.lib.ts | 18 +----- src/renderers/abstract.renderer.ts | 71 +++------------------ src/renderers/abstractDesktop.render.ts | 47 ++++++++++++++ src/renderers/abstractMobile.render.ts | 54 ++++++++++++++++ src/renderers/visual-editor.renderer.ts | 16 +++-- src/renderers/wikimedia-desktop.renderer.ts | 14 +++- src/renderers/wikimedia-mobile.renderer.ts | 14 +++- src/util/dump.ts | 10 +-- src/util/misc.ts | 6 -- src/util/saveArticles.ts | 10 --- 11 files changed, 153 insertions(+), 113 deletions(-) create mode 100644 src/renderers/abstractDesktop.render.ts create mode 100644 src/renderers/abstractMobile.render.ts diff --git a/src/Downloader.ts b/src/Downloader.ts index dcc730c1..a0226d97 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -714,10 +714,8 @@ class Downloader { } return { jsConfigVars, - jsDependenciesList, - 
styleDependenciesList, - mobileJsDependenciesList: this.mobileJsDependenciesList, - mobileStyleDependenciesList: this.mobileStyleDependenciesList, + jsDependenciesList: jsDependenciesList.concat(this.mobileJsDependenciesList), + styleDependenciesList: styleDependenciesList.concat(this.mobileStyleDependenciesList), } } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 32894650..3c3f34cb 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -427,31 +427,17 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies, jsMobileModuleDependencies, cssMobileModuleDependencies } = await saveArticles( - zimCreator, - downloader, - dump, - hasWikimediaMobileApi, - forceRender, - ) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) - let allDependenciesWithType = [ + const allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, ] - if (dump.opts.isMobileRenderer) { - allDependenciesWithType = [ - ...allDependenciesWithType, - { type: 'mobileJs', moduleList: Array.from(jsMobileModuleDependencies) }, - { type: 'mobileCss', moduleList: Array.from(cssMobileModuleDependencies) }, - ] - } - if (downloader.webp) { logger.log('Downloading polyfill module') importPolyfillModules(zimCreator) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 1d053e93..7ae75cdd 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -9,7 +9,7 @@ import DU from '../DOMUtils.js' import { config } from 
'../config.js' import { Dump } from '../Dump.js' import { rewriteUrlsOfDoc } from '../util/rewriteUrls.js' -import { footerTemplate, htmlTemplateCode } from '../Templates.js' +import { footerTemplate } from '../Templates.js' import { getFullUrl, getMediaBase, @@ -17,11 +17,6 @@ import { getRelativeFilePath, isWebpCandidateImageMimeType, interpolateTranslationString, - genCanonicalLink, - genHeaderScript, - genHeaderCSSLink, - genPCSOverrideScript, - genPCSCOverrideCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -388,7 +383,8 @@ export abstract class Renderer { return thumbDiv } - public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean) { + // TODO: The first part of this method is common for all renders + public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean, callback) { let mediaDependencies: Array<{ url: string; path: string }> = [] let subtitles: Array<{ url: string; path: string }> = [] let doc = domino.createDocument(html) @@ -432,7 +428,8 @@ export abstract class Renderer { doc = await dump.customProcessor.preProcessArticle(articleId, doc) } - let templatedDoc = await this.templateArticle(doc, _moduleDependencies, dump, articleId, articleDetail, RedisStore.articleDetailXId) + let templatedDoc = callback(_moduleDependencies, articleId) + templatedDoc = await this.mergeTemplateDoc(templatedDoc, doc, dump, articleDetail, RedisStore.articleDetailXId, articleId) if (dump.customProcessor && dump.customProcessor.postProcessArticle) { templatedDoc = await dump.customProcessor.postProcessArticle(articleId, templatedDoc) @@ -460,64 +457,14 @@ export abstract class Renderer { } } - private async templateArticle( + private async mergeTemplateDoc( + htmlTemplateDoc: DominoElement, parsoidDoc: DominoElement, - moduleDependencies: any, dump: Dump, - articleId: string, articleDetail: ArticleDetail, 
articleDetailXId: RKVS, - ): Promise { - const { jsConfigVars, jsDependenciesList, styleDependenciesList, mobileJsDependenciesList, mobileStyleDependenciesList } = moduleDependencies as { - jsConfigVars: string | RegExpExecArray - jsDependenciesList: string[] - styleDependenciesList: string[] - mobileJsDependenciesList: string[] - mobileStyleDependenciesList: string[] - } - - let htmlTemplateString = htmlTemplateCode(articleId).replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - - if (!dump.opts.isMobileRenderer) { - htmlTemplateString = htmlTemplateString - .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') - .replace( - '__ARTICLE_JS_LIST__', - jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', - ) - .replace( - '__ARTICLE_CSS_LIST__', - styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace('__JS_SCRIPTS_MOBILE__', '') - .replace('__CSS_LINKS_MOBILE__', '') - .replace('__PCS_CSS_OVERRIDE__', '') - .replace('__PCS_JS_OVERRIDE__', '') - } else { - htmlTemplateString = htmlTemplateString - .replace('__ARTICLE_CONFIGVARS_LIST__', '') - .replace('__ARTICLE_JS_LIST__', '') - .replace('__ARTICLE_CSS_LIST__', '') - .replace( - '__JS_SCRIPTS_MOBILE__', - mobileJsDependenciesList.length !== 0 - ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace( - '__CSS_LINKS_MOBILE__', - mobileStyleDependenciesList.length !== 0 - ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace('__PCS_CSS_OVERRIDE__', genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) - .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) - } - - const htmlTemplateDoc = domino.createDocument(htmlTemplateString) - + articleId: string, + ) { /* Create final document by merging template and parsoid documents */ htmlTemplateDoc.getElementById('mw-content-text').style.setProperty('direction', dump.mwMetaData.textDir) htmlTemplateDoc.getElementById('mw-content-text').innerHTML = parsoidDoc.getElementsByTagName('body')[0].innerHTML diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts new file mode 100644 index 00000000..169d1825 --- /dev/null +++ b/src/renderers/abstractDesktop.render.ts @@ -0,0 +1,47 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class DesktopRenderer extends Renderer { + constructor() { + super() + } + + public templateDesktopArticle(moduleDependencies: any, articleId: string): Document { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { + jsConfigVars: string | RegExpExecArray + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const desktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile')) + const desktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile')) + + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, 
MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') + .replace( + '__ARTICLE_JS_LIST__', + desktopJsModuleDependencies.length !== 0 + ? desktopJsModuleDependencies.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + desktopCssModuleDependencies.length !== 0 + ? desktopCssModuleDependencies.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__JS_SCRIPTS_MOBILE__', '') + .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_CSS_OVERRIDE__', '') + .replace('__PCS_JS_OVERRIDE__', '') + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts new file mode 100644 index 00000000..426ffacb --- /dev/null +++ b/src/renderers/abstractMobile.render.ts @@ -0,0 +1,54 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class MobileRenderer extends Renderer { + constructor() { + super() + } + + private genPCSCOverrideCSSLink(css: string) { + return `` + } + + private genPCSOverrideScript(js: string) { + return `` + } + + public templateMobileArticle(moduleDependencies: any, articleId: string): Document { + const { jsDependenciesList, styleDependenciesList } = moduleDependencies as { + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const mobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) + 
const mobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', '') + .replace('__ARTICLE_JS_LIST__', '') + .replace('__ARTICLE_CSS_LIST__', '') + .replace( + '__JS_SCRIPTS_MOBILE__', + mobileJsModuleDependencies.length !== 0 + ? mobileJsModuleDependencies.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__CSS_LINKS_MOBILE__', + mobileCssModuleDependencies.length !== 0 + ? mobileCssModuleDependencies.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__PCS_CSS_OVERRIDE__', this.genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) + .replace('__PCS_JS_OVERRIDE__', this.genPCSOverrideScript(config.output.pcsJsResources[0])) + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index 4fb505d2..edece1db 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -1,6 +1,6 @@ -import { DELETED_ARTICLE_ERROR } from '../util/const.js' import * as logger from '../Logger.js' -import { Renderer } from './abstract.renderer.js' +import { DELETED_ARTICLE_ERROR } from '../util/const.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' @@ -9,7 +9,7 @@ Represent 'https://{wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=h or 
'https://{3rd-part-wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}' */ -export class VisualEditorRenderer extends Renderer { +export class VisualEditorRenderer extends DesktopRenderer { constructor() { super() } @@ -55,7 +55,15 @@ export class VisualEditorRenderer extends Renderer { const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts const { html, displayTitle } = await this.retrieveHtml(renderOpts) if (html) { - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(html, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( + html, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateDesktopArticle(_moduleDependencies, articleId), + ) result.push({ articleId, displayTitle, diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 43fc23a3..0aaf4591 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -1,10 +1,10 @@ import domino from 'domino' -import { Renderer } from './abstract.renderer.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' -export class WikimediaDesktopRenderer extends Renderer { +export class WikimediaDesktopRenderer extends DesktopRenderer { constructor() { super() } @@ -47,7 +47,15 @@ export class WikimediaDesktopRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, mediaDependencies, 
subtitles } = await super.processHtml( + dataWithHeader || data, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateDesktopArticle.bind(this), + ) result.push({ articleId: _articleId, diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index e315c97e..6250b44d 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,13 +1,13 @@ import * as domino from 'domino' import * as logger from '../Logger.js' -import { Renderer } from './abstract.renderer.js' +import { MobileRenderer } from './abstractMobile.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' type PipeFunction = (value: DominoElement) => DominoElement | Promise // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' -export class WikimediaMobileRenderer extends Renderer { +export class WikimediaMobileRenderer extends MobileRenderer { constructor() { super() } @@ -37,7 +37,15 @@ export class WikimediaMobileRenderer extends Renderer { this.removeEditContainer, this.removeHiddenClass, async (doc) => { - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(doc.documentElement.outerHTML, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml( + doc.documentElement.outerHTML, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateMobileArticle.bind(this), + ) mediaDependenciesVal = mediaDependencies subtitlesVal = subtitles diff --git a/src/util/dump.ts b/src/util/dump.ts index 50ecaa83..26e59b58 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -90,7 +90,7 @@ export async function getAndProcessStylesheets(downloader: Downloader, links: Ar }) } -export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: 
Dump, module: string, type: 'js' | 'css' | 'mobileJs' | 'mobileCss') { +export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') { const replaceCodeByRegex = (sourceText, replaceMap: Map) => { let text: string replaceMap.forEach((textToReplace, regEx) => { @@ -118,15 +118,15 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: let apiParameterOnly let moduleApiUrl: string - if (type === 'js' || type === 'mobileJs') { + if (type === 'js') { apiParameterOnly = 'scripts' - } else if (type === 'css' || type === 'mobileCss') { + } else if (type === 'css') { apiParameterOnly = 'styles' } - if (type === 'js' || type === 'css') { + if (!module.includes('javascript/mobile') && !module.includes('css/mobile')) { moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) - } else if (type === 'mobileJs' || type === 'mobileCss') { + } else { moduleApiUrl = encodeURI(`https:${module}`) } diff --git a/src/util/misc.ts b/src/util/misc.ts index 2216d42b..72c738c2 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -228,12 +228,6 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } -export function genPCSOverrideScript(js: string) { - return `` -} -export function genPCSCOverrideCSSLink(css: string) { - return `` -} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 0e41b40a..a9c45b57 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -234,8 +234,6 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() - const jsMobileModuleDependencies = new Set() - const cssMobileModuleDependencies = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -313,12 +311,6 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade for (const dep of _moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } - for (const dep of _moduleDependencies.mobileJsDependenciesList) { - jsMobileModuleDependencies.add(dep) - } - for (const dep of _moduleDependencies.mobileStyleDependenciesList) { - cssMobileModuleDependencies.add(dep) - } jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars /* @@ -400,7 +392,5 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade return { jsModuleDependencies, cssModuleDependencies, - jsMobileModuleDependencies, - cssMobileModuleDependencies, } } From 83520c6c2c62361af034b7fc8f607899480cddb9 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 09:42:05 +0300 Subject: [PATCH 17/58] Optimize static files saving --- src/Dump.ts | 1 - src/MediaWiki.ts | 4 +- src/mwoffliner.lib.ts | 7 +-- src/renderers/visual-editor.renderer.ts | 2 +- 
src/renderers/wikimedia-desktop.renderer.ts | 1 - src/renderers/wikimedia-mobile.renderer.ts | 1 - src/util/builders/url/base.director.ts | 4 +- src/util/misc.ts | 51 +++++++------------- test/e2e/mobileRenderIntegrity.test.ts | 5 +- test/unit/builders/url/base.director.test.ts | 12 ++--- 10 files changed, 33 insertions(+), 55 deletions(-) diff --git a/src/Dump.ts b/src/Dump.ts index cbc1e278..f467bbea 100644 --- a/src/Dump.ts +++ b/src/Dump.ts @@ -29,7 +29,6 @@ interface DumpOpts { keepEmptyParagraphs: boolean tags?: string filenameDate: string - isMobileRenderer: boolean } export class Dump { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 9780bd76..3c314791 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -183,8 +183,8 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) - this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) + this.desktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) + this.mobileApiUrl = baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 3c3f34cb..71d0de49 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -339,7 +339,6 @@ async function execute(argv: any) { keepEmptyParagraphs, tags: customZimTags, filenameDate, - isMobileRenderer: false, }, { ...mwMetaData, mainPage }, customProcessor, @@ -401,12 +400,10 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static Resource Files') - await saveStaticFiles(config, 
zimCreator) - - // TODO: refactor sequence, this only needed for mobile renderer logger.info('Copying Static PCS Override Files') await saveStaticPCSFiles(config, zimCreator) + logger.info('Copying Static Resource Files') + await saveStaticFiles(config, zimCreator) logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index edece1db..fb406708 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -62,7 +62,7 @@ export class VisualEditorRenderer extends DesktopRenderer { articleDetail, _moduleDependencies, webp, - super.templateDesktopArticle(_moduleDependencies, articleId), + super.templateDesktopArticle.bind(this), ) result.push({ articleId, diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 0aaf4591..074f3e76 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -35,7 +35,6 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts - dump.opts.isMobileRenderer = false const articleDetail = await renderOpts.articleDetailXId.get(articleId) // Paginate when there are more than 200 subCategories diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 6250b44d..d464ab61 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -23,7 +23,6 @@ export class WikimediaMobileRenderer extends MobileRenderer { try { const result: RenderOutput = [] const { data, articleId, webp, _moduleDependencies, dump } = renderOpts - dump.opts.isMobileRenderer = true const 
articleDetail = await renderOpts.articleDetailXId.get(articleId) const displayTitle = this.getStrippedTitle(renderOpts) diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index d91c6f13..6006441b 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -21,14 +21,14 @@ export default class BaseURLDirector { .build(true, '/') } - buildDesktopApiUrl(path?: string) { + buildWikimediaDesktopApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/html') .build(true, '/') } - buildMobileApiUrl(path?: string) { + buildWikimediaMobileApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/mobile-html') diff --git a/src/util/misc.ts b/src/util/misc.ts index 72c738c2..49a0c880 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -162,49 +162,32 @@ export function interpolateTranslationString(str: string, parameters: { [key: st return newString } -export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { - const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map(async (css) => { +function saveResourceFile(resource: string, type: 'css' | 'js', basePath: string, config: Config, zimCreator: ZimCreator) { + return async () => { try { - const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`)) - const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' }) + const content = await readFilePromise(pathParser.resolve(__dirname, `../../res/${basePath}${resource}.${type}`)) + const article = new ZimArticle({ + url: type === 'css' ? cssPath(resource) : jsPath(resource), + data: content, + ns: '-', + }) zimCreator.addArticle(article) } catch (error) { - logger.warn(`Could not create ${css} file : ${error}`) + const fileType = type === 'css' ? (basePath.includes('pcs') ? 
'style PCS override' : 'style') : 'script' + logger.warn(`Could not create ${fileType} ${resource} file : ${error}`) } - }) + } +} - const jsPromises = config.output.jsResources.map(async (js) => { - try { - const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`)) - const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create ${js} file : ${error}`) - } - }) +export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { + const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map((css) => saveResourceFile(css, 'css', '', config, zimCreator)()) + const jsPromises = config.output.jsResources.map((js) => saveResourceFile(js, 'js', '', config, zimCreator)()) return Promise.all([...cssPromises, ...jsPromises]) } export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { - const pcsCssPromises = config.output.pcsCssResources.map(async (pcsCss) => { - try { - const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsCss}.css`)) - const article = new ZimArticle({ url: cssPath(pcsCss), data: cssCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create style PCS override ${pcsCss} file : ${error}`) - } - }) - - const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { - try { - const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) - const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create script PCS override ${pcsJs} file : ${error}`) - } - }) + const pcsCssPromises = config.output.pcsCssResources.map((pcsCss) => saveResourceFile(pcsCss, 'css', 'pcs/', config, zimCreator)()) + const pcsJsPromises = config.output.pcsJsResources.map((pcsJs) => 
saveResourceFile(pcsJs, 'js', 'pcs/', config, zimCreator)()) return Promise.all([...pcsCssPromises, ...pcsJsPromises]) } diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index 0df9fc9d..2f03f270 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -1,5 +1,6 @@ import 'dotenv/config.js' import * as mwoffliner from '../../src/mwoffliner.lib.js' +import * as logger from '../../src/Logger.js' import rimraf from 'rimraf' import { execa } from 'execa' import { jest } from '@jest/globals' @@ -34,8 +35,8 @@ async function getOutFiles(testId: string, articleList: string, mwUrl: string): const commonTreatmentTest = async (articleList: string, mwUrl: string) => { if (!zimcheckIsAvailable || !zimdumpIsAvailable) { const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' - console.log(`${missingTool} not installed, skipping test`) - return + logger.error(`${missingTool} not installed, exiting test`) + process.exit(1) } const now = new Date() const testId = `mwo-test-${+now}` diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 76a18810..f679d2f9 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -25,29 +25,29 @@ describe('BaseURLDirector', () => { }) }) - describe('buildMobileApiUrl', () => { + describe('buildWikimediaMobileApiUrl', () => { it('should return mobile rest URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildMobileApiUrl('api/rest_v2/page/mobile-html') + const url = baseUrlDirector.buildWikimediaMobileApiUrl('api/rest_v2/page/mobile-html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') }) it('should return mobile rest URL with default path and trailing char', () => { - const url = baseUrlDirector.buildMobileApiUrl() + const url = baseUrlDirector.buildWikimediaMobileApiUrl() 
expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') }) }) - describe('buildDesktopApiUrl', () => { + describe('buildWikimediaDesktopApiUrl', () => { it('should return a desktop URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildDesktopApiUrl('api/rest_v2/page/html') + const url = baseUrlDirector.buildWikimediaDesktopApiUrl('api/rest_v2/page/html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/html/') }) it('should return a desktop URL with default path and trailing char', () => { - const url = baseUrlDirector.buildDesktopApiUrl() + const url = baseUrlDirector.buildWikimediaDesktopApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/') }) From 971481832052db634d24a38b15ab089ab806f850 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 11:23:37 +0300 Subject: [PATCH 18/58] Update naming for mobile and desktop api across application --- src/Downloader.ts | 30 +++++++++---------- src/MediaWiki.ts | 26 ++++++++-------- src/util/builders/url/desktop.director.ts | 2 +- src/util/builders/url/mobile.director.ts | 2 +- .../builders/url/desktop.director.test.ts | 6 ++-- .../unit/builders/url/mobile.director.test.ts | 6 ++-- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index a0226d97..4db4532e 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -87,8 +87,8 @@ class Downloader { public arrayBufferRequestOptions: AxiosRequestConfig public jsonRequestOptions: AxiosRequestConfig public streamRequestOptions: AxiosRequestConfig - public mobileJsDependenciesList: string[] = [] - public mobileStyleDependenciesList: string[] = [] + public wikimediaMobileJsDependenciesList: string[] = [] + public wikimediaMobileStyleDependenciesList: string[] = [] private readonly uaString: string private activeRequests = 0 @@ -173,23 +173,23 @@ class Downloader { if (!forceRender) { //* Objects order in array 
matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, ]) //* Objects order in array matters! this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, ]) } else { switch (forceRender) { case 'WikimediaDesktop': if (MediaWiki.hasWikimediaDesktopApi()) { - this.baseUrl = MediaWiki.desktopApiUrl.href - this.baseUrlForMainPage = MediaWiki.desktopApiUrl.href + this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href break } break @@ -202,8 +202,8 @@ class Downloader { break case 'WikimediaMobile': if (MediaWiki.hasWikimediaMobileApi()) { - this.baseUrl = MediaWiki.mobileApiUrl.href - this.baseUrlForMainPage = MediaWiki.mobileApiUrl.href + this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href break } break @@ -697,15 +697,15 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 
'nosuchaction' from api but should be 'view' // Download mobile page dependencies only once - if ((await MediaWiki.hasWikimediaMobileApi()) && this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + if ((await MediaWiki.hasWikimediaMobileApi()) && this.wikimediaMobileJsDependenciesList.length === 0 && this.wikimediaMobileStyleDependenciesList.length === 0) { try { // TODO: An arbitrary title can be placed since all Wikimedia wikis have the same mobile offline resources const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) mobileModulesData.forEach((module: string) => { if (module.includes('javascript')) { - this.mobileJsDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileJsDependenciesList.push(module.replace('//', '')) } else if (module.includes('css')) { - this.mobileStyleDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileStyleDependenciesList.push(module.replace('//', '')) } }) } catch (err) { @@ -714,8 +714,8 @@ class Downloader { } return { jsConfigVars, - jsDependenciesList: jsDependenciesList.concat(this.mobileJsDependenciesList), - styleDependenciesList: styleDependenciesList.concat(this.mobileStyleDependenciesList), + jsDependenciesList: jsDependenciesList.concat(this.wikimediaMobileJsDependenciesList), + styleDependenciesList: styleDependenciesList.concat(this.wikimediaMobileStyleDependenciesList), } } diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 3c314791..60faa856 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -9,8 +9,8 @@ import semver from 'semver' import basicURLDirector from './util/builders/url/basic.director.js' import BaseURLDirector from './util/builders/url/base.director.js' import ApiURLDirector from './util/builders/url/api.director.js' -import DesktopURLDirector from './util/builders/url/desktop.director.js' -import MobileURLDirector from './util/builders/url/mobile.director.js' +import WikimediaDesktopURLDirector from 
'./util/builders/url/desktop.director.js' +import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' import { BLACKLISTED_NS } from './util/const.js' @@ -50,9 +50,9 @@ class MediaWiki { #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector - private wikimediaDesktopUrlDirector: DesktopURLDirector - private wikimediaMobileUrlDirector: MobileURLDirector - private visualEditorURLDirector: VisualEditorURLDirector + private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + private wikimediaMobileUrlDirector: WikimediaMobileURLDirector + private VisualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL public apiUrl: URL @@ -60,8 +60,8 @@ class MediaWiki { public _modulePathOpt: string // only for whiting to generate modulePath public mobileModulePath: string public webUrl: URL - public desktopApiUrl: URL - public mobileApiUrl: URL + public WikimediaDesktopApiUrl: URL + public WikimediaMobileApiUrl: URL #hasWikimediaDesktopApi: boolean | null #hasWikimediaMobileApi: boolean | null @@ -152,7 +152,7 @@ class MediaWiki { public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { - this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) + this.#hasVisualEditorApi = await checkApiAvailability(this.VisualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasVisualEditorApi } return this.#hasVisualEditorApi @@ -183,13 +183,13 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) - this.mobileApiUrl = 
baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) + this.WikimediaDesktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) + this.WikimediaMobileApiUrl = baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() - this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) - this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) - this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href) + this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href) + this.VisualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } public async login(downloader: Downloader) { diff --git a/src/util/builders/url/desktop.director.ts b/src/util/builders/url/desktop.director.ts index 100163ae..2f157deb 100644 --- a/src/util/builders/url/desktop.director.ts +++ b/src/util/builders/url/desktop.director.ts @@ -3,7 +3,7 @@ import urlBuilder from './url.builder.js' /** * Interface to build URLs based on Downloader desktop URL */ -export default class DesktopURLDirector { +export default class WikimediaDesktopURLDirector { baseDomain: string constructor(baseDomain: string) { diff --git a/src/util/builders/url/mobile.director.ts b/src/util/builders/url/mobile.director.ts index 258b389e..d33dcf9e 100644 --- a/src/util/builders/url/mobile.director.ts +++ b/src/util/builders/url/mobile.director.ts @@ -3,7 +3,7 @@ import urlBuilder from './url.builder.js' /** * Interface to build URLs based on MediaWiki mobile URL */ -export default class MobileURLDirector { +export default class WikimediaMobileURLDirector { baseDomain: string constructor(baseDomain: string) { diff --git 
a/test/unit/builders/url/desktop.director.test.ts b/test/unit/builders/url/desktop.director.test.ts index 642a0735..5ca6b5d8 100644 --- a/test/unit/builders/url/desktop.director.test.ts +++ b/test/unit/builders/url/desktop.director.test.ts @@ -1,7 +1,7 @@ -import DesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js' +import WikimediaDesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js' -describe('DesktopURLDirector', () => { - const wikimediaDesktopUrlDirector = new DesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') +describe('WikimediaDesktopURLDirector', () => { + const wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') describe('buildArticleURL', () => { it('should return the URL to retrieve a desktop article', () => { diff --git a/test/unit/builders/url/mobile.director.test.ts b/test/unit/builders/url/mobile.director.test.ts index d5f94a78..07dd29ae 100644 --- a/test/unit/builders/url/mobile.director.test.ts +++ b/test/unit/builders/url/mobile.director.test.ts @@ -1,7 +1,7 @@ -import MobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' +import WikimediaMobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' -describe('MobileURLDirector', () => { - const mobuleUrlDirector = new MobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') +describe('WikimediaMobileURLDirector', () => { + const mobuleUrlDirector = new WikimediaMobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') describe('buildArticleURL', () => { it('should return a URL for retrieving mobile article', () => { From 6dc7d7a17f2bf3f4fb0080c885aca71360fc41e5 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 10:18:28 +0300 Subject: [PATCH 19/58] Change PCS naming in favor of Wm Mobile --- res/templates/page.html | 6 +++--- .../wm_mobile_override_script.js} | 2 
+- .../wm_mobile_override_style.css} | 0 src/config.ts | 4 ++-- src/mwoffliner.lib.ts | 6 +++--- src/renderers/abstractDesktop.render.ts | 4 ++-- src/renderers/abstractMobile.render.ts | 8 ++++---- src/util/misc.ts | 10 +++++----- test/unit/mwApi.test.ts | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) rename res/{pcs/pcs_override_script.js => wm_mobile/wm_mobile_override_script.js} (90%) rename res/{pcs/pcs_override_style.css => wm_mobile/wm_mobile_override_style.css} (100%) diff --git a/res/templates/page.html b/res/templates/page.html index 3e2574ed..0f831328 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -4,7 +4,7 @@ __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ - __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __PCS_CSS_OVERRIDE__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __WM_MOBILE_CSS_OVERRIDE__
@@ -19,7 +19,7 @@

__ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - __PCS_JS_OVERRIDE__ + __ARTICLE_JS_LIST__ + __WM_MOBILE_JS_OVERRIDE__ diff --git a/res/pcs/pcs_override_script.js b/res/wm_mobile/wm_mobile_override_script.js similarity index 90% rename from res/pcs/pcs_override_script.js rename to res/wm_mobile/wm_mobile_override_script.js index d39c7b2d..ce316eff 100644 --- a/res/pcs/pcs_override_script.js +++ b/res/wm_mobile/wm_mobile_override_script.js @@ -1,7 +1,7 @@ function importScript() { return 1 } // this is to avoid the error from site.js window.onload = function () { - // Check if there is a PCS output page + // Check if there is a Wikimedia mobile output page if (document.querySelector('#pcs')) { const supElements = document.querySelectorAll('sup'); const linkElements = document.querySelectorAll('a'); diff --git a/res/pcs/pcs_override_style.css b/res/wm_mobile/wm_mobile_override_style.css similarity index 100% rename from res/pcs/pcs_override_style.css rename to res/wm_mobile/wm_mobile_override_style.css diff --git a/src/config.ts b/src/config.ts index 92856fd1..2556710a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,10 +56,10 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], - pcsCssResources: ['pcs_override_style'], + wmMobileCssResources: ['wm_mobile_override_style'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], - pcsJsResources: ['pcs_override_script'], + mwMobileJsResources: ['wm_mobile_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 71d0de49..fc9118b0 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,7 +37,7 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, - saveStaticPCSFiles, + saveStaticWmMobileFiles, 
importPolyfillModules, extractArticleList, getTmpDirectory, @@ -400,8 +400,8 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static PCS Override Files') - await saveStaticPCSFiles(config, zimCreator) + logger.info('Copying Static Wikimedia Mobile Override Files') + await saveStaticWmMobileFiles(config, zimCreator) logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts index 169d1825..fb9a154c 100644 --- a/src/renderers/abstractDesktop.render.ts +++ b/src/renderers/abstractDesktop.render.ts @@ -38,8 +38,8 @@ export abstract class DesktopRenderer extends Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') - .replace('__PCS_CSS_OVERRIDE__', '') - .replace('__PCS_JS_OVERRIDE__', '') + .replace('__WM_MOBILE_CSS_OVERRIDE__', '') + .replace('__WM_MOBILE_JS_OVERRIDE__', '') const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index 426ffacb..c0fc2949 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -11,11 +11,11 @@ export abstract class MobileRenderer extends Renderer { super() } - private genPCSCOverrideCSSLink(css: string) { + private genWmMobileOverrideCSSLink(css: string) { return `` } - private genPCSOverrideScript(js: string) { + private genWmMobileOverrideScript(js: string) { return `` } @@ -45,8 +45,8 @@ export abstract class MobileRenderer extends Renderer { ? 
mobileCssModuleDependencies.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) - .replace('__PCS_CSS_OVERRIDE__', this.genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) - .replace('__PCS_JS_OVERRIDE__', this.genPCSOverrideScript(config.output.pcsJsResources[0])) + .replace('__WM_MOBILE_CSS_OVERRIDE__', this.genWmMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) + .replace('__WM_MOBILE_JS_OVERRIDE__', this.genWmMobileOverrideScript(config.output.mwMobileJsResources[0])) const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/util/misc.ts b/src/util/misc.ts index 49a0c880..ec815d34 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -173,7 +173,7 @@ function saveResourceFile(resource: string, type: 'css' | 'js', basePath: string }) zimCreator.addArticle(article) } catch (error) { - const fileType = type === 'css' ? (basePath.includes('pcs') ? 'style PCS override' : 'style') : 'script' + const fileType = type === 'css' ? (basePath.includes('wm_mobile') ? 
'style Wikimedia mobile override' : 'style') : 'script' logger.warn(`Could not create ${fileType} ${resource} file : ${error}`) } } @@ -185,10 +185,10 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { return Promise.all([...cssPromises, ...jsPromises]) } -export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { - const pcsCssPromises = config.output.pcsCssResources.map((pcsCss) => saveResourceFile(pcsCss, 'css', 'pcs/', config, zimCreator)()) - const pcsJsPromises = config.output.pcsJsResources.map((pcsJs) => saveResourceFile(pcsJs, 'js', 'pcs/', config, zimCreator)()) - return Promise.all([...pcsCssPromises, ...pcsJsPromises]) +export function saveStaticWmMobileFiles(config: Config, zimCreator: ZimCreator) { + const wmMobileCssPromises = config.output.wmMobileCssResources.map((wmMobileCss) => saveResourceFile(wmMobileCss, 'css', 'wm_mobile/', config, zimCreator)()) + const wmMobileJsPromises = config.output.mwMobileJsResources.map((wmMobileJs) => saveResourceFile(wmMobileJs, 'js', 'wm_mobile/', config, zimCreator)()) + return Promise.all([...wmMobileCssPromises, ...wmMobileJsPromises]) } export function cssPath(css: string, subDirectory = '') { diff --git a/test/unit/mwApi.test.ts b/test/unit/mwApi.test.ts index 9b73fadd..4c44f9b2 100644 --- a/test/unit/mwApi.test.ts +++ b/test/unit/mwApi.test.ts @@ -18,7 +18,7 @@ afterAll(stopRedis) const initMW = async (downloader: Downloader) => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasVisualEditorApi() await MediaWiki.getNamespaces([], downloader) From a5cbf514430e3cd7e953baa4cac0910fcb764fb3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 11:40:19 +0300 Subject: [PATCH 20/58] Add tests for the format params while using WikimediaMobile render --- test/e2e/mobileRenderFormatParams.test.ts | 126 
++++++++++++++++++++++ test/e2e/mobileRenderIntegrity.test.ts | 1 + 2 files changed, 127 insertions(+) create mode 100644 test/e2e/mobileRenderFormatParams.test.ts diff --git a/test/e2e/mobileRenderFormatParams.test.ts b/test/e2e/mobileRenderFormatParams.test.ts new file mode 100644 index 00000000..c34d5367 --- /dev/null +++ b/test/e2e/mobileRenderFormatParams.test.ts @@ -0,0 +1,126 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import * as logger from '../../src/Logger.js' +import domino from 'domino' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { zimdumpAvailable, zimdump } from '../util.js' + +jest.setTimeout(200000) + +let zimdumpIsAvailable + +beforeAll(async () => { + zimdumpIsAvailable = await zimdumpAvailable() + if (!zimdumpIsAvailable) { + logger.error('Zimdump not installed, exiting test') + process.exit(1) + } +}) + +async function getOutFiles(testId: string, articleList: string, mwUrl: string, format?: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'mail@mail.com', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: 'WikimediaMobile', + format, + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +// TODO: articulate this test with /pull/1898 once merged +describe('Mobile render with multiple format params', () => { + const mwUrl = 'https://en.wikipedia.org' + + test('Test WikimediaMobile with en.wikipedia.org using format:nopic param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopic') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const imgElements = 
Array.from(articleDoc.querySelectorAll('img')) + + expect(imgElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:nodet param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nodet') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const sectionsElements = Array.from(articleDoc.querySelectorAll('section')) + + expect(sectionsElements).toHaveLength(1) + expect(sectionsElements[0].getAttribute('data-mw-section-id')).toEqual('0') + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:novid param to check no video tags', async () => { + const articleList = 'Animation' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const videoElements = Array.from(articleDoc.querySelectorAll('video')) + + expect(videoElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:novid param to check no audio tags', async () => { + const articleList = 'English_alphabet' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const audioElements = Array.from(articleDoc.querySelectorAll('audio')) + + expect(audioElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + // 
TODO: secure nppdf format for all renders + test.skip('Test WikimediaMobile render with en.wikipedia.org using format:nopdf', async () => { + const articleList = 'PDF' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopdf') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const anchorElements = Array.from(articleDoc.querySelectorAll('a')) + + anchorElements.forEach((anchor) => { + expect(anchor.href).not.toContain('.pdf') + }) + + rimraf.sync(`./${testId}`) + }) +}) diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index 2f03f270..8cb5d81a 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -47,6 +47,7 @@ const commonTreatmentTest = async (articleList: string, mwUrl: string) => { rimraf.sync(`./${testId}`) } +// TODO: articulate this test with /pull/1898 once merged describe('Mobile render zim file integrity', () => { const mwUrl = 'https://en.wikipedia.org' // TODO: some articles such as 'Canada' don't pass this test even with desktop renderer From b44bf8b46fb24910f7f6e85e3e5ac3d0e5c66487 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 15:20:15 +0300 Subject: [PATCH 21/58] Override image size for WikimediaMobile --- res/wm_mobile/wm_mobile_override_style.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/res/wm_mobile/wm_mobile_override_style.css b/res/wm_mobile/wm_mobile_override_style.css index c4d61088..3cbaef67 100644 --- a/res/wm_mobile/wm_mobile_override_style.css +++ b/res/wm_mobile/wm_mobile_override_style.css @@ -13,3 +13,8 @@ span.noviewer { .mw-body h3, .mw-body h2 { width: auto; } + +.thumbinner img.pcs-widen-image-override { + width: auto !important; + max-width: 100% !important; +} From c96a23726a7ee2001177738af0e819db579c4d45 Mon Sep 17 00:00:00 
2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 16:12:49 +0300 Subject: [PATCH 22/58] Update placeholder in the test for nopdf param --- test/e2e/mobileRenderFormatParams.test.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/e2e/mobileRenderFormatParams.test.ts b/test/e2e/mobileRenderFormatParams.test.ts index c34d5367..826f2273 100644 --- a/test/e2e/mobileRenderFormatParams.test.ts +++ b/test/e2e/mobileRenderFormatParams.test.ts @@ -105,7 +105,6 @@ describe('Mobile render with multiple format params', () => { rimraf.sync(`./${testId}`) }) - // TODO: secure nppdf format for all renders test.skip('Test WikimediaMobile render with en.wikipedia.org using format:nopdf', async () => { const articleList = 'PDF' const now = new Date() @@ -117,8 +116,8 @@ describe('Mobile render with multiple format params', () => { const anchorElements = Array.from(articleDoc.querySelectorAll('a')) - anchorElements.forEach((anchor) => { - expect(anchor.href).not.toContain('.pdf') + anchorElements.forEach(() => { + // TODO: Check valid links to pdf source }) rimraf.sync(`./${testId}`) From 7ffc1e20cfb4fa0ab37f97a256a3f38dc3ac5f8d Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 16:50:13 +0300 Subject: [PATCH 23/58] Force article body to be centered for mobile render --- res/wm_mobile/wm_mobile_override_style.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/wm_mobile/wm_mobile_override_style.css b/res/wm_mobile/wm_mobile_override_style.css index 3cbaef67..ab31918b 100644 --- a/res/wm_mobile/wm_mobile_override_style.css +++ b/res/wm_mobile/wm_mobile_override_style.css @@ -1,5 +1,5 @@ body { - margin: 0 auto; + margin: 0 auto !important; } p#pcs-edit-section-add-title-description { display: none !important; From e5d82f8a60b0510df79386658a3e21dc549d5162 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 11 Sep 2023 17:07:45 +0300 Subject: [PATCH 24/58] Implement mobile renderer (partial impl) --- 
src/Downloader.ts | 2 + src/MediaWiki.ts | 15 ++++++ src/mwoffliner.lib.ts | 1 + src/renderers/renderer.builder.ts | 20 ++++++-- src/renderers/wikimedia-mobile.renderer.ts | 50 +++++++++++++++++++ src/util/builders/url/base.director.ts | 7 +++ src/util/builders/url/mobile.director.ts | 16 ++++++ src/util/saveArticles.ts | 16 +++--- test/unit/builders/url/base.director.test.ts | 14 ++++++ .../unit/builders/url/mobile.director.test.ts | 13 +++++ test/unit/downloader.test.ts | 1 + test/unit/renderers/renderer.builder.test.ts | 1 + test/unit/saveArticles.test.ts | 2 + test/unit/urlRewriting.test.ts | 1 + test/util.ts | 1 + 15 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 src/renderers/wikimedia-mobile.renderer.ts create mode 100644 src/util/builders/url/mobile.director.ts create mode 100644 test/unit/builders/url/mobile.director.test.ts diff --git a/src/Downloader.ts b/src/Downloader.ts index 8a8ca712..ac9d210b 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -173,12 +173,14 @@ class Downloader { this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) //* Objects order in array matters! 
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) } else { switch (forceRender) { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 65ca1055..434ae49d 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -10,6 +10,7 @@ import basicURLDirector from './util/builders/url/basic.director.js' import BaseURLDirector from './util/builders/url/base.director.js' import ApiURLDirector from './util/builders/url/api.director.js' import DesktopURLDirector from './util/builders/url/desktop.director.js' +import MobileURLDirector from './util/builders/url/mobile.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' import { BLACKLISTED_NS } from './util/const.js' @@ -50,6 +51,7 @@ class MediaWiki { #domain: string private apiUrlDirector: ApiURLDirector private wikimediaDesktopUrlDirector: DesktopURLDirector + private wikimediaMobileUrlDirector: MobileURLDirector private visualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL @@ -58,8 +60,10 @@ class MediaWiki { public _modulePathOpt: string // only for whiting to generate modulePath public webUrl: URL public desktopRestApiUrl: URL + public mobileRestApiUrl: URL #hasWikimediaDesktopRestApi: boolean | null + #hasWikimediaMobileRestApi: boolean | null #hasVisualEditorApi: boolean | null #hasCoordinates: boolean | null @@ -120,6 +124,7 @@ class MediaWiki { } this.#hasWikimediaDesktopRestApi = null + this.#hasWikimediaMobileRestApi = null this.#hasVisualEditorApi = null this.#hasCoordinates = null } @@ -136,6 +141,14 @@ class MediaWiki { return this.#hasWikimediaDesktopRestApi } + public async 
hasWikimediaMobileRestApi(): Promise { + if (this.#hasWikimediaMobileRestApi === null) { + this.#hasWikimediaMobileRestApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaMobileRestApi + } + return this.#hasWikimediaMobileRestApi + } + public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) @@ -170,8 +183,10 @@ class MediaWiki { this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath) + this.mobileRestApiUrl = baseUrlDirector.buildMobileRestApiURL(this.#restApiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href) + this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileRestApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 91e3a8f2..60700cb1 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -212,6 +212,7 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index dc6ed04e..48418461 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -2,6 +2,7 @@ import MediaWiki from './../MediaWiki.js' import { Renderer } from './abstract.renderer.js' import { VisualEditorRenderer } from 
'./visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' +import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' import * as logger from './../Logger.js' @@ -9,7 +10,11 @@ export class RendererBuilder { public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options - const [hasVisualEditorApi, hasWikimediaDesktopRestApi] = await Promise.all([MediaWiki.hasVisualEditorApi(), MediaWiki.hasWikimediaDesktopRestApi()]) + const [hasVisualEditorApi, hasWikimediaDesktopRestApi, hasWikimediaMobileRestApi] = await Promise.all([ + MediaWiki.hasVisualEditorApi(), + MediaWiki.hasWikimediaDesktopRestApi(), + MediaWiki.hasWikimediaMobileRestApi(), + ]) switch (renderType) { case 'desktop': @@ -23,7 +28,11 @@ export class RendererBuilder { process.exit(1) } case 'mobile': - // TODO: return WikimediaMobile renderer + if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) break case 'auto': if (hasWikimediaDesktopRestApi) { @@ -51,8 +60,11 @@ export class RendererBuilder { logger.error('Cannot create an instance of VisualEditor renderer.') process.exit(1) case 'WikimediaMobile': - // TODO: return WikimediaMobile renderer - return + if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() + } + logger.error('No available mobile renderer.') + process.exit(1) default: throw new Error(`Unknown renderName for specific mode: ${renderName}`) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts new file mode 100644 index 00000000..f7bca864 --- /dev/null +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -0,0 +1,50 @@ +import * as logger from '../Logger.js' +import { Renderer } from './abstract.renderer.js' +import { getStrippedTitleFromHtml } from 
'../util/misc.js' +import { RenderOpts, RenderOutput } from './abstract.renderer.js' + +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' +export class WikimediaMobileRenderer extends Renderer { + constructor() { + super() + } + + private async retrieveHtml(renderOpts: RenderOpts): Promise { + const { data, articleId, articleDetail, isMainPage } = renderOpts + + const html = isMainPage ? data : super.injectH1TitleToHtml(data, articleDetail) + const strippedTitle = getStrippedTitleFromHtml(html) + const displayTitle = strippedTitle || articleId.replace('_', ' ') + + return { html, displayTitle } + } + + public async render(renderOpts: RenderOpts): Promise { + try { + const result: RenderOutput = [] + const { data, articleId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + const articleDetail = await renderOpts.articleDetailXId.get(articleId) + + const { html, displayTitle } = await this.retrieveHtml(renderOpts) + if (html) { + let dataWithHeader = '' + if (!isMainPage) { + dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) + } + const { finalHTML, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + + result.push({ + articleId, + displayTitle, + html: finalHTML, + mediaDependencies: null, + subtitles, + }) + return result + } + } catch (err) { + logger.error(err.message) + throw new Error(err.message) + } + } +} diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 3aa7ba3a..2f8f7949 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -28,6 +28,13 @@ export default class BaseURLDirector { .build(true, '/') } + buildMobileRestApiURL(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 
'api/rest_v1/page/mobile-html') + .build(true, '/') + } + buildModuleURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) diff --git a/src/util/builders/url/mobile.director.ts b/src/util/builders/url/mobile.director.ts new file mode 100644 index 00000000..258b389e --- /dev/null +++ b/src/util/builders/url/mobile.director.ts @@ -0,0 +1,16 @@ +import urlBuilder from './url.builder.js' + +/** + * Interface to build URLs based on MediaWiki mobile URL + */ +export default class MobileURLDirector { + baseDomain: string + + constructor(baseDomain: string) { + this.baseDomain = baseDomain + } + + buildArticleURL(articleId: string) { + return urlBuilder.setDomain(this.baseDomain).setPath(encodeURIComponent(articleId)).build() + } +} diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index cf60d2bb..bb092b35 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -206,7 +206,7 @@ async function saveArticle( filesToDownload[s.path] = { url: s.url, namespace: '-' } }) - if (mediaDependencies.length) { + if (mediaDependencies && mediaDependencies.length) { const existingVals = await RedisStore.filesToDownloadXPath.getMany(mediaDependencies.map((dep) => dep.path)) for (const dep of mediaDependencies) { @@ -260,21 +260,25 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade const rendererBuilder = new RendererBuilder() let rendererBuilderOptions: RendererBuilderOptions + + let mainPageRenderer + let articlesRenderer if (forceRender) { rendererBuilderOptions = { renderType: 'specific', renderName: forceRender, } + // All articles and main page will use the same renderer if 'forceRender' is specified + mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } else { rendererBuilderOptions = { - renderType: 'auto', + renderType: 'desktop', } + mainPageRenderer = await 
rendererBuilder.createRenderer(rendererBuilderOptions) + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) } - const mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - // TODO: article renderer will be switched to the mobile mode later - const articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - if (dump.customProcessor?.shouldKeepArticle) { await getAllArticlesToKeep(downloader, articleDetailXId, dump, mainPageRenderer, articlesRenderer) } diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 9282ff8c..5aadd9d4 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -25,6 +25,20 @@ describe('BaseURLDirector', () => { }) }) + describe('buildMobileRestApiURL', () => { + it('should return mobile rest URL with provided path and trailing char', () => { + const url = baseUrlDirector.buildMobileRestApiURL('api/rest_v2/page/mobile-html') + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') + }) + + it('should return mobile rest URL with default path and trailing char', () => { + const url = baseUrlDirector.buildMobileRestApiURL() + + expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') + }) + }) + describe('buildDesktopRestApiURL', () => { it('should return a desktop URL with provided path and trailing char', () => { const url = baseUrlDirector.buildDesktopRestApiURL('api/rest_v2/page/html') diff --git a/test/unit/builders/url/mobile.director.test.ts b/test/unit/builders/url/mobile.director.test.ts new file mode 100644 index 00000000..d5f94a78 --- /dev/null +++ b/test/unit/builders/url/mobile.director.test.ts @@ -0,0 +1,13 @@ +import MobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' + +describe('MobileURLDirector', () => { + const mobuleUrlDirector = new 
MobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') + + describe('buildArticleURL', () => { + it('should return a URL for retrieving mobile article', () => { + const url = mobuleUrlDirector.buildArticleURL('article-123') + + expect(url).toBe('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/article-123') + }) + }) +}) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index 153666e0..e0dd8504 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -33,6 +33,7 @@ describe('Downloader class', () => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() }) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index c071eff0..8c92d42c 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -82,6 +82,7 @@ describe('RendererBuilder', () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index d1644e4b..d0b3ab23 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -22,6 +22,7 @@ describe('saveArticles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() const _articlesDetail = await downloader.getArticleDetailsIds(['London']) @@ 
-224,6 +225,7 @@ describe('saveArticles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 4e76d0c7..0f184007 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -141,6 +141,7 @@ describe('Styles', () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/util.ts b/test/util.ts index c4d6ebc4..2f01b581 100644 --- a/test/util.ts +++ b/test/util.ts @@ -38,6 +38,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() const dump = new Dump(format, {} as any, MediaWiki.metaData) From 6b9c7341441e84436ff7151772bf9f42b73961b6 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 15:23:07 +0300 Subject: [PATCH 25/58] Setup renderers for main page and articles --- src/renderers/renderer.builder.ts | 3 ++- src/renderers/wikimedia-mobile.renderer.ts | 4 ++-- src/util/saveArticles.ts | 15 +++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index 48418461..47223f3c 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -33,13 
+33,14 @@ export class RendererBuilder { } logger.error('No available mobile renderer.') process.exit(1) - break case 'auto': if (hasWikimediaDesktopRestApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { return new VisualEditorRenderer() + } else if (hasWikimediaMobileRestApi) { + return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') process.exit(1) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index f7bca864..ce3fc930 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -31,13 +31,13 @@ export class WikimediaMobileRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } - const { finalHTML, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) result.push({ articleId, displayTitle, html: finalHTML, - mediaDependencies: null, + mediaDependencies, subtitles, }) return result diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index bb092b35..d8b0c765 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -14,6 +14,7 @@ import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } f import urlHelper from './url.helper.js' import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' +import MediaWiki from '../../src/MediaWiki.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { await 
retryStore.flush() @@ -202,9 +203,11 @@ async function saveArticle( try { const filesToDownload: KVS = {} - subtitles.forEach((s) => { - filesToDownload[s.path] = { url: s.url, namespace: '-' } - }) + if (subtitles?.length > 0) { + subtitles.forEach((s) => { + filesToDownload[s.path] = { url: s.url, namespace: '-' } + }) + } if (mediaDependencies && mediaDependencies.length) { const existingVals = await RedisStore.filesToDownloadXPath.getMany(mediaDependencies.map((dep) => dep.path)) @@ -276,7 +279,11 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade renderType: 'desktop', } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) + // If the mobile renderer API is not available, switch articles rendering to the auto mode instead + if (await MediaWiki.hasWikimediaMobileRestApi()) { + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) + } + articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'auto' }) } if (dump.customProcessor?.shouldKeepArticle) { From 2ba20e0043f0db1074de3436e8e444273b195a0a Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 16:53:22 +0300 Subject: [PATCH 26/58] Fix regressions in saveArticles --- src/util/saveArticles.ts | 64 +++++++--------------------------------- 1 file changed, 11 insertions(+), 53 deletions(-) diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index d8b0c765..e43d0fb6 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -130,33 +130,13 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK await articleDetailXId.iterateItems(downloader.speed, async (articleKeyValuePairs) => { for (const [articleId, articleDetail] of Object.entries(articleKeyValuePairs)) { const 
_moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - let rets: any - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, html } of rets) { if (!html) { continue @@ -280,10 +260,8 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - if (await MediaWiki.hasWikimediaMobileRestApi()) { - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'mobile' }) - } - articlesRenderer = await rendererBuilder.createRenderer({ ...rendererBuilderOptions, renderType: 'auto' }) + rendererBuilderOptions.renderType = (await MediaWiki.hasWikimediaMobileRestApi()) ? 
'mobile' : 'auto' + articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } if (dump.customProcessor?.shouldKeepArticle) { @@ -321,30 +299,10 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let rets: any try { const articleUrl = getArticleUrl(downloader, dump, articleId) - if (dump.isMainPage) { - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - mainPageRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - } - rets = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - articlesRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) + const isMainPage = dump.isMainPage(articleId) + const renderer = isMainPage ? mainPageRenderer : articlesRenderer + + rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, subtitles } of rets) { if (!finalHTML) { From 10e82bbc83d2a412b7aa7d740d69eb3ee8abb52c Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 13 Sep 2023 19:47:36 +0300 Subject: [PATCH 27/58] Pass hasWikimediaMobileRestApi to saveArticles as argument to prevent issue with no found module --- src/mwoffliner.lib.ts | 4 ++-- src/util/saveArticles.ts | 5 ++--- test/unit/saveArticles.test.ts | 2 ++ test/unit/treatments/article.treatment.test.ts | 1 + test/unit/urlRewriting.test.ts | 1 + 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 60700cb1..004afd40 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -212,7 +212,7 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) await 
MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + const hasWikimediaMobileRestApi = await MediaWiki.hasWikimediaMobileRestApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) @@ -421,7 +421,7 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, forceRender) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileRestApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index e43d0fb6..224c80fc 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -14,7 +14,6 @@ import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } f import urlHelper from './url.helper.js' import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' -import MediaWiki from '../../src/MediaWiki.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { await retryStore.flush() @@ -232,7 +231,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str /* * Fetch Articles */ -export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, forceRender = null) { +export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileRestApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() let jsConfigVars = '' @@ -260,7 +259,7 @@ export async function 
saveArticles(zimCreator: ZimCreator, downloader: Downloade } mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - rendererBuilderOptions.renderType = (await MediaWiki.hasWikimediaMobileRestApi()) ? 'mobile' : 'auto' + rendererBuilderOptions.renderType = hasWikimediaMobileRestApi ? 'mobile' : 'auto' articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) } diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index d0b3ab23..8d1a2e3a 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -45,6 +45,7 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, ) // Successfully scrapped existent articles @@ -273,6 +274,7 @@ describe('saveArticles', () => { } as any, downloader, dump, + true, ) const ParisDocument = domino.createDocument(writtenArticles.Paris.bufferData) diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts index ab9a129b..cbe4d150 100644 --- a/test/unit/treatments/article.treatment.test.ts +++ b/test/unit/treatments/article.treatment.test.ts @@ -56,6 +56,7 @@ describe('ArticleTreatment', () => { } as any, downloader, dump, + true, ) // Successfully scrapped existent articles diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 0f184007..49001797 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -160,6 +160,7 @@ describe('Styles', () => { } as any, downloader, dump, + true, ) const html = LondonArticle.bufferData.toString() From 9829d7cd75629eccb8ccab6470db415892035ea0 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 14 Sep 2023 17:23:45 +0300 Subject: [PATCH 28/58] Add treatSections method for mobile render (partial impl) --- src/renderers/wikimedia-mobile.renderer.ts | 151 +++++++++++++++++++++ 1 file changed, 
151 insertions(+) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index ce3fc930..776aebae 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,7 +1,9 @@ +import * as domino from 'domino' import * as logger from '../Logger.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' +import { categoriesTemplate, leadSectionTemplate, sectionTemplate, subCategoriesTemplate, subPagesTemplate, subSectionTemplate } from '../Templates.js' // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' export class WikimediaMobileRenderer extends Renderer { @@ -31,6 +33,7 @@ export class WikimediaMobileRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } + // TODO: do mobile page transformations before applying other treatments const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) result.push({ @@ -47,4 +50,152 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } + + // TODO: work in progress + private treatSections(data: any, dump: Dump, articleId: string, displayTitle: string, articleDetail: ArticleDetail): string { + const doc = domino.createDocument(data) + + let html = '' + + // set the first section (open by default) + html += leadSectionTemplate({ + lead_display_title: displayTitle, + lead_section_text: doc.querySelector('section[data-mw-section-id="0"]').innerHTML, + strings: dump.strings, + }) + + // Get only top remain setions except first one + // Calculate toclevel to handle nesting. 
To do so you need to traverse DOM recursevely + + // set all other section (closed by default) + if (!dump.nodet && json.remaining.sections.length > 0) { + const firstTocLevel = json.remaining.sections[0].toclevel + json.remaining.sections.forEach((oneSection: any, i: number) => { + if (oneSection.toclevel === firstTocLevel) { + html = html.replace(`__SUB_LEVEL_SECTION_${i}__`, '') // remove unused anchor for subsection + html += sectionTemplate({ + section_index: i + 1, + section_id: oneSection.id, // Get from data-mw-section-id attribute + section_anchor: oneSection.anchor, // Anchor of the heading, id attr of the heading in html + section_line: oneSection.line, // this is the textContent() from the title. Check mobileapps for reference (checked) + section_text: oneSection.text, // this is the innerHTML of the section, refer to mobileapps + strings: dump.strings, // TODO: investigate + }) + } else { + html = html.replace( + `__SUB_LEVEL_SECTION_${i}__`, + subSectionTemplate({ + section_index: i + 1, + section_toclevel: oneSection.toclevel + 1, + section_id: oneSection.id, + section_anchor: oneSection.anchor, + section_line: oneSection.line, + section_text: oneSection.text, + strings: dump.strings, + }), + ) + } + }) + } + + // For section index + /** + * Iterate over parent and nested sections separately and set section_index. For parent = 1, and nested 2..n respectively + */ + + // For id + /** + * const sectionNumberString = sectionElement && sectionElement.getAttribute('data-mw-section-id'); + return sectionNumberString ? 
parseInt(sectionNumberString, 10) : undefined; + */ + + // For line + /** + * node.innerHTML.trim() + */ + + // For section text + /** + if (node.nodeType === NodeType.TEXT_NODE) { + currentSection.text += node.textContent; + } else { + currentSection.text += node.outerHTML; + } + */ + + // For anchor + /** + * node.getAttribute('id'); + */ + + const articleResourceNamespace = 'A' + const categoryResourceNamespace = 'U' + const slashesInUrl = articleId.split('/').length - 1 + const upStr = '../'.repeat(slashesInUrl + 1) + if (articleDetail.subCategories && articleDetail.subCategories.length) { + const subCategories = articleDetail.subCategories.map((category) => { + return { + name: category.title.split(':').slice(1).join(':'), + url: `${upStr}${categoryResourceNamespace}/${category.title}`, + } + }) + + const groups = this.groupAlphabetical(subCategories) + + html += subCategoriesTemplate({ + strings: dump.strings, + groups, + prevArticleUrl: articleDetail.prevArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.prevArticleId}` : null, + nextArticleUrl: articleDetail.nextArticleId ? 
`${upStr}${categoryResourceNamespace}/${articleDetail.nextArticleId}` : null, + }) + } + + if (articleDetail.pages && articleDetail.pages.length) { + const pages = articleDetail.pages.map((page) => { + return { + name: page.title, + url: `${upStr}${articleResourceNamespace}/${page.title}`, + } + }) + + const groups = this.groupAlphabetical(pages) + + html += subPagesTemplate({ + strings: dump.strings, + groups, + }) + } + + if (articleDetail.categories && articleDetail.categories.length) { + const categories = articleDetail.categories.map((category) => { + return { + name: category.title.split(':').slice(1).join(':'), + url: `${upStr}${categoryResourceNamespace}/${category.title}`, + } + }) + html += categoriesTemplate({ + strings: dump.strings, + categories, + }) + } + html = html.replace(`__SUB_LEVEL_SECTION_${json.remaining.sections.length}__`, '') // remove the last subcestion anchor (all other anchor are removed in the forEach) + return html + } + + private groupAlphabetical(items: PageRef[]) { + const groupsAlphabetical = items.reduce((acc: any, item) => { + const groupId = item.name[0].toLocaleUpperCase() + acc[groupId] = (acc[groupId] || []).concat(item) + return acc + }, {}) + + return Object.keys(groupsAlphabetical) + .sort() + .map((letter) => { + return { + title: letter, + items: groupsAlphabetical[letter], + } + }) + } } From 96f38ab73b777bd1ae6f66f2ac2422d5e8e26696 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 15 Sep 2023 16:54:38 +0300 Subject: [PATCH 29/58] Pass explicit output of mobile-html to the zim creator --- src/Downloader.ts | 2 +- src/renderers/wikimedia-mobile.renderer.ts | 180 ++------------------- 2 files changed, 13 insertions(+), 169 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index ac9d210b..5c77ffa9 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -171,9 +171,9 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! 
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ + { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, ]) //* Objects order in array matters! diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 776aebae..9089cab8 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,45 +1,37 @@ -import * as domino from 'domino' import * as logger from '../Logger.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' -import { categoriesTemplate, leadSectionTemplate, sectionTemplate, subCategoriesTemplate, subPagesTemplate, subSectionTemplate } from '../Templates.js' -// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' +// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { constructor() { super() } - private async retrieveHtml(renderOpts: RenderOpts): Promise { - const { data, articleId, articleDetail, isMainPage } = renderOpts + private getStrippedTitle(renderOpts: RenderOpts): string { + const { data, articleId } = renderOpts - const html = isMainPage ? 
data : super.injectH1TitleToHtml(data, articleDetail) - const strippedTitle = getStrippedTitleFromHtml(html) - const displayTitle = strippedTitle || articleId.replace('_', ' ') - - return { html, displayTitle } + const strippedTitle = getStrippedTitleFromHtml(data) + return strippedTitle || articleId.replace('_', ' ') } public async render(renderOpts: RenderOpts): Promise { try { const result: RenderOutput = [] - const { data, articleId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) - const { html, displayTitle } = await this.retrieveHtml(renderOpts) - if (html) { - let dataWithHeader = '' - if (!isMainPage) { - dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) - } - // TODO: do mobile page transformations before applying other treatments - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const displayTitle = this.getStrippedTitle(renderOpts) + if (data) { + // TODO: Apply mobile page transformations before applying other treatments + const { subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) + // TODO: styles, scripts and most of content are not visible in Kiwix app, but enabled when use Kiwix server result.push({ articleId, displayTitle, - html: finalHTML, + html: data, mediaDependencies, subtitles, }) @@ -50,152 +42,4 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } - - // TODO: work in progress - private treatSections(data: any, dump: Dump, articleId: string, displayTitle: string, articleDetail: ArticleDetail): string { - const doc = domino.createDocument(data) - - let html = '' - - // set the first section (open by default) - html += leadSectionTemplate({ - 
lead_display_title: displayTitle, - lead_section_text: doc.querySelector('section[data-mw-section-id="0"]').innerHTML, - strings: dump.strings, - }) - - // Get only top remain setions except first one - // Calculate toclevel to handle nesting. To do so you need to traverse DOM recursevely - - // set all other section (closed by default) - if (!dump.nodet && json.remaining.sections.length > 0) { - const firstTocLevel = json.remaining.sections[0].toclevel - json.remaining.sections.forEach((oneSection: any, i: number) => { - if (oneSection.toclevel === firstTocLevel) { - html = html.replace(`__SUB_LEVEL_SECTION_${i}__`, '') // remove unused anchor for subsection - html += sectionTemplate({ - section_index: i + 1, - section_id: oneSection.id, // Get from data-mw-section-id attribute - section_anchor: oneSection.anchor, // Anchor of the heading, id attr of the heading in html - section_line: oneSection.line, // this is the textContent() from the title. Check mobileapps for reference (checked) - section_text: oneSection.text, // this is the innerHTML of the section, refer to mobileapps - strings: dump.strings, // TODO: investigate - }) - } else { - html = html.replace( - `__SUB_LEVEL_SECTION_${i}__`, - subSectionTemplate({ - section_index: i + 1, - section_toclevel: oneSection.toclevel + 1, - section_id: oneSection.id, - section_anchor: oneSection.anchor, - section_line: oneSection.line, - section_text: oneSection.text, - strings: dump.strings, - }), - ) - } - }) - } - - // For section index - /** - * Iterate over parent and nested sections separately and set section_index. For parent = 1, and nested 2..n respectively - */ - - // For id - /** - * const sectionNumberString = sectionElement && sectionElement.getAttribute('data-mw-section-id'); - return sectionNumberString ? 
parseInt(sectionNumberString, 10) : undefined; - */ - - // For line - /** - * node.innerHTML.trim() - */ - - // For section text - /** - if (node.nodeType === NodeType.TEXT_NODE) { - currentSection.text += node.textContent; - } else { - currentSection.text += node.outerHTML; - } - */ - - // For anchor - /** - * node.getAttribute('id'); - */ - - const articleResourceNamespace = 'A' - const categoryResourceNamespace = 'U' - const slashesInUrl = articleId.split('/').length - 1 - const upStr = '../'.repeat(slashesInUrl + 1) - if (articleDetail.subCategories && articleDetail.subCategories.length) { - const subCategories = articleDetail.subCategories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - - const groups = this.groupAlphabetical(subCategories) - - html += subCategoriesTemplate({ - strings: dump.strings, - groups, - prevArticleUrl: articleDetail.prevArticleId ? `${upStr}${categoryResourceNamespace}/${articleDetail.prevArticleId}` : null, - nextArticleUrl: articleDetail.nextArticleId ? 
`${upStr}${categoryResourceNamespace}/${articleDetail.nextArticleId}` : null, - }) - } - - if (articleDetail.pages && articleDetail.pages.length) { - const pages = articleDetail.pages.map((page) => { - return { - name: page.title, - url: `${upStr}${articleResourceNamespace}/${page.title}`, - } - }) - - const groups = this.groupAlphabetical(pages) - - html += subPagesTemplate({ - strings: dump.strings, - groups, - }) - } - - if (articleDetail.categories && articleDetail.categories.length) { - const categories = articleDetail.categories.map((category) => { - return { - name: category.title.split(':').slice(1).join(':'), - url: `${upStr}${categoryResourceNamespace}/${category.title}`, - } - }) - html += categoriesTemplate({ - strings: dump.strings, - categories, - }) - } - html = html.replace(`__SUB_LEVEL_SECTION_${json.remaining.sections.length}__`, '') // remove the last subcestion anchor (all other anchor are removed in the forEach) - return html - } - - private groupAlphabetical(items: PageRef[]) { - const groupsAlphabetical = items.reduce((acc: any, item) => { - const groupId = item.name[0].toLocaleUpperCase() - acc[groupId] = (acc[groupId] || []).concat(item) - return acc - }, {}) - - return Object.keys(groupsAlphabetical) - .sort() - .map((letter) => { - return { - title: letter, - items: groupsAlphabetical[letter], - } - }) - } } From d92008cbe2e4c268914db24e5743c77d3478b566 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 18 Sep 2023 17:50:58 +0300 Subject: [PATCH 30/58] Apply mobile css and js in mobile renderer --- src/renderers/wikimedia-mobile.renderer.ts | 37 +++++++++++++++++++--- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 9089cab8..7666c436 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,3 +1,4 @@ +import * as domino from 'domino' import * as logger from '../Logger.js' 
import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' @@ -24,14 +25,14 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { - // TODO: Apply mobile page transformations before applying other treatments - const { subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - - // TODO: styles, scripts and most of content are not visible in Kiwix app, but enabled when use Kiwix server + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) + // TODO: Add mobile scripts after all treatments but this need to be refactored + // TODO: enable reference list + const dataWithMobileModules = this.addMobileModules(finalHTML) result.push({ articleId, displayTitle, - html: data, + html: dataWithMobileModules, mediaDependencies, subtitles, }) @@ -42,4 +43,30 @@ export class WikimediaMobileRenderer extends Renderer { throw new Error(err.message) } } + + private addMobileModules(data) { + const doc = domino.createDocument(data) + const protocol = 'https://' + // TODO: query this instead of hardcoding. 
+ const offlineResourcesCSSList = [ + 'meta.wikimedia.org/api/rest_v1/data/css/mobile/base', + 'meta.wikimedia.org/api/rest_v1/data/css/mobile/pcs', + 'en.wikipedia.org/api/rest_v1/data/css/mobile/site', + ] + const offlineResourcesJSList = ['meta.wikimedia.org/api/rest_v1/data/javascript/mobile/pcs'] + + offlineResourcesCSSList.forEach((cssUrl) => { + const linkEl = doc.createElement('link') as DominoElement + Object.assign(linkEl, { rel: 'stylesheet', href: `${protocol}${cssUrl}` }) + doc.head.appendChild(linkEl) + }) + + offlineResourcesJSList.forEach((jsUrl) => { + const scriptEl = doc.createElement('script') as DominoElement + scriptEl.setAttribute('src', `${protocol}${jsUrl}`) + doc.head.appendChild(scriptEl) + }) + + return doc.documentElement.outerHTML + } } From 19f4acc7eb70019d0e5deb645fb575ca5a21aaea Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 19 Sep 2023 16:41:11 +0300 Subject: [PATCH 31/58] Adapt reference links and minor treatments --- src/renderers/wikimedia-mobile.renderer.ts | 135 +++++++++++++++++++-- 1 file changed, 128 insertions(+), 7 deletions(-) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 7666c436..004013e4 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -4,6 +4,8 @@ import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' +type PipeFunction = (data: string) => string + // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { constructor() { @@ -26,13 +28,22 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - // 
TODO: Add mobile scripts after all treatments but this need to be refactored - // TODO: enable reference list - const dataWithMobileModules = this.addMobileModules(finalHTML) + const finalHTMLDoc = domino.createDocument(finalHTML) + const mobileHTML = this.pipeMobileTransformations( + finalHTMLDoc, + this.addMobileModules, + this.convertLazyLoadToImages, + this.removeEditContainer, + this.removeHiddenClass, + this.restoreLinkDefaults, + this.disableClientLinkListener, + this.overrideMobileStyles, + ) + result.push({ articleId, displayTitle, - html: dataWithMobileModules, + html: mobileHTML.documentElement.outerHTML, mediaDependencies, subtitles, }) @@ -44,8 +55,11 @@ export class WikimediaMobileRenderer extends Renderer { } } - private addMobileModules(data) { - const doc = domino.createDocument(data) + private pipeMobileTransformations(value, ...fns: PipeFunction[]) { + return fns.reduce((acc, fn) => fn(acc), value) + } + + private addMobileModules(doc: DominoElement) { const protocol = 'https://' // TODO: query this instead of hardcoding. 
const offlineResourcesCSSList = [ @@ -67,6 +81,113 @@ export class WikimediaMobileRenderer extends Renderer { doc.head.appendChild(scriptEl) }) - return doc.documentElement.outerHTML + return doc + } + + private removeEditContainer(doc: DominoElement) { + const editContainers = doc.querySelectorAll('.pcs-edit-section-link-container') + + editContainers.forEach((elem: DominoElement) => { + elem.remove() + }) + + return doc + } + + private convertLazyLoadToImages(doc: DominoElement) { + const protocol = 'https://' + const spans = doc.querySelectorAll('.pcs-lazy-load-placeholder') + + spans.forEach((span: DominoElement) => { + // Create a new img element + const img = doc.createElement('img') as DominoElement + + // Set the attributes for the img element based on the data attributes in the span + img.src = protocol + span.getAttribute('data-src') + img.setAttribute('decoding', 'async') + img.setAttribute('data-file-width', span.getAttribute('data-data-file-width')) + img.setAttribute('data-file-height', span.getAttribute('data-data-file-height')) + img.setAttribute('data-file-type', 'bitmap') + img.width = span.getAttribute('data-width') + img.height = span.getAttribute('data-height') + img.setAttribute('srcset', `${protocol}${span.getAttribute('data-srcset')}`) + img.className = span.getAttribute('data-class') + + // Replace the span with the img element + span.parentNode.replaceChild(img, span) + }) + + return doc + } + + private removeHiddenClass(doc: DominoElement) { + const pcsSectionHidden = 'pcs-section-hidden' + const hiddenSections = doc.querySelectorAll(`.${pcsSectionHidden}`) + hiddenSections.forEach((section) => { + section.classList.remove(pcsSectionHidden) + }) + return doc + } + + private restoreLinkDefaults(doc: DominoElement) { + const supElements = doc.querySelectorAll('sup') + + Array.from(supElements).forEach((sup: DominoElement) => { + const anchor = doc.createElement('a') + const mwRefLinkTextElement = sup.querySelector('.mw-reflink-text') as 
DominoElement + + let mwRefLinkText = '' + if (mwRefLinkTextElement) { + mwRefLinkText = mwRefLinkTextElement.textContent || '' + } + + const existedAnchor = sup.querySelector('.reference-link') + + if (existedAnchor?.getAttribute('href')) { + anchor.setAttribute('href', existedAnchor.getAttribute('href')) + } + anchor.className = 'reference-link' + anchor.textContent = mwRefLinkText + + sup.innerHTML = '' + sup.appendChild(anchor) + }) + + return doc + } + + private disableClientLinkListener(doc: DominoElement) { + const scriptEl = doc.createElement('script') + scriptEl.type = 'text/javascript' + scriptEl.text = ` + document.addEventListener("DOMContentLoaded", function() { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + }); + ` + doc.head.appendChild(scriptEl) + + return doc + } + + private overrideMobileStyles(doc: DominoElement) { + const styleEl = doc.createElement('style') + styleEl.innerHTML = ` + body { + margin: 0 auto; + } + .reference-link::after { + content: none !important; + } + ` + doc.head.appendChild(styleEl) + + return doc } } From 3dec6079fff9640ef485b48831620302a051135e Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 08:56:09 +0300 Subject: [PATCH 32/58] Refactor tests (partial impl) --- src/Downloader.ts | 14 +++---- src/MediaWiki.ts | 40 ++++++++++---------- src/mwoffliner.lib.ts | 6 +-- src/renderers/renderer.builder.ts | 18 ++++----- src/renderers/wikimedia-mobile.renderer.ts | 3 ++ src/util/builders/url/base.director.ts | 6 +-- src/util/saveArticles.ts | 27 ++++++------- test/e2e/articleLists.test.ts | 1 + test/e2e/bm.e2e.test.ts | 1 + test/e2e/downloadImage.e2e.test.ts | 1 + test/e2e/en10.e2e.test.ts | 1 + 
test/e2e/extra.e2e.test.ts | 1 + test/e2e/multimediaContent.test.ts | 1 + test/e2e/treatMedia.e2e.test.ts | 1 + test/e2e/vikidia.e2e.test.ts | 1 + test/e2e/wikisource.e2e.test.ts | 1 + test/e2e/zimMetadata.e2e.test.ts | 1 + test/unit/builders/url/base.director.test.ts | 18 ++++----- test/unit/downloader.test.ts | 4 +- test/unit/renderers/renderer.builder.test.ts | 6 +-- test/unit/saveArticles.test.ts | 14 ++++--- test/unit/urlRewriting.test.ts | 6 +-- test/unit/webpAndRedirection.test.ts | 1 + test/util.ts | 4 +- 24 files changed, 94 insertions(+), 83 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index 5c77ffa9..12e48210 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -171,23 +171,23 @@ class Downloader { if (!forceRender) { //* Objects order in array matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, ]) //* Objects order in array matters! 
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileRestApi(), value: MediaWiki.mobileRestApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, ]) } else { switch (forceRender) { case 'WikimediaDesktop': - if (MediaWiki.hasWikimediaDesktopRestApi()) { - this.baseUrl = MediaWiki.desktopRestApiUrl.href - this.baseUrlForMainPage = MediaWiki.desktopRestApiUrl.href + if (MediaWiki.hasWikimediaDesktopApi()) { + this.baseUrl = MediaWiki.desktopApiUrl.href + this.baseUrlForMainPage = MediaWiki.desktopApiUrl.href break } break diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 434ae49d..dd26d16e 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -59,11 +59,11 @@ class MediaWiki { public modulePath: string // only for reading public _modulePathOpt: string // only for whiting to generate modulePath public webUrl: URL - public desktopRestApiUrl: URL - public mobileRestApiUrl: URL + public desktopApiUrl: URL + public mobileApiUrl: URL - #hasWikimediaDesktopRestApi: boolean | null - #hasWikimediaMobileRestApi: boolean | null + #hasWikimediaDesktopApi: boolean | null + #hasWikimediaMobileApi: boolean | null #hasVisualEditorApi: boolean | null #hasCoordinates: boolean | null @@ -123,8 +123,8 @@ class MediaWiki { formatversion: '2', } - this.#hasWikimediaDesktopRestApi = null - this.#hasWikimediaMobileRestApi = null + this.#hasWikimediaDesktopApi = null + this.#hasWikimediaMobileApi = null this.#hasVisualEditorApi = null this.#hasCoordinates = null } @@ -133,20 +133,20 @@ class MediaWiki { this.initializeMediaWikiDefaults() } - public async 
hasWikimediaDesktopRestApi(): Promise { - if (this.#hasWikimediaDesktopRestApi === null) { - this.#hasWikimediaDesktopRestApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) - return this.#hasWikimediaDesktopRestApi + public async hasWikimediaDesktopApi(): Promise { + if (this.#hasWikimediaDesktopApi === null) { + this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaDesktopApi } - return this.#hasWikimediaDesktopRestApi + return this.#hasWikimediaDesktopApi } - public async hasWikimediaMobileRestApi(): Promise { - if (this.#hasWikimediaMobileRestApi === null) { - this.#hasWikimediaMobileRestApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) - return this.#hasWikimediaMobileRestApi + public async hasWikimediaMobileApi(): Promise { + if (this.#hasWikimediaMobileApi === null) { + this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + return this.#hasWikimediaMobileApi } - return this.#hasWikimediaMobileRestApi + return this.#hasWikimediaMobileApi } public async hasVisualEditorApi(): Promise { @@ -182,11 +182,11 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath) - this.mobileRestApiUrl = baseUrlDirector.buildMobileRestApiURL(this.#restApiPath) + this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#restApiPath) + this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#restApiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) - this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href) 
- this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileRestApiUrl.href) + this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) + this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 004afd40..04c2bb99 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -211,8 +211,8 @@ async function execute(argv: any) { MediaWiki.apiCheckArticleId = mwMetaData.mainPage await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - const hasWikimediaMobileRestApi = await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls(forceRender) @@ -421,7 +421,7 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileRestApi, forceRender) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index 47223f3c..cffb1cab 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -10,15 +10,15 @@ export class RendererBuilder { public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options - const [hasVisualEditorApi, hasWikimediaDesktopRestApi, hasWikimediaMobileRestApi] = await Promise.all([ + const 
[hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ MediaWiki.hasVisualEditorApi(), - MediaWiki.hasWikimediaDesktopRestApi(), - MediaWiki.hasWikimediaMobileRestApi(), + MediaWiki.hasWikimediaDesktopApi(), + MediaWiki.hasWikimediaMobileApi(), ]) switch (renderType) { case 'desktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { @@ -28,18 +28,18 @@ export class RendererBuilder { process.exit(1) } case 'mobile': - if (hasWikimediaMobileRestApi) { + if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') process.exit(1) case 'auto': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { // Choose WikimediaDesktopRenderer if it's present, regardless of hasVisualEditorApi value return new WikimediaDesktopRenderer() } else if (hasVisualEditorApi) { return new VisualEditorRenderer() - } else if (hasWikimediaMobileRestApi) { + } else if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -49,7 +49,7 @@ export class RendererBuilder { // renderName argument is required for 'specific' mode switch (renderName) { case 'WikimediaDesktop': - if (hasWikimediaDesktopRestApi) { + if (hasWikimediaDesktopApi) { return new WikimediaDesktopRenderer() } logger.error('Cannot create an instance of WikimediaDesktop renderer.') @@ -61,7 +61,7 @@ export class RendererBuilder { logger.error('Cannot create an instance of VisualEditor renderer.') process.exit(1) case 'WikimediaMobile': - if (hasWikimediaMobileRestApi) { + if (hasWikimediaMobileApi) { return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 
004013e4..7a749618 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -185,6 +185,9 @@ export class WikimediaMobileRenderer extends Renderer { .reference-link::after { content: none !important; } + .mw-body h3, .mw-body h2 { + width: auto; + } ` doc.head.appendChild(styleEl) diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 2f8f7949..922d2b9e 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -14,21 +14,21 @@ export default class BaseURLDirector { return urlBuilder.setDomain(this.baseDomain).setPath(path).build(true) } - buildRestApiURL(path?: string) { + buildWikimediaApiURL(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1') .build(true, '/') } - buildDesktopRestApiURL(path?: string) { + buildDesktopApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/html') .build(true, '/') } - buildMobileRestApiURL(path?: string) { + buildMobileApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 
'api/rest_v1/page/mobile-html') diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 224c80fc..a9c45b57 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -12,7 +12,7 @@ import { config } from '../config.js' import { getSizeFromUrl, cleanupAxiosError } from './misc.js' import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } from './const.js' import urlHelper from './url.helper.js' -import { RendererBuilderOptions, Renderer } from '../renderers/abstract.renderer.js' +import { Renderer } from '../renderers/abstract.renderer.js' import { RendererBuilder } from '../renderers/renderer.builder.js' export async function downloadFiles(fileStore: RKVS, retryStore: RKVS, zimCreator: ZimCreator, dump: Dump, downloader: Downloader, retryCounter = 0) { @@ -231,7 +231,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str /* * Fetch Articles */ -export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileRestApi: boolean, forceRender = null) { +export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() let jsConfigVars = '' @@ -241,26 +241,21 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade const rendererBuilder = new RendererBuilder() - let rendererBuilderOptions: RendererBuilderOptions - let mainPageRenderer let articlesRenderer if (forceRender) { - rendererBuilderOptions = { + // All articles and main page will use the same renderer if 'forceRender' is specified + const renderer = await rendererBuilder.createRenderer({ renderType: 'specific', renderName: forceRender, - } - // All articles and main page will use the same renderer if 'forceRender' is specified - mainPageRenderer = await 
rendererBuilder.createRenderer(rendererBuilderOptions) - articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + }) + mainPageRenderer = renderer + articlesRenderer = renderer } else { - rendererBuilderOptions = { - renderType: 'desktop', - } - mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) - // If the mobile renderer API is not available, switch articles rendering to the auto mode instead - rendererBuilderOptions.renderType = hasWikimediaMobileRestApi ? 'mobile' : 'auto' - articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) + articlesRenderer = await rendererBuilder.createRenderer({ + renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', + }) } if (dump.customProcessor?.shouldKeepArticle) { diff --git a/test/e2e/articleLists.test.ts b/test/e2e/articleLists.test.ts index 84547681..ad85cc1c 100644 --- a/test/e2e/articleLists.test.ts +++ b/test/e2e/articleLists.test.ts @@ -22,6 +22,7 @@ describe('articleList', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('articleList and articleListIgnore check', async () => { diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index e957a333..e1198ec8 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -18,6 +18,7 @@ describe('bm', () => { outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/downloadImage.e2e.test.ts b/test/e2e/downloadImage.e2e.test.ts index cc3c9078..774d67b0 100644 --- a/test/e2e/downloadImage.e2e.test.ts +++ b/test/e2e/downloadImage.e2e.test.ts @@ -20,6 +20,7 @@ describeIf('Check image downloading from S3 using optimisationCacheUrl parameter articleList: 'Paris', format: ['nodet'], optimisationCacheUrl: process.env.S3_URL, + 
forceRender: 'WikimediaDesktop', } test('right scrapping from fr.wikipedia.org with optimisationCacheUrl parameter', async () => { diff --git a/test/e2e/en10.e2e.test.ts b/test/e2e/en10.e2e.test.ts index 543fe901..f62e6677 100644 --- a/test/e2e/en10.e2e.test.ts +++ b/test/e2e/en10.e2e.test.ts @@ -21,6 +21,7 @@ describe('en10', () => { redis: process.env.REDIS, // format: ['nopic', 'novid', 'nopdf', 'nodet'], format: ['nopic', 'nopdf'], + forceRender: 'WikimediaDesktop', } test('Simple articleList', async () => { diff --git a/test/e2e/extra.e2e.test.ts b/test/e2e/extra.e2e.test.ts index 78562f80..6ab70a06 100644 --- a/test/e2e/extra.e2e.test.ts +++ b/test/e2e/extra.e2e.test.ts @@ -36,6 +36,7 @@ AC/DC` outputDirectory: testId, redis: process.env.REDIS, format: ['nopic'], + forceRender: 'WikimediaDesktop', }) // Created 1 outputs diff --git a/test/e2e/multimediaContent.test.ts b/test/e2e/multimediaContent.test.ts index 511a3280..f16d5808 100644 --- a/test/e2e/multimediaContent.test.ts +++ b/test/e2e/multimediaContent.test.ts @@ -18,6 +18,7 @@ describe('Multimedia', () => { outputDirectory: testId, redis: process.env.REDIS, customZimDescription: 'Example of the description', + forceRender: 'WikimediaDesktop', } test('check multimedia content from wikipedia test page', async () => { diff --git a/test/e2e/treatMedia.e2e.test.ts b/test/e2e/treatMedia.e2e.test.ts index 22e045ec..f5ac7a13 100644 --- a/test/e2e/treatMedia.e2e.test.ts +++ b/test/e2e/treatMedia.e2e.test.ts @@ -18,6 +18,7 @@ describe('treatment test', () => { articleList, outputDirectory: testId, redis: process.env.REDIS, + forcdRender: 'WikimediaDesktop', } test('media file from hidden element should not be downloaded', async () => { diff --git a/test/e2e/vikidia.e2e.test.ts b/test/e2e/vikidia.e2e.test.ts index 694d15f3..30a7f684 100644 --- a/test/e2e/vikidia.e2e.test.ts +++ b/test/e2e/vikidia.e2e.test.ts @@ -18,6 +18,7 @@ describe('vikidia', () => { redis: process.env.REDIS, articleList: 'Alaska', 
customZimDescription: 'Alaska article', + forcdRender: 'WikimediaDesktop', } test('right scrapping from vikidia.org', async () => { diff --git a/test/e2e/wikisource.e2e.test.ts b/test/e2e/wikisource.e2e.test.ts index 3c8def91..d8c4e11a 100644 --- a/test/e2e/wikisource.e2e.test.ts +++ b/test/e2e/wikisource.e2e.test.ts @@ -18,6 +18,7 @@ describe('wikisource', () => { redis: process.env.REDIS, format: ['nopic'], noLocalParserFallback: true, + forcdRender: 'WikimediaDesktop', } test('Wikisource List', async () => { diff --git a/test/e2e/zimMetadata.e2e.test.ts b/test/e2e/zimMetadata.e2e.test.ts index cc60ee59..75161c15 100644 --- a/test/e2e/zimMetadata.e2e.test.ts +++ b/test/e2e/zimMetadata.e2e.test.ts @@ -23,6 +23,7 @@ describe('zimMetadata', () => { customZimLongDescription: 'Example of the long description', customZimTitle: 'Example of the title', publisher: 'Example of the publisher', + forcdRender: 'WikimediaDesktop', } test('check all zim metadata using zimdump', async () => { diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 5aadd9d4..76a18810 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -11,43 +11,43 @@ describe('BaseURLDirector', () => { }) }) - describe('buildRestApiURL', () => { + describe('buildWikimediaApiURL', () => { it('should return rest URL with provided path and trailing char at the end', () => { - const url = baseUrlDirector.buildRestApiURL('api/rest_v2') + const url = baseUrlDirector.buildWikimediaApiURL('api/rest_v2') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/') }) it('should return rest URL with default path and trailing char at the end', () => { - const url = baseUrlDirector.buildRestApiURL() + const url = baseUrlDirector.buildWikimediaApiURL() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/') }) }) - describe('buildMobileRestApiURL', () => { + describe('buildMobileApiUrl', () => { 
it('should return mobile rest URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildMobileRestApiURL('api/rest_v2/page/mobile-html') + const url = baseUrlDirector.buildMobileApiUrl('api/rest_v2/page/mobile-html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') }) it('should return mobile rest URL with default path and trailing char', () => { - const url = baseUrlDirector.buildMobileRestApiURL() + const url = baseUrlDirector.buildMobileApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') }) }) - describe('buildDesktopRestApiURL', () => { + describe('buildDesktopApiUrl', () => { it('should return a desktop URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL('api/rest_v2/page/html') + const url = baseUrlDirector.buildDesktopApiUrl('api/rest_v2/page/html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/html/') }) it('should return a desktop URL with default path and trailing char', () => { - const url = baseUrlDirector.buildDesktopRestApiURL() + const url = baseUrlDirector.buildDesktopApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/') }) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index e0dd8504..f4a208f7 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -32,8 +32,8 @@ describe('Downloader class', () => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() }) diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index 8c92d42c..9a6687ee 100644 --- 
a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -65,7 +65,7 @@ describe('RendererBuilder', () => { const { MediaWiki } = await setupScrapeClasses() // en wikipedia // Force MediaWiki to have capability for the WikimediaDesktop for test purpose - jest.spyOn(MediaWiki, 'hasWikimediaDesktopRestApi').mockResolvedValue(true) + jest.spyOn(MediaWiki, 'hasWikimediaDesktopApi').mockResolvedValue(true) const rendererBuilderOptions = { MediaWiki, @@ -81,8 +81,8 @@ describe('RendererBuilder', () => { it('should throw an error for unknown RendererAPI in specific mode', async () => { const { downloader, MediaWiki } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 8d1a2e3a..124ee2ed 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -21,10 +21,10 @@ describe('saveArticles', () => { test('Article html processing', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const _articlesDetail = await downloader.getArticleDetailsIds(['London']) const articlesDetail = mwRetToArticleDetail(_articlesDetail) const { articleDetailXId } = RedisStore @@ -46,6 +46,7 @@ describe('saveArticles', () => { downloader, dump, true, + 'WikimediaDesktop', ) // Successfully 
scrapped existent articles @@ -129,7 +130,7 @@ describe('saveArticles', () => { test('Load main page and check that it is without header', async () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') const articleId = 'Main_Page' const articleUrl = getArticleUrl(downloader, dump, articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) @@ -225,8 +226,8 @@ describe('saveArticles', () => { test('--customFlavour', async () => { const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() await downloader.setBaseUrls() class CustomFlavour implements CustomProcessor { @@ -275,6 +276,7 @@ describe('saveArticles', () => { downloader, dump, true, + 'WikimediaDesktop', ) const ParisDocument = domino.createDocument(writtenArticles.Paris.bufferData) diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 49001797..03b17b5c 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -140,10 +140,10 @@ describe('Styles', () => { await RedisStore.redirectsXId.flush() const { MediaWiki, downloader, dump } = await setupScrapeClasses() // en wikipedia await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await downloader.setBaseUrls('WikimediaDesktop') await 
getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) diff --git a/test/unit/webpAndRedirection.test.ts b/test/unit/webpAndRedirection.test.ts index 4a094a76..33f93150 100644 --- a/test/unit/webpAndRedirection.test.ts +++ b/test/unit/webpAndRedirection.test.ts @@ -35,6 +35,7 @@ Real-time computer graphics` outputDirectory: testId, redis: process.env.REDIS, webp: true, + forceRender: 'WikimediaDesktop', }) const zimFile = new ZimReader(outFiles[0].outFile) diff --git a/test/util.ts b/test/util.ts index 2f01b581..7625cb78 100644 --- a/test/util.ts +++ b/test/util.ts @@ -37,8 +37,8 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() - await MediaWiki.hasWikimediaMobileRestApi() + await MediaWiki.hasWikimediaDesktopApi() + await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() const dump = new Dump(format, {} as any, MediaWiki.metaData) From 7562e4ec3aa37e4129c1e8a72de368a6293ed917 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 11:23:17 +0300 Subject: [PATCH 33/58] Update naming for mw api across mwoffliner --- src/MediaWiki.ts | 22 +++++++++++----------- src/mwoffliner.lib.ts | 4 ++-- src/parameterList.ts | 4 ++-- src/types.d.ts | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index dd26d16e..dfba0e71 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -44,10 +44,10 @@ class MediaWiki { public queryOpts: QueryOpts #wikiPath: string - #restApiPath: string + #apiPath: string #username: string #password: string - #apiPath: string + #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector private wikimediaDesktopUrlDirector: DesktopURLDirector @@ -75,12 +75,12 @@ class MediaWiki { this.#password = value } 
- set apiPath(value: string) { - this.#apiPath = value + set apiActionPath(value: string) { + this.#apiActionPath = value } - set restApiPath(value: string) { - this.#restApiPath = value + set apiPath(value: string) { + this.#apiPath = value } set domain(value: string) { @@ -109,7 +109,7 @@ class MediaWiki { this.namespaces = {} this.namespacesToMirror = [] - this.#apiPath = 'w/api.php' + this.#apiActionPath = 'w/api.php' this.#wikiPath = 'wiki/' this.apiCheckArticleId = 'MediaWiki:Sidebar' @@ -179,11 +179,11 @@ class MediaWiki { private initMWApis() { const baseUrlDirector = new BaseURLDirector(this.baseUrl.href) this.webUrl = baseUrlDirector.buildURL(this.#wikiPath) - this.apiUrl = baseUrlDirector.buildURL(this.#apiPath) + this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#restApiPath) - this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#restApiPath) + this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) + this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) @@ -416,7 +416,7 @@ class MediaWiki { webUrlPath: this.webUrl.pathname, wikiPath: this.#wikiPath, baseUrl: this.baseUrl.href, - apiPath: this.#apiPath, + apiActionPath: this.#apiActionPath, domain: this.#domain, textDir: textDir as TextDirection, diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 04c2bb99..f58c8711 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -75,8 +75,8 @@ async function execute(argv: any) { keepEmptyParagraphs, mwUrl, mwWikiPath, + mwActionApiPath, mwApiPath, - mwRestApiPath, 
mwModulePath, mwDomain, mwUsername, @@ -158,8 +158,8 @@ async function execute(argv: any) { /* Wikipedia/... URL; Normalize by adding trailing / as necessary */ MediaWiki.base = mwUrl MediaWiki.getCategories = !!argv.getCategories + MediaWiki.apiActionPath = mwActionApiPath MediaWiki.apiPath = mwApiPath - MediaWiki.restApiPath = mwRestApiPath MediaWiki.modulePathOpt = mwModulePath MediaWiki.domain = mwDomain MediaWiki.password = mwPassword diff --git a/src/parameterList.ts b/src/parameterList.ts index 6f31e797..eee869e4 100644 --- a/src/parameterList.ts +++ b/src/parameterList.ts @@ -17,8 +17,8 @@ export const parameterDescriptions = { 'Specify a flavour for the scraping. If missing, scrape all article contents. Each --format argument will cause a new local file to be created but options can be combined. Supported options are:\n * novid: no video & audio content\n * nopic: no pictures (implies "novid")\n * nopdf: no PDF files\n * nodet: only the first/head paragraph (implies "novid")\nFormat names can also be aliased using a ":"\nExample: "... 
--format=nopic:mini --format=novid,nopdf"', keepEmptyParagraphs: 'Keep all paragraphs, even empty ones.', mwWikiPath: 'Mediawiki wiki base path (per default "/wiki/")', - mwApiPath: 'Mediawiki API path (per default "/w/api.php")', - mwRestApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', + mwActionApiPath: 'Mediawiki action API path (per default "/w/api.php")', + mwApiPath: 'Mediawiki Rest API path (per default "/api/rest_v1")', mwModulePath: 'Mediawiki module load path (per default "/w/load.php")', mwDomain: 'Mediawiki user domain (thought for private wikis)', mwUsername: 'Mediawiki username (thought for private wikis)', diff --git a/src/types.d.ts b/src/types.d.ts index 9e25f752..00856ca6 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -159,7 +159,7 @@ interface MWMetaData { baseUrl: string wikiPath: string - apiPath: string + apiActionPath: string domain: string webUrl: string apiUrl: string @@ -178,8 +178,8 @@ interface MWNamespaces { interface MWConfig { base: string wikiPath?: string + apiActionPath?: string apiPath?: string - restApiPath?: string domain?: string username?: string password?: string From 33db5a45806f0e37308fe87f23e4b456df20eea0 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 25 Sep 2023 17:58:00 +0300 Subject: [PATCH 34/58] Split article treatment flow for mobile render to represent images inside figure tag --- res/mobile_article_page.css | 9 ++++ res/script.js | 15 ++++++ src/Downloader.ts | 7 +++ src/config.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 56 ++++++++++------------ 5 files changed, 56 insertions(+), 32 deletions(-) create mode 100644 res/mobile_article_page.css diff --git a/res/mobile_article_page.css b/res/mobile_article_page.css new file mode 100644 index 00000000..6de633f2 --- /dev/null +++ b/res/mobile_article_page.css @@ -0,0 +1,9 @@ +body { + margin: 0 auto; +} +.reference-link::after { + content: none !important; +} +.mw-body h3, .mw-body h2 { + width: auto; +} diff --git 
a/res/script.js b/res/script.js index 2e484cdb..cdb654c6 100644 --- a/res/script.js +++ b/res/script.js @@ -27,6 +27,21 @@ window.onload = function () { /* Add the user-agent to allow dedicated CSS rules (like for KaiOS) */ document.querySelector('body').setAttribute('data-useragent', navigator.userAgent); + + // Check if there is a PCS output page + if (document.querySelector('#pcs')) { + document.addEventListener("DOMContentLoaded", function() { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + }); + } + } /* WebP Polyfill */ diff --git a/src/Downloader.ts b/src/Downloader.ts index 12e48210..fd6f8034 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -198,6 +198,13 @@ class Downloader { break } break + case 'WikimediaMobile': + if (MediaWiki.hasWikimediaMobileApi()) { + this.baseUrl = MediaWiki.mobileApiUrl.href + this.baseUrlForMainPage = MediaWiki.mobileApiUrl.href + break + } + break default: throw new Error('Unable to find specific API end-point to retrieve article HTML') } diff --git a/src/config.ts b/src/config.ts index d30ae925..68d4338c 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,6 +56,7 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], + mobileArticleCssResources: ['mobile_article_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 7a749618..67cb0d02 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ 
b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,10 +1,11 @@ import * as domino from 'domino' import * as logger from '../Logger.js' +import { config } from '../config.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' -type PipeFunction = (data: string) => string +type PipeFunction = (value: DominoElement) => DominoElement | Promise // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' export class WikimediaMobileRenderer extends Renderer { @@ -27,25 +28,32 @@ export class WikimediaMobileRenderer extends Renderer { const displayTitle = this.getStrippedTitle(renderOpts) if (data) { - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(data, dump, articleId, articleDetail, _moduleDependencies, webp) - const finalHTMLDoc = domino.createDocument(finalHTML) - const mobileHTML = this.pipeMobileTransformations( - finalHTMLDoc, - this.addMobileModules, + let mediaDependenciesVal + let subtitlesVal + const mobileHTML = domino.createDocument(data) + const finalHTMLMobile = await this.pipeMobileTransformations( + mobileHTML, this.convertLazyLoadToImages, this.removeEditContainer, this.removeHiddenClass, + async (doc) => { + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(doc.documentElement.outerHTML, dump, articleId, articleDetail, _moduleDependencies, webp) + + mediaDependenciesVal = mediaDependencies + subtitlesVal = subtitles + return domino.createDocument(finalHTML) + }, this.restoreLinkDefaults, - this.disableClientLinkListener, + this.addMobileModules, this.overrideMobileStyles, ) result.push({ articleId, displayTitle, - html: mobileHTML.documentElement.outerHTML, - mediaDependencies, - subtitles, + html: finalHTMLMobile.documentElement.outerHTML, + mediaDependencies: mediaDependenciesVal, + subtitles: subtitlesVal, }) return result } @@ -55,8 +63,12 @@ export class 
WikimediaMobileRenderer extends Renderer { } } - private pipeMobileTransformations(value, ...fns: PipeFunction[]) { - return fns.reduce((acc, fn) => fn(acc), value) + private async pipeMobileTransformations(value: DominoElement, ...fns: PipeFunction[]): Promise { + let result: DominoElement | Promise = value + for (const fn of fns) { + result = fn(await result) + } + return result } private addMobileModules(doc: DominoElement) { @@ -156,26 +168,6 @@ export class WikimediaMobileRenderer extends Renderer { return doc } - private disableClientLinkListener(doc: DominoElement) { - const scriptEl = doc.createElement('script') - scriptEl.type = 'text/javascript' - scriptEl.text = ` - document.addEventListener("DOMContentLoaded", function() { - const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) - disabledElems.forEach((elem) => { - elem.addEventListener('click', (event) => { - event.stopPropagation(); - }, true); - }); - }); - ` - doc.head.appendChild(scriptEl) - - return doc - } - private overrideMobileStyles(doc: DominoElement) { const styleEl = doc.createElement('style') styleEl.innerHTML = ` From 50ebfa1189468b6abc3227dfdf5079096baca4e7 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 26 Sep 2023 17:15:53 +0300 Subject: [PATCH 35/58] Apply css and js module download for WikimediaMobile render --- res/script.js | 15 ------ res/templates/page.html | 41 +++++++--------- src/Downloader.ts | 21 +++++++- src/Dump.ts | 1 + src/MediaWiki.ts | 3 ++ src/config.ts | 1 - src/mwoffliner.lib.ts | 1 + src/renderers/abstract.renderer.ts | 54 ++++++++++++++------- src/renderers/renderer.builder.ts | 6 ++- src/renderers/wikimedia-mobile.renderer.ts | 27 ----------- src/types.d.ts | 1 + src/util/builders/url/base.director.ts | 7 +++ src/util/misc.ts | 6 +++ src/util/saveArticles.ts | 22 +++++---- 
test/e2e/mobileRenderIntegrity.test.ts | 56 ++++++++++++++++++++++ 15 files changed, 167 insertions(+), 95 deletions(-) create mode 100644 test/e2e/mobileRenderIntegrity.test.ts diff --git a/res/script.js b/res/script.js index cdb654c6..2e484cdb 100644 --- a/res/script.js +++ b/res/script.js @@ -27,21 +27,6 @@ window.onload = function () { /* Add the user-agent to allow dedicated CSS rules (like for KaiOS) */ document.querySelector('body').setAttribute('data-useragent', navigator.userAgent); - - // Check if there is a PCS output page - if (document.querySelector('#pcs')) { - document.addEventListener("DOMContentLoaded", function() { - const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) - disabledElems.forEach((elem) => { - elem.addEventListener('click', (event) => { - event.stopPropagation(); - }, true); - }); - }); - } - } /* WebP Polyfill */ diff --git a/res/templates/page.html b/res/templates/page.html index 3035a043..07dec78f 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -1,31 +1,24 @@ - - - - - - __ARTICLE_CANONICAL_LINK__ - __ARTICLE_CSS_LIST__ - __CSS_LINKS__ - __JS_SCRIPTS__ - - - -
-
-
- -
-

-
+ + + + __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ + + +
+
+
+ +
+

+
-
- __ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - - + __ARTICLE_CONFIGVARS_LIST__ + __ARTICLE_JS_LIST__ + diff --git a/src/Downloader.ts b/src/Downloader.ts index fd6f8034..a3066079 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -87,6 +87,8 @@ class Downloader { public arrayBufferRequestOptions: AxiosRequestConfig public jsonRequestOptions: AxiosRequestConfig public streamRequestOptions: AxiosRequestConfig + public mobileJsDependenciesList: string[] = [] + public mobileStyleDependenciesList: string[] = [] private readonly uaString: string private activeRequests = 0 @@ -694,7 +696,24 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view' - return { jsConfigVars, jsDependenciesList, styleDependenciesList } + // Download mobile page dependencies only once + if (this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}${title}`) + mobileModulesData.forEach((module: string) => { + if (module.includes('javascript')) { + this.mobileJsDependenciesList.push(module) + } else if (module.includes('css')) { + this.mobileStyleDependenciesList.push(module) + } + }) + } + return { + jsConfigVars, + jsDependenciesList, + styleDependenciesList, + mobileJsDependenciesList: this.mobileJsDependenciesList, + mobileStyleDependenciesList: this.mobileStyleDependenciesList, + } } // Solution to handle aws js sdk v3 from https://github.com/aws/aws-sdk-js-v3/issues/1877 diff --git a/src/Dump.ts b/src/Dump.ts index f467bbea..cbc1e278 100644 --- a/src/Dump.ts +++ b/src/Dump.ts @@ -29,6 +29,7 @@ interface DumpOpts { keepEmptyParagraphs: boolean tags?: string filenameDate: string + isMobileRenderer: boolean } export class Dump { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index dfba0e71..9780bd76 100644 --- a/src/MediaWiki.ts +++ 
b/src/MediaWiki.ts @@ -58,6 +58,7 @@ class MediaWiki { public apiUrl: URL public modulePath: string // only for reading public _modulePathOpt: string // only for whiting to generate modulePath + public mobileModulePath: string public webUrl: URL public desktopApiUrl: URL public mobileApiUrl: URL @@ -185,6 +186,7 @@ class MediaWiki { this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) + this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) @@ -413,6 +415,7 @@ class MediaWiki { webUrl: this.webUrl.href, apiUrl: this.apiUrl.href, modulePath: this.modulePath, + mobileModulePath: this.mobileModulePath, webUrlPath: this.webUrl.pathname, wikiPath: this.#wikiPath, baseUrl: this.baseUrl.href, diff --git a/src/config.ts b/src/config.ts index 68d4338c..d30ae925 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,7 +56,6 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], - mobileArticleCssResources: ['mobile_article_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index f58c8711..c8022442 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -338,6 +338,7 @@ async function execute(argv: any) { keepEmptyParagraphs, tags: customZimTags, filenameDate, + isMobileRenderer: false, }, { ...mwMetaData, mainPage }, customProcessor, diff --git a/src/renderers/abstract.renderer.ts 
b/src/renderers/abstract.renderer.ts index a6522ca0..0a986f31 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,6 +20,8 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, + genHeaderMobileScript, + genHeaderMobileCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -466,27 +468,43 @@ export abstract class Renderer { articleDetail: ArticleDetail, articleDetailXId: RKVS, ): Promise { - const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { + const { jsConfigVars, jsDependenciesList, styleDependenciesList, mobileJsDependenciesList, mobileStyleDependenciesList } = moduleDependencies as { jsConfigVars: string | RegExpExecArray jsDependenciesList: string[] styleDependenciesList: string[] - } - - const htmlTemplateDoc = domino.createDocument( - htmlTemplateCode(articleId) - .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') - .replace( - '__ARTICLE_JS_LIST__', - jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', - ) - .replace( - '__ARTICLE_CSS_LIST__', - styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ), - ) + mobileJsDependenciesList: string[] + mobileStyleDependenciesList: string[] + } + + const isMobileRenderer = dump.opts.isMobileRenderer + + // Conditional replacements based on mobile render enabling + const articleConfigVarsList = isMobileRenderer ? '' : jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '' + const articleJsList = isMobileRenderer + ? 
mobileJsDependenciesList.length !== 0 + ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderMobileScript(oneMobJsDep)).join('\n') + : '' + : jsDependenciesList.length !== 0 + ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '' + const articleCssList = isMobileRenderer + ? mobileStyleDependenciesList.length !== 0 + ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderMobileCSSLink(oneMobCssDep)).join('\n') + : '' + : styleDependenciesList.length !== 0 + ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '' + + // Perform replacements + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', articleConfigVarsList) + .replace('__ARTICLE_JS_LIST__', articleJsList) + .replace('__ARTICLE_CSS_LIST__', articleCssList) + .replace('__JS_SCRIPTS_MOBILE__', isMobileRenderer ? articleJsList : '') + .replace('__CSS_LINKS_MOBILE__', isMobileRenderer ? 
articleCssList : '') + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) /* Create final document by merging template and parsoid documents */ htmlTemplateDoc.getElementById('mw-content-text').style.setProperty('direction', dump.mwMetaData.textDir) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index cffb1cab..d0f17908 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -4,10 +4,11 @@ import { VisualEditorRenderer } from './visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' +import { Dump } from './../Dump.js' import * as logger from './../Logger.js' export class RendererBuilder { - public async createRenderer(options: RendererBuilderOptions): Promise { + public async createRenderer(options: RendererBuilderOptions, dump: Dump): Promise { const { renderType, renderName } = options const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ @@ -29,6 +30,7 @@ export class RendererBuilder { } case 'mobile': if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') @@ -40,6 +42,7 @@ export class RendererBuilder { } else if (hasVisualEditorApi) { return new VisualEditorRenderer() } else if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -62,6 +65,7 @@ export class RendererBuilder { process.exit(1) case 'WikimediaMobile': if (hasWikimediaMobileApi) { + dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-mobile.renderer.ts 
b/src/renderers/wikimedia-mobile.renderer.ts index 67cb0d02..3b741f39 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,6 +1,5 @@ import * as domino from 'domino' import * as logger from '../Logger.js' -import { config } from '../config.js' import { Renderer } from './abstract.renderer.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' @@ -44,7 +43,6 @@ export class WikimediaMobileRenderer extends Renderer { return domino.createDocument(finalHTML) }, this.restoreLinkDefaults, - this.addMobileModules, this.overrideMobileStyles, ) @@ -71,31 +69,6 @@ export class WikimediaMobileRenderer extends Renderer { return result } - private addMobileModules(doc: DominoElement) { - const protocol = 'https://' - // TODO: query this instead of hardcoding. - const offlineResourcesCSSList = [ - 'meta.wikimedia.org/api/rest_v1/data/css/mobile/base', - 'meta.wikimedia.org/api/rest_v1/data/css/mobile/pcs', - 'en.wikipedia.org/api/rest_v1/data/css/mobile/site', - ] - const offlineResourcesJSList = ['meta.wikimedia.org/api/rest_v1/data/javascript/mobile/pcs'] - - offlineResourcesCSSList.forEach((cssUrl) => { - const linkEl = doc.createElement('link') as DominoElement - Object.assign(linkEl, { rel: 'stylesheet', href: `${protocol}${cssUrl}` }) - doc.head.appendChild(linkEl) - }) - - offlineResourcesJSList.forEach((jsUrl) => { - const scriptEl = doc.createElement('script') as DominoElement - scriptEl.setAttribute('src', `${protocol}${jsUrl}`) - doc.head.appendChild(scriptEl) - }) - - return doc - } - private removeEditContainer(doc: DominoElement) { const editContainers = doc.querySelectorAll('.pcs-edit-section-link-container') diff --git a/src/types.d.ts b/src/types.d.ts index 00856ca6..1170a392 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -165,6 +165,7 @@ interface MWMetaData { apiUrl: string webUrlPath: string modulePath: string + 
mobileModulePath: string } interface MWNamespaces { diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index 922d2b9e..d91c6f13 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -41,4 +41,11 @@ export default class BaseURLDirector { .setPath(path ?? 'w/load.php') .build(false, '?') } + + buildMobileModuleURL(path?: string) { + return urlBuilder + .setDomain(this.baseDomain) + .setPath(path ?? 'api/rest_v1/page/mobile-html-offline-resources') + .build(false, '/') + } } diff --git a/src/util/misc.ts b/src/util/misc.ts index ee31a5f8..6ca59343 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -198,6 +198,9 @@ export function genHeaderCSSLink(config: Config, css: string, articleId: string, const upStr = '../'.repeat(slashesInUrl + 1) return `` } +export function genHeaderMobileCSSLink(css: string) { + return `` +} export function genHeaderScript(config: Config, js: string, articleId: string, subDirectory = '', attributes = '') { const resourceNamespace = '-' const slashesInUrl = articleId.split('/').length - 1 @@ -205,6 +208,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } +export function genHeaderMobileScript(js: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index a9c45b57..43ed278b 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -245,17 +245,23 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let articlesRenderer if (forceRender) { // All articles and main page will use the same renderer if 'forceRender' is specified - const renderer = await rendererBuilder.createRenderer({ - renderType: 'specific', - renderName: forceRender, - }) + const renderer = await rendererBuilder.createRenderer( + { + renderType: 'specific', + renderName: forceRender, + }, + dump, + ) mainPageRenderer = renderer articlesRenderer = renderer } else { - mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) - articlesRenderer = await rendererBuilder.createRenderer({ - renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', - }) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }, dump) + articlesRenderer = await rendererBuilder.createRenderer( + { + renderType: hasWikimediaMobileApi ? 
'mobile' : 'auto', + }, + dump, + ) } if (dump.customProcessor?.shouldKeepArticle) { diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts new file mode 100644 index 00000000..c87e8700 --- /dev/null +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -0,0 +1,56 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { zimcheckAvailable, zimdumpAvailable, zimcheck } from '../util.js' + +jest.setTimeout(200000) + +let zimcheckIsAvailable +let zimdumpIsAvailable + +beforeAll(async () => { + zimcheckIsAvailable = await zimcheckAvailable() + zimdumpIsAvailable = await zimdumpAvailable() +}) + +async function getOutFiles(testId: string, articleList: string, mwUrl: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'mail@mail.com', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: 'WikimediaMobile', + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +const commonTreatmentTest = async (articleList: string, mwUrl: string) => { + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' + console.log(`${missingTool} not installed, skipping test`) + return + } + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl) + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + + rimraf.sync(`./${testId}`) +} + +describe('Mobile render zim file integrity', () => { + const mwUrl = 'https://en.wikipedia.org' + const articleList = 'Canada' + + test('Test WikimediaMobile with en.wikipedia.org', async () => { + await commonTreatmentTest(articleList, mwUrl) + }) +}) From 8824f8a9f8f8f601093130e29bbe1fdb37f37212 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 27 Sep 2023 20:54:07 +0300 Subject: [PATCH 36/58] Refactor modules for mobile renderer --- res/templates/page.html | 2 +- src/Downloader.ts | 23 +++++--- src/mwoffliner.lib.ts | 18 +++++- src/renderers/abstract.renderer.ts | 64 +++++++++++---------- src/renderers/renderer.builder.ts | 6 +- src/renderers/wikimedia-desktop.renderer.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 1 + src/util/dump.ts | 27 +++++++-- src/util/misc.ts | 6 -- src/util/saveArticles.ts | 32 ++++++----- test/e2e/mobileRenderIntegrity.test.ts | 3 +- 11 files changed, 111 insertions(+), 72 deletions(-) diff --git a/res/templates/page.html b/res/templates/page.html index 07dec78f..ac3fcd47 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -19,6 +19,6 @@

__ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ + __ARTICLE_JS_LIST__ diff --git a/src/Downloader.ts b/src/Downloader.ts index a3066079..dcc730c1 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -697,15 +697,20 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view' // Download mobile page dependencies only once - if (this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { - const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}${title}`) - mobileModulesData.forEach((module: string) => { - if (module.includes('javascript')) { - this.mobileJsDependenciesList.push(module) - } else if (module.includes('css')) { - this.mobileStyleDependenciesList.push(module) - } - }) + if ((await MediaWiki.hasWikimediaMobileApi()) && this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + try { + // TODO: An arbitrary title can be placed since all Wikimedia wikis have the same mobile offline resources + const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) + mobileModulesData.forEach((module: string) => { + if (module.includes('javascript')) { + this.mobileJsDependenciesList.push(module.replace('//', '')) + } else if (module.includes('css')) { + this.mobileStyleDependenciesList.push(module.replace('//', '')) + } + }) + } catch (err) { + throw new Error(`Error getting mobile modules ${err.message}`) + } } return { jsConfigVars, diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index c8022442..e03f60aa 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -422,17 +422,31 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) + const { 
jsModuleDependencies, cssModuleDependencies, jsMobileModuleDependencies, cssMobileModuleDependencies } = await saveArticles( + zimCreator, + downloader, + dump, + hasWikimediaMobileApi, + forceRender, + ) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) - const allDependenciesWithType = [ + let allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, ] + if (dump.opts.isMobileRenderer) { + allDependenciesWithType = [ + ...allDependenciesWithType, + { type: 'mobileJs', moduleList: Array.from(jsMobileModuleDependencies) }, + { type: 'mobileCss', moduleList: Array.from(cssMobileModuleDependencies) }, + ] + } + if (downloader.webp) { logger.log('Downloading polyfill module') importPolyfillModules(zimCreator) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 0a986f31..4a0938cd 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,8 +20,6 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, - genHeaderMobileScript, - genHeaderMobileCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -476,33 +474,41 @@ export abstract class Renderer { mobileStyleDependenciesList: string[] } - const isMobileRenderer = dump.opts.isMobileRenderer - - // Conditional replacements based on mobile render enabling - const articleConfigVarsList = isMobileRenderer ? '' : jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '' - const articleJsList = isMobileRenderer - ? mobileJsDependenciesList.length !== 0 - ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderMobileScript(oneMobJsDep)).join('\n') - : '' - : jsDependenciesList.length !== 0 - ? 
jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '' - const articleCssList = isMobileRenderer - ? mobileStyleDependenciesList.length !== 0 - ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderMobileCSSLink(oneMobCssDep)).join('\n') - : '' - : styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '' - - // Perform replacements - const htmlTemplateString = htmlTemplateCode(articleId) - .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - .replace('__ARTICLE_CONFIGVARS_LIST__', articleConfigVarsList) - .replace('__ARTICLE_JS_LIST__', articleJsList) - .replace('__ARTICLE_CSS_LIST__', articleCssList) - .replace('__JS_SCRIPTS_MOBILE__', isMobileRenderer ? articleJsList : '') - .replace('__CSS_LINKS_MOBILE__', isMobileRenderer ? articleCssList : '') + let htmlTemplateString = htmlTemplateCode(articleId).replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + + if (!dump.opts.isMobileRenderer) { + htmlTemplateString = htmlTemplateString + .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') + .replace( + '__ARTICLE_JS_LIST__', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + styleDependenciesList.length !== 0 + ? 
styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__JS_SCRIPTS_MOBILE__', '') + .replace('__CSS_LINKS_MOBILE__', '') + } else { + htmlTemplateString = htmlTemplateString + .replace('__ARTICLE_CONFIGVARS_LIST__', '') + .replace('__ARTICLE_JS_LIST__', '') + .replace('__ARTICLE_CSS_LIST__', '') + .replace( + '__JS_SCRIPTS_MOBILE__', + mobileJsDependenciesList.length !== 0 + ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__CSS_LINKS_MOBILE__', + mobileStyleDependenciesList.length !== 0 + ? mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + } const htmlTemplateDoc = domino.createDocument(htmlTemplateString) diff --git a/src/renderers/renderer.builder.ts b/src/renderers/renderer.builder.ts index d0f17908..cffb1cab 100644 --- a/src/renderers/renderer.builder.ts +++ b/src/renderers/renderer.builder.ts @@ -4,11 +4,10 @@ import { VisualEditorRenderer } from './visual-editor.renderer.js' import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js' import { WikimediaMobileRenderer } from './wikimedia-mobile.renderer.js' import { RendererBuilderOptions } from './abstract.renderer.js' -import { Dump } from './../Dump.js' import * as logger from './../Logger.js' export class RendererBuilder { - public async createRenderer(options: RendererBuilderOptions, dump: Dump): Promise { + public async createRenderer(options: RendererBuilderOptions): Promise { const { renderType, renderName } = options const [hasVisualEditorApi, hasWikimediaDesktopApi, hasWikimediaMobileApi] = await Promise.all([ @@ -30,7 +29,6 @@ export class RendererBuilder { } case 'mobile': if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new 
WikimediaMobileRenderer() } logger.error('No available mobile renderer.') @@ -42,7 +40,6 @@ export class RendererBuilder { } else if (hasVisualEditorApi) { return new VisualEditorRenderer() } else if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } else { logger.error('No render available at all.') @@ -65,7 +62,6 @@ export class RendererBuilder { process.exit(1) case 'WikimediaMobile': if (hasWikimediaMobileApi) { - dump.opts.isMobileRenderer = true return new WikimediaMobileRenderer() } logger.error('No available mobile renderer.') diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 9bba2c3c..43fc23a3 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -35,6 +35,7 @@ export class WikimediaDesktopRenderer extends Renderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + dump.opts.isMobileRenderer = false const articleDetail = await renderOpts.articleDetailXId.get(articleId) // Paginate when there are more than 200 subCategories diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 3b741f39..cc3df0dc 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -23,6 +23,7 @@ export class WikimediaMobileRenderer extends Renderer { try { const result: RenderOutput = [] const { data, articleId, webp, _moduleDependencies, dump } = renderOpts + dump.opts.isMobileRenderer = true const articleDetail = await renderOpts.articleDetailXId.get(articleId) const displayTitle = this.getStrippedTitle(renderOpts) diff --git a/src/util/dump.ts b/src/util/dump.ts index 2d6e63c5..50ecaa83 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -90,7 +90,7 @@ export async function 
getAndProcessStylesheets(downloader: Downloader, links: Ar }) } -export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') { +export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css' | 'mobileJs' | 'mobileCss') { const replaceCodeByRegex = (sourceText, replaceMap: Map) => { let text: string replaceMap.forEach((textToReplace, regEx) => { @@ -117,13 +117,19 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } let apiParameterOnly - if (type === 'js') { + let moduleApiUrl: string + if (type === 'js' || type === 'mobileJs') { apiParameterOnly = 'scripts' - } else if (type === 'css') { + } else if (type === 'css' || type === 'mobileCss') { apiParameterOnly = 'styles' } - const moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + if (type === 'js' || type === 'css') { + moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) + } else if (type === 'mobileJs' || type === 'mobileCss') { + moduleApiUrl = encodeURI(`https:${module}`) + } + logger.info(`Getting [${type}] module [${moduleApiUrl}]`) const { content } = await downloader.downloadContent(moduleApiUrl) @@ -141,7 +147,18 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: } try { - const articleId = type === 'js' ? 
jsPath(module, config.output.dirs.mediawiki) : cssPath(module, config.output.dirs.mediawiki) + let articleId + const pathFunctions = { + js: jsPath, + css: cssPath, + mobileJs: jsPath, + mobileCss: cssPath, + } + + const pathFunction = pathFunctions[type] + if (pathFunction) { + articleId = pathFunction(module, config.output.dirs.mediawiki) + } const article = new ZimArticle({ url: articleId, data: text, ns: '-' }) zimCreator.addArticle(article) logger.info(`Saved module [${module}]`) diff --git a/src/util/misc.ts b/src/util/misc.ts index 6ca59343..ee31a5f8 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -198,9 +198,6 @@ export function genHeaderCSSLink(config: Config, css: string, articleId: string, const upStr = '../'.repeat(slashesInUrl + 1) return `` } -export function genHeaderMobileCSSLink(css: string) { - return `` -} export function genHeaderScript(config: Config, js: string, articleId: string, subDirectory = '', attributes = '') { const resourceNamespace = '-' const slashesInUrl = articleId.split('/').length - 1 @@ -208,9 +205,6 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } -export function genHeaderMobileScript(js: string) { - return `` -} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 43ed278b..0e41b40a 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -234,6 +234,8 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() + const jsMobileModuleDependencies = new Set() + const cssMobileModuleDependencies = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -245,23 +247,17 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let articlesRenderer if (forceRender) { // All articles and main page will use the same renderer if 'forceRender' is specified - const renderer = await rendererBuilder.createRenderer( - { - renderType: 'specific', - renderName: forceRender, - }, - dump, - ) + const renderer = await rendererBuilder.createRenderer({ + renderType: 'specific', + renderName: forceRender, + }) mainPageRenderer = renderer articlesRenderer = renderer } else { - mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }, dump) - articlesRenderer = await rendererBuilder.createRenderer( - { - renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', - }, - dump, - ) + mainPageRenderer = await rendererBuilder.createRenderer({ renderType: 'desktop' }) + articlesRenderer = await rendererBuilder.createRenderer({ + renderType: hasWikimediaMobileApi ? 
'mobile' : 'auto', + }) } if (dump.customProcessor?.shouldKeepArticle) { @@ -317,6 +313,12 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade for (const dep of _moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } + for (const dep of _moduleDependencies.mobileJsDependenciesList) { + jsMobileModuleDependencies.add(dep) + } + for (const dep of _moduleDependencies.mobileStyleDependenciesList) { + cssMobileModuleDependencies.add(dep) + } jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars /* @@ -398,5 +400,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade return { jsModuleDependencies, cssModuleDependencies, + jsMobileModuleDependencies, + cssMobileModuleDependencies, } } diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index c87e8700..0df9fc9d 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -48,7 +48,8 @@ const commonTreatmentTest = async (articleList: string, mwUrl: string) => { describe('Mobile render zim file integrity', () => { const mwUrl = 'https://en.wikipedia.org' - const articleList = 'Canada' + // TODO: some articles such as 'Canada' don't pass this test even with desktop renderer + const articleList = 'BMW' test('Test WikimediaMobile with en.wikipedia.org', async () => { await commonTreatmentTest(articleList, mwUrl) From 0ab3c4a014f58916007eefaf0d37945d4ea0ada4 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 28 Sep 2023 10:24:13 +0300 Subject: [PATCH 37/58] Add PCS override script to enable links for mobile render --- res/pcs/pcs_override_script.js | 15 +++++++++++++++ res/templates/page.html | 1 + src/config.ts | 1 + src/mwoffliner.lib.ts | 4 ++++ src/renderers/abstract.renderer.ts | 3 +++ src/util/misc.ts | 16 ++++++++++++++++ 6 files changed, 40 insertions(+) create mode 100644 res/pcs/pcs_override_script.js diff --git 
a/res/pcs/pcs_override_script.js b/res/pcs/pcs_override_script.js new file mode 100644 index 00000000..240d5bb0 --- /dev/null +++ b/res/pcs/pcs_override_script.js @@ -0,0 +1,15 @@ +function importScript() { return 1 } // this is to avoid the error from site.js + +window.onload = function () { + // Check if there is a PCS output page + if (document.querySelector('#pcs')) { + const supElements = document.querySelectorAll('sup'); + const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); + const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + disabledElems.forEach((elem) => { + elem.addEventListener('click', (event) => { + event.stopPropagation(); + }, true); + }); + } +} diff --git a/res/templates/page.html b/res/templates/page.html index ac3fcd47..eb49043a 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -20,5 +20,6 @@

__ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__ + __PCS_JS_OVERRIDE__ diff --git a/src/config.ts b/src/config.ts index d30ae925..fb55aef3 100644 --- a/src/config.ts +++ b/src/config.ts @@ -58,6 +58,7 @@ const config = { mainPageCssResources: ['mobile_main_page'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], + pcsJsResources: ['pcs_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index e03f60aa..0b372ae0 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,6 +37,7 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, + saveStaticPCSFiles, importPolyfillModules, extractArticleList, getTmpDirectory, @@ -403,6 +404,9 @@ async function execute(argv: any) { logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) + logger.info('Copying Static PCS Files') + await saveStaticPCSFiles(config, zimCreator) + logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) logger.log(`Found [${stylesheetsToGet.length}] stylesheets to download`) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 4a0938cd..a4beb8ba 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -20,6 +20,7 @@ import { genCanonicalLink, genHeaderScript, genHeaderCSSLink, + genPCSOverrideScript, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -491,6 +492,7 @@ export abstract class Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_JS_OVERRIDE__', '') } else { htmlTemplateString = htmlTemplateString .replace('__ARTICLE_CONFIGVARS_LIST__', '') @@ -508,6 +510,7 @@ export abstract class Renderer { ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) + .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) } const htmlTemplateDoc = domino.createDocument(htmlTemplateString) diff --git a/src/util/misc.ts b/src/util/misc.ts index ee31a5f8..3a85acf6 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -185,6 +185,19 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { return Promise.all([...cssPromises, ...jsPromises]) } +export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { + const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { + try { + const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) + const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) + zimCreator.addArticle(article) + } catch (error) { + logger.warn(`Could not create pcs override ${pcsJs} file : ${error}`) + } + }) + return pcsJsPromises +} + export function cssPath(css: string, subDirectory = '') { return `${subDirectory ? `${subDirectory}/` : ''}${css.replace(/(\.css)?$/, '')}.css` } @@ -205,6 +218,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } +export function genPCSOverrideScript(js: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } From 40931ff1fca6d3bab665c089654fceced18e687e Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 28 Sep 2023 12:34:46 +0300 Subject: [PATCH 38/58] Enable regular behaviour of links for mobile render --- res/pcs/pcs_override_script.js | 4 ++-- .../pcs_override_style.css} | 6 ++++++ res/templates/page.html | 2 +- src/config.ts | 1 + src/mwoffliner.lib.ts | 3 ++- src/renderers/abstract.renderer.ts | 3 +++ src/renderers/wikimedia-mobile.renderer.ts | 19 ------------------- src/util/misc.ts | 17 +++++++++++++++-- 8 files changed, 30 insertions(+), 25 deletions(-) rename res/{mobile_article_page.css => pcs/pcs_override_style.css} (51%) diff --git a/res/pcs/pcs_override_script.js b/res/pcs/pcs_override_script.js index 240d5bb0..d39c7b2d 100644 --- a/res/pcs/pcs_override_script.js +++ b/res/pcs/pcs_override_script.js @@ -4,8 +4,8 @@ window.onload = function () { // Check if there is a PCS output page if (document.querySelector('#pcs')) { const supElements = document.querySelectorAll('sup'); - const backLinkElements = document.querySelectorAll('a.pcs-ref-back-link'); - const disabledElems = Array.from(supElements).concat(Array.from(backLinkElements)) + const linkElements = document.querySelectorAll('a'); + const disabledElems = Array.from(supElements).concat(Array.from(linkElements)) disabledElems.forEach((elem) => { elem.addEventListener('click', (event) => { event.stopPropagation(); diff --git a/res/mobile_article_page.css b/res/pcs/pcs_override_style.css similarity index 51% rename from res/mobile_article_page.css rename to res/pcs/pcs_override_style.css index 6de633f2..c4d61088 100644 --- a/res/mobile_article_page.css +++ b/res/pcs/pcs_override_style.css @@ -1,6 +1,12 @@ body { margin: 0 auto; } +p#pcs-edit-section-add-title-description { + display: none 
!important; +} +span.noviewer { + display: none !important; +} .reference-link::after { content: none !important; } diff --git a/res/templates/page.html b/res/templates/page.html index eb49043a..3e2574ed 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -4,7 +4,7 @@ __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ - __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __PCS_CSS_OVERRIDE__
diff --git a/src/config.ts b/src/config.ts index fb55aef3..92856fd1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,6 +56,7 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], + pcsCssResources: ['pcs_override_style'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], pcsJsResources: ['pcs_override_script'], diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 0b372ae0..21202e55 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -404,7 +404,8 @@ async function execute(argv: any) { logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) - logger.info('Copying Static PCS Files') + // TODO: refactor sequence, this only needed for mobile renderer + logger.info('Copying Static PCS Override Files') await saveStaticPCSFiles(config, zimCreator) logger.info('Finding stylesheets to download') diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index a4beb8ba..1d053e93 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -21,6 +21,7 @@ import { genHeaderScript, genHeaderCSSLink, genPCSOverrideScript, + genPCSCOverrideCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -492,6 +493,7 @@ export abstract class Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_CSS_OVERRIDE__', '') .replace('__PCS_JS_OVERRIDE__', '') } else { htmlTemplateString = htmlTemplateString @@ -510,6 +512,7 @@ export abstract class Renderer { ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) + .replace('__PCS_CSS_OVERRIDE__', genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index cc3df0dc..e315c97e 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -44,7 +44,6 @@ export class WikimediaMobileRenderer extends Renderer { return domino.createDocument(finalHTML) }, this.restoreLinkDefaults, - this.overrideMobileStyles, ) result.push({ @@ -141,22 +140,4 @@ export class WikimediaMobileRenderer extends Renderer { return doc } - - private overrideMobileStyles(doc: DominoElement) { - const styleEl = doc.createElement('style') - styleEl.innerHTML = ` - body { - margin: 0 auto; - } - .reference-link::after { - content: none !important; - } - .mw-body h3, .mw-body h2 { - width: auto; - } - ` - doc.head.appendChild(styleEl) - - return doc - } } diff --git a/src/util/misc.ts b/src/util/misc.ts index 3a85acf6..2216d42b 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -186,16 +186,26 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { } export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { + const pcsCssPromises = config.output.pcsCssResources.map(async (pcsCss) => { + try { + const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsCss}.css`)) + const article = new ZimArticle({ url: cssPath(pcsCss), data: cssCont, ns: '-' }) + zimCreator.addArticle(article) + } catch (error) { + logger.warn(`Could not create style PCS override ${pcsCss} file : ${error}`) + } + }) + const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { try { const jsCont = await 
readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) zimCreator.addArticle(article) } catch (error) { - logger.warn(`Could not create pcs override ${pcsJs} file : ${error}`) + logger.warn(`Could not create script PCS override ${pcsJs} file : ${error}`) } }) - return pcsJsPromises + return Promise.all([...pcsCssPromises, ...pcsJsPromises]) } export function cssPath(css: string, subDirectory = '') { @@ -221,6 +231,9 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s export function genPCSOverrideScript(js: string) { return `` } +export function genPCSCOverrideCSSLink(css: string) { + return `` +} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } From 1b4f12dfd3366e39ff1ee660458f441b1645e148 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 09:05:28 +0300 Subject: [PATCH 39/58] Merge mobile and desktop modules output, replace article templating to interim DesktopRenderer and MobileRenderer --- src/Downloader.ts | 6 +- src/mwoffliner.lib.ts | 18 +----- src/renderers/abstract.renderer.ts | 71 +++------------------ src/renderers/abstractDesktop.render.ts | 47 ++++++++++++++ src/renderers/abstractMobile.render.ts | 54 ++++++++++++++++ src/renderers/visual-editor.renderer.ts | 16 +++-- src/renderers/wikimedia-desktop.renderer.ts | 14 +++- src/renderers/wikimedia-mobile.renderer.ts | 14 +++- src/util/dump.ts | 10 +-- src/util/misc.ts | 6 -- src/util/saveArticles.ts | 10 --- 11 files changed, 153 insertions(+), 113 deletions(-) create mode 100644 src/renderers/abstractDesktop.render.ts create mode 100644 src/renderers/abstractMobile.render.ts diff --git a/src/Downloader.ts b/src/Downloader.ts index dcc730c1..a0226d97 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -714,10 +714,8 @@ class Downloader { } return { jsConfigVars, - jsDependenciesList, - 
styleDependenciesList, - mobileJsDependenciesList: this.mobileJsDependenciesList, - mobileStyleDependenciesList: this.mobileStyleDependenciesList, + jsDependenciesList: jsDependenciesList.concat(this.mobileJsDependenciesList), + styleDependenciesList: styleDependenciesList.concat(this.mobileStyleDependenciesList), } } diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 21202e55..29c46970 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -427,31 +427,17 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies, jsMobileModuleDependencies, cssMobileModuleDependencies } = await saveArticles( - zimCreator, - downloader, - dump, - hasWikimediaMobileApi, - forceRender, - ) + const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) - let allDependenciesWithType = [ + const allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, ] - if (dump.opts.isMobileRenderer) { - allDependenciesWithType = [ - ...allDependenciesWithType, - { type: 'mobileJs', moduleList: Array.from(jsMobileModuleDependencies) }, - { type: 'mobileCss', moduleList: Array.from(cssMobileModuleDependencies) }, - ] - } - if (downloader.webp) { logger.log('Downloading polyfill module') importPolyfillModules(zimCreator) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 1d053e93..7ae75cdd 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -9,7 +9,7 @@ import DU from '../DOMUtils.js' import { config } from 
'../config.js' import { Dump } from '../Dump.js' import { rewriteUrlsOfDoc } from '../util/rewriteUrls.js' -import { footerTemplate, htmlTemplateCode } from '../Templates.js' +import { footerTemplate } from '../Templates.js' import { getFullUrl, getMediaBase, @@ -17,11 +17,6 @@ import { getRelativeFilePath, isWebpCandidateImageMimeType, interpolateTranslationString, - genCanonicalLink, - genHeaderScript, - genHeaderCSSLink, - genPCSOverrideScript, - genPCSCOverrideCSSLink, encodeArticleIdForZimHtmlUrl, } from '../util/misc.js' @@ -388,7 +383,8 @@ export abstract class Renderer { return thumbDiv } - public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean) { + // TODO: The first part of this method is common for all renders + public async processHtml(html: string, dump: Dump, articleId: string, articleDetail: any, _moduleDependencies: any, webp: boolean, callback) { let mediaDependencies: Array<{ url: string; path: string }> = [] let subtitles: Array<{ url: string; path: string }> = [] let doc = domino.createDocument(html) @@ -432,7 +428,8 @@ export abstract class Renderer { doc = await dump.customProcessor.preProcessArticle(articleId, doc) } - let templatedDoc = await this.templateArticle(doc, _moduleDependencies, dump, articleId, articleDetail, RedisStore.articleDetailXId) + let templatedDoc = callback(_moduleDependencies, articleId) + templatedDoc = await this.mergeTemplateDoc(templatedDoc, doc, dump, articleDetail, RedisStore.articleDetailXId, articleId) if (dump.customProcessor && dump.customProcessor.postProcessArticle) { templatedDoc = await dump.customProcessor.postProcessArticle(articleId, templatedDoc) @@ -460,64 +457,14 @@ export abstract class Renderer { } } - private async templateArticle( + private async mergeTemplateDoc( + htmlTemplateDoc: DominoElement, parsoidDoc: DominoElement, - moduleDependencies: any, dump: Dump, - articleId: string, articleDetail: ArticleDetail, 
articleDetailXId: RKVS, - ): Promise { - const { jsConfigVars, jsDependenciesList, styleDependenciesList, mobileJsDependenciesList, mobileStyleDependenciesList } = moduleDependencies as { - jsConfigVars: string | RegExpExecArray - jsDependenciesList: string[] - styleDependenciesList: string[] - mobileJsDependenciesList: string[] - mobileStyleDependenciesList: string[] - } - - let htmlTemplateString = htmlTemplateCode(articleId).replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) - - if (!dump.opts.isMobileRenderer) { - htmlTemplateString = htmlTemplateString - .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') - .replace( - '__ARTICLE_JS_LIST__', - jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', - ) - .replace( - '__ARTICLE_CSS_LIST__', - styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace('__JS_SCRIPTS_MOBILE__', '') - .replace('__CSS_LINKS_MOBILE__', '') - .replace('__PCS_CSS_OVERRIDE__', '') - .replace('__PCS_JS_OVERRIDE__', '') - } else { - htmlTemplateString = htmlTemplateString - .replace('__ARTICLE_CONFIGVARS_LIST__', '') - .replace('__ARTICLE_JS_LIST__', '') - .replace('__ARTICLE_CSS_LIST__', '') - .replace( - '__JS_SCRIPTS_MOBILE__', - mobileJsDependenciesList.length !== 0 - ? mobileJsDependenciesList.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace( - '__CSS_LINKS_MOBILE__', - mobileStyleDependenciesList.length !== 0 - ? 
mobileStyleDependenciesList.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', - ) - .replace('__PCS_CSS_OVERRIDE__', genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) - .replace('__PCS_JS_OVERRIDE__', genPCSOverrideScript(config.output.pcsJsResources[0])) - } - - const htmlTemplateDoc = domino.createDocument(htmlTemplateString) - + articleId: string, + ) { /* Create final document by merging template and parsoid documents */ htmlTemplateDoc.getElementById('mw-content-text').style.setProperty('direction', dump.mwMetaData.textDir) htmlTemplateDoc.getElementById('mw-content-text').innerHTML = parsoidDoc.getElementsByTagName('body')[0].innerHTML diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts new file mode 100644 index 00000000..169d1825 --- /dev/null +++ b/src/renderers/abstractDesktop.render.ts @@ -0,0 +1,47 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class DesktopRenderer extends Renderer { + constructor() { + super() + } + + public templateDesktopArticle(moduleDependencies: any, articleId: string): Document { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { + jsConfigVars: string | RegExpExecArray + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const desktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile')) + const desktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile')) + + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, 
MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') + .replace( + '__ARTICLE_JS_LIST__', + desktopJsModuleDependencies.length !== 0 + ? desktopJsModuleDependencies.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__ARTICLE_CSS_LIST__', + desktopCssModuleDependencies.length !== 0 + ? desktopCssModuleDependencies.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__JS_SCRIPTS_MOBILE__', '') + .replace('__CSS_LINKS_MOBILE__', '') + .replace('__PCS_CSS_OVERRIDE__', '') + .replace('__PCS_JS_OVERRIDE__', '') + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts new file mode 100644 index 00000000..426ffacb --- /dev/null +++ b/src/renderers/abstractMobile.render.ts @@ -0,0 +1,54 @@ +import * as domino from 'domino' +import { Renderer } from './abstract.renderer.js' +import { config } from '../config.js' +import MediaWiki from '../MediaWiki.js' + +import { htmlTemplateCode } from '../Templates.js' +import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' + +export abstract class MobileRenderer extends Renderer { + constructor() { + super() + } + + private genPCSCOverrideCSSLink(css: string) { + return `` + } + + private genPCSOverrideScript(js: string) { + return `` + } + + public templateMobileArticle(moduleDependencies: any, articleId: string): Document { + const { jsDependenciesList, styleDependenciesList } = moduleDependencies as { + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const mobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) + 
const mobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + + const htmlTemplateString = htmlTemplateCode(articleId) + .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) + .replace('__ARTICLE_CONFIGVARS_LIST__', '') + .replace('__ARTICLE_JS_LIST__', '') + .replace('__ARTICLE_CSS_LIST__', '') + .replace( + '__JS_SCRIPTS_MOBILE__', + mobileJsModuleDependencies.length !== 0 + ? mobileJsModuleDependencies.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace( + '__CSS_LINKS_MOBILE__', + mobileCssModuleDependencies.length !== 0 + ? mobileCssModuleDependencies.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + : '', + ) + .replace('__PCS_CSS_OVERRIDE__', this.genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) + .replace('__PCS_JS_OVERRIDE__', this.genPCSOverrideScript(config.output.pcsJsResources[0])) + + const htmlTemplateDoc = domino.createDocument(htmlTemplateString) + return htmlTemplateDoc + } +} diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index 4fb505d2..edece1db 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -1,6 +1,6 @@ -import { DELETED_ARTICLE_ERROR } from '../util/const.js' import * as logger from '../Logger.js' -import { Renderer } from './abstract.renderer.js' +import { DELETED_ARTICLE_ERROR } from '../util/const.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' @@ -9,7 +9,7 @@ Represent 'https://{wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=h or 
'https://{3rd-part-wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}' */ -export class VisualEditorRenderer extends Renderer { +export class VisualEditorRenderer extends DesktopRenderer { constructor() { super() } @@ -55,7 +55,15 @@ export class VisualEditorRenderer extends Renderer { const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts const { html, displayTitle } = await this.retrieveHtml(renderOpts) if (html) { - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(html, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( + html, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateDesktopArticle(_moduleDependencies, articleId), + ) result.push({ articleId, displayTitle, diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 43fc23a3..0aaf4591 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -1,10 +1,10 @@ import domino from 'domino' -import { Renderer } from './abstract.renderer.js' +import { DesktopRenderer } from './abstractDesktop.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/' -export class WikimediaDesktopRenderer extends Renderer { +export class WikimediaDesktopRenderer extends DesktopRenderer { constructor() { super() } @@ -47,7 +47,15 @@ export class WikimediaDesktopRenderer extends Renderer { if (!isMainPage) { dataWithHeader = super.injectH1TitleToHtml(data, articleDetail) } - const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(dataWithHeader || data, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, mediaDependencies, 
subtitles } = await super.processHtml( + dataWithHeader || data, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateDesktopArticle.bind(this), + ) result.push({ articleId: _articleId, diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index e315c97e..6250b44d 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,13 +1,13 @@ import * as domino from 'domino' import * as logger from '../Logger.js' -import { Renderer } from './abstract.renderer.js' +import { MobileRenderer } from './abstractMobile.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' import { RenderOpts, RenderOutput } from './abstract.renderer.js' type PipeFunction = (value: DominoElement) => DominoElement | Promise // Represent 'https://{wikimedia-wiki}/api/rest_v1/page/mobile-html/' -export class WikimediaMobileRenderer extends Renderer { +export class WikimediaMobileRenderer extends MobileRenderer { constructor() { super() } @@ -37,7 +37,15 @@ export class WikimediaMobileRenderer extends Renderer { this.removeEditContainer, this.removeHiddenClass, async (doc) => { - const { finalHTML, subtitles, mediaDependencies } = await super.processHtml(doc.documentElement.outerHTML, dump, articleId, articleDetail, _moduleDependencies, webp) + const { finalHTML, subtitles, mediaDependencies } = await super.processHtml( + doc.documentElement.outerHTML, + dump, + articleId, + articleDetail, + _moduleDependencies, + webp, + super.templateMobileArticle.bind(this), + ) mediaDependenciesVal = mediaDependencies subtitlesVal = subtitles diff --git a/src/util/dump.ts b/src/util/dump.ts index 50ecaa83..26e59b58 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -90,7 +90,7 @@ export async function getAndProcessStylesheets(downloader: Downloader, links: Ar }) } -export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: 
Dump, module: string, type: 'js' | 'css' | 'mobileJs' | 'mobileCss') { +export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') { const replaceCodeByRegex = (sourceText, replaceMap: Map) => { let text: string replaceMap.forEach((textToReplace, regEx) => { @@ -118,15 +118,15 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: let apiParameterOnly let moduleApiUrl: string - if (type === 'js' || type === 'mobileJs') { + if (type === 'js') { apiParameterOnly = 'scripts' - } else if (type === 'css' || type === 'mobileCss') { + } else if (type === 'css') { apiParameterOnly = 'styles' } - if (type === 'js' || type === 'css') { + if (!module.includes('javascript/mobile') && !module.includes('css/mobile')) { moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`) - } else if (type === 'mobileJs' || type === 'mobileCss') { + } else { moduleApiUrl = encodeURI(`https:${module}`) } diff --git a/src/util/misc.ts b/src/util/misc.ts index 2216d42b..72c738c2 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -228,12 +228,6 @@ export function genHeaderScript(config: Config, js: string, articleId: string, s const path = isNodeModule(js) ? 
normalizeModule(js) : js return `` } -export function genPCSOverrideScript(js: string) { - return `` -} -export function genPCSCOverrideCSSLink(css: string) { - return `` -} export function genCanonicalLink(config: Config, webUrl: string, articleId: string) { return `` } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 0e41b40a..a9c45b57 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -234,8 +234,6 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() - const jsMobileModuleDependencies = new Set() - const cssMobileModuleDependencies = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -313,12 +311,6 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade for (const dep of _moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } - for (const dep of _moduleDependencies.mobileJsDependenciesList) { - jsMobileModuleDependencies.add(dep) - } - for (const dep of _moduleDependencies.mobileStyleDependenciesList) { - cssMobileModuleDependencies.add(dep) - } jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars /* @@ -400,7 +392,5 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade return { jsModuleDependencies, cssModuleDependencies, - jsMobileModuleDependencies, - cssMobileModuleDependencies, } } From b315fc3f85ff214a9413d60ccb8b07c5536a90c9 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 09:42:05 +0300 Subject: [PATCH 40/58] Optimize static files saving --- src/Dump.ts | 1 - src/MediaWiki.ts | 4 +- src/mwoffliner.lib.ts | 7 +-- src/renderers/visual-editor.renderer.ts | 2 +- 
src/renderers/wikimedia-desktop.renderer.ts | 1 - src/renderers/wikimedia-mobile.renderer.ts | 1 - src/util/builders/url/base.director.ts | 4 +- src/util/misc.ts | 51 +++++++------------- test/e2e/mobileRenderIntegrity.test.ts | 5 +- test/unit/builders/url/base.director.test.ts | 12 ++--- 10 files changed, 33 insertions(+), 55 deletions(-) diff --git a/src/Dump.ts b/src/Dump.ts index cbc1e278..f467bbea 100644 --- a/src/Dump.ts +++ b/src/Dump.ts @@ -29,7 +29,6 @@ interface DumpOpts { keepEmptyParagraphs: boolean tags?: string filenameDate: string - isMobileRenderer: boolean } export class Dump { diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 9780bd76..3c314791 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -183,8 +183,8 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildDesktopApiUrl(this.#apiPath) - this.mobileApiUrl = baseUrlDirector.buildMobileApiUrl(this.#apiPath) + this.desktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) + this.mobileApiUrl = baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 29c46970..2afd24e6 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -339,7 +339,6 @@ async function execute(argv: any) { keepEmptyParagraphs, tags: customZimTags, filenameDate, - isMobileRenderer: false, }, { ...mwMetaData, mainPage }, customProcessor, @@ -401,12 +400,10 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static Resource Files') - await saveStaticFiles(config, 
zimCreator) - - // TODO: refactor sequence, this only needed for mobile renderer logger.info('Copying Static PCS Override Files') await saveStaticPCSFiles(config, zimCreator) + logger.info('Copying Static Resource Files') + await saveStaticFiles(config, zimCreator) logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index edece1db..fb406708 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -62,7 +62,7 @@ export class VisualEditorRenderer extends DesktopRenderer { articleDetail, _moduleDependencies, webp, - super.templateDesktopArticle(_moduleDependencies, articleId), + super.templateDesktopArticle.bind(this), ) result.push({ articleId, diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 0aaf4591..074f3e76 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -35,7 +35,6 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts - dump.opts.isMobileRenderer = false const articleDetail = await renderOpts.articleDetailXId.get(articleId) // Paginate when there are more than 200 subCategories diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 6250b44d..d464ab61 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -23,7 +23,6 @@ export class WikimediaMobileRenderer extends MobileRenderer { try { const result: RenderOutput = [] const { data, articleId, webp, _moduleDependencies, dump } = renderOpts - dump.opts.isMobileRenderer = true const 
articleDetail = await renderOpts.articleDetailXId.get(articleId) const displayTitle = this.getStrippedTitle(renderOpts) diff --git a/src/util/builders/url/base.director.ts b/src/util/builders/url/base.director.ts index d91c6f13..6006441b 100644 --- a/src/util/builders/url/base.director.ts +++ b/src/util/builders/url/base.director.ts @@ -21,14 +21,14 @@ export default class BaseURLDirector { .build(true, '/') } - buildDesktopApiUrl(path?: string) { + buildWikimediaDesktopApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/html') .build(true, '/') } - buildMobileApiUrl(path?: string) { + buildWikimediaMobileApiUrl(path?: string) { return urlBuilder .setDomain(this.baseDomain) .setPath(path ?? 'api/rest_v1/page/mobile-html') diff --git a/src/util/misc.ts b/src/util/misc.ts index 72c738c2..49a0c880 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -162,49 +162,32 @@ export function interpolateTranslationString(str: string, parameters: { [key: st return newString } -export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { - const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map(async (css) => { +function saveResourceFile(resource: string, type: 'css' | 'js', basePath: string, config: Config, zimCreator: ZimCreator) { + return async () => { try { - const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`)) - const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' }) + const content = await readFilePromise(pathParser.resolve(__dirname, `../../res/${basePath}${resource}.${type}`)) + const article = new ZimArticle({ + url: type === 'css' ? cssPath(resource) : jsPath(resource), + data: content, + ns: '-', + }) zimCreator.addArticle(article) } catch (error) { - logger.warn(`Could not create ${css} file : ${error}`) + const fileType = type === 'css' ? (basePath.includes('pcs') ? 
'style PCS override' : 'style') : 'script' + logger.warn(`Could not create ${fileType} ${resource} file : ${error}`) } - }) + } +} - const jsPromises = config.output.jsResources.map(async (js) => { - try { - const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`)) - const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create ${js} file : ${error}`) - } - }) +export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { + const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map((css) => saveResourceFile(css, 'css', '', config, zimCreator)()) + const jsPromises = config.output.jsResources.map((js) => saveResourceFile(js, 'js', '', config, zimCreator)()) return Promise.all([...cssPromises, ...jsPromises]) } export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { - const pcsCssPromises = config.output.pcsCssResources.map(async (pcsCss) => { - try { - const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsCss}.css`)) - const article = new ZimArticle({ url: cssPath(pcsCss), data: cssCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create style PCS override ${pcsCss} file : ${error}`) - } - }) - - const pcsJsPromises = config.output.pcsJsResources.map(async (pcsJs) => { - try { - const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/pcs/${pcsJs}.js`)) - const article = new ZimArticle({ url: jsPath(pcsJs), data: jsCont, ns: '-' }) - zimCreator.addArticle(article) - } catch (error) { - logger.warn(`Could not create script PCS override ${pcsJs} file : ${error}`) - } - }) + const pcsCssPromises = config.output.pcsCssResources.map((pcsCss) => saveResourceFile(pcsCss, 'css', 'pcs/', config, zimCreator)()) + const pcsJsPromises = config.output.pcsJsResources.map((pcsJs) => 
saveResourceFile(pcsJs, 'js', 'pcs/', config, zimCreator)()) return Promise.all([...pcsCssPromises, ...pcsJsPromises]) } diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index 0df9fc9d..2f03f270 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -1,5 +1,6 @@ import 'dotenv/config.js' import * as mwoffliner from '../../src/mwoffliner.lib.js' +import * as logger from '../../src/Logger.js' import rimraf from 'rimraf' import { execa } from 'execa' import { jest } from '@jest/globals' @@ -34,8 +35,8 @@ async function getOutFiles(testId: string, articleList: string, mwUrl: string): const commonTreatmentTest = async (articleList: string, mwUrl: string) => { if (!zimcheckIsAvailable || !zimdumpIsAvailable) { const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' - console.log(`${missingTool} not installed, skipping test`) - return + logger.error(`${missingTool} not installed, exiting test`) + process.exit(1) } const now = new Date() const testId = `mwo-test-${+now}` diff --git a/test/unit/builders/url/base.director.test.ts b/test/unit/builders/url/base.director.test.ts index 76a18810..f679d2f9 100644 --- a/test/unit/builders/url/base.director.test.ts +++ b/test/unit/builders/url/base.director.test.ts @@ -25,29 +25,29 @@ describe('BaseURLDirector', () => { }) }) - describe('buildMobileApiUrl', () => { + describe('buildWikimediaMobileApiUrl', () => { it('should return mobile rest URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildMobileApiUrl('api/rest_v2/page/mobile-html') + const url = baseUrlDirector.buildWikimediaMobileApiUrl('api/rest_v2/page/mobile-html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/mobile-html/') }) it('should return mobile rest URL with default path and trailing char', () => { - const url = baseUrlDirector.buildMobileApiUrl() + const url = baseUrlDirector.buildWikimediaMobileApiUrl() 
expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/mobile-html/') }) }) - describe('buildDesktopApiUrl', () => { + describe('buildWikimediaDesktopApiUrl', () => { it('should return a desktop URL with provided path and trailing char', () => { - const url = baseUrlDirector.buildDesktopApiUrl('api/rest_v2/page/html') + const url = baseUrlDirector.buildWikimediaDesktopApiUrl('api/rest_v2/page/html') expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v2/page/html/') }) it('should return a desktop URL with default path and trailing char', () => { - const url = baseUrlDirector.buildDesktopApiUrl() + const url = baseUrlDirector.buildWikimediaDesktopApiUrl() expect(url.href).toBe('https://en.m.wikipedia.com/api/rest_v1/page/html/') }) From 5320b56cb7eaf93e3ae01ab2abb7aea94d9635c7 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Tue, 3 Oct 2023 11:23:37 +0300 Subject: [PATCH 41/58] Update naming for mobile and desktop api across application --- src/Downloader.ts | 30 +++++++++---------- src/MediaWiki.ts | 26 ++++++++-------- src/util/builders/url/desktop.director.ts | 2 +- src/util/builders/url/mobile.director.ts | 2 +- .../builders/url/desktop.director.test.ts | 6 ++-- .../unit/builders/url/mobile.director.test.ts | 6 ++-- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/Downloader.ts b/src/Downloader.ts index a0226d97..4db4532e 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -87,8 +87,8 @@ class Downloader { public arrayBufferRequestOptions: AxiosRequestConfig public jsonRequestOptions: AxiosRequestConfig public streamRequestOptions: AxiosRequestConfig - public mobileJsDependenciesList: string[] = [] - public mobileStyleDependenciesList: string[] = [] + public wikimediaMobileJsDependenciesList: string[] = [] + public wikimediaMobileStyleDependenciesList: string[] = [] private readonly uaString: string private activeRequests = 0 @@ -173,23 +173,23 @@ class Downloader { if (!forceRender) { //* Objects order in array 
matters! this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, ]) //* Objects order in array matters! this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.desktopApiUrl.href }, + { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.mobileApiUrl.href }, + { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, ]) } else { switch (forceRender) { case 'WikimediaDesktop': if (MediaWiki.hasWikimediaDesktopApi()) { - this.baseUrl = MediaWiki.desktopApiUrl.href - this.baseUrlForMainPage = MediaWiki.desktopApiUrl.href + this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href break } break @@ -202,8 +202,8 @@ class Downloader { break case 'WikimediaMobile': if (MediaWiki.hasWikimediaMobileApi()) { - this.baseUrl = MediaWiki.mobileApiUrl.href - this.baseUrlForMainPage = MediaWiki.mobileApiUrl.href + this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href + this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href break } break @@ -697,15 +697,15 @@ class Downloader { jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 
'nosuchaction' from api but should be 'view' // Download mobile page dependencies only once - if ((await MediaWiki.hasWikimediaMobileApi()) && this.mobileJsDependenciesList.length === 0 && this.mobileStyleDependenciesList.length === 0) { + if ((await MediaWiki.hasWikimediaMobileApi()) && this.wikimediaMobileJsDependenciesList.length === 0 && this.wikimediaMobileStyleDependenciesList.length === 0) { try { // TODO: An arbitrary title can be placed since all Wikimedia wikis have the same mobile offline resources const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) mobileModulesData.forEach((module: string) => { if (module.includes('javascript')) { - this.mobileJsDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileJsDependenciesList.push(module.replace('//', '')) } else if (module.includes('css')) { - this.mobileStyleDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileStyleDependenciesList.push(module.replace('//', '')) } }) } catch (err) { @@ -714,8 +714,8 @@ class Downloader { } return { jsConfigVars, - jsDependenciesList: jsDependenciesList.concat(this.mobileJsDependenciesList), - styleDependenciesList: styleDependenciesList.concat(this.mobileStyleDependenciesList), + jsDependenciesList: jsDependenciesList.concat(this.wikimediaMobileJsDependenciesList), + styleDependenciesList: styleDependenciesList.concat(this.wikimediaMobileStyleDependenciesList), } } diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 3c314791..60faa856 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -9,8 +9,8 @@ import semver from 'semver' import basicURLDirector from './util/builders/url/basic.director.js' import BaseURLDirector from './util/builders/url/base.director.js' import ApiURLDirector from './util/builders/url/api.director.js' -import DesktopURLDirector from './util/builders/url/desktop.director.js' -import MobileURLDirector from './util/builders/url/mobile.director.js' +import WikimediaDesktopURLDirector from 
'./util/builders/url/desktop.director.js' +import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js' import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' import { checkApiAvailability } from './util/mw-api.js' import { BLACKLISTED_NS } from './util/const.js' @@ -50,9 +50,9 @@ class MediaWiki { #apiActionPath: string #domain: string private apiUrlDirector: ApiURLDirector - private wikimediaDesktopUrlDirector: DesktopURLDirector - private wikimediaMobileUrlDirector: MobileURLDirector - private visualEditorURLDirector: VisualEditorURLDirector + private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + private wikimediaMobileUrlDirector: WikimediaMobileURLDirector + private VisualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL public apiUrl: URL @@ -60,8 +60,8 @@ class MediaWiki { public _modulePathOpt: string // only for whiting to generate modulePath public mobileModulePath: string public webUrl: URL - public desktopApiUrl: URL - public mobileApiUrl: URL + public WikimediaDesktopApiUrl: URL + public WikimediaMobileApiUrl: URL #hasWikimediaDesktopApi: boolean | null #hasWikimediaMobileApi: boolean | null @@ -152,7 +152,7 @@ class MediaWiki { public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { - this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) + this.#hasVisualEditorApi = await checkApiAvailability(this.VisualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasVisualEditorApi } return this.#hasVisualEditorApi @@ -183,13 +183,13 @@ class MediaWiki { this.apiUrl = baseUrlDirector.buildURL(this.#apiActionPath) this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href) this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.desktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) - this.mobileApiUrl = 
baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) + this.WikimediaDesktopApiUrl = baseUrlDirector.buildWikimediaDesktopApiUrl(this.#apiPath) + this.WikimediaMobileApiUrl = baseUrlDirector.buildWikimediaMobileApiUrl(this.#apiPath) this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt) this.mobileModulePath = baseUrlDirector.buildMobileModuleURL() - this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopApiUrl.href) - this.wikimediaMobileUrlDirector = new MobileURLDirector(this.mobileApiUrl.href) - this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href) + this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href) + this.VisualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) } public async login(downloader: Downloader) { diff --git a/src/util/builders/url/desktop.director.ts b/src/util/builders/url/desktop.director.ts index 100163ae..2f157deb 100644 --- a/src/util/builders/url/desktop.director.ts +++ b/src/util/builders/url/desktop.director.ts @@ -3,7 +3,7 @@ import urlBuilder from './url.builder.js' /** * Interface to build URLs based on Downloader desktop URL */ -export default class DesktopURLDirector { +export default class WikimediaDesktopURLDirector { baseDomain: string constructor(baseDomain: string) { diff --git a/src/util/builders/url/mobile.director.ts b/src/util/builders/url/mobile.director.ts index 258b389e..d33dcf9e 100644 --- a/src/util/builders/url/mobile.director.ts +++ b/src/util/builders/url/mobile.director.ts @@ -3,7 +3,7 @@ import urlBuilder from './url.builder.js' /** * Interface to build URLs based on MediaWiki mobile URL */ -export default class MobileURLDirector { +export default class WikimediaMobileURLDirector { baseDomain: string constructor(baseDomain: string) { diff --git 
a/test/unit/builders/url/desktop.director.test.ts b/test/unit/builders/url/desktop.director.test.ts index 642a0735..5ca6b5d8 100644 --- a/test/unit/builders/url/desktop.director.test.ts +++ b/test/unit/builders/url/desktop.director.test.ts @@ -1,7 +1,7 @@ -import DesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js' +import WikimediaDesktopURLDirector from '../../../../src/util/builders/url/desktop.director.js' -describe('DesktopURLDirector', () => { - const wikimediaDesktopUrlDirector = new DesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') +describe('WikimediaDesktopURLDirector', () => { + const wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/html/') describe('buildArticleURL', () => { it('should return the URL to retrieve a desktop article', () => { diff --git a/test/unit/builders/url/mobile.director.test.ts b/test/unit/builders/url/mobile.director.test.ts index d5f94a78..07dd29ae 100644 --- a/test/unit/builders/url/mobile.director.test.ts +++ b/test/unit/builders/url/mobile.director.test.ts @@ -1,7 +1,7 @@ -import MobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' +import WikimediaMobileURLDirector from '../../../../src/util/builders/url/mobile.director.js' -describe('MobileURLDirector', () => { - const mobuleUrlDirector = new MobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') +describe('WikimediaMobileURLDirector', () => { + const mobuleUrlDirector = new WikimediaMobileURLDirector('https://en.m.wikipedia.org/api/rest_v1/page/mobile-html/') describe('buildArticleURL', () => { it('should return a URL for retrieving mobile article', () => { From 07809d4a46590f551903438e2efe83c80970c059 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 10:18:28 +0300 Subject: [PATCH 42/58] Change PCS naming in favor of Wm Mobile --- res/templates/page.html | 6 +++--- .../wm_mobile_override_script.js} | 2 
+- .../wm_mobile_override_style.css} | 0 src/config.ts | 4 ++-- src/mwoffliner.lib.ts | 6 +++--- src/renderers/abstractDesktop.render.ts | 4 ++-- src/renderers/abstractMobile.render.ts | 8 ++++---- src/util/misc.ts | 10 +++++----- test/unit/mwApi.test.ts | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) rename res/{pcs/pcs_override_script.js => wm_mobile/wm_mobile_override_script.js} (90%) rename res/{pcs/pcs_override_style.css => wm_mobile/wm_mobile_override_style.css} (100%) diff --git a/res/templates/page.html b/res/templates/page.html index 3e2574ed..0f831328 100644 --- a/res/templates/page.html +++ b/res/templates/page.html @@ -4,7 +4,7 @@ __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ - __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __PCS_CSS_OVERRIDE__ + __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __WM_MOBILE_CSS_OVERRIDE__
@@ -19,7 +19,7 @@

__ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - __PCS_JS_OVERRIDE__ + __ARTICLE_JS_LIST__ + __WM_MOBILE_JS_OVERRIDE__ diff --git a/res/pcs/pcs_override_script.js b/res/wm_mobile/wm_mobile_override_script.js similarity index 90% rename from res/pcs/pcs_override_script.js rename to res/wm_mobile/wm_mobile_override_script.js index d39c7b2d..ce316eff 100644 --- a/res/pcs/pcs_override_script.js +++ b/res/wm_mobile/wm_mobile_override_script.js @@ -1,7 +1,7 @@ function importScript() { return 1 } // this is to avoid the error from site.js window.onload = function () { - // Check if there is a PCS output page + // Check if there is a Wikimedia mobile output page if (document.querySelector('#pcs')) { const supElements = document.querySelectorAll('sup'); const linkElements = document.querySelectorAll('a'); diff --git a/res/pcs/pcs_override_style.css b/res/wm_mobile/wm_mobile_override_style.css similarity index 100% rename from res/pcs/pcs_override_style.css rename to res/wm_mobile/wm_mobile_override_style.css diff --git a/src/config.ts b/src/config.ts index 92856fd1..2556710a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,10 +56,10 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], - pcsCssResources: ['pcs_override_style'], + wmMobileCssResources: ['wm_mobile_override_style'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], - pcsJsResources: ['pcs_override_script'], + mwMobileJsResources: ['wm_mobile_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 2afd24e6..5b46200a 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,7 +37,7 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, - saveStaticPCSFiles, + saveStaticWmMobileFiles, 
importPolyfillModules, extractArticleList, getTmpDirectory, @@ -400,8 +400,8 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static PCS Override Files') - await saveStaticPCSFiles(config, zimCreator) + logger.info('Copying Static Wikimedia Mobile Override Files') + await saveStaticWmMobileFiles(config, zimCreator) logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts index 169d1825..fb9a154c 100644 --- a/src/renderers/abstractDesktop.render.ts +++ b/src/renderers/abstractDesktop.render.ts @@ -38,8 +38,8 @@ export abstract class DesktopRenderer extends Renderer { ) .replace('__JS_SCRIPTS_MOBILE__', '') .replace('__CSS_LINKS_MOBILE__', '') - .replace('__PCS_CSS_OVERRIDE__', '') - .replace('__PCS_JS_OVERRIDE__', '') + .replace('__WM_MOBILE_CSS_OVERRIDE__', '') + .replace('__WM_MOBILE_JS_OVERRIDE__', '') const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index 426ffacb..c0fc2949 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -11,11 +11,11 @@ export abstract class MobileRenderer extends Renderer { super() } - private genPCSCOverrideCSSLink(css: string) { + private genWmMobileOverrideCSSLink(css: string) { return `` } - private genPCSOverrideScript(js: string) { + private genWmMobileOverrideScript(js: string) { return `` } @@ -45,8 +45,8 @@ export abstract class MobileRenderer extends Renderer { ? 
mobileCssModuleDependencies.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) - .replace('__PCS_CSS_OVERRIDE__', this.genPCSCOverrideCSSLink(config.output.pcsCssResources[0])) - .replace('__PCS_JS_OVERRIDE__', this.genPCSOverrideScript(config.output.pcsJsResources[0])) + .replace('__WM_MOBILE_CSS_OVERRIDE__', this.genWmMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) + .replace('__WM_MOBILE_JS_OVERRIDE__', this.genWmMobileOverrideScript(config.output.mwMobileJsResources[0])) const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/util/misc.ts b/src/util/misc.ts index 49a0c880..ec815d34 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -173,7 +173,7 @@ function saveResourceFile(resource: string, type: 'css' | 'js', basePath: string }) zimCreator.addArticle(article) } catch (error) { - const fileType = type === 'css' ? (basePath.includes('pcs') ? 'style PCS override' : 'style') : 'script' + const fileType = type === 'css' ? (basePath.includes('wm_mobile') ? 
'style Wikimedia mobile override' : 'style') : 'script' logger.warn(`Could not create ${fileType} ${resource} file : ${error}`) } } @@ -185,10 +185,10 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { return Promise.all([...cssPromises, ...jsPromises]) } -export function saveStaticPCSFiles(config: Config, zimCreator: ZimCreator) { - const pcsCssPromises = config.output.pcsCssResources.map((pcsCss) => saveResourceFile(pcsCss, 'css', 'pcs/', config, zimCreator)()) - const pcsJsPromises = config.output.pcsJsResources.map((pcsJs) => saveResourceFile(pcsJs, 'js', 'pcs/', config, zimCreator)()) - return Promise.all([...pcsCssPromises, ...pcsJsPromises]) +export function saveStaticWmMobileFiles(config: Config, zimCreator: ZimCreator) { + const wmMobileCssPromises = config.output.wmMobileCssResources.map((wmMobileCss) => saveResourceFile(wmMobileCss, 'css', 'wm_mobile/', config, zimCreator)()) + const wmMobileJsPromises = config.output.mwMobileJsResources.map((wmMobileJs) => saveResourceFile(wmMobileJs, 'js', 'wm_mobile/', config, zimCreator)()) + return Promise.all([...wmMobileCssPromises, ...wmMobileJsPromises]) } export function cssPath(css: string, subDirectory = '') { diff --git a/test/unit/mwApi.test.ts b/test/unit/mwApi.test.ts index 9b73fadd..4c44f9b2 100644 --- a/test/unit/mwApi.test.ts +++ b/test/unit/mwApi.test.ts @@ -18,7 +18,7 @@ afterAll(stopRedis) const initMW = async (downloader: Downloader) => { await MediaWiki.getMwMetaData(downloader) await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopRestApi() + await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasVisualEditorApi() await MediaWiki.getNamespaces([], downloader) From 8867024013c1d2bd3dfc6b88c5fdc26d270de0dd Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 11:40:19 +0300 Subject: [PATCH 43/58] Add tests for the format params while using WikimediaMobile render --- test/e2e/mobileRenderFormatParams.test.ts | 126 
++++++++++++++++++++++ test/e2e/mobileRenderIntegrity.test.ts | 1 + 2 files changed, 127 insertions(+) create mode 100644 test/e2e/mobileRenderFormatParams.test.ts diff --git a/test/e2e/mobileRenderFormatParams.test.ts b/test/e2e/mobileRenderFormatParams.test.ts new file mode 100644 index 00000000..c34d5367 --- /dev/null +++ b/test/e2e/mobileRenderFormatParams.test.ts @@ -0,0 +1,126 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import * as logger from '../../src/Logger.js' +import domino from 'domino' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { zimdumpAvailable, zimdump } from '../util.js' + +jest.setTimeout(200000) + +let zimdumpIsAvailable + +beforeAll(async () => { + zimdumpIsAvailable = await zimdumpAvailable() + if (!zimdumpIsAvailable) { + logger.error('Zimdump not installed, exiting test') + process.exit(1) + } +}) + +async function getOutFiles(testId: string, articleList: string, mwUrl: string, format?: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'mail@mail.com', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: 'WikimediaMobile', + format, + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +// TODO: articulate this test with /pull/1898 once merged +describe('Mobile render with multiple format params', () => { + const mwUrl = 'https://en.wikipedia.org' + + test('Test WikimediaMobile with en.wikipedia.org using format:nopic param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopic') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const imgElements = 
Array.from(articleDoc.querySelectorAll('img')) + + expect(imgElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:nodet param', async () => { + const articleList = 'BMW' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nodet') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const sectionsElements = Array.from(articleDoc.querySelectorAll('section')) + + expect(sectionsElements).toHaveLength(1) + expect(sectionsElements[0].getAttribute('data-mw-section-id')).toEqual('0') + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:novid param to check no video tags', async () => { + const articleList = 'Animation' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const videoElements = Array.from(articleDoc.querySelectorAll('video')) + + expect(videoElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + test('Test WikimediaMobile render with en.wikipedia.org using format:novid param to check no audio tags', async () => { + const articleList = 'English_alphabet' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'novid') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const audioElements = Array.from(articleDoc.querySelectorAll('audio')) + + expect(audioElements).toHaveLength(0) + + rimraf.sync(`./${testId}`) + }) + + // 
TODO: secure nppdf format for all renders + test.skip('Test WikimediaMobile render with en.wikipedia.org using format:nopdf', async () => { + const articleList = 'PDF' + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(testId, articleList, mwUrl, 'nopdf') + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + const anchorElements = Array.from(articleDoc.querySelectorAll('a')) + + anchorElements.forEach((anchor) => { + expect(anchor.href).not.toContain('.pdf') + }) + + rimraf.sync(`./${testId}`) + }) +}) diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts index 2f03f270..8cb5d81a 100644 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ b/test/e2e/mobileRenderIntegrity.test.ts @@ -47,6 +47,7 @@ const commonTreatmentTest = async (articleList: string, mwUrl: string) => { rimraf.sync(`./${testId}`) } +// TODO: articulate this test with /pull/1898 once merged describe('Mobile render zim file integrity', () => { const mwUrl = 'https://en.wikipedia.org' // TODO: some articles such as 'Canada' don't pass this test even with desktop renderer From defd234f15257df0b9f899695a68d04644637c5e Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 15:20:15 +0300 Subject: [PATCH 44/58] Override image size for WikimediaMobile --- res/wm_mobile/wm_mobile_override_style.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/res/wm_mobile/wm_mobile_override_style.css b/res/wm_mobile/wm_mobile_override_style.css index c4d61088..3cbaef67 100644 --- a/res/wm_mobile/wm_mobile_override_style.css +++ b/res/wm_mobile/wm_mobile_override_style.css @@ -13,3 +13,8 @@ span.noviewer { .mw-body h3, .mw-body h2 { width: auto; } + +.thumbinner img.pcs-widen-image-override { + width: auto !important; + max-width: 100% !important; +} From ecf79447439492567015b929c7833f5d1da80725 Mon Sep 17 00:00:00 
2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 16:12:49 +0300 Subject: [PATCH 45/58] Update placeholder in the test for nopdf param --- test/e2e/mobileRenderFormatParams.test.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/e2e/mobileRenderFormatParams.test.ts b/test/e2e/mobileRenderFormatParams.test.ts index c34d5367..826f2273 100644 --- a/test/e2e/mobileRenderFormatParams.test.ts +++ b/test/e2e/mobileRenderFormatParams.test.ts @@ -105,7 +105,6 @@ describe('Mobile render with multiple format params', () => { rimraf.sync(`./${testId}`) }) - // TODO: secure nppdf format for all renders test.skip('Test WikimediaMobile render with en.wikipedia.org using format:nopdf', async () => { const articleList = 'PDF' const now = new Date() @@ -117,8 +116,8 @@ describe('Mobile render with multiple format params', () => { const anchorElements = Array.from(articleDoc.querySelectorAll('a')) - anchorElements.forEach((anchor) => { - expect(anchor.href).not.toContain('.pdf') + anchorElements.forEach(() => { + // TODO: Check valid links to pdf source }) rimraf.sync(`./${testId}`) From 113ff0f39df6ae409b919cac1deea8e0c2d869d1 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 16:50:13 +0300 Subject: [PATCH 46/58] Force article body to be centered for mobile render --- res/wm_mobile/wm_mobile_override_style.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/res/wm_mobile/wm_mobile_override_style.css b/res/wm_mobile/wm_mobile_override_style.css index 3cbaef67..ab31918b 100644 --- a/res/wm_mobile/wm_mobile_override_style.css +++ b/res/wm_mobile/wm_mobile_override_style.css @@ -1,5 +1,5 @@ body { - margin: 0 auto; + margin: 0 auto !important; } p#pcs-edit-section-add-title-description { display: none !important; From 546cb7e0f9337b578cff074dc352ad80f1352631 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 5 Oct 2023 10:04:33 +0300 Subject: [PATCH 47/58] Refactor modules resolving logic for mobile renderer --- 
src/renderers/abstract.renderer.ts | 1 + src/renderers/abstractMobile.render.ts | 24 ++++++++------------- src/renderers/visual-editor.renderer.ts | 1 + src/renderers/wikimedia-desktop.renderer.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 18 +++++++++++++++- src/util/const.ts | 2 +- src/util/saveArticles.ts | 8 +++---- test/unit/saveArticles.test.ts | 3 +++ 8 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 7ae75cdd..ca95ba9a 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -55,6 +55,7 @@ export interface RenderSingleOutput { displayTitle: string html: string mediaDependencies: any + moduleDependencies: any subtitles: any } diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index c0fc2949..f92913a9 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -11,22 +11,16 @@ export abstract class MobileRenderer extends Renderer { super() } - private genWmMobileOverrideCSSLink(css: string) { + private genWikimediaMobileOverrideCSSLink(css: string) { return `` } - private genWmMobileOverrideScript(js: string) { + private genWikimediaMobileOverrideScript(js: string) { return `` } public templateMobileArticle(moduleDependencies: any, articleId: string): Document { - const { jsDependenciesList, styleDependenciesList } = moduleDependencies as { - jsDependenciesList: string[] - styleDependenciesList: string[] - } - - const mobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) - const mobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + const { jsDependenciesList, styleDependenciesList } = moduleDependencies const htmlTemplateString = htmlTemplateCode(articleId) .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) 
@@ -35,18 +29,18 @@ export abstract class MobileRenderer extends Renderer { .replace('__ARTICLE_CSS_LIST__', '') .replace( '__JS_SCRIPTS_MOBILE__', - mobileJsModuleDependencies.length !== 0 - ? mobileJsModuleDependencies.map((oneMobJsDep) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') + jsDependenciesList.length !== 0 + ? jsDependenciesList.map((oneMobJsDep: string) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) .replace( '__CSS_LINKS_MOBILE__', - mobileCssModuleDependencies.length !== 0 - ? mobileCssModuleDependencies.map((oneMobCssDep) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + styleDependenciesList.length !== 0 + ? styleDependenciesList.map((oneMobCssDep: string) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) - .replace('__WM_MOBILE_CSS_OVERRIDE__', this.genWmMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) - .replace('__WM_MOBILE_JS_OVERRIDE__', this.genWmMobileOverrideScript(config.output.mwMobileJsResources[0])) + .replace('__WM_MOBILE_CSS_OVERRIDE__', this.genWikimediaMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) + .replace('__WM_MOBILE_JS_OVERRIDE__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index fb406708..aec30f86 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -69,6 +69,7 @@ export class VisualEditorRenderer extends DesktopRenderer { displayTitle, html: finalHTML, mediaDependencies, + moduleDependencies: _moduleDependencies, subtitles, }) return result diff --git a/src/renderers/wikimedia-desktop.renderer.ts 
b/src/renderers/wikimedia-desktop.renderer.ts index 074f3e76..5dc9c665 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -61,6 +61,7 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { displayTitle: (strippedTitle || articleId.replace(/_/g, ' ')) + (i === 0 ? '' : `/${i}`), html: finalHTML, mediaDependencies, + moduleDependencies: _moduleDependencies, subtitles, }) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index d464ab61..7941df55 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -25,6 +25,21 @@ export class WikimediaMobileRenderer extends MobileRenderer { const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { + jsConfigVars: string + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const mobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) + const mobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + + const wikimediaMobileModuleDependencies = { + jsConfigVars, + jsDependenciesList: mobileJsModuleDependencies, + styleDependenciesList: mobileCssModuleDependencies, + } + const displayTitle = this.getStrippedTitle(renderOpts) if (data) { let mediaDependenciesVal @@ -41,7 +56,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { dump, articleId, articleDetail, - _moduleDependencies, + wikimediaMobileModuleDependencies, webp, super.templateMobileArticle.bind(this), ) @@ -58,6 +73,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { displayTitle, html: finalHTMLMobile.documentElement.outerHTML, mediaDependencies: mediaDependenciesVal, + moduleDependencies: 
wikimediaMobileModuleDependencies, subtitles: subtitlesVal, }) return result diff --git a/src/util/const.ts b/src/util/const.ts index 6c511f56..b5985ec7 100644 --- a/src/util/const.ts +++ b/src/util/const.ts @@ -20,4 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/ export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js' export const MAX_FILE_DOWNLOAD_RETRIES = 5 export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not indgestable by Parsoid https://github.com/openzim/mwoffliner/issues/1853 -export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor'] +export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'WikimediaMobile'] diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index a9c45b57..df7a4da1 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -298,20 +298,20 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) - for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, subtitles } of rets) { + for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, subtitles } of rets) { if (!finalHTML) { logger.warn(`No HTML returned for article [${articleId}], skipping`) continue } curStage += 1 - for (const dep of _moduleDependencies.jsDependenciesList) { + for (const dep of moduleDependencies.jsDependenciesList) { jsModuleDependencies.add(dep) } - for (const dep of _moduleDependencies.styleDependenciesList) { + for (const dep of moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } - jsConfigVars = jsConfigVars || _moduleDependencies.jsConfigVars + jsConfigVars = 
jsConfigVars || moduleDependencies.jsConfigVars /* * getModuleDependencies and downloader.getArticle are diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 124ee2ed..e539f1fc 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -93,6 +93,9 @@ describe('saveArticles', () => { case 'WikimediaDesktop': rendererInstance = new WikimediaDesktopRenderer() break + case 'WikimediaMobile': + rendererInstance = new WikimediaDesktopRenderer() + break default: throw new Error(`Unknown renderer: ${renderer}`) } From d3027e11790ea1476acf2435a363d4435c11d5d0 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 5 Oct 2023 12:50:16 +0300 Subject: [PATCH 48/58] Create separate page templates for mobile and desktop --- .../{page.html => pageWikimediaDesktop.html} | 5 +- res/templates/pageWikimediaMobile.html | 24 +++++++++ src/Downloader.ts | 4 +- src/Templates.ts | 23 +++----- src/config.ts | 3 +- src/renderers/abstractDesktop.render.ts | 53 +++++++++++++------ src/renderers/abstractMobile.render.ts | 39 +++++++++----- src/renderers/visual-editor.renderer.ts | 5 +- src/renderers/wikimedia-desktop.renderer.ts | 6 ++- src/renderers/wikimedia-mobile.renderer.ts | 19 ++----- src/util/dump.ts | 2 - test/e2e/vikidia.e2e.test.ts | 2 +- 12 files changed, 113 insertions(+), 72 deletions(-) rename res/templates/{page.html => pageWikimediaDesktop.html} (83%) create mode 100644 res/templates/pageWikimediaMobile.html diff --git a/res/templates/page.html b/res/templates/pageWikimediaDesktop.html similarity index 83% rename from res/templates/page.html rename to res/templates/pageWikimediaDesktop.html index 0f831328..b2141299 100644 --- a/res/templates/page.html +++ b/res/templates/pageWikimediaDesktop.html @@ -4,7 +4,7 @@ __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ - __CSS_LINKS__ __JS_SCRIPTS__ __CSS_LINKS_MOBILE__ __JS_SCRIPTS_MOBILE__ __WM_MOBILE_CSS_OVERRIDE__ + __CSS_LINKS__ __JS_SCRIPTS__
@@ -19,7 +19,6 @@

__ARTICLE_CONFIGVARS_LIST__ - __ARTICLE_JS_LIST__ - __WM_MOBILE_JS_OVERRIDE__ + __ARTICLE_JS_LIST__ diff --git a/res/templates/pageWikimediaMobile.html b/res/templates/pageWikimediaMobile.html new file mode 100644 index 00000000..79d43efc --- /dev/null +++ b/res/templates/pageWikimediaMobile.html @@ -0,0 +1,24 @@ + + + + + + __ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_LIST__ + __CSS_LINKS__ __ARTICLE_JS_LIST__ + + +
+
+
+ +
+

+
+
+
+
+
+ __ARTICLE_CONFIGVARS_LIST__ + __JS_SCRIPTS__ + + diff --git a/src/Downloader.ts b/src/Downloader.ts index 4db4532e..afb05357 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -703,9 +703,9 @@ class Downloader { const mobileModulesData = await this.getJSON(`${MediaWiki.mobileModulePath}Test`) mobileModulesData.forEach((module: string) => { if (module.includes('javascript')) { - this.wikimediaMobileJsDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileJsDependenciesList.push(module) } else if (module.includes('css')) { - this.wikimediaMobileStyleDependenciesList.push(module.replace('//', '')) + this.wikimediaMobileStyleDependenciesList.push(module) } }) } catch (err) { diff --git a/src/Templates.ts b/src/Templates.ts index e947c6ac..ff083684 100644 --- a/src/Templates.ts +++ b/src/Templates.ts @@ -2,7 +2,6 @@ import swig from 'swig-templates' import pathParser from 'path' import { config } from './config.js' import { readFileSync } from 'fs' -import { genHeaderCSSLink, genHeaderScript } from './util/index.js' import * as path from 'path' import { fileURLToPath } from 'url' @@ -22,21 +21,12 @@ const categoriesTemplate = swig.compile(readTemplate(config.output.templates.cat const subCategoriesTemplate = swig.compile(readTemplate(config.output.templates.subCategories)) const subPagesTemplate = swig.compile(readTemplate(config.output.templates.subPages)) -const htmlTemplateCode = (articleId: string) => { - const cssLinks = config.output.cssResources.reduce((buf, css) => { - return buf + genHeaderCSSLink(config, css, articleId) - }, '') - - const jsScripts = config.output.jsResources.reduce((buf, js) => { - return ( - buf + - (js === 'script' - ? 
genHeaderScript(config, js, articleId, '', `data-article-id="${articleId.replace(/"/g, '\\\\"')}" id="script-js"`) - : genHeaderScript(config, js, articleId)) - ) - }, '') +const htmlWikimediaMobileTemplateCode = () => { + return readTemplate(config.output.templates.pageWikimediaMobile) +} - return readTemplate(config.output.templates.page).replace('__CSS_LINKS__', cssLinks).replace('__JS_SCRIPTS__', jsScripts) +const htmlWikimediaDesktopTemplateCode = () => { + return readTemplate(config.output.templates.pageWikimediaDesktop) } const articleListHomeTemplate = readTemplate(config.output.templates.articleListHomeTemplate) @@ -46,7 +36,8 @@ export { leadSectionTemplate, sectionTemplate, subSectionTemplate, - htmlTemplateCode, + htmlWikimediaMobileTemplateCode, + htmlWikimediaDesktopTemplateCode, articleListHomeTemplate, categoriesTemplate, subCategoriesTemplate, diff --git a/src/config.ts b/src/config.ts index 2556710a..8ac7aceb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -103,7 +103,8 @@ const config = { * __ARTICLE_CSS_LIST__ ==> list of link tags linking to all the css modules dependencies * __CSS_LINKS__ ==> list of link tags for config.output.cssResources */ - page: './templates/page.html', + pageWikimediaDesktop: './templates/pageWikimediaDesktop.html', + pageWikimediaMobile: './templates/pageWikimediaMobile.html', categories: './templates/categories.html', diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts index fb9a154c..3a59f859 100644 --- a/src/renderers/abstractDesktop.render.ts +++ b/src/renderers/abstractDesktop.render.ts @@ -3,7 +3,7 @@ import { Renderer } from './abstract.renderer.js' import { config } from '../config.js' import MediaWiki from '../MediaWiki.js' -import { htmlTemplateCode } from '../Templates.js' +import { htmlWikimediaDesktopTemplateCode } from '../Templates.js' import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' export abstract class DesktopRenderer 
extends Renderer { @@ -11,35 +11,58 @@ export abstract class DesktopRenderer extends Renderer { super() } + public filterWikimediaDesktopModules(_moduleDependencies) { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { + jsConfigVars: string + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const wikimediaDesktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile')) + const wikimediaDesktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile')) + + const wikimediaDesktopModuleDependencies = { + jsConfigVars, + jsDependenciesList: wikimediaDesktopJsModuleDependencies, + styleDependenciesList: wikimediaDesktopCssModuleDependencies, + } + + return wikimediaDesktopModuleDependencies + } + public templateDesktopArticle(moduleDependencies: any, articleId: string): Document { const { jsConfigVars, jsDependenciesList, styleDependenciesList } = moduleDependencies as { - jsConfigVars: string | RegExpExecArray + jsConfigVars jsDependenciesList: string[] styleDependenciesList: string[] } - const desktopJsModuleDependencies = jsDependenciesList.filter((item) => !item.includes('javascript/mobile')) - const desktopCssModuleDependencies = styleDependenciesList.filter((item) => !item.includes('css/mobile')) + const cssLinks = config.output.cssResources.reduce((buf, css) => { + return buf + genHeaderCSSLink(config, css, articleId) + }, '') + + const jsScripts = config.output.jsResources.reduce((buf, js) => { + return ( + buf + + (js === 'script' + ? 
genHeaderScript(config, js, articleId, '', `data-article-id="${articleId.replace(/"/g, '\\\\"')}" id="script-js"`) + : genHeaderScript(config, js, articleId)) + ) + }, '') - const htmlTemplateString = htmlTemplateCode(articleId) + const htmlTemplateString = htmlWikimediaDesktopTemplateCode() + .replace('__CSS_LINKS__', cssLinks) + .replace('__JS_SCRIPTS__', jsScripts) .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) .replace('__ARTICLE_CONFIGVARS_LIST__', jsConfigVars !== '' ? genHeaderScript(config, 'jsConfigVars', articleId, config.output.dirs.mediawiki) : '') .replace( '__ARTICLE_JS_LIST__', - desktopJsModuleDependencies.length !== 0 - ? desktopJsModuleDependencies.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) .replace( '__ARTICLE_CSS_LIST__', - desktopCssModuleDependencies.length !== 0 - ? desktopCssModuleDependencies.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', + styleDependenciesList.length !== 0 ? 
styleDependenciesList.map((oneCssDep) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) - .replace('__JS_SCRIPTS_MOBILE__', '') - .replace('__CSS_LINKS_MOBILE__', '') - .replace('__WM_MOBILE_CSS_OVERRIDE__', '') - .replace('__WM_MOBILE_JS_OVERRIDE__', '') const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index f92913a9..c8e6ce4f 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -3,7 +3,7 @@ import { Renderer } from './abstract.renderer.js' import { config } from '../config.js' import MediaWiki from '../MediaWiki.js' -import { htmlTemplateCode } from '../Templates.js' +import { htmlWikimediaMobileTemplateCode } from '../Templates.js' import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' export abstract class MobileRenderer extends Renderer { @@ -11,6 +11,25 @@ export abstract class MobileRenderer extends Renderer { super() } + public filterWikimediaMobileModules(_moduleDependencies) { + const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { + jsConfigVars: string + jsDependenciesList: string[] + styleDependenciesList: string[] + } + + const wikimediaMobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) + const wikimediaMobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) + + const wikimediaMobileModuleDependencies = { + jsConfigVars, + jsDependenciesList: wikimediaMobileJsModuleDependencies, + styleDependenciesList: wikimediaMobileCssModuleDependencies, + } + + return wikimediaMobileModuleDependencies + } + private genWikimediaMobileOverrideCSSLink(css: string) { return `` } @@ -22,25 +41,21 @@ export abstract class MobileRenderer extends Renderer { public 
templateMobileArticle(moduleDependencies: any, articleId: string): Document { const { jsDependenciesList, styleDependenciesList } = moduleDependencies - const htmlTemplateString = htmlTemplateCode(articleId) + const htmlTemplateString = htmlWikimediaMobileTemplateCode() .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) .replace('__ARTICLE_CONFIGVARS_LIST__', '') - .replace('__ARTICLE_JS_LIST__', '') - .replace('__ARTICLE_CSS_LIST__', '') + .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) + .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) .replace( - '__JS_SCRIPTS_MOBILE__', - jsDependenciesList.length !== 0 - ? jsDependenciesList.map((oneMobJsDep: string) => genHeaderScript(config, oneMobJsDep, articleId, config.output.dirs.mediawiki)).join('\n') - : '', + '__ARTICLE_JS_LIST__', + jsDependenciesList.length !== 0 ? jsDependenciesList.map((oneJsDep: string) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) .replace( - '__CSS_LINKS_MOBILE__', + '__ARTICLE_CSS_LIST__', styleDependenciesList.length !== 0 - ? styleDependenciesList.map((oneMobCssDep: string) => genHeaderCSSLink(config, oneMobCssDep, articleId, config.output.dirs.mediawiki)).join('\n') + ? 
styleDependenciesList.map((oneCssDep: string) => genHeaderCSSLink(config, oneCssDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', ) - .replace('__WM_MOBILE_CSS_OVERRIDE__', this.genWikimediaMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) - .replace('__WM_MOBILE_JS_OVERRIDE__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) const htmlTemplateDoc = domino.createDocument(htmlTemplateString) return htmlTemplateDoc diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index aec30f86..0330b875 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -53,6 +53,7 @@ export class VisualEditorRenderer extends DesktopRenderer { try { const result: RenderOutput = [] const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts + const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) const { html, displayTitle } = await this.retrieveHtml(renderOpts) if (html) { const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( @@ -60,7 +61,7 @@ export class VisualEditorRenderer extends DesktopRenderer { dump, articleId, articleDetail, - _moduleDependencies, + moduleDependenciesFiltered, webp, super.templateDesktopArticle.bind(this), ) @@ -69,7 +70,7 @@ export class VisualEditorRenderer extends DesktopRenderer { displayTitle, html: finalHTML, mediaDependencies, - moduleDependencies: _moduleDependencies, + moduleDependencies: moduleDependenciesFiltered, subtitles, }) return result diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 5dc9c665..f91dbce2 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -37,6 +37,8 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, 
dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) + const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) + // Paginate when there are more than 200 subCategories const numberOfPagesToSplitInto = Math.max(Math.ceil((articleDetail.subCategories || []).length / 200), 1) @@ -51,7 +53,7 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { dump, articleId, articleDetail, - _moduleDependencies, + moduleDependenciesFiltered, webp, super.templateDesktopArticle.bind(this), ) @@ -61,7 +63,7 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { displayTitle: (strippedTitle || articleId.replace(/_/g, ' ')) + (i === 0 ? '' : `/${i}`), html: finalHTML, mediaDependencies, - moduleDependencies: _moduleDependencies, + moduleDependencies: moduleDependenciesFiltered, subtitles, }) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 7941df55..a1cd31cf 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -25,20 +25,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) - const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { - jsConfigVars: string - jsDependenciesList: string[] - styleDependenciesList: string[] - } - - const mobileJsModuleDependencies = jsDependenciesList.filter((item) => item.includes('javascript/mobile')) - const mobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) - - const wikimediaMobileModuleDependencies = { - jsConfigVars, - jsDependenciesList: mobileJsModuleDependencies, - styleDependenciesList: mobileCssModuleDependencies, - } + const moduleDependenciesFiltered = 
super.filterWikimediaMobileModules(_moduleDependencies) const displayTitle = this.getStrippedTitle(renderOpts) if (data) { @@ -56,7 +43,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { dump, articleId, articleDetail, - wikimediaMobileModuleDependencies, + moduleDependenciesFiltered, webp, super.templateMobileArticle.bind(this), ) @@ -73,7 +60,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { displayTitle, html: finalHTMLMobile.documentElement.outerHTML, mediaDependencies: mediaDependenciesVal, - moduleDependencies: wikimediaMobileModuleDependencies, + moduleDependencies: moduleDependenciesFiltered, subtitles: subtitlesVal, }) return result diff --git a/src/util/dump.ts b/src/util/dump.ts index 26e59b58..0a396318 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -151,8 +151,6 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: const pathFunctions = { js: jsPath, css: cssPath, - mobileJs: jsPath, - mobileCss: cssPath, } const pathFunction = pathFunctions[type] diff --git a/test/e2e/vikidia.e2e.test.ts b/test/e2e/vikidia.e2e.test.ts index 30a7f684..21ccf233 100644 --- a/test/e2e/vikidia.e2e.test.ts +++ b/test/e2e/vikidia.e2e.test.ts @@ -18,7 +18,7 @@ describe('vikidia', () => { redis: process.env.REDIS, articleList: 'Alaska', customZimDescription: 'Alaska article', - forcdRender: 'WikimediaDesktop', + forceRender: 'WikimediaDesktop', } test('right scrapping from vikidia.org', async () => { From b1e7737dc3e28a79c820612dd07a253afce558cb Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 6 Oct 2023 10:18:41 +0300 Subject: [PATCH 49/58] Add error handling for empty data in WikimediaMobile render --- src/renderers/wikimedia-mobile.renderer.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index a1cd31cf..1d681b31 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ 
b/src/renderers/wikimedia-mobile.renderer.ts @@ -64,6 +64,8 @@ export class WikimediaMobileRenderer extends MobileRenderer { subtitles: subtitlesVal, }) return result + } else { + throw new Error(`No data received for ${displayTitle}`) } } catch (err) { logger.error(err.message) From 3b5398280ae5ebc8a5818a19762b763e4f1bd4a3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 6 Oct 2023 14:47:57 +0300 Subject: [PATCH 50/58] Decrease image size in mobile render, secure tests --- src/renderers/visual-editor.renderer.ts | 2 +- src/renderers/wikimedia-mobile.renderer.ts | 36 ++++++++++++++++++---- test/e2e/en.e2e.test.ts | 2 +- test/e2e/vikidia.e2e.test.ts | 1 - 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index 0330b875..9868f195 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -53,9 +53,9 @@ export class VisualEditorRenderer extends DesktopRenderer { try { const result: RenderOutput = [] const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts - const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) const { html, displayTitle } = await this.retrieveHtml(renderOpts) if (html) { + const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) const { finalHTML, mediaDependencies, subtitles } = await super.processHtml( html, dump, diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 1d681b31..de13421b 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -25,16 +25,15 @@ export class WikimediaMobileRenderer extends MobileRenderer { const { data, articleId, webp, _moduleDependencies, dump } = renderOpts const articleDetail = await renderOpts.articleDetailXId.get(articleId) - const moduleDependenciesFiltered = 
super.filterWikimediaMobileModules(_moduleDependencies) - const displayTitle = this.getStrippedTitle(renderOpts) if (data) { + const moduleDependenciesFiltered = super.filterWikimediaMobileModules(_moduleDependencies) let mediaDependenciesVal let subtitlesVal const mobileHTML = domino.createDocument(data) const finalHTMLMobile = await this.pipeMobileTransformations( mobileHTML, - this.convertLazyLoadToImages, + this.convertLazyLoadToImages.bind(this), this.removeEditContainer, this.removeHiddenClass, async (doc) => { @@ -102,18 +101,43 @@ export class WikimediaMobileRenderer extends MobileRenderer { // Set the attributes for the img element based on the data attributes in the span img.src = protocol + span.getAttribute('data-src') img.setAttribute('decoding', 'async') - img.setAttribute('data-file-width', span.getAttribute('data-data-file-width')) - img.setAttribute('data-file-height', span.getAttribute('data-data-file-height')) img.setAttribute('data-file-type', 'bitmap') img.width = span.getAttribute('data-width') img.height = span.getAttribute('data-height') - img.setAttribute('srcset', `${protocol}${span.getAttribute('data-srcset')}`) img.className = span.getAttribute('data-class') // Replace the span with the img element span.parentNode.replaceChild(img, span) }) + doc = this.resizeMobileImages(doc) + + return doc + } + + private resizeMobileImages(doc: DominoElement) { + const mobileImageWidth = 420 + const imageWidthPattern = /(\.jpg\/|\.png\/|\.svg\/|\.gif\/)(\d+)px/ + + // Directly filter images hosted on Commons wiki + const imgs: NodeList = doc.querySelectorAll('img[src*="/commons/"]') + + imgs.forEach((img: DominoElement) => { + const imgWidth = img.getAttribute('width') + const imgHeight = img.getAttribute('height') + const imgSrc = img.getAttribute('src') + const imageWidthMatchSrc = imgSrc.match(imageWidthPattern) + + if (imgWidth && imgWidth > mobileImageWidth && imageWidthMatchSrc) { + const heightScaleFactor = Math.round((imgWidth / imgHeight) * 
100) / 100 + const newImgSrc = imgSrc.replace(imageWidthMatchSrc[2], mobileImageWidth.toString()) + + img.setAttribute('src', newImgSrc) + img.setAttribute('width', mobileImageWidth.toString()) + img.setAttribute('height', Math.round(mobileImageWidth / heightScaleFactor).toString()) + } + }) + return doc } diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index 19f66928..ef1efd8f 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -28,7 +28,7 @@ await testAllRenders(mwUrl, articleList, format, async (outFiles) => { describe('e2e test for en.wikipedia.org', () => { const articleDoc = domino.createDocument(articleFromDump) test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { - expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + expect(articleDoc.querySelector('h1.article-header, h1.pcs-edit-section-title')).toBeTruthy() }) test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => { const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) diff --git a/test/e2e/vikidia.e2e.test.ts b/test/e2e/vikidia.e2e.test.ts index 21ccf233..694d15f3 100644 --- a/test/e2e/vikidia.e2e.test.ts +++ b/test/e2e/vikidia.e2e.test.ts @@ -18,7 +18,6 @@ describe('vikidia', () => { redis: process.env.REDIS, articleList: 'Alaska', customZimDescription: 'Alaska article', - forceRender: 'WikimediaDesktop', } test('right scrapping from vikidia.org', async () => { From 79d5acf8d709be59cdf3095e78fa51327ec10cd7 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 6 Oct 2023 16:25:11 +0300 Subject: [PATCH 51/58] Don't save jsConfigVars for mobile render --- src/renderers/abstractMobile.render.ts | 4 +--- src/util/saveArticles.ts | 9 ++++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index c8e6ce4f..7a25343f 100644 --- a/src/renderers/abstractMobile.render.ts +++ 
b/src/renderers/abstractMobile.render.ts @@ -12,8 +12,7 @@ export abstract class MobileRenderer extends Renderer { } public filterWikimediaMobileModules(_moduleDependencies) { - const { jsConfigVars, jsDependenciesList, styleDependenciesList } = _moduleDependencies as { - jsConfigVars: string + const { jsDependenciesList, styleDependenciesList } = _moduleDependencies as { jsDependenciesList: string[] styleDependenciesList: string[] } @@ -22,7 +21,6 @@ export abstract class MobileRenderer extends Renderer { const wikimediaMobileCssModuleDependencies = styleDependenciesList.filter((item) => item.includes('css/mobile')) const wikimediaMobileModuleDependencies = { - jsConfigVars, jsDependenciesList: wikimediaMobileJsModuleDependencies, styleDependenciesList: wikimediaMobileCssModuleDependencies, } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index df7a4da1..3225a060 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -311,7 +311,8 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade for (const dep of moduleDependencies.styleDependenciesList) { cssModuleDependencies.add(dep) } - jsConfigVars = jsConfigVars || moduleDependencies.jsConfigVars + + jsConfigVars = moduleDependencies.jsConfigVars || '' /* * getModuleDependencies and downloader.getArticle are @@ -386,8 +387,10 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade logger.log(`Done with downloading a total of [${articlesTotal}] articles`) - const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' }) - zimCreator.addArticle(jsConfigVarArticle) + if (jsConfigVars) { + const jsConfigVarArticle = new ZimArticle({ url: jsPath('jsConfigVars', config.output.dirs.mediawiki), data: jsConfigVars, ns: '-' }) + zimCreator.addArticle(jsConfigVarArticle) + } return { jsModuleDependencies, From 5ea91e808a0eb4411a6cb8c267362362a51fd7fe Mon Sep 17 00:00:00 
2001 From: Vadim Kovalenko Date: Fri, 6 Oct 2023 17:15:22 +0300 Subject: [PATCH 52/58] Throw an error if data no present for WikimediaDesktop render --- src/renderers/wikimedia-desktop.renderer.ts | 5 +++++ src/renderers/wikimedia-mobile.renderer.ts | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index f91dbce2..2a8b3d68 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -35,6 +35,11 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { public async render(renderOpts: RenderOpts): Promise { const result: RenderOutput = [] const { data, articleId, articleDetailXId, webp, _moduleDependencies, isMainPage, dump } = renderOpts + + if (!data) { + throw new Error(`Cannot render [${data}] into an article`) + } + const articleDetail = await renderOpts.articleDetailXId.get(articleId) const moduleDependenciesFiltered = super.filterWikimediaDesktopModules(_moduleDependencies) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index de13421b..b82f43e5 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -64,7 +64,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { }) return result } else { - throw new Error(`No data received for ${displayTitle}`) + throw new Error(`Cannot render [${data}] into an article`) } } catch (err) { logger.error(err.message) From 99a4edacab832cd2801243c0f087e5be0f5f9f50 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 9 Oct 2023 16:15:02 +0300 Subject: [PATCH 53/58] Update image optimization for mobile render --- src/renderers/wikimedia-mobile.renderer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index b82f43e5..e46105c9 100644 --- 
a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -116,8 +116,8 @@ export class WikimediaMobileRenderer extends MobileRenderer { } private resizeMobileImages(doc: DominoElement) { - const mobileImageWidth = 420 - const imageWidthPattern = /(\.jpg\/|\.png\/|\.svg\/|\.gif\/)(\d+)px/ + const mobileImageWidth = 375 + const imageWidthPattern = /(\.jpg\/|\.png\/|\.svg\/|\.gif\/)(\d+)px/i // Directly filter images hosted on Commons wiki const imgs: NodeList = doc.querySelectorAll('img[src*="/commons/"]') From 17a50c0ed8fa8e493b7772c48d11ebe48b13c2a6 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 9 Oct 2023 16:48:15 +0300 Subject: [PATCH 54/58] Update naming and minor refactoring of resizeMobileImages() method --- src/config.ts | 2 +- src/mwoffliner.lib.ts | 4 ++-- src/renderers/abstractMobile.render.ts | 2 +- src/renderers/wikimedia-mobile.renderer.ts | 4 ---- src/util/misc.ts | 10 ++++++---- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/config.ts b/src/config.ts index 8ac7aceb..27223528 100644 --- a/src/config.ts +++ b/src/config.ts @@ -56,7 +56,7 @@ const config = { // CSS resources added by Kiwix cssResources: ['style', 'content.parsoid', 'inserted_style'], mainPageCssResources: ['mobile_main_page'], - wmMobileCssResources: ['wm_mobile_override_style'], + wikimediaMobileCssResources: ['wm_mobile_override_style'], jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], mwMobileJsResources: ['wm_mobile_override_script'], diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 5b46200a..12afdcb6 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,7 +37,7 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, - saveStaticWmMobileFiles, + saveStaticWikimediaMobileFiles, importPolyfillModules, extractArticleList, getTmpDirectory, @@ -401,7 +401,7 @@ async function 
execute(argv: any) { zimCreator.addArticle(scraperArticle) logger.info('Copying Static Wikimedia Mobile Override Files') - await saveStaticWmMobileFiles(config, zimCreator) + await saveStaticWikimediaMobileFiles(config, zimCreator) logger.info('Copying Static Resource Files') await saveStaticFiles(config, zimCreator) diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index 7a25343f..41a3b0ba 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -43,7 +43,7 @@ export abstract class MobileRenderer extends Renderer { .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) .replace('__ARTICLE_CONFIGVARS_LIST__', '') .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) - .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(config.output.wmMobileCssResources[0])) + .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(config.output.wikimediaMobileCssResources[0])) .replace( '__ARTICLE_JS_LIST__', jsDependenciesList.length !== 0 ? 
jsDependenciesList.map((oneJsDep: string) => genHeaderScript(config, oneJsDep, articleId, config.output.dirs.mediawiki)).join('\n') : '', diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index e46105c9..0e5da4cd 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -101,10 +101,6 @@ export class WikimediaMobileRenderer extends MobileRenderer { // Set the attributes for the img element based on the data attributes in the span img.src = protocol + span.getAttribute('data-src') img.setAttribute('decoding', 'async') - img.setAttribute('data-file-type', 'bitmap') - img.width = span.getAttribute('data-width') - img.height = span.getAttribute('data-height') - img.className = span.getAttribute('data-class') // Replace the span with the img element span.parentNode.replaceChild(img, span) diff --git a/src/util/misc.ts b/src/util/misc.ts index ec815d34..05350def 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -185,10 +185,12 @@ export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { return Promise.all([...cssPromises, ...jsPromises]) } -export function saveStaticWmMobileFiles(config: Config, zimCreator: ZimCreator) { - const wmMobileCssPromises = config.output.wmMobileCssResources.map((wmMobileCss) => saveResourceFile(wmMobileCss, 'css', 'wm_mobile/', config, zimCreator)()) - const wmMobileJsPromises = config.output.mwMobileJsResources.map((wmMobileJs) => saveResourceFile(wmMobileJs, 'js', 'wm_mobile/', config, zimCreator)()) - return Promise.all([...wmMobileCssPromises, ...wmMobileJsPromises]) +export function saveStaticWikimediaMobileFiles(config: Config, zimCreator: ZimCreator) { + const wikimediaMobileCssPromises = config.output.wikimediaMobileCssResources.map((wikimediaMobileCss) => + saveResourceFile(wikimediaMobileCss, 'css', 'wm_mobile/', config, zimCreator)(), + ) + const wikimediaMobileJsPromises = 
config.output.mwMobileJsResources.map((wikimediaMobileJs) => saveResourceFile(wikimediaMobileJs, 'js', 'wm_mobile/', config, zimCreator)()) + return Promise.all([...wikimediaMobileCssPromises, ...wikimediaMobileJsPromises]) } export function cssPath(css: string, subDirectory = '') { From dbc3116cded996af1de25ca6c9c7aa0f3b52681a Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 9 Oct 2023 17:56:06 +0300 Subject: [PATCH 55/58] Update attributes for lazy loaded images of mobile render --- src/renderers/wikimedia-mobile.renderer.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 0e5da4cd..b99ed5eb 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -101,6 +101,9 @@ export class WikimediaMobileRenderer extends MobileRenderer { // Set the attributes for the img element based on the data attributes in the span img.src = protocol + span.getAttribute('data-src') img.setAttribute('decoding', 'async') + img.width = span.getAttribute('data-width') + img.height = span.getAttribute('data-height') + img.className = span.getAttribute('data-class') // Replace the span with the img element span.parentNode.replaceChild(img, span) From 61d776829cec14d9677caa01a18b5780ed942600 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Sun, 15 Oct 2023 21:13:18 +0300 Subject: [PATCH 56/58] Move static files list preparation into renders (partial impl) --- .../wm_mobile_override_script.js | 0 .../wm_mobile_override_style.css | 0 src/mwoffliner.lib.ts | 11 ++---- src/renderers/abstract.renderer.ts | 1 + src/renderers/abstractDesktop.render.ts | 3 ++ src/renderers/abstractMobile.render.ts | 3 ++ src/renderers/visual-editor.renderer.ts | 1 + src/renderers/wikimedia-desktop.renderer.ts | 1 + src/renderers/wikimedia-mobile.renderer.ts | 31 +--------------- src/util/misc.ts | 37 ++++++------------- src/util/saveArticles.ts | 8 +++- 11 
files changed, 34 insertions(+), 62 deletions(-) rename res/{wm_mobile => }/wm_mobile_override_script.js (100%) rename res/{wm_mobile => }/wm_mobile_override_style.css (100%) diff --git a/res/wm_mobile/wm_mobile_override_script.js b/res/wm_mobile_override_script.js similarity index 100% rename from res/wm_mobile/wm_mobile_override_script.js rename to res/wm_mobile_override_script.js diff --git a/res/wm_mobile/wm_mobile_override_style.css b/res/wm_mobile_override_style.css similarity index 100% rename from res/wm_mobile/wm_mobile_override_style.css rename to res/wm_mobile_override_style.css diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 12afdcb6..0776a720 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -37,7 +37,6 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, - saveStaticWikimediaMobileFiles, importPolyfillModules, extractArticleList, getTmpDirectory, @@ -400,11 +399,6 @@ async function execute(argv: any) { }) zimCreator.addArticle(scraperArticle) - logger.info('Copying Static Wikimedia Mobile Override Files') - await saveStaticWikimediaMobileFiles(config, zimCreator) - logger.info('Copying Static Resource Files') - await saveStaticFiles(config, zimCreator) - logger.info('Finding stylesheets to download') const stylesheetsToGet = await dump.getRelevantStylesheetUrls(downloader) logger.log(`Found [${stylesheetsToGet.length}] stylesheets to download`) @@ -424,12 +418,15 @@ async function execute(argv: any) { logger.log('Getting articles') stime = Date.now() - const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) + const { jsModuleDependencies, cssModuleDependencies, staticFilesList } = await saveArticles(zimCreator, downloader, dump, hasWikimediaMobileApi, forceRender) logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`) logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`) 
logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`) + logger.info('Copying Static Resource Files') + await saveStaticFiles(staticFilesList, zimCreator) + const allDependenciesWithType = [ { type: 'js', moduleList: Array.from(jsModuleDependencies) }, { type: 'css', moduleList: Array.from(cssModuleDependencies) }, diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index ca95ba9a..22c07559 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -56,6 +56,7 @@ export interface RenderSingleOutput { html: string mediaDependencies: any moduleDependencies: any + staticFiles: string[] subtitles: any } diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts index 3a59f859..6067f525 100644 --- a/src/renderers/abstractDesktop.render.ts +++ b/src/renderers/abstractDesktop.render.ts @@ -1,5 +1,6 @@ import * as domino from 'domino' import { Renderer } from './abstract.renderer.js' +import { getStaticFiles } from '../util/misc.js' import { config } from '../config.js' import MediaWiki from '../MediaWiki.js' @@ -7,8 +8,10 @@ import { htmlWikimediaDesktopTemplateCode } from '../Templates.js' import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' export abstract class DesktopRenderer extends Renderer { + public staticFilesListDesktop: string[] = [] constructor() { super() + this.staticFilesListDesktop = getStaticFiles(config.output.jsResources, config.output.mainPageCssResources.concat(config.output.cssResources)) } public filterWikimediaDesktopModules(_moduleDependencies) { diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index 41a3b0ba..087c4a05 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -1,5 +1,6 @@ import * as domino from 'domino' import { Renderer } from './abstract.renderer.js' +import { getStaticFiles } from 
'../util/misc.js' import { config } from '../config.js' import MediaWiki from '../MediaWiki.js' @@ -7,8 +8,10 @@ import { htmlWikimediaMobileTemplateCode } from '../Templates.js' import { genCanonicalLink, genHeaderScript, genHeaderCSSLink } from '../util/misc.js' export abstract class MobileRenderer extends Renderer { + public staticFilesListMobile: string[] = [] constructor() { super() + this.staticFilesListMobile = getStaticFiles(config.output.mwMobileJsResources, config.output.wikimediaMobileCssResources) } public filterWikimediaMobileModules(_moduleDependencies) { diff --git a/src/renderers/visual-editor.renderer.ts b/src/renderers/visual-editor.renderer.ts index 9868f195..1aa28069 100644 --- a/src/renderers/visual-editor.renderer.ts +++ b/src/renderers/visual-editor.renderer.ts @@ -71,6 +71,7 @@ export class VisualEditorRenderer extends DesktopRenderer { html: finalHTML, mediaDependencies, moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListDesktop, subtitles, }) return result diff --git a/src/renderers/wikimedia-desktop.renderer.ts b/src/renderers/wikimedia-desktop.renderer.ts index 2a8b3d68..91113cf8 100644 --- a/src/renderers/wikimedia-desktop.renderer.ts +++ b/src/renderers/wikimedia-desktop.renderer.ts @@ -69,6 +69,7 @@ export class WikimediaDesktopRenderer extends DesktopRenderer { html: finalHTML, mediaDependencies, moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListDesktop, subtitles, }) } diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index b99ed5eb..1697f493 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -33,7 +33,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { const mobileHTML = domino.createDocument(data) const finalHTMLMobile = await this.pipeMobileTransformations( mobileHTML, - this.convertLazyLoadToImages.bind(this), + this.convertLazyLoadToImages, 
this.removeEditContainer, this.removeHiddenClass, async (doc) => { @@ -60,6 +60,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { html: finalHTMLMobile.documentElement.outerHTML, mediaDependencies: mediaDependenciesVal, moduleDependencies: moduleDependenciesFiltered, + staticFiles: this.staticFilesListMobile, subtitles: subtitlesVal, }) return result @@ -109,34 +110,6 @@ export class WikimediaMobileRenderer extends MobileRenderer { span.parentNode.replaceChild(img, span) }) - doc = this.resizeMobileImages(doc) - - return doc - } - - private resizeMobileImages(doc: DominoElement) { - const mobileImageWidth = 375 - const imageWidthPattern = /(\.jpg\/|\.png\/|\.svg\/|\.gif\/)(\d+)px/i - - // Directly filter images hosted on Commons wiki - const imgs: NodeList = doc.querySelectorAll('img[src*="/commons/"]') - - imgs.forEach((img: DominoElement) => { - const imgWidth = img.getAttribute('width') - const imgHeight = img.getAttribute('height') - const imgSrc = img.getAttribute('src') - const imageWidthMatchSrc = imgSrc.match(imageWidthPattern) - - if (imgWidth && imgWidth > mobileImageWidth && imageWidthMatchSrc) { - const heightScaleFactor = Math.round((imgWidth / imgHeight) * 100) / 100 - const newImgSrc = imgSrc.replace(imageWidthMatchSrc[2], mobileImageWidth.toString()) - - img.setAttribute('src', newImgSrc) - img.setAttribute('width', mobileImageWidth.toString()) - img.setAttribute('height', Math.round(mobileImageWidth / heightScaleFactor).toString()) - } - }) - return doc } diff --git a/src/util/misc.ts b/src/util/misc.ts index 05350def..e7d6fb2e 100644 --- a/src/util/misc.ts +++ b/src/util/misc.ts @@ -162,35 +162,22 @@ export function interpolateTranslationString(str: string, parameters: { [key: st return newString } -function saveResourceFile(resource: string, type: 'css' | 'js', basePath: string, config: Config, zimCreator: ZimCreator) { - return async () => { - try { - const content = await readFilePromise(pathParser.resolve(__dirname, 
`../../res/${basePath}${resource}.${type}`)) - const article = new ZimArticle({ - url: type === 'css' ? cssPath(resource) : jsPath(resource), - data: content, - ns: '-', - }) +export async function saveStaticFiles(staticFiles: Set, zimCreator: ZimCreator) { + try { + staticFiles.forEach(async (file) => { + const staticFilesContent = await readFilePromise(pathParser.resolve(__dirname, `../../res/${file}`)) + const article = new ZimArticle({ url: file.endsWith('.css') ? cssPath(file) : jsPath(file), data: staticFilesContent, ns: '-' }) zimCreator.addArticle(article) - } catch (error) { - const fileType = type === 'css' ? (basePath.includes('wm_mobile') ? 'style Wikimedia mobile override' : 'style') : 'script' - logger.warn(`Could not create ${fileType} ${resource} file : ${error}`) - } + }) + } catch (err) { + logger.error(err) } } -export function saveStaticFiles(config: Config, zimCreator: ZimCreator) { - const cssPromises = config.output.cssResources.concat(config.output.mainPageCssResources).map((css) => saveResourceFile(css, 'css', '', config, zimCreator)()) - const jsPromises = config.output.jsResources.map((js) => saveResourceFile(js, 'js', '', config, zimCreator)()) - return Promise.all([...cssPromises, ...jsPromises]) -} - -export function saveStaticWikimediaMobileFiles(config: Config, zimCreator: ZimCreator) { - const wikimediaMobileCssPromises = config.output.wikimediaMobileCssResources.map((wikimediaMobileCss) => - saveResourceFile(wikimediaMobileCss, 'css', 'wm_mobile/', config, zimCreator)(), - ) - const wikimediaMobileJsPromises = config.output.mwMobileJsResources.map((wikimediaMobileJs) => saveResourceFile(wikimediaMobileJs, 'js', 'wm_mobile/', config, zimCreator)()) - return Promise.all([...wikimediaMobileCssPromises, ...wikimediaMobileJsPromises]) +export function getStaticFiles(jsStaticFiles: string[], cssStaticFiles: string[]): string[] { + jsStaticFiles = jsStaticFiles.map((jsFile) => jsFile.concat('.js')) + cssStaticFiles = 
cssStaticFiles.map((cssFile) => cssFile.concat('.css')) + return jsStaticFiles.concat(cssStaticFiles) } export function cssPath(css: string, subDirectory = '') { diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 3225a060..847ffae8 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -234,6 +234,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, hasWikimediaMobileApi: boolean, forceRender = null) { const jsModuleDependencies = new Set() const cssModuleDependencies = new Set() + const staticFilesList = new Set() let jsConfigVars = '' let prevPercentProgress: string const { articleDetailXId } = RedisStore @@ -298,7 +299,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) - for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, subtitles } of rets) { + for (const { articleId, displayTitle: articleTitle, html: finalHTML, mediaDependencies, moduleDependencies, staticFiles, subtitles } of rets) { if (!finalHTML) { logger.warn(`No HTML returned for article [${articleId}], skipping`) continue @@ -312,6 +313,10 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade cssModuleDependencies.add(dep) } + for (const file of staticFiles) { + staticFilesList.add(file) + } + jsConfigVars = moduleDependencies.jsConfigVars || '' /* @@ -393,6 +398,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade } return { + staticFilesList, jsModuleDependencies, cssModuleDependencies, } From bdd0847195dbfc3408c3a1b904a9af7a7ca330a3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 16 Oct 2023 10:12:58 +0300 Subject: [PATCH 
57/58] Update en.e2e.test.ts file to check zim integrity --- test/e2e/en.e2e.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index ef1efd8f..204dd159 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -1,6 +1,6 @@ import { testAllRenders } from '../testAllRenders.js' import domino from 'domino' -import { zimdump } from '../util.js' +import { zimdump, zimcheck } from '../util.js' import 'dotenv/config.js' import { jest } from '@jest/globals' import rimraf from 'rimraf' @@ -25,6 +25,7 @@ const format = '' await testAllRenders(mwUrl, articleList, format, async (outFiles) => { const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() describe('e2e test for en.wikipedia.org', () => { const articleDoc = domino.createDocument(articleFromDump) test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { From d412ee4542cbd793b81db982b917b4596f5ab232 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 16 Oct 2023 13:48:53 +0300 Subject: [PATCH 58/58] Move common static files handling to abstract render --- src/config.ts | 12 ++--- src/mwoffliner.lib.ts | 1 - src/renderers/abstract.renderer.ts | 8 ++++ src/renderers/abstractDesktop.render.ts | 2 +- src/renderers/abstractMobile.render.ts | 2 +- test/e2e/en.e2e.test.ts | 6 ++- test/e2e/mobileRenderIntegrity.test.ts | 59 ------------------------- test/unit/saveStaticFiles.test.ts | 48 ++++++++++++++++++++ 8 files changed, 69 insertions(+), 69 deletions(-) delete mode 100644 test/e2e/mobileRenderIntegrity.test.ts create mode 100644 test/unit/saveStaticFiles.test.ts diff --git a/src/config.ts b/src/config.ts index 27223528..1bc3da66 100644 --- a/src/config.ts +++ b/src/config.ts @@ -53,12 +53,14 @@ const config = { }, output: { - // CSS resources added by Kiwix - cssResources: ['style', 'content.parsoid', 
'inserted_style'], - mainPageCssResources: ['mobile_main_page'], - wikimediaMobileCssResources: ['wm_mobile_override_style'], + // CSS and JS resources added by Kiwix + cssResourcesCommon: ['style', 'mobile_main_page'], + jsResourcesCommon: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min'], + + cssResources: ['content.parsoid', 'inserted_style'], + jsResources: ['../node_modules/details-element-polyfill/dist/details-element-polyfill'], - jsResources: ['script', 'masonry.min', 'article_list_home', 'images_loaded.min', '../node_modules/details-element-polyfill/dist/details-element-polyfill'], + wikimediaMobileCssResources: ['wm_mobile_override_style'], mwMobileJsResources: ['wm_mobile_override_script'], // JS/CSS resources to be imported from MediaWiki diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 0776a720..53f21463 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -219,7 +219,6 @@ async function execute(argv: any) { RedisStore.setOptions(argv.redis || config.defaults.redisPath) await RedisStore.connect() const { articleDetailXId, filesToDownloadXPath, filesToRetryXPath, redirectsXId } = RedisStore - await downloader.setBaseUrls(forceRender) // Output directory const outputDirectory = path.isAbsolute(_outputDirectory || '') ? 
_outputDirectory : path.join(process.cwd(), _outputDirectory || 'out') await mkdirPromise(outputDirectory) diff --git a/src/renderers/abstract.renderer.ts b/src/renderers/abstract.renderer.ts index 22c07559..c4bbfebe 100644 --- a/src/renderers/abstract.renderer.ts +++ b/src/renderers/abstract.renderer.ts @@ -18,6 +18,7 @@ import { isWebpCandidateImageMimeType, interpolateTranslationString, encodeArticleIdForZimHtmlUrl, + getStaticFiles, } from '../util/misc.js' type renderType = 'auto' | 'desktop' | 'mobile' | 'specific' @@ -63,6 +64,13 @@ export interface RenderSingleOutput { export type RenderOutput = RenderSingleOutput[] export abstract class Renderer { + public staticFilesListCommon: string[] = [] + constructor() { + if (this.staticFilesListCommon.length === 0) { + this.staticFilesListCommon = getStaticFiles(config.output.jsResourcesCommon, config.output.cssResourcesCommon) + } + } + protected async treatVideo( dump: Dump, srcCache: KVS, diff --git a/src/renderers/abstractDesktop.render.ts b/src/renderers/abstractDesktop.render.ts index 6067f525..3fac4ffe 100644 --- a/src/renderers/abstractDesktop.render.ts +++ b/src/renderers/abstractDesktop.render.ts @@ -11,7 +11,7 @@ export abstract class DesktopRenderer extends Renderer { public staticFilesListDesktop: string[] = [] constructor() { super() - this.staticFilesListDesktop = getStaticFiles(config.output.jsResources, config.output.mainPageCssResources.concat(config.output.cssResources)) + this.staticFilesListDesktop = this.staticFilesListCommon.concat(getStaticFiles(config.output.jsResources, config.output.cssResources)) } public filterWikimediaDesktopModules(_moduleDependencies) { diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index 087c4a05..b771bd0b 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -11,7 +11,7 @@ export abstract class MobileRenderer extends Renderer { public staticFilesListMobile: string[] = [] 
constructor() { super() - this.staticFilesListMobile = getStaticFiles(config.output.mwMobileJsResources, config.output.wikimediaMobileCssResources) + this.staticFilesListMobile = this.staticFilesListCommon.concat(getStaticFiles(config.output.mwMobileJsResources, config.output.wikimediaMobileCssResources)) } public filterWikimediaMobileModules(_moduleDependencies) { diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index 204dd159..fb333e79 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -20,14 +20,16 @@ const verifyImgElements = (imgFilesArr, imgElements) => { } const mwUrl = 'https://en.wikipedia.org' -const articleList = 'User:Kelson/MWoffliner_CI_reference' +const articleList = 'BMW' const format = '' await testAllRenders(mwUrl, articleList, format, async (outFiles) => { const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() describe('e2e test for en.wikipedia.org', () => { const articleDoc = domino.createDocument(articleFromDump) + test(`test zim integrity for ${outFiles[0]?.renderer} renderer`, async () => { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + }) test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { expect(articleDoc.querySelector('h1.article-header, h1.pcs-edit-section-title')).toBeTruthy() }) diff --git a/test/e2e/mobileRenderIntegrity.test.ts b/test/e2e/mobileRenderIntegrity.test.ts deleted file mode 100644 index 8cb5d81a..00000000 --- a/test/e2e/mobileRenderIntegrity.test.ts +++ /dev/null @@ -1,59 +0,0 @@ -import 'dotenv/config.js' -import * as mwoffliner from '../../src/mwoffliner.lib.js' -import * as logger from '../../src/Logger.js' -import rimraf from 'rimraf' -import { execa } from 'execa' -import { jest } from '@jest/globals' -import { zimcheckAvailable, zimdumpAvailable, zimcheck } from '../util.js' - -jest.setTimeout(200000) - -let 
zimcheckIsAvailable -let zimdumpIsAvailable - -beforeAll(async () => { - zimcheckIsAvailable = await zimcheckAvailable() - zimdumpIsAvailable = await zimdumpAvailable() -}) - -async function getOutFiles(testId: string, articleList: string, mwUrl: string): Promise { - const parameters = { - mwUrl, - adminEmail: 'mail@mail.com', - outputDirectory: testId, - redis: process.env.REDIS, - articleList, - forceRender: 'WikimediaMobile', - } - - await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(parameters) - - return outFiles -} - -const commonTreatmentTest = async (articleList: string, mwUrl: string) => { - if (!zimcheckIsAvailable || !zimdumpIsAvailable) { - const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' - logger.error(`${missingTool} not installed, exiting test`) - process.exit(1) - } - const now = new Date() - const testId = `mwo-test-${+now}` - - const outFiles = await getOutFiles(testId, articleList, mwUrl) - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - - rimraf.sync(`./${testId}`) -} - -// TODO: articulate this test with /pull/1898 once merged -describe('Mobile render zim file integrity', () => { - const mwUrl = 'https://en.wikipedia.org' - // TODO: some articles such as 'Canada' don't pass this test even with desktop renderer - const articleList = 'BMW' - - test('Test WikimediaMobile with en.wikipedia.org', async () => { - await commonTreatmentTest(articleList, mwUrl) - }) -}) diff --git a/test/unit/saveStaticFiles.test.ts b/test/unit/saveStaticFiles.test.ts new file mode 100644 index 00000000..ba63f519 --- /dev/null +++ b/test/unit/saveStaticFiles.test.ts @@ -0,0 +1,48 @@ +import { startRedis, stopRedis } from './bootstrap.js' +import { jest } from '@jest/globals' +import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' +import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js' + +jest.setTimeout(10000) + 
+describe('saveStaticFiles', () => { + beforeAll(startRedis) + afterAll(stopRedis) + + test('Compare desktop static files list', async () => { + const desktopAndCommonStaticFiles = [ + 'script.js', + 'masonry.min.js', + 'article_list_home.js', + 'images_loaded.min.js', + 'style.css', + 'mobile_main_page.css', + '../node_modules/details-element-polyfill/dist/details-element-polyfill.js', + 'content.parsoid.css', + 'inserted_style.css', + ] + + const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() + const staticFilesFromRenderer = wikimediaDesktopRenderer.staticFilesListDesktop + + expect(desktopAndCommonStaticFiles).toEqual(staticFilesFromRenderer) + }) + + test('Compare mobile static files list', async () => { + const mobileAndCommonStatiFiles = [ + 'script.js', + 'masonry.min.js', + 'article_list_home.js', + 'images_loaded.min.js', + 'style.css', + 'mobile_main_page.css', + 'wm_mobile_override_script.js', + 'wm_mobile_override_style.css', + ] + + const wikimediaMobileRenderer = new WikimediaMobileRenderer() + const staticFilesFromRenderer = wikimediaMobileRenderer.staticFilesListMobile + + expect(mobileAndCommonStatiFiles).toEqual(staticFilesFromRenderer) + }) +})