Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modularizing e2e tests #1898

Merged
merged 14 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ async function execute(argv: any) {
RedisStore.setOptions(argv.redis || config.defaults.redisPath)
await RedisStore.connect()
const { articleDetailXId, filesToDownloadXPath, filesToRetryXPath, redirectsXId } = RedisStore

await downloader.setBaseUrls(forceRender)
// Output directory
const outputDirectory = path.isAbsolute(_outputDirectory || '') ? _outputDirectory : path.join(process.cwd(), _outputDirectory || 'out')
await mkdirPromise(outputDirectory)
Expand Down
4 changes: 2 additions & 2 deletions src/sanitize-argument.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { isValidEmail } from './util/index.js'
import * as path from 'path'
import { fileURLToPath } from 'url'
import { parameterDescriptions } from './parameterList.js'
import { RENDERERS_LIST } from './util/const.js'

const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
Expand Down Expand Up @@ -192,11 +193,10 @@ export function sanitize_customFlavour(customFlavour: string): string {
}

export function sanitize_forceRender(renderName: string): string {
const renderNames = ['VisualEditor', 'WikimediaDesktop', 'WikimediaMobile']
const checkRenderName = (arr: string[], val: string) => {
return arr.some((arrVal) => val === arrVal)
}
if (checkRenderName(renderNames, renderName)) {
if (checkRenderName(RENDERERS_LIST, renderName)) {
return renderName
}
throw new Error(`Invalid render name: ${renderName}`)
Expand Down
1 change: 1 addition & 0 deletions src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
export const MAX_FILE_DOWNLOAD_RETRIES = 5
export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not ingestible by Parsoid https://github.com/openzim/mwoffliner/issues/1853
// Renderer names: used to validate the forceRender argument and iterated by the tests to run once per renderer
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor']
45 changes: 45 additions & 0 deletions test/e2e/en.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { testAllRenders } from '../testAllRenders.js'
import domino from 'domino'
import { zimdump } from '../util.js'
import 'dotenv/config.js'
import { jest } from '@jest/globals'
import rimraf from 'rimraf'

// Set the Jest timeout for this test file to 60000 ms (60 seconds)
jest.setTimeout(60000)

/**
 * Check the integrity of img elements between zim file and article html taken from it.
 *
 * Returns true as soon as one image element has a `src` attribute that contains
 * one of the given media file names; returns false when no element matches
 * (including when either input is empty).
 *
 * @param imgFilesArr - media file names to look for inside the `src` attributes
 * @param imgElements - image elements taken from the article document
 * @returns whether at least one element references at least one of the files
 */
const verifyImgElements = (
  imgFilesArr: readonly string[],
  imgElements: Iterable<{ getAttribute(name: string): string | null }>,
): boolean => {
  for (const img of imgElements) {
    // Read the attribute once per element instead of once per candidate file
    const src = img.getAttribute('src')
    // An <img> without a src attribute yields null; treat it as "no match" instead of throwing
    if (src == null) {
      continue
    }
    if (imgFilesArr.some((imgFile) => src.includes(imgFile))) {
      return true
    }
  }
  return false
}

// Scrape parameters handed to testAllRenders for this wiki
const mwUrl = 'https://en.wikipedia.org'
const articleList = 'User:Kelson/MWoffliner_CI_reference'
const format = ''

// testAllRenders invokes this callback once per renderer with the output files of that run;
// the callback reads the article back out of the dump and registers a Jest suite against it
await testAllRenders(mwUrl, articleList, format, async (outFiles) => {
  const dump = outFiles[0]
  const articleFromDump = await zimdump(`show --url A/${articleList} ${dump.outFile}`)

  describe('e2e test for en.wikipedia.org', () => {
    const articleDoc = domino.createDocument(articleFromDump)

    // Remove the output directory of this run once the suite is done
    afterAll(() => {
      rimraf.sync(`./${dump.testId}`)
    })

    test(`test article header for ${dump.renderer} renderer`, async () => {
      const header = articleDoc.querySelector('h1.article-header')
      expect(header).toBeTruthy()
    })

    test(`test article image integrity for ${dump.renderer} renderer`, async () => {
      // List the entries of namespace I in the dump and keep only pdf/png/jpg files
      const listing = await zimdump(`list --ns I ${dump.outFile}`)
      const wantedSuffixes = ['pdf', 'png', 'jpg']
      const imgFilesArr = listing.split('\n').filter((entry) => wantedSuffixes.some((suffix) => entry.endsWith(suffix)))
      const imgElements = Array.from(articleDoc.querySelectorAll('img'))
      expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true)
    })
  })
})
59 changes: 59 additions & 0 deletions test/testAllRenders.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import * as logger from '../src/Logger.js'
import * as mwoffliner from '../src/mwoffliner.lib.js'
import { execa } from 'execa'
import { RENDERERS_LIST } from '../src/util/const.js'
import { zimcheckAvailable, zimdumpAvailable } from './util.js'

/*
This is the template for e2e tests of different wikis
1. Verifies zimcheck and zimdump availability and caches the result
2. Gets the output file and checks its integrity
3. Passes the output file of each renderer to the callback function
*/

// Remembers that the zim tools were already found, so the lookup happens only once per module load
let zimToolsChecked = false

/**
 * Make sure both zimcheck and zimdump are available.
 * Logs an error and exits the process with code 1 when one of them is missing;
 * on success the result is cached in `zimToolsChecked`.
 */
async function checkZimTools() {
  if (!zimToolsChecked) {
    const hasZimcheck = await zimcheckAvailable()
    const hasZimdump = await zimdumpAvailable()

    if (!(hasZimcheck && hasZimdump)) {
      // Zimcheck is reported first; Zimdump only when Zimcheck itself is present
      const missingTool = hasZimcheck ? 'Zimdump' : 'Zimcheck'
      logger.error(`${missingTool} not installed, exiting test`)
      process.exit(1)
    }

    zimToolsChecked = true
  }
}

/**
 * Run one mwoffliner scrape with the given renderer forced and resolve to
 * whatever `mwoffliner.execute` resolves to.
 *
 * @param renderName - renderer name passed as `forceRender`
 * @param testId - used as the output directory of the run
 * @param articleList - article list passed through to mwoffliner
 * @param mwUrl - URL of the wiki to scrape
 * @param format - optional format value(s) passed through to mwoffliner
 */
async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string | string[]): Promise<any> {
  const scrapeOptions = {
    mwUrl,
    // NOTE(review): this value looks like an e-mail obfuscation placeholder — confirm the intended address
    adminEmail: '[email protected]',
    outputDirectory: testId,
    redis: process.env.REDIS,
    articleList,
    forceRender: renderName,
    format,
  }

  // Flush Redis through the CLI before starting the scrape
  await execa('redis-cli flushall', { shell: true })

  return await mwoffliner.execute(scrapeOptions)
}

/**
 * Run a scrape for every renderer in RENDERERS_LIST, one after another, and
 * pass the resulting output files to `callback`.
 *
 * Before the callback is awaited, the first output file is tagged with the
 * `testId` (the output directory name) and the `renderer` used for that run.
 *
 * @param mwUrl - URL of the wiki to scrape
 * @param articleList - article list passed through to the scrape
 * @param format - format value(s) passed through to the scrape
 * @param callback - awaited once per renderer with the output files of that run
 */
export async function testAllRenders(mwUrl: string, articleList: string, format: string | string[], callback) {
  await checkZimTools()

  for (const renderer of RENDERERS_LIST) {
    // Millisecond timestamp gives each run its own output directory name
    const testId = `mwo-test-${Date.now()}`
    const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format)

    const firstOutFile = outFiles[0]
    firstOutFile.testId = testId
    firstOutFile.renderer = renderer

    await callback(outFiles)
  }
}
103 changes: 42 additions & 61 deletions test/unit/saveArticles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { jest } from '@jest/globals'
import { getArticleUrl } from '../../src/util/saveArticles.js'
import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js'
import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js'
import { RENDERERS_LIST } from '../../src/util/const.js'

jest.setTimeout(40000)

Expand Down Expand Up @@ -79,69 +80,49 @@ describe('saveArticles', () => {
expect(articleDoc.querySelector('h1.article-header')).toBeTruthy()
})

test('Check nodet article for en.wikipedia.org using Visual Editor renderer', async () => {
const visualEditorRenderer = new VisualEditorRenderer()
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls('VisualEditor')
const articleId = 'Canada'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
visualEditorRenderer,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)

const articleDoc = domino.createDocument(result[0].html)

const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})

test('Check nodet article for en.wikipedia.org using Wikimedia Desktop renderer', async () => {
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls('WikimediaDesktop')
const articleId = 'London'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
wikimediaDesktopRenderer,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)
for (const renderer of RENDERERS_LIST) {
test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => {
let rendererInstance
switch (renderer) {
case 'VisualEditor':
rendererInstance = new VisualEditorRenderer()
break
case 'WikimediaDesktop':
rendererInstance = new WikimediaDesktopRenderer()
break
default:
throw new Error(`Unknown renderer: ${renderer}`)
}
const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
await downloader.setBaseUrls(renderer)
const articleId = 'Canada'
const articleUrl = getArticleUrl(downloader, dump, articleId)
const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
const { articleDetailXId } = RedisStore
const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' }
const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
articleDetailXId.setMany(articlesDetail)
const result = await downloader.getArticle(
downloader.webp,
_moduleDependencies,
articleId,
articleDetailXId,
rendererInstance,
articleUrl,
dump,
articleDetail,
dump.isMainPage(articleId),
)

const articleDoc = domino.createDocument(result[0].html)
const articleDoc = domino.createDocument(result[0].html)

const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})
const sections = Array.from(articleDoc.querySelectorAll('section'))
const leadSection = sections[0]
expect(sections.length).toEqual(1)
expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
})
}

test('Load main page and check that it is without header', async () => {
const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
Expand Down