Merge pull request #1902 from openzim/1868-nodet-fix
Fix nodet format option
kelson42 authored Sep 20, 2023
2 parents 788f2f6 + 9fc26c3 commit 190cd26
Showing 4 changed files with 74 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/config.ts
@@ -34,6 +34,9 @@ const config = {
  /* All DOM nodes which we should force to display */
  cssClassDisplayList: ['thumb'],

  /* Lead section to display */
  leadSectonId: '0',

  /* List of styles to be removed. 'onlyoffline' is a *hack* to
  /* display WPEN medical articles, see for example
  /* [[Carcinoid_syndrome]] */
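The new leadSectonId value matches the data-mw-section-id attribute that MediaWiki's section-wrapped HTML output sets on each <section> element, where '0' marks the lead section. A minimal sketch of that assumed markup, using the same domino library the tests below rely on:

import domino from 'domino'

// Assumed markup: each <section> wrapper carries a data-mw-section-id
// attribute, counting up from '0' for the lead section.
const doc = domino.createDocument(
  '<section data-mw-section-id="0"><p>Lead paragraph.</p></section>' +
    '<section data-mw-section-id="1"><h2>History</h2><p>Details.</p></section>',
)
const sections = Array.from(doc.querySelectorAll('section'))
const lead = sections.filter((s) => s.getAttribute('data-mw-section-id') === '0')
console.log(sections.length, lead.length) // 2 1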
6 changes: 6 additions & 0 deletions src/renderers/abstract.renderer.ts
@@ -615,6 +615,12 @@ export abstract class Renderer {
    filtersConfig.nodetCssClassBlackList.forEach((classname: string) => {
      nodesToDelete.push({ class: classname })
    })
    nodesToDelete.push({
      tag: 'section',
      filter(n) {
        return n.getAttribute('data-mw-section-id') !== filtersConfig.leadSectonId
      },
    })
  }

  /* Remove elements with blacklisted CSS classes and no link */
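For the nodet format, this change pushes one more descriptor onto nodesToDelete: alongside the class-based entries, a <section> filter that matches every section whose data-mw-section-id differs from leadSectonId, so only the lead section survives deletion. The real deletion pass lives elsewhere in abstract.renderer.ts; the sketch below is a hypothetical helper (DeleteDescriptor and applyNodesToDelete are illustrative names, not the project's) showing how such descriptors could be applied:

// Hypothetical shapes, for illustration only.
interface DeleteDescriptor {
  tag?: string
  class?: string
  filter?: (n: Element) => boolean
}

function applyNodesToDelete(doc: Document, nodesToDelete: DeleteDescriptor[]): void {
  for (const desc of nodesToDelete) {
    // Select by tag, by CSS class, or everything, depending on the descriptor.
    const selector = desc.tag ?? (desc.class ? `.${desc.class}` : '*')
    for (const node of Array.from(doc.querySelectorAll(selector))) {
      // Without a filter every match is deleted; with one, only nodes
      // for which the filter returns true are removed.
      if (!desc.filter || desc.filter(node)) {
        node.parentNode?.removeChild(node)
      }
    }
  }
}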
2 changes: 1 addition & 1 deletion src/util/mw-api.ts
@@ -259,7 +259,7 @@ export function mwRetToArticleDetail(obj: QueryMwRet): KVS<ArticleDetail> {

export async function checkApiAvailability(url: string, loginCookie = ''): Promise<boolean> {
  try {
-    const resp = await axios.get(url, { maxRedirects: 0, headers: { cookie: loginCookie } })
+    const resp = await axios.get(decodeURI(url), { maxRedirects: 0, headers: { cookie: loginCookie } })
    return resp.status === 200 && !resp.headers['mediawiki-api-error']
  } catch (err) {
    return false
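The one-line mw-api.ts change decodes the URL before the availability probe, presumably so that an already percent-encoded URL is not encoded a second time on its way through the HTTP client. decodeURI only reverses what encodeURI applies, so an unencoded URL passes through unchanged:

// Illustration only; the example URL is an assumption.
const encoded = 'https://en.wikipedia.org/w/api.php?titles=Carcinoid%20syndrome'
console.log(decodeURI(encoded)) // ...titles=Carcinoid syndrome
// Re-encoding the decoded form reproduces the original, whereas encoding
// the already-encoded string would double-encode '%' into '%25'.
console.log(encodeURI(decodeURI(encoded)) === encoded) // true
console.log(encodeURI(encoded).includes('%2520')) // true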
64 changes: 64 additions & 0 deletions test/unit/saveArticles.test.ts
@@ -79,6 +79,70 @@ describe('saveArticles', () => {
    expect(articleDoc.querySelector('h1.article-header')).toBeTruthy()
  })

  test('Check nodet article for en.wikipedia.org using Visual Editor renderer', async () => {
    const visualEditorRenderer = new VisualEditorRenderer()
    const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
    await downloader.setBaseUrls('VisualEditor')
    const articleId = 'Canada'
    const articleUrl = getArticleUrl(downloader, dump, articleId)
    const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
    const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
    const { articleDetailXId } = RedisStore
    const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' }
    const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
    articleDetailXId.setMany(articlesDetail)
    const result = await downloader.getArticle(
      downloader.webp,
      _moduleDependencies,
      articleId,
      articleDetailXId,
      visualEditorRenderer,
      articleUrl,
      dump,
      articleDetail,
      dump.isMainPage(articleId),
    )

    const articleDoc = domino.createDocument(result[0].html)

    const sections = Array.from(articleDoc.querySelectorAll('section'))
    const leadSection = sections[0]
    expect(sections.length).toEqual(1)
    expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
  })

  test('Check nodet article for en.wikipedia.org using Wikimedia Desktop renderer', async () => {
    const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
    const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia
    await downloader.setBaseUrls('WikimediaDesktop')
    const articleId = 'London'
    const articleUrl = getArticleUrl(downloader, dump, articleId)
    const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId])
    const articlesDetail = mwRetToArticleDetail(_articleDetailsRet)
    const { articleDetailXId } = RedisStore
    const articleDetail = { title: articleId }
    const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title)
    articleDetailXId.setMany(articlesDetail)
    const result = await downloader.getArticle(
      downloader.webp,
      _moduleDependencies,
      articleId,
      articleDetailXId,
      wikimediaDesktopRenderer,
      articleUrl,
      dump,
      articleDetail,
      dump.isMainPage(articleId),
    )

    const articleDoc = domino.createDocument(result[0].html)

    const sections = Array.from(articleDoc.querySelectorAll('section'))
    const leadSection = sections[0]
    expect(sections.length).toEqual(1)
    expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0')
  })

  test('Load main page and check that it is without header', async () => {
    const wikimediaDesktopRenderer = new WikimediaDesktopRenderer()
    const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikivoyage
