Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't mirror 'Story' namespace #1919

Merged
merged 2 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/MediaWiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import ApiURLDirector from './util/builders/url/api.director.js'
import DesktopURLDirector from './util/builders/url/desktop.director.js'
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
import { checkApiAvailability } from './util/mw-api.js'
import { BLACKLISTED_NS } from './util/const.js'

export interface QueryOpts {
action: string
Expand All @@ -34,7 +35,6 @@ class MediaWiki {
}

public metaData: MWMetaData
public _base: string
kelson42 marked this conversation as resolved.
Show resolved Hide resolved
public baseUrl: URL
public getCategories: boolean
public namespaces: MWNamespaces = {}
Expand Down Expand Up @@ -227,18 +227,22 @@ class MediaWiki {
const num = entry.id
const allowedSubpages = 'subpages' in entry
const isContent = type === 'namespaces' ? !!(entry.content || util.contains(addNamespaces, num)) : !!(entry.content !== undefined || util.contains(addNamespaces, num))
const isBlacklisted = BLACKLISTED_NS.includes(name)
kelson42 marked this conversation as resolved.
Show resolved Hide resolved
const canonical = entry.canonical ? entry.canonical : ''
const details = { num, allowedSubpages, isContent }

/* Namespaces in local language */
this.namespaces[util.lcFirst(name)] = details
this.namespaces[util.ucFirst(name)] = details

/* Namespaces in English (if available) */
if (canonical) {
this.namespaces[util.lcFirst(canonical)] = details
this.namespaces[util.ucFirst(canonical)] = details
}

/* Is content to mirror */
if (isContent) {
if (isContent && !isBlacklisted) {
this.namespacesToMirror.push(name)
}
})
Expand Down
1 change: 1 addition & 0 deletions src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ export const LOAD_PHP = /script.src = ".*load\.php.*";/
export const RULE_TO_REDIRECT = /window\.top !== window\.self/
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
export const MAX_FILE_DOWNLOAD_RETRIES = 5
export const BLACKLISTED_NS = ['Story'] // The 'Story' Wikipedia namespace is flagged as content but is not ingestible by Parsoid, see https://github.com/openzim/mwoffliner/issues/1853
48 changes: 38 additions & 10 deletions test/unit/mwApi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,32 @@ import { jest } from '@jest/globals'

jest.setTimeout(10000)

describe('mwApi', () => {
beforeAll(startRedis)
afterAll(stopRedis)
beforeAll(async () => {
MediaWiki.reset()
await startRedis()
})
afterAll(stopRedis)

/**
 * Shared MediaWiki bootstrap used by the suites in this file.
 *
 * Awaits, one after another and in this order: getMwMetaData, hasCoordinates,
 * hasWikimediaDesktopRestApi, hasVisualEditorApi and finally getNamespaces
 * (called with an empty list as its first argument).
 *
 * @param downloader - Downloader instance forwarded to the calls that accept one
 */
const initMW = async (downloader: Downloader) => {
  const steps: Array<() => unknown> = [
    () => MediaWiki.getMwMetaData(downloader),
    () => MediaWiki.hasCoordinates(downloader),
    () => MediaWiki.hasWikimediaDesktopRestApi(),
    () => MediaWiki.hasVisualEditorApi(),
    () => MediaWiki.getNamespaces([], downloader),
  ]

  // Strictly sequential: a step is only started once the previous one has resolved,
  // and a rejection aborts the remaining steps.
  for (const step of steps) {
    await step()
  }
}

describe('mwApi', () => {
let downloader: Downloader

beforeEach(async () => {
await RedisStore.articleDetailXId.flush()

MediaWiki.base = 'https://en.wikipedia.org'
MediaWiki.getCategories = true

downloader = new Downloader({ uaString: `${config.userAgent} ([email protected])`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })

await MediaWiki.getMwMetaData(downloader)
await MediaWiki.hasCoordinates(downloader)
await MediaWiki.hasWikimediaDesktopRestApi()
await MediaWiki.hasVisualEditorApi()

await MediaWiki.getNamespaces([], downloader)
await initMW(downloader)
})

test('MWApi Article Ids', async () => {
Expand Down Expand Up @@ -116,3 +122,25 @@ describe('mwApi', () => {
expect(interWikiTitle).toBeNull()
})
})

// Suite checking that the 'Story' namespace never ends up in MediaWiki.namespacesToMirror.
describe('Test blacklisted NSs', () => {
let downloader: Downloader

// Before every test: flush the article-detail store, point the MediaWiki object at
// id.wikipedia.org and re-run the shared initMW() bootstrap with a fresh Downloader.
beforeEach(async () => {
await RedisStore.articleDetailXId.flush()

// NOTE(review): presumably id.wikipedia.org was chosen because it exposes a 'Story'
// namespace (see https://github.com/openzim/mwoffliner/issues/1853) — confirm.
MediaWiki.base = 'https://id.wikipedia.org'
MediaWiki.getCategories = true

downloader = new Downloader({ uaString: `${config.userAgent} ([email protected])`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })

await initMW(downloader)
})

test('Prevent blacklisted namespaces to mirroring', async () => {
// One title in the 'Story' namespace plus one title without a namespace prefix.
const aIds = ['Story:Satelit_Oberon', 'London']
await getArticleIds(downloader, 'Main_Page', aIds)

// The only assertion: 'Story' must be absent from the list of namespaces to mirror.
expect(MediaWiki.namespacesToMirror).not.toContain('Story')
})
})
Loading