Skip to content

Commit

Permalink
Implement MediaWiki REST API render (partial impl)
Browse files Browse the repository at this point in the history
  • Loading branch information
VadimKovalenkoSNF committed Oct 11, 2023
1 parent af4f200 commit 8c7ba86
Show file tree
Hide file tree
Showing 16 changed files with 171 additions and 67 deletions.
9 changes: 9 additions & 0 deletions src/Downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,14 @@ class Downloader {
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
{ condition: await MediaWiki.hasMediaWikiRESTApi(), value: MediaWiki.mediawikiRESTApiURL.href },
])

//* Objects order in array matters!
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
{ condition: await MediaWiki.hasMediaWikiRESTApi(), value: MediaWiki.mediawikiRESTApiURL.href },
])
} else {
switch (forceRender) {
Expand All @@ -196,6 +198,13 @@ class Downloader {
break
}
break
case 'MediawikiRESTApi':
  // hasMediaWikiRESTApi() is async: without `await` the condition would test a
  // Promise object, which is always truthy, and the availability check would be bypassed.
  if (await MediaWiki.hasMediaWikiRESTApi()) {
    this.baseUrl = MediaWiki.mediawikiRESTApiURL.href
    this.baseUrlForMainPage = MediaWiki.mediawikiRESTApiURL.href
  }
  // When the API is not available the base URLs are left untouched.
  break
default:
throw new Error('Unable to find specific API end-point to retrieve article HTML')
}
Expand Down
22 changes: 22 additions & 0 deletions src/MediaWiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import BaseURLDirector from './util/builders/url/base.director.js'
import ApiURLDirector from './util/builders/url/api.director.js'
import DesktopURLDirector from './util/builders/url/desktop.director.js'
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
import MediaWikiRESTApiDirector from './util/builders/url/mediawiki-rest-api.director.js'
import { checkApiAvailability } from './util/mw-api.js'
import { BLACKLISTED_NS } from './util/const.js'

Expand Down Expand Up @@ -48,19 +49,23 @@ class MediaWiki {
#password: string
#apiPath: string
#domain: string
#mediawikiRESTApiPath: string
private apiUrlDirector: ApiURLDirector
private wikimediaDesktopUrlDirector: DesktopURLDirector
private visualEditorURLDirector: VisualEditorURLDirector
private mediaWikiRESTApiDirector: MediaWikiRESTApiDirector

public visualEditorApiUrl: URL
public apiUrl: URL
public mediawikiRESTApiURL: URL
public modulePath: string // only for reading
public _modulePathOpt: string // only for writing to generate modulePath
public webUrl: URL
public desktopRestApiUrl: URL

#hasWikimediaDesktopRestApi: boolean | null
#hasVisualEditorApi: boolean | null
#hasMediaWikiRESTApi: boolean | null
#hasCoordinates: boolean | null

set username(value: string) {
Expand All @@ -87,6 +92,10 @@ class MediaWiki {
this.#wikiPath = value
}

/**
 * Path of the MediaWiki REST API page endpoint, relative to the wiki base URL.
 * The getter is needed because the metadata object reads `this.mediawikiRESTAPiPath`;
 * with a setter only, that read always evaluates to `undefined`.
 */
get mediawikiRESTAPiPath(): string {
  return this.#mediawikiRESTApiPath
}

/** Stores the path only; it does not rebuild any URL derived from it. */
set mediawikiRESTAPiPath(value: string) {
  this.#mediawikiRESTApiPath = value
}

set base(value: string) {
this.baseUrl = basicURLDirector.buildMediawikiBaseURL(value)
this.initMWApis()
Expand All @@ -105,6 +114,7 @@ class MediaWiki {
this.namespaces = {}
this.namespacesToMirror = []

this.#mediawikiRESTApiPath = 'w/rest.php/v1/page'
this.#apiPath = 'w/api.php'
this.#wikiPath = 'wiki/'
this.apiCheckArticleId = 'MediaWiki:Sidebar'
Expand All @@ -121,6 +131,7 @@ class MediaWiki {

this.#hasWikimediaDesktopRestApi = null
this.#hasVisualEditorApi = null
this.#hasMediaWikiRESTApi = null
this.#hasCoordinates = null
}

Expand All @@ -144,6 +155,14 @@ class MediaWiki {
return this.#hasVisualEditorApi
}

/**
 * Tells whether the MediaWiki REST API is available on the wiki.
 * The availability check is run only while the cached flag is still `null`;
 * it probes the REST URL built for `apiCheckArticleId` and the outcome is stored and reused.
 */
public async hasMediaWikiRESTApi(): Promise<boolean> {
  if (this.#hasMediaWikiRESTApi !== null) {
    return this.#hasMediaWikiRESTApi
  }
  const probeUrl = this.mediaWikiRESTApiDirector.buildArticleURL(this.apiCheckArticleId)
  this.#hasMediaWikiRESTApi = await checkApiAvailability(probeUrl)
  return this.#hasMediaWikiRESTApi
}

public async hasCoordinates(downloader: Downloader): Promise<boolean> {
if (this.#hasCoordinates === null) {
const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num)
Expand All @@ -170,9 +189,11 @@ class MediaWiki {
this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href)
this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL()
this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.#restApiPath)
this.mediawikiRESTApiURL = baseUrlDirector.buildMediaWikiREST(this.#mediawikiRESTApiPath)
this.modulePath = baseUrlDirector.buildModuleURL(this._modulePathOpt)
this.wikimediaDesktopUrlDirector = new DesktopURLDirector(this.desktopRestApiUrl.href)
this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href)
this.mediaWikiRESTApiDirector = new MediaWikiRESTApiDirector(this.mediawikiRESTApiURL.href)
}

public async login(downloader: Downloader) {
Expand Down Expand Up @@ -397,6 +418,7 @@ class MediaWiki {
const mwMetaData: MWMetaData = {
webUrl: this.webUrl.href,
apiUrl: this.apiUrl.href,
mediawikiRESTAPiPath: this.mediawikiRESTAPiPath,
modulePath: this.modulePath,
webUrlPath: this.webUrl.pathname,
wikiPath: this.#wikiPath,
Expand Down
1 change: 1 addition & 0 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ async function execute(argv: any) {
await MediaWiki.hasCoordinates(downloader)
await MediaWiki.hasWikimediaDesktopRestApi()
await MediaWiki.hasVisualEditorApi()
await MediaWiki.hasMediaWikiRESTApi()
await downloader.setBaseUrls(forceRender)

RedisStore.setOptions(argv.redis || config.defaults.redisPath)
Expand Down
2 changes: 1 addition & 1 deletion src/renderers/abstract.renderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {
} from '../util/misc.js'

type renderType = 'auto' | 'desktop' | 'mobile' | 'specific'
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile'
type renderName = 'VisualEditor' | 'WikimediaDesktop' | 'WikimediaMobile' | 'MediawikiRESTApi'

interface RendererBuilderOptionsBase {
renderType: renderType
Expand Down
17 changes: 16 additions & 1 deletion src/renderers/renderer.builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ import { Renderer } from './abstract.renderer.js'
import { VisualEditorRenderer } from './visual-editor.renderer.js'
import { WikimediaDesktopRenderer } from './wikimedia-desktop.renderer.js'
import { RendererBuilderOptions } from './abstract.renderer.js'
import { MediawikiRESTApiRenderer } from './wikimedia-rest-api.renderer.js'
import * as logger from './../Logger.js'

export class RendererBuilder {
public async createRenderer(options: RendererBuilderOptions): Promise<Renderer> {
const { renderType, renderName } = options

const [hasVisualEditorApi, hasWikimediaDesktopRestApi] = await Promise.all([MediaWiki.hasVisualEditorApi(), MediaWiki.hasWikimediaDesktopRestApi()])
const [hasVisualEditorApi, hasWikimediaDesktopRestApi, hasMediaWikiRESTApi] = await Promise.all([
MediaWiki.hasVisualEditorApi(),
MediaWiki.hasWikimediaDesktopRestApi(),
MediaWiki.hasMediaWikiRESTApi(),
])

switch (renderType) {
case 'desktop':
Expand All @@ -18,6 +23,8 @@ export class RendererBuilder {
return new WikimediaDesktopRenderer()
} else if (hasVisualEditorApi) {
return new VisualEditorRenderer()
} else if (hasMediaWikiRESTApi) {
return new MediawikiRESTApiRenderer()
} else {
logger.error('No available desktop renderer.')
process.exit(1)
Expand All @@ -31,6 +38,8 @@ export class RendererBuilder {
return new WikimediaDesktopRenderer()
} else if (hasVisualEditorApi) {
return new VisualEditorRenderer()
} else if (hasMediaWikiRESTApi) {
return new MediawikiRESTApiRenderer()
} else {
logger.error('No render available at all.')
process.exit(1)
Expand All @@ -50,6 +59,12 @@ export class RendererBuilder {
}
logger.error('Cannot create an instance of VisualEditor renderer.')
process.exit(1)
case 'MediawikiRESTApi':
if (hasMediaWikiRESTApi) {
return new MediawikiRESTApiRenderer()
}
logger.error('Cannot create an instance of MediawikiRESTApi renderer.')
process.exit(1)
case 'WikimediaMobile':
// TODO: return WikimediaMobile renderer
return
Expand Down
62 changes: 62 additions & 0 deletions src/renderers/wikimedia-rest-api.renderer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import * as logger from '../Logger.js'
import { Renderer } from './abstract.renderer.js'
import { getStrippedTitleFromHtml } from '../util/misc.js'
import { RenderOpts, RenderOutput } from './abstract.renderer.js'

/**
 * Renderer for article data fetched from the MediaWiki REST API, i.e.
 *   'https://{wikimedia-wiki}/w/rest.php/v1/page/{title}/with_html'
 * or
 *   'https://{3rd-party-mediawiki-wiki}/w/rest.php/v1/page/{title}/with_html'
 */
export class MediawikiRESTApiRenderer extends Renderer {
  /**
   * Extracts the article HTML and its display title from the downloaded payload.
   *
   * @param renderOpts render options; `data` is the payload to read `html` / `errorKey` from
   * @returns `{ html, displayTitle }` when the payload carries HTML, otherwise an empty
   *          string (after logging the reason)
   * @throws Error when `renderOpts.data` is missing
   */
  private async retrieveHtml(renderOpts: RenderOpts): Promise<any> {
    const { data, articleId, articleDetail, isMainPage } = renderOpts

    if (!data) {
      throw new Error(`Cannot render [${data}] into an article`)
    }

    if (data.html) {
      // The main page is kept as is; every other article gets an H1 title injected.
      const html: string = isMainPage ? data.html : super.injectH1TitleToHtml(data.html, articleDetail)
      const strippedTitle = getStrippedTitleFromHtml(html)
      // Fall back to the article id with ALL underscores turned into spaces
      // (a plain string pattern would only replace the first one).
      const displayTitle: string = strippedTitle || articleId.replace(/_/g, ' ')
      return { html, displayTitle }
    }

    if (data.errorKey) {
      logger.error(`Error in retrieved article [${articleId}]:`, data.errorKey)
    } else {
      logger.error('Unable to parse data from MediaWiki REST API')
    }
    return ''
  }

  /**
   * Renders one article.
   *
   * @param renderOpts render options forwarded to `retrieveHtml` and `processHtml`
   * @returns a one-element render output, or an empty string when no HTML could be retrieved
   * @throws Error carrying the message of any failure raised while rendering
   */
  public async render(renderOpts: RenderOpts): Promise<any> {
    try {
      const result: RenderOutput = []
      const { articleId, articleDetail, webp, _moduleDependencies, dump } = renderOpts
      // retrieveHtml may return '' — destructuring then yields undefined for both fields.
      const { html, displayTitle } = await this.retrieveHtml(renderOpts)
      if (!html) {
        return ''
      }

      const { finalHTML, mediaDependencies, subtitles } = await super.processHtml(html, dump, articleId, articleDetail, _moduleDependencies, webp)
      result.push({
        articleId,
        displayTitle,
        html: finalHTML,
        mediaDependencies,
        subtitles,
      })
      return result
    } catch (err) {
      // The caught value is not guaranteed to be an Error instance; narrow before reading `message`.
      const message = err instanceof Error ? err.message : String(err)
      logger.error(message)
      throw new Error(message)
    }
  }
}
1 change: 1 addition & 0 deletions src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ interface MWMetaData {

baseUrl: string
wikiPath: string
mediawikiRESTAPiPath: string
apiPath: string
domain: string
webUrl: string
Expand Down
7 changes: 7 additions & 0 deletions src/util/builders/url/base.director.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,11 @@ export default class BaseURLDirector {
.setPath(path ?? 'w/load.php')
.build(false, '?')
}

/**
 * Builds the MediaWiki REST API page endpoint URL on top of the base domain.
 * `path` defaults to 'w/rest.php/v1/page' when it is null or undefined.
 */
buildMediaWikiREST(path?: string) {
  const restPath = path ?? 'w/rest.php/v1/page'
  const builder = urlBuilder.setDomain(this.baseDomain).setPath(restPath)
  return builder.build(true, '/')
}
}
17 changes: 17 additions & 0 deletions src/util/builders/url/mediawiki-rest-api.director.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import urlBuilder from './url.builder.js'

/**
 * Interface to build URLs based on MediaWiki REST API URL
 */
export default class MediaWikiRESTApiURL {
  /** Base the article URLs are appended to, as handed to the constructor. */
  baseDomain: string

  constructor(baseDomain: string) {
    this.baseDomain = baseDomain
  }

  /**
   * Builds the '{base}{title}/with_html' URL for an article.
   *
   * The article id is percent-encoded because it is a single path segment:
   * titles containing '/', '?', '#' or '%' would otherwise alter the URL structure.
   * NOTE(review): assumes callers pass a raw (not yet encoded) title and that the
   * built base already ends with a path separator — confirm against url.builder.
   *
   * @param articleId raw article title/id
   * @returns the article URL as a string
   */
  buildArticleURL(articleId: string) {
    const base = urlBuilder.setDomain(this.baseDomain).build()
    return `${base}${encodeURIComponent(articleId)}/with_html`
  }
}
2 changes: 1 addition & 1 deletion src/util/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/
export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js'
export const MAX_FILE_DOWNLOAD_RETRIES = 5
export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not ingestible by Parsoid https://github.com/openzim/mwoffliner/issues/1853
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor']
export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor', 'MediawikiRESTApi']
Loading

0 comments on commit 8c7ba86

Please sign in to comment.