Skip to content

Commit

Permalink
Implement 'legunto install'
Browse files Browse the repository at this point in the history
  • Loading branch information
lens0021 committed Jul 21, 2020
1 parent 27b0f16 commit 5391d6c
Show file tree
Hide file tree
Showing 11 changed files with 288 additions and 11 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ jobs:
run: |
python setup.py bdist_wheel
pip install --no-cache-dir dist/legunto-*.whl
- name: Unit test
run: pytest
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Legunto
scribunto.json
scribunto.lock
lua/


# Node
node_modules/


# Python

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
3 changes: 3 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
.venv/
scribunto.json
scribunto.lock
lua/
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# legunto

Fetch MediaWiki Scribunto modules from wikis
Fetch MediaWiki Scribunto modules from wikis and save as files

## Limitations

- Wiki pages can have a slash (`/`) in their names, but filenames on Unix can't. Slashes are converted
to hyphens (`-`) on saving.
8 changes: 0 additions & 8 deletions legunto

This file was deleted.

12 changes: 10 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,18 @@
# Distribution metadata and packaging configuration for the legunto project.
# NOTE(review): this listing comes from a rendered diff, so removed and added
# lines appear side by side -- `packages=` is passed twice below, which Python
# rejects as a repeated keyword argument. Confirm against the real setup.py
# which line of each pair is current.
setuptools.setup(
name="legunto",
version="0.0.1",
scripts=["legunto"],
description="Fetch MediaWiki Scribunto modules from wikis",
long_description=long_description,
long_description_content_type="text/markdown",
packages=setuptools.find_packages(),

packages=[
'legunto',
'scribunto'
],
install_requires=["mwclient"],
# The packages listed above are looked up under the src/ directory.
package_dir={'': 'src'},

# Installs a `legunto` command that calls legunto.console_main().
entry_points={
'console_scripts': ['legunto = legunto:console_main'],
}
)
132 changes: 132 additions & 0 deletions src/legunto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from scribunto import search_dependencies, rewrite_requires
from urllib.parse import urlparse
import json
import logging
import mwclient
import os
import pathlib
import sys


def print_help_massage() -> None:
    """Write the command-line usage summary to standard output."""
    usage = """
Usage: legunto COMMAND
Commands:
install fetch lua modules based on 'scribunto.json'
"""
    print(usage)


def get_interwiki_map() -> dict:
    """Fetch the interwiki prefix table from meta.wikimedia.org.

    Queries the ``siteinfo`` API for its ``interwikimap`` and flattens the
    result into a lookup table.

    Returns:
        A dict mapping each interwiki prefix to the URL registered for it.
    """
    site = mwclient.Site('meta.wikimedia.org')
    response = site.api('query', meta='siteinfo', siprop='interwikimap')

    return {
        entry['prefix']: entry['url']
        for entry in response["query"]["interwikimap"]
    }


def fetch_module(url: str, module_name: str) -> dict:
    """Download a Scribunto module page from a wiki.

    Args:
        url: A URL on the target wiki; only its scheme and host are used.
        module_name: Title of the module page. ``Module:`` is prepended when
            the title does not already start with it.

    Returns:
        A dict with the keys ``pageid``, ``revid``, ``title`` and ``text``.

    Raises:
        KeyError: If the wiki reports that the page does not exist.
    """
    if not module_name.startswith('Module:'):
        module_name = 'Module:' + module_name

    parsed = urlparse(url)
    print(f'Fetching "{module_name}" from {parsed.netloc} ...', end='')
    site = mwclient.Site(parsed.netloc, scheme=parsed.scheme)

    response = site.api('query', titles=module_name, prop='info', utf8="1")
    # Only one title was requested, so the first entry is the page asked for.
    page_info = next(iter(response['query']['pages'].values()))
    if 'missing' in page_info:
        # Finish the progress line started above before failing, and say
        # which page is absent instead of surfacing a bare KeyError('pageid').
        print(' Failed')
        raise KeyError(f'"{module_name}" does not exist on {parsed.netloc}')

    module = {
        'pageid': page_info['pageid'],
        'revid': page_info['lastrevid'],
        'title': page_info['title'],
        'text': site.pages[module_name].text(),
    }
    print(' Done')

    return module


def to_filename(name: str) -> str:
    """Convert a page title into a file name.

    The namespace (everything up to the first colon) is dropped and slashes
    are replaced by hyphens, since a slash cannot appear in a Unix file name.
    Only the first colon separates the namespace, so any later colon stays
    part of the name. A title without a namespace is used as it is.
    """
    _namespace, separator, title = name.partition(':')
    if not separator:
        title = name
    return title.replace('/', '-')


def install_dependencies() -> None:
    """Fetch every module listed in 'scribunto.json' and what they require.

    Reads the ``dependencies`` list from 'scribunto.json' in the current
    working directory, downloads each module (and, transitively, the modules
    it requires) into ``lua/<wiki>/`` and records what was fetched in
    'scribunto.lock'. Logs an error and returns without doing anything when
    'scribunto.json' is missing.
    """
    base_dir = pathlib.Path.cwd()
    scribunto_file = base_dir / "scribunto.json"
    if not scribunto_file.exists():
        logging.error("Can't find 'scribunto.json' file in this directory")
        return

    with open(scribunto_file, encoding="utf-8") as f:
        deps_to_add = list(json.load(f)["dependencies"])

    interwiki = get_interwiki_map()

    lock = {
        'modules': {}
    }

    print(f'{len(deps_to_add)} dependency(s) found')

    while deps_to_add:
        dep = deps_to_add.pop()
        if dep in lock['modules']:
            # Already fetched; this also stops circular requires from looping.
            continue

        wiki, separator, page = dep.partition("/")
        if not separator or not page:
            logging.warning(f"'{dep}' is not in the form '@wiki/Module name'")
            logging.warning(f"skip '{dep}'...")
            continue
        if wiki.startswith("@"):
            wiki = wiki[1:]

        if wiki not in interwiki:
            logging.warning(f"'{wiki}' is not a valid interwiki prefix")
            logging.warning(f"skip '{dep}'...")
            continue

        # TODO read lock file and compare revids to skip fetching

        module = fetch_module(interwiki[wiki], page)
        lock['modules'][dep] = {
            'pageid': module['pageid'],
            'revid': module['revid'],
            'title': module['title'],
        }
        indirect_deps = search_dependencies(module['text'], prefix=wiki)
        if indirect_deps:
            lock['modules'][dep]['dependencies'] = indirect_deps

        target_dir = base_dir / "lua" / wiki
        target_dir.mkdir(parents=True, exist_ok=True)

        # Module sources may contain non-ASCII text, so do not depend on the
        # platform's default encoding.
        target_file = target_dir / to_filename(module['title'])
        with open(target_file, "w", encoding="utf-8") as f:
            f.write(rewrite_requires(module['text'], prefix=wiki))

        deps_to_add += indirect_deps

    with open(base_dir / "scribunto.lock", "w", encoding="utf-8") as f:
        f.write(json.dumps(lock, indent=2))


def console_main() -> None:
    """Entry point of the ``legunto`` command; dispatches on ``sys.argv``.

    With no argument, or with ``help`` / ``--help`` as the only argument,
    the usage text is printed. ``install`` as the only argument runs the
    installation. Anything else is reported as an unknown command.
    """
    args = sys.argv[1:]

    if not args or (len(args) == 1 and args[0] in ('--help', 'help')):
        print_help_massage()
        return

    if args == ['install']:
        install_dependencies()
        return

    # The trailing space keeps the two sentences apart once the adjacent
    # string literals are concatenated.
    logging.error(
        f"legunto: '{args[0]}' is not a legunto command. "
        "See 'legunto --help'")


# Names exported by `from legunto import *`.
__all__ = ["console_main"]
5 changes: 5 additions & 0 deletions src/legunto/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Run the legunto command-line interface when executed as a script."""
from legunto import console_main


if __name__ == '__main__':
    # Hand console_main's return value to the interpreter as the exit status.
    raise SystemExit(console_main())
43 changes: 43 additions & 0 deletions src/scribunto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import re
from typing import List


def search_dependencies(text: str, prefix=None) -> List[str]:
    """List the modules that a piece of Lua source loads.

    Looks for ``require(...)`` and ``mw.loadData(...)`` calls whose quoted
    argument is a title in the ``Module:`` (or ``모듈:``) namespace.

    Args:
        text: Lua source code to scan.
        prefix: Optional interwiki prefix. When given, every name is
            returned as ``@<prefix>/<name>``.

    Returns:
        The module names without their namespace, each listed once, in the
        order of first appearance.
    """
    # The title ends at the first quote that matches the opening one, so code
    # following the call on the same line is never swallowed into the name.
    regex = (
        r'''(?:require|mw\.loadData)\s*\(\s*(['"])'''
        r'''(?:[Mm]odule|모듈):(.+?)\1'''
    )

    names = [match.group(2) for match in re.finditer(regex, text)]
    # dict.fromkeys drops duplicates while keeping a deterministic order.
    names = list(dict.fromkeys(names))
    if prefix:
        names = [f"@{prefix}/{name}" for name in names]

    return names


def rewrite_requires(text: str, prefix: str) -> str:
    """Point ``require``/``mw.loadData`` calls at the locally saved modules.

    Every ``Module:<title>`` (or ``모듈:<title>``) argument is rewritten to
    ``Module:@<prefix>/<title>``, with slashes inside the title replaced by
    hyphens: ``Module:foo/bar`` becomes ``Module:@en/foo-bar``.

    Args:
        text: Lua source code to rewrite.
        prefix: Interwiki prefix to insert after the namespace.

    Returns:
        The rewritten source; everything outside the matched titles is left
        untouched.
    """
    # Group 1 is the call up to and including the namespace, group 2 the
    # opening quote, group 3 the title. The look-ahead stops the title at
    # the first quote matching the opening one, so code after the call on
    # the same line is never modified.
    regex = (
        r'''((?:require|mw\.loadData)\s*\(\s*(['"])(?:[Mm]odule|모듈):)'''
        r'''(.+?)(?=\2)'''
    )

    def _localize(match):
        # A callable replacement keeps `prefix` from being interpreted as a
        # regex replacement template.
        title = match.group(3).replace('/', '-')
        return f"{match.group(1)}@{prefix}/{title}"

    return re.sub(regex, _localize, text)


# Names exported by `from scribunto import *`.
__all__ = ['search_dependencies', 'rewrite_requires']
15 changes: 15 additions & 0 deletions test/legunto_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import legunto


def test_to_filename():
    """Titles lose their namespace and have slashes replaced by hyphens."""
    cases = [
        ('Module:foo', 'foo'),
        ('Module:foo bar', 'foo bar'),
        ('Module:foo/bar', 'foo-bar'),
        ('모듈:foo/bar', 'foo-bar'),
    ]

    for title, expected in cases:
        assert legunto.to_filename(title) == expected
61 changes: 61 additions & 0 deletions test/scribunto_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import scribunto


def test_search_dependencies() -> None:
    """search_dependencies finds Module: requires, with and without prefix."""
    text = (
        "\n"
        "local mHatnote = require('Module:Hatnote')\n"
        "local mHatList = require('Module:Hatnote list')\n"
        "local libraryUtil = require('libraryUtil')\n"
        "local checkType = libraryUtil.checkType\n"
        "local p = {}\n"
    )

    expected = [
        'Hatnote',
        'Hatnote list',
    ]
    actual = scribunto.search_dependencies(text)

    # list.sort() returns None, so comparing its results always passes;
    # sorted() returns the lists that actually need comparing.
    assert sorted(actual) == sorted(expected)

    # with prefix

    text = (
        "\n"
        "local mHatnote = require('Module:Hatnote')\n"
        "local mHatList = require('Module:Hatnote list')\n"
    )

    expected = [
        '@en/Hatnote',
        '@en/Hatnote list',
    ]
    actual = scribunto.search_dependencies(text, prefix='en')

    assert sorted(actual) == sorted(expected)


def test_rewrite_requires() -> None:
    """rewrite_requires adds the wiki prefix and turns slashes into hyphens."""
    multiline_source = (
        "\n"
        "local mHatnote = require('Module:Hatnote')\n"
        "local mHatList = require('Module:Hatnote list')\n"
        "local libraryUtil = require('libraryUtil')\n"
        "local checkType = libraryUtil.checkType\n"
        "local p = {}\n"
    )
    multiline_expected = (
        "\n"
        "local mHatnote = require('Module:@en/Hatnote')\n"
        "local mHatList = require('Module:@en/Hatnote list')\n"
        "local libraryUtil = require('libraryUtil')\n"
        "local checkType = libraryUtil.checkType\n"
        "local p = {}\n"
    )

    cases = [
        (multiline_source, multiline_expected),
        ("require('Module:Foo/bar')", "require('Module:@en/Foo-bar')"),
    ]

    for source, expected in cases:
        assert scribunto.rewrite_requires(source, 'en') == expected

0 comments on commit 5391d6c

Please sign in to comment.