diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd83887..7e41db4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,3 +50,6 @@ jobs: run: | python setup.py bdist_wheel pip install --no-cache-dir dist/legunto-*.whl + + - name: Unit test + run: pytest diff --git a/.gitignore b/.gitignore index 25d4cad..77aac90 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,15 @@ +# Legunto +scribunto.json +scribunto.lock +lua/ + + +# Node node_modules/ + +# Python + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.prettierignore b/.prettierignore index 21d0b89..c627289 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1 +1,4 @@ .venv/ +scribunto.json +scribunto.lock +lua/ diff --git a/README.md b/README.md index 63e4439..931c856 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ # legunto -Fetch MediaWiki Scribunto modules from wikis +Fetch MediaWiki Scribunto modules from wikis and save them as files + +## Limitations + +- Wiki pages can have a slash (`/`) in their name, but filenames on Unix can't. Slashes are converted + to hyphens (`-`) on saving. 
diff --git a/legunto b/legunto
deleted file mode 100755
index 07728b6..0000000
--- a/legunto
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env python
-
-def main():
-    print("Hello legunto!")
-
-
-if __name__ == '__main__':
-    main()
diff --git a/setup.py b/setup.py
index 356fdef..8efb9a7 100644
--- a/setup.py
+++ b/setup.py
@@ -6,10 +6,18 @@
 setuptools.setup(
     name="legunto",
     version="0.0.1",
-    scripts=["legunto"],
     description="Fetch MediaWiki Scribunto modules from wikis",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(),
+
+    packages=[
+        'legunto',
+        'scribunto'
+    ],
     install_requires=["mwclient"],
+    package_dir={'': 'src'},
+
+    entry_points={
+        'console_scripts': ['legunto = legunto:console_main'],
+    }
 )
diff --git a/src/legunto/__init__.py b/src/legunto/__init__.py
new file mode 100644
index 0000000..c73fe2c
--- /dev/null
+++ b/src/legunto/__init__.py
@@ -0,0 +1,167 @@
+from scribunto import search_dependencies, rewrite_requires
+from typing import Any, Dict
+from urllib.parse import urlparse
+import json
+import logging
+import mwclient
+import os
+import pathlib
+import sys
+
+
+def print_help_massage() -> None:
+    """Print the command-line usage summary to stdout."""
+    print("""
+Usage: legunto COMMAND
+
+Commands:
+  install    fetch lua modules based on 'scribunto.json'
+""")
+
+
+def get_interwiki_map() -> Dict[str, str]:
+    """Return the interwiki map reported by meta.wikimedia.org.
+
+    Each interwiki prefix is mapped to the URL listed for it in the
+    ``interwikimap`` part of the ``siteinfo`` query.
+    """
+    site = mwclient.Site('meta.wikimedia.org')
+    result = site.api('query', meta='siteinfo', siprop='interwikimap')
+
+    return {
+        wiki['prefix']: wiki['url']
+        for wiki in result["query"]["interwikimap"]
+    }
+
+
+def fetch_module(url: str, module_name: str) -> Dict[str, Any]:
+    """Fetch one module page from the wiki that ``url`` points to.
+
+    ``module_name`` may be given with or without the ``Module:`` prefix.
+    The returned dict holds the ``pageid``, ``revid``, ``title`` and
+    ``text`` of the page.
+    """
+    if not module_name.startswith('Module:'):
+        module_name = 'Module:'+module_name
+
+    parsed = urlparse(url)
+    # flush, otherwise the message may only show up together with ' Done'
+    print(f'Fetching "{module_name}" from {parsed.netloc} ...',
+          end='', flush=True)
+    site = mwclient.Site(parsed.netloc, scheme=parsed.scheme)
+
+    result = site.api('query', titles=module_name, prop='info', utf8="1")
+    # a single title was requested, so take the only entry of 'pages'
+    page = next(iter(result['query']['pages'].values()))
+
+    module = {
+        'pageid': page['pageid'],
+        'revid': page['lastrevid'],
+        'title': page['title'],
+        'text': site.pages[module_name].text(),
+    }
+    print(' Done')
+
+    return module
+
+
+def to_filename(name: str) -> str:
+    """Turn a page title such as ``Module:foo/bar`` into a file name.
+
+    The part up to the first colon is dropped and slashes are replaced by
+    hyphens.
+    """
+    # maxsplit=1 keeps colons that are part of the page name itself
+    return name.split(':', 1)[1].replace('/', '-')
+
+
+def install_dependencies() -> None:
+    """Fetch the modules listed in 'scribunto.json' and their requirements.
+
+    Each module is written below ``lua/<wiki>/`` in the current directory
+    with its module loads rewritten, and the page id, revision id, title
+    and dependencies of every fetched module are saved to
+    'scribunto.lock'.
+    """
+    cwd = pathlib.Path(os.getcwd())
+    scribunto_file = cwd / "scribunto.json"
+    if not scribunto_file.exists():
+        logging.error("Can't find 'scribunto.json' file in this directory")
+        # stop here instead of crashing on the missing file below
+        return
+
+    with open(scribunto_file, "r", encoding="utf-8") as f:
+        dependencies = json.load(f)["dependencies"]
+
+    interwiki = get_interwiki_map()
+
+    lock = {
+        'modules': {}
+    }
+
+    # a copy, because the queue is consumed and extended while installing
+    deps_to_add = list(dependencies)
+
+    print(f'{len(deps_to_add)} dependency(s) found')
+
+    while deps_to_add:
+        dep = deps_to_add.pop()
+        if dep in lock['modules']:
+            continue
+
+        # a dependency is written as '<wiki>/<page>' or '@<wiki>/<page>'
+        wiki, separator, page = dep.partition("/")
+        if not separator:
+            logging.warning(f"'{dep}' has no '/' between wiki and module")
+            logging.warning(f"skip '{dep}'...")
+            continue
+        if wiki.startswith("@"):
+            wiki = wiki[1:]
+
+        if wiki not in interwiki:
+            logging.warning(f"'{wiki}' is not a valid interwiki prefix")
+            logging.warning(f"skip '{dep}'...")
+            continue
+
+        # TODO read lock file and compare revids to skip fetching
+
+        module = fetch_module(interwiki[wiki], page)
+        lock['modules'][dep] = {
+            'pageid': module['pageid'],
+            'revid': module['revid'],
+            'title': module['title'],
+        }
+        indirect_deps = search_dependencies(module['text'], prefix=wiki)
+        if indirect_deps:
+            lock['modules'][dep]['dependencies'] = indirect_deps
+
+        target_dir = cwd / "lua" / wiki
+        target_dir.mkdir(parents=True, exist_ok=True)
+
+        # module text is not limited to ASCII; don't depend on the locale
+        target = target_dir / to_filename(module['title'])
+        with open(target, "w", encoding="utf-8") as f:
+            f.write(rewrite_requires(module['text'], prefix=wiki))
+
+        deps_to_add += indirect_deps
+
+    with open(cwd / "scribunto.lock", "w", encoding="utf-8") as f:
+        f.write(json.dumps(lock, indent=2))
+
+
+def console_main() -> None:
+    """Entry point of the ``legunto`` command; dispatches on ``sys.argv``."""
+    args = sys.argv[1:]
+    if not args or args in (['--help'], ['help']):
+        print_help_massage()
+    elif args == ['install']:
+        install_dependencies()
+    else:
+        logging.error(
+            f"legunto: '{args[0]}' is not a legunto command. "
+            "See 'legunto --help'")
+
+
+__all__ = [
+    "console_main"
+]
diff --git a/src/legunto/__main__.py b/src/legunto/__main__.py
new file mode 100644
index 0000000..b46d6c9
--- /dev/null
+++ b/src/legunto/__main__.py
@@ -0,0 +1,5 @@
+import legunto
+
+
+if __name__ == '__main__':
+    raise SystemExit(legunto.console_main())
diff --git a/src/scribunto/__init__.py b/src/scribunto/__init__.py
new file mode 100644
index 0000000..5968624
--- /dev/null
+++ b/src/scribunto/__init__.py
@@ -0,0 +1,48 @@
+import re
+from typing import List, Optional
+
+
+# A require()/mw.loadData() call whose argument is a quoted module title.
+# 'head' runs up to the namespace colon, 'name' is the module name and
+# 'tail' is the closing quote plus any whitespace after it. The name ends
+# at the first quote matching the opening one, so text later on the same
+# line (another call, a quote in a comment) never becomes part of it.
+_MODULE_CALL = re.compile(
+    r"""(?P<head>(?:require|mw\.loadData)\s*\(\s*(?P<quote>['"])"""
+    r"""(?:[Mm]odule|모듈):)(?P<name>.+?)(?P<tail>(?P=quote)\s*)"""
+)
+
+
+def search_dependencies(text: str, prefix: Optional[str] = None) -> List[str]:
+    """Return the names of the modules loaded by ``text``.
+
+    The names are collected from ``require`` and ``mw.loadData`` calls and
+    returned without the namespace, each once and in sorted order. When
+    ``prefix`` is given every name is returned as ``@<prefix>/<name>``.
+    """
+    names = sorted({
+        match.group('name') for match in _MODULE_CALL.finditer(text)
+    })
+    if prefix:
+        return [f"@{prefix}/{name}" for name in names]
+
+    return names
+
+
+def rewrite_requires(text: str, prefix: str) -> str:
+    """Rewrite the module titles in the module loads found in ``text``.
+
+    ``Module:foo/bar`` becomes ``Module:@<prefix>/foo-bar``: slashes in the
+    name are replaced by hyphens and ``@<prefix>/`` is put in front of it.
+    """
+    def rewrite(match):
+        name = match.group('name').replace("/", "-")
+        return f"{match.group('head')}@{prefix}/{name}{match.group('tail')}"
+
+    return _MODULE_CALL.sub(rewrite, text)
+
+
+__all__ = [
+    'search_dependencies',
+    'rewrite_requires',
+]
diff --git a/test/legunto_test.py b/test/legunto_test.py
new file mode 100644
index 0000000..1a7eecd
--- /dev/null
+++ b/test/legunto_test.py
@@ -0,0 +1,18 @@
+import legunto
+
+
+def test_to_filename():
+    assert legunto.to_filename('Module:foo') == \
+        'foo'
+
+    assert legunto.to_filename('Module:foo bar') == \
+        'foo bar'
+
+    assert legunto.to_filename('Module:foo/bar') == \
+        'foo-bar'
+
+    assert legunto.to_filename('모듈:foo/bar') == \
+        'foo-bar'
+
+    assert legunto.to_filename('Module:foo:bar') == \
+        'foo:bar'
diff --git a/test/scribunto_test.py
b/test/scribunto_test.py
new file mode 100644
index 0000000..3ba5103
--- /dev/null
+++ b/test/scribunto_test.py
@@ -0,0 +1,64 @@
+import scribunto
+
+
+def test_search_dependencies() -> None:
+    """Only ``Module:`` loads are found; ``prefix`` is applied to each."""
+    text = '''
+local mHatnote = require('Module:Hatnote')
+local mHatList = require('Module:Hatnote list')
+local libraryUtil = require('libraryUtil')
+local checkType = libraryUtil.checkType
+local p = {}
+'''
+
+    expected = [
+        'Hatnote',
+        'Hatnote list',
+    ]
+    actual = scribunto.search_dependencies(text)
+
+    # list.sort() returns None; compare sorted copies to check the content
+    assert sorted(actual) == sorted(expected)
+
+    # with prefix
+
+    text = '''
+local mHatnote = require('Module:Hatnote')
+local mHatList = require('Module:Hatnote list')
+'''
+
+    expected = [
+        '@en/Hatnote',
+        '@en/Hatnote list',
+    ]
+    actual = scribunto.search_dependencies(text, prefix='en')
+
+    assert sorted(actual) == sorted(expected)
+
+
+def test_rewrite_requires() -> None:
+    """Module titles get the ``@en/`` prefix and slashes become hyphens."""
+    text = '''
+local mHatnote = require('Module:Hatnote')
+local mHatList = require('Module:Hatnote list')
+local libraryUtil = require('libraryUtil')
+local checkType = libraryUtil.checkType
+local p = {}
+'''
+
+    expected = '''
+local mHatnote = require('Module:@en/Hatnote')
+local mHatList = require('Module:@en/Hatnote list')
+local libraryUtil = require('libraryUtil')
+local checkType = libraryUtil.checkType
+local p = {}
+'''
+    actual = scribunto.rewrite_requires(text, 'en')
+
+    assert actual == expected
+
+    text = "require('Module:Foo/bar')"
+    expected = "require('Module:@en/Foo-bar')"
+    actual = scribunto.rewrite_requires(text, 'en')
+
+    assert actual == expected