From ce1f163284359951926decafcf507581d6293314 Mon Sep 17 00:00:00 2001 From: Dongwoo Arthur Kim Date: Mon, 19 Jun 2017 16:56:16 +0900 Subject: [PATCH] Add requirements.txt, .gitignore, corpora simple shortcut. --- .gitignore | 103 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 6 +-- requirements.txt | 4 ++ 3 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..55bc9f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,103 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +corpora +logdir +preprocessed diff --git a/README.md b/README.md index 0256c23..006b67a 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ I don't intend to replicate the paper exactly. Rather, I aim to implement the ma ## Training * STEP 1. Download [IWSLT 2016 German–English parallel corpus](https://wit3.fbk.eu/download.php?release=2016-01&type=texts&slang=de&tlang=en) and extract it to `corpora/` folder. +```sh +wget -qO- --show-progress https://wit3.fbk.eu/archive/2016-01//texts/de/en/de-en.tgz | tar xz; mv de-en corpora +``` * STEP 2. Adjust hyper parameters in `hyperparams.py` if necessary. * STEP 3. Run `prepro.py` to generate vocabulary files to the `preprocessed` folder. * STEP 4. Run `train.py` or download the [pretrained files](https://u42868014.dl.dropboxusercontent.com/u/42868014/transformer/logdir.zip). @@ -86,6 +89,3 @@ got: Oh yeah you all are incredibly source: Dies ist nicht meine Meinung Das sind Fakten
expected: This is not my opinion These are the facts
got: This is not my opinion These are facts - - - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c806b13 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +nltk>=3.2.4 +numpy>=1.13.0 +regex>=2017.6.7 +tensorflow>=1.2.0